From 319fac14fe979375faf5fb93db5efbc3c6d0a64c Mon Sep 17 00:00:00 2001 From: ines Date: Tue, 3 Oct 2017 14:28:18 +0200 Subject: [PATCH] Update global config and landing page --- website/_data.json | 49 +++++++---- website/_harp.json | 80 ++++-------------- website/index.jade | 199 ++++++++++++++++++++++----------------------- 3 files changed, 145 insertions(+), 183 deletions(-) diff --git a/website/_data.json b/website/_data.json index 525c70d80..53543b2d0 100644 --- a/website/_data.json +++ b/website/_data.json @@ -3,24 +3,22 @@ "landing": true, "logos": [ { - "quora": [ "https://www.quora.com", 150 ], - "chartbeat": [ "https://chartbeat.com", 200 ], - "duedil": [ "https://www.duedil.com", 150 ], - "stitchfix": [ "https://www.stitchfix.com", 190 ] + "airbnb": [ "https://www.airbnb.com", 150, 45], + "quora": [ "https://www.quora.com", 120, 34 ], + "retriever": [ "https://www.retriever.no", 150, 33 ], + "stitchfix": [ "https://www.stitchfix.com", 150, 18 ] }, { - "wayblazer": [ "http://wayblazer.com", 200 ], - "indico": [ "https://indico.io", 150 ], - "chattermill": [ "https://chattermill.io", 175 ], - "turi": [ "https://turi.com", 150 ], - "kip": [ "http://kipthis.com", 70 ] - }, + "chartbeat": [ "https://chartbeat.com", 180, 25 ], + "allenai": [ "https://allenai.org", 220, 37 ] + } + ], + "features": [ { - "socrata": [ "https://www.socrata.com", 150 ], - "cytora": [ "http://www.cytora.com", 125 ], - "signaln": [ "http://signaln.com", 150 ], - "wonderflow": [ "http://www.wonderflow.co", 200 ], - "synapsify": [ "http://www.gosynapsify.com", 150 ] + "thoughtworks": ["https://www.thoughtworks.com/radar/tools", 150, 28], + "wapo": ["https://www.washingtonpost.com/news/wonk/wp/2016/05/18/googles-new-artificial-intelligence-cant-understand-these-sentences-can-you/", 100, 77], + "venturebeat": ["https://venturebeat.com/2017/01/27/4-ai-startups-that-analyze-customer-reviews/", 150, 19], + "microsoft": 
["https://www.microsoft.com/developerblog/2016/09/13/training-a-classifier-for-relation-extraction-from-medical-literature/", 130, 28] } ] }, @@ -34,7 +32,24 @@ "landing": true }, - "announcement" : { - "title": "Important Announcement" + "styleguide": { + "title": "Styleguide", + "sidebar": { + "Styleguide": { "": "styleguide" }, + "Resources": { + "Website Source": "https://github.com/explosion/spacy/tree/master/website", + "Contributing Guide": "https://github.com/explosion/spaCy/blob/master/CONTRIBUTING.md" + } + }, + "menu": { + "Introduction": "intro", + "Logo": "logo", + "Colors": "colors", + "Typography": "typography", + "Elements": "elements", + "Components": "components", + "Embeds": "embeds", + "Markup Reference": "markup" + } } } diff --git a/website/_harp.json b/website/_harp.json index 1c27426f4..55035c32f 100644 --- a/website/_harp.json +++ b/website/_harp.json @@ -11,12 +11,9 @@ "COMPANY": "Explosion AI", "COMPANY_URL": "https://explosion.ai", "DEMOS_URL": "https://demos.explosion.ai", + "MODELS_REPO": "explosion/spacy-models", - "SPACY_VERSION": "1.8", - "LATEST_NEWS": { - "url": "https://github.com/explosion/spaCy/releases/tag/v2.0.0-alpha", - "title": "Test spaCy v2.0.0 alpha!" 
- }, + "SPACY_VERSION": "2.0", "SOCIAL": { "twitter": "spacy_io", @@ -27,25 +24,23 @@ }, "NAVIGATION": { - "Home": "/", - "Usage": "/docs/usage", - "Reference": "/docs/api", - "Demos": "/docs/usage/showcase", - "Blog": "https://explosion.ai/blog" + "Usage": "/usage", + "Models": "/models", + "API": "/api" }, "FOOTER": { "spaCy": { - "Usage": "/docs/usage", - "API Reference": "/docs/api", - "Tutorials": "/docs/usage/tutorials", - "Showcase": "/docs/usage/showcase" + "Usage": "/usage", + "Models": "/models", + "API Reference": "/api", + "Resources": "/usage/resources" }, "Support": { "Issue Tracker": "https://github.com/explosion/spaCy/issues", "StackOverflow": "http://stackoverflow.com/questions/tagged/spacy", - "Reddit usergroup": "https://www.reddit.com/r/spacynlp/", - "Gitter chat": "https://gitter.im/explosion/spaCy" + "Reddit Usergroup": "https://www.reddit.com/r/spacynlp/", + "Gitter Chat": "https://gitter.im/explosion/spaCy" }, "Connect": { "Twitter": "https://twitter.com/spacy_io", @@ -74,21 +69,11 @@ {"id": "venv", "title": "virtualenv", "help": "Use a virtual environment and install spaCy into a user directory" }, {"id": "gpu", "title": "GPU", "help": "Run spaCy on GPU to make it faster. Requires an NVDIA graphics card with CUDA 2+. 
See section below for more info."}] }, - { "id": "model", "title": "Models", "multiple": true, "options": [ - { "id": "en", "title": "English", "meta": "50MB" }, - { "id": "de", "title": "German", "meta": "645MB" }, - { "id": "fr", "title": "French", "meta": "1.33GB" }, - { "id": "es", "title": "Spanish", "meta": "377MB"}] - } + { "id": "model", "title": "Models", "multiple": true } ], "QUICKSTART_MODELS": [ - { "id": "lang", "title": "Language", "options": [ - { "id": "en", "title": "English", "checked": true }, - { "id": "de", "title": "German" }, - { "id": "fr", "title": "French" }, - { "id": "es", "title": "Spanish" }] - }, + { "id": "lang", "title": "Language"}, { "id": "load", "title": "Loading style", "options": [ { "id": "spacy", "title": "Use spacy.load()", "checked": true, "help": "Use spaCy's built-in loader to load the model by name." }, { "id": "module", "title": "Import as module", "help": "Import the model explicitly as a Python module." }] @@ -98,50 +83,15 @@ } ], - "MODELS": { - "en": [ - { "id": "en_core_web_sm", "lang": "English", "feats": [1, 1, 1, 1], "size": "50 MB", "license": "CC BY-SA", "def": true }, - { "id": "en_core_web_md", "lang": "English", "feats": [1, 1, 1, 1], "size": "1 GB", "license": "CC BY-SA" }, - { "id": "en_depent_web_md", "lang": "English", "feats": [1, 1, 1, 0], "size": "328 MB", "license": "CC BY-SA" }, - { "id": "en_vectors_glove_md", "lang": "English", "feats": [1, 0, 0, 1], "size": "727 MB", "license": "CC BY-SA" } - ], - "de": [ - { "id": "de_core_news_md", "lang": "German", "feats": [1, 1, 1, 1], "size": "645 MB", "license": "CC BY-SA" } - ], - "fr": [ - { "id": "fr_depvec_web_lg", "lang": "French", "feats": [1, 1, 0, 1], "size": "1.33 GB", "license": "CC BY-NC" } - ], - "es": [ - { "id": "es_core_web_md", "lang": "Spanish", "feats": [1, 1, 1, 1], "size": "377 MB", "license": "CC BY-SA"} - ] - }, - - "EXAMPLE_SENTENCES": { - "en": "This is a sentence.", - "de": "Dies ist ein Satz.", - "fr": "C'est une phrase.", - 
"es": "Esto es una frase." - }, - "ALPHA": true, - "V_CSS": "1.6", - "V_JS": "1.2", + "V_CSS": "2.0", + "V_JS": "2.0", "DEFAULT_SYNTAX": "python", "ANALYTICS": "UA-58931649-1", "MAILCHIMP": { "user": "spacy.us12", "id": "83b0498b1e7fa3c91ce68c3f1", "list": "89ad33e698" - }, - "BADGES": { - "pipy": { - "badge": "https://img.shields.io/pypi/v/spacy.svg?style=flat-square", - "link": "https://pypi.python.org/pypi/spacy" - }, - "conda": { - "badge": "https://anaconda.org/conda-forge/spacy/badges/version.svg", - "link": "https://anaconda.org/conda-forge/spacy" - } } } } diff --git a/website/index.jade b/website/index.jade index 9336d5c34..0155ab295 100644 --- a/website/index.jade +++ b/website/index.jade @@ -8,61 +8,48 @@ include _includes/_mixins | Natural Language#[br] | Processing - h2.c-landing__title.o-block.u-heading-1 - | in Python + h2.c-landing__title.o-block.u-heading-3 + span.u-text-label.u-text-label--light in Python - +landing-badge(gh("spaCy") + "/releases/tag/v2.0.0-alpha", "v2alpha", "Try spaCy v2.0.0 alpha!") ++grid.o-content.c-landing__blocks + +grid-col("third").c-landing__card.o-card.o-grid.o-grid--space + +h(3) Fastest in the world + p + | spaCy excels at large-scale information extraction tasks. + | It's written from the ground up in carefully memory-managed + | Cython. Independent research has confirmed that spaCy is + | the fastest in the world. If your application needs to + | process entire web dumps, spaCy is the library you want to + | be using. - +grid.o-content - +grid-col("third").o-card - +h(2) Fastest in the world - p - | spaCy excels at large-scale information extraction tasks. - | It's written from the ground up in carefully memory-managed - | Cython. Independent research has confirmed that spaCy is - | the fastest in the world. If your application needs to - | process entire web dumps, spaCy is the library you want to - | be using. 
+ +button("/usage/facts-figures", true, "primary") + | Facts & figures - +button("/docs/api", true, "primary") - | Facts & figures + +grid-col("third").c-landing__card.o-card.o-grid.o-grid--space + +h(3) Get things done + p + | spaCy is designed to help you do real work — to build real + | products, or gather real insights. The library respects + | your time, and tries to avoid wasting it. It's easy to + | install, and its API is simple and productive. We like to + | think of spaCy as the Ruby on Rails of Natural Language + | Processing. - +grid-col("third").o-card - +h(2) Get things done - p - | spaCy is designed to help you do real work — to build real - | products, or gather real insights. The library respects - | your time, and tries to avoid wasting it. It's easy to - | install, and its API is simple and productive. I like to - | think of spaCy as the Ruby on Rails of Natural Language - | Processing. + +button("/usage", true, "primary") + | Get started - +button("/docs/usage", true, "primary") - | Get started + +grid-col("third").c-landing__card.o-card.o-grid.o-grid--space + +h(3) Deep learning + p + | spaCy is the best way to prepare text for deep learning. + | It interoperates seamlessly with TensorFlow, PyTorch, + | scikit-learn, Gensim and the + | rest of Python's awesome AI ecosystem. spaCy helps you + | connect the statistical models trained by these libraries + | to the rest of your application. - +grid-col("third").o-card - +h(2) Deep learning - p - | spaCy is the best way to prepare text for deep learning. - | It interoperates seamlessly with - | #[+a("https://www.tensorflow.org") TensorFlow], - | #[+a("https://keras.io") Keras], - | #[+a("http://scikit-learn.org") Scikit-Learn], - | #[+a("https://radimrehurek.com/gensim") Gensim] and the - | rest of Python's awesome AI ecosystem. spaCy helps you - | connect the statistical models trained by these libraries - | to the rest of your application. 
- - +button("/docs/usage/deep-learning", true, "primary") - | Read more - -.o-inline-list.o-block.u-border-bottom.u-text-small.u-text-center.u-padding-small - +a(gh("spaCy") + "/releases") - strong.u-text-label.u-color-subtle #[+icon("code", 18)] Latest release: - | v#{SPACY_VERSION} - - if LATEST_NEWS - +a(LATEST_NEWS.url) #[+tag.o-icon New!] #{LATEST_NEWS.title} + +button("/usage/deep-learning", true, "primary") + | Read more .o-content +grid @@ -92,67 +79,77 @@ include _includes/_mixins +h(2) Features +list +item Non-destructive #[strong tokenization] - +item Syntax-driven sentence segmentation + +item Support for #[strong #{LANG_COUNT}+ languages] + +item #[strong #{MODEL_COUNT} statistical models] for #{MODEL_LANG_COUNT} languages +item Pre-trained #[strong word vectors] + +item Easy #[strong deep learning] integration +item Part-of-speech tagging +item #[strong Named entity] recognition +item Labelled dependency parsing + +item Syntax-driven sentence segmentation + +item Built-in #[strong visualizers] for syntax and NER +item Convenient string-to-hash mapping +item Export to numpy data arrays - +item GIL-free #[strong multi-threading] +item Efficient binary serialization - +item Easy #[strong deep learning] integration - +item Statistical models for #[strong English] and #[strong German] + +item Easy #[strong model packaging] and deployment +item State-of-the-art speed +item Robust, rigorously evaluated accuracy ++landing-banner("Convolutional neural network models", "New in v2.0") + p + | spaCy v2.0 features new neural models for #[strong tagging], + | #[strong parsing] and #[strong entity recognition]. The models have + | been designed and implemented from scratch specifically for spaCy, to + | give you an unmatched balance of speed, size and accuracy. A novel + | bloom embedding strategy with subword features is used to support + | huge vocabularies in tiny tables. 
Convolutional layers with residual + | connections, layer normalization and maxout non-linearity are used, + | giving much better efficiency than the standard BiLSTM solution. + | Finally, the parser and NER use an imitation learning objective to + | deliver accuracy in line with the latest research systems, + | even when evaluated from raw text. With these innovations, spaCy + | v2.0's models are #[strong 10× smaller], + | #[strong 20% more accurate], and #[strong just as fast] as the + | previous generation. + + .o-block-small.u-text-right + +button("/models", true, "secondary-light") Download models + ++landing-logos("spaCy is trusted by", logos) + +button(gh("spacy") + "/stargazers", false, "secondary", "small") + | and many more + ++landing-logos("Featured on", features).o-block-small + ++landing-banner("Prodigy: Radically efficient machine teaching", "From the makers of spaCy") + p + | Prodigy is an #[strong annotation tool] so efficient that data scientists can + | do the annotation themselves, enabling a new level of rapid + | iteration. Whether you're working on entity recognition, intent + | detection or image classification, Prodigy can help you + | #[strong train and evaluate] your models faster. Stream in your own examples or + | real-world data from live APIs, update your model in real time and + | chain models together to build more complex systems. + + .o-block-small.u-text-right + +button("https://prodi.gy", true, "secondary-light") Try it out + +.o-content + +grid + +grid-col("half") + +h(2) Benchmarks + + p + | In 2015, independent researchers from Emory University and + | Yahoo! Labs showed that spaCy offered the + | #[strong fastest syntactic parser in the world] and that its + | accuracy was #[strong within 1% of the best] available + | (#[+a("https://aclweb.org/anthology/P/P15/P15-1038.pdf") Choi et al., 2015]). + | spaCy v2.0, released in 2017, is more accurate than any of + | the systems Choi et al. evaluated. 
+ .o-inline-list - +button("/docs/usage/lightning-tour", true, "secondary") - | See examples + +button("/usage/facts-figures#benchmarks", true, "secondary") See details - .o-block.u-text-center.u-padding - h3.u-text-label.u-color-subtle.o-block spaCy is trusted by - - each row in logos - +grid("center").o-inline-list - each details, name in row - +a(details[0]) - img(src="/assets/img/logos/#{name}.png" alt=name width=(details[1] || 150)).u-padding-small - -.u-pattern.u-padding - +grid.o-card.o-content - +grid-col("quarter") - img(src="/assets/img/profile_matt.png" width="280") - - +grid-col("three-quarters") - +h(2) What's spaCy all about? - - p - | By 2014, I'd been publishing NLP research for about 10 - | years. During that time, I saw a huge gap open between the - | technology that Google-sized companies could take to market, - | and what was available to everyone else. This was especially - | clear when companies started trying to use my research. Like - | most researchers, my work was free to read, but expensive to - | apply. You could run my code, but its requirements were - | narrow. My code's mission in life was to print results - | tables for my papers — it was good at this job, and bad at - | all others. - - p - | spaCy's #[+a("/docs/api/philosophy") mission] is to make - | cutting-edge NLP practical and commonly available. That's - | why I left academia in 2014, to build a production-quality - | open-source NLP library. It's why - | #[+a("https://twitter.com/_inesmontani") Ines] joined the - | project in 2015, to build visualisations, demos and - | annotation tools that make NLP technologies less abstract - | and easier to use. Together, we've founded - | #[+a(COMPANY_URL, true) Explosion AI], to develop data packs - | you can drop into spaCy to extend its capabilities. If - | you're processing Hindi insurance claims, you need a model - | for that. We can build it for you. 
- - .o-block - +a("https://twitter.com/honnibal") - +svg("graphics", "matt-signature", 60, 45).u-color-theme + +grid-col("half") + include usage/_facts-figures/_benchmarks-choi-2015