diff --git a/website/docs/api/_data.json b/website/docs/api/_data.json
index 443ee9a67..f3f996846 100644
--- a/website/docs/api/_data.json
+++ b/website/docs/api/_data.json
@@ -27,8 +27,7 @@
             "GoldCorpus": "goldcorpus"
         },
         "Other": {
-            "Annotation Specs": "annotation",
-            "Feature Scheme": "features"
+            "Annotation Specs": "annotation"
         }
     },
 
@@ -143,9 +142,5 @@
 
     "annotation": {
         "title": "Annotation Specifications"
-    },
-
-    "features": {
-        "title": "Linear Model Feature Scheme"
     }
 }
diff --git a/website/docs/api/features.jade b/website/docs/api/features.jade
deleted file mode 100644
index 018790145..000000000
--- a/website/docs/api/features.jade
+++ /dev/null
@@ -1,138 +0,0 @@
-//- 💫 DOCS > API > LINEAR MOEL FEATURES
-
-include ../../_includes/_mixins
-
-p
-    |  There are two popular strategies for putting together machine learning
-    |  models for NLP: sparse linear models, and neural networks. To solve NLP
-    |  problems with linear models, feature templates need to be assembled that
-    |  combine multiple atomic predictors. This page documents the atomic
-    |  predictors used in the spaCy 1.0 #[+api("parser") #[code Parser]],
-    |  #[+api("tagger") #[code Tagger]] and
-    |  #[+api("entityrecognizer") #[code EntityRecognizer]].
-
-p
-    |  To understand the scheme, recall that spaCy's #[code Parser] and
-    |  #[code EntityRecognizer] are implemented as push-down automata. They
-    |  maintain a "stack" that holds the current entity, and a "buffer"
-    |  consisting of the words to be processed.
-
-p
-    |  Each state consists of the words on the stack (if any), which consistute
-    |  the current entity being constructed. We also have the current word, and
-    |  the two subsequent words. Finally, we also have the entities previously
-    |  built.
-
-p
-    |  This gives us a number of tokens to ask questions about, to make the
-    |  features. About each of these tokens, we can ask about a number of
-    |  different properties. Each feature identifier asks about a specific
-    |  property of a specific token of the context.
-
-+h(2, "tokens") Context tokens
-
-+table([ "ID", "Description" ])
-    +row
-        +cell #[code S0]
-        +cell
-            |  The first word on the stack, i.e. the token most recently added
-            |  to the current entity.
-
-    +row
-        +cell #[code S1]
-        +cell The second word on the stack, i.e. the second most recently added.
-
-    +row
-        +cell #[code S2]
-        +cell The third word on the stack, i.e. the third most recently added.
-
-    +row
-        +cell #[code N0]
-        +cell The first word of the buffer, i.e. the current word being tagged.
-
-    +row
-        +cell #[code N1]
-        +cell The second word of the buffer.
-
-    +row
-        +cell #[code N2]
-        +cell The third word of the buffer.
-
-    +row
-        +cell #[code P1]
-        +cell The word immediately before #[code N0].
-
-    +row
-        +cell #[code P2]
-        +cell The second word before #[code N0].
-
-    +row
-        +cell #[code E0]
-        +cell The first word of the previously constructed entity.
-
-    +row
-        +cell #[code E1]
-        +cell The first word of the second previously constructed entity.
-
-p About each of these tokens, we can ask:
-
-+table([ "ID", "Attribute", "Description" ])
-    +row
-        +cell #[code N0w]
-        +cell #[code token.orth]
-        +cell The word form.
-
-    +row
-        +cell #[code N0W]
-        +cell #[code token.lemma]
-        +cell The word's lemma.
-
-    +row
-        +cell #[code N0p]
-        +cell #[code token.tag]
-        +cell The word's (full) POS tag.
-
-    +row
-        +cell #[code N0c]
-        +cell #[code token.cluster]
-        +cell The word's (full) Brown cluster.
-
-    +row
-        +cell #[code N0c4]
-        +cell -
-        +cell First four digit prefix of the word's Brown cluster.
-
-    +row
-        +cell #[code N0c6]
-        +cell -
-        +cell First six digit prefix of the word's Brown cluster.
-
-    +row
-        +cell #[code N0L]
-        +cell -
-        +cell The word's dependency label. Not used as a feature in the NER.
-
-    +row
-        +cell #[code N0_prefix]
-        +cell #[code token.prefix]
-        +cell The first three characters of the word.
-
-    +row
-        +cell #[code N0_suffix]
-        +cell #[code token.suffix]
-        +cell The last three characters of the word.
-
-    +row
-        +cell #[code N0_shape]
-        +cell #[code token.shape]
-        +cell The word's shape, i.e. is it alphabetic, numeric, etc.
-
-    +row
-        +cell #[code N0_ne_iob]
-        +cell #[code token.ent_iob]
-        +cell The Inside/Outside/Begin code of the word's NER tag.
-
-    +row
-        +cell #[code N0_ne_type]
-        +cell #[code token.ent_type]
-        +cell The word's NER type.
diff --git a/website/docs/usage/_data.json b/website/docs/usage/_data.json
index 4d065522b..3a24a38df 100644
--- a/website/docs/usage/_data.json
+++ b/website/docs/usage/_data.json
@@ -15,9 +15,9 @@
             "Custom tokenization": "customizing-tokenizer",
             "Rule-based matching": "rule-based-matching",
             "Adding languages": "adding-languages",
-            "Processing text": "processing-text",
             "NLP pipelines": "language-processing-pipeline",
             "Deep learning": "deep-learning",
+            "Production use": "production-use",
             "Training": "training",
             "Training NER": "training-ner",
             "Saving & loading": "saving-loading",
@@ -99,11 +99,6 @@
         "next": "training"
     },
 
-    "processing-text": {
-        "title": "Processing text",
-        "next": "language-processing-pipeline"
-    },
-
     "language-processing-pipeline": {
         "title": "Language processing pipelines",
         "next": "deep-learning"
@@ -111,9 +106,15 @@
 
     "deep-learning": {
         "title": "Hooking a deep learning model into spaCy",
+        "next": "production-use"
+    },
+
+    "production-use": {
+        "title": "Production use",
         "next": "training"
     },
 
+
     "training": {
         "title": "Training spaCy's statistical models",
         "next": "saving-loading"
diff --git a/website/docs/usage/processing-text.jade b/website/docs/usage/production-use.jade
similarity index 58%
rename from website/docs/usage/processing-text.jade
rename to website/docs/usage/production-use.jade
index 2562d9fc4..68a313d8a 100644
--- a/website/docs/usage/processing-text.jade
+++ b/website/docs/usage/production-use.jade
@@ -6,69 +6,6 @@ p
     |  Once you have loaded the #[code nlp] object, you can call it as though
     |  it were a function. This allows you to process a single unicode string.
 
-+code.
-    doc = nlp(u'Hello, world! A three sentence document.\nWith new lines...')
-
-p
-    |  The library should perform equally well with #[strong short or long documents].
-    |  All algorithms are linear-time in the length of the string, and once the
-    |  data is loaded, there's no significant start-up cost to consider. This
-    |  means that you don't have to strategically merge or split your text —
-    |  you should feel free to feed in either single tweets or whole novels.
-
-p
-    |  If you run #[+api("spacy#load") #[code spacy.load('en')]], spaCy will
-    |  load the #[+a("/docs/usage/models") model] associated with the name
-    |  #[code 'en']. Each model is a Python package containing an
-    |  #[+src(gh("spacy-dev-resources", "templates/model/en_model_name/__init__.py"))__init__.py]
-
-the #[code nlp] object will
-    |  be an instance of #[code spacy.en.English]. This means that when you run
-    |  #[code doc = nlp(text)], you're executing
-    |  #[code spacy.en.English.__call__], which is implemented on its parent
-    |  class, #[+api("language") #[code Language]].
-
-+code.
-    doc = nlp.make_doc(text)
-    for proc in nlp.pipeline:
-        proc(doc)
-
-p
-    |  I've tried to make sure that the #[code Language.__call__] function
-    |  doesn't do any "heavy lifting", so that you won't have complicated logic
-    |  to replicate if you need to make your own pipeline class. This is all it
-    |  does.
-
-p
-    |  The #[code .make_doc()] method and #[code .pipeline] attribute make it
-    |  easier to customise spaCy's behaviour. If you're using the default
-    |  pipeline, we can desugar one more time.
-
-+code.
-    doc = nlp.tokenizer(text)
-    nlp.tagger(doc)
-    nlp.parser(doc)
-    nlp.entity(doc)
-
-p Finally, here's where you can find out about each of those components:
-
-+table(["Name", "Source"])
-    +row
-        +cell #[code tokenizer]
-        +cell #[+src(gh("spacy", "spacy/tokenizer.pyx")) spacy.tokenizer.Tokenizer]
-
-    +row
-        +cell #[code tagger]
-        +cell #[+src(gh("spacy", "spacy/tagger.pyx")) spacy.pipeline.Tagger]
-
-    +row
-        +cell #[code parser]
-        +cell #[+src(gh("spacy", "spacy/syntax/parser.pyx")) spacy.pipeline.DependencyParser]
-
-    +row
-        +cell #[code entity]
-        +cell #[+src(gh("spacy", "spacy/syntax/parser.pyx")) spacy.pipeline.EntityRecognizer]
-
 +h(2, "multithreading") Multi-threading with #[code .pipe()]
 
 p