From a2134951f292bd88c13287e74da190af17fffc02 Mon Sep 17 00:00:00 2001 From: ines Date: Mon, 29 May 2017 11:45:32 +0200 Subject: [PATCH] Update 101 and add note on pipeline order and tensors --- website/docs/usage/_data.json | 2 +- website/docs/usage/_spacy-101/_pipelines.jade | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/website/docs/usage/_data.json b/website/docs/usage/_data.json index 79d0b28f1..3d344eb2a 100644 --- a/website/docs/usage/_data.json +++ b/website/docs/usage/_data.json @@ -42,7 +42,7 @@ }, "spacy-101": { - "title": "spaCy 101", + "title": "spaCy 101 – Everything you need to know", "next": "lightning-tour", "quickstart": true }, diff --git a/website/docs/usage/_spacy-101/_pipelines.jade b/website/docs/usage/_spacy-101/_pipelines.jade index 654ca86e4..c21c9f97c 100644 --- a/website/docs/usage/_spacy-101/_pipelines.jade +++ b/website/docs/usage/_spacy-101/_pipelines.jade @@ -63,3 +63,16 @@ p +code(false, "json"). "pipeline": ["tensorizer", "tagger", "parser", "ner"] + +p + | Although you can mix and match pipeline components, their + | #[strong order and combination] is usually important. Some components may + | depend on modifications that earlier components make to the #[code Doc]. For + | example, the default pipeline first applies the tensorizer, which + | pre-processes the doc and encodes its internal + | #[strong meaning representations] as an array of floats, also called a + | #[strong tensor]. This includes the tokens and their context, which is + | required for the next component, the tagger, to make predictions of the + | part-of-speech tags. Because spaCy's models are neural network models, + | they only "speak" tensors and expect the input #[code Doc] to have + | a #[code tensor].