Add TextCategorizer API docs stub

2017-07-22 17:56:33 +02:00 · 2017-07-22 17:56:33 +02:00 · f085b88f9d
parent ab1a4e8b3c
commit f085b88f9d
2 changed files with 28 additions and 0 deletions
--- a/website/docs/api/_data.json
+++ b/website/docs/api/_data.json
@@ -21,6 +21,7 @@
            "Tagger": "tagger",
            "DependencyParser": "dependencyparser",
            "EntityRecognizer": "entityrecognizer",
+            "TextCategorizer": "textcategorizer",
            "Matcher": "matcher",
            "Lexeme": "lexeme",
            "Vocab": "vocab",
@@ -130,6 +131,12 @@
        "source": "spacy/pipeline.pyx"
    },

+    "textcategorizer": {
+        "title": "TextCategorizer",
+        "tag": "class",
+        "source": "spacy/pipeline.pyx"
+    },
+
    "dependencyparser": {
        "title": "DependencyParser",
        "tag": "class",
--- a/website/docs/api/textcategorizer.jade
+++ b/website/docs/api/textcategorizer.jade
@@ -0,0 +1,21 @@
+//- 💫 DOCS > API > TEXTCATEGORIZER
+
+include ../../_includes/_mixins
+
+p
+    |  Add text categorization models to spaCy pipelines. The model supports
+    |  classification with multiple, non-mutually exclusive labels.
+
+p
+    |  You can change the model architecture rather easily, but by default, the
+    |  #[code TextCategorizer] class uses a convolutional neural network to
+    |  assign position-sensitive vectors to each word in the document. This step
+    |  is similar to the #[+api("tensorizer") #[code Tensorizer]] component, but the
+    |  #[code TextCategorizer] uses its own CNN model to avoid sharing weights
+    |  with the other pipeline components. The document tensor is then
+    |  summarized by concatenating max and mean pooling, and a multilayer
+    |  perceptron is used to predict an output vector of length #[code nr_class],
+    |  before a logistic activation is applied elementwise. The value of each
+    |  output neuron is the probability that the corresponding class is present.
+
++under-construction