Update training section in NER guide and add links

2017-06-01 11:52:49 +02:00 · 2017-06-01 11:52:49 +02:00 · 72380c952a
parent d5c8d2f5fd
commit 72380c952a
1 changed files with 15 additions and 26 deletions
--- a/website/docs/usage/entity-recognition.jade
+++ b/website/docs/usage/entity-recognition.jade
@ -154,40 +154,29 @@ p
    |  To provide training examples to the entity recogniser, you'll first need
    |  to create an instance of the #[+api("goldparse") #[code GoldParse]] class.
    |  You can specify your annotations in a stand-off format or as token tags.
-
-+code.
-    import random
-    import spacy
-    from spacy.gold import GoldParse
-    from spacy.pipeline import EntityRecognizer
-
-    train_data = [('Who is Chaka Khan?', [(7, 17, 'PERSON')]),
-                  ('I like London and Berlin.', [(7, 13, 'LOC'), (18, 24, 'LOC')])]
-
-    nlp = spacy.load('en', entity=False, parser=False)
-    ner = EntityRecognizer(nlp.vocab, entity_types=['PERSON', 'LOC'])
-
-    for itn in range(5):
-        random.shuffle(train_data)
-        for raw_text, entity_offsets in train_data:
-            doc = nlp.make_doc(raw_text)
-            gold = GoldParse(doc, entities=entity_offsets)
-
-            nlp.tagger(doc)
-            ner.update(doc, gold)
-
-p
    |  If a character offset in your entity annotations doesn't fall on a token
    |  boundary, the #[code GoldParse] class will treat that annotation as a
    |  missing value.  This allows for more realistic training, because the
    |  entity recogniser is allowed to learn from examples that may feature
    |  tokenizer errors.

-+aside-code("Example").
+code.
+    train_data = [('Who is Chaka Khan?', [(7, 17, 'PERSON')]),
+                  ('I like London and Berlin.', [(7, 13, 'LOC'), (18, 24, 'LOC')])]
+
+code.
    doc = Doc(nlp.vocab, [u'rats', u'make', u'good', u'pets'])
    gold = GoldParse(doc, [u'U-ANIMAL', u'O', u'O', u'O'])
-    ner = EntityRecognizer(nlp.vocab, entity_types=['ANIMAL'])
-    ner.update(doc, gold)
+
+infobox
+    |  For more details on #[strong training and updating] the named entity
+    |  recognizer, see the usage guides on #[+a("/docs/usage/training") training]
+    |  and #[+a("/docs/usage/training-ner") training the named entity recognizer],
+    |  or check out the runnable
+    |  #[+src(gh("spaCy", "examples/training/train_ner.py")) training script]
+    |  on GitHub.
+
+h(3, "updating-biluo") The BILUO Scheme

 p
    |  You can also provide token-level entity annotation, using the