From ab70f6e18d6c84b78815893f20fae29d3d0fd661 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Fri, 27 Jan 2017 12:27:10 +0100 Subject: [PATCH] Update NER training example --- examples/training/train_ner.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/examples/training/train_ner.py b/examples/training/train_ner.py index 220244b93..bcc087d07 100644 --- a/examples/training/train_ner.py +++ b/examples/training/train_ner.py @@ -8,6 +8,12 @@ from spacy.pipeline import EntityRecognizer from spacy.gold import GoldParse from spacy.tagger import Tagger + +try: + unicode +except: + unicode = str + def train_ner(nlp, train_data, entity_types): # Add new words to vocab. @@ -24,7 +30,6 @@ def train_ner(nlp, train_data, entity_types): doc = nlp.make_doc(raw_text) gold = GoldParse(doc, entities=entity_offsets) ner.update(doc, gold) - ner.model.end_training() return ner def save_model(ner, model_dir): @@ -33,8 +38,11 @@ def save_model(ner, model_dir): model_dir.mkdir() assert model_dir.is_dir() - with (model_dir / 'config.json').open('w') as file_: - json.dump(ner.cfg, file_) + with (model_dir / 'config.json').open('wb') as file_: + data = json.dumps(ner.cfg) + if isinstance(data, unicode): + data = data.encode('utf8') + file_.write(data) ner.model.dump(str(model_dir / 'model')) if not (model_dir / 'vocab').exists(): (model_dir / 'vocab').mkdir()