From dae6bc05ebfbc6c0db024ad131cf3a91842d468e Mon Sep 17 00:00:00 2001 From: Wolfgang Seeker Date: Mon, 2 May 2016 16:04:53 +0200 Subject: [PATCH 1/3] define German dummy lemmatizer until morphology is done --- bin/parser/train.py | 2 -- spacy/de/__init__.py | 11 +++++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/bin/parser/train.py b/bin/parser/train.py index aa916d1e0..372c7932e 100755 --- a/bin/parser/train.py +++ b/bin/parser/train.py @@ -111,8 +111,6 @@ def train(Language, gold_tuples, model_dir, n_iter=15, feat_set=u'basic', gold_tuples = gold_tuples[:n_sents] nlp = Language(data_dir=model_dir, tagger=False, parser=False, entity=False) - if nlp.lang == 'de': - nlp.vocab.morphology.lemmatizer = lambda string,pos: set([string]) nlp.tagger = Tagger.blank(nlp.vocab, Tagger.default_templates()) nlp.parser = Parser.from_dir(dep_model_dir, nlp.vocab.strings, ArcEager) nlp.entity = Parser.from_dir(ner_model_dir, nlp.vocab.strings, BiluoPushDown) diff --git a/spacy/de/__init__.py b/spacy/de/__init__.py index 76817ccff..e76431b15 100644 --- a/spacy/de/__init__.py +++ b/spacy/de/__init__.py @@ -3,7 +3,18 @@ from __future__ import unicode_literals, print_function from os import path from ..language import Language +from ..vocab import Vocab +from .. import attrs +from .. import util +from .. import about class German(Language): lang = 'de' + + @classmethod + def default_vocab(cls, package, get_lex_attr=None, vectors_package=None): + vocab = super(German,cls).default_vocab(package,get_lex_attr,vectors_package) + # for now until the morphology is done for German + vocab.morphology.lemmatizer = lambda string,pos: set([string]) + return vocab From 857454ffa0c7d66a788d15e1f6feb1ba6ba1ba59 Mon Sep 17 00:00:00 2001 From: Wolfgang Seeker Date: Mon, 2 May 2016 17:10:41 +0200 Subject: [PATCH 2/3] fix indentation -.- --- spacy/de/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/spacy/de/__init__.py b/spacy/de/__init__.py index e76431b15..f2a173e71 100644 --- a/spacy/de/__init__.py +++ b/spacy/de/__init__.py @@ -12,9 +12,10 @@ from .. import about class German(Language): lang = 'de' - @classmethod + @classmethod def default_vocab(cls, package, get_lex_attr=None, vectors_package=None): vocab = super(German,cls).default_vocab(package,get_lex_attr,vectors_package) - # for now until the morphology is done for German + # set a dummy lemmatizer for now that simply returns the same string + # until the morphology is done for German vocab.morphology.lemmatizer = lambda string,pos: set([string]) return vocab From 92bfbebeecdb08a77bee546dce9c67a725be1a6d Mon Sep 17 00:00:00 2001 From: Wolfgang Seeker Date: Mon, 2 May 2016 17:33:22 +0200 Subject: [PATCH 3/3] remove unnecessary imports --- spacy/de/__init__.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/spacy/de/__init__.py b/spacy/de/__init__.py index f2a173e71..8a2f809ff 100644 --- a/spacy/de/__init__.py +++ b/spacy/de/__init__.py @@ -3,10 +3,6 @@ from __future__ import unicode_literals, print_function from os import path from ..language import Language -from ..vocab import Vocab -from .. import attrs -from .. import util -from .. import about class German(Language):