From 9fd471372a7e804fdd5402a6095404f71b947ed0 Mon Sep 17 00:00:00 2001
From: ines
Date: Wed, 11 Oct 2017 13:25:51 +0200
Subject: [PATCH] Add lookup lemmatizer to lemmatizer as lookup() method

---
 spacy/lemmatizer.py | 29 ++++++++++-------------------
 1 file changed, 10 insertions(+), 19 deletions(-)

diff --git a/spacy/lemmatizer.py b/spacy/lemmatizer.py
index 6c0fb6356..1fb83a727 100644
--- a/spacy/lemmatizer.py
+++ b/spacy/lemmatizer.py
@@ -10,10 +10,11 @@ class Lemmatizer(object):
     def load(cls, path, index=None, exc=None, rules=None):
         return cls(index or {}, exc or {}, rules or {})
 
-    def __init__(self, index, exceptions, rules):
-        self.index = index
-        self.exc = exceptions
-        self.rules = rules
+    def __init__(self, index=None, exceptions=None, rules=None, lookup=None):
+        self.index = index if index is not None else {}
+        self.exc = exceptions if exceptions is not None else {}
+        self.rules = rules if rules is not None else {}
+        self.lookup_table = lookup if lookup is not None else {}
 
     def __call__(self, string, univ_pos, morphology=None):
         if univ_pos == NOUN:
@@ -79,6 +80,11 @@ class Lemmatizer(object):
     def punct(self, string, morphology=None):
         return self(string, 'punct', morphology)
 
+    def lookup(self, string):
+        if string in self.lookup_table:
+            return self.lookup_table[string]
+        return string
+
 
 def lemmatize(string, index, exceptions, rules):
     string = string.lower()
@@ -102,18 +108,3 @@ def lemmatize(string, index, exceptions, rules):
     if not forms:
         forms.append(string)
     return set(forms)
-
-
-class LookupLemmatizer(Lemmatizer):
-    @classmethod
-    def load(cls, path, lookup):
-        return cls(lookup or {})
-
-    def __init__(self, lookup):
-        self.lookup = lookup
-
-    def __call__(self, string, univ_pos, morphology=None):
-        try:
-            return set([self.lookup[string]])
-        except:
-            return set([string])