Add lookup lemmatizer to lemmatizer as lookup() method

This commit is contained in:
ines 2017-10-11 13:25:51 +02:00
parent c1d6d43c83
commit 9fd471372a
1 changed file with 10 additions and 19 deletions

View File

@ -10,10 +10,11 @@ class Lemmatizer(object):
def load(cls, path, index=None, exc=None, rules=None): def load(cls, path, index=None, exc=None, rules=None):
return cls(index or {}, exc or {}, rules or {}) return cls(index or {}, exc or {}, rules or {})
def __init__(self, index, exceptions, rules): def __init__(self, index=None, exceptions=None, rules=None, lookup=None):
self.index = index self.index = index if index is not None else {}
self.exc = exceptions self.exc = exceptions if exceptions is not None else {}
self.rules = rules self.rules = rules if rules is not None else {}
self.lookup_table = lookup if lookup is not None else {}
def __call__(self, string, univ_pos, morphology=None): def __call__(self, string, univ_pos, morphology=None):
if univ_pos == NOUN: if univ_pos == NOUN:
@ -79,6 +80,11 @@ class Lemmatizer(object):
def punct(self, string, morphology=None): def punct(self, string, morphology=None):
return self(string, 'punct', morphology) return self(string, 'punct', morphology)
def lookup(self, string):
    """Return the entry stored for `string` in `self.lookup_table`.

    Args:
        string: The key to look up in the table.

    Returns:
        The value stored under `string`, or `string` itself, unchanged,
        when the table has no entry for it.
    """
    # Single lookup with a fallback instead of a membership test followed
    # by a second indexing operation.
    return self.lookup_table.get(string, string)
def lemmatize(string, index, exceptions, rules): def lemmatize(string, index, exceptions, rules):
string = string.lower() string = string.lower()
@ -102,18 +108,3 @@ def lemmatize(string, index, exceptions, rules):
if not forms: if not forms:
forms.append(string) forms.append(string)
return set(forms) return set(forms)
class LookupLemmatizer(Lemmatizer):
    """Lemmatizer that resolves strings through a single lookup table."""

    @classmethod
    def load(cls, path, lookup):
        """Build an instance from `lookup`.

        `path` is accepted but not used by this method. A falsy `lookup`
        is replaced with an empty dict.
        """
        return cls(lookup or {})

    def __init__(self, lookup):
        """Store `lookup` as the table consulted by `__call__`."""
        self.lookup = lookup

    def __call__(self, string, univ_pos, morphology=None):
        """Return a one-element set holding the table entry for `string`.

        `univ_pos` and `morphology` are accepted but not used. When the
        table has no usable entry, the set holds `string` itself.
        """
        try:
            return {self.lookup[string]}
        except (KeyError, TypeError):
            # KeyError: no entry for `string`. TypeError: the table cannot
            # be indexed this way or the stored value is unhashable. Both
            # keep the original best-effort fallback, while interpreter-level
            # exceptions (KeyboardInterrupt, SystemExit) now propagate
            # instead of being swallowed by a bare `except:`.
            return {string}