* Make Matcher use attrs from the attrs.pyx file, rather than having an incomplete function doing the mapping.

This commit is contained in:
Matthew Honnibal 2016-04-14 10:37:39 +02:00
parent 61d20de35d
commit 108aca0e50
1 changed file with 4 additions and 18 deletions

View File

@ -15,6 +15,7 @@ from libcpp.vector cimport vector
from murmurhash.mrmr cimport hash64 from murmurhash.mrmr cimport hash64
from .attrs cimport LENGTH, ENT_TYPE, ORTH, NORM, LEMMA, LOWER, SHAPE from .attrs cimport LENGTH, ENT_TYPE, ORTH, NORM, LEMMA, LOWER, SHAPE
from . import attrs
from .tokens.doc cimport get_token_attr from .tokens.doc cimport get_token_attr
from .tokens.doc cimport Doc from .tokens.doc cimport Doc
from .vocab cimport Vocab from .vocab cimport Vocab
@ -111,12 +112,13 @@ def _convert_strings(token_specs, string_store):
converted.append([]) converted.append([])
for attr, value in spec.items(): for attr, value in spec.items():
if isinstance(attr, basestring): if isinstance(attr, basestring):
attr = map_attr_name(attr) attr = attrs.IDS.get(attr)
if isinstance(value, basestring): if isinstance(value, basestring):
value = string_store[value] value = string_store[value]
if isinstance(value, bool): if isinstance(value, bool):
value = int(value) value = int(value)
converted[-1].append((attr, value)) if attr is not None:
converted[-1].append((attr, value))
return converted return converted
@ -146,22 +148,6 @@ def get_bilou(length):
raise ValueError("Max length currently 10 for phrase matching") raise ValueError("Max length currently 10 for phrase matching")
def map_attr_name(attr):
    """Map an attribute name string to its attribute ID constant.

    The comparison is case-insensitive: the name is upper-cased before it
    is matched. Only the names ORTH, LEMMA, LOWER, SHAPE and NORM are
    recognised here.

    attr: The attribute name as a string, e.g. 'orth' or 'LEMMA'.
    Returns: The matching constant (ORTH, LEMMA, LOWER, SHAPE or NORM).
    Raises: ValueError if the name is not one of the supported attributes.
    """
    name = attr.upper()
    # Guard-style returns: each constant is only looked up on the branch
    # that actually needs it.
    if name == 'ORTH':
        return ORTH
    if name == 'LEMMA':
        return LEMMA
    if name == 'LOWER':
        return LOWER
    if name == 'SHAPE':
        return SHAPE
    if name == 'NORM':
        return NORM
    # ValueError instead of a bare Exception: it names the failure mode (bad
    # argument value) and is still an Exception subclass, so existing
    # `except Exception` handlers keep working.
    raise ValueError("TODO: Finish supporting attr mapping %s" % name)
cdef class Matcher: cdef class Matcher:
cdef Pool mem cdef Pool mem
cdef vector[Pattern*] patterns cdef vector[Pattern*] patterns