From 108aca0e50bd7edd2fe8bd902e5c5d98ce998bfb Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Thu, 14 Apr 2016 10:37:39 +0200 Subject: [PATCH] * Make Matcher use attrs from the attrs.pyx file, rather than having an incomplete function doing the mapping. --- spacy/matcher.pyx | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/spacy/matcher.pyx b/spacy/matcher.pyx index c85dbd989..6b50d9a40 100644 --- a/spacy/matcher.pyx +++ b/spacy/matcher.pyx @@ -15,6 +15,7 @@ from libcpp.vector cimport vector from murmurhash.mrmr cimport hash64 from .attrs cimport LENGTH, ENT_TYPE, ORTH, NORM, LEMMA, LOWER, SHAPE +from . import attrs from .tokens.doc cimport get_token_attr from .tokens.doc cimport Doc from .vocab cimport Vocab @@ -111,12 +112,13 @@ def _convert_strings(token_specs, string_store): converted.append([]) for attr, value in spec.items(): if isinstance(attr, basestring): - attr = map_attr_name(attr) + attr = attrs.IDS.get(attr) if isinstance(value, basestring): value = string_store[value] if isinstance(value, bool): value = int(value) - converted[-1].append((attr, value)) + if attr is not None: + converted[-1].append((attr, value)) return converted @@ -146,22 +148,6 @@ def get_bilou(length): raise ValueError("Max length currently 10 for phrase matching") -def map_attr_name(attr): - attr = attr.upper() - if attr == 'ORTH': - return ORTH - elif attr == 'LEMMA': - return LEMMA - elif attr == 'LOWER': - return LOWER - elif attr == 'SHAPE': - return SHAPE - elif attr == 'NORM': - return NORM - else: - raise Exception("TODO: Finish supporting attr mapping %s" % attr) - - cdef class Matcher: cdef Pool mem cdef vector[Pattern*] patterns