From 42bc3ad73bbf4434b799e341903dcea7d9cd8401 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sat, 9 Mar 2019 00:20:29 +0000 Subject: [PATCH] Fix class mapping for morphologizer --- spacy/morphology.pyx | 10 +++++++--- spacy/pipeline/morphologizer.pyx | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/spacy/morphology.pyx b/spacy/morphology.pyx index cead0756b..eab7c9e31 100644 --- a/spacy/morphology.pyx +++ b/spacy/morphology.pyx @@ -1066,16 +1066,20 @@ FEATURES = [ FEATURE_NAMES = {get_string_id(name): name for name in FEATURES} FEATURE_FIELDS = {feature: FIELDS[feature.split('_', 1)[0]] for feature in FEATURES} +FIELD_SIZES = Counter(FEATURE_FIELDS.values()) +for field in FIELD_SIZES: + FIELD_SIZES[field] += 1 for feat_id, name in FEATURE_NAMES.items(): FEATURE_FIELDS[feat_id] = FEATURE_FIELDS[name] - -FIELD_SIZES = Counter(FEATURE_FIELDS.values()) +# Mapping of feature names to their position in total vector FEATURE_OFFSETS = {} +# Mapping of field names to their first position in total vector. FIELD_OFFSETS = {} _seen_fields = Counter() for i, feature in enumerate(FEATURES): field = FEATURE_FIELDS[feature] - FEATURE_OFFSETS[feature] = _seen_fields[field] + # Add 1 for the NIL class, on each field + FEATURE_OFFSETS[feature] = _seen_fields[field] + 1 if _seen_fields[field] == 0: FIELD_OFFSETS[field] = i _seen_fields[field] += 1 diff --git a/spacy/pipeline/morphologizer.pyx b/spacy/pipeline/morphologizer.pyx index 7d0ad42cf..589373f80 100644 --- a/spacy/pipeline/morphologizer.pyx +++ b/spacy/pipeline/morphologizer.pyx @@ -88,7 +88,7 @@ class Morphologizer(Pipe): if doc_guesses[j, k] == 0: doc_feat_ids[j, k] = 0 else: - doc_feat_ids[j, k] = offset + doc_guesses[j, k] + doc_feat_ids[j, k] = offset + (doc_guesses[j, k]-1) # Get the set of feature names. feats = {FEATURES[f] for f in doc_feat_ids[j] if f != 0} # Now add the analysis, and set the hash.