Modify morphology to support arbitrary features (#4932)

* Restructure tag maps for MorphAnalysis changes

Prepare tag maps for upcoming MorphAnalysis changes that allow
arbitrary features.

* Use default tag map rather than duplicating for ca / uk / vi

* Import tag map into defaults for ga

* Modify tag maps so all morphological fields and features are strings
  * Move features from `"Other"` to the top level
  * Rewrite tuples as strings separated by `","`

* Rewrite morph symbols for fr lemmatizer as strings

* Export MorphAnalysis under spacy.tokens

* Modify morphology to support arbitrary features

Modify `Morphology` and `MorphAnalysis` so that arbitrary features are
supported.

* Modify `MorphAnalysisC` so that it can support arbitrary features and
multiple values per field. `MorphAnalysisC` is redesigned to contain:
  * key: hash of UD FEATS string of morphological features
  * array of `MorphFeatureC` structs that each contain a hash of `Field`
and `Field=Value` for a given morphological feature, which makes it
possible to:
    * find features by field
    * represent multiple values for a given field

* `get_field()` is renamed to `get_by_field()` and is no longer `nogil`.
Instead a new helper function `get_n_by_field()` is `nogil` and returns
`n` features by field.

* `MorphAnalysis.get()` returns all possible values for a field as a
list of individual features such as `["Tense=Pres", "Tense=Past"]`.

* `MorphAnalysis`'s `str()` and `repr()` are the UD FEATS string.

* `Morphology.feats_to_dict()` converts a UD FEATS string to a dict
where:
  * Each field has one entry in the dict
  * Multiple values remain separated by a separator in the value string

* `Token.morph_` returns the UD FEATS string and you can set
`Token.morph_` with a UD FEATS string or with a tag map dict.

* Modify get_by_field to use np.ndarray

Modify `get_by_field()` to use np.ndarray. Remove `max_results` from
`get_n_by_field()` and always iterate over all the fields.

* Rewrite without MorphFeatureC

* Add shortcut for existing feats strings as keys

Add shortcut for existing feats strings as keys in `Morphology.add()`.

* Check for '_' as empty analysis when adding morphs

* Extend helper converters in Morphology

Add and extend helper converters that convert and normalize between:

* UD FEATS strings (`"Case=dat,gen|Number=sing"`)
* per-field dict of feats (`{"Case": "dat,gen", "Number": "sing"}`)
* list of individual features (`["Case=dat", "Case=gen",
"Number=sing"]`)

All converters sort fields and values where applicable.
This commit is contained in:
adrianeboyd 2020-01-23 22:01:54 +01:00 committed by Matthew Honnibal
parent 0a0de85409
commit adc9745718
27 changed files with 1080 additions and 2254 deletions

View File

@ -103,6 +103,9 @@ class Warnings(object):
W027 = ("Found a large training file of {size} bytes. Note that it may "
"be more efficient to split your training data into multiple "
"smaller JSON files instead.")
W028 = ("Skipping unsupported morphological feature(s): {feature}. "
"Provide features as a dict {{\"Field1\": \"Value1,Value2\"}} or "
"string \"Field1=Value1,Value2|Field2=Value3\".")
@add_codes

View File

@ -11,8 +11,8 @@ TAG_MAP = {
'""': {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"},
"''": {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"},
":": {POS: PUNCT},
"$": {POS: SYM, "Other": {"SymType": "currency"}},
"#": {POS: SYM, "Other": {"SymType": "numbersign"}},
"$": {POS: SYM, "SymType": "currency"},
"#": {POS: SYM, "SymType": "numbersign"},
"AFX": {POS: ADJ, "Hyph": "yes"},
"CC": {POS: CONJ, "ConjType": "coor"},
"CD": {POS: NUM, "NumType": "card"},

View File

@ -1,25 +0,0 @@
from ..symbols import POS, ADV, NOUN, ADP, PRON, SCONJ, PROPN, DET, SYM, INTJ
from ..symbols import PUNCT, NUM, AUX, X, CONJ, ADJ, VERB, PART, SPACE, CCONJ
TAG_MAP = {
"ADV": {POS: ADV},
"NOUN": {POS: NOUN},
"ADP": {POS: ADP},
"PRON": {POS: PRON},
"SCONJ": {POS: SCONJ},
"PROPN": {POS: PROPN},
"DET": {POS: DET},
"SYM": {POS: SYM},
"INTJ": {POS: INTJ},
"PUNCT": {POS: PUNCT},
"NUM": {POS: NUM},
"AUX": {POS: AUX},
"X": {POS: X},
"CONJ": {POS: CONJ},
"CCONJ": {POS: CCONJ},
"ADJ": {POS: ADJ},
"VERB": {POS: VERB},
"PART": {POS: PART},
"SP": {POS: SPACE},
}

View File

@ -4,7 +4,6 @@ from .punctuation import TOKENIZER_INFIXES, TOKENIZER_SUFFIXES
from .stop_words import STOP_WORDS
from .lex_attrs import LEX_ATTRS
from .morph_rules import MORPH_RULES
from ..tag_map import TAG_MAP
from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ..norm_exceptions import BASE_NORMS
@ -24,7 +23,6 @@ class DanishDefaults(Language.Defaults):
morph_rules = MORPH_RULES
infixes = TOKENIZER_INFIXES
suffixes = TOKENIZER_SUFFIXES
tag_map = TAG_MAP
stop_words = STOP_WORDS

View File

@ -656,7 +656,7 @@ TAG_MAP = {
"Gender": "Fem",
"Number": "Plur",
"Case": "Acc",
"Other": {"Definite": "Def"},
"Definite": "Def",
},
"AtDfFePlGe": {
POS: DET,
@ -664,7 +664,7 @@ TAG_MAP = {
"Gender": "Fem",
"Number": "Plur",
"Case": "Gen",
"Other": {"Definite": "Def"},
"Definite": "Def",
},
"AtDfFePlNm": {
POS: DET,
@ -672,7 +672,7 @@ TAG_MAP = {
"Gender": "Fem",
"Number": "Plur",
"Case": "Nom",
"Other": {"Definite": "Def"},
"Definite": "Def",
},
"AtDfFeSgAc": {
POS: DET,
@ -680,7 +680,7 @@ TAG_MAP = {
"Gender": "Fem",
"Number": "Sing",
"Case": "Acc",
"Other": {"Definite": "Def"},
"Definite": "Def",
},
"AtDfFeSgDa": {
POS: DET,
@ -688,7 +688,7 @@ TAG_MAP = {
"Gender": "Fem",
"Number": "Sing",
"Case": "Dat",
"Other": {"Definite": "Def"},
"Definite": "Def",
},
"AtDfFeSgGe": {
POS: DET,
@ -696,7 +696,7 @@ TAG_MAP = {
"Gender": "Fem",
"Number": "Sing",
"Case": "Gen",
"Other": {"Definite": "Def"},
"Definite": "Def",
},
"AtDfFeSgNm": {
POS: DET,
@ -704,7 +704,7 @@ TAG_MAP = {
"Gender": "Fem",
"Number": "Sing",
"Case": "Nom",
"Other": {"Definite": "Def"},
"Definite": "Def",
},
"AtDfMaPlAc": {
POS: DET,
@ -712,7 +712,7 @@ TAG_MAP = {
"Gender": "Masc",
"Number": "Plur",
"Case": "Acc",
"Other": {"Definite": "Def"},
"Definite": "Def",
},
"AtDfMaPlGe": {
POS: DET,
@ -720,7 +720,7 @@ TAG_MAP = {
"Gender": "Masc",
"Number": "Plur",
"Case": "Gen",
"Other": {"Definite": "Def"},
"Definite": "Def",
},
"AtDfMaPlNm": {
POS: DET,
@ -728,7 +728,7 @@ TAG_MAP = {
"Gender": "Masc",
"Number": "Plur",
"Case": "Nom",
"Other": {"Definite": "Def"},
"Definite": "Def",
},
"AtDfMaSgAc": {
POS: DET,
@ -736,7 +736,7 @@ TAG_MAP = {
"Gender": "Masc",
"Number": "Sing",
"Case": "Acc",
"Other": {"Definite": "Def"},
"Definite": "Def",
},
"AtDfMaSgDa": {
POS: DET,
@ -744,7 +744,7 @@ TAG_MAP = {
"Gender": "Masc",
"Number": "Sing",
"Case": "Dat",
"Other": {"Definite": "Def"},
"Definite": "Def",
},
"AtDfMaSgGe": {
POS: DET,
@ -752,7 +752,7 @@ TAG_MAP = {
"Gender": "Masc",
"Number": "Sing",
"Case": "Gen",
"Other": {"Definite": "Def"},
"Definite": "Def",
},
"AtDfMaSgNm": {
POS: DET,
@ -760,7 +760,7 @@ TAG_MAP = {
"Gender": "Masc",
"Number": "Sing",
"Case": "Nom",
"Other": {"Definite": "Def"},
"Definite": "Def",
},
"AtDfNePlAc": {
POS: DET,
@ -768,7 +768,7 @@ TAG_MAP = {
"Gender": "Neut",
"Number": "Plur",
"Case": "Acc",
"Other": {"Definite": "Def"},
"Definite": "Def",
},
"AtDfNePlDa": {
POS: DET,
@ -776,7 +776,7 @@ TAG_MAP = {
"Gender": "Neut",
"Number": "Plur",
"Case": "Dat",
"Other": {"Definite": "Def"},
"Definite": "Def",
},
"AtDfNePlGe": {
POS: DET,
@ -784,7 +784,7 @@ TAG_MAP = {
"Gender": "Neut",
"Number": "Plur",
"Case": "Gen",
"Other": {"Definite": "Def"},
"Definite": "Def",
},
"AtDfNePlNm": {
POS: DET,
@ -792,7 +792,7 @@ TAG_MAP = {
"Gender": "Neut",
"Number": "Plur",
"Case": "Nom",
"Other": {"Definite": "Def"},
"Definite": "Def",
},
"AtDfNeSgAc": {
POS: DET,
@ -800,7 +800,7 @@ TAG_MAP = {
"Gender": "Neut",
"Number": "Sing",
"Case": "Acc",
"Other": {"Definite": "Def"},
"Definite": "Def",
},
"AtDfNeSgDa": {
POS: DET,
@ -808,7 +808,7 @@ TAG_MAP = {
"Gender": "Neut",
"Number": "Sing",
"Case": "Dat",
"Other": {"Definite": "Def"},
"Definite": "Def",
},
"AtDfNeSgGe": {
POS: DET,
@ -816,7 +816,7 @@ TAG_MAP = {
"Gender": "Neut",
"Number": "Sing",
"Case": "Gen",
"Other": {"Definite": "Def"},
"Definite": "Def",
},
"AtDfNeSgNm": {
POS: DET,
@ -824,7 +824,7 @@ TAG_MAP = {
"Gender": "Neut",
"Number": "Sing",
"Case": "Nom",
"Other": {"Definite": "Def"},
"Definite": "Def",
},
"AtIdFeSgAc": {
POS: DET,
@ -832,7 +832,7 @@ TAG_MAP = {
"Gender": "Fem",
"Number": "Sing",
"Case": "Acc",
"Other": {"Definite": "Ind"},
"Definite": "Ind",
},
"AtIdFeSgDa": {
POS: DET,
@ -840,7 +840,7 @@ TAG_MAP = {
"Gender": "Fem",
"Number": "Sing",
"Case": "Dat",
"Other": {"Definite": "Ind"},
"Definite": "Ind",
},
"AtIdFeSgGe": {
POS: DET,
@ -848,7 +848,7 @@ TAG_MAP = {
"Gender": "Fem",
"Number": "Sing",
"Case": "Gen",
"Other": {"Definite": "Ind"},
"Definite": "Ind",
},
"AtIdFeSgNm": {
POS: DET,
@ -856,7 +856,7 @@ TAG_MAP = {
"Gender": "Fem",
"Number": "Sing",
"Case": "Nom",
"Other": {"Definite": "Ind"},
"Definite": "Ind",
},
"AtIdMaSgAc": {
POS: DET,
@ -864,7 +864,7 @@ TAG_MAP = {
"Gender": "Masc",
"Number": "Sing",
"Case": "Acc",
"Other": {"Definite": "Ind"},
"Definite": "Ind",
},
"AtIdMaSgGe": {
POS: DET,
@ -872,7 +872,7 @@ TAG_MAP = {
"Gender": "Masc",
"Number": "Sing",
"Case": "Gen",
"Other": {"Definite": "Ind"},
"Definite": "Ind",
},
"AtIdMaSgNm": {
POS: DET,
@ -880,7 +880,7 @@ TAG_MAP = {
"Gender": "Masc",
"Number": "Sing",
"Case": "Nom",
"Other": {"Definite": "Ind"},
"Definite": "Ind",
},
"AtIdNeSgAc": {
POS: DET,
@ -888,7 +888,7 @@ TAG_MAP = {
"Gender": "Neut",
"Number": "Sing",
"Case": "Acc",
"Other": {"Definite": "Ind"},
"Definite": "Ind",
},
"AtIdNeSgGe": {
POS: DET,
@ -896,7 +896,7 @@ TAG_MAP = {
"Gender": "Neut",
"Number": "Sing",
"Case": "Gen",
"Other": {"Definite": "Ind"},
"Definite": "Ind",
},
"AtIdNeSgNm": {
POS: DET,
@ -904,7 +904,7 @@ TAG_MAP = {
"Gender": "Neut",
"Number": "Sing",
"Case": "Nom",
"Other": {"Definite": "Ind"},
"Definite": "Ind",
},
"CjCo": {POS: CCONJ},
"CjSb": {POS: SCONJ},

View File

@ -1,7 +1,6 @@
from ...lemmatizer import Lemmatizer
from ...symbols import POS, NOUN, VERB, ADJ, ADV, PRON, DET, AUX, PUNCT, ADP
from ...symbols import SCONJ, CCONJ
from ...symbols import VerbForm_inf, VerbForm_none, Number_sing, Degree_pos
class FrenchLemmatizer(Lemmatizer):
@ -82,13 +81,13 @@ class FrenchLemmatizer(Lemmatizer):
return True
elif univ_pos == "adj" and morphology.get("Degree") == "pos":
return True
elif VerbForm_inf in morphology:
elif "VerbForm=inf" in morphology:
return True
elif VerbForm_none in morphology:
elif "VerbForm=none" in morphology:
return True
elif Number_sing in morphology:
elif "Number=sing" in morphology:
return True
elif Degree_pos in morphology:
elif "Degree=pos" in morphology:
return True
else:
return False

View File

@ -1,5 +1,6 @@
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
from .stop_words import STOP_WORDS
from .tag_map import TAG_MAP
from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ...language import Language
@ -13,6 +14,7 @@ class IrishDefaults(Language.Defaults):
tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
stop_words = set(STOP_WORDS)
tag_map = TAG_MAP
class Irish(Language):

View File

@ -1,26 +1,26 @@
# fmt: off
TAG_MAP = {
"ADJ__Case=Gen|Form=Len|Gender=Masc|Number=Sing": {"pos": "ADJ", "Case": "gen", "Gender": "masc", "Number": "sing", "Other": {"Form": "len"}},
"ADJ__Case=Gen|Form=Len|Gender=Masc|Number=Sing": {"pos": "ADJ", "Case": "gen", "Gender": "masc", "Number": "sing", "Form": "len"},
"ADJ__Case=Gen|Gender=Fem|Number=Sing": {"pos": "ADJ", "Case": "gen", "Gender": "fem", "Number": "sing"},
"ADJ__Case=Gen|Gender=Masc|Number=Sing": {"pos": "ADJ", "Case": "gen", "Gender": "masc", "Number": "sing"},
"ADJ__Case=Gen|NounType=Strong|Number=Plur": {"pos": "ADJ", "Case": "gen", "Number": "plur", "Other": {"NounType": "strong"}},
"ADJ__Case=Gen|NounType=Weak|Number=Plur": {"pos": "ADJ", "Case": "gen", "Number": "plur", "Other": {"NounType": "weak"}},
"ADJ__Case=NomAcc|Form=Len|Gender=Fem|Number=Sing": {"pos": "ADJ", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Other": {"Form": "len"}},
"ADJ__Case=NomAcc|Form=Len|Gender=Masc|Number=Sing": {"pos": "ADJ", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "len"}},
"ADJ__Case=Gen|NounType=Strong|Number=Plur": {"pos": "ADJ", "Case": "gen", "Number": "plur", "NounType": "strong"},
"ADJ__Case=Gen|NounType=Weak|Number=Plur": {"pos": "ADJ", "Case": "gen", "Number": "plur", "NounType": "weak"},
"ADJ__Case=NomAcc|Form=Len|Gender=Fem|Number=Sing": {"pos": "ADJ", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Form": "len"},
"ADJ__Case=NomAcc|Form=Len|Gender=Masc|Number=Sing": {"pos": "ADJ", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Form": "len"},
"ADJ__Case=NomAcc|Gender=Fem|Number=Plur": {"pos": "ADJ", "Case": "nom|acc", "Gender": "fem", "Number": "plur"},
"ADJ__Case=NomAcc|Gender=Fem|Number=Sing": {"pos": "ADJ", "Case": "nom|acc", "Gender": "fem", "Number": "sing"},
"ADJ__Case=NomAcc|Gender=Masc|Number=Plur": {"pos": "ADJ", "Case": "nom|acc", "Gender": "masc", "Number": "plur"},
"ADJ__Case=NomAcc|Gender=Masc|Number=Sing": {"pos": "ADJ", "Case": "nom|acc", "Gender": "masc", "Number": "sing"},
"ADJ__Case=NomAcc|NounType=NotSlender|Number=Plur": {"pos": "ADJ", "Case": "nom|acc", "Number": "plur", "Other": {"NounType": "notslender"}},
"ADJ__Case=NomAcc|NounType=Slender|Number=Plur": {"pos": "ADJ", "Case": "nom|acc", "Number": "plur", "Other": {"NounType": "slender"}},
"ADJ__Degree=Cmp,Sup|Form=Len": {"pos": "ADJ", "Degree": "cmp|sup", "Other": {"Form": "len"}},
"ADJ__Case=NomAcc|NounType=NotSlender|Number=Plur": {"pos": "ADJ", "Case": "nom|acc", "Number": "plur", "NounType": "notslender"},
"ADJ__Case=NomAcc|NounType=Slender|Number=Plur": {"pos": "ADJ", "Case": "nom|acc", "Number": "plur", "NounType": "slender"},
"ADJ__Degree=Cmp,Sup|Form=Len": {"pos": "ADJ", "Degree": "cmp|sup", "Form": "len"},
"ADJ__Degree=Cmp,Sup": {"pos": "ADJ", "Degree": "cmp|sup"},
"ADJ__Degree=Pos|Form=Ecl": {"pos": "ADJ", "Degree": "pos", "Other": {"Form": "ecl"}},
"ADJ__Degree=Pos|Form=HPref": {"pos": "ADJ", "Degree": "pos", "Other": {"Form": "hpref"}},
"ADJ__Degree=Pos|Form=Len": {"pos": "ADJ", "Degree": "pos", "Other": {"Form": "len"}},
"ADJ__Degree=Pos|Form=Ecl": {"pos": "ADJ", "Degree": "pos", "Form": "ecl"},
"ADJ__Degree=Pos|Form=HPref": {"pos": "ADJ", "Degree": "pos", "Form": "hpref"},
"ADJ__Degree=Pos|Form=Len": {"pos": "ADJ", "Degree": "pos", "Form": "len"},
"ADJ__Degree=Pos": {"pos": "ADJ", "Degree": "pos"},
"ADJ__Foreign=Yes": {"pos": "ADJ", "Foreign": "yes"},
"ADJ__Form=Len|VerbForm=Part": {"pos": "ADJ", "VerbForm": "part", "Other": {"Form": "len"}},
"ADJ__Form=Len|VerbForm=Part": {"pos": "ADJ", "VerbForm": "part", "Form": "len"},
"ADJ__Gender=Masc|Number=Sing|PartType=Voc": {"pos": "ADJ", "Gender": "masc", "Number": "sing", "Case": "voc"},
"ADJ__Gender=Masc|Number=Sing|Case=Voc": {"pos": "ADJ", "Gender": "masc", "Number": "sing", "Case": "voc"},
"ADJ__Number=Plur|PartType=Voc": {"pos": "ADJ", "Number": "plur", "Case": "voc"},
@ -29,9 +29,9 @@ TAG_MAP = {
"ADJ___": {"pos": "ADJ"},
"ADJ__VerbForm=Part": {"pos": "ADJ", "VerbForm": "part"},
"ADP__Foreign=Yes": {"pos": "ADP", "Foreign": "yes"},
"ADP__Form=Len|Number=Plur|Person=1": {"pos": "ADP", "Number": "plur", "Person": 1, "Other": {"Form": "len"}},
"ADP__Form=Len|Number=Plur|Person=3": {"pos": "ADP", "Number": "plur", "Person": 3, "Other": {"Form": "len"}},
"ADP__Form=Len|Number=Sing|Person=1": {"pos": "ADP", "Number": "sing", "Person": 1, "Other": {"Form": "len"}},
"ADP__Form=Len|Number=Plur|Person=1": {"pos": "ADP", "Number": "plur", "Person": 1, "Form": "len"},
"ADP__Form=Len|Number=Plur|Person=3": {"pos": "ADP", "Number": "plur", "Person": 3, "Form": "len"},
"ADP__Form=Len|Number=Sing|Person=1": {"pos": "ADP", "Number": "sing", "Person": 1, "Form": "len"},
"ADP__Gender=Fem|Number=Sing|Person=3": {"pos": "ADP", "Gender": "fem", "Number": "sing", "Person": 3},
"ADP__Gender=Fem|Number=Sing|Person=3|Poss=Yes": {"pos": "ADP", "Gender": "fem", "Number": "sing", "Person": 3, "Poss": "yes"},
"ADP__Gender=Fem|Number=Sing|Person=3|Poss=Yes|PronType=Prs": {"pos": "ADP", "Gender": "fem", "Number": "sing", "Person": 3, "Poss": "yes", "PronType": "prs"},
@ -57,41 +57,41 @@ TAG_MAP = {
"ADP__Person=3|Poss=Yes": {"pos": "ADP", "Person": 3, "Poss": "yes"},
"ADP___": {"pos": "ADP"},
"ADP__Poss=Yes": {"pos": "ADP", "Poss": "yes"},
"ADP__PrepForm=Cmpd": {"pos": "ADP", "Other": {"PrepForm": "cmpd"}},
"ADP__PrepForm=Cmpd": {"pos": "ADP", "PrepForm": "cmpd"},
"ADP__PronType=Art": {"pos": "ADP", "PronType": "art"},
"ADV__Form=Len": {"pos": "ADV", "Other": {"Form": "len"}},
"ADV__Form=Len": {"pos": "ADV", "Form": "len"},
"ADV___": {"pos": "ADV"},
"ADV__PronType=Int": {"pos": "ADV", "PronType": "int"},
"AUX__Form=VF|Polarity=Neg|PronType=Rel|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "PronType": "rel", "Tense": "past", "Other": {"Form": "vf", "VerbForm": "cop"}},
"AUX__Form=VF|Polarity=Neg|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "past", "Other": {"Form": "vf", "VerbForm": "cop"}},
"AUX__Form=VF|PronType=Rel|Tense=Past|VerbForm=Cop": {"pos": "AUX", "PronType": "rel", "Tense": "past", "Other": {"Form": "vf", "VerbForm": "cop"}},
"AUX__Form=VF|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Tense": "past", "Other": {"Form": "vf", "VerbForm": "cop"}},
"AUX__Form=VF|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Tense": "pres", "Other": {"Form": "vf", "VerbForm": "cop"}},
"AUX__Gender=Masc|Number=Sing|Person=3|VerbForm=Cop": {"pos": "AUX", "Gender": "masc", "Number": "sing", "Person": 3, "Other": {"VerbForm": "cop"}},
"AUX__Mood=Int|Number=Sing|PronType=Art|VerbForm=Cop": {"pos": "AUX", "Number": "sing", "PronType": "art", "Other": {"Mood": "int", "VerbForm": "cop"}},
"AUX__Mood=Int|Polarity=Neg|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "past", "Other": {"Mood": "int", "VerbForm": "cop"}},
"AUX__Mood=Int|Polarity=Neg|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "pres", "Other": {"Mood": "int", "VerbForm": "cop"}},
"AUX__Mood=Int|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Tense": "pres", "Other": {"Mood": "int", "VerbForm": "cop"}},
"AUX__PartType=Comp|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Tense": "past", "Other": {"PartType": "comp", "VerbForm": "cop"}},
"AUX__Polarity=Neg|PronType=Rel|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "PronType": "rel", "Tense": "past", "Other": {"VerbForm": "cop"}},
"AUX__Polarity=Neg|PronType=Rel|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "PronType": "rel", "Tense": "pres", "Other": {"VerbForm": "cop"}},
"AUX__Polarity=Neg|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "past", "Other": {"VerbForm": "cop"}},
"AUX__Polarity=Neg|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "pres", "Other": {"VerbForm": "cop"}},
"AUX__Form=VF|Polarity=Neg|PronType=Rel|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "PronType": "rel", "Tense": "past", "Form": "vf", "VerbForm": "cop"},
"AUX__Form=VF|Polarity=Neg|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "past", "Form": "vf", "VerbForm": "cop"},
"AUX__Form=VF|PronType=Rel|Tense=Past|VerbForm=Cop": {"pos": "AUX", "PronType": "rel", "Tense": "past", "Form": "vf", "VerbForm": "cop"},
"AUX__Form=VF|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Tense": "past", "Form": "vf", "VerbForm": "cop"},
"AUX__Form=VF|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Tense": "pres", "Form": "vf", "VerbForm": "cop"},
"AUX__Gender=Masc|Number=Sing|Person=3|VerbForm=Cop": {"pos": "AUX", "Gender": "masc", "Number": "sing", "Person": 3, "VerbForm": "cop"},
"AUX__Mood=Int|Number=Sing|PronType=Art|VerbForm=Cop": {"pos": "AUX", "Number": "sing", "PronType": "art", "Mood": "int", "VerbForm": "cop"},
"AUX__Mood=Int|Polarity=Neg|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "past", "Mood": "int", "VerbForm": "cop"},
"AUX__Mood=Int|Polarity=Neg|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "pres", "Mood": "int", "VerbForm": "cop"},
"AUX__Mood=Int|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Tense": "pres", "Mood": "int", "VerbForm": "cop"},
"AUX__PartType=Comp|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Tense": "past", "PartType": "comp", "VerbForm": "cop"},
"AUX__Polarity=Neg|PronType=Rel|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "PronType": "rel", "Tense": "past", "VerbForm": "cop"},
"AUX__Polarity=Neg|PronType=Rel|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "PronType": "rel", "Tense": "pres", "VerbForm": "cop"},
"AUX__Polarity=Neg|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "past", "VerbForm": "cop"},
"AUX__Polarity=Neg|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "pres", "VerbForm": "cop"},
"AUX___": {"pos": "AUX"},
"AUX__PronType=Dem|VerbForm=Cop": {"pos": "AUX", "PronType": "dem", "Other": {"VerbForm": "cop"}},
"AUX__PronType=Rel|Tense=Past|VerbForm=Cop": {"pos": "AUX", "PronType": "rel", "Tense": "past", "Other": {"VerbForm": "cop"}},
"AUX__PronType=Rel|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "PronType": "rel", "Tense": "pres", "Other": {"VerbForm": "cop"}},
"AUX__Tense=Past|VerbForm=Cop": {"pos": "AUX", "Tense": "past", "Other": {"VerbForm": "cop"}},
"AUX__Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Tense": "pres", "Other": {"VerbForm": "cop"}},
"AUX__VerbForm=Cop": {"pos": "AUX", "Other": {"VerbForm": "cop"}},
"AUX__PronType=Dem|VerbForm=Cop": {"pos": "AUX", "PronType": "dem", "VerbForm": "cop"},
"AUX__PronType=Rel|Tense=Past|VerbForm=Cop": {"pos": "AUX", "PronType": "rel", "Tense": "past", "VerbForm": "cop"},
"AUX__PronType=Rel|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "PronType": "rel", "Tense": "pres", "VerbForm": "cop"},
"AUX__Tense=Past|VerbForm=Cop": {"pos": "AUX", "Tense": "past", "VerbForm": "cop"},
"AUX__Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Tense": "pres", "VerbForm": "cop"},
"AUX__VerbForm=Cop": {"pos": "AUX", "VerbForm": "cop"},
"CCONJ___": {"pos": "CCONJ"},
"DET__Case=Gen|Definite=Def|Gender=Fem|Number=Sing|PronType=Art": {"pos": "DET", "Case": "gen", "Definite": "def", "Gender": "fem", "Number": "sing", "PronType": "art"},
"DET__Definite=Def|Form=Ecl": {"pos": "DET", "Definite": "def", "Other": {"Form": "ecl"}},
"DET__Definite=Def|Form=Ecl": {"pos": "DET", "Definite": "def", "Form": "ecl"},
"DET__Definite=Def|Gender=Fem|Number=Sing|PronType=Art": {"pos": "DET", "Definite": "def", "Gender": "fem", "Number": "sing", "PronType": "art"},
"DET__Definite=Def|Number=Plur|PronType=Art": {"pos": "DET", "Definite": "def", "Number": "plur", "PronType": "art"},
"DET__Definite=Def|Number=Sing|PronType=Art": {"pos": "DET", "Definite": "def", "Number": "sing", "PronType": "art"},
"DET__Definite=Def": {"pos": "DET", "Definite": "def"},
"DET__Form=HPref|PronType=Ind": {"pos": "DET", "PronType": "ind", "Other": {"Form": "hpref"}},
"DET__Form=HPref|PronType=Ind": {"pos": "DET", "PronType": "ind", "Form": "hpref"},
"DET__Gender=Fem|Number=Sing|Person=3|Poss=Yes": {"pos": "DET", "Gender": "fem", "Number": "sing", "Person": 3, "Poss": "yes"},
"DET__Gender=Masc|Number=Sing|Person=3|Poss=Yes": {"pos": "DET", "Gender": "masc", "Number": "sing", "Person": 3, "Poss": "yes"},
"DET__Number=Plur|Person=1|Poss=Yes": {"pos": "DET", "Number": "plur", "Person": 1, "Poss": "yes"},
@ -103,33 +103,33 @@ TAG_MAP = {
"DET__PronType=Dem": {"pos": "DET", "PronType": "dem"},
"DET__PronType=Ind": {"pos": "DET", "PronType": "ind"},
"NOUN__Case=Dat|Definite=Ind|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "dat", "Definite": "ind", "Gender": "fem", "Number": "sing"},
"NOUN__Case=Dat|Form=Ecl|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "dat", "Gender": "fem", "Number": "sing", "Other": {"Form": "ecl"}},
"NOUN__Case=Dat|Form=Len|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "dat", "Gender": "fem", "Number": "sing", "Other": {"Form": "len"}},
"NOUN__Case=Dat|Form=Ecl|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "dat", "Gender": "fem", "Number": "sing", "Form": "ecl"},
"NOUN__Case=Dat|Form=Len|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "dat", "Gender": "fem", "Number": "sing", "Form": "len"},
"NOUN__Case=Dat|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "dat", "Gender": "fem", "Number": "sing"},
"NOUN__Case=Dat|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "dat", "Gender": "masc", "Number": "sing"},
"NOUN__Case=Gen|Definite=Def|Gender=Fem|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Definite": "def", "Gender": "fem", "Number": "plur", "Other": {"NounType": "strong"}},
"NOUN__Case=Gen|Definite=Def|Gender=Fem|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Definite": "def", "Gender": "fem", "Number": "plur", "NounType": "strong"},
"NOUN__Case=Gen|Definite=Def|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Definite": "def", "Gender": "fem", "Number": "sing"},
"NOUN__Case=Gen|Definite=Def|Gender=Masc|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Definite": "def", "Gender": "masc", "Number": "plur", "Other": {"NounType": "strong"}},
"NOUN__Case=Gen|Definite=Def|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Definite": "def", "Gender": "masc", "Number": "plur", "Other": {"NounType": "weak"}},
"NOUN__Case=Gen|Definite=Def|Gender=Masc|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Definite": "def", "Gender": "masc", "Number": "plur", "NounType": "strong"},
"NOUN__Case=Gen|Definite=Def|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Definite": "def", "Gender": "masc", "Number": "plur", "NounType": "weak"},
"NOUN__Case=Gen|Definite=Def|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "gen", "Definite": "def", "Gender": "masc", "Number": "sing"},
"NOUN__Case=Gen|Definite=Ind|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Definite": "ind", "Gender": "fem", "Number": "sing"},
"NOUN__Case=Gen|Form=Ecl|Gender=Fem|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "plur", "Other": {"Form": "ecl", "NounType": "strong"}},
"NOUN__Case=Gen|Form=Ecl|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "sing", "Other": {"Form": "ecl"}},
"NOUN__Case=Gen|Form=Ecl|Gender=Masc|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"Form": "ecl", "NounType": "strong"}},
"NOUN__Case=Gen|Form=Ecl|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"Form": "ecl", "NounType": "weak"}},
"NOUN__Case=Gen|Form=Ecl|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "sing", "Other": {"Form": "ecl"}},
"NOUN__Case=Gen|Form=HPref|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "sing", "Other": {"Form": "hpref"}},
"NOUN__Case=Gen|Form=Len|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "sing", "Other": {"Form": "len"}},
"NOUN__Case=Gen|Form=Len|Gender=Masc|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"Form": "len", "NounType": "strong"}},
"NOUN__Case=Gen|Form=Len|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"Form": "len", "NounType": "weak"}},
"NOUN__Case=Gen|Form=Len|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "sing", "Other": {"Form": "len"}},
"NOUN__Case=Gen|Form=Len|VerbForm=Inf": {"pos": "NOUN", "Case": "gen", "VerbForm": "inf", "Other": {"Form": "len"}},
"NOUN__Case=Gen|Gender=Fem|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "plur", "Other": {"NounType": "strong"}},
"NOUN__Case=Gen|Gender=Fem|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "plur", "Other": {"NounType": "weak"}},
"NOUN__Case=Gen|Form=Ecl|Gender=Fem|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "plur", "Form": "ecl", "NounType": "strong"},
"NOUN__Case=Gen|Form=Ecl|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "sing", "Form": "ecl"},
"NOUN__Case=Gen|Form=Ecl|Gender=Masc|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Form": "ecl", "NounType": "strong"},
"NOUN__Case=Gen|Form=Ecl|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Form": "ecl", "NounType": "weak"},
"NOUN__Case=Gen|Form=Ecl|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "sing", "Form": "ecl"},
"NOUN__Case=Gen|Form=HPref|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "sing", "Form": "hpref"},
"NOUN__Case=Gen|Form=Len|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "sing", "Form": "len"},
"NOUN__Case=Gen|Form=Len|Gender=Masc|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Form": "len", "NounType": "strong"},
"NOUN__Case=Gen|Form=Len|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Form": "len", "NounType": "weak"},
"NOUN__Case=Gen|Form=Len|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "sing", "Form": "len"},
"NOUN__Case=Gen|Form=Len|VerbForm=Inf": {"pos": "NOUN", "Case": "gen", "VerbForm": "inf", "Form": "len"},
"NOUN__Case=Gen|Gender=Fem|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "plur", "NounType": "strong"},
"NOUN__Case=Gen|Gender=Fem|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "plur", "NounType": "weak"},
"NOUN__Case=Gen|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "plur"},
"NOUN__Case=Gen|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "sing"},
"NOUN__Case=Gen|Gender=Masc|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"NounType": "strong"}},
"NOUN__Case=Gen|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"NounType": "weak"}},
"NOUN__Case=Gen|Gender=Masc|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "NounType": "strong"},
"NOUN__Case=Gen|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "NounType": "weak"},
"NOUN__Case=Gen|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur"},
"NOUN__Case=Gen|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "sing"},
"NOUN__Case=Gen|Number=Sing": {"pos": "NOUN", "Case": "gen", "Number": "sing"},
@ -140,79 +140,79 @@ TAG_MAP = {
"NOUN__Case=NomAcc|Definite=Def|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Definite": "def", "Gender": "masc", "Number": "plur"},
"NOUN__Case=NomAcc|Definite=Def|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Definite": "def", "Gender": "masc", "Number": "sing"},
"NOUN__Case=NomAcc|Definite=Ind|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Definite": "ind", "Gender": "masc", "Number": "plur"},
"NOUN__Case=NomAcc|Form=Ecl|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "plur", "Other": {"Form": "ecl"}},
"NOUN__Case=NomAcc|Form=Ecl|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Other": {"Form": "ecl"}},
"NOUN__Case=NomAcc|Form=Ecl|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "plur", "Other": {"Form": "ecl"}},
"NOUN__Case=NomAcc|Form=Ecl|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "ecl"}},
"NOUN__Case=NomAcc|Form=Emp|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "emp"}},
"NOUN__Case=NomAcc|Form=HPref|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "plur", "Other": {"Form": "hpref"}},
"NOUN__Case=NomAcc|Form=HPref|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Other": {"Form": "hpref"}},
"NOUN__Case=NomAcc|Form=HPref|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "plur", "Other": {"Form": "hpref"}},
"NOUN__Case=NomAcc|Form=HPref|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "hpref"}},
"NOUN__Case=NomAcc|Form=Len|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "plur", "Other": {"Form": "len"}},
"NOUN__Case=NomAcc|Form=Len|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Other": {"Form": "len"}},
"NOUN__Case=NomAcc|Form=Len|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "plur", "Other": {"Form": "len"}},
"NOUN__Case=NomAcc|Form=Len|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "len"}},
"NOUN__Case=NomAcc|Form=Ecl|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "plur", "Form": "ecl"},
"NOUN__Case=NomAcc|Form=Ecl|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Form": "ecl"},
"NOUN__Case=NomAcc|Form=Ecl|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "plur", "Form": "ecl"},
"NOUN__Case=NomAcc|Form=Ecl|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Form": "ecl"},
"NOUN__Case=NomAcc|Form=Emp|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Form": "emp"},
"NOUN__Case=NomAcc|Form=HPref|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "plur", "Form": "hpref"},
"NOUN__Case=NomAcc|Form=HPref|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Form": "hpref"},
"NOUN__Case=NomAcc|Form=HPref|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "plur", "Form": "hpref"},
"NOUN__Case=NomAcc|Form=HPref|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Form": "hpref"},
"NOUN__Case=NomAcc|Form=Len|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "plur", "Form": "len"},
"NOUN__Case=NomAcc|Form=Len|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Form": "len"},
"NOUN__Case=NomAcc|Form=Len|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "plur", "Form": "len"},
"NOUN__Case=NomAcc|Form=Len|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Form": "len"},
"NOUN__Case=NomAcc|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "plur"},
"NOUN__Case=NomAcc|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "sing"},
"NOUN__Case=NomAcc|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "plur"},
"NOUN__Case=NomAcc|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing"},
"NOUN__Case=Voc|Definite=Def|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "voc", "Definite": "def", "Gender": "masc", "Number": "plur"},
"NOUN__Case=Voc|Form=Len|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "voc", "Gender": "fem", "Number": "sing", "Other": {"Form": "len"}},
"NOUN__Case=Voc|Form=Len|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "voc", "Gender": "masc", "Number": "plur", "Other": {"Form": "len"}},
"NOUN__Case=Voc|Form=Len|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "voc", "Gender": "masc", "Number": "sing", "Other": {"Form": "len"}},
"NOUN__Case=Voc|Form=Len|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "voc", "Gender": "fem", "Number": "sing", "Form": "len"},
"NOUN__Case=Voc|Form=Len|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "voc", "Gender": "masc", "Number": "plur", "Form": "len"},
"NOUN__Case=Voc|Form=Len|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "voc", "Gender": "masc", "Number": "sing", "Form": "len"},
"NOUN__Case=Voc|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "voc", "Gender": "masc", "Number": "sing"},
"NOUN__Degree=Pos": {"pos": "NOUN", "Degree": "pos"},
"NOUN__Foreign=Yes": {"pos": "NOUN", "Foreign": "yes"},
"NOUN__Form=Ecl|Number=Sing": {"pos": "NOUN", "Number": "sing", "Other": {"Form": "ecl"}},
"NOUN__Form=Ecl|VerbForm=Inf": {"pos": "NOUN", "VerbForm": "inf", "Other": {"Form": "ecl"}},
"NOUN__Form=Ecl|VerbForm=Vnoun": {"pos": "NOUN", "VerbForm": "vnoun", "Other": {"Form": "ecl"}},
"NOUN__Form=HPref|VerbForm=Inf": {"pos": "NOUN", "VerbForm": "inf", "Other": {"Form": "hpref"}},
"NOUN__Form=Len|Number=Sing": {"pos": "NOUN", "Number": "sing", "Other": {"Form": "len"}},
"NOUN__Form=Len|VerbForm=Inf": {"pos": "NOUN", "VerbForm": "inf", "Other": {"Form": "len"}},
"NOUN__Form=Ecl|Number=Sing": {"pos": "NOUN", "Number": "sing", "Form": "ecl"},
"NOUN__Form=Ecl|VerbForm=Inf": {"pos": "NOUN", "VerbForm": "inf", "Form": "ecl"},
"NOUN__Form=Ecl|VerbForm=Vnoun": {"pos": "NOUN", "VerbForm": "vnoun", "Form": "ecl"},
"NOUN__Form=HPref|VerbForm=Inf": {"pos": "NOUN", "VerbForm": "inf", "Form": "hpref"},
"NOUN__Form=Len|Number=Sing": {"pos": "NOUN", "Number": "sing", "Form": "len"},
"NOUN__Form=Len|VerbForm=Inf": {"pos": "NOUN", "VerbForm": "inf", "Form": "len"},
"NOUN__Gender=Fem|Number=Sing": {"pos": "NOUN", "Gender": "fem", "Number": "sing"},
"NOUN__Number=Sing|PartType=Comp": {"pos": "NOUN", "Number": "sing", "Other": {"PartType": "comp"}},
"NOUN__Number=Sing|PartType=Comp": {"pos": "NOUN", "Number": "sing", "PartType": "comp"},
"NOUN__Number=Sing": {"pos": "NOUN", "Number": "sing"},
"NOUN___": {"pos": "NOUN"},
"NOUN__Reflex=Yes": {"pos": "NOUN", "Reflex": "yes"},
"NOUN__VerbForm=Inf": {"pos": "NOUN", "VerbForm": "inf"},
"NOUN__VerbForm=Vnoun": {"pos": "NOUN", "VerbForm": "vnoun"},
"NUM__Definite=Def|NumType=Card": {"pos": "NUM", "Definite": "def", "NumType": "card"},
"NUM__Form=Ecl|NumType=Card": {"pos": "NUM", "NumType": "card", "Other": {"Form": "ecl"}},
"NUM__Form=Ecl|NumType=Ord": {"pos": "NUM", "NumType": "ord", "Other": {"Form": "ecl"}},
"NUM__Form=HPref|NumType=Card": {"pos": "NUM", "NumType": "card", "Other": {"Form": "hpref"}},
"NUM__Form=Len|NumType=Card": {"pos": "NUM", "NumType": "card", "Other": {"Form": "len"}},
"NUM__Form=Len|NumType=Ord": {"pos": "NUM", "NumType": "ord", "Other": {"Form": "len"}},
"NUM__Form=Ecl|NumType=Card": {"pos": "NUM", "NumType": "card", "Form": "ecl"},
"NUM__Form=Ecl|NumType=Ord": {"pos": "NUM", "NumType": "ord", "Form": "ecl"},
"NUM__Form=HPref|NumType=Card": {"pos": "NUM", "NumType": "card", "Form": "hpref"},
"NUM__Form=Len|NumType=Card": {"pos": "NUM", "NumType": "card", "Form": "len"},
"NUM__Form=Len|NumType=Ord": {"pos": "NUM", "NumType": "ord", "Form": "len"},
"NUM__NumType=Card": {"pos": "NUM", "NumType": "card"},
"NUM__NumType=Ord": {"pos": "NUM", "NumType": "ord"},
"NUM___": {"pos": "NUM"},
"PART__Form=Ecl|PartType=Vb|PronType=Rel": {"pos": "PART", "PronType": "rel", "Other": {"Form": "ecl", "PartType": "vb"}},
"PART__Mood=Imp|PartType=Vb|Polarity=Neg": {"pos": "PART", "Mood": "imp", "Polarity": "neg", "Other": {"PartType": "vb"}},
"PART__Mood=Imp|PartType=Vb": {"pos": "PART", "Mood": "imp", "Other": {"PartType": "vb"}},
"PART__Mood=Int|PartType=Vb|Polarity=Neg": {"pos": "PART", "Polarity": "neg", "Other": {"Mood": "int", "PartType": "vb"}},
"PART__PartType=Ad": {"pos": "PART", "Other": {"PartType": "ad"}},
"PART__PartType=Cmpl|Polarity=Neg": {"pos": "PART", "Polarity": "neg", "Other": {"PartType": "cmpl"}},
"PART__PartType=Cmpl|Polarity=Neg|Tense=Past": {"pos": "PART", "Polarity": "neg", "Tense": "past", "Other": {"PartType": "cmpl"}},
"PART__PartType=Cmpl": {"pos": "PART", "Other": {"PartType": "cmpl"}},
"PART__PartType=Comp": {"pos": "PART", "Other": {"PartType": "comp"}},
"PART__PartType=Cop|PronType=Rel": {"pos": "PART", "PronType": "rel", "Other": {"PartType": "cop"}},
"PART__PartType=Deg": {"pos": "PART", "Other": {"PartType": "deg"}},
"PART__Form=Ecl|PartType=Vb|PronType=Rel": {"pos": "PART", "PronType": "rel", "Form": "ecl", "PartType": "vb"},
"PART__Mood=Imp|PartType=Vb|Polarity=Neg": {"pos": "PART", "Mood": "imp", "Polarity": "neg", "PartType": "vb"},
"PART__Mood=Imp|PartType=Vb": {"pos": "PART", "Mood": "imp", "PartType": "vb"},
"PART__Mood=Int|PartType=Vb|Polarity=Neg": {"pos": "PART", "Polarity": "neg", "Mood": "int", "PartType": "vb"},
"PART__PartType=Ad": {"pos": "PART", "PartType": "ad"},
"PART__PartType=Cmpl|Polarity=Neg": {"pos": "PART", "Polarity": "neg", "PartType": "cmpl"},
"PART__PartType=Cmpl|Polarity=Neg|Tense=Past": {"pos": "PART", "Polarity": "neg", "Tense": "past", "PartType": "cmpl"},
"PART__PartType=Cmpl": {"pos": "PART", "PartType": "cmpl"},
"PART__PartType=Comp": {"pos": "PART", "PartType": "comp"},
"PART__PartType=Cop|PronType=Rel": {"pos": "PART", "PronType": "rel", "PartType": "cop"},
"PART__PartType=Deg": {"pos": "PART", "PartType": "deg"},
"PART__PartType=Inf": {"pos": "PART", "PartType": "inf"},
"PART__PartType=Num": {"pos": "PART", "Other": {"PartType": "num"}},
"PART__PartType=Pat": {"pos": "PART", "Other": {"PartType": "pat"}},
"PART__PartType=Vb|Polarity=Neg": {"pos": "PART", "Polarity": "neg", "Other": {"PartType": "vb"}},
"PART__PartType=Vb|Polarity=Neg|PronType=Rel": {"pos": "PART", "Polarity": "neg", "PronType": "rel", "Other": {"PartType": "vb"}},
"PART__PartType=Vb|Polarity=Neg|PronType=Rel|Tense=Past": {"pos": "PART", "Polarity": "neg", "PronType": "rel", "Tense": "past", "Other": {"PartType": "vb"}},
"PART__PartType=Vb|Polarity=Neg|Tense=Past": {"pos": "PART", "Polarity": "neg", "Tense": "past", "Other": {"PartType": "vb"}},
"PART__PartType=Vb": {"pos": "PART", "Other": {"PartType": "vb"}},
"PART__PartType=Vb|PronType=Rel": {"pos": "PART", "PronType": "rel", "Other": {"PartType": "vb"}},
"PART__PartType=Vb|PronType=Rel|Tense=Past": {"pos": "PART", "PronType": "rel", "Tense": "past", "Other": {"PartType": "vb"}},
"PART__PartType=Vb|Tense=Past": {"pos": "PART", "Tense": "past", "Other": {"PartType": "vb"}},
"PART__PartType=Voc": {"pos": "PART", "Other": {"PartType": "voc"}},
"PART__PartType=Num": {"pos": "PART", "PartType": "num"},
"PART__PartType=Pat": {"pos": "PART", "PartType": "pat"},
"PART__PartType=Vb|Polarity=Neg": {"pos": "PART", "Polarity": "neg", "PartType": "vb"},
"PART__PartType=Vb|Polarity=Neg|PronType=Rel": {"pos": "PART", "Polarity": "neg", "PronType": "rel", "PartType": "vb"},
"PART__PartType=Vb|Polarity=Neg|PronType=Rel|Tense=Past": {"pos": "PART", "Polarity": "neg", "PronType": "rel", "Tense": "past", "PartType": "vb"},
"PART__PartType=Vb|Polarity=Neg|Tense=Past": {"pos": "PART", "Polarity": "neg", "Tense": "past", "PartType": "vb"},
"PART__PartType=Vb": {"pos": "PART", "PartType": "vb"},
"PART__PartType=Vb|PronType=Rel": {"pos": "PART", "PronType": "rel", "PartType": "vb"},
"PART__PartType=Vb|PronType=Rel|Tense=Past": {"pos": "PART", "PronType": "rel", "Tense": "past", "PartType": "vb"},
"PART__PartType=Vb|Tense=Past": {"pos": "PART", "Tense": "past", "PartType": "vb"},
"PART__PartType=Voc": {"pos": "PART", "PartType": "voc"},
"PART___": {"pos": "PART"},
"PART__PronType=Rel": {"pos": "PART", "PronType": "rel"},
"PRON__Form=Len|Number=Sing|Person=2": {"pos": "PRON", "Number": "sing", "Person": 2, "Other": {"Form": "len"}},
"PRON__Form=Len|PronType=Ind": {"pos": "PRON", "PronType": "ind", "Other": {"Form": "len"}},
"PRON__Form=Len|Number=Sing|Person=2": {"pos": "PRON", "Number": "sing", "Person": 2, "Form": "len"},
"PRON__Form=Len|PronType=Ind": {"pos": "PRON", "PronType": "ind", "Form": "len"},
"PRON__Gender=Fem|Number=Sing|Person=3": {"pos": "PRON", "Gender": "fem", "Number": "sing", "Person": 3},
"PRON__Gender=Masc|Number=Sing|Person=3": {"pos": "PRON", "Gender": "masc", "Number": "sing", "Person": 3},
"PRON__Gender=Masc|Number=Sing|Person=3|PronType=Emp": {"pos": "PRON", "Gender": "masc", "Number": "sing", "Person": 3, "PronType": "emp"},
@ -232,103 +232,103 @@ TAG_MAP = {
"PRON__PronType=Ind": {"pos": "PRON", "PronType": "ind"},
"PRON__PronType=Int": {"pos": "PRON", "PronType": "int"},
"PRON__Reflex=Yes": {"pos": "PRON", "Reflex": "yes"},
"PROPN__Abbr=Yes": {"pos": "PROPN", "Other": {"Abbr": "yes"}},
"PROPN__Abbr=Yes": {"pos": "PROPN", "Abbr": "yes"},
"PROPN__Case=Dat|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "dat", "Gender": "fem", "Number": "sing"},
"PROPN__Case=Gen|Definite=Def|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "gen", "Definite": "def", "Gender": "fem", "Number": "sing"},
"PROPN__Case=Gen|Form=Ecl|Gender=Fem|Number=Plur": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Number": "plur", "Other": {"Form": "ecl"}},
"PROPN__Case=Gen|Form=Ecl|Gender=Masc|Number=Plur": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"Form": "ecl"}},
"PROPN__Case=Gen|Form=HPref|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Number": "sing", "Other": {"Form": "hpref"}},
"PROPN__Case=Gen|Form=Len|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Number": "sing", "Other": {"Form": "len"}},
"PROPN__Case=Gen|Form=Len|Gender=Fem": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Other": {"Form": "len"}},
"PROPN__Case=Gen|Form=Len|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Number": "sing", "Other": {"Form": "len"}},
"PROPN__Case=Gen|Form=Len|Gender=Masc": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Other": {"Form": "len"}},
"PROPN__Case=Gen|Form=Ecl|Gender=Fem|Number=Plur": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Number": "plur", "Form": "ecl"},
"PROPN__Case=Gen|Form=Ecl|Gender=Masc|Number=Plur": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Number": "plur", "Form": "ecl"},
"PROPN__Case=Gen|Form=HPref|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Number": "sing", "Form": "hpref"},
"PROPN__Case=Gen|Form=Len|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Number": "sing", "Form": "len"},
"PROPN__Case=Gen|Form=Len|Gender=Fem": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Form": "len"},
"PROPN__Case=Gen|Form=Len|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Number": "sing", "Form": "len"},
"PROPN__Case=Gen|Form=Len|Gender=Masc": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Form": "len"},
"PROPN__Case=Gen|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Number": "sing"},
"PROPN__Case=Gen|Gender=Fem": {"pos": "PROPN", "Case": "gen", "Gender": "fem"},
"PROPN__Case=Gen|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"NounType": "weak"}},
"PROPN__Case=Gen|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Number": "plur", "NounType": "weak"},
"PROPN__Case=Gen|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Number": "sing"},
"PROPN__Case=Gen|Gender=Masc": {"pos": "PROPN", "Case": "gen", "Gender": "masc"},
"PROPN__Case=NomAcc|Definite=Def|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Definite": "def", "Gender": "fem", "Number": "sing"},
"PROPN__Case=NomAcc|Definite=Def|Gender=Masc|Number=Plur": {"pos": "PROPN", "Case": "nom|acc", "Definite": "def", "Gender": "masc", "Number": "plur"},
"PROPN__Case=NomAcc|Definite=Def|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Definite": "def", "Gender": "masc", "Number": "sing"},
"PROPN__Case=NomAcc|Form=Ecl|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Other": {"Form": "ecl"}},
"PROPN__Case=NomAcc|Form=Ecl|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "ecl"}},
"PROPN__Case=NomAcc|Form=HPref|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "hpref"}},
"PROPN__Case=NomAcc|Form=Len|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Other": {"Form": "len"}},
"PROPN__Case=NomAcc|Form=Len|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "len"}},
"PROPN__Case=NomAcc|Form=Ecl|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Form": "ecl"},
"PROPN__Case=NomAcc|Form=Ecl|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Form": "ecl"},
"PROPN__Case=NomAcc|Form=HPref|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Form": "hpref"},
"PROPN__Case=NomAcc|Form=Len|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Form": "len"},
"PROPN__Case=NomAcc|Form=Len|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Form": "len"},
"PROPN__Case=NomAcc|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "fem", "Number": "sing"},
"PROPN__Case=NomAcc|Gender=Masc|Number=Plur": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc", "Number": "plur"},
"PROPN__Case=NomAcc|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc", "Number": "sing"},
"PROPN__Case=NomAcc|Gender=Masc": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc"},
"PROPN__Case=Voc|Form=Len|Gender=Fem": {"pos": "PROPN", "Case": "voc", "Gender": "fem", "Other": {"Form": "len"}},
"PROPN__Case=Voc|Form=Len|Gender=Fem": {"pos": "PROPN", "Case": "voc", "Gender": "fem", "Form": "len"},
"PROPN__Case=Voc|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "voc", "Gender": "masc", "Number": "sing"},
"PROPN__Gender=Masc|Number=Sing": {"pos": "PROPN", "Gender": "masc", "Number": "sing"},
"PROPN___": {"pos": "PROPN"},
"PUNCT___": {"pos": "PUNCT"},
"SCONJ___": {"pos": "SCONJ"},
"SCONJ__Tense=Past|VerbForm=Cop": {"pos": "SCONJ", "Tense": "past", "Other": {"VerbForm": "cop"}},
"SCONJ__VerbForm=Cop": {"pos": "SCONJ", "Other": {"VerbForm": "cop"}},
"SYM__Abbr=Yes": {"pos": "SYM", "Other": {"Abbr": "yes"}},
"SCONJ__Tense=Past|VerbForm=Cop": {"pos": "SCONJ", "Tense": "past", "VerbForm": "cop"},
"SCONJ__VerbForm=Cop": {"pos": "SCONJ", "VerbForm": "cop"},
"SYM__Abbr=Yes": {"pos": "SYM", "Abbr": "yes"},
"VERB__Case=NomAcc|Gender=Masc|Mood=Ind|Number=Sing|Tense=Pres": {"pos": "VERB", "Case": "nom|acc", "Gender": "masc", "Mood": "ind", "Number": "sing", "Tense": "pres"},
"VERB__Dialect=Munster|Form=Len|Mood=Ind|Tense=Past": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Other": {"Dialect": "munster", "Form": "len"}},
"VERB__Dialect=Munster|Form=Len|Mood=Ind|Tense=Past": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Dialect": "munster", "Form": "len"},
"VERB__Foreign=Yes": {"pos": "VERB", "Foreign": "yes"},
"VERB__Form=Ecl|Mood=Cnd|Number=Sing|Person=1": {"pos": "VERB", "Mood": "cnd", "Number": "sing", "Person": 1, "Other": {"Form": "ecl"}},
"VERB__Form=Ecl|Mood=Cnd|Polarity=Neg": {"pos": "VERB", "Mood": "cnd", "Polarity": "neg", "Other": {"Form": "ecl"}},
"VERB__Form=Ecl|Mood=Cnd": {"pos": "VERB", "Mood": "cnd", "Other": {"Form": "ecl"}},
"VERB__Form=Ecl|Mood=Cnd|Voice=Auto": {"pos": "VERB", "Mood": "cnd", "Other": {"Form": "ecl", "Voice": "auto"}},
"VERB__Form=Ecl|Mood=Imp|Number=Sing|Person=1|Tense=Past": {"pos": "VERB", "Mood": "imp", "Number": "sing", "Person": 1, "Tense": "past", "Other": {"Form": "ecl"}},
"VERB__Form=Ecl|Mood=Imp|Tense=Past": {"pos": "VERB", "Mood": "imp", "Tense": "past", "Other": {"Form": "ecl"}},
"VERB__Form=Ecl|Mood=Ind|Number=Plur|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "pres", "Other": {"Form": "ecl"}},
"VERB__Form=Ecl|Mood=Ind|Number=Sing|Person=1|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "past", "Other": {"Form": "ecl"}},
"VERB__Form=Ecl|Mood=Ind|Number=Sing|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "pres", "Other": {"Form": "ecl"}},
"VERB__Form=Ecl|Mood=Ind|Polarity=Neg|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "fut", "Other": {"Form": "ecl"}},
"VERB__Form=Ecl|Mood=Ind|Polarity=Neg|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "fut", "Other": {"Form": "ecl", "Voice": "auto"}},
"VERB__Form=Ecl|Mood=Ind|Polarity=Neg|Tense=Past": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "past", "Other": {"Form": "ecl"}},
"VERB__Form=Ecl|Mood=Ind|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "pres", "Other": {"Form": "ecl"}},
"VERB__Form=Ecl|Mood=Ind|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Other": {"Form": "ecl"}},
"VERB__Form=Ecl|Mood=Ind|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Other": {"Form": "ecl", "Voice": "auto"}},
"VERB__Form=Ecl|Mood=Ind|Tense=Past": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Other": {"Form": "ecl"}},
"VERB__Form=Ecl|Mood=Ind|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Other": {"Form": "ecl"}},
"VERB__Form=Ecl|Mood=Ind|Tense=Pres|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Other": {"Form": "ecl", "Voice": "auto"}},
"VERB__Form=Ecl|Mood=Sub|Tense=Pres": {"pos": "VERB", "Mood": "sub", "Tense": "pres", "Other": {"Form": "ecl"}},
"VERB__Form=Ecl": {"pos": "VERB", "Other": {"Form": "ecl"}},
"VERB__Form=Emp|Mood=Ind|Number=Plur|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "pres", "Other": {"Form": "emp"}},
"VERB__Form=Emp|Mood=Ind|Number=Sing|Person=1|PronType=Rel|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "PronType": "rel", "Tense": "pres", "Other": {"Form": "emp"}},
"VERB__Form=Emp|Mood=Ind|Number=Sing|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "pres", "Other": {"Form": "emp"}},
"VERB__Form=Len|Mood=Cnd|Number=Plur|Person=3": {"pos": "VERB", "Mood": "cnd", "Number": "plur", "Person": 3, "Other": {"Form": "len"}},
"VERB__Form=Len|Mood=Cnd|Number=Sing|Person=1": {"pos": "VERB", "Mood": "cnd", "Number": "sing", "Person": 1, "Other": {"Form": "len"}},
"VERB__Form=Len|Mood=Cnd|Number=Sing|Person=2": {"pos": "VERB", "Mood": "cnd", "Number": "sing", "Person": 2, "Other": {"Form": "len"}},
"VERB__Form=Len|Mood=Cnd|Polarity=Neg": {"pos": "VERB", "Mood": "cnd", "Polarity": "neg", "Other": {"Form": "len"}},
"VERB__Form=Len|Mood=Cnd": {"pos": "VERB", "Mood": "cnd", "Other": {"Form": "len"}},
"VERB__Form=Len|Mood=Cnd|Voice=Auto": {"pos": "VERB", "Mood": "cnd", "Other": {"Form": "len", "Voice": "auto"}},
"VERB__Form=Len|Mood=Imp|Number=Plur|Person=3|Tense=Past": {"pos": "VERB", "Mood": "imp", "Number": "plur", "Person": 3, "Tense": "past", "Other": {"Form": "len"}},
"VERB__Form=Len|Mood=Imp|Tense=Past": {"pos": "VERB", "Mood": "imp", "Tense": "past", "Other": {"Form": "len"}},
"VERB__Form=Len|Mood=Imp|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "imp", "Tense": "past", "Other": {"Form": "len", "Voice": "auto"}},
"VERB__Form=Len|Mood=Imp|Voice=Auto": {"pos": "VERB", "Mood": "imp", "Other": {"Form": "len", "Voice": "auto"}},
"VERB__Form=Len|Mood=Ind|Number=Plur|Person=1|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "fut", "Other": {"Form": "len"}},
"VERB__Form=Len|Mood=Ind|Number=Plur|Person=1|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "past", "Other": {"Form": "len"}},
"VERB__Form=Len|Mood=Ind|Number=Plur|Person=3|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 3, "Tense": "past", "Other": {"Form": "len"}},
"VERB__Form=Len|Mood=Ind|Number=Sing|Person=1|Polarity=Neg|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Polarity": "neg", "Tense": "past", "Other": {"Form": "len"}},
"VERB__Form=Len|Mood=Ind|Number=Sing|Person=1|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Polarity": "neg", "Tense": "pres", "Other": {"Form": "len"}},
"VERB__Form=Len|Mood=Ind|Number=Sing|Person=1|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "past", "Other": {"Form": "len"}},
"VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "fut", "Other": {"Form": "len"}},
"VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "fut", "Other": {"Form": "len", "Voice": "auto"}},
"VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Past": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "past", "Other": {"Form": "len"}},
"VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "past", "Other": {"Form": "len", "Voice": "auto"}},
"VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "pres", "Other": {"Form": "len"}},
"VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Pres|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "pres", "Other": {"Form": "len", "Voice": "auto"}},
"VERB__Form=Len|Mood=Ind|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Other": {"Form": "len"}},
"VERB__Form=Len|Mood=Ind|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Other": {"Form": "len", "Voice": "auto"}},
"VERB__Form=Len|Mood=Ind|Tense=Past": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Other": {"Form": "len"}},
"VERB__Form=Len|Mood=Ind|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Other": {"Form": "len", "Voice": "auto"}},
"VERB__Form=Len|Mood=Ind|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Other": {"Form": "len"}},
"VERB__Form=Len|Mood=Ind|Tense=Pres|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Other": {"Form": "len", "Voice": "auto"}},
"VERB__Form=Len|Mood=Sub|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "sub", "Polarity": "neg", "Tense": "pres", "Other": {"Form": "len"}},
"VERB__Form=Len|Polarity=Neg": {"pos": "VERB", "Polarity": "neg", "Other": {"Form": "len"}},
"VERB__Form=Len": {"pos": "VERB", "Other": {"Form": "len"}},
"VERB__Form=Ecl|Mood=Cnd|Number=Sing|Person=1": {"pos": "VERB", "Mood": "cnd", "Number": "sing", "Person": 1, "Form": "ecl"},
"VERB__Form=Ecl|Mood=Cnd|Polarity=Neg": {"pos": "VERB", "Mood": "cnd", "Polarity": "neg", "Form": "ecl"},
"VERB__Form=Ecl|Mood=Cnd": {"pos": "VERB", "Mood": "cnd", "Form": "ecl"},
"VERB__Form=Ecl|Mood=Cnd|Voice=Auto": {"pos": "VERB", "Mood": "cnd", "Form": "ecl", "Voice": "auto"},
"VERB__Form=Ecl|Mood=Imp|Number=Sing|Person=1|Tense=Past": {"pos": "VERB", "Mood": "imp", "Number": "sing", "Person": 1, "Tense": "past", "Form": "ecl"},
"VERB__Form=Ecl|Mood=Imp|Tense=Past": {"pos": "VERB", "Mood": "imp", "Tense": "past", "Form": "ecl"},
"VERB__Form=Ecl|Mood=Ind|Number=Plur|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "pres", "Form": "ecl"},
"VERB__Form=Ecl|Mood=Ind|Number=Sing|Person=1|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "past", "Form": "ecl"},
"VERB__Form=Ecl|Mood=Ind|Number=Sing|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "pres", "Form": "ecl"},
"VERB__Form=Ecl|Mood=Ind|Polarity=Neg|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "fut", "Form": "ecl"},
"VERB__Form=Ecl|Mood=Ind|Polarity=Neg|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "fut", "Form": "ecl", "Voice": "auto"},
"VERB__Form=Ecl|Mood=Ind|Polarity=Neg|Tense=Past": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "past", "Form": "ecl"},
"VERB__Form=Ecl|Mood=Ind|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "pres", "Form": "ecl"},
"VERB__Form=Ecl|Mood=Ind|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Form": "ecl"},
"VERB__Form=Ecl|Mood=Ind|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Form": "ecl", "Voice": "auto"},
"VERB__Form=Ecl|Mood=Ind|Tense=Past": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Form": "ecl"},
"VERB__Form=Ecl|Mood=Ind|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Form": "ecl"},
"VERB__Form=Ecl|Mood=Ind|Tense=Pres|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Form": "ecl", "Voice": "auto"},
"VERB__Form=Ecl|Mood=Sub|Tense=Pres": {"pos": "VERB", "Mood": "sub", "Tense": "pres", "Form": "ecl"},
"VERB__Form=Ecl": {"pos": "VERB", "Form": "ecl"},
"VERB__Form=Emp|Mood=Ind|Number=Plur|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "pres", "Form": "emp"},
"VERB__Form=Emp|Mood=Ind|Number=Sing|Person=1|PronType=Rel|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "PronType": "rel", "Tense": "pres", "Form": "emp"},
"VERB__Form=Emp|Mood=Ind|Number=Sing|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "pres", "Form": "emp"},
"VERB__Form=Len|Mood=Cnd|Number=Plur|Person=3": {"pos": "VERB", "Mood": "cnd", "Number": "plur", "Person": 3, "Form": "len"},
"VERB__Form=Len|Mood=Cnd|Number=Sing|Person=1": {"pos": "VERB", "Mood": "cnd", "Number": "sing", "Person": 1, "Form": "len"},
"VERB__Form=Len|Mood=Cnd|Number=Sing|Person=2": {"pos": "VERB", "Mood": "cnd", "Number": "sing", "Person": 2, "Form": "len"},
"VERB__Form=Len|Mood=Cnd|Polarity=Neg": {"pos": "VERB", "Mood": "cnd", "Polarity": "neg", "Form": "len"},
"VERB__Form=Len|Mood=Cnd": {"pos": "VERB", "Mood": "cnd", "Form": "len"},
"VERB__Form=Len|Mood=Cnd|Voice=Auto": {"pos": "VERB", "Mood": "cnd", "Form": "len", "Voice": "auto"},
"VERB__Form=Len|Mood=Imp|Number=Plur|Person=3|Tense=Past": {"pos": "VERB", "Mood": "imp", "Number": "plur", "Person": 3, "Tense": "past", "Form": "len"},
"VERB__Form=Len|Mood=Imp|Tense=Past": {"pos": "VERB", "Mood": "imp", "Tense": "past", "Form": "len"},
"VERB__Form=Len|Mood=Imp|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "imp", "Tense": "past", "Form": "len", "Voice": "auto"},
"VERB__Form=Len|Mood=Imp|Voice=Auto": {"pos": "VERB", "Mood": "imp", "Form": "len", "Voice": "auto"},
"VERB__Form=Len|Mood=Ind|Number=Plur|Person=1|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "fut", "Form": "len"},
"VERB__Form=Len|Mood=Ind|Number=Plur|Person=1|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "past", "Form": "len"},
"VERB__Form=Len|Mood=Ind|Number=Plur|Person=3|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 3, "Tense": "past", "Form": "len"},
"VERB__Form=Len|Mood=Ind|Number=Sing|Person=1|Polarity=Neg|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Polarity": "neg", "Tense": "past", "Form": "len"},
"VERB__Form=Len|Mood=Ind|Number=Sing|Person=1|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Polarity": "neg", "Tense": "pres", "Form": "len"},
"VERB__Form=Len|Mood=Ind|Number=Sing|Person=1|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "past", "Form": "len"},
"VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "fut", "Form": "len"},
"VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "fut", "Form": "len", "Voice": "auto"},
"VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Past": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "past", "Form": "len"},
"VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "past", "Form": "len", "Voice": "auto"},
"VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "pres", "Form": "len"},
"VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Pres|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "pres", "Form": "len", "Voice": "auto"},
"VERB__Form=Len|Mood=Ind|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Form": "len"},
"VERB__Form=Len|Mood=Ind|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Form": "len", "Voice": "auto"},
"VERB__Form=Len|Mood=Ind|Tense=Past": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Form": "len"},
"VERB__Form=Len|Mood=Ind|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Form": "len", "Voice": "auto"},
"VERB__Form=Len|Mood=Ind|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Form": "len"},
"VERB__Form=Len|Mood=Ind|Tense=Pres|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Form": "len", "Voice": "auto"},
"VERB__Form=Len|Mood=Sub|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "sub", "Polarity": "neg", "Tense": "pres", "Form": "len"},
"VERB__Form=Len|Polarity=Neg": {"pos": "VERB", "Polarity": "neg", "Form": "len"},
"VERB__Form=Len": {"pos": "VERB", "Form": "len"},
"VERB__Mood=Cnd|Number=Plur|Person=3": {"pos": "VERB", "Mood": "cnd", "Number": "plur", "Person": 3},
"VERB__Mood=Cnd|Number=Sing|Person=1": {"pos": "VERB", "Mood": "cnd", "Number": "sing", "Person": 1},
"VERB__Mood=Cnd": {"pos": "VERB", "Mood": "cnd"},
"VERB__Mood=Cnd|Voice=Auto": {"pos": "VERB", "Mood": "cnd", "Other": {"Voice": "auto"}},
"VERB__Mood=Cnd|Voice=Auto": {"pos": "VERB", "Mood": "cnd", "Voice": "auto"},
"VERB__Mood=Imp|Number=Plur|Person=1|Polarity=Neg": {"pos": "VERB", "Mood": "imp", "Number": "plur", "Person": 1, "Polarity": "neg"},
"VERB__Mood=Imp|Number=Plur|Person=1": {"pos": "VERB", "Mood": "imp", "Number": "plur", "Person": 1},
"VERB__Mood=Imp|Number=Plur|Person=2": {"pos": "VERB", "Mood": "imp", "Number": "plur", "Person": 2},
@ -338,28 +338,28 @@ TAG_MAP = {
"VERB__Mood=Ind|Number=Plur|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "pres"},
"VERB__Mood=Ind|Number=Sing|Person=1|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "past"},
"VERB__Mood=Ind|Number=Sing|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "pres"},
"VERB__Mood=Ind|Polarity=Neg|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "past", "Other": {"Voice": "auto"}},
"VERB__Mood=Ind|Polarity=Neg|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "past", "Voice": "auto"},
"VERB__Mood=Ind|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "pres"},
"VERB__Mood=Ind|PronType=Rel|Tense=Fut": {"pos": "VERB", "Mood": "ind", "PronType": "rel", "Tense": "fut"},
"VERB__Mood=Ind|PronType=Rel|Tense=Pres": {"pos": "VERB", "Mood": "ind", "PronType": "rel", "Tense": "pres"},
"VERB__Mood=Ind|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Tense": "fut"},
"VERB__Mood=Ind|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Other": {"Voice": "auto"}},
"VERB__Mood=Ind|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Voice": "auto"},
"VERB__Mood=Ind|Tense=Past": {"pos": "VERB", "Mood": "ind", "Tense": "past"},
"VERB__Mood=Ind|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Other": {"Voice": "auto"}},
"VERB__Mood=Ind|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Voice": "auto"},
"VERB__Mood=Ind|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Tense": "pres"},
"VERB__Mood=Ind|Tense=Pres|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Other": {"Voice": "auto"}},
"VERB__Mood=Ind|Tense=Pres|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Voice": "auto"},
"VERB___": {"pos": "VERB"},
"X__Abbr=Yes": {"pos": "X", "Other": {"Abbr": "yes"}},
"X__Abbr=Yes": {"pos": "X", "Abbr": "yes"},
"X__Case=NomAcc|Foreign=Yes|Gender=Fem|Number=Sing": {"pos": "X", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Foreign": "yes"},
"X__Definite=Def|Dialect=Ulster": {"pos": "X", "Definite": "def", "Other": {"Dialect": "ulster"}},
"X__Dialect=Munster|Form=Len|Mood=Ind|Number=Sing|Person=1|Tense=Past": {"pos": "X", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "past", "Other": {"Dialect": "munster", "Form": "len"}},
"X__Dialect=Munster|Mood=Imp|Number=Sing|Person=2|Polarity=Neg": {"pos": "X", "Mood": "imp", "Number": "sing", "Person": 2, "Polarity": "neg", "Other": {"Dialect": "munster"}},
"X__Dialect=Munster|Mood=Ind|Tense=Past|Voice=Auto": {"pos": "X", "Mood": "ind", "Tense": "past", "Other": {"Dialect": "munster", "Voice": "auto"}},
"X__Dialect=Munster": {"pos": "X", "Other": {"Dialect": "munster"}},
"X__Dialect=Munster|PronType=Dem": {"pos": "X", "PronType": "dem", "Other": {"Dialect": "munster"}},
"X__Dialect=Ulster|Gender=Masc|Number=Sing|Person=3": {"pos": "X", "Gender": "masc", "Number": "sing", "Person": 3, "Other": {"Dialect": "ulster"}},
"X__Dialect=Ulster|PartType=Vb|Polarity=Neg": {"pos": "X", "Polarity": "neg", "Other": {"Dialect": "ulster", "PartType": "vb"}},
"X__Dialect=Ulster|VerbForm=Cop": {"pos": "X", "Other": {"Dialect": "ulster", "VerbForm": "cop"}},
"X__Definite=Def|Dialect=Ulster": {"pos": "X", "Definite": "def", "Dialect": "ulster"},
"X__Dialect=Munster|Form=Len|Mood=Ind|Number=Sing|Person=1|Tense=Past": {"pos": "X", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "past", "Dialect": "munster", "Form": "len"},
"X__Dialect=Munster|Mood=Imp|Number=Sing|Person=2|Polarity=Neg": {"pos": "X", "Mood": "imp", "Number": "sing", "Person": 2, "Polarity": "neg", "Dialect": "munster"},
"X__Dialect=Munster|Mood=Ind|Tense=Past|Voice=Auto": {"pos": "X", "Mood": "ind", "Tense": "past", "Dialect": "munster", "Voice": "auto"},
"X__Dialect=Munster": {"pos": "X", "Dialect": "munster"},
"X__Dialect=Munster|PronType=Dem": {"pos": "X", "PronType": "dem", "Dialect": "munster"},
"X__Dialect=Ulster|Gender=Masc|Number=Sing|Person=3": {"pos": "X", "Gender": "masc", "Number": "sing", "Person": 3, "Dialect": "ulster"},
"X__Dialect=Ulster|PartType=Vb|Polarity=Neg": {"pos": "X", "Polarity": "neg", "Dialect": "ulster", "PartType": "vb"},
"X__Dialect=Ulster|VerbForm=Cop": {"pos": "X", "Dialect": "ulster", "VerbForm": "cop"},
"X__Foreign=Yes": {"pos": "X", "Foreign": "yes"},
"X___": {"pos": "X"}
}

View File

@ -195,7 +195,7 @@ MORPH_RULES = {
"seg": {
LEMMA: PRON_LEMMA,
"Person": "Three",
"Number": ("Sing", "Plur"),
"Number": "Sing,Plur",
"Reflex": "Yes",
}
},
@ -248,7 +248,7 @@ MORPH_RULES = {
},
"deres": {
LEMMA: "deres",
"Person": ("Two", "Three"),
"Person": "Two,Three",
"Number": "Sing",
"Poss": "Yes",
"Gender": "Masc",
@ -309,7 +309,7 @@ MORPH_RULES = {
},
"deres": {
LEMMA: "deres",
"Person": ("Two", "Three"),
"Person": "Two,Three",
"Number": "Sing",
"Poss": "Yes",
"Gender": "Fem",
@ -370,7 +370,7 @@ MORPH_RULES = {
},
"deres": {
LEMMA: "deres",
"Person": ("Two", "Three"),
"Person": "Two,Three",
"Number": "Sing",
"Poss": "Yes",
"Gender": "Neut",
@ -400,7 +400,7 @@ MORPH_RULES = {
"våre": {LEMMA: "vår", "Person": "One", "Number": "Plur", "Poss": "Yes"},
"deres": {
LEMMA: "deres",
"Person": ("Two", "Three"),
"Person": "Two,Three",
"Number": "Plur",
"Poss": "Yes",
},
@ -448,21 +448,21 @@ MORPH_RULES = {
"PronType": "Prs",
"Number": "Sing",
"Person": "Three",
"Gender": ("Fem", "Masc"),
"Gender": "Fem,Masc",
},
"den": {
LEMMA: PRON_LEMMA,
"PronType": "Prs",
"Number": "Sing",
"Person": "Three",
"Gender": ("Fem", "Masc"),
"Gender": "Fem,Masc",
},
"ingen": {
LEMMA: PRON_LEMMA,
"PronType": "Prs",
"Number": "Sing",
"Person": "Three",
"Gender": ("Fem", "Masc"),
"Gender": "Fem,Masc",
"Polarity": "Neg",
},
},
@ -475,7 +475,7 @@ MORPH_RULES = {
LEMMA: PRON_LEMMA,
"PronType": "Prs",
"Number": "Sing",
"Case": ("Gen", "Nom"),
"Case": "Gen,Nom",
}
},
"PRON__Animacy=Anim|Case=Gen|Number=Sing|PronType=Prs": {

View File

@ -105,7 +105,7 @@ MORPH_RULES = {
"PronType": "Prs",
"Person": "Three",
"Number": "Plur",
"Case": ("Nom", "Acc"),
"Case": "Nom,Acc",
},
"dem": {
LEMMA: PRON_LEMMA,
@ -166,7 +166,7 @@ MORPH_RULES = {
LEMMA: PRON_LEMMA,
"PronType": "Prs",
"Person": "Two",
"Number": ("Sing", "Plur"),
"Number": "Sing,Plur",
"Gender": "Masc",
"Poss": "Yes",
"Reflex": "Yes",
@ -175,7 +175,7 @@ MORPH_RULES = {
LEMMA: PRON_LEMMA,
"PronType": "Prs",
"Person": "Two",
"Number": ("Sing", "Plur"),
"Number": "Sing,Plur",
"Gender": "Fem",
"Poss": "Yes",
"Reflex": "Yes",
@ -184,7 +184,7 @@ MORPH_RULES = {
LEMMA: PRON_LEMMA,
"PronType": "Prs",
"Person": "Two",
"Number": ("Sing", "Plur"),
"Number": "Sing,Plur",
"Poss": "Yes",
"Reflex": "Yes",
},
@ -272,7 +272,7 @@ MORPH_RULES = {
"VBZ": {
"är": {
"VerbForm": "Fin",
"Person": ("One", "Two", "Three"),
"Person": "One,Two,Three",
"Tense": "Pres",
"Mood": "Ind",
}

View File

@ -1,25 +0,0 @@
from ..symbols import POS, ADV, NOUN, ADP, PRON, SCONJ, PROPN, DET, SYM, INTJ
from ..symbols import PUNCT, NUM, AUX, X, CONJ, ADJ, VERB, PART, SPACE, CCONJ
# Identity tag map: every coarse-grained tag string is paired with the
# part-of-speech symbol of the same name (the "SP" tag is paired with SPACE).
TAG_MAP = {
    tag: {POS: pos}
    for tag, pos in (
        ("ADV", ADV),
        ("NOUN", NOUN),
        ("ADP", ADP),
        ("PRON", PRON),
        ("SCONJ", SCONJ),
        ("PROPN", PROPN),
        ("DET", DET),
        ("SYM", SYM),
        ("INTJ", INTJ),
        ("PUNCT", PUNCT),
        ("NUM", NUM),
        ("AUX", AUX),
        ("X", X),
        ("CONJ", CONJ),
        ("CCONJ", CCONJ),
        ("ADJ", ADJ),
        ("VERB", VERB),
        ("PART", PART),
        ("SP", SPACE),
    )
}

View File

@ -10,8 +10,8 @@ TAG_MAP = {
'""': {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"},
"''": {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"},
":": {POS: PUNCT},
"$": {POS: SYM, "Other": {"SymType": "currency"}},
"#": {POS: SYM, "Other": {"SymType": "numbersign"}},
"$": {POS: SYM, "SymType": "currency"},
"#": {POS: SYM, "SymType": "numbersign"},
"AFX": {POS: ADJ, "Hyph": "yes"},
"CC": {POS: CCONJ, "ConjType": "coor"},
"CD": {POS: NUM, "NumType": "card"},
@ -52,7 +52,7 @@ TAG_MAP = {
"VerbForm": "fin",
"Tense": "pres",
"Number": "sing",
"Person": 3,
"Person": "3",
},
"WDT": {POS: ADJ, "PronType": "int|rel"},
"WP": {POS: NOUN, "PronType": "int|rel"},

View File

@ -1,25 +0,0 @@
from ..symbols import POS, ADV, NOUN, ADP, PRON, SCONJ, PROPN, DET, SYM, INTJ
from ..symbols import PUNCT, NUM, AUX, X, CONJ, ADJ, VERB, PART, SPACE, CCONJ
# Identity tag map: every coarse-grained tag string is paired with the
# part-of-speech symbol of the same name (the "SP" tag is paired with SPACE).
TAG_MAP = {
    tag: {POS: pos}
    for tag, pos in (
        ("ADV", ADV),
        ("NOUN", NOUN),
        ("ADP", ADP),
        ("PRON", PRON),
        ("SCONJ", SCONJ),
        ("PROPN", PROPN),
        ("DET", DET),
        ("SYM", SYM),
        ("INTJ", INTJ),
        ("PUNCT", PUNCT),
        ("NUM", NUM),
        ("AUX", AUX),
        ("X", X),
        ("CONJ", CONJ),
        ("CCONJ", CCONJ),
        ("ADJ", ADJ),
        ("VERB", VERB),
        ("PART", PART),
        ("SP", SPACE),
    )
}

View File

@ -2,6 +2,7 @@ from cymem.cymem cimport Pool
from preshed.maps cimport PreshMap, PreshMapArray
from libc.stdint cimport uint64_t
from murmurhash cimport mrmr
cimport numpy as np
from .structs cimport TokenC, MorphAnalysisC
from .strings cimport StringStore
@ -20,12 +21,11 @@ cdef class Morphology:
cdef readonly object tag_names
cdef readonly object reverse_index
cdef readonly object exc
cdef readonly object _feat_map
cdef readonly PreshMapArray _cache
cdef readonly int n_tags
cpdef update(self, hash_t morph, features)
cdef hash_t insert(self, MorphAnalysisC tag) except 0
cdef MorphAnalysisC create_morph_tag(self, field_feature_pairs) except *
cdef int insert(self, MorphAnalysisC tag) except -1
cdef int assign_untagged(self, TokenC* token) except -1
cdef int assign_tag(self, TokenC* token, tag) except -1
@ -34,8 +34,7 @@ cdef class Morphology:
cdef int _assign_tag_from_exceptions(self, TokenC* token, int tag_id) except -1
cdef int check_feature(const MorphAnalysisC* tag, attr_t feature) nogil
cdef attr_t get_field(const MorphAnalysisC* tag, int field) nogil
cdef list list_features(const MorphAnalysisC* tag)
cdef tag_to_json(const MorphAnalysisC* tag)
cdef int check_feature(const MorphAnalysisC* morph, attr_t feature) nogil
cdef list list_features(const MorphAnalysisC* morph)
cdef np.ndarray get_by_field(const MorphAnalysisC* morph, attr_t field)
cdef int get_n_by_field(attr_t* results, const MorphAnalysisC* morph, attr_t field) nogil

File diff suppressed because it is too large Load Diff

View File

@ -82,52 +82,11 @@ cdef struct TokenC:
# C struct holding the data behind a morphological analysis.
# NOTE(review): this listing carries both one `attr_t` slot per named feature
# and the pointer-based `fields`/`features` members. The change description
# says the struct was redesigned to support arbitrary features, so confirm
# which members are meant to remain before relying on this layout.
cdef struct MorphAnalysisC:
# Part-of-speech slot, typed `univ_pos_t`.
univ_pos_t pos
# Hash key for the analysis; per the change description this is the hash of
# the UD FEATS string — confirm against the code that fills it in.
hash_t key
# presumably the number of entries behind `fields`/`features` — TODO confirm
int length
# One `attr_t` slot per named morphological field.
attr_t abbr
attr_t adp_type
attr_t adv_type
attr_t animacy
attr_t aspect
attr_t case
attr_t conj_type
attr_t connegative
attr_t definite
attr_t degree
attr_t derivation
attr_t echo
attr_t foreign
attr_t gender
attr_t hyph
attr_t inf_form
attr_t mood
attr_t negative
attr_t number
attr_t name_type
attr_t noun_type
attr_t num_form
attr_t num_type
attr_t num_value
attr_t part_form
attr_t part_type
attr_t person
attr_t polite
attr_t polarity
attr_t poss
attr_t prefix
attr_t prep_case
attr_t pron_type
attr_t punct_side
attr_t punct_type
attr_t reflex
attr_t style
attr_t style_variant
attr_t tense
attr_t typo
attr_t verb_form
attr_t voice
attr_t verb_type
# Pointers to `attr_t` data. NOTE(review): presumably parallel arrays of
# field ids and "Field=Value" feature ids — verify against the code that
# allocates and reads them; ownership/lifetime is not visible here.
attr_t* fields
attr_t* features
# Internal struct, for storage and disambiguation of entities.
cdef struct KBEntryC:

View File

@ -108,282 +108,282 @@ cdef enum symbol_t:
EOL
SPACE
Animacy_anim
Animacy_inan
Animacy_hum # U20
Animacy_nhum
Aspect_freq
Aspect_imp
Aspect_mod
Aspect_none
Aspect_perf
Aspect_iter # U20
Aspect_hab # U20
Case_abe
Case_abl
Case_abs
Case_acc
Case_ade
Case_all
Case_cau
Case_com
Case_cmp # U20
Case_dat
Case_del
Case_dis
Case_ela
Case_equ # U20
Case_ess
Case_gen
Case_ill
Case_ine
Case_ins
Case_loc
Case_lat
Case_nom
Case_par
Case_sub
Case_sup
Case_tem
Case_ter
Case_tra
Case_voc
Definite_two
Definite_def
Definite_red
Definite_cons # U20
Definite_ind
Definite_spec # U20
Degree_cmp
Degree_comp
Degree_none
Degree_pos
Degree_sup
Degree_abs
Degree_com
Degree_dim # du
Degree_equ # U20
Evident_nfh # U20
Gender_com
Gender_fem
Gender_masc
Gender_neut
Mood_cnd
Mood_imp
Mood_ind
Mood_n
Mood_pot
Mood_sub
Mood_opt
Mood_prp # U20
Mood_adm # U20
Negative_neg
Negative_pos
Negative_yes
Polarity_neg # U20
Polarity_pos # U20
Number_com
Number_dual
Number_none
Number_plur
Number_sing
Number_ptan # bg
Number_count # bg, U20
Number_tri # U20
NumType_card
NumType_dist
NumType_frac
NumType_gen
NumType_mult
NumType_none
NumType_ord
NumType_sets
Person_one
Person_two
Person_three
Person_none
Poss_yes
PronType_advPart
PronType_art
PronType_default
PronType_dem
PronType_ind
PronType_int
PronType_neg
PronType_prs
PronType_rcp
PronType_rel
PronType_tot
PronType_clit
PronType_exc # es, ca, it, fa, U20
PronType_emp # U20
Reflex_yes
Tense_fut
Tense_imp
Tense_past
Tense_pres
VerbForm_fin
VerbForm_ger
VerbForm_inf
VerbForm_none
VerbForm_part
VerbForm_partFut
VerbForm_partPast
VerbForm_partPres
VerbForm_sup
VerbForm_trans
VerbForm_conv # U20
VerbForm_gdv # la
VerbForm_vnoun # U20
Voice_act
Voice_cau
Voice_pass
Voice_mid # gkc, U20
Voice_int # hb
Voice_antip # U20
Voice_dir # U20
Voice_inv # U20
Abbr_yes # cz, fi, sl, U
AdpType_prep # cz, U
AdpType_post # U
AdpType_voc # cz
AdpType_comprep # cz
AdpType_circ # U
AdvType_man
AdvType_loc
AdvType_tim
AdvType_deg
AdvType_cau
AdvType_mod
AdvType_sta
AdvType_ex
AdvType_adadj
ConjType_oper # cz, U
ConjType_comp # cz, U
Connegative_yes # fi
Derivation_minen # fi
Derivation_sti # fi
Derivation_inen # fi
Derivation_lainen # fi
Derivation_ja # fi
Derivation_ton # fi
Derivation_vs # fi
Derivation_ttain # fi
Derivation_ttaa # fi
Echo_rdp # U
Echo_ech # U
Foreign_foreign # cz, fi, U
Foreign_fscript # cz, fi, U
Foreign_tscript # cz, U
Foreign_yes # sl
Gender_dat_masc # bq, U
Gender_dat_fem # bq, U
Gender_erg_masc # bq
Gender_erg_fem # bq
Gender_psor_masc # cz, sl, U
Gender_psor_fem # cz, sl, U
Gender_psor_neut # sl
Hyph_yes # cz, U
InfForm_one # fi
InfForm_two # fi
InfForm_three # fi
NameType_geo # U, cz
NameType_prs # U, cz
NameType_giv # U, cz
NameType_sur # U, cz
NameType_nat # U, cz
NameType_com # U, cz
NameType_pro # U, cz
NameType_oth # U, cz
NounType_com # U
NounType_prop # U
NounType_class # U
Number_abs_sing # bq, U
Number_abs_plur # bq, U
Number_dat_sing # bq, U
Number_dat_plur # bq, U
Number_erg_sing # bq, U
Number_erg_plur # bq, U
Number_psee_sing # U
Number_psee_plur # U
Number_psor_sing # cz, fi, sl, U
Number_psor_plur # cz, fi, sl, U
Number_pauc # U20
Number_grpa # U20
Number_grpl # U20
Number_inv # U20
NumForm_digit # cz, sl, U
NumForm_roman # cz, sl, U
NumForm_word # cz, sl, U
NumValue_one # cz, U
NumValue_two # cz, U
NumValue_three # cz, U
PartForm_pres # fi
PartForm_past # fi
PartForm_agt # fi
PartForm_neg # fi
PartType_mod # U
PartType_emp # U
PartType_res # U
PartType_inf # U
PartType_vbp # U
Person_abs_one # bq, U
Person_abs_two # bq, U
Person_abs_three # bq, U
Person_dat_one # bq, U
Person_dat_two # bq, U
Person_dat_three # bq, U
Person_erg_one # bq, U
Person_erg_two # bq, U
Person_erg_three # bq, U
Person_psor_one # fi, U
Person_psor_two # fi, U
Person_psor_three # fi, U
Person_zero # U20
Person_four # U20
Polite_inf # bq, U
Polite_pol # bq, U
Polite_abs_inf # bq, U
Polite_abs_pol # bq, U
Polite_erg_inf # bq, U
Polite_erg_pol # bq, U
Polite_dat_inf # bq, U
Polite_dat_pol # bq, U
Polite_infm # U20
Polite_form # U20
Polite_form_elev # U20
Polite_form_humb # U20
Prefix_yes # U
PrepCase_npr # cz
PrepCase_pre # U
PunctSide_ini # U
PunctSide_fin # U
PunctType_peri # U
PunctType_qest # U
PunctType_excl # U
PunctType_quot # U
PunctType_brck # U
PunctType_comm # U
PunctType_colo # U
PunctType_semi # U
PunctType_dash # U
Style_arch # cz, fi, U
Style_rare # cz, fi, U
Style_poet # cz, U
Style_norm # cz, U
Style_coll # cz, U
Style_vrnc # cz, U
Style_sing # cz, U
Style_expr # cz, U
Style_derg # cz, U
Style_vulg # cz, U
Style_yes # fi, U
StyleVariant_styleShort # cz
StyleVariant_styleBound # cz, sl
VerbType_aux # U
VerbType_cop # U
VerbType_mod # U
VerbType_light # U
DEPRECATED001
DEPRECATED002
DEPRECATED003
DEPRECATED004
DEPRECATED005
DEPRECATED006
DEPRECATED007
DEPRECATED008
DEPRECATED009
DEPRECATED010
DEPRECATED011
DEPRECATED012
DEPRECATED013
DEPRECATED014
DEPRECATED015
DEPRECATED016
DEPRECATED017
DEPRECATED018
DEPRECATED019
DEPRECATED020
DEPRECATED021
DEPRECATED022
DEPRECATED023
DEPRECATED024
DEPRECATED025
DEPRECATED026
DEPRECATED027
DEPRECATED028
DEPRECATED029
DEPRECATED030
DEPRECATED031
DEPRECATED032
DEPRECATED033
DEPRECATED034
DEPRECATED035
DEPRECATED036
DEPRECATED037
DEPRECATED038
DEPRECATED039
DEPRECATED040
DEPRECATED041
DEPRECATED042
DEPRECATED043
DEPRECATED044
DEPRECATED045
DEPRECATED046
DEPRECATED047
DEPRECATED048
DEPRECATED049
DEPRECATED050
DEPRECATED051
DEPRECATED052
DEPRECATED053
DEPRECATED054
DEPRECATED055
DEPRECATED056
DEPRECATED057
DEPRECATED058
DEPRECATED059
DEPRECATED060
DEPRECATED061
DEPRECATED062
DEPRECATED063
DEPRECATED064
DEPRECATED065
DEPRECATED066
DEPRECATED067
DEPRECATED068
DEPRECATED069
DEPRECATED070
DEPRECATED071
DEPRECATED072
DEPRECATED073
DEPRECATED074
DEPRECATED075
DEPRECATED076
DEPRECATED077
DEPRECATED078
DEPRECATED079
DEPRECATED080
DEPRECATED081
DEPRECATED082
DEPRECATED083
DEPRECATED084
DEPRECATED085
DEPRECATED086
DEPRECATED087
DEPRECATED088
DEPRECATED089
DEPRECATED090
DEPRECATED091
DEPRECATED092
DEPRECATED093
DEPRECATED094
DEPRECATED095
DEPRECATED096
DEPRECATED097
DEPRECATED098
DEPRECATED099
DEPRECATED100
DEPRECATED101
DEPRECATED102
DEPRECATED103
DEPRECATED104
DEPRECATED105
DEPRECATED106
DEPRECATED107
DEPRECATED108
DEPRECATED109
DEPRECATED110
DEPRECATED111
DEPRECATED112
DEPRECATED113
DEPRECATED114
DEPRECATED115
DEPRECATED116
DEPRECATED117
DEPRECATED118
DEPRECATED119
DEPRECATED120
DEPRECATED121
DEPRECATED122
DEPRECATED123
DEPRECATED124
DEPRECATED125
DEPRECATED126
DEPRECATED127
DEPRECATED128
DEPRECATED129
DEPRECATED130
DEPRECATED131
DEPRECATED132
DEPRECATED133
DEPRECATED134
DEPRECATED135
DEPRECATED136
DEPRECATED137
DEPRECATED138
DEPRECATED139
DEPRECATED140
DEPRECATED141
DEPRECATED142
DEPRECATED143
DEPRECATED144
DEPRECATED145
DEPRECATED146
DEPRECATED147
DEPRECATED148
DEPRECATED149
DEPRECATED150
DEPRECATED151
DEPRECATED152
DEPRECATED153
DEPRECATED154
DEPRECATED155
DEPRECATED156
DEPRECATED157
DEPRECATED158
DEPRECATED159
DEPRECATED160
DEPRECATED161
DEPRECATED162
DEPRECATED163
DEPRECATED164
DEPRECATED165
DEPRECATED166
DEPRECATED167
DEPRECATED168
DEPRECATED169
DEPRECATED170
DEPRECATED171
DEPRECATED172
DEPRECATED173
DEPRECATED174
DEPRECATED175
DEPRECATED176
DEPRECATED177
DEPRECATED178
DEPRECATED179
DEPRECATED180
DEPRECATED181
DEPRECATED182
DEPRECATED183
DEPRECATED184
DEPRECATED185
DEPRECATED186
DEPRECATED187
DEPRECATED188
DEPRECATED189
DEPRECATED190
DEPRECATED191
DEPRECATED192
DEPRECATED193
DEPRECATED194
DEPRECATED195
DEPRECATED196
DEPRECATED197
DEPRECATED198
DEPRECATED199
DEPRECATED200
DEPRECATED201
DEPRECATED202
DEPRECATED203
DEPRECATED204
DEPRECATED205
DEPRECATED206
DEPRECATED207
DEPRECATED208
DEPRECATED209
DEPRECATED210
DEPRECATED211
DEPRECATED212
DEPRECATED213
DEPRECATED214
DEPRECATED215
DEPRECATED216
DEPRECATED217
DEPRECATED218
DEPRECATED219
DEPRECATED220
DEPRECATED221
DEPRECATED222
DEPRECATED223
DEPRECATED224
DEPRECATED225
DEPRECATED226
DEPRECATED227
DEPRECATED228
DEPRECATED229
DEPRECATED230
DEPRECATED231
DEPRECATED232
DEPRECATED233
DEPRECATED234
DEPRECATED235
DEPRECATED236
DEPRECATED237
DEPRECATED238
DEPRECATED239
DEPRECATED240
DEPRECATED241
DEPRECATED242
DEPRECATED243
DEPRECATED244
DEPRECATED245
DEPRECATED246
DEPRECATED247
DEPRECATED248
DEPRECATED249
DEPRECATED250
DEPRECATED251
DEPRECATED252
DEPRECATED253
DEPRECATED254
DEPRECATED255
DEPRECATED256
DEPRECATED257
DEPRECATED258
DEPRECATED259
DEPRECATED260
DEPRECATED261
DEPRECATED262
DEPRECATED263
DEPRECATED264
DEPRECATED265
DEPRECATED266
DEPRECATED267
DEPRECATED268
DEPRECATED269
DEPRECATED270
DEPRECATED271
DEPRECATED272
DEPRECATED273
DEPRECATED274
DEPRECATED275
DEPRECATED276
PERSON
NORP

View File

@ -110,282 +110,282 @@ IDS = {
"EOL": EOL,
"SPACE": SPACE,
"Animacy_anim": Animacy_anim,
"Animacy_inam": Animacy_inan,
"Animacy_hum": Animacy_hum, # U20
"Animacy_nhum": Animacy_nhum,
"Aspect_freq": Aspect_freq,
"Aspect_imp": Aspect_imp,
"Aspect_mod": Aspect_mod,
"Aspect_none": Aspect_none,
"Aspect_perf": Aspect_perf,
"Aspect_iter": Aspect_iter, # U20
"Aspect_hab": Aspect_hab, # U20
"Case_abe": Case_abe,
"Case_abl": Case_abl,
"Case_abs": Case_abs,
"Case_acc": Case_acc,
"Case_ade": Case_ade,
"Case_all": Case_all,
"Case_cau": Case_cau,
"Case_com": Case_com,
"Case_cmp": Case_cmp, # U20
"Case_dat": Case_dat,
"Case_del": Case_del,
"Case_dis": Case_dis,
"Case_ela": Case_ela,
"Case_equ": Case_equ, # U20
"Case_ess": Case_ess,
"Case_gen": Case_gen,
"Case_ill": Case_ill,
"Case_ine": Case_ine,
"Case_ins": Case_ins,
"Case_loc": Case_loc,
"Case_lat": Case_lat,
"Case_nom": Case_nom,
"Case_par": Case_par,
"Case_sub": Case_sub,
"Case_sup": Case_sup,
"Case_tem": Case_tem,
"Case_ter": Case_ter,
"Case_tra": Case_tra,
"Case_voc": Case_voc,
"Definite_two": Definite_two,
"Definite_def": Definite_def,
"Definite_red": Definite_red,
"Definite_cons": Definite_cons, # U20
"Definite_ind": Definite_ind,
"Definite_spec": Definite_spec, # U20
"Degree_cmp": Degree_cmp,
"Degree_comp": Degree_comp,
"Degree_none": Degree_none,
"Degree_pos": Degree_pos,
"Degree_sup": Degree_sup,
"Degree_abs": Degree_abs,
"Degree_com": Degree_com,
"Degree_dim": Degree_dim, # du
"Degree_equ": Degree_equ, # U20
"Evident_nfh": Evident_nfh, # U20
"Gender_com": Gender_com,
"Gender_fem": Gender_fem,
"Gender_masc": Gender_masc,
"Gender_neut": Gender_neut,
"Mood_cnd": Mood_cnd,
"Mood_imp": Mood_imp,
"Mood_ind": Mood_ind,
"Mood_n": Mood_n,
"Mood_pot": Mood_pot,
"Mood_sub": Mood_sub,
"Mood_opt": Mood_opt,
"Mood_prp": Mood_prp, # U20
"Mood_adm": Mood_adm, # U20
"Negative_neg": Negative_neg,
"Negative_pos": Negative_pos,
"Negative_yes": Negative_yes,
"Polarity_neg": Polarity_neg, # U20
"Polarity_pos": Polarity_pos, # U20
"Number_com": Number_com,
"Number_dual": Number_dual,
"Number_none": Number_none,
"Number_plur": Number_plur,
"Number_sing": Number_sing,
"Number_ptan": Number_ptan, # bg
"Number_count": Number_count, # bg, U20
"Number_tri": Number_tri, # U20
"NumType_card": NumType_card,
"NumType_dist": NumType_dist,
"NumType_frac": NumType_frac,
"NumType_gen": NumType_gen,
"NumType_mult": NumType_mult,
"NumType_none": NumType_none,
"NumType_ord": NumType_ord,
"NumType_sets": NumType_sets,
"Person_one": Person_one,
"Person_two": Person_two,
"Person_three": Person_three,
"Person_none": Person_none,
"Poss_yes": Poss_yes,
"PronType_advPart": PronType_advPart,
"PronType_art": PronType_art,
"PronType_default": PronType_default,
"PronType_dem": PronType_dem,
"PronType_ind": PronType_ind,
"PronType_int": PronType_int,
"PronType_neg": PronType_neg,
"PronType_prs": PronType_prs,
"PronType_rcp": PronType_rcp,
"PronType_rel": PronType_rel,
"PronType_tot": PronType_tot,
"PronType_clit": PronType_clit,
"PronType_exc": PronType_exc, # es, ca, it, fa, U20
"PronType_emp": PronType_emp, # U20
"Reflex_yes": Reflex_yes,
"Tense_fut": Tense_fut,
"Tense_imp": Tense_imp,
"Tense_past": Tense_past,
"Tense_pres": Tense_pres,
"VerbForm_fin": VerbForm_fin,
"VerbForm_ger": VerbForm_ger,
"VerbForm_inf": VerbForm_inf,
"VerbForm_none": VerbForm_none,
"VerbForm_part": VerbForm_part,
"VerbForm_partFut": VerbForm_partFut,
"VerbForm_partPast": VerbForm_partPast,
"VerbForm_partPres": VerbForm_partPres,
"VerbForm_sup": VerbForm_sup,
"VerbForm_trans": VerbForm_trans,
"VerbForm_conv": VerbForm_conv, # U20
"VerbForm_gdv": VerbForm_gdv, # la,
"VerbForm_vnoun": VerbForm_vnoun, # U20
"Voice_act": Voice_act,
"Voice_cau": Voice_cau,
"Voice_pass": Voice_pass,
"Voice_mid": Voice_mid, # gkc, U20
"Voice_int": Voice_int, # hb,
"Voice_antip": Voice_antip, # U20
"Voice_dir": Voice_dir, # U20
"Voice_inv": Voice_inv, # U20
"Abbr_yes": Abbr_yes, # cz, fi, sl, U,
"AdpType_prep": AdpType_prep, # cz, U,
"AdpType_post": AdpType_post, # U,
"AdpType_voc": AdpType_voc, # cz,
"AdpType_comprep": AdpType_comprep, # cz,
"AdpType_circ": AdpType_circ, # U,
"AdvType_man": AdvType_man,
"AdvType_loc": AdvType_loc,
"AdvType_tim": AdvType_tim,
"AdvType_deg": AdvType_deg,
"AdvType_cau": AdvType_cau,
"AdvType_mod": AdvType_mod,
"AdvType_sta": AdvType_sta,
"AdvType_ex": AdvType_ex,
"AdvType_adadj": AdvType_adadj,
"ConjType_oper": ConjType_oper, # cz, U,
"ConjType_comp": ConjType_comp, # cz, U,
"Connegative_yes": Connegative_yes, # fi,
"Derivation_minen": Derivation_minen, # fi,
"Derivation_sti": Derivation_sti, # fi,
"Derivation_inen": Derivation_inen, # fi,
"Derivation_lainen": Derivation_lainen, # fi,
"Derivation_ja": Derivation_ja, # fi,
"Derivation_ton": Derivation_ton, # fi,
"Derivation_vs": Derivation_vs, # fi,
"Derivation_ttain": Derivation_ttain, # fi,
"Derivation_ttaa": Derivation_ttaa, # fi,
"Echo_rdp": Echo_rdp, # U,
"Echo_ech": Echo_ech, # U,
"Foreign_foreign": Foreign_foreign, # cz, fi, U,
"Foreign_fscript": Foreign_fscript, # cz, fi, U,
"Foreign_tscript": Foreign_tscript, # cz, U,
"Foreign_yes": Foreign_yes, # sl,
"Gender_dat_masc": Gender_dat_masc, # bq, U,
"Gender_dat_fem": Gender_dat_fem, # bq, U,
"Gender_erg_masc": Gender_erg_masc, # bq,
"Gender_erg_fem": Gender_erg_fem, # bq,
"Gender_psor_masc": Gender_psor_masc, # cz, sl, U,
"Gender_psor_fem": Gender_psor_fem, # cz, sl, U,
"Gender_psor_neut": Gender_psor_neut, # sl,
"Hyph_yes": Hyph_yes, # cz, U,
"InfForm_one": InfForm_one, # fi,
"InfForm_two": InfForm_two, # fi,
"InfForm_three": InfForm_three, # fi,
"NameType_geo": NameType_geo, # U, cz,
"NameType_prs": NameType_prs, # U, cz,
"NameType_giv": NameType_giv, # U, cz,
"NameType_sur": NameType_sur, # U, cz,
"NameType_nat": NameType_nat, # U, cz,
"NameType_com": NameType_com, # U, cz,
"NameType_pro": NameType_pro, # U, cz,
"NameType_oth": NameType_oth, # U, cz,
"NounType_com": NounType_com, # U,
"NounType_prop": NounType_prop, # U,
"NounType_class": NounType_class, # U,
"Number_abs_sing": Number_abs_sing, # bq, U,
"Number_abs_plur": Number_abs_plur, # bq, U,
"Number_dat_sing": Number_dat_sing, # bq, U,
"Number_dat_plur": Number_dat_plur, # bq, U,
"Number_erg_sing": Number_erg_sing, # bq, U,
"Number_erg_plur": Number_erg_plur, # bq, U,
"Number_psee_sing": Number_psee_sing, # U,
"Number_psee_plur": Number_psee_plur, # U,
"Number_psor_sing": Number_psor_sing, # cz, fi, sl, U,
"Number_psor_plur": Number_psor_plur, # cz, fi, sl, U,
"Number_pauc": Number_pauc, # U20
"Number_grpa": Number_grpa, # U20
"Number_grpl": Number_grpl, # U20
"Number_inv": Number_inv, # U20
"NumForm_digit": NumForm_digit, # cz, sl, U,
"NumForm_roman": NumForm_roman, # cz, sl, U,
"NumForm_word": NumForm_word, # cz, sl, U,
"NumValue_one": NumValue_one, # cz, U,
"NumValue_two": NumValue_two, # cz, U,
"NumValue_three": NumValue_three, # cz, U,
"PartForm_pres": PartForm_pres, # fi,
"PartForm_past": PartForm_past, # fi,
"PartForm_agt": PartForm_agt, # fi,
"PartForm_neg": PartForm_neg, # fi,
"PartType_mod": PartType_mod, # U,
"PartType_emp": PartType_emp, # U,
"PartType_res": PartType_res, # U,
"PartType_inf": PartType_inf, # U,
"PartType_vbp": PartType_vbp, # U,
"Person_abs_one": Person_abs_one, # bq, U,
"Person_abs_two": Person_abs_two, # bq, U,
"Person_abs_three": Person_abs_three, # bq, U,
"Person_dat_one": Person_dat_one, # bq, U,
"Person_dat_two": Person_dat_two, # bq, U,
"Person_dat_three": Person_dat_three, # bq, U,
"Person_erg_one": Person_erg_one, # bq, U,
"Person_erg_two": Person_erg_two, # bq, U,
"Person_erg_three": Person_erg_three, # bq, U,
"Person_psor_one": Person_psor_one, # fi, U,
"Person_psor_two": Person_psor_two, # fi, U,
"Person_psor_three": Person_psor_three, # fi, U,
"Person_zero": Person_zero, # U20
"Person_four": Person_four, # U20
"Polite_inf": Polite_inf, # bq, U,
"Polite_pol": Polite_pol, # bq, U,
"Polite_abs_inf": Polite_abs_inf, # bq, U,
"Polite_abs_pol": Polite_abs_pol, # bq, U,
"Polite_erg_inf": Polite_erg_inf, # bq, U,
"Polite_erg_pol": Polite_erg_pol, # bq, U,
"Polite_dat_inf": Polite_dat_inf, # bq, U,
"Polite_dat_pol": Polite_dat_pol, # bq, U,
"Polite_infm": Polite_infm, # U20
"Polite_form": Polite_form, # U20
"Polite_form_elev": Polite_form_elev, # U20
"Polite_form_humb": Polite_form_humb, # U20
"Prefix_yes": Prefix_yes, # U,
"PrepCase_npr": PrepCase_npr, # cz,
"PrepCase_pre": PrepCase_pre, # U,
"PunctSide_ini": PunctSide_ini, # U,
"PunctSide_fin": PunctSide_fin, # U,
"PunctType_peri": PunctType_peri, # U,
"PunctType_qest": PunctType_qest, # U,
"PunctType_excl": PunctType_excl, # U,
"PunctType_quot": PunctType_quot, # U,
"PunctType_brck": PunctType_brck, # U,
"PunctType_comm": PunctType_comm, # U,
"PunctType_colo": PunctType_colo, # U,
"PunctType_semi": PunctType_semi, # U,
"PunctType_dash": PunctType_dash, # U,
"Style_arch": Style_arch, # cz, fi, U,
"Style_rare": Style_rare, # cz, fi, U,
"Style_poet": Style_poet, # cz, U,
"Style_norm": Style_norm, # cz, U,
"Style_coll": Style_coll, # cz, U,
"Style_vrnc": Style_vrnc, # cz, U,
"Style_sing": Style_sing, # cz, U,
"Style_expr": Style_expr, # cz, U,
"Style_derg": Style_derg, # cz, U,
"Style_vulg": Style_vulg, # cz, U,
"Style_yes": Style_yes, # fi, U,
"StyleVariant_styleShort": StyleVariant_styleShort, # cz,
"StyleVariant_styleBound": StyleVariant_styleBound, # cz, sl,
"VerbType_aux": VerbType_aux, # U,
"VerbType_cop": VerbType_cop, # U,
"VerbType_mod": VerbType_mod, # U,
"VerbType_light": VerbType_light, # U,
"DEPRECATED001": DEPRECATED001,
"DEPRECATED002": DEPRECATED002,
"DEPRECATED003": DEPRECATED003,
"DEPRECATED004": DEPRECATED004,
"DEPRECATED005": DEPRECATED005,
"DEPRECATED006": DEPRECATED006,
"DEPRECATED007": DEPRECATED007,
"DEPRECATED008": DEPRECATED008,
"DEPRECATED009": DEPRECATED009,
"DEPRECATED010": DEPRECATED010,
"DEPRECATED011": DEPRECATED011,
"DEPRECATED012": DEPRECATED012,
"DEPRECATED013": DEPRECATED013,
"DEPRECATED014": DEPRECATED014,
"DEPRECATED015": DEPRECATED015,
"DEPRECATED016": DEPRECATED016,
"DEPRECATED017": DEPRECATED017,
"DEPRECATED018": DEPRECATED018,
"DEPRECATED019": DEPRECATED019,
"DEPRECATED020": DEPRECATED020,
"DEPRECATED021": DEPRECATED021,
"DEPRECATED022": DEPRECATED022,
"DEPRECATED023": DEPRECATED023,
"DEPRECATED024": DEPRECATED024,
"DEPRECATED025": DEPRECATED025,
"DEPRECATED026": DEPRECATED026,
"DEPRECATED027": DEPRECATED027,
"DEPRECATED028": DEPRECATED028,
"DEPRECATED029": DEPRECATED029,
"DEPRECATED030": DEPRECATED030,
"DEPRECATED031": DEPRECATED031,
"DEPRECATED032": DEPRECATED032,
"DEPRECATED033": DEPRECATED033,
"DEPRECATED034": DEPRECATED034,
"DEPRECATED035": DEPRECATED035,
"DEPRECATED036": DEPRECATED036,
"DEPRECATED037": DEPRECATED037,
"DEPRECATED038": DEPRECATED038,
"DEPRECATED039": DEPRECATED039,
"DEPRECATED040": DEPRECATED040,
"DEPRECATED041": DEPRECATED041,
"DEPRECATED042": DEPRECATED042,
"DEPRECATED043": DEPRECATED043,
"DEPRECATED044": DEPRECATED044,
"DEPRECATED045": DEPRECATED045,
"DEPRECATED046": DEPRECATED046,
"DEPRECATED047": DEPRECATED047,
"DEPRECATED048": DEPRECATED048,
"DEPRECATED049": DEPRECATED049,
"DEPRECATED050": DEPRECATED050,
"DEPRECATED051": DEPRECATED051,
"DEPRECATED052": DEPRECATED052,
"DEPRECATED053": DEPRECATED053,
"DEPRECATED054": DEPRECATED054,
"DEPRECATED055": DEPRECATED055,
"DEPRECATED056": DEPRECATED056,
"DEPRECATED057": DEPRECATED057,
"DEPRECATED058": DEPRECATED058,
"DEPRECATED059": DEPRECATED059,
"DEPRECATED060": DEPRECATED060,
"DEPRECATED061": DEPRECATED061,
"DEPRECATED062": DEPRECATED062,
"DEPRECATED063": DEPRECATED063,
"DEPRECATED064": DEPRECATED064,
"DEPRECATED065": DEPRECATED065,
"DEPRECATED066": DEPRECATED066,
"DEPRECATED067": DEPRECATED067,
"DEPRECATED068": DEPRECATED068,
"DEPRECATED069": DEPRECATED069,
"DEPRECATED070": DEPRECATED070,
"DEPRECATED071": DEPRECATED071,
"DEPRECATED072": DEPRECATED072,
"DEPRECATED073": DEPRECATED073,
"DEPRECATED074": DEPRECATED074,
"DEPRECATED075": DEPRECATED075,
"DEPRECATED076": DEPRECATED076,
"DEPRECATED077": DEPRECATED077,
"DEPRECATED078": DEPRECATED078,
"DEPRECATED079": DEPRECATED079,
"DEPRECATED080": DEPRECATED080,
"DEPRECATED081": DEPRECATED081,
"DEPRECATED082": DEPRECATED082,
"DEPRECATED083": DEPRECATED083,
"DEPRECATED084": DEPRECATED084,
"DEPRECATED085": DEPRECATED085,
"DEPRECATED086": DEPRECATED086,
"DEPRECATED087": DEPRECATED087,
"DEPRECATED088": DEPRECATED088,
"DEPRECATED089": DEPRECATED089,
"DEPRECATED090": DEPRECATED090,
"DEPRECATED091": DEPRECATED091,
"DEPRECATED092": DEPRECATED092,
"DEPRECATED093": DEPRECATED093,
"DEPRECATED094": DEPRECATED094,
"DEPRECATED095": DEPRECATED095,
"DEPRECATED096": DEPRECATED096,
"DEPRECATED097": DEPRECATED097,
"DEPRECATED098": DEPRECATED098,
"DEPRECATED099": DEPRECATED099,
"DEPRECATED100": DEPRECATED100,
"DEPRECATED101": DEPRECATED101,
"DEPRECATED102": DEPRECATED102,
"DEPRECATED103": DEPRECATED103,
"DEPRECATED104": DEPRECATED104,
"DEPRECATED105": DEPRECATED105,
"DEPRECATED106": DEPRECATED106,
"DEPRECATED107": DEPRECATED107,
"DEPRECATED108": DEPRECATED108,
"DEPRECATED109": DEPRECATED109,
"DEPRECATED110": DEPRECATED110,
"DEPRECATED111": DEPRECATED111,
"DEPRECATED112": DEPRECATED112,
"DEPRECATED113": DEPRECATED113,
"DEPRECATED114": DEPRECATED114,
"DEPRECATED115": DEPRECATED115,
"DEPRECATED116": DEPRECATED116,
"DEPRECATED117": DEPRECATED117,
"DEPRECATED118": DEPRECATED118,
"DEPRECATED119": DEPRECATED119,
"DEPRECATED120": DEPRECATED120,
"DEPRECATED121": DEPRECATED121,
"DEPRECATED122": DEPRECATED122,
"DEPRECATED123": DEPRECATED123,
"DEPRECATED124": DEPRECATED124,
"DEPRECATED125": DEPRECATED125,
"DEPRECATED126": DEPRECATED126,
"DEPRECATED127": DEPRECATED127,
"DEPRECATED128": DEPRECATED128,
"DEPRECATED129": DEPRECATED129,
"DEPRECATED130": DEPRECATED130,
"DEPRECATED131": DEPRECATED131,
"DEPRECATED132": DEPRECATED132,
"DEPRECATED133": DEPRECATED133,
"DEPRECATED134": DEPRECATED134,
"DEPRECATED135": DEPRECATED135,
"DEPRECATED136": DEPRECATED136,
"DEPRECATED137": DEPRECATED137,
"DEPRECATED138": DEPRECATED138,
"DEPRECATED139": DEPRECATED139,
"DEPRECATED140": DEPRECATED140,
"DEPRECATED141": DEPRECATED141,
"DEPRECATED142": DEPRECATED142,
"DEPRECATED143": DEPRECATED143,
"DEPRECATED144": DEPRECATED144,
"DEPRECATED145": DEPRECATED145,
"DEPRECATED146": DEPRECATED146,
"DEPRECATED147": DEPRECATED147,
"DEPRECATED148": DEPRECATED148,
"DEPRECATED149": DEPRECATED149,
"DEPRECATED150": DEPRECATED150,
"DEPRECATED151": DEPRECATED151,
"DEPRECATED152": DEPRECATED152,
"DEPRECATED153": DEPRECATED153,
"DEPRECATED154": DEPRECATED154,
"DEPRECATED155": DEPRECATED155,
"DEPRECATED156": DEPRECATED156,
"DEPRECATED157": DEPRECATED157,
"DEPRECATED158": DEPRECATED158,
"DEPRECATED159": DEPRECATED159,
"DEPRECATED160": DEPRECATED160,
"DEPRECATED161": DEPRECATED161,
"DEPRECATED162": DEPRECATED162,
"DEPRECATED163": DEPRECATED163,
"DEPRECATED164": DEPRECATED164,
"DEPRECATED165": DEPRECATED165,
"DEPRECATED166": DEPRECATED166,
"DEPRECATED167": DEPRECATED167,
"DEPRECATED168": DEPRECATED168,
"DEPRECATED169": DEPRECATED169,
"DEPRECATED170": DEPRECATED170,
"DEPRECATED171": DEPRECATED171,
"DEPRECATED172": DEPRECATED172,
"DEPRECATED173": DEPRECATED173,
"DEPRECATED174": DEPRECATED174,
"DEPRECATED175": DEPRECATED175,
"DEPRECATED176": DEPRECATED176,
"DEPRECATED177": DEPRECATED177,
"DEPRECATED178": DEPRECATED178,
"DEPRECATED179": DEPRECATED179,
"DEPRECATED180": DEPRECATED180,
"DEPRECATED181": DEPRECATED181,
"DEPRECATED182": DEPRECATED182,
"DEPRECATED183": DEPRECATED183,
"DEPRECATED184": DEPRECATED184,
"DEPRECATED185": DEPRECATED185,
"DEPRECATED186": DEPRECATED186,
"DEPRECATED187": DEPRECATED187,
"DEPRECATED188": DEPRECATED188,
"DEPRECATED189": DEPRECATED189,
"DEPRECATED190": DEPRECATED190,
"DEPRECATED191": DEPRECATED191,
"DEPRECATED192": DEPRECATED192,
"DEPRECATED193": DEPRECATED193,
"DEPRECATED194": DEPRECATED194,
"DEPRECATED195": DEPRECATED195,
"DEPRECATED196": DEPRECATED196,
"DEPRECATED197": DEPRECATED197,
"DEPRECATED198": DEPRECATED198,
"DEPRECATED199": DEPRECATED199,
"DEPRECATED200": DEPRECATED200,
"DEPRECATED201": DEPRECATED201,
"DEPRECATED202": DEPRECATED202,
"DEPRECATED203": DEPRECATED203,
"DEPRECATED204": DEPRECATED204,
"DEPRECATED205": DEPRECATED205,
"DEPRECATED206": DEPRECATED206,
"DEPRECATED207": DEPRECATED207,
"DEPRECATED208": DEPRECATED208,
"DEPRECATED209": DEPRECATED209,
"DEPRECATED210": DEPRECATED210,
"DEPRECATED211": DEPRECATED211,
"DEPRECATED212": DEPRECATED212,
"DEPRECATED213": DEPRECATED213,
"DEPRECATED214": DEPRECATED214,
"DEPRECATED215": DEPRECATED215,
"DEPRECATED216": DEPRECATED216,
"DEPRECATED217": DEPRECATED217,
"DEPRECATED218": DEPRECATED218,
"DEPRECATED219": DEPRECATED219,
"DEPRECATED220": DEPRECATED220,
"DEPRECATED221": DEPRECATED221,
"DEPRECATED222": DEPRECATED222,
"DEPRECATED223": DEPRECATED223,
"DEPRECATED224": DEPRECATED224,
"DEPRECATED225": DEPRECATED225,
"DEPRECATED226": DEPRECATED226,
"DEPRECATED227": DEPRECATED227,
"DEPRECATED228": DEPRECATED228,
"DEPRECATED229": DEPRECATED229,
"DEPRECATED230": DEPRECATED230,
"DEPRECATED231": DEPRECATED231,
"DEPRECATED232": DEPRECATED232,
"DEPRECATED233": DEPRECATED233,
"DEPRECATED234": DEPRECATED234,
"DEPRECATED235": DEPRECATED235,
"DEPRECATED236": DEPRECATED236,
"DEPRECATED237": DEPRECATED237,
"DEPRECATED238": DEPRECATED238,
"DEPRECATED239": DEPRECATED239,
"DEPRECATED240": DEPRECATED240,
"DEPRECATED241": DEPRECATED241,
"DEPRECATED242": DEPRECATED242,
"DEPRECATED243": DEPRECATED243,
"DEPRECATED244": DEPRECATED244,
"DEPRECATED245": DEPRECATED245,
"DEPRECATED246": DEPRECATED246,
"DEPRECATED247": DEPRECATED247,
"DEPRECATED248": DEPRECATED248,
"DEPRECATED249": DEPRECATED249,
"DEPRECATED250": DEPRECATED250,
"DEPRECATED251": DEPRECATED251,
"DEPRECATED252": DEPRECATED252,
"DEPRECATED253": DEPRECATED253,
"DEPRECATED254": DEPRECATED254,
"DEPRECATED255": DEPRECATED255,
"DEPRECATED256": DEPRECATED256,
"DEPRECATED257": DEPRECATED257,
"DEPRECATED258": DEPRECATED258,
"DEPRECATED259": DEPRECATED259,
"DEPRECATED260": DEPRECATED260,
"DEPRECATED261": DEPRECATED261,
"DEPRECATED262": DEPRECATED262,
"DEPRECATED263": DEPRECATED263,
"DEPRECATED264": DEPRECATED264,
"DEPRECATED265": DEPRECATED265,
"DEPRECATED266": DEPRECATED266,
"DEPRECATED267": DEPRECATED267,
"DEPRECATED268": DEPRECATED268,
"DEPRECATED269": DEPRECATED269,
"DEPRECATED270": DEPRECATED270,
"DEPRECATED271": DEPRECATED271,
"DEPRECATED272": DEPRECATED272,
"DEPRECATED273": DEPRECATED273,
"DEPRECATED274": DEPRECATED274,
"DEPRECATED275": DEPRECATED275,
"DEPRECATED276": DEPRECATED276,
"PERSON": PERSON,
"NORP": NORP,

View File

@ -9,22 +9,52 @@ def i_has(en_tokenizer):
return doc
def test_token_morph_id(i_has):
assert i_has[0].morph.id
assert i_has[1].morph.id != 0
assert i_has[0].morph.id != i_has[1].morph.id
def test_token_morph_eq(i_has):
    """Analyses compare by value: distinct objects for one token are equal."""
    first_a, first_b = i_has[0].morph, i_has[0].morph
    second = i_has[1].morph
    # Each access to `.morph` is expected to hand back a separate object ...
    assert first_a is not first_b
    # ... that still compares equal to the other one for the same token,
    assert first_a == first_b
    # and unequal to the analysis of a different token.
    assert first_a != second
def test_token_morph_key(i_has):
    """Keys are non-zero, stable per token, and differ between the two tokens."""
    first_key = i_has[0].morph.key
    second_key = i_has[1].morph.key
    assert first_key != 0
    assert second_key != 0
    # Reading the key again for the same token gives the same value.
    assert first_key == i_has[0].morph.key
    assert first_key != second_key
def test_morph_props(i_has):
assert i_has[0].morph.pron_type == i_has.vocab.strings["PronType_prs"]
assert i_has[0].morph.pron_type_ == "PronType_prs"
assert i_has[1].morph.pron_type == 0
assert i_has[0].morph.get("PronType") == ["PronType=prs"]
assert i_has[1].morph.get("PronType") == []
def test_morph_iter(i_has):
assert list(i_has[0].morph) == ["PronType_prs"]
assert list(i_has[1].morph) == ["Number_sing", "Person_three", "VerbForm_fin"]
assert set(i_has[0].morph) == set(["PronType=prs"])
assert set(i_has[1].morph) == set(["Number=sing", "Person=three", "Tense=pres", "VerbForm=fin"])
def test_morph_get(i_has):
assert i_has[0].morph.get("pron_type") == "PronType_prs"
assert i_has[0].morph.get("PronType") == ["PronType=prs"]
def test_morph_set(i_has):
    """Assigning ``Token.morph_`` from strings and dicts gives normalized FEATS."""
    assert i_has[0].morph.get("PronType") == ["PronType=prs"]

    # set by string
    i_has[0].morph_ = "PronType=unk"
    assert i_has[0].morph.get("PronType") == ["PronType=unk"]

    # Each entry: (value assigned to ``morph_``, FEATS string expected back).
    normalized_cases = [
        # set by string, fields are alphabetized
        ("PronType=123|NounType=unk", "NounType=unk|PronType=123"),
        # set by dict
        (
            {"AType": "123", "BType": "unk", "POS": "ADJ"},
            "AType=123|BType=unk|POS=ADJ",
        ),
        # set by string with multiple values, fields and values are alphabetized
        ("BType=c|AType=b,a", "AType=a,b|BType=c"),
        # set by dict with multiple values, fields and values are alphabetized
        ({"AType": "b,a", "BType": "c"}, "AType=a,b|BType=c"),
    ]
    for assigned, expected in normalized_cases:
        i_has[0].morph_ = assigned
        assert i_has[0].morph_ == expected
def test_morph_str(i_has):
    """``str()`` of each token's analysis is its FEATS string."""
    expected_feats = [
        "PronType=prs",
        "Number=sing|Person=three|Tense=pres|VerbForm=fin",
    ]
    for index, feats in enumerate(expected_feats):
        assert str(i_has[index].morph) == feats

View File

@ -0,0 +1,26 @@
import pytest
from spacy.morphology import Morphology
def test_feats_converters():
    """Check conversions between the FEATS string, dict and list forms.

    Exercises ``Morphology.list_to_feats``, ``Morphology.dict_to_feats`` and
    ``Morphology.feats_to_dict`` with already-sorted input, with string/dict
    roundtrips, and with unsorted input that is expected to come back sorted.
    """
    feats = "Case=dat,gen|Number=sing"
    feats_dict = {"Case": "dat,gen", "Number": "sing"}
    feats_list = feats.split(Morphology.FEATURE_SEP)

    # simple conversions
    assert Morphology.list_to_feats(feats_list) == feats
    assert Morphology.dict_to_feats(feats_dict) == feats
    assert Morphology.feats_to_dict(feats) == feats_dict

    # roundtrips
    assert Morphology.dict_to_feats(Morphology.feats_to_dict(feats)) == feats
    assert Morphology.feats_to_dict(Morphology.dict_to_feats(feats_dict)) == feats_dict

    # unsorted input is normalized
    unsorted_feats = "Number=sing|Case=gen,dat"
    unsorted_feats_dict = {"Case": "gen,dat", "Number": "sing"}
    # Split the unsorted string, not `feats`: otherwise the list_to_feats
    # assertion below would only repeat the already-sorted case.
    unsorted_feats_list = unsorted_feats.split(Morphology.FEATURE_SEP)
    assert Morphology.feats_to_dict(unsorted_feats) == feats_dict
    assert Morphology.dict_to_feats(unsorted_feats_dict) == feats
    assert Morphology.list_to_feats(unsorted_feats_list) == feats
    assert Morphology.dict_to_feats(Morphology.feats_to_dict(unsorted_feats)) == feats

View File

@ -16,32 +16,30 @@ def test_init(morphology):
def test_add_morphology_with_string_names(morphology):
morphology.add({"Case_gen", "Number_sing"})
morphology.add({"Case": "gen", "Number": "sing"})
def test_add_morphology_with_int_ids(morphology):
morphology.add({get_string_id("Case_gen"), get_string_id("Number_sing")})
morphology.strings.add("Case")
morphology.strings.add("gen")
morphology.strings.add("Number")
morphology.strings.add("sing")
morphology.add({get_string_id("Case"): get_string_id("gen"), get_string_id("Number"): get_string_id("sing")})
def test_add_morphology_with_mix_strings_and_ints(morphology):
morphology.add({get_string_id("PunctSide_ini"), "VerbType_aux"})
morphology.strings.add("PunctSide")
morphology.strings.add("ini")
morphology.add({get_string_id("PunctSide"): get_string_id("ini"), "VerbType": "aux"})
def test_morphology_tags_hash_distinctly(morphology):
tag1 = morphology.add({"PunctSide_ini", "VerbType_aux"})
tag2 = morphology.add({"Case_gen", "Number_sing"})
tag1 = morphology.add({"PunctSide": "ini", "VerbType": "aux"})
tag2 = morphology.add({"Case": "gen", "Number": "sing"})
assert tag1 != tag2
def test_morphology_tags_hash_independent_of_order(morphology):
tag1 = morphology.add({"Case_gen", "Number_sing"})
tag2 = morphology.add({"Number_sing", "Case_gen"})
tag1 = morphology.add({"Case": "gen", "Number": "sing"})
tag2 = morphology.add({"Number": "sing", "Case": "gen"})
assert tag1 == tag2
def test_update_morphology_tag(morphology):
tag1 = morphology.add({"Case_gen"})
tag2 = morphology.update(tag1, {"Number_sing"})
assert tag1 != tag2
tag3 = morphology.add({"Number_sing", "Case_gen"})
assert tag2 == tag3

View File

@ -2,7 +2,7 @@ import pytest
import random
from spacy.matcher import Matcher
from spacy.attrs import IS_PUNCT, ORTH, LOWER
from spacy.symbols import POS, VERB, VerbForm_inf
from spacy.symbols import POS, VERB
from spacy.vocab import Vocab
from spacy.language import Language
from spacy.lemmatizer import Lemmatizer
@ -164,7 +164,7 @@ def test_issue590(en_vocab):
def test_issue595():
"""Test lemmatization of base forms"""
words = ["Do", "n't", "feed", "the", "dog"]
tag_map = {"VB": {POS: VERB, VerbForm_inf: True}}
tag_map = {"VB": {POS: VERB, "VerbForm": "inf"}}
lookups = Lookups()
lookups.add_table("lemma_rules", {"verb": [["ed", "e"]]})
lookups.add_table("lemma_index", {"verb": {}})

View File

@ -8,7 +8,7 @@ from spacy.matcher import Matcher
from spacy.tokenizer import Tokenizer
from spacy.lemmatizer import Lemmatizer
from spacy.lookups import Lookups
from spacy.symbols import ORTH, LEMMA, POS, VERB, VerbForm_part
from spacy.symbols import ORTH, LEMMA, POS, VERB
def test_issue1061():
@ -88,7 +88,7 @@ def test_issue1375():
def test_issue1387():
tag_map = {"VBG": {POS: VERB, VerbForm_part: True}}
tag_map = {"VBG": {POS: VERB, "VerbForm": "part"}}
lookups = Lookups()
lookups.add_table("lemma_index", {"verb": ("cope", "cop")})
lookups.add_table("lemma_exc", {"verb": {"coping": ("cope",)}})

View File

@ -2,5 +2,6 @@ from .doc import Doc
from .token import Token
from .span import Span
from ._serialize import DocBin
from .morphanalysis import MorphAnalysis
__all__ = ["Doc", "Token", "Span", "DocBin"]
__all__ = ["Doc", "Token", "Span", "DocBin", "MorphAnalysis"]

View File

@ -5,5 +5,5 @@ from ..structs cimport MorphAnalysisC
cdef class MorphAnalysis:
cdef readonly Vocab vocab
cdef hash_t key
cdef readonly hash_t key
cdef MorphAnalysisC c

View File

@ -1,15 +1,14 @@
from libc.string cimport memset
cimport numpy as np
from ..vocab cimport Vocab
from ..typedefs cimport hash_t, attr_t
from ..morphology cimport list_features, check_feature, get_field, tag_to_json
from ..strings import get_string_id
from ..morphology cimport list_features, check_feature, get_by_field
cdef class MorphAnalysis:
"""Control access to morphological features for a token."""
def __init__(self, Vocab vocab, features=tuple()):
def __init__(self, Vocab vocab, features=dict()):
self.vocab = vocab
self.key = self.vocab.morphology.add(features)
analysis = <const MorphAnalysisC*>self.vocab.morphology.tags.get(self.key)
@ -33,7 +32,7 @@ cdef class MorphAnalysis:
def __contains__(self, feature):
"""Test whether the morphological analysis contains some feature."""
cdef attr_t feat_id = get_string_id(feature)
cdef attr_t feat_id = self.vocab.strings.as_int(feature)
return check_feature(&self.c, feat_id)
def __iter__(self):
@ -55,369 +54,28 @@ cdef class MorphAnalysis:
def __hash__(self):
    # The analysis hashes to its `key` attribute.
    return self.key
def get(self, unicode field):
def __eq__(self, other):
    """Return True when `other` is a MorphAnalysis with the same key.

    Returns NotImplemented for any other type, so that comparing with an
    unrelated object (e.g. None) evaluates to False instead of raising
    AttributeError on the missing `key` attribute.
    """
    if not isinstance(other, MorphAnalysis):
        return NotImplemented
    return self.key == other.key

def __ne__(self, other):
    """Return True when `other` is a MorphAnalysis with a different key.

    Returns NotImplemented for any other type, mirroring `__eq__`, so that
    comparing with an unrelated object evaluates to True instead of raising.
    """
    if not isinstance(other, MorphAnalysis):
        return NotImplemented
    return self.key != other.key
def get(self, field):
"""Retrieve a feature by field."""
cdef int field_id = self.vocab.morphology._feat_map.attr2field[field]
return self.vocab.strings[get_field(&self.c, field_id)]
cdef attr_t field_id = self.vocab.strings.as_int(field)
cdef np.ndarray results = get_by_field(&self.c, field_id)
return [self.vocab.strings[result] for result in results]
def to_json(self):
"""Produce a json serializable representation, which will be a list of
strings.
"""Produce a json serializable representation as a UD FEATS-style
string.
"""
return tag_to_json(&self.c)
@property
def is_base_form(self):
raise NotImplementedError
@property
def pos(self):
return self.c.pos
@property
def pos_(self):
return self.vocab.strings[self.c.pos]
property id:
def __get__(self):
return self.key
property abbr:
def __get__(self):
return self.c.abbr
property adp_type:
def __get__(self):
return self.c.adp_type
property adv_type:
def __get__(self):
return self.c.adv_type
property animacy:
def __get__(self):
return self.c.animacy
property aspect:
def __get__(self):
return self.c.aspect
property case:
def __get__(self):
return self.c.case
property conj_type:
def __get__(self):
return self.c.conj_type
property connegative:
def __get__(self):
return self.c.connegative
property definite:
def __get__(self):
return self.c.definite
property degree:
def __get__(self):
return self.c.degree
property derivation:
def __get__(self):
return self.c.derivation
property echo:
def __get__(self):
return self.c.echo
property foreign:
def __get__(self):
return self.c.foreign
property gender:
def __get__(self):
return self.c.gender
property hyph:
def __get__(self):
return self.c.hyph
property inf_form:
def __get__(self):
return self.c.inf_form
property mood:
def __get__(self):
return self.c.mood
property name_type:
def __get__(self):
return self.c.name_type
property negative:
def __get__(self):
return self.c.negative
property noun_type:
def __get__(self):
return self.c.noun_type
property number:
def __get__(self):
return self.c.number
property num_form:
def __get__(self):
return self.c.num_form
property num_type:
def __get__(self):
return self.c.num_type
property num_value:
def __get__(self):
return self.c.num_value
property part_form:
def __get__(self):
return self.c.part_form
property part_type:
def __get__(self):
return self.c.part_type
property person:
def __get__(self):
return self.c.person
property polite:
def __get__(self):
return self.c.polite
property polarity:
def __get__(self):
return self.c.polarity
property poss:
def __get__(self):
return self.c.poss
property prefix:
def __get__(self):
return self.c.prefix
property prep_case:
def __get__(self):
return self.c.prep_case
property pron_type:
def __get__(self):
return self.c.pron_type
property punct_side:
def __get__(self):
return self.c.punct_side
property punct_type:
def __get__(self):
return self.c.punct_type
property reflex:
def __get__(self):
return self.c.reflex
property style:
def __get__(self):
return self.c.style
property style_variant:
def __get__(self):
return self.c.style_variant
property tense:
def __get__(self):
return self.c.tense
property typo:
def __get__(self):
return self.c.typo
property verb_form:
def __get__(self):
return self.c.verb_form
property voice:
def __get__(self):
return self.c.voice
property verb_type:
def __get__(self):
return self.c.verb_type
property abbr_:
def __get__(self):
return self.vocab.strings[self.c.abbr]
property adp_type_:
def __get__(self):
return self.vocab.strings[self.c.adp_type]
property adv_type_:
def __get__(self):
return self.vocab.strings[self.c.adv_type]
property animacy_:
def __get__(self):
return self.vocab.strings[self.c.animacy]
property aspect_:
def __get__(self):
return self.vocab.strings[self.c.aspect]
property case_:
def __get__(self):
return self.vocab.strings[self.c.case]
property conj_type_:
def __get__(self):
return self.vocab.strings[self.c.conj_type]
property connegative_:
def __get__(self):
return self.vocab.strings[self.c.connegative]
property definite_:
def __get__(self):
return self.vocab.strings[self.c.definite]
property degree_:
def __get__(self):
return self.vocab.strings[self.c.degree]
property derivation_:
def __get__(self):
return self.vocab.strings[self.c.derivation]
property echo_:
def __get__(self):
return self.vocab.strings[self.c.echo]
property foreign_:
def __get__(self):
return self.vocab.strings[self.c.foreign]
property gender_:
def __get__(self):
return self.vocab.strings[self.c.gender]
property hyph_:
def __get__(self):
return self.vocab.strings[self.c.hyph]
property inf_form_:
def __get__(self):
return self.vocab.strings[self.c.inf_form]
property name_type_:
def __get__(self):
return self.vocab.strings[self.c.name_type]
property negative_:
def __get__(self):
return self.vocab.strings[self.c.negative]
property mood_:
def __get__(self):
return self.vocab.strings[self.c.mood]
property number_:
def __get__(self):
return self.vocab.strings[self.c.number]
property num_form_:
def __get__(self):
return self.vocab.strings[self.c.num_form]
property num_type_:
def __get__(self):
return self.vocab.strings[self.c.num_type]
property num_value_:
def __get__(self):
return self.vocab.strings[self.c.num_value]
property part_form_:
def __get__(self):
return self.vocab.strings[self.c.part_form]
property part_type_:
def __get__(self):
return self.vocab.strings[self.c.part_type]
property person_:
def __get__(self):
return self.vocab.strings[self.c.person]
property polite_:
def __get__(self):
return self.vocab.strings[self.c.polite]
property polarity_:
def __get__(self):
return self.vocab.strings[self.c.polarity]
property poss_:
def __get__(self):
return self.vocab.strings[self.c.poss]
property prefix_:
def __get__(self):
return self.vocab.strings[self.c.prefix]
property prep_case_:
def __get__(self):
return self.vocab.strings[self.c.prep_case]
property pron_type_:
def __get__(self):
return self.vocab.strings[self.c.pron_type]
property punct_side_:
def __get__(self):
return self.vocab.strings[self.c.punct_side]
property punct_type_:
def __get__(self):
return self.vocab.strings[self.c.punct_type]
property reflex_:
def __get__(self):
return self.vocab.strings[self.c.reflex]
property style_:
def __get__(self):
return self.vocab.strings[self.c.style]
property style_variant_:
def __get__(self):
return self.vocab.strings[self.c.style_variant]
property tense_:
def __get__(self):
return self.vocab.strings[self.c.tense]
property typo_:
def __get__(self):
return self.vocab.strings[self.c.typo]
property verb_form_:
def __get__(self):
return self.vocab.strings[self.c.verb_form]
property voice_:
def __get__(self):
return self.vocab.strings[self.c.voice]
property verb_type_:
def __get__(self):
return self.vocab.strings[self.c.verb_type]
morph_string = self.vocab.strings[self.c.key]
if morph_string == self.vocab.morphology.EMPTY_MORPH:
return ""
return morph_string
def to_dict(self):
    """Produce a dict representation by converting the string returned by
    `to_json()` with the vocab's `Morphology.feats_to_dict`.
    """
    feats = self.to_json()
    morphology = self.vocab.morphology
    return morphology.feats_to_dict(feats)

View File

@ -217,6 +217,14 @@ cdef class Token:
def morph(self):
return MorphAnalysis.from_id(self.vocab, self.c.morph)
property morph_:
    # String view of the token's morphological analysis.
    def __get__(self):
        analysis = MorphAnalysis.from_id(self.vocab, self.c.morph)
        return str(analysis)

    def __set__(self, features):
        # Register the features with the vocab's morphology and store the
        # key it returns on the underlying token struct.
        cdef hash_t morph_key = self.vocab.morphology.add(features)
        self.c.morph = morph_key
@property
def lex_id(self):
"""RETURNS (int): Sequential ID of the token's lexical type."""