From ec44bee32102187e44ff5ce649ea8bf357bd1442 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Thu, 8 Dec 2016 12:00:54 +0100 Subject: [PATCH] Fix capitalization on morphological features --- spacy/en/language_data.py | 84 +++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/spacy/en/language_data.py b/spacy/en/language_data.py index 543364170..b5bc125e4 100644 --- a/spacy/en/language_data.py +++ b/spacy/en/language_data.py @@ -10,55 +10,55 @@ PRON_LEMMA = "-PRON-" TAG_MAP = { - ".": {POS: PUNCT, "puncttype": "peri"}, - ",": {POS: PUNCT, "puncttype": "comm"}, - "-LRB-": {POS: PUNCT, "puncttype": "brck", "punctside": "ini"}, - "-RRB-": {POS: PUNCT, "puncttype": "brck", "punctside": "fin"}, - "``": {POS: PUNCT, "puncttype": "quot", "punctside": "ini"}, - "\"\"": {POS: PUNCT, "puncttype": "quot", "punctside": "fin"}, - "''": {POS: PUNCT, "puncttype": "quot", "punctside": "fin"}, + ".": {POS: PUNCT, "PunctType": "peri"}, + ",": {POS: PUNCT, "PunctType": "comm"}, + "-LRB-": {POS: PUNCT, "PunctType": "brck", "PunctSide": "ini"}, + "-RRB-": {POS: PUNCT, "PunctType": "brck", "PunctSide": "fin"}, + "``": {POS: PUNCT, "PunctType": "quot", "PunctSide": "ini"}, + "\"\"": {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"}, + "''": {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"}, ":": {POS: PUNCT}, - "$": {POS: SYM, "other": {"symtype": "currency"}}, - "#": {POS: SYM, "other": {"symtype": "numbersign"}}, - "AFX": {POS: ADJ, "hyph": "hyph"}, - "CC": {POS: CONJ, "conjtype": "coor"}, - "CD": {POS: NUM, "numtype": "card"}, + "$": {POS: SYM, "Other": {"SymType": "currency"}}, + "#": {POS: SYM, "Other": {"SymType": "numbersign"}}, + "AFX": {POS: ADJ, "Hyph": "yes"}, + "CC": {POS: CONJ, "ConjType": "coor"}, + "CD": {POS: NUM, "NumType": "card"}, "DT": {POS: DET}, - "EX": {POS: ADV, "advtype": "ex"}, - "FW": {POS: X, "foreign": "foreign"}, - "HYPH": {POS: PUNCT, "puncttype": "dash"}, + "EX": {POS: ADV, "AdvType": "ex"}, + "FW": {POS: X, "Foreign": "yes"}, + "HYPH": {POS: PUNCT, "PunctType": "dash"}, "IN": {POS: ADP}, - "JJ": {POS: ADJ, "degree": "pos"}, - "JJR": {POS: ADJ, "degree": "comp"}, - "JJS": {POS: ADJ, "degree": "sup"}, - "LS": {POS: PUNCT, "numtype": "ord"}, - "MD": {POS: VERB, "verbtype": "mod"}, + "JJ": {POS: ADJ, "Degree": "pos"}, + "JJR": {POS: ADJ, "Degree": "comp"}, + "JJS": {POS: ADJ, "Degree": "sup"}, + "LS": {POS: PUNCT, "NumType": "ord"}, + "MD": {POS: VERB, "VerbType": "mod"}, "NIL": {POS: ""}, - "NN": {POS: NOUN, "number": "sing"}, - "NNP": {POS: PROPN, "nountype": "prop", "number": "sing"}, - "NNPS": {POS: PROPN, "nountype": "prop", "number": "plur"}, - "NNS": {POS: NOUN, "number": "plur"}, - "PDT": {POS: ADJ, "adjtype": "pdt", "prontype": "prn"}, - "POS": {POS: PART, "poss": "poss"}, - "PRP": {POS: PRON, "prontype": "prs"}, - "PRP$": {POS: ADJ, "prontype": "prs", "poss": "poss"}, - "RB": {POS: ADV, "degree": "pos"}, - "RBR": {POS: ADV, "degree": "comp"}, - "RBS": {POS: ADV, "degree": "sup"}, + "NN": {POS: NOUN, "Number": "sing"}, + "NNP": {POS: PROPN, "NounType": "prop", "Number": "sing"}, + "NNPS": {POS: PROPN, "NounType": "prop", "Number": "plur"}, + "NNS": {POS: NOUN, "Number": "plur"}, + "PDT": {POS: ADJ, "AdjType": "pdt", "PronType": "prn"}, + "POS": {POS: PART, "Poss": "yes"}, + "PRP": {POS: PRON, "PronType": "prs"}, + "PRP$": {POS: ADJ, "PronType": "prs", "Poss": "yes"}, + "RB": {POS: ADV, "Degree": "pos"}, + "RBR": {POS: ADV, "Degree": "comp"}, + "RBS": {POS: ADV, "Degree": "sup"}, "RP": {POS: PART}, "SYM": {POS: SYM}, - "TO": {POS: PART, "parttype": "inf", "verbform": "inf"}, + "TO": {POS: PART, "PartType": "inf", "VerbForm": "inf"}, "UH": {POS: INTJ}, - "VB": {POS: VERB, "verbform": "inf"}, - "VBD": {POS: VERB, "verbform": "fin", "tense": "past"}, - "VBG": {POS: VERB, "verbform": "part", "tense": "pres", "aspect": "prog"}, - "VBN": {POS: VERB, "verbform": "part", "tense": "past", "aspect": "perf"}, - "VBP": {POS: VERB, "verbform": "fin", "tense": "pres"}, - "VBZ": {POS: VERB, "verbform": "fin", "tense": "pres", "number": "sing", "person": 3}, - "WDT": {POS: ADJ, "prontype": "int|rel"}, - "WP": {POS: NOUN, "prontype": "int|rel"}, - "WP$": {POS: ADJ, "poss": "poss", "prontype": "int|rel"}, - "WRB": {POS: ADV, "prontype": "int|rel"}, + "VB": {POS: VERB, "VerbForm": "inf"}, + "VBD": {POS: VERB, "VerbForm": "fin", "Tense": "past"}, + "VBG": {POS: VERB, "VerbForm": "part", "Tense": "pres", "Aspect": "prog"}, + "VBN": {POS: VERB, "VerbForm": "part", "Tense": "past", "Aspect": "perf"}, + "VBP": {POS: VERB, "VerbForm": "fin", "Tense": "pres"}, + "VBZ": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Number": "sing", "Person": 3}, + "WDT": {POS: ADJ, "PronType": "int|rel"}, + "WP": {POS: NOUN, "PronType": "int|rel"}, + "WP$": {POS: ADJ, "Poss": "yes", "PronType": "int|rel"}, + "WRB": {POS: ADV, "PronType": "int|rel"}, "SP": {POS: SPACE}, "ADD": {POS: X}, "NFP": {POS: PUNCT},