diff --git a/spacy/lang/ur/tag_map.py b/spacy/lang/ur/tag_map.py index 2499d7e3e..eebd3a14a 100644 --- a/spacy/lang/ur/tag_map.py +++ b/spacy/lang/ur/tag_map.py @@ -1,66 +1,94 @@ # coding: utf8 from __future__ import unicode_literals +from ...symbols import NOUN, PROPN, PART, INTJ, SPACE, PRON, AUX, SCONJ from ...symbols import POS, PUNCT, SYM, ADJ, CCONJ, NUM, DET, ADV, ADP, X, VERB -from ...symbols import NOUN, PROPN, PART, INTJ, SPACE, PRON TAG_MAP = { - ".": {POS: PUNCT, "PunctType": "peri"}, - ",": {POS: PUNCT, "PunctType": "comm"}, - "-LRB-": {POS: PUNCT, "PunctType": "brck", "PunctSide": "ini"}, - "-RRB-": {POS: PUNCT, "PunctType": "brck", "PunctSide": "fin"}, - "``": {POS: PUNCT, "PunctType": "quot", "PunctSide": "ini"}, - '""': {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"}, - "''": {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"}, + "JJ-Ez": {POS: ADJ}, + "INJC": {POS: X}, + "QFC": {POS: DET}, + "UNK": {POS: X}, + "NSTC": {POS: ADV}, + "NST": {POS: ADV}, + "VMC": {POS: VERB}, + "PRPC": {POS: PRON}, + "RBC": {POS: ADV}, + "PSPC": {POS: ADP}, + "INJ": {POS: X}, + "JJZ": {POS: ADJ}, + "CCC": {POS: SCONJ}, + "NN-Ez": {POS: NOUN}, + "ECH": {POS: NOUN}, + "WQ": {POS: DET}, + "RDP": {POS: ADJ}, + "JJC": {POS: ADJ}, + "NEG": {POS: PART}, + "NNZ": {POS: NOUN}, + "QO": {POS: ADJ}, + "INTFC": {POS: ADV}, + "INTF": {POS: ADV}, + "NFC": {POS: ADP}, + "QCC": {POS: NUM}, + "QC": {POS: NUM}, + "QF": {POS: DET}, + "VAUX": {POS: AUX}, + "VM": {POS: VERB}, + "DEM": {POS: DET}, + "NNPC": {POS: PROPN}, + "NNC": {POS: NOUN}, + "PSP": {POS: ADP}, + + ".": {POS: PUNCT}, + ",": {POS: PUNCT}, + "-LRB-": {POS: PUNCT}, + "-RRB-": {POS: PUNCT}, + "``": {POS: PUNCT}, + '""': {POS: PUNCT}, + "''": {POS: PUNCT}, ":": {POS: PUNCT}, - "$": {POS: SYM, "Other": {"SymType": "currency"}}, - "#": {POS: SYM, "Other": {"SymType": "numbersign"}}, - "AFX": {POS: ADJ, "Hyph": "yes"}, - "CC": {POS: CCONJ, "ConjType": "coor"}, - "CD": {POS: NUM, "NumType": "card"}, + "$": {POS: SYM}, + "#": {POS: SYM}, + "AFX": {POS: ADJ}, + "CC": {POS: CCONJ}, + "CD": {POS: NUM}, "DT": {POS: DET}, - "EX": {POS: ADV, "AdvType": "ex"}, - "FW": {POS: X, "Foreign": "yes"}, - "HYPH": {POS: PUNCT, "PunctType": "dash"}, + "EX": {POS: ADV}, + "FW": {POS: X}, + "HYPH": {POS: PUNCT}, "IN": {POS: ADP}, - "JJ": {POS: ADJ, "Degree": "pos"}, - "JJR": {POS: ADJ, "Degree": "comp"}, - "JJS": {POS: ADJ, "Degree": "sup"}, - "LS": {POS: PUNCT, "NumType": "ord"}, - "MD": {POS: VERB, "VerbType": "mod"}, + "JJ": {POS: ADJ}, + "JJR": {POS: ADJ}, + "JJS": {POS: ADJ}, + "LS": {POS: PUNCT}, + "MD": {POS: VERB}, "NIL": {POS: ""}, - "NN": {POS: NOUN, "Number": "sing"}, - "NNP": {POS: PROPN, "NounType": "prop", "Number": "sing"}, - "NNPS": {POS: PROPN, "NounType": "prop", "Number": "plur"}, - "NNS": {POS: NOUN, "Number": "plur"}, - "PDT": {POS: ADJ, "AdjType": "pdt", "PronType": "prn"}, - "POS": {POS: PART, "Poss": "yes"}, - "PRP": {POS: PRON, "PronType": "prs"}, - "PRP$": {POS: ADJ, "PronType": "prs", "Poss": "yes"}, - "RB": {POS: ADV, "Degree": "pos"}, - "RBR": {POS: ADV, "Degree": "comp"}, - "RBS": {POS: ADV, "Degree": "sup"}, + "NN": {POS: NOUN}, + "NNP": {POS: PROPN}, + "NNPS": {POS: PROPN}, + "NNS": {POS: NOUN}, + "PDT": {POS: ADJ}, + "POS": {POS: PART}, + "PRP": {POS: PRON}, + "PRP$": {POS: ADJ}, + "RB": {POS: ADV}, + "RBR": {POS: ADV}, + "RBS": {POS: ADV}, "RP": {POS: PART}, "SP": {POS: SPACE}, "SYM": {POS: SYM}, - "TO": {POS: PART, "PartType": "inf", "VerbForm": "inf"}, + "TO": {POS: PART}, "UH": {POS: INTJ}, - "VB": {POS: VERB, "VerbForm": "inf"}, - "VBD": {POS: VERB, "VerbForm": "fin", "Tense": "past"}, - "VBG": {POS: VERB, "VerbForm": "part", "Tense": "pres", "Aspect": "prog"}, - "VBN": {POS: VERB, "VerbForm": "part", "Tense": "past", "Aspect": "perf"}, - "VBP": {POS: VERB, "VerbForm": "fin", "Tense": "pres"}, - "VBZ": { - POS: VERB, - "VerbForm": "fin", - "Tense": "pres", - "Number": "sing", - "Person": 3, - }, - "WDT": {POS: ADJ, "PronType": "int|rel"}, - "WP": {POS: NOUN, "PronType": "int|rel"}, - "WP$": {POS: ADJ, "Poss": "yes", "PronType": "int|rel"}, - "WRB": {POS: ADV, "PronType": "int|rel"}, + "VB": {POS: VERB}, + "VBD": {POS: VERB}, + "VBG": {POS: VERB}, + "VBN": {POS: VERB}, + "VBP": {POS: VERB}, + "VBZ": {POS: VERB}, + "WDT": {POS: ADJ}, + "WP": {POS: NOUN}, + "WP$": {POS: ADJ}, + "WRB": {POS: ADV}, "ADD": {POS: X}, "NFP": {POS: PUNCT}, "GW": {POS: X},