Merge pull request #5258 from mirfan899/master

This commit is contained in:
Ines Montani 2020-04-29 12:51:55 +02:00 committed by GitHub
commit eac47971f1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 77 additions and 49 deletions

View File

@@ -1,66 +1,94 @@
# coding: utf8 # coding: utf8
from __future__ import unicode_literals from __future__ import unicode_literals
from ...symbols import NOUN, PROPN, PART, INTJ, SPACE, PRON, AUX, SCONJ
from ...symbols import POS, PUNCT, SYM, ADJ, CCONJ, NUM, DET, ADV, ADP, X, VERB from ...symbols import POS, PUNCT, SYM, ADJ, CCONJ, NUM, DET, ADV, ADP, X, VERB
from ...symbols import NOUN, PROPN, PART, INTJ, SPACE, PRON
TAG_MAP = { TAG_MAP = {
".": {POS: PUNCT, "PunctType": "peri"}, "JJ-Ez": {POS: ADJ},
",": {POS: PUNCT, "PunctType": "comm"}, "INJC": {POS: X},
"-LRB-": {POS: PUNCT, "PunctType": "brck", "PunctSide": "ini"}, "QFC": {POS: DET},
"-RRB-": {POS: PUNCT, "PunctType": "brck", "PunctSide": "fin"}, "UNK": {POS: X},
"``": {POS: PUNCT, "PunctType": "quot", "PunctSide": "ini"}, "NSTC": {POS: ADV},
'""': {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"}, "NST": {POS: ADV},
"''": {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"}, "VMC": {POS: VERB},
"PRPC": {POS: PRON},
"RBC": {POS: ADV},
"PSPC": {POS: ADP},
"INJ": {POS: X},
"JJZ": {POS: ADJ},
"CCC": {POS: SCONJ},
"NN-Ez": {POS: NOUN},
"ECH": {POS: NOUN},
"WQ": {POS: DET},
"RDP": {POS: ADJ},
"JJC": {POS: ADJ},
"NEG": {POS: PART},
"NNZ": {POS: NOUN},
"QO": {POS: ADJ},
"INTFC": {POS: ADV},
"INTF": {POS: ADV},
"NFC": {POS: ADP},
"QCC": {POS: NUM},
"QC": {POS: NUM},
"QF": {POS: DET},
"VAUX": {POS: AUX},
"VM": {POS: VERB},
"DEM": {POS: DET},
"NNPC": {POS: PROPN},
"NNC": {POS: NOUN},
"PSP": {POS: ADP},
".": {POS: PUNCT},
",": {POS: PUNCT},
"-LRB-": {POS: PUNCT},
"-RRB-": {POS: PUNCT},
"``": {POS: PUNCT},
'""': {POS: PUNCT},
"''": {POS: PUNCT},
":": {POS: PUNCT}, ":": {POS: PUNCT},
"$": {POS: SYM, "Other": {"SymType": "currency"}}, "$": {POS: SYM},
"#": {POS: SYM, "Other": {"SymType": "numbersign"}}, "#": {POS: SYM},
"AFX": {POS: ADJ, "Hyph": "yes"}, "AFX": {POS: ADJ},
"CC": {POS: CCONJ, "ConjType": "coor"}, "CC": {POS: CCONJ},
"CD": {POS: NUM, "NumType": "card"}, "CD": {POS: NUM},
"DT": {POS: DET}, "DT": {POS: DET},
"EX": {POS: ADV, "AdvType": "ex"}, "EX": {POS: ADV},
"FW": {POS: X, "Foreign": "yes"}, "FW": {POS: X},
"HYPH": {POS: PUNCT, "PunctType": "dash"}, "HYPH": {POS: PUNCT},
"IN": {POS: ADP}, "IN": {POS: ADP},
"JJ": {POS: ADJ, "Degree": "pos"}, "JJ": {POS: ADJ},
"JJR": {POS: ADJ, "Degree": "comp"}, "JJR": {POS: ADJ},
"JJS": {POS: ADJ, "Degree": "sup"}, "JJS": {POS: ADJ},
"LS": {POS: PUNCT, "NumType": "ord"}, "LS": {POS: PUNCT},
"MD": {POS: VERB, "VerbType": "mod"}, "MD": {POS: VERB},
"NIL": {POS: ""}, "NIL": {POS: ""},
"NN": {POS: NOUN, "Number": "sing"}, "NN": {POS: NOUN},
"NNP": {POS: PROPN, "NounType": "prop", "Number": "sing"}, "NNP": {POS: PROPN},
"NNPS": {POS: PROPN, "NounType": "prop", "Number": "plur"}, "NNPS": {POS: PROPN},
"NNS": {POS: NOUN, "Number": "plur"}, "NNS": {POS: NOUN},
"PDT": {POS: ADJ, "AdjType": "pdt", "PronType": "prn"}, "PDT": {POS: ADJ},
"POS": {POS: PART, "Poss": "yes"}, "POS": {POS: PART},
"PRP": {POS: PRON, "PronType": "prs"}, "PRP": {POS: PRON},
"PRP$": {POS: ADJ, "PronType": "prs", "Poss": "yes"}, "PRP$": {POS: ADJ},
"RB": {POS: ADV, "Degree": "pos"}, "RB": {POS: ADV},
"RBR": {POS: ADV, "Degree": "comp"}, "RBR": {POS: ADV},
"RBS": {POS: ADV, "Degree": "sup"}, "RBS": {POS: ADV},
"RP": {POS: PART}, "RP": {POS: PART},
"SP": {POS: SPACE}, "SP": {POS: SPACE},
"SYM": {POS: SYM}, "SYM": {POS: SYM},
"TO": {POS: PART, "PartType": "inf", "VerbForm": "inf"}, "TO": {POS: PART},
"UH": {POS: INTJ}, "UH": {POS: INTJ},
"VB": {POS: VERB, "VerbForm": "inf"}, "VB": {POS: VERB},
"VBD": {POS: VERB, "VerbForm": "fin", "Tense": "past"}, "VBD": {POS: VERB},
"VBG": {POS: VERB, "VerbForm": "part", "Tense": "pres", "Aspect": "prog"}, "VBG": {POS: VERB},
"VBN": {POS: VERB, "VerbForm": "part", "Tense": "past", "Aspect": "perf"}, "VBN": {POS: VERB},
"VBP": {POS: VERB, "VerbForm": "fin", "Tense": "pres"}, "VBP": {POS: VERB},
"VBZ": { "VBZ": {POS: VERB},
POS: VERB, "WDT": {POS: ADJ},
"VerbForm": "fin", "WP": {POS: NOUN},
"Tense": "pres", "WP$": {POS: ADJ},
"Number": "sing", "WRB": {POS: ADV},
"Person": 3,
},
"WDT": {POS: ADJ, "PronType": "int|rel"},
"WP": {POS: NOUN, "PronType": "int|rel"},
"WP$": {POS: ADJ, "Poss": "yes", "PronType": "int|rel"},
"WRB": {POS: ADV, "PronType": "int|rel"},
"ADD": {POS: X}, "ADD": {POS: X},
"NFP": {POS: PUNCT}, "NFP": {POS: PUNCT},
"GW": {POS: X}, "GW": {POS: X},