Fix inconsistencies in generate_specials.py

Re Issue #321: fix inconsistencies in the script that generates specials.json. The result still isn't fully satisfactory --- we will need to revise this as we move on to parsing more morphologically rich languages.
This commit is contained in:
Matthew Honnibal 2016-04-07 11:21:52 +10:00
parent 357e2aaece
commit 85485f5c2b
1 changed file with 30 additions and 31 deletions

View File

@@ -11,49 +11,48 @@ token_properties = {
"are": {"L": "be", "pos": "VBP", "number": 2}, "are": {"L": "be", "pos": "VBP", "number": 2},
"ca": {"L": "can", "pos": "MD"}, "ca": {"L": "can", "pos": "MD"},
"can": {"L": "can", "pos": "MD"}, "can": {"L": "can", "pos": "MD"},
"could": {"pos": "MD"}, # no lemma for could? "could": {"pos": "MD", "L": "could"},
"'d": {"L": "would", "pos": "MD"}, "'d": {"L": "would", "pos": "MD"},
"did": {"L": "do", "pos": "VBD"}, "did": {"L": "do", "pos": "VBD"},
"do": {"L": "do"}, # no POS for do? "do": {"L": "do"},
"does": {"L": "do", "pos": "VBZ"}, "does": {"L": "do", "pos": "VBZ"},
"had": {"L": "have", "pos": "VBD"}, "had": {"L": "have", "pos": "VBD"},
"has": {}, # no POS or lemma for has? "has": {"L": "have", "pos": "VBZ"},
"have": {"pos": "VB"}, # no lemma for have? "have": {"pos": "VB"},
"he": {"L": "-PRON-"}, # no POS for he? "he": {"L": "-PRON-", "pos": "PRP"},
"how": {}, # no POS or lemma for how? "how": {},
"i": {"L": "-PRON-"}, # no POS for i? "i": {"L": "-PRON-", "pos": "PRP"},
"is": {"L": "be", "pos": "VBZ"}, "is": {"L": "be", "pos": "VBZ"},
"it": {"L": "-PRON-"}, # no POS for it? "it": {"L": "-PRON-", "pos": "PRP"},
"'ll": {"L": "will", "pos": "MD"}, "'ll": {"L": "will", "pos": "MD"},
"'m": {"L": "be", "pos": "VBP", "number": 1, "tenspect": 1}, "'m": {"L": "be", "pos": "VBP", "number": 1, "tenspect": 1},
"'ma": {}, # no POS or lemma for ma? "'ma": {},
"might": {}, # no POS or lemma for might? "might": {},
"must": {}, # no POS or lemma for must? "must": {},
"need": {}, # no POS or lemma for need? "need": {},
"not": {"L": "not", "pos": "RB"}, "not": {"L": "not", "pos": "RB"},
"'nt": {"L": "not", "pos": "RB"}, "'nt": {"L": "not", "pos": "RB"},
"n't": {"L": "not", "pos": "RB"}, "n't": {"L": "not", "pos": "RB"},
"'re": {}, # no POS or lemma for re? "'re": {"L": "be", "pos": "VBZ"},
"'s": {}, # no POS or lemma for s? "'s": {}, # no POS or lemma for s?
"sha": {}, # no POS or lemma for sha? "sha": {"L": "shall", "pos": "MD"},
"she": {"L": "-PRON-"}, # no POS for she? "she": {"L": "-PRON-", "pos": "PRP"},
"should": {}, # no POS or lemma for should? "should": {},
"that": {}, # no POS or lemma for that? "that": {},
"there": {}, # no POS or lemma for there? "there": {},
"they": {"L": "-PRON-"}, # no POS for they? "they": {"L": "-PRON-", "pos": "PRP"},
"was": {}, # no POS or lemma for was? "was": {},
"we": {}, # no POS or lemma for we? "we": {"L": "-PRON-", "pos": "PRP"},
"were": {}, # no POS or lemma for were? "were": {},
"what": {}, # no POS or lemma for what? "what": {},
"when": {}, # no POS or lemma for when? "when": {},
"where": {}, # no POS or lemma for where? "where": {},
"who": {}, # no POS or lemma for who? "who": {},
"why": {}, # no POS or lemma for why? "why": {},
"wo": {}, # no POS or lemma for wo? "wo": {},
"would": {}, # no POS or lemma for would? "would": {},
"you": {"L": "-PRON-"}, # no POS or lemma for you? "you": {"L": "-PRON-", "pos": "PRP"},
"'ve": {"L": "have", "pos": "VB"} "'ve": {"L": "have", "pos": "VB"}
} }
# contains starting tokens with their potential contractions # contains starting tokens with their potential contractions