2016-09-25 12:49:00 +00:00
|
|
|
|
# encoding: utf8
|
|
|
|
|
from __future__ import unicode_literals
|
2016-11-24 12:51:32 +00:00
|
|
|
|
import re
|
2016-09-25 12:49:00 +00:00
|
|
|
|
|
2016-12-07 19:29:52 +00:00
|
|
|
|
from ..symbols import *
|
2016-12-07 20:11:59 +00:00
|
|
|
|
from ..language_data import EMOTICONS
|
2016-09-25 12:49:00 +00:00
|
|
|
|
|
|
|
|
|
|
2016-12-07 19:29:52 +00:00
|
|
|
|
# Placeholder lemma string. TOKENIZER_EXCEPTIONS below assigns it as the LEMMA
# of pronoun pieces such as "I", "it", "they", "you", "he" and "she".
PRON_LEMMA = "-PRON-"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Maps each fine-grained tag string to a dict holding the coarse-grained POS
# symbol (keyed by the POS symbol itself) plus optional string-keyed
# morphological features.
#
# Every tag must appear exactly once. In a dict display a repeated key silently
# keeps only the last value, so a bare duplicate placed further down would
# discard the detailed entry for the same tag. "AFX" and "HYPH" were listed
# twice that way; only their detailed entries are kept here.
TAG_MAP = {
    # Punctuation and symbols
    ".": {POS: PUNCT, "PunctType": "peri"},
    ",": {POS: PUNCT, "PunctType": "comm"},
    "-LRB-": {POS: PUNCT, "PunctType": "brck", "PunctSide": "ini"},
    "-RRB-": {POS: PUNCT, "PunctType": "brck", "PunctSide": "fin"},
    "``": {POS: PUNCT, "PunctType": "quot", "PunctSide": "ini"},
    "\"\"": {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"},
    "''": {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"},
    ":": {POS: PUNCT},
    "$": {POS: SYM, "Other": {"SymType": "currency"}},
    "#": {POS: SYM, "Other": {"SymType": "numbersign"}},

    # Word-level tags
    "AFX": {POS: ADJ, "Hyph": "yes"},
    "CC": {POS: CONJ, "ConjType": "coor"},
    "CD": {POS: NUM, "NumType": "card"},
    "DT": {POS: DET},
    "EX": {POS: ADV, "AdvType": "ex"},
    "FW": {POS: X, "Foreign": "yes"},
    "HYPH": {POS: PUNCT, "PunctType": "dash"},
    "IN": {POS: ADP},
    "JJ": {POS: ADJ, "Degree": "pos"},
    "JJR": {POS: ADJ, "Degree": "comp"},
    "JJS": {POS: ADJ, "Degree": "sup"},
    "LS": {POS: PUNCT, "NumType": "ord"},
    "MD": {POS: VERB, "VerbType": "mod"},
    "NIL": {POS: ""},
    "NN": {POS: NOUN, "Number": "sing"},
    "NNP": {POS: PROPN, "NounType": "prop", "Number": "sing"},
    "NNPS": {POS: PROPN, "NounType": "prop", "Number": "plur"},
    "NNS": {POS: NOUN, "Number": "plur"},
    "PDT": {POS: ADJ, "AdjType": "pdt", "PronType": "prn"},
    "POS": {POS: PART, "Poss": "yes"},
    "PRP": {POS: PRON, "PronType": "prs"},
    "PRP$": {POS: ADJ, "PronType": "prs", "Poss": "yes"},
    "RB": {POS: ADV, "Degree": "pos"},
    "RBR": {POS: ADV, "Degree": "comp"},
    "RBS": {POS: ADV, "Degree": "sup"},
    "RP": {POS: PART},
    "SYM": {POS: SYM},
    "TO": {POS: PART, "PartType": "inf", "VerbForm": "inf"},
    "UH": {POS: INTJ},
    "VB": {POS: VERB, "VerbForm": "inf"},
    "VBD": {POS: VERB, "VerbForm": "fin", "Tense": "past"},
    "VBG": {POS: VERB, "VerbForm": "part", "Tense": "pres", "Aspect": "prog"},
    "VBN": {POS: VERB, "VerbForm": "part", "Tense": "past", "Aspect": "perf"},
    "VBP": {POS: VERB, "VerbForm": "fin", "Tense": "pres"},
    "VBZ": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Number": "sing", "Person": 3},
    "WDT": {POS: ADJ, "PronType": "int|rel"},
    "WP": {POS: NOUN, "PronType": "int|rel"},
    "WP$": {POS: ADJ, "Poss": "yes", "PronType": "int|rel"},
    "WRB": {POS: ADV, "PronType": "int|rel"},

    # Tags that carry only a coarse-grained POS
    "SP": {POS: SPACE},
    "ADD": {POS: X},
    "NFP": {POS: PUNCT},
    "GW": {POS: X},
    "XX": {POS: X},
    "BES": {POS: VERB},
    "HVS": {POS: VERB}
}
|
|
|
|
|
|
2016-11-24 12:51:32 +00:00
|
|
|
|
|
2016-12-07 19:29:52 +00:00
|
|
|
|
# English stop words as a set of lowercase strings. Each row below holds the
# words for one initial letter; the rows are split on whitespace and pooled
# into a single set, so ordering and repeats have no effect on the result.
STOP_WORDS = set(
    word
    for row in (
        "a about above across after afterwards again against all almost alone "
        "along already also although always am among amongst amount an and "
        "another any anyhow anyone anything anyway anywhere are around as at",

        "back be became because become becomes becoming been before "
        "beforehand behind being below beside besides between beyond both "
        "bottom but by",

        "call can cannot ca could",

        "did do does doing done down due during",

        "each eight either eleven else elsewhere empty enough etc even ever "
        "every everyone everything everywhere except",

        "few fifteen fifty first five for former formerly forty four from "
        "front full further",

        "get give go",

        "had has have he hence her here hereafter hereby herein hereupon hers "
        "herself him himself his how however hundred",

        "i if in inc indeed into is it its itself",

        "just",

        "keep",

        "last latter latterly least less",

        "made make many may me meanwhile might mine more moreover most mostly "
        "move much must my myself",

        "name namely neither never nevertheless next nine no nobody none "
        "noone nor not nothing now nowhere",

        "of off often on once one only onto or other others otherwise our "
        "ours ourselves out over own",

        "part per perhaps please put",

        "quite",

        "rather re really regarding",

        "same say see seem seemed seeming seems serious several she should "
        "show side since six sixty so some somehow someone something sometime "
        "sometimes somewhere still such",

        "take ten than that the their them themselves then thence there "
        "thereafter thereby therefore therein thereupon these they third this "
        "those though three through throughout thru thus to together too top "
        "toward towards twelve twenty two",

        "under until up unless upon us used using",

        "various very via",

        "was we well were what whatever when whence whenever where whereafter "
        "whereas whereby wherein whereupon wherever whether which while "
        "whither who whoever whole whom whose why will with within without "
        "would",

        "yet you your yours yourself yourselves",
    )
    for word in row.split()
)
|
|
|
|
|
|
|
|
|
|
|
2016-12-07 19:29:52 +00:00
|
|
|
|
TOKENIZER_EXCEPTIONS = {
|
|
|
|
|
"and/or": [
|
|
|
|
|
{ORTH: "and/or", LEMMA: "and/or", TAG: "CC"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Theydve": [
|
|
|
|
|
{ORTH: "They", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "d", LEMMA: "would", TAG: "MD"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"shouldn't've": [
|
|
|
|
|
{ORTH: "should"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"There'll": [
|
|
|
|
|
{ORTH: "There"},
|
|
|
|
|
{ORTH: "'ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"howll": [
|
|
|
|
|
{ORTH: "how"},
|
|
|
|
|
{ORTH: "ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Hadn't've": [
|
|
|
|
|
{ORTH: "Had", LEMMA: "have", TAG: "VBD"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"who'll": [
|
|
|
|
|
{ORTH: "who"},
|
|
|
|
|
{ORTH: "'ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"aint": [
|
|
|
|
|
{ORTH: "ai", TAG: "VBP", "number": 2, LEMMA: "be"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
" ": [
|
|
|
|
|
{TAG: "SP", ORTH: " "}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Shouldnt": [
|
|
|
|
|
{ORTH: "Should"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"when's": [
|
|
|
|
|
{ORTH: "when"},
|
|
|
|
|
{ORTH: "'s", LEMMA: "be"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Didnt": [
|
|
|
|
|
{ORTH: "Did", LEMMA: "do", TAG: "VBD"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"itll": [
|
|
|
|
|
{ORTH: "it", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Who're": [
|
|
|
|
|
{ORTH: "Who"},
|
|
|
|
|
{ORTH: "'re"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Ain't": [
|
|
|
|
|
{ORTH: "Ai", TAG: "VBP", "number": 2, LEMMA: "be"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Can't": [
|
|
|
|
|
{ORTH: "Ca", LEMMA: "can", TAG: "MD"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Whyre": [
|
|
|
|
|
{ORTH: "Why"},
|
|
|
|
|
{ORTH: "re"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Aren't": [
|
|
|
|
|
{ORTH: "Are", TAG: "VBP", "number": 2, LEMMA: "be"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Neednt": [
|
|
|
|
|
{ORTH: "Need"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"should've": [
|
|
|
|
|
{ORTH: "should"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"shouldn't": [
|
|
|
|
|
{ORTH: "should"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Idve": [
|
|
|
|
|
{ORTH: "I", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "d", LEMMA: "would", TAG: "MD"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"weve": [
|
|
|
|
|
{ORTH: "we"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Ive": [
|
|
|
|
|
{ORTH: "I", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"they'd": [
|
|
|
|
|
{ORTH: "they", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Youdve": [
|
|
|
|
|
{ORTH: "You", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "d", LEMMA: "would", TAG: "MD"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"theyve": [
|
|
|
|
|
{ORTH: "they", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Weren't": [
|
|
|
|
|
{ORTH: "Were"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"werent": [
|
|
|
|
|
{ORTH: "were"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"whyre": [
|
|
|
|
|
{ORTH: "why"},
|
|
|
|
|
{ORTH: "re"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"I'm": [
|
|
|
|
|
{ORTH: "I", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'m", TAG: "VBP", "tenspect": 1, "number": 1, LEMMA: "be"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"She'd've": [
|
|
|
|
|
{ORTH: "She", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"not've": [
|
|
|
|
|
{ORTH: "not", LEMMA: "not", TAG: "RB"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"we'll": [
|
|
|
|
|
{ORTH: "we"},
|
|
|
|
|
{ORTH: "'ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Don't": [
|
|
|
|
|
{ORTH: "Do", LEMMA: "do"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Whyll": [
|
|
|
|
|
{ORTH: "Why"},
|
|
|
|
|
{ORTH: "ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"they've": [
|
|
|
|
|
{ORTH: "they", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"wasn't": [
|
|
|
|
|
{ORTH: "was"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"could've": [
|
|
|
|
|
{ORTH: "could", TAG: "MD"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"what've": [
|
|
|
|
|
{ORTH: "what"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"havent": [
|
|
|
|
|
{ORTH: "have", TAG: "VB"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Who've": [
|
|
|
|
|
{ORTH: "Who"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Shan't": [
|
|
|
|
|
{ORTH: "Sha"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"i'll": [
|
|
|
|
|
{ORTH: "i", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"you'd": [
|
|
|
|
|
{ORTH: "you", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"whens": [
|
|
|
|
|
{ORTH: "when"},
|
|
|
|
|
{ORTH: "s", LEMMA: "be"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"whys": [
|
|
|
|
|
{ORTH: "why"},
|
|
|
|
|
{ORTH: "s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Whereve": [
|
|
|
|
|
{ORTH: "Where"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"\u00a0": [
|
|
|
|
|
{ORTH: "\u00a0", TAG: "SP", LEMMA: " "}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"there'd": [
|
|
|
|
|
{ORTH: "there"},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"hadn't've": [
|
|
|
|
|
{ORTH: "had", LEMMA: "have", TAG: "VBD"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"whatll": [
|
|
|
|
|
{ORTH: "what"},
|
|
|
|
|
{ORTH: "ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"wouldn't've": [
|
|
|
|
|
{ORTH: "would"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"there's": [
|
|
|
|
|
{ORTH: "there"},
|
|
|
|
|
{ORTH: "'s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Who'll": [
|
|
|
|
|
{ORTH: "Who"},
|
|
|
|
|
{ORTH: "'ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"youll": [
|
|
|
|
|
{ORTH: "you", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"wouldve": [
|
|
|
|
|
{ORTH: "would"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Wouldnt": [
|
|
|
|
|
{ORTH: "Would"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Thered": [
|
|
|
|
|
{ORTH: "There"},
|
|
|
|
|
{ORTH: "d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Youre": [
|
|
|
|
|
{ORTH: "You", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "re", LEMMA: "be"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Couldn't've": [
|
|
|
|
|
{ORTH: "Could", TAG: "MD"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"who're": [
|
|
|
|
|
{ORTH: "who"},
|
|
|
|
|
{ORTH: "'re"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Whys": [
|
|
|
|
|
{ORTH: "Why"},
|
|
|
|
|
{ORTH: "s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"mightn't've": [
|
|
|
|
|
{ORTH: "might"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Wholl": [
|
|
|
|
|
{ORTH: "Who"},
|
|
|
|
|
{ORTH: "ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"hadn't": [
|
|
|
|
|
{ORTH: "had", LEMMA: "have", TAG: "VBD"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Havent": [
|
|
|
|
|
{ORTH: "Have", TAG: "VB"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Whatve": [
|
|
|
|
|
{ORTH: "What"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Thats": [
|
|
|
|
|
{ORTH: "That"},
|
|
|
|
|
{ORTH: "s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Howll": [
|
|
|
|
|
{ORTH: "How"},
|
|
|
|
|
{ORTH: "ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"wouldn't": [
|
|
|
|
|
{ORTH: "would"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"You'll": [
|
|
|
|
|
{ORTH: "You", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Cant": [
|
|
|
|
|
{ORTH: "Ca", LEMMA: "can", TAG: "MD"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"i'd": [
|
|
|
|
|
{ORTH: "i", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"weren't": [
|
|
|
|
|
{ORTH: "were"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"would've": [
|
|
|
|
|
{ORTH: "would"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"i'm": [
|
|
|
|
|
{ORTH: "i", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'m", TAG: "VBP", "tenspect": 1, "number": 1, LEMMA: "be"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"why'll": [
|
|
|
|
|
{ORTH: "why"},
|
|
|
|
|
{ORTH: "'ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"we'd've": [
|
|
|
|
|
{ORTH: "we"},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Shouldve": [
|
|
|
|
|
{ORTH: "Should"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"can't": [
|
|
|
|
|
{ORTH: "ca", LEMMA: "can", TAG: "MD"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"thats": [
|
|
|
|
|
{ORTH: "that"},
|
|
|
|
|
{ORTH: "s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Hes": [
|
|
|
|
|
{ORTH: "He", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Needn't": [
|
|
|
|
|
{ORTH: "Need"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"It's": [
|
|
|
|
|
{ORTH: "It", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Why're": [
|
|
|
|
|
{ORTH: "Why"},
|
|
|
|
|
{ORTH: "'re", LEMMA: "be"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Hed": [
|
|
|
|
|
{ORTH: "He", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Mt.": [
|
|
|
|
|
{ORTH: "Mt.", LEMMA: "Mount"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"couldn't": [
|
|
|
|
|
{ORTH: "could", TAG: "MD"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"What've": [
|
|
|
|
|
{ORTH: "What"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"It'd": [
|
|
|
|
|
{ORTH: "It", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"theydve": [
|
|
|
|
|
{ORTH: "they", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "d", LEMMA: "would", TAG: "MD"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"aren't": [
|
|
|
|
|
{ORTH: "are", TAG: "VBP", "number": 2, LEMMA: "be"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Mightn't": [
|
|
|
|
|
{ORTH: "Might"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"'S": [
|
|
|
|
|
{ORTH: "'S", LEMMA: "'s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"I've": [
|
|
|
|
|
{ORTH: "I", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Whered": [
|
|
|
|
|
{ORTH: "Where"},
|
|
|
|
|
{ORTH: "d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Itdve": [
|
|
|
|
|
{ORTH: "It", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "d", LEMMA: "would", TAG: "MD"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"I'ma": [
|
|
|
|
|
{ORTH: "I", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'ma"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"whos": [
|
|
|
|
|
{ORTH: "who"},
|
|
|
|
|
{ORTH: "s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"They'd": [
|
|
|
|
|
{ORTH: "They", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"What'll": [
|
|
|
|
|
{ORTH: "What"},
|
|
|
|
|
{ORTH: "'ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"You've": [
|
|
|
|
|
{ORTH: "You", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Mustve": [
|
|
|
|
|
{ORTH: "Must"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"whod": [
|
|
|
|
|
{ORTH: "who"},
|
|
|
|
|
{ORTH: "d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"mightntve": [
|
|
|
|
|
{ORTH: "might"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"I'd've": [
|
|
|
|
|
{ORTH: "I", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Must've": [
|
|
|
|
|
{ORTH: "Must"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"it'd": [
|
|
|
|
|
{ORTH: "it", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"what're": [
|
|
|
|
|
{ORTH: "what"},
|
|
|
|
|
{ORTH: "'re"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Wasn't": [
|
|
|
|
|
{ORTH: "Was"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"what's": [
|
|
|
|
|
{ORTH: "what"},
|
|
|
|
|
{ORTH: "'s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"he'd've": [
|
|
|
|
|
{ORTH: "he", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"She'd": [
|
|
|
|
|
{ORTH: "She", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"shedve": [
|
|
|
|
|
{ORTH: "she", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "d", LEMMA: "would", TAG: "MD"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"ain't": [
|
|
|
|
|
{ORTH: "ai", TAG: "VBP", "number": 2, LEMMA: "be"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"She's": [
|
|
|
|
|
{ORTH: "i", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"i'd've": [
|
|
|
|
|
{ORTH: "i", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"We'd've": [
|
|
|
|
|
{ORTH: "We"},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"must've": [
|
|
|
|
|
{ORTH: "must"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"That's": [
|
|
|
|
|
{ORTH: "That"},
|
|
|
|
|
{ORTH: "'s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"whatre": [
|
|
|
|
|
{ORTH: "what"},
|
|
|
|
|
{ORTH: "re"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"you'd've": [
|
|
|
|
|
{ORTH: "you", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Dont": [
|
|
|
|
|
{ORTH: "Do", LEMMA: "do"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"thered": [
|
|
|
|
|
{ORTH: "there"},
|
|
|
|
|
{ORTH: "d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Youd": [
|
|
|
|
|
{ORTH: "You", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"couldn't've": [
|
|
|
|
|
{ORTH: "could", TAG: "MD"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Whens": [
|
|
|
|
|
{ORTH: "When"},
|
|
|
|
|
{ORTH: "s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Isnt": [
|
|
|
|
|
{ORTH: "Is", LEMMA: "be", TAG: "VBZ"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"mightve": [
|
|
|
|
|
{ORTH: "might"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"didnt": [
|
|
|
|
|
{ORTH: "did", LEMMA: "do", TAG: "VBD"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"ive": [
|
|
|
|
|
{ORTH: "i", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"It'd've": [
|
|
|
|
|
{ORTH: "It", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"\t": [
|
|
|
|
|
{ORTH: "\t", TAG: "SP"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Itll": [
|
|
|
|
|
{ORTH: "It", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"didn't": [
|
|
|
|
|
{ORTH: "did", LEMMA: "do", TAG: "VBD"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"cant": [
|
|
|
|
|
{ORTH: "ca", LEMMA: "can", TAG: "MD"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"im": [
|
|
|
|
|
{ORTH: "i", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "m", TAG: "VBP", "tenspect": 1, "number": 1, LEMMA: "be"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"they'd've": [
|
|
|
|
|
{ORTH: "they", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Hadntve": [
|
|
|
|
|
{ORTH: "Had", LEMMA: "have", TAG: "VBD"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Weve": [
|
|
|
|
|
{ORTH: "We"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Mightnt": [
|
|
|
|
|
{ORTH: "Might"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"youdve": [
|
|
|
|
|
{ORTH: "you", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "d", LEMMA: "would", TAG: "MD"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Shedve": [
|
|
|
|
|
{ORTH: "i", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "d", LEMMA: "would", TAG: "MD"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"theyd": [
|
|
|
|
|
{ORTH: "they", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Cannot": [
|
|
|
|
|
{ORTH: "Can", LEMMA: "can", TAG: "MD"},
|
|
|
|
|
{ORTH: "not", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Hadn't": [
|
|
|
|
|
{ORTH: "Had", LEMMA: "have", TAG: "VBD"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"What're": [
|
|
|
|
|
{ORTH: "What"},
|
|
|
|
|
{ORTH: "'re"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"He'll": [
|
|
|
|
|
{ORTH: "He", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"wholl": [
|
|
|
|
|
{ORTH: "who"},
|
|
|
|
|
{ORTH: "ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"They're": [
|
|
|
|
|
{ORTH: "They", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'re"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"shouldnt": [
|
|
|
|
|
{ORTH: "should"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"\n": [
|
|
|
|
|
{ORTH: "\n", TAG: "SP"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"whered": [
|
|
|
|
|
{ORTH: "where"},
|
|
|
|
|
{ORTH: "d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"youve": [
|
|
|
|
|
{ORTH: "you", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"notve": [
|
|
|
|
|
{ORTH: "not", LEMMA: "not", TAG: "RB"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"couldve": [
|
|
|
|
|
{ORTH: "could", TAG: "MD"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"mustve": [
|
|
|
|
|
{ORTH: "must"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Youve": [
|
|
|
|
|
{ORTH: "You", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"therell": [
|
|
|
|
|
{ORTH: "there"},
|
|
|
|
|
{ORTH: "ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"might've": [
|
|
|
|
|
{ORTH: "might"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Mustn't": [
|
|
|
|
|
{ORTH: "Must"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"wheres": [
|
|
|
|
|
{ORTH: "where"},
|
|
|
|
|
{ORTH: "s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"they're": [
|
|
|
|
|
{ORTH: "they", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'re"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"idve": [
|
|
|
|
|
{ORTH: "i", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "d", LEMMA: "would", TAG: "MD"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"hows": [
|
|
|
|
|
{ORTH: "how"},
|
|
|
|
|
{ORTH: "s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"youre": [
|
|
|
|
|
{ORTH: "you", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "re"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Didn't": [
|
|
|
|
|
{ORTH: "Did", LEMMA: "do", TAG: "VBD"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Couldve": [
|
|
|
|
|
{ORTH: "Could", TAG: "MD"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"cannot": [
|
|
|
|
|
{ORTH: "can", LEMMA: "can", TAG: "MD"},
|
|
|
|
|
{ORTH: "not", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Im": [
|
|
|
|
|
{ORTH: "I", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "m", TAG: "VBP", "tenspect": 1, "number": 1, LEMMA: "be"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"howd": [
|
|
|
|
|
{ORTH: "how"},
|
|
|
|
|
{ORTH: "d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"you've": [
|
|
|
|
|
{ORTH: "you", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"You're": [
|
|
|
|
|
{ORTH: "You", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'re"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"she'll": [
|
|
|
|
|
{ORTH: "she", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Theyll": [
|
|
|
|
|
{ORTH: "They", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"don't": [
|
|
|
|
|
{ORTH: "do", LEMMA: "do"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"itd": [
|
|
|
|
|
{ORTH: "it", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Hedve": [
|
|
|
|
|
{ORTH: "He", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "d", LEMMA: "would", TAG: "MD"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"isnt": [
|
|
|
|
|
{ORTH: "is", LEMMA: "be", TAG: "VBZ"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"won't": [
|
|
|
|
|
{ORTH: "wo"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"We're": [
|
|
|
|
|
{ORTH: "We"},
|
|
|
|
|
{ORTH: "'re"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"\u2018S": [
|
|
|
|
|
{ORTH: "\u2018S", LEMMA: "'s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"\u2018s": [
|
|
|
|
|
{ORTH: "\u2018s", LEMMA: "'s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"dont": [
|
|
|
|
|
{ORTH: "do", LEMMA: "do"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"ima": [
|
|
|
|
|
{ORTH: "i", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "ma"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Let's": [
|
|
|
|
|
{ORTH: "Let"},
|
|
|
|
|
{ORTH: "'s", LEMMA: "us"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"he's": [
|
|
|
|
|
{ORTH: "he", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"we've": [
|
|
|
|
|
{ORTH: "we"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"What's": [
|
|
|
|
|
{ORTH: "What"},
|
|
|
|
|
{ORTH: "'s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Who's": [
|
|
|
|
|
{ORTH: "Who"},
|
|
|
|
|
{ORTH: "'s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"hedve": [
|
|
|
|
|
{ORTH: "he", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "d", LEMMA: "would", TAG: "MD"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"he'd": [
|
|
|
|
|
{ORTH: "he", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"When's": [
|
|
|
|
|
{ORTH: "When"},
|
|
|
|
|
{ORTH: "'s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Mightn't've": [
|
|
|
|
|
{ORTH: "Might"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"We've": [
|
|
|
|
|
{ORTH: "We"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Couldntve": [
|
|
|
|
|
{ORTH: "Could", TAG: "MD"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Who'd": [
|
|
|
|
|
{ORTH: "Who"},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"haven't": [
|
|
|
|
|
{ORTH: "have", TAG: "VB"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"arent": [
|
|
|
|
|
{ORTH: "are", TAG: "VBP", "number": 2, LEMMA: "be"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"You'd've": [
|
|
|
|
|
{ORTH: "You", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Wouldn't": [
|
|
|
|
|
{ORTH: "Would"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"who's": [
|
|
|
|
|
{ORTH: "who"},
|
|
|
|
|
{ORTH: "'s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Mightve": [
|
|
|
|
|
{ORTH: "Might"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Theredve": [
|
|
|
|
|
{ORTH: "There"},
|
|
|
|
|
{ORTH: "d", LEMMA: "would", TAG: "MD"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"theredve": [
|
|
|
|
|
{ORTH: "there"},
|
|
|
|
|
{ORTH: "d", LEMMA: "would", TAG: "MD"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"who'd": [
|
|
|
|
|
{ORTH: "who"},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Where's": [
|
|
|
|
|
{ORTH: "Where"},
|
|
|
|
|
{ORTH: "'s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"wont": [
|
|
|
|
|
{ORTH: "wo"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"she'd've": [
|
|
|
|
|
{ORTH: "she", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Should've": [
|
|
|
|
|
{ORTH: "Should"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"theyre": [
|
|
|
|
|
{ORTH: "they", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "re"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Wouldntve": [
|
|
|
|
|
{ORTH: "Would"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Where've": [
|
|
|
|
|
{ORTH: "Where"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"mustn't": [
|
|
|
|
|
{ORTH: "must"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"isn't": [
|
|
|
|
|
{ORTH: "is", LEMMA: "be", TAG: "VBZ"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Aint": [
|
|
|
|
|
{ORTH: "Ai", TAG: "VBP", "number": 2, LEMMA: "be"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"why's": [
|
|
|
|
|
{ORTH: "why"},
|
|
|
|
|
{ORTH: "'s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"There'd": [
|
|
|
|
|
{ORTH: "There"},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"They'll": [
|
|
|
|
|
{ORTH: "They", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"how'll": [
|
|
|
|
|
{ORTH: "how"},
|
|
|
|
|
{ORTH: "'ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Wedve": [
|
|
|
|
|
{ORTH: "We"},
|
|
|
|
|
{ORTH: "d", LEMMA: "would", TAG: "MD"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"couldntve": [
|
|
|
|
|
{ORTH: "could", TAG: "MD"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"There's": [
|
|
|
|
|
{ORTH: "There"},
|
|
|
|
|
{ORTH: "'s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"we'd": [
|
|
|
|
|
{ORTH: "we"},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Whod": [
|
|
|
|
|
{ORTH: "Who"},
|
|
|
|
|
{ORTH: "d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"whatve": [
|
|
|
|
|
{ORTH: "what"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Wouldve": [
|
|
|
|
|
{ORTH: "Would"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"there'll": [
|
|
|
|
|
{ORTH: "there"},
|
|
|
|
|
{ORTH: "'ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"needn't": [
|
|
|
|
|
{ORTH: "need"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"shouldntve": [
|
|
|
|
|
{ORTH: "should"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"why're": [
|
|
|
|
|
{ORTH: "why"},
|
|
|
|
|
{ORTH: "'re"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Doesnt": [
|
|
|
|
|
{ORTH: "Does", LEMMA: "do", TAG: "VBZ"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"whereve": [
|
|
|
|
|
{ORTH: "where"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"they'll": [
|
|
|
|
|
{ORTH: "they", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"I'd": [
|
|
|
|
|
{ORTH: "I", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Might've": [
|
|
|
|
|
{ORTH: "Might"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"mightnt": [
|
|
|
|
|
{ORTH: "might"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Not've": [
|
|
|
|
|
{ORTH: "Not", LEMMA: "not", TAG: "RB"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"mightn't": [
|
|
|
|
|
{ORTH: "might"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"you're": [
|
|
|
|
|
{ORTH: "you", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'re"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"They've": [
|
|
|
|
|
{ORTH: "They", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"what'll": [
|
|
|
|
|
{ORTH: "what"},
|
|
|
|
|
{ORTH: "'ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Could've": [
|
|
|
|
|
{ORTH: "Could", TAG: "MD"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Would've": [
|
|
|
|
|
{ORTH: "Would"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Isn't": [
|
|
|
|
|
{ORTH: "Is", LEMMA: "be", TAG: "VBZ"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"let's": [
|
|
|
|
|
{ORTH: "let"},
|
|
|
|
|
{ORTH: "'s", LEMMA: "us"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"She'll": [
|
|
|
|
|
{ORTH: "i", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"You'd": [
|
|
|
|
|
{ORTH: "You", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"wouldnt": [
|
|
|
|
|
{ORTH: "would"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Why'll": [
|
|
|
|
|
{ORTH: "Why"},
|
|
|
|
|
{ORTH: "'ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Where'd": [
|
|
|
|
|
{ORTH: "Where"},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Theyre": [
|
|
|
|
|
{ORTH: "They", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "re"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Won't": [
|
|
|
|
|
{ORTH: "Wo"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Couldn't": [
|
|
|
|
|
{ORTH: "Could", TAG: "MD"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"it's": [
|
|
|
|
|
{ORTH: "it", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"it'll": [
|
|
|
|
|
{ORTH: "it", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"They'd've": [
|
|
|
|
|
{ORTH: "They", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Ima": [
|
|
|
|
|
{ORTH: "I", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "ma"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"whats": [
|
|
|
|
|
{ORTH: "what"},
|
|
|
|
|
{ORTH: "s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"How's": [
|
|
|
|
|
{ORTH: "How"},
|
|
|
|
|
{ORTH: "'s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Shouldntve": [
|
|
|
|
|
{ORTH: "Should"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"youd": [
|
|
|
|
|
{ORTH: "you", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Whatll": [
|
|
|
|
|
{ORTH: "What"},
|
|
|
|
|
{ORTH: "ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Wouldn't've": [
|
|
|
|
|
{ORTH: "Would"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"How'd": [
|
|
|
|
|
{ORTH: "How"},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"doesnt": [
|
|
|
|
|
{ORTH: "does", LEMMA: "do", TAG: "VBZ"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Shouldn't": [
|
|
|
|
|
{ORTH: "Should"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"He'd've": [
|
|
|
|
|
{ORTH: "He", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Mightntve": [
|
|
|
|
|
{ORTH: "Might"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"couldnt": [
|
|
|
|
|
{ORTH: "could", TAG: "MD"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Haven't": [
|
|
|
|
|
{ORTH: "Have", TAG: "VB"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"doesn't": [
|
|
|
|
|
{ORTH: "does", LEMMA: "do", TAG: "VBZ"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Hasn't": [
|
|
|
|
|
{ORTH: "Has"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"how's": [
|
|
|
|
|
{ORTH: "how"},
|
|
|
|
|
{ORTH: "'s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"hes": [
|
|
|
|
|
{ORTH: "he", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"he'll": [
|
|
|
|
|
{ORTH: "he", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"hed": [
|
|
|
|
|
{ORTH: "he", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"how'd": [
|
|
|
|
|
{ORTH: "how"},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"we're": [
|
|
|
|
|
{ORTH: "we"},
|
|
|
|
|
{ORTH: "'re"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Hadnt": [
|
|
|
|
|
{ORTH: "Had", LEMMA: "have", TAG: "VBD"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Shant": [
|
|
|
|
|
{ORTH: "Sha"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Theyve": [
|
|
|
|
|
{ORTH: "They", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Hows": [
|
|
|
|
|
{ORTH: "How"},
|
|
|
|
|
{ORTH: "s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"We'll": [
|
|
|
|
|
{ORTH: "We"},
|
|
|
|
|
{ORTH: "'ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"i've": [
|
|
|
|
|
{ORTH: "i", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Whove": [
|
|
|
|
|
{ORTH: "Who"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"i'ma": [
|
|
|
|
|
{ORTH: "i", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'ma"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Howd": [
|
|
|
|
|
{ORTH: "How"},
|
|
|
|
|
{ORTH: "d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"hadnt": [
|
|
|
|
|
{ORTH: "had", LEMMA: "have", TAG: "VBD"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"shant": [
|
|
|
|
|
{ORTH: "sha"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"There'd've": [
|
|
|
|
|
{ORTH: "There"},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"I'll": [
|
|
|
|
|
{ORTH: "I", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Why's": [
|
|
|
|
|
{ORTH: "Why"},
|
|
|
|
|
{ORTH: "'s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Shouldn't've": [
|
|
|
|
|
{ORTH: "Should"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Wasnt": [
|
|
|
|
|
{ORTH: "Was"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"whove": [
|
|
|
|
|
{ORTH: "who"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"hasn't": [
|
|
|
|
|
{ORTH: "has"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"wouldntve": [
|
|
|
|
|
{ORTH: "would"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Wheres": [
|
|
|
|
|
{ORTH: "Where"},
|
|
|
|
|
{ORTH: "s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"How'll": [
|
|
|
|
|
{ORTH: "How"},
|
|
|
|
|
{ORTH: "'ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"there'd've": [
|
|
|
|
|
{ORTH: "there"},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Whos": [
|
|
|
|
|
{ORTH: "Who"},
|
|
|
|
|
{ORTH: "s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"shes": [
|
|
|
|
|
{ORTH: "she", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Doesn't": [
|
|
|
|
|
{ORTH: "Does", LEMMA: "do", TAG: "VBZ"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Arent": [
|
|
|
|
|
{ORTH: "Are", TAG: "VBP", "number": 2, LEMMA: "be"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Hasnt": [
|
|
|
|
|
{ORTH: "Has"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"He's": [
|
|
|
|
|
{ORTH: "He", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"wasnt": [
|
|
|
|
|
{ORTH: "was"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"whyll": [
|
|
|
|
|
{ORTH: "why"},
|
|
|
|
|
{ORTH: "ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"mustnt": [
|
|
|
|
|
{ORTH: "must"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"He'd": [
|
|
|
|
|
{ORTH: "He", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Shes": [
|
|
|
|
|
{ORTH: "i", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"where've": [
|
|
|
|
|
{ORTH: "where"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Youll": [
|
|
|
|
|
{ORTH: "You", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"hasnt": [
|
|
|
|
|
{ORTH: "has"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"theyll": [
|
|
|
|
|
{ORTH: "they", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"it'd've": [
|
|
|
|
|
{ORTH: "it", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"itdve": [
|
|
|
|
|
{ORTH: "it", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "d", LEMMA: "would", TAG: "MD"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"wedve": [
|
|
|
|
|
{ORTH: "we"},
|
|
|
|
|
{ORTH: "d", LEMMA: "would", TAG: "MD"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Werent": [
|
|
|
|
|
{ORTH: "Were"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Therell": [
|
|
|
|
|
{ORTH: "There"},
|
|
|
|
|
{ORTH: "ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"shan't": [
|
|
|
|
|
{ORTH: "sha"},
|
|
|
|
|
{ORTH: "n't", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Wont": [
|
|
|
|
|
{ORTH: "Wo"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"hadntve": [
|
|
|
|
|
{ORTH: "had", LEMMA: "have", TAG: "VBD"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"who've": [
|
|
|
|
|
{ORTH: "who"},
|
|
|
|
|
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Whatre": [
|
|
|
|
|
{ORTH: "What"},
|
|
|
|
|
{ORTH: "re"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"'s": [
|
|
|
|
|
{ORTH: "'s", LEMMA: "'s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"where'd": [
|
|
|
|
|
{ORTH: "where"},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"shouldve": [
|
|
|
|
|
{ORTH: "should"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"where's": [
|
|
|
|
|
{ORTH: "where"},
|
|
|
|
|
{ORTH: "'s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"neednt": [
|
|
|
|
|
{ORTH: "need"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"It'll": [
|
|
|
|
|
{ORTH: "It", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"We'd": [
|
|
|
|
|
{ORTH: "We"},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Whats": [
|
|
|
|
|
{ORTH: "What"},
|
|
|
|
|
{ORTH: "s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"\u2014": [
|
|
|
|
|
{ORTH: "\u2014", TAG: ":", LEMMA: "--"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Itd": [
|
|
|
|
|
{ORTH: "It", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"she'd": [
|
|
|
|
|
{ORTH: "she", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Mustnt": [
|
|
|
|
|
{ORTH: "Must"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Notve": [
|
|
|
|
|
{ORTH: "Not", LEMMA: "not", TAG: "RB"},
|
|
|
|
|
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"you'll": [
|
|
|
|
|
{ORTH: "you", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'ll", LEMMA: "will", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Theyd": [
|
|
|
|
|
{ORTH: "They", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "d", LEMMA: "would", TAG: "MD"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"she's": [
|
|
|
|
|
{ORTH: "she", LEMMA: PRON_LEMMA},
|
|
|
|
|
{ORTH: "'s"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"Couldnt": [
|
|
|
|
|
{ORTH: "Could", TAG: "MD"},
|
|
|
|
|
{ORTH: "nt", LEMMA: "not", TAG: "RB"}
|
|
|
|
|
],
|
|
|
|
|
|
|
|
|
|
"that's": [
|
|
|
|
|
{ORTH: "that"},
|
|
|
|
|
{ORTH: "'s"}
|
|
|
|
|
]
|
2016-09-25 12:49:00 +00:00
|
|
|
|
}
|
|
|
|
|
|
2016-12-07 19:29:52 +00:00
|
|
|
|
|
|
|
|
|
# Strings that are kept as a single token mapping to themselves (mostly
# abbreviations ending in a period). The loop that follows this list adds
# each entry to TOKENIZER_EXCEPTIONS as ``{orth: [{ORTH: orth}]}``.
#
# NOTE: every entry needs its own trailing comma. Without one, Python
# silently concatenates adjacent string literals (e.g. "May." "Md." would
# become the single bogus entry "May.Md.").
self_map = [
    "''",
    "'em",
    "'ol'",
    "\")",
    "a.",
    "a.m.",
    "Adm.",
    "Ala.",
    "Apr.",
    "Ariz.",
    "Ark.",
    "Aug.",
    "b.",
    "Bros.",
    "c.",
    "Calif.",
    "co.",
    "Co.",
    "Colo.",
    "Conn.",
    "Corp.",
    "d.",
    "D.C.",
    "Dec.",
    "Del.",
    "Dr.",
    "e.",
    "e.g.",
    "E.g.",
    "E.G.",
    "f.",
    "Feb.",
    "Fla.",
    "g.",
    "Ga.",
    "Gen.",
    "Gov.",
    "h.",
    "i.",
    "i.e.",
    "I.e.",
    "I.E.",
    "Ill.",
    "Inc.",
    "Ind.",
    "j.",
    "Jan.",
    "Jr.",
    "Jul.",
    "Jun.",
    "k.",
    "Kan.",
    "Kans.",
    "Ky.",
    "l.",
    "La.",
    "Ltd.",
    "m.",
    "Mar.",
    "Mass.",
    "May.",
    "Md.",
    "Messrs.",
    "Mich.",
    "Minn.",
    "Miss.",
    "Mo.",
    "Mont.",
    "Mr.",
    "Mrs.",
    "Ms.",
    "n.",
    "N.C.",
    "N.D.",
    "N.H.",
    "N.J.",
    "N.M.",
    "N.Y.",
    "Neb.",
    "Nebr.",
    "Nev.",
    "Nov.",
    "o.",
    "Oct.",
    "Okla.",
    "Ore.",
    "p.",
    "p.m.",
    "Pa.",
    "Ph.D.",
    "q.",
    "r.",
    "Rep.",
    "Rev.",
    "s.",
    "Sen.",
    "Sep.",
    "Sept.",
    "St.",
    "t.",
    "Tenn.",
    "u.",
    "v.",
    "Va.",
    "vs.",
    "w.",
    "Wash.",
    "Wis.",
    "x.",
    "y.",
    "z.",
]
|
|
|
|
|
|
2016-12-07 20:11:59 +00:00
|
|
|
|
# Register every self-mapping string (the abbreviations in ``self_map`` and
# the imported EMOTICONS) as a one-token tokenizer exception.
for orths in [self_map, EMOTICONS]:
    # A string must not already have a hand-written exception entry;
    # otherwise the update() below would silently overwrite it.
    overlap = set(TOKENIZER_EXCEPTIONS.keys()).intersection(set(orths))
    assert not overlap, overlap
    # Each string becomes a single token whose ORTH is the string itself.
    TOKENIZER_EXCEPTIONS.update({orth: [{ORTH: orth}] for orth in orths})
|
2016-12-07 19:29:52 +00:00
|
|
|
|
|
|
|
|
|
|
2016-11-24 12:51:32 +00:00
|
|
|
|
# Prefix strings, one per line of the raw literal below; surrounding blank
# space is stripped and the text is split on newlines into a list.
# Lines must start at column 0 and contain nothing but the prefix itself:
# every line, including any stray character, becomes a list entry.
# NOTE(review): entries such as "(", "[", "*" and "$" are unescaped, so they
# are presumably escaped by the consumer before being compiled into a
# regex -- confirm against the tokenizer that uses this table.
TOKENIZER_PREFIXES = r'''
,
"
(
[
{
*
<
$
£
“
'
``
`
#
US$
C$
A$
€
a-
‘
....
...
…
'''.strip().split('\n')
|
2016-09-25 12:49:00 +00:00
|
|
|
|
|
|
|
|
|
|
2016-11-24 12:51:32 +00:00
|
|
|
|
# Suffix patterns, one regular-expression fragment per line of the raw
# literal below (regex metacharacters are written pre-escaped, e.g. ``\)``).
# Surrounding blank space is stripped and the text is split on newlines.
# Lines must start at column 0 and contain nothing but the pattern itself:
# every line, including any stray character, becomes a list entry.
# The last two entries use look-behind so that "." is only split off after
# a lowercase letter, digit, or closing punctuation, and "km" only after a
# digit.
TOKENIZER_SUFFIXES = r'''
,
\"
\)
\]
\}
\*
\!
\?
%
\$
>
:
;
'
”
''
's
'S
’s
’S
’
…
\.\.
\.\.\.
\.\.\.\.
(?<=[a-z0-9)\]”"'%\)])\.
(?<=[0-9])km
'''.strip().split('\n')
|
2016-09-25 12:49:00 +00:00
|
|
|
|
|
|
|
|
|
|
2016-11-24 12:51:32 +00:00
|
|
|
|
# Infix patterns, one regular-expression fragment per line of the raw
# literal below. Surrounding blank space is stripped and the text is split
# on newlines. Lines must start at column 0 and contain nothing but the
# pattern itself: every line becomes a list entry.
#
# The look-around assertions restrict where a separator counts as an infix:
#   - "." only between a lowercase and an uppercase letter,
#   - "-" and "--" only between two ASCII letters,
#   - "-" between two digits,
#   - "," between two ASCII letters.
# The letter classes are spelled [a-zA-Z]; the range "A-z" must not be used
# because it also covers the ASCII characters [ \ ] ^ _ and backtick.
TOKENIZER_INFIXES = r'''
…
\.\.\.+
(?<=[a-z])\.(?=[A-Z])
(?<=[a-zA-Z])-(?=[a-zA-Z])
(?<=[a-zA-Z])--(?=[a-zA-Z])
(?<=[0-9])-(?=[0-9])
(?<=[A-Za-z]),(?=[A-Za-z])
'''.strip().split('\n')
|