spaCy/spacy/en/language_data.py

2095 lines
42 KiB
Python
Raw Normal View History

# encoding: utf8
from __future__ import unicode_literals
2016-11-24 12:51:32 +00:00
import re
2016-12-07 19:29:52 +00:00
from ..symbols import *
2016-12-07 20:11:59 +00:00
from ..language_data import EMOTICONS
2016-12-07 19:29:52 +00:00
# Placeholder lemma shared by pronoun tokens in TOKENIZER_EXCEPTIONS below
# (e.g. "I", "you", "they"), used in place of a per-word lemma.
PRON_LEMMA = "-PRON-"
# Maps each fine-grained tag string (Penn-Treebank-style names such as "NN" or
# "VBZ") to a dict holding its coarse part-of-speech symbol under POS plus any
# extra morphological features as string keys.
#
# Each tag appears exactly once. "AFX" and "HYPH" used to be listed a second
# time near the end with bare {POS: X} / {POS: PUNCT} values; in a dict literal
# the last duplicate wins, so those later entries silently discarded the
# feature-bearing definitions kept here.
TAG_MAP = {
    ".": {POS: PUNCT, "PunctType": "peri"},
    ",": {POS: PUNCT, "PunctType": "comm"},
    "-LRB-": {POS: PUNCT, "PunctType": "brck", "PunctSide": "ini"},
    "-RRB-": {POS: PUNCT, "PunctType": "brck", "PunctSide": "fin"},
    "``": {POS: PUNCT, "PunctType": "quot", "PunctSide": "ini"},
    "\"\"": {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"},
    "''": {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"},
    ":": {POS: PUNCT},
    "$": {POS: SYM, "Other": {"SymType": "currency"}},
    "#": {POS: SYM, "Other": {"SymType": "numbersign"}},
    "AFX": {POS: ADJ, "Hyph": "yes"},
    "CC": {POS: CONJ, "ConjType": "coor"},
    "CD": {POS: NUM, "NumType": "card"},
    "DT": {POS: DET},
    "EX": {POS: ADV, "AdvType": "ex"},
    "FW": {POS: X, "Foreign": "yes"},
    "HYPH": {POS: PUNCT, "PunctType": "dash"},
    "IN": {POS: ADP},
    "JJ": {POS: ADJ, "Degree": "pos"},
    "JJR": {POS: ADJ, "Degree": "comp"},
    "JJS": {POS: ADJ, "Degree": "sup"},
    "LS": {POS: PUNCT, "NumType": "ord"},
    "MD": {POS: VERB, "VerbType": "mod"},
    "NIL": {POS: ""},
    "NN": {POS: NOUN, "Number": "sing"},
    "NNP": {POS: PROPN, "NounType": "prop", "Number": "sing"},
    "NNPS": {POS: PROPN, "NounType": "prop", "Number": "plur"},
    "NNS": {POS: NOUN, "Number": "plur"},
    "PDT": {POS: ADJ, "AdjType": "pdt", "PronType": "prn"},
    "POS": {POS: PART, "Poss": "yes"},
    "PRP": {POS: PRON, "PronType": "prs"},
    "PRP$": {POS: ADJ, "PronType": "prs", "Poss": "yes"},
    "RB": {POS: ADV, "Degree": "pos"},
    "RBR": {POS: ADV, "Degree": "comp"},
    "RBS": {POS: ADV, "Degree": "sup"},
    "RP": {POS: PART},
    "SYM": {POS: SYM},
    "TO": {POS: PART, "PartType": "inf", "VerbForm": "inf"},
    "UH": {POS: INTJ},
    "VB": {POS: VERB, "VerbForm": "inf"},
    "VBD": {POS: VERB, "VerbForm": "fin", "Tense": "past"},
    "VBG": {POS: VERB, "VerbForm": "part", "Tense": "pres", "Aspect": "prog"},
    "VBN": {POS: VERB, "VerbForm": "part", "Tense": "past", "Aspect": "perf"},
    "VBP": {POS: VERB, "VerbForm": "fin", "Tense": "pres"},
    "VBZ": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Number": "sing", "Person": 3},
    "WDT": {POS: ADJ, "PronType": "int|rel"},
    "WP": {POS: NOUN, "PronType": "int|rel"},
    "WP$": {POS: ADJ, "Poss": "yes", "PronType": "int|rel"},
    "WRB": {POS: ADV, "PronType": "int|rel"},
    "SP": {POS: SPACE},
    "ADD": {POS: X},
    "NFP": {POS: PUNCT},
    "GW": {POS: X},
    "XX": {POS: X},
    "BES": {POS: VERB},
    "HVS": {POS: VERB}
}
2016-11-24 12:51:32 +00:00
2016-12-07 19:29:52 +00:00
# Set of lower-case English stop words, built by whitespace-splitting the
# literal below. Only real words belong inside the string: everything between
# the triple quotes becomes a member of the set, so stray text (for example
# revision timestamps) must never be left in it.
STOP_WORDS = set("""
a about above across after afterwards again against all almost alone along
already also although always am among amongst amount an and another any anyhow
anyone anything anyway anywhere are around as at

back be became because become becomes becoming been before beforehand behind
being below beside besides between beyond both bottom but by

call can cannot ca could

did do does doing done down due during

each eight either eleven else elsewhere empty enough etc even ever every
everyone everything everywhere except

few fifteen fifty first five for former formerly forty four from front full
further

get give go

had has have he hence her here hereafter hereby herein hereupon hers herself
him himself his how however hundred

i if in inc indeed into is it its itself

just

keep

last latter latterly least less

made make many may me meanwhile might mine more moreover most mostly move much
must my myself

name namely neither never nevertheless next nine no nobody none noone nor not
nothing now nowhere

of off often on once one only onto or other others otherwise our ours ourselves
out over own

part per perhaps please put

quite

rather re really regarding

same say see seem seemed seeming seems serious several she should show side
since six sixty so some somehow someone something sometime sometimes somewhere
still such

take ten than that the their them themselves then thence there thereafter
thereby therefore therein thereupon these they third this those though three
through throughout thru thus to together too top toward towards twelve twenty
two

under until up unless upon us used using

various very via

was we well were what whatever when whence whenever where whereafter whereas
whereby wherein whereupon wherever whether which while whither who whoever
whole whom whose why will with within without would

yet you your yours yourself yourselves
""".split())
2016-12-07 19:29:52 +00:00
# Special-case tokenizations. Each key is an exact surface string; each value
# is the ordered list of token attribute dicts that string is split into.
# Invariant: the ORTH values of an entry, concatenated in order, reproduce the
# key exactly. The "She's", "Shedve", "She'll" and "Shes" entries previously
# broke this by using "i" as the first token's ORTH; they now use "She".
TOKENIZER_EXCEPTIONS = {
    "and/or": [
        {ORTH: "and/or", LEMMA: "and/or", TAG: "CC"}
    ],
    "Theydve": [
        {ORTH: "They", LEMMA: PRON_LEMMA},
        {ORTH: "d", LEMMA: "would", TAG: "MD"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "shouldn't've": [
        {ORTH: "should"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "There'll": [
        {ORTH: "There"},
        {ORTH: "'ll", LEMMA: "will", TAG: "MD"}
    ],
    "howll": [
        {ORTH: "how"},
        {ORTH: "ll", LEMMA: "will", TAG: "MD"}
    ],
    "Hadn't've": [
        {ORTH: "Had", LEMMA: "have", TAG: "VBD"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "who'll": [
        {ORTH: "who"},
        {ORTH: "'ll", LEMMA: "will", TAG: "MD"}
    ],
    "aint": [
        {ORTH: "ai", TAG: "VBP", "number": 2, LEMMA: "be"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    " ": [
        {TAG: "SP", ORTH: " "}
    ],
    "Shouldnt": [
        {ORTH: "Should"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "when's": [
        {ORTH: "when"},
        {ORTH: "'s", LEMMA: "be"}
    ],
    "Didnt": [
        {ORTH: "Did", LEMMA: "do", TAG: "VBD"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "itll": [
        {ORTH: "it", LEMMA: PRON_LEMMA},
        {ORTH: "ll", LEMMA: "will", TAG: "MD"}
    ],
    "Who're": [
        {ORTH: "Who"},
        {ORTH: "'re"}
    ],
    "Ain't": [
        {ORTH: "Ai", TAG: "VBP", "number": 2, LEMMA: "be"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "Can't": [
        {ORTH: "Ca", LEMMA: "can", TAG: "MD"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "Whyre": [
        {ORTH: "Why"},
        {ORTH: "re"}
    ],
    "Aren't": [
        {ORTH: "Are", TAG: "VBP", "number": 2, LEMMA: "be"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "Neednt": [
        {ORTH: "Need"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "should've": [
        {ORTH: "should"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "shouldn't": [
        {ORTH: "should"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "Idve": [
        {ORTH: "I", LEMMA: PRON_LEMMA},
        {ORTH: "d", LEMMA: "would", TAG: "MD"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "weve": [
        {ORTH: "we"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "Ive": [
        {ORTH: "I", LEMMA: PRON_LEMMA},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "they'd": [
        {ORTH: "they", LEMMA: PRON_LEMMA},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"}
    ],
    "Youdve": [
        {ORTH: "You", LEMMA: PRON_LEMMA},
        {ORTH: "d", LEMMA: "would", TAG: "MD"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "theyve": [
        {ORTH: "they", LEMMA: PRON_LEMMA},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "Weren't": [
        {ORTH: "Were"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "werent": [
        {ORTH: "were"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "whyre": [
        {ORTH: "why"},
        {ORTH: "re"}
    ],
    "I'm": [
        {ORTH: "I", LEMMA: PRON_LEMMA},
        {ORTH: "'m", TAG: "VBP", "tenspect": 1, "number": 1, LEMMA: "be"}
    ],
    "She'd've": [
        {ORTH: "She", LEMMA: PRON_LEMMA},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "not've": [
        {ORTH: "not", LEMMA: "not", TAG: "RB"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "we'll": [
        {ORTH: "we"},
        {ORTH: "'ll", LEMMA: "will", TAG: "MD"}
    ],
    "Don't": [
        {ORTH: "Do", LEMMA: "do"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "Whyll": [
        {ORTH: "Why"},
        {ORTH: "ll", LEMMA: "will", TAG: "MD"}
    ],
    "they've": [
        {ORTH: "they", LEMMA: PRON_LEMMA},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "wasn't": [
        {ORTH: "was"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "could've": [
        {ORTH: "could", TAG: "MD"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "what've": [
        {ORTH: "what"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "havent": [
        {ORTH: "have", TAG: "VB"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "Who've": [
        {ORTH: "Who"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "Shan't": [
        {ORTH: "Sha"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "i'll": [
        {ORTH: "i", LEMMA: PRON_LEMMA},
        {ORTH: "'ll", LEMMA: "will", TAG: "MD"}
    ],
    "you'd": [
        {ORTH: "you", LEMMA: PRON_LEMMA},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"}
    ],
    "whens": [
        {ORTH: "when"},
        {ORTH: "s", LEMMA: "be"}
    ],
    "whys": [
        {ORTH: "why"},
        {ORTH: "s"}
    ],
    "Whereve": [
        {ORTH: "Where"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "\u00a0": [
        {ORTH: "\u00a0", TAG: "SP", LEMMA: " "}
    ],
    "there'd": [
        {ORTH: "there"},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"}
    ],
    "hadn't've": [
        {ORTH: "had", LEMMA: "have", TAG: "VBD"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "whatll": [
        {ORTH: "what"},
        {ORTH: "ll", LEMMA: "will", TAG: "MD"}
    ],
    "wouldn't've": [
        {ORTH: "would"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "there's": [
        {ORTH: "there"},
        {ORTH: "'s"}
    ],
    "Who'll": [
        {ORTH: "Who"},
        {ORTH: "'ll", LEMMA: "will", TAG: "MD"}
    ],
    "youll": [
        {ORTH: "you", LEMMA: PRON_LEMMA},
        {ORTH: "ll", LEMMA: "will", TAG: "MD"}
    ],
    "wouldve": [
        {ORTH: "would"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "Wouldnt": [
        {ORTH: "Would"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "Thered": [
        {ORTH: "There"},
        {ORTH: "d", LEMMA: "would", TAG: "MD"}
    ],
    "Youre": [
        {ORTH: "You", LEMMA: PRON_LEMMA},
        {ORTH: "re", LEMMA: "be"}
    ],
    "Couldn't've": [
        {ORTH: "Could", TAG: "MD"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "who're": [
        {ORTH: "who"},
        {ORTH: "'re"}
    ],
    "Whys": [
        {ORTH: "Why"},
        {ORTH: "s"}
    ],
    "mightn't've": [
        {ORTH: "might"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "Wholl": [
        {ORTH: "Who"},
        {ORTH: "ll", LEMMA: "will", TAG: "MD"}
    ],
    "hadn't": [
        {ORTH: "had", LEMMA: "have", TAG: "VBD"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "Havent": [
        {ORTH: "Have", TAG: "VB"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "Whatve": [
        {ORTH: "What"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "Thats": [
        {ORTH: "That"},
        {ORTH: "s"}
    ],
    "Howll": [
        {ORTH: "How"},
        {ORTH: "ll", LEMMA: "will", TAG: "MD"}
    ],
    "wouldn't": [
        {ORTH: "would"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "You'll": [
        {ORTH: "You", LEMMA: PRON_LEMMA},
        {ORTH: "'ll", LEMMA: "will", TAG: "MD"}
    ],
    "Cant": [
        {ORTH: "Ca", LEMMA: "can", TAG: "MD"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "i'd": [
        {ORTH: "i", LEMMA: PRON_LEMMA},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"}
    ],
    "weren't": [
        {ORTH: "were"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "would've": [
        {ORTH: "would"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "i'm": [
        {ORTH: "i", LEMMA: PRON_LEMMA},
        {ORTH: "'m", TAG: "VBP", "tenspect": 1, "number": 1, LEMMA: "be"}
    ],
    "why'll": [
        {ORTH: "why"},
        {ORTH: "'ll", LEMMA: "will", TAG: "MD"}
    ],
    "we'd've": [
        {ORTH: "we"},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "Shouldve": [
        {ORTH: "Should"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "can't": [
        {ORTH: "ca", LEMMA: "can", TAG: "MD"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "thats": [
        {ORTH: "that"},
        {ORTH: "s"}
    ],
    "Hes": [
        {ORTH: "He", LEMMA: PRON_LEMMA},
        {ORTH: "s"}
    ],
    "Needn't": [
        {ORTH: "Need"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "It's": [
        {ORTH: "It", LEMMA: PRON_LEMMA},
        {ORTH: "'s"}
    ],
    "Why're": [
        {ORTH: "Why"},
        {ORTH: "'re", LEMMA: "be"}
    ],
    "Hed": [
        {ORTH: "He", LEMMA: PRON_LEMMA},
        {ORTH: "d", LEMMA: "would", TAG: "MD"}
    ],
    "Mt.": [
        {ORTH: "Mt.", LEMMA: "Mount"}
    ],
    "couldn't": [
        {ORTH: "could", TAG: "MD"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "What've": [
        {ORTH: "What"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "It'd": [
        {ORTH: "It", LEMMA: PRON_LEMMA},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"}
    ],
    "theydve": [
        {ORTH: "they", LEMMA: PRON_LEMMA},
        {ORTH: "d", LEMMA: "would", TAG: "MD"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "aren't": [
        {ORTH: "are", TAG: "VBP", "number": 2, LEMMA: "be"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "Mightn't": [
        {ORTH: "Might"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "'S": [
        {ORTH: "'S", LEMMA: "'s"}
    ],
    "I've": [
        {ORTH: "I", LEMMA: PRON_LEMMA},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "Whered": [
        {ORTH: "Where"},
        {ORTH: "d", LEMMA: "would", TAG: "MD"}
    ],
    "Itdve": [
        {ORTH: "It", LEMMA: PRON_LEMMA},
        {ORTH: "d", LEMMA: "would", TAG: "MD"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "I'ma": [
        {ORTH: "I", LEMMA: PRON_LEMMA},
        {ORTH: "'ma"}
    ],
    "whos": [
        {ORTH: "who"},
        {ORTH: "s"}
    ],
    "They'd": [
        {ORTH: "They", LEMMA: PRON_LEMMA},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"}
    ],
    "What'll": [
        {ORTH: "What"},
        {ORTH: "'ll", LEMMA: "will", TAG: "MD"}
    ],
    "You've": [
        {ORTH: "You", LEMMA: PRON_LEMMA},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "Mustve": [
        {ORTH: "Must"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "whod": [
        {ORTH: "who"},
        {ORTH: "d", LEMMA: "would", TAG: "MD"}
    ],
    "mightntve": [
        {ORTH: "might"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "I'd've": [
        {ORTH: "I", LEMMA: PRON_LEMMA},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "Must've": [
        {ORTH: "Must"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "it'd": [
        {ORTH: "it", LEMMA: PRON_LEMMA},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"}
    ],
    "what're": [
        {ORTH: "what"},
        {ORTH: "'re"}
    ],
    "Wasn't": [
        {ORTH: "Was"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "what's": [
        {ORTH: "what"},
        {ORTH: "'s"}
    ],
    "he'd've": [
        {ORTH: "he", LEMMA: PRON_LEMMA},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "She'd": [
        {ORTH: "She", LEMMA: PRON_LEMMA},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"}
    ],
    "shedve": [
        {ORTH: "she", LEMMA: PRON_LEMMA},
        {ORTH: "d", LEMMA: "would", TAG: "MD"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "ain't": [
        {ORTH: "ai", TAG: "VBP", "number": 2, LEMMA: "be"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "She's": [
        # Fixed: first token was "i", which does not match the key.
        {ORTH: "She", LEMMA: PRON_LEMMA},
        {ORTH: "'s"}
    ],
    "i'd've": [
        {ORTH: "i", LEMMA: PRON_LEMMA},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "We'd've": [
        {ORTH: "We"},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "must've": [
        {ORTH: "must"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "That's": [
        {ORTH: "That"},
        {ORTH: "'s"}
    ],
    "whatre": [
        {ORTH: "what"},
        {ORTH: "re"}
    ],
    "you'd've": [
        {ORTH: "you", LEMMA: PRON_LEMMA},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "Dont": [
        {ORTH: "Do", LEMMA: "do"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "thered": [
        {ORTH: "there"},
        {ORTH: "d", LEMMA: "would", TAG: "MD"}
    ],
    "Youd": [
        {ORTH: "You", LEMMA: PRON_LEMMA},
        {ORTH: "d", LEMMA: "would", TAG: "MD"}
    ],
    "couldn't've": [
        {ORTH: "could", TAG: "MD"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "Whens": [
        {ORTH: "When"},
        {ORTH: "s"}
    ],
    "Isnt": [
        {ORTH: "Is", LEMMA: "be", TAG: "VBZ"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "mightve": [
        {ORTH: "might"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "didnt": [
        {ORTH: "did", LEMMA: "do", TAG: "VBD"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "ive": [
        {ORTH: "i", LEMMA: PRON_LEMMA},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "It'd've": [
        {ORTH: "It", LEMMA: PRON_LEMMA},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "\t": [
        {ORTH: "\t", TAG: "SP"}
    ],
    "Itll": [
        {ORTH: "It", LEMMA: PRON_LEMMA},
        {ORTH: "ll", LEMMA: "will", TAG: "MD"}
    ],
    "didn't": [
        {ORTH: "did", LEMMA: "do", TAG: "VBD"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "cant": [
        {ORTH: "ca", LEMMA: "can", TAG: "MD"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "im": [
        {ORTH: "i", LEMMA: PRON_LEMMA},
        {ORTH: "m", TAG: "VBP", "tenspect": 1, "number": 1, LEMMA: "be"}
    ],
    "they'd've": [
        {ORTH: "they", LEMMA: PRON_LEMMA},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "Hadntve": [
        {ORTH: "Had", LEMMA: "have", TAG: "VBD"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "Weve": [
        {ORTH: "We"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "Mightnt": [
        {ORTH: "Might"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "youdve": [
        {ORTH: "you", LEMMA: PRON_LEMMA},
        {ORTH: "d", LEMMA: "would", TAG: "MD"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "Shedve": [
        # Fixed: first token was "i", which does not match the key.
        {ORTH: "She", LEMMA: PRON_LEMMA},
        {ORTH: "d", LEMMA: "would", TAG: "MD"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "theyd": [
        {ORTH: "they", LEMMA: PRON_LEMMA},
        {ORTH: "d", LEMMA: "would", TAG: "MD"}
    ],
    "Cannot": [
        {ORTH: "Can", LEMMA: "can", TAG: "MD"},
        {ORTH: "not", LEMMA: "not", TAG: "RB"}
    ],
    "Hadn't": [
        {ORTH: "Had", LEMMA: "have", TAG: "VBD"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "What're": [
        {ORTH: "What"},
        {ORTH: "'re"}
    ],
    "He'll": [
        {ORTH: "He", LEMMA: PRON_LEMMA},
        {ORTH: "'ll", LEMMA: "will", TAG: "MD"}
    ],
    "wholl": [
        {ORTH: "who"},
        {ORTH: "ll", LEMMA: "will", TAG: "MD"}
    ],
    "They're": [
        {ORTH: "They", LEMMA: PRON_LEMMA},
        {ORTH: "'re"}
    ],
    "shouldnt": [
        {ORTH: "should"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "\n": [
        {ORTH: "\n", TAG: "SP"}
    ],
    "whered": [
        {ORTH: "where"},
        {ORTH: "d", LEMMA: "would", TAG: "MD"}
    ],
    "youve": [
        {ORTH: "you", LEMMA: PRON_LEMMA},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "notve": [
        {ORTH: "not", LEMMA: "not", TAG: "RB"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "couldve": [
        {ORTH: "could", TAG: "MD"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "mustve": [
        {ORTH: "must"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "Youve": [
        {ORTH: "You", LEMMA: PRON_LEMMA},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "therell": [
        {ORTH: "there"},
        {ORTH: "ll", LEMMA: "will", TAG: "MD"}
    ],
    "might've": [
        {ORTH: "might"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "Mustn't": [
        {ORTH: "Must"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "wheres": [
        {ORTH: "where"},
        {ORTH: "s"}
    ],
    "they're": [
        {ORTH: "they", LEMMA: PRON_LEMMA},
        {ORTH: "'re"}
    ],
    "idve": [
        {ORTH: "i", LEMMA: PRON_LEMMA},
        {ORTH: "d", LEMMA: "would", TAG: "MD"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "hows": [
        {ORTH: "how"},
        {ORTH: "s"}
    ],
    "youre": [
        {ORTH: "you", LEMMA: PRON_LEMMA},
        {ORTH: "re"}
    ],
    "Didn't": [
        {ORTH: "Did", LEMMA: "do", TAG: "VBD"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "Couldve": [
        {ORTH: "Could", TAG: "MD"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "cannot": [
        {ORTH: "can", LEMMA: "can", TAG: "MD"},
        {ORTH: "not", LEMMA: "not", TAG: "RB"}
    ],
    "Im": [
        {ORTH: "I", LEMMA: PRON_LEMMA},
        {ORTH: "m", TAG: "VBP", "tenspect": 1, "number": 1, LEMMA: "be"}
    ],
    "howd": [
        {ORTH: "how"},
        {ORTH: "d", LEMMA: "would", TAG: "MD"}
    ],
    "you've": [
        {ORTH: "you", LEMMA: PRON_LEMMA},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "You're": [
        {ORTH: "You", LEMMA: PRON_LEMMA},
        {ORTH: "'re"}
    ],
    "she'll": [
        {ORTH: "she", LEMMA: PRON_LEMMA},
        {ORTH: "'ll", LEMMA: "will", TAG: "MD"}
    ],
    "Theyll": [
        {ORTH: "They", LEMMA: PRON_LEMMA},
        {ORTH: "ll", LEMMA: "will", TAG: "MD"}
    ],
    "don't": [
        {ORTH: "do", LEMMA: "do"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "itd": [
        {ORTH: "it", LEMMA: PRON_LEMMA},
        {ORTH: "d", LEMMA: "would", TAG: "MD"}
    ],
    "Hedve": [
        {ORTH: "He", LEMMA: PRON_LEMMA},
        {ORTH: "d", LEMMA: "would", TAG: "MD"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "isnt": [
        {ORTH: "is", LEMMA: "be", TAG: "VBZ"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "won't": [
        {ORTH: "wo"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "We're": [
        {ORTH: "We"},
        {ORTH: "'re"}
    ],
    "\u2018S": [
        {ORTH: "\u2018S", LEMMA: "'s"}
    ],
    "\u2018s": [
        {ORTH: "\u2018s", LEMMA: "'s"}
    ],
    "dont": [
        {ORTH: "do", LEMMA: "do"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "ima": [
        {ORTH: "i", LEMMA: PRON_LEMMA},
        {ORTH: "ma"}
    ],
    "Let's": [
        {ORTH: "Let"},
        {ORTH: "'s", LEMMA: "us"}
    ],
    "he's": [
        {ORTH: "he", LEMMA: PRON_LEMMA},
        {ORTH: "'s"}
    ],
    "we've": [
        {ORTH: "we"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "What's": [
        {ORTH: "What"},
        {ORTH: "'s"}
    ],
    "Who's": [
        {ORTH: "Who"},
        {ORTH: "'s"}
    ],
    "hedve": [
        {ORTH: "he", LEMMA: PRON_LEMMA},
        {ORTH: "d", LEMMA: "would", TAG: "MD"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "he'd": [
        {ORTH: "he", LEMMA: PRON_LEMMA},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"}
    ],
    "When's": [
        {ORTH: "When"},
        {ORTH: "'s"}
    ],
    "Mightn't've": [
        {ORTH: "Might"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "We've": [
        {ORTH: "We"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "Couldntve": [
        {ORTH: "Could", TAG: "MD"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "Who'd": [
        {ORTH: "Who"},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"}
    ],
    "haven't": [
        {ORTH: "have", TAG: "VB"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "arent": [
        {ORTH: "are", TAG: "VBP", "number": 2, LEMMA: "be"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "You'd've": [
        {ORTH: "You", LEMMA: PRON_LEMMA},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "Wouldn't": [
        {ORTH: "Would"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "who's": [
        {ORTH: "who"},
        {ORTH: "'s"}
    ],
    "Mightve": [
        {ORTH: "Might"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "Theredve": [
        {ORTH: "There"},
        {ORTH: "d", LEMMA: "would", TAG: "MD"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "theredve": [
        {ORTH: "there"},
        {ORTH: "d", LEMMA: "would", TAG: "MD"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "who'd": [
        {ORTH: "who"},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"}
    ],
    "Where's": [
        {ORTH: "Where"},
        {ORTH: "'s"}
    ],
    "wont": [
        {ORTH: "wo"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "she'd've": [
        {ORTH: "she", LEMMA: PRON_LEMMA},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "Should've": [
        {ORTH: "Should"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "theyre": [
        {ORTH: "they", LEMMA: PRON_LEMMA},
        {ORTH: "re"}
    ],
    "Wouldntve": [
        {ORTH: "Would"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "Where've": [
        {ORTH: "Where"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "mustn't": [
        {ORTH: "must"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "isn't": [
        {ORTH: "is", LEMMA: "be", TAG: "VBZ"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "Aint": [
        {ORTH: "Ai", TAG: "VBP", "number": 2, LEMMA: "be"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "why's": [
        {ORTH: "why"},
        {ORTH: "'s"}
    ],
    "There'd": [
        {ORTH: "There"},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"}
    ],
    "They'll": [
        {ORTH: "They", LEMMA: PRON_LEMMA},
        {ORTH: "'ll", LEMMA: "will", TAG: "MD"}
    ],
    "how'll": [
        {ORTH: "how"},
        {ORTH: "'ll", LEMMA: "will", TAG: "MD"}
    ],
    "Wedve": [
        {ORTH: "We"},
        {ORTH: "d", LEMMA: "would", TAG: "MD"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "couldntve": [
        {ORTH: "could", TAG: "MD"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "There's": [
        {ORTH: "There"},
        {ORTH: "'s"}
    ],
    "we'd": [
        {ORTH: "we"},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"}
    ],
    "Whod": [
        {ORTH: "Who"},
        {ORTH: "d", LEMMA: "would", TAG: "MD"}
    ],
    "whatve": [
        {ORTH: "what"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "Wouldve": [
        {ORTH: "Would"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "there'll": [
        {ORTH: "there"},
        {ORTH: "'ll", LEMMA: "will", TAG: "MD"}
    ],
    "needn't": [
        {ORTH: "need"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "shouldntve": [
        {ORTH: "should"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "why're": [
        {ORTH: "why"},
        {ORTH: "'re"}
    ],
    "Doesnt": [
        {ORTH: "Does", LEMMA: "do", TAG: "VBZ"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "whereve": [
        {ORTH: "where"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "they'll": [
        {ORTH: "they", LEMMA: PRON_LEMMA},
        {ORTH: "'ll", LEMMA: "will", TAG: "MD"}
    ],
    "I'd": [
        {ORTH: "I", LEMMA: PRON_LEMMA},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"}
    ],
    "Might've": [
        {ORTH: "Might"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "mightnt": [
        {ORTH: "might"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "Not've": [
        {ORTH: "Not", LEMMA: "not", TAG: "RB"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "mightn't": [
        {ORTH: "might"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "you're": [
        {ORTH: "you", LEMMA: PRON_LEMMA},
        {ORTH: "'re"}
    ],
    "They've": [
        {ORTH: "They", LEMMA: PRON_LEMMA},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "what'll": [
        {ORTH: "what"},
        {ORTH: "'ll", LEMMA: "will", TAG: "MD"}
    ],
    "Could've": [
        {ORTH: "Could", TAG: "MD"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "Would've": [
        {ORTH: "Would"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "Isn't": [
        {ORTH: "Is", LEMMA: "be", TAG: "VBZ"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "let's": [
        {ORTH: "let"},
        {ORTH: "'s", LEMMA: "us"}
    ],
    "She'll": [
        # Fixed: first token was "i", which does not match the key.
        {ORTH: "She", LEMMA: PRON_LEMMA},
        {ORTH: "'ll", LEMMA: "will", TAG: "MD"}
    ],
    "You'd": [
        {ORTH: "You", LEMMA: PRON_LEMMA},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"}
    ],
    "wouldnt": [
        {ORTH: "would"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "Why'll": [
        {ORTH: "Why"},
        {ORTH: "'ll", LEMMA: "will", TAG: "MD"}
    ],
    "Where'd": [
        {ORTH: "Where"},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"}
    ],
    "Theyre": [
        {ORTH: "They", LEMMA: PRON_LEMMA},
        {ORTH: "re"}
    ],
    "Won't": [
        {ORTH: "Wo"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "Couldn't": [
        {ORTH: "Could", TAG: "MD"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "it's": [
        {ORTH: "it", LEMMA: PRON_LEMMA},
        {ORTH: "'s"}
    ],
    "it'll": [
        {ORTH: "it", LEMMA: PRON_LEMMA},
        {ORTH: "'ll", LEMMA: "will", TAG: "MD"}
    ],
    "They'd've": [
        {ORTH: "They", LEMMA: PRON_LEMMA},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "Ima": [
        {ORTH: "I", LEMMA: PRON_LEMMA},
        {ORTH: "ma"}
    ],
    "whats": [
        {ORTH: "what"},
        {ORTH: "s"}
    ],
    "How's": [
        {ORTH: "How"},
        {ORTH: "'s"}
    ],
    "Shouldntve": [
        {ORTH: "Should"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "youd": [
        {ORTH: "you", LEMMA: PRON_LEMMA},
        {ORTH: "d", LEMMA: "would", TAG: "MD"}
    ],
    "Whatll": [
        {ORTH: "What"},
        {ORTH: "ll", LEMMA: "will", TAG: "MD"}
    ],
    "Wouldn't've": [
        {ORTH: "Would"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "How'd": [
        {ORTH: "How"},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"}
    ],
    "doesnt": [
        {ORTH: "does", LEMMA: "do", TAG: "VBZ"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "Shouldn't": [
        {ORTH: "Should"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "He'd've": [
        {ORTH: "He", LEMMA: PRON_LEMMA},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "Mightntve": [
        {ORTH: "Might"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "couldnt": [
        {ORTH: "could", TAG: "MD"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "Haven't": [
        {ORTH: "Have", TAG: "VB"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "doesn't": [
        {ORTH: "does", LEMMA: "do", TAG: "VBZ"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "Hasn't": [
        {ORTH: "Has"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "how's": [
        {ORTH: "how"},
        {ORTH: "'s"}
    ],
    "hes": [
        {ORTH: "he", LEMMA: PRON_LEMMA},
        {ORTH: "s"}
    ],
    "he'll": [
        {ORTH: "he", LEMMA: PRON_LEMMA},
        {ORTH: "'ll", LEMMA: "will", TAG: "MD"}
    ],
    "hed": [
        {ORTH: "he", LEMMA: PRON_LEMMA},
        {ORTH: "d", LEMMA: "would", TAG: "MD"}
    ],
    "how'd": [
        {ORTH: "how"},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"}
    ],
    "we're": [
        {ORTH: "we"},
        {ORTH: "'re"}
    ],
    "Hadnt": [
        {ORTH: "Had", LEMMA: "have", TAG: "VBD"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "Shant": [
        {ORTH: "Sha"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "Theyve": [
        {ORTH: "They", LEMMA: PRON_LEMMA},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "Hows": [
        {ORTH: "How"},
        {ORTH: "s"}
    ],
    "We'll": [
        {ORTH: "We"},
        {ORTH: "'ll", LEMMA: "will", TAG: "MD"}
    ],
    "i've": [
        {ORTH: "i", LEMMA: PRON_LEMMA},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "Whove": [
        {ORTH: "Who"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "i'ma": [
        {ORTH: "i", LEMMA: PRON_LEMMA},
        {ORTH: "'ma"}
    ],
    "Howd": [
        {ORTH: "How"},
        {ORTH: "d", LEMMA: "would", TAG: "MD"}
    ],
    "hadnt": [
        {ORTH: "had", LEMMA: "have", TAG: "VBD"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "shant": [
        {ORTH: "sha"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "There'd've": [
        {ORTH: "There"},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "I'll": [
        {ORTH: "I", LEMMA: PRON_LEMMA},
        {ORTH: "'ll", LEMMA: "will", TAG: "MD"}
    ],
    "Why's": [
        {ORTH: "Why"},
        {ORTH: "'s"}
    ],
    "Shouldn't've": [
        {ORTH: "Should"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "Wasnt": [
        {ORTH: "Was"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "whove": [
        {ORTH: "who"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "hasn't": [
        {ORTH: "has"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "wouldntve": [
        {ORTH: "would"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "Wheres": [
        {ORTH: "Where"},
        {ORTH: "s"}
    ],
    "How'll": [
        {ORTH: "How"},
        {ORTH: "'ll", LEMMA: "will", TAG: "MD"}
    ],
    "there'd've": [
        {ORTH: "there"},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "Whos": [
        {ORTH: "Who"},
        {ORTH: "s"}
    ],
    "shes": [
        {ORTH: "she", LEMMA: PRON_LEMMA},
        {ORTH: "s"}
    ],
    "Doesn't": [
        {ORTH: "Does", LEMMA: "do", TAG: "VBZ"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "Arent": [
        {ORTH: "Are", TAG: "VBP", "number": 2, LEMMA: "be"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "Hasnt": [
        {ORTH: "Has"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "He's": [
        {ORTH: "He", LEMMA: PRON_LEMMA},
        {ORTH: "'s"}
    ],
    "wasnt": [
        {ORTH: "was"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "whyll": [
        {ORTH: "why"},
        {ORTH: "ll", LEMMA: "will", TAG: "MD"}
    ],
    "mustnt": [
        {ORTH: "must"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "He'd": [
        {ORTH: "He", LEMMA: PRON_LEMMA},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"}
    ],
    "Shes": [
        # Fixed: first token was "i", which does not match the key.
        {ORTH: "She", LEMMA: PRON_LEMMA},
        {ORTH: "s"}
    ],
    "where've": [
        {ORTH: "where"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "Youll": [
        {ORTH: "You", LEMMA: PRON_LEMMA},
        {ORTH: "ll", LEMMA: "will", TAG: "MD"}
    ],
    "hasnt": [
        {ORTH: "has"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "theyll": [
        {ORTH: "they", LEMMA: PRON_LEMMA},
        {ORTH: "ll", LEMMA: "will", TAG: "MD"}
    ],
    "it'd've": [
        {ORTH: "it", LEMMA: PRON_LEMMA},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "itdve": [
        {ORTH: "it", LEMMA: PRON_LEMMA},
        {ORTH: "d", LEMMA: "would", TAG: "MD"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "wedve": [
        {ORTH: "we"},
        {ORTH: "d", LEMMA: "would", TAG: "MD"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "Werent": [
        {ORTH: "Were"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "Therell": [
        {ORTH: "There"},
        {ORTH: "ll", LEMMA: "will", TAG: "MD"}
    ],
    "shan't": [
        {ORTH: "sha"},
        {ORTH: "n't", LEMMA: "not", TAG: "RB"}
    ],
    "Wont": [
        {ORTH: "Wo"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "hadntve": [
        {ORTH: "had", LEMMA: "have", TAG: "VBD"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "who've": [
        {ORTH: "who"},
        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
    ],
    "Whatre": [
        {ORTH: "What"},
        {ORTH: "re"}
    ],
    "'s": [
        {ORTH: "'s", LEMMA: "'s"}
    ],
    "where'd": [
        {ORTH: "where"},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"}
    ],
    "shouldve": [
        {ORTH: "should"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "where's": [
        {ORTH: "where"},
        {ORTH: "'s"}
    ],
    "neednt": [
        {ORTH: "need"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "It'll": [
        {ORTH: "It", LEMMA: PRON_LEMMA},
        {ORTH: "'ll", LEMMA: "will", TAG: "MD"}
    ],
    "We'd": [
        {ORTH: "We"},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"}
    ],
    "Whats": [
        {ORTH: "What"},
        {ORTH: "s"}
    ],
    "\u2014": [
        {ORTH: "\u2014", TAG: ":", LEMMA: "--"}
    ],
    "Itd": [
        {ORTH: "It", LEMMA: PRON_LEMMA},
        {ORTH: "d", LEMMA: "would", TAG: "MD"}
    ],
    "she'd": [
        {ORTH: "she", LEMMA: PRON_LEMMA},
        {ORTH: "'d", LEMMA: "would", TAG: "MD"}
    ],
    "Mustnt": [
        {ORTH: "Must"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "Notve": [
        {ORTH: "Not", LEMMA: "not", TAG: "RB"},
        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
    ],
    "you'll": [
        {ORTH: "you", LEMMA: PRON_LEMMA},
        {ORTH: "'ll", LEMMA: "will", TAG: "MD"}
    ],
    "Theyd": [
        {ORTH: "They", LEMMA: PRON_LEMMA},
        {ORTH: "d", LEMMA: "would", TAG: "MD"}
    ],
    "she's": [
        {ORTH: "she", LEMMA: PRON_LEMMA},
        {ORTH: "'s"}
    ],
    "Couldnt": [
        {ORTH: "Could", TAG: "MD"},
        {ORTH: "nt", LEMMA: "not", TAG: "RB"}
    ],
    "that's": [
        {ORTH: "that"},
        {ORTH: "'s"}
    ]
}
2016-12-07 19:29:52 +00:00
# Strings that must survive tokenization as a single token whose ORTH is the
# string itself (abbreviations, single letters followed by a period, a few
# quote/contraction fragments). The loop after this list registers each one in
# TOKENIZER_EXCEPTIONS.
#
# Every element needs its own trailing comma: two adjacent string literals are
# silently concatenated, which previously fused "May." and "Md." into the
# single bogus entry "May.Md.".
self_map = [
    "''",
    "'em",
    "'ol'",
    "\")",
    "a.",
    "a.m.",
    "Adm.",
    "Ala.",
    "Apr.",
    "Ariz.",
    "Ark.",
    "Aug.",
    "b.",
    "Bros.",
    "c.",
    "Calif.",
    "co.",
    "Co.",
    "Colo.",
    "Conn.",
    "Corp.",
    "d.",
    "D.C.",
    "Dec.",
    "Del.",
    "Dr.",
    "e.",
    "e.g.",
    "E.g.",
    "E.G.",
    "f.",
    "Feb.",
    "Fla.",
    "g.",
    "Ga.",
    "Gen.",
    "Gov.",
    "h.",
    "i.",
    "i.e.",
    "I.e.",
    "I.E.",
    "Ill.",
    "Inc.",
    "Ind.",
    "j.",
    "Jan.",
    "Jr.",
    "Jul.",
    "Jun.",
    "k.",
    "Kan.",
    "Kans.",
    "Ky.",
    "l.",
    "La.",
    "Ltd.",
    "m.",
    "Mar.",
    "Mass.",
    "May.",
    "Md.",
    "Messrs.",
    "Mich.",
    "Minn.",
    "Miss.",
    "Mo.",
    "Mont.",
    "Mr.",
    "Mrs.",
    "Ms.",
    "n.",
    "N.C.",
    "N.D.",
    "N.H.",
    "N.J.",
    "N.M.",
    "N.Y.",
    "Neb.",
    "Nebr.",
    "Nev.",
    "Nov.",
    "o.",
    "Oct.",
    "Okla.",
    "Ore.",
    "p.",
    "p.m.",
    "Pa.",
    "Ph.D.",
    "q.",
    "r.",
    "Rep.",
    "Rev.",
    "s.",
    "Sen.",
    "Sep.",
    "Sept.",
    "St.",
    "t.",
    "Tenn.",
    "u.",
    "v.",
    "Va.",
    "vs.",
    "w.",
    "Wash.",
    "Wis.",
    "x.",
    "y.",
    "z.",
]
2016-12-07 20:11:59 +00:00
# Register every self-mapping string (the abbreviations above and the imported
# emoticons) as a one-token tokenizer exception whose ORTH is the string itself.
for orths in (self_map, EMOTICONS):
    # A string that already has a hand-written exception must not be replaced
    # by a self-mapping; fail at import time and report the clashing strings.
    overlap = set(orths) & set(TOKENIZER_EXCEPTIONS)
    assert not overlap, overlap
    TOKENIZER_EXCEPTIONS.update(
        dict((form, [{ORTH: form}]) for form in orths)
    )
2016-12-07 19:29:52 +00:00
2016-11-24 12:51:32 +00:00
# Literal strings the tokenizer may split off the front of a token, in the
# order they are listed. Multi-character entries are plain text, not regular
# expressions (none of them contains a backslash).
TOKENIZER_PREFIXES = [
    ",",
    '"',
    "(",
    "[",
    "{",
    "*",
    "<",
    "$",
    "£",
    "'",
    "``",
    "`",
    "#",
    "US$",
    "C$",
    "A$",
    "a-",
    "....",
    "...",
]
2016-11-24 12:51:32 +00:00
# Suffix patterns, one regular-expression fragment per line of the raw string;
# the list is produced by splitting the stripped literal on newlines.
#
# NOTE(review): the two entries after 's / 'S are the typographic-apostrophe
# forms (U+2019 followed by s / S). They replace bare `s` and `S` entries,
# which, if matched at the end of a token as the name of this list suggests,
# would split the final "s" off every word. The apostrophe character appears
# to have been lost in transit; confirm against the upstream revision.
TOKENIZER_SUFFIXES = r'''
,
\"
\)
\]
\}
\*
\!
\?
%
\$
>
:
;
'
''
's
'S
’s
’S
\.\.
\.\.\.
\.\.\.\.
(?<=[a-z0-9)\]"'%\)])\.
(?<=[0-9])km
'''.strip().split('\n')
2016-11-24 12:51:32 +00:00
# Infix patterns, one regular-expression fragment per line of the raw string;
# the list is produced by splitting the stripped literal on newlines. In order:
# an ellipsis of three or more dots, a period between a lower-case and an
# upper-case letter, a hyphen or double hyphen between letters, a hyphen
# between digits, and a comma between letters.
#
# The letter look-aheads use [a-zA-Z]. They were previously written [a-zA-z];
# the range A-z also covers the ASCII characters between "Z" and "a"
# ([ \ ] ^ _ `), so a hyphen followed by e.g. "_" was treated as an infix.
TOKENIZER_INFIXES = r'''
\.\.\.+
(?<=[a-z])\.(?=[A-Z])
(?<=[a-zA-Z])-(?=[a-zA-Z])
(?<=[a-zA-Z])--(?=[a-zA-Z])
(?<=[0-9])-(?=[0-9])
(?<=[A-Za-z]),(?=[A-Za-z])
'''.strip().split('\n')