From 5e27bd0c4c990bf6e4d6e5ba5c007fbe234e1d78 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 25 Feb 2015 17:10:32 -0500 Subject: [PATCH] * Add en language data, for tokenizer etc --- lang_data/en/infix.txt | 1 + lang_data/en/morphs.json | 42 +++ lang_data/en/prefix.txt | 21 ++ lang_data/en/specials.json | 561 +++++++++++++++++++++++++++++++++++++ lang_data/en/suffix.txt | 24 ++ 5 files changed, 649 insertions(+) create mode 100644 lang_data/en/infix.txt create mode 100644 lang_data/en/morphs.json create mode 100644 lang_data/en/prefix.txt create mode 100644 lang_data/en/specials.json create mode 100644 lang_data/en/suffix.txt diff --git a/lang_data/en/infix.txt b/lang_data/en/infix.txt new file mode 100644 index 000000000..28169ecbb --- /dev/null +++ b/lang_data/en/infix.txt @@ -0,0 +1 @@ +(?<=[a-z])\.(?=[A-Z]) diff --git a/lang_data/en/morphs.json b/lang_data/en/morphs.json new file mode 100644 index 000000000..fe361654a --- /dev/null +++ b/lang_data/en/morphs.json @@ -0,0 +1,42 @@ +{ + "PRP": { + "I": {"L": "-PRON-", "person": 1, "number": 1, "gender": 0, "case": 1}, + "me": {"L": "-PRON-", "person": 1, "number": 1, "gender": 0, "case": 3}, + "mine": {"L": "-PRON-", "person": 1, "number": 1, "gender": 0, "case": 2}, + "myself": {"L": "-PRON-", "person": 1, "number": 1, "gender": 0, "case": 4}, + "you": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 0}, + "yours": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 2}, + "yourself": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 4}, + "he": {"L": "-PRON-", "person": 3, "number": 1, "gender": 1, "case": 1}, + "him": {"L": "-PRON-", "person": 3, "number": 1, "gender": 1, "case": 3}, + "his": {"L": "-PRON-", "person": 3, "number": 1, "gender": 1, "case": 2}, + "himself": {"L": "-PRON-", "person": 3, "number": 1, "gender": 1, "case": 4}, + "she": {"L": "-PRON-", "person": 3, "number": 1, "gender": 2, "case": 1}, + "her": {"L": "-PRON-", "person": 3, "number": 1, 
"gender": 2, "case": 3}, + "hers": {"L": "-PRON-", "person": 3, "number": 1, "gender": 2, "case": 2}, + "herself": {"L": "-PRON-", "person": 3, "number": 1, "gender": 2, "case": 4}, + "it": {"L": "-PRON-", "person": 3, "number": 1, "gender": 3, "case": 0}, + "its": {"L": "-PRON-", "person": 3, "number": 1, "gender": 3, "case": 2}, + "itself": {"L": "-PRON-", "person": 3, "number": 1, "gender": 3, "case": 4}, + "themself": {"L": "-PRON-", "person": 3, "number": 1, "gender": 0, "case": 4}, + "we": {"L": "-PRON-", "person": 1, "number": 2, "gender": 0, "case": 1}, + "us": {"L": "-PRON-", "person": 1, "number": 2, "gender": 0, "case": 3}, + "ours": {"L": "-PRON-", "person": 1, "number": 2, "gender": 0, "case": 2}, + "ourselves": {"L": "-PRON-", "person": 1, "number": 2, "gender": 0, "case": 4}, + "yourselves": {"L": "-PRON-", "person": 2, "number": 2, "gender": 0, "case": 4}, + "they": {"L": "-PRON-", "person": 3, "number": 2, "gender": 0, "case": 1}, + "them": {"L": "-PRON-", "person": 3, "number": 2, "gender": 0, "case": 3}, + "their": {"L": "-PRON-", "person": 3, "number": 2, "gender": 0, "case": 2}, + "themselves": {"L": "-PRON-", "person": 3, "number": 2, "gender": 0, "case": 4} + }, + + "PRP$": { + "my": {"L": "-PRON-", "person": 1, "number": 1, "gender": 0, "case": 2}, + "your": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 2}, + "his": {"L": "-PRON-", "person": 3, "number": 1, "gender": 1, "case": 2}, + "her": {"L": "-PRON-", "person": 3, "number": 1, "gender": 2, "case": 2}, + "its": {"L": "-PRON-", "person": 3, "number": 1, "gender": 3, "case": 2}, + "our": {"L": "-PRON-", "person": 1, "number": 2, "gender": 0, "case": 2}, + "their": {"L": "-PRON-", "person": 3, "number": 2, "gender": 0, "case": 2} + } +} diff --git a/lang_data/en/prefix.txt b/lang_data/en/prefix.txt new file mode 100644 index 000000000..48c4fc549 --- /dev/null +++ b/lang_data/en/prefix.txt @@ -0,0 +1,21 @@ +, +" +( +[ +{ +* +< +$ +£ +“ +' +`` +` +# +US$ +C$ +A$ +a- +‘ +.... 
+... diff --git a/lang_data/en/specials.json b/lang_data/en/specials.json new file mode 100644 index 000000000..3638e3ad8 --- /dev/null +++ b/lang_data/en/specials.json @@ -0,0 +1,561 @@ +{ +"'s": [{"F": "'s", "L": "'s"}], + +"'S": [{"F": "'S", "L": "'s"}], + +"ain't": [{"F": "ai", "L": "be", "pos": "VBP", "number": 2}, + {"F": "n't", "L": "not", "pos": "RB"}], +"aint": [{"F": "ai", "L": "be", "pos": "VBP", "number": 2}, + {"F": "n't", "L": "not", "pos": "RB"}], +"Ain't": [{"F": "Ai", "L": "be", "pos": "VBP", "number": 2}, + {"F": "n't", "L": "not", "pos": "RB"}], + +"aren't": [{"F": "are", "L": "be", "pos": "VBP", "number": 2}, + {"F": "n't", "L": "not"}], +"arent": [{"F": "are", "L": "be", "pos": "VBP", "number": 2}, + {"F": "n't", "L": "not"}], +"Aren't": [{"F": "Are", "L": "be", "pos": "VBP", "number": 2}, + {"F": "n't", "L": "not"}], + +"can't": [{"F": "ca", "L": "can", "pos": "MD"}, + {"F": "n't", "L": "not", "pos": "RB"}], +"cant": [{"F": "ca", "L": "can", "pos": "MD"}, + {"F": "n't", "L": "not", "pos": "RB"}], +"Can't": [{"F": "Ca", "L": "can", "pos": "MD"}, + {"F": "n't", "L": "not", "pos": "RB"}], + +"cannot": [{"F": "can", "pos": "MD"}, + {"F": "not", "L": "not", "pos": "RB"}], +"Cannot": [{"F": "Can", "pos": "MD"}, + {"F": "not", "L": "not", "pos": "RB"}], + +"could've": [{"F": "could", "pos": "MD"}, + {"F": "'ve", "L": "have", "pos": "VB"}], +"couldve": [{"F": "could", "pos": "MD"}, + {"F": "'ve", "L": "have", "pos": "VB"}], +"Could've": [{"F": "Could", "pos": "MD"}, + {"F": "'ve", "L": "have", "pos": "VB"}], + +"couldn't": [{"F": "could", "pos": "MD"}, + {"F": "n't", "L": "not", "pos": "RB"}], +"couldnt": [{"F": "could", "pos": "MD"}, + {"F": "n't", "L": "not", "pos": "RB"}], +"Couldn't": [{"F": "Could", "pos": "MD"}, + {"F": "n't", "L": "not", "pos": "RB"}], + +"couldn't've": [{"F": "could", "pos": "MD"}, + {"F": "n't", "L": "not", "pos": "RB"}, + {"F": "'ve", "pos": "VB"}], +"couldntve": [{"F": "could", "pos": "MD"}, + {"F": "n't", "L": "not", 
"pos": "RB"}, + {"F": "'ve", "pos": "VB"}], +"Couldn't've": [{"F": "Could", "pos": "MD"}, + {"F": "n't", "L": "not", "pos": "RB"}, + {"F": "'ve", "pos": "VB"}], + +"didn't": [{"F": "did", "pos": "VBD", "L": "do"}, + {"F": "n't", "L": "not", "pos": "RB"}], +"didnt": [{"F": "did", "pos": "VBD", "L": "do"}, + {"F": "n't", "L": "not", "pos": "RB"}], +"Didn't": [{"F": "Did", "pos": "VBD", "L": "do"}, + {"F": "n't", "L": "not", "pos": "RB"}], + +"doesn't": [{"F": "does", "L": "do", "pos": "VBZ"}, + {"F": "n't", "L": "not", "pos": "RB"}], +"doesnt": [{"F": "does", "L": "do", "pos": "VBZ"}, + {"F": "n't", "L": "not", "pos": "RB"}], +"Doesn't": [{"F": "Does", "L": "do", "pos": "VBZ"}, + {"F": "n't", "L": "not", "pos": "RB"}], + +"don't": [{"F": "do", "L": "do"}, + {"F": "n't", "L": "not", "pos": "RB"}], +"dont": [{"F": "do", "L": "do"}, + {"F": "n't", "L": "not", "pos": "RB"}], +"Don't": [{"F": "Do", "L": "do"}, + {"F": "n't", "L": "not", "pos": "RB"}], + +"hadn't": [{"F": "had", "L": "have", "pos": "VBD"}, + {"F": "n't", "L": "not", "pos": "RB"}], +"hadnt": [{"F": "had", "L": "have", "pos": "VBD"}, + {"F": "n't", "L": "not", "pos": "RB"}], +"Hadn't": [{"F": "Had", "L": "have", "pos": "VBD"}, + {"F": "n't", "L": "not", "pos": "RB"}], + +"hadn't've": [{"F": "had", "L": "have", "pos": "VBD"}, + {"F": "n't", "L": "not", "pos": "RB"}, + {"F": "'ve", "L": "have", "pos": "VB"}], + +"hasn't": [{"F": "has"}, + {"F": "n't", "L": "not", "pos": "RB"}], +"hasnt": [{"F": "has"}, + {"F": "n't", "L": "not", "pos": "RB"}], +"haven't": [{"F": "have", "pos": "VB"}, + {"F": "n't", "L": "not", "pos": "RB"}], +"havent": [{"F": "have", "pos": "VB"}, + {"F": "n't", "L": "not", "pos": "RB"}], + + +"he'd": [{"F": "he"}, + {"F": "'d", "L": "would", "pos": "MD"}], +"hed": [{"F": "he"}, + {"F": "'d", "L": "would", "pos": "MD"}], + + +"he'd've": [{"F": "he"}, + {"F": "'d", "L": "would", "pos": "MD"}, + {"F": "'ve", "pos": "VB"}], +"hedve": [{"F": "he"}, + {"F": "'d", "L": "would", "pos": "MD"}, + {"F": 
"'ve", "pos": "VB"}], + + +"he'll": [{"F": "he"}, + {"F": "'ll", "L": "will", "pos": "MD"}], + +"he's": [{"F": "he"}, + {"F": "'s"}], + +"hes": [{"F": "he"}, + {"F": "'s"}], + + +"how'd": [{"F": "how"}, + {"F": "'d", "L": "would", "pos": "MD"}], +"howd": [{"F": "how"}, + {"F": "'d", "L": "would", "pos": "MD"}], + + +"how'll": [{"F": "how"}, + {"F": "'ll", "L": "will", "pos": "MD"}], +"howll": [{"F": "how"}, + {"F": "'ll", "L": "will", "pos": "MD"}], + + +"how's": [{"F": "how"}, + {"F": "'s"}], +"hows": [{"F": "how"}, + {"F": "'s"}], + + +"I'd": [{"F": "I"}, + {"F": "'d", "L": "would", "pos": "MD"}], + +"I'd've": [{"F": "I"}, + {"F": "'d", "L": "would", "pos": "MD"}, + {"F": "'ve", "pos": "VB"}], + +"I'll": [{"F": "I"}, + {"F": "'ll", "L": "will", "pos": "MD"}], + +"I'm": [{"F": "I"}, + {"F": "'m", "L": "be", "pos": "VBP", "number": 1, "tenspect": 1}], +"Im": [{"F": "I"}, + {"F": "'m", "L": "be", "pos": "VBP", "number": 1, "tenspect": 1}], +"im": [{"F": "i"}, + {"F": "'m", "L": "be", "pos": "VBP", "number": 1, "tenspect": 1}], + +"I'ma": [{"F": "I"}, + {"F": "'ma"}], + +"I've": [{"F": "I"}, + {"F": "'ve", "L": "have", "pos": "VB"}], + +"isn't": [{"F": "is", "L": "be", "pos": "VBZ"}, + {"F": "n't", "L": "not", "pos": "RB"}], +"isnt": [{"F": "is", "L": "be", "pos": "VBZ"}, + {"F": "n't", "L": "not", "pos": "RB"}], + +"Isn't": [{"F": "Is", "L": "be", "pos": "VBZ"}, + {"F": "n't", "L": "not", "pos": "RB"}], + + +"it'd": [{"F": "it"}, + {"F": "'d", "L": "would", "pos": "MD"}], + +"it'd've": [{"F": "it"}, + {"F": "'d", "L": "would", "pos": "MD"}, + {"F": "'ve"}], + +"it'll": [{"F": "it"}, + {"F": "'ll", "L": "will", "pos": "MD"}], +"itll": [{"F": "it"}, + {"F": "'ll", "L": "will", "pos": "MD"}], + + +"it's": [{"F": "it"}, + {"F": "'s"}], + +"let's": [{"F": "let"}, + {"F": "'s"}], +"lets": [{"F": "let"}, + {"F": "'s"}], + + +"mightn't": [{"F": "might"}, + {"F": "n't", "L": "not", "pos": "RB"}], + +"mightn't've": [{"F": "might"}, + {"F": "n't", "L": "not", "pos": 
"RB"}, + {"F": "'ve", "pos": "VB"}], + +"might've": [{"F": "might"}, + {"F": "'ve", "pos": "VB"}], + +"mustn't": [{"F": "must"}, + {"F": "n't", "L": "not", "pos": "RB"}], + +"must've": [{"F": "must"}, + {"F": "'ve", "pos": "VB"}], + +"needn't": [{"F": "need"}, + {"F": "n't", "L": "not", "pos": "RB"}], + +"not've": [{"F": "not"}, + {"F": "'ve", "pos": "VB"}], + +"shan't": [{"F": "sha"}, + {"F": "n't", "L": "not", "pos": "RB"}], + +"she'd": [{"F": "she"}, + {"F": "'d", "L": "would", "pos": "MD"}], + +"she'd've": [{"F": "she"}, + {"F": "'d", "L": "would", "pos": "MD"}, + {"F": "'ve", "pos": "VB"}], + +"she'll": [{"F": "she"}, + {"F": "'ll", "L": "will", "pos": "MD"}], + +"she's": [{"F": "she"}, + {"F": "'s"}], + +"should've": [{"F": "should"}, + {"F": "'ve", "pos": "VB"}], + +"shouldn't": [{"F": "should"}, + {"F": "n't", "L": "not", "pos": "RB"}], + +"shouldn't've": [{"F": "should"}, + {"F": "n't", "L": "not", "pos": "RB"}, + {"F": "'ve"}], + +"that's": [{"F": "that"}, + {"F": "'s"}], + +"thats": [{"F": "that"}, + {"F": "'s"}], + + +"there'd": [{"F": "there"}, + {"F": "'d", "L": "would", "pos": "MD"}], + +"there'd've": [{"F": "there"}, + {"F": "'d", "L": "would", "pos": "MD"}, + {"F": "'ve", "pos": "VB"}], + +"there's": [{"F": "there"}, + {"F": "'s"}], + +"they'd": [{"F": "they"}, + {"F": "'d", "L": "would", "pos": "MD"}], +"They'd": [{"F": "They"}, + {"F": "'d", "L": "would", "pos": "MD"}], + + +"they'd've": [{"F": "they"}, + {"F": "'d", "L": "would", "pos": "MD"}, + {"F": "'ve", "pos": "VB"}], +"They'd've": [{"F": "They"}, + {"F": "'d", "L": "would", "pos": "MD"}, + {"F": "'ve", "pos": "VB"}], + +"they'll": [{"F": "they"}, + {"F": "'ll", "L": "will", "pos": "MD"}], +"They'll": [{"F": "They"}, + {"F": "'ll", "L": "will", "pos": "MD"}], + + +"they're": [{"F": "they"}, + {"F": "'re"}], +"They're": [{"F": "They"}, + {"F": "'re"}], + +"they've": [{"F": "they"}, + {"F": "'ve", "pos": "VB"}], +"They've": [{"F": "They"}, + {"F": "'ve", "pos": "VB"}], + +"wasn't": [{"F": 
"was"}, + {"F": "n't", "L": "not", "pos": "RB"}], + +"we'd": [{"F": "we"}, + {"F": "'d", "L": "would", "pos": "MD"}], +"We'd": [{"F": "We"}, + {"F": "'d", "L": "would", "pos": "MD"}], + + +"we'd've": [{"F": "we"}, + {"F": "'d", "L": "would", "pos": "MD"}, + {"F": "'ve", "pos": "VB"}], + +"we'll": [{"F": "we"}, + {"F": "'ll", "L": "will", "pos": "MD"}], +"We'll": [{"F": "We", "L": "we"}, + {"F": "'ll", "L": "will", "pos": "MD"}], + + +"we're": [{"F": "we"}, + {"F": "'re"}], +"We're": [{"F": "We"}, + {"F": "'re"}], + +"we've": [{"F": "we"}, + {"F": "'ve", "pos": "VB"}], +"We've": [{"F": "We"}, + {"F": "'ve", "pos": "VB"}], + + +"weren't": [{"F": "were"}, + {"F": "n't", "L": "not", "pos": "RB"}], + +"what'll": [{"F": "what"}, + {"F": "'ll", "L": "will", "pos": "MD"}], + +"what're": [{"F": "what"}, + {"F": "'re"}], + +"what's": [{"F": "what"}, + {"F": "'s"}], + +"what've": [{"F": "what"}, + {"F": "'ve", "pos": "VB"}], + +"when's": [{"F": "when"}, + {"F": "'s"}], + +"where'd": [{"F": "where"}, + {"F": "'d", "L": "would", "pos": "MD"}], + +"where's": [{"F": "where"}, + {"F": "'s"}], + +"where've": [{"F": "where"}, + {"F": "'ve", "pos": "VB"}], + +"who'd": [{"F": "who"}, + {"F": "'d", "L": "would", "pos": "MD"}], + +"who'll": [{"F": "who"}, + {"F": "'ll", "L": "will", "pos": "MD"}], + +"who're": [{"F": "who"}, + {"F": "'re"}], + +"who's": [{"F": "who"}, + {"F": "'s"}], + +"who've": [{"F": "who"}, + {"F": "'ve", "pos": "VB"}], + +"why'll": [{"F": "why"}, + {"F": "'ll", "L": "will", "pos": "MD"}], + +"why're": [{"F": "why"}, + {"F": "'re"}], + +"why's": [{"F": "why"}, + {"F": "'s"}], + +"won't": [{"F": "wo"}, + {"F": "n't", "L": "not", "pos": "RB"}], +"wont": [{"F": "wo"}, + {"F": "n't", "L": "not", "pos": "RB"}], + + +"would've": [{"F": "would"}, + {"F": "'ve", "pos": "VB"}], + +"wouldn't": [{"F": "would"}, + {"F": "n't", "L": "not", "pos": "RB"}], + +"wouldn't've": [{"F": "would"}, + {"F": "n't", "L": "not", "pos": "RB"}, + {"F": "'ve", "L": "have", "pos": "VB"}], + 
+"you'd": [{"F": "you"}, + {"F": "'d", "L": "would", "pos": "MD"}], + +"you'd've": [{"F": "you"}, + {"F": "'d", "L": "would", "pos": "MD"}, + {"F": "'ve", "L": "have", "pos": "VB"}], + +"you'll": [{"F": "you"}, + {"F": "'ll", "L": "will", "pos": "MD"}], + +"you're": [{"F": "you"}, + {"F": "'re"}], +"You're": [{"F": "You"}, + {"F": "'re"}], + + +"you've": [{"F": "you"}, + {"F": "'ve", "L": "have", "pos": "VB"}], + +"'em": [{"F": "'em"}], + +"'ol": [{"F": "'ol"}], + +"vs.": [{"F": "vs."}], + +"Ms.": [{"F": "Ms."}], +"Mr.": [{"F": "Mr."}], +"Dr.": [{"F": "Dr."}], +"Mrs.": [{"F": "Mrs."}], +"Messrs.": [{"F": "Messrs."}], +"Gov.": [{"F": "Gov."}], +"Gen.": [{"F": "Gen."}], + +"''": [{"F": "''"}], + +"Corp.": [{"F": "Corp."}], +"Inc.": [{"F": "Inc."}], +"Co.": [{"F": "Co."}], +"co.": [{"F": "co."}], +"Ltd.": [{"F": "Ltd."}], +"Bros.": [{"F": "Bros."}], + +"Rep.": [{"F": "Rep."}], +"Sen.": [{"F": "Sen."}], +"Jr.": [{"F": "Jr."}], +"Rev.": [{"F": "Rev."}], +"Adm.": [{"F": "Adm."}], +"St.": [{"F": "St."}], + +"a.m.": [{"F": "a.m."}], +"p.m.": [{"F": "p.m."}], + +"Jan.": [{"F": "Jan."}], +"Feb.": [{"F": "Feb."}], +"Mar.": [{"F": "Mar."}], +"Apr.": [{"F": "Apr."}], +"May.": [{"F": "May."}], +"Jun.": [{"F": "Jun."}], +"Jul.": [{"F": "Jul."}], +"Aug.": [{"F": "Aug."}], +"Sep.": [{"F": "Sep."}], +"Sept.": [{"F": "Sept."}], +"Oct.": [{"F": "Oct."}], +"Nov.": [{"F": "Nov."}], +"Dec.": [{"F": "Dec."}], + +"Ala.": [{"F": "Ala."}], +"Ariz.": [{"F": "Ariz."}], +"Ark.": [{"F": "Ark."}], +"Calif.": [{"F": "Calif."}], +"Colo.": [{"F": "Colo."}], +"Conn.": [{"F": "Conn."}], +"Del.": [{"F": "Del."}], +"D.C.": [{"F": "D.C."}], +"Fla.": [{"F": "Fla."}], +"Ga.": [{"F": "Ga."}], +"Ill.": [{"F": "Ill."}], +"Ind.": [{"F": "Ind."}], +"Kans.": [{"F": "Kans."}], +"Kan.": [{"F": "Kan."}], +"Ky.": [{"F": "Ky."}], +"La.": [{"F": "La."}], +"Md.": [{"F": "Md."}], +"Mass.": [{"F": "Mass."}], +"Mich.": [{"F": "Mich."}], +"Minn.": [{"F": "Minn."}], +"Miss.": [{"F": "Miss."}], +"Mo.": [{"F": "Mo."}], 
+"Mont.": [{"F": "Mont."}], +"Nebr.": [{"F": "Nebr."}], +"Neb.": [{"F": "Neb."}], +"Nev.": [{"F": "Nev."}], +"N.H.": [{"F": "N.H."}], +"N.J.": [{"F": "N.J."}], +"N.M.": [{"F": "N.M."}], +"N.Y.": [{"F": "N.Y."}], +"N.C.": [{"F": "N.C."}], +"N.D.": [{"F": "N.D."}], +"Okla.": [{"F": "Okla."}], +"Ore.": [{"F": "Ore."}], +"Pa.": [{"F": "Pa."}], +"Tenn.": [{"F": "Tenn."}], +"Va.": [{"F": "Va."}], +"Wash.": [{"F": "Wash."}], +"Wis.": [{"F": "Wis."}], + +":)": [{"F": ":)"}], +"<3": [{"F": "<3"}], +";)": [{"F": ";)"}], +"(:": [{"F": "(:"}], +":(": [{"F": ":("}], +"-_-": [{"F": "-_-"}], +"=)": [{"F": "=)"}], +":/": [{"F": ":/"}], +":>": [{"F": ":>"}], +";-)": [{"F": ";-)"}], +":Y": [{"F": ":Y"}], +":P": [{"F": ":P"}], +":-P": [{"F": ":-P"}], +":3": [{"F": ":3"}], +"=3": [{"F": "=3"}], +"xD": [{"F": "xD"}], +"^_^": [{"F": "^_^"}], +"=]": [{"F": "=]"}], +"=D": [{"F": "=D"}], +"<333": [{"F": "<333"}], +":))": [{"F": ":))"}], +":0": [{"F": ":0"}], +"-__-": [{"F": "-__-"}], +"xDD": [{"F": "xDD"}], +"o_o": [{"F": "o_o"}], +"o_O": [{"F": "o_O"}], +"V_V": [{"F": "V_V"}], +"=[[": [{"F": "=[["}], +"<33": [{"F": "<33"}], +";p": [{"F": ";p"}], +";D": [{"F": ";D"}], +";-p": [{"F": ";-p"}], +";(": [{"F": ";("}], +":p": [{"F": ":p"}], +":]": [{"F": ":]"}], +":O": [{"F": ":O"}], +":-/": [{"F": ":-/"}], +":-)": [{"F": ":-)"}], +":(((": [{"F": ":((("}], +":((": [{"F": ":(("}], +":')": [{"F": ":')"}], +"(^_^)": [{"F": "(^_^)"}], +"(=": [{"F": "(="}], +"o.O": [{"F": "o.O"}], +"\")": [{"F": "\")"}], +"a.": [{"F": "a."}], +"b.": [{"F": "b."}], +"c.": [{"F": "c."}], +"d.": [{"F": "d."}], +"e.": [{"F": "e."}], +"f.": [{"F": "f."}], +"g.": [{"F": "g."}], +"h.": [{"F": "h."}], +"i.": [{"F": "i."}], +"j.": [{"F": "j."}], +"k.": [{"F": "k."}], +"l.": [{"F": "l."}], +"m.": [{"F": "m."}], +"n.": [{"F": "n."}], +"o.": [{"F": "o."}], +"p.": [{"F": "p."}], +"q.": [{"F": "q."}], +"s.": [{"F": "s."}], +"t.": [{"F": "t."}], +"u.": [{"F": "u."}], +"v.": [{"F": "v."}], +"w.": [{"F": "w."}], +"x.": [{"F": "x."}], 
+"y.": [{"F": "y."}], +"z.": [{"F": "z."}] +} diff --git a/lang_data/en/suffix.txt b/lang_data/en/suffix.txt new file mode 100644 index 000000000..5ac21dbc9 --- /dev/null +++ b/lang_data/en/suffix.txt @@ -0,0 +1,24 @@ +, +\" +\) +\] +\} +\* +\! +\? +% +\$ +> +: +; +' +” +'' +'s +'S +’ +\.\. +\.\.\. +\.\.\.\. +(?<=[a-z0-9)\]"'%\)])\. +(?<=[0-9])km