Fix inconsistencies in generate_specials.py

Re Issue #321: fix inconsistencies in the script that generates specials.json. The result is still not entirely satisfying --- we will need to revise this as we move on to parsing more morphologically rich languages.
2016-04-07 11:21:52 +10:00 · 2016-04-07 11:21:52 +10:00 · 85485f5c2b
parent 357e2aaece
commit 85485f5c2b
1 changed files with 30 additions and 31 deletions
--- a/lang_data/en/generate_specials.py
+++ b/lang_data/en/generate_specials.py
@ -11,49 +11,48 @@ token_properties = {
            "are": {"L": "be", "pos": "VBP", "number": 2},
            "ca": {"L": "can", "pos": "MD"},
            "can": {"L": "can", "pos": "MD"},
-            "could": {"pos": "MD"},                         # no lemma for could?
+            "could": {"pos": "MD", "L": "could"},
            "'d": {"L": "would", "pos": "MD"},
            "did": {"L": "do", "pos": "VBD"},
-            "do": {"L": "do"},                              # no POS for do?
+            "do": {"L": "do"},
            "does": {"L": "do", "pos": "VBZ"},
            "had": {"L": "have", "pos": "VBD"},
-            "has": {},                                      # no POS or lemma for has?
-            "have": {"pos": "VB"},                          # no lemma for have?
-            "he": {"L": "-PRON-"},                          # no POS for he?
-            "how": {},                                      # no POS or lemma for how?
-            "i": {"L": "-PRON-"},                           # no POS for i?
+            "has": {"L": "have", "pos": "VBZ"},
+            "have": {"pos": "VB"},
+            "he": {"L": "-PRON-", "pos": "PRP"},
+            "how": {},
+            "i": {"L": "-PRON-", "pos": "PRP"},
            "is": {"L": "be", "pos": "VBZ"},
-            "it": {"L": "-PRON-"},                          # no POS for it?
+            "it": {"L": "-PRON-", "pos": "PRP"},
            "'ll": {"L": "will", "pos": "MD"},
            "'m": {"L": "be", "pos": "VBP", "number": 1, "tenspect": 1},
-            "'ma": {},                                      # no POS or lemma for ma?
-            "might": {},                                    # no POS or lemma for might?
-            "must": {},                                     # no POS or lemma for must?
-            "need": {},                                     # no POS or lemma for need?
+            "'ma": {},
+            "might": {},
+            "must": {},
+            "need": {}, 
            "not": {"L": "not", "pos": "RB"},
            "'nt": {"L": "not", "pos": "RB"},
            "n't": {"L": "not", "pos": "RB"},
-            "'re": {},                                      # no POS or lemma for re?
+            "'re": {"L": "be", "pos": "VBZ"},
            "'s": {},                                       # no POS or lemma for s?
-            "sha": {},                                      # no POS or lemma for sha?
-            "she": {"L": "-PRON-"},                         # no POS for she?
-            "should": {},                                   # no POS or lemma for should?
-            "that": {},                                     # no POS or lemma for that?
-            "there": {},                                    # no POS or lemma for there?
-            "they": {"L": "-PRON-"},                        # no POS for they?
-            "was": {},                                      # no POS or lemma for was?
-            "we": {},                                       # no POS or lemma for we?
-            "were": {},                                     # no POS or lemma for were?
-            "what": {},                                     # no POS or lemma for what?
-            "when": {},                                     # no POS or lemma for when?
-            "where": {},                                    # no POS or lemma for where?
-            "who": {},                                      # no POS or lemma for who?
-            "why": {},                                      # no POS or lemma for why?
-            "wo": {},                                       # no POS or lemma for wo?
-            "would": {},                                    # no POS or lemma for would?
-            "you": {"L": "-PRON-"},                         # no POS or lemma for you?
+            "sha": {"L": "shall", "pos": "MD"},
+            "she": {"L": "-PRON-", "pos": "PRP"},
+            "should": {},
+            "that": {},
+            "there": {},
+            "they": {"L": "-PRON-", "pos": "PRP"},
+            "was": {},
+            "we": {"L": "-PRON-", "pos": "PRP"},
+            "were": {},
+            "what": {},
+            "when": {},
+            "where": {},
+            "who": {},
+            "why": {},
+            "wo": {},
+            "would": {},
+            "you": {"L": "-PRON-", "pos": "PRP"},
            "'ve": {"L": "have", "pos": "VB"}
-
 }

 # contains starting tokens with their potential contractions