Add lowercase lemma to tokenizer exceptions

This commit is contained in:
Ines Montani 2017-01-03 23:01:31 +01:00
parent dd7cd44ba5
commit 1d237664af
1 changed file with 13 additions and 13 deletions

View File

@ -112,44 +112,44 @@ for pron in ["you", "we", "they"]:
for word in ["who", "what", "when", "where", "why", "how", "there", "that"]:
for orth in [word, word.title()]:
EXC[orth + "'s"] = [
{ORTH: orth},
{ORTH: orth, LEMMA: word},
{ORTH: "'s"}
]
EXC[orth + "s"] = [
{ORTH: orth},
{ORTH: orth, LEMMA: word},
{ORTH: "s"}
]
EXC[orth + "'ll"] = [
{ORTH: orth},
{ORTH: orth, LEMMA: word},
{ORTH: "'ll", LEMMA: "will", TAG: "MD"}
]
EXC[orth + "ll"] = [
{ORTH: orth},
{ORTH: orth, LEMMA: word},
{ORTH: "ll", LEMMA: "will", TAG: "MD"}
]
EXC[orth + "'ll've"] = [
{ORTH: orth},
{ORTH: orth, LEMMA: word},
{ORTH: "ll", LEMMA: "will", TAG: "MD"},
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
]
EXC[orth + "llve"] = [
{ORTH: orth},
{ORTH: orth, LEMMA: word},
{ORTH: "ll", LEMMA: "will", TAG: "MD"},
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
]
EXC[orth + "'re"] = [
{ORTH: orth},
{ORTH: orth, LEMMA: word},
{ORTH: "'re", LEMMA: "be", NORM: "are"}
]
EXC[orth + "re"] = [
{ORTH: orth},
{ORTH: orth, LEMMA: word},
{ORTH: "re", LEMMA: "be", NORM: "are"}
]
@ -159,28 +159,28 @@ for word in ["who", "what", "when", "where", "why", "how", "there", "that"]:
]
EXC[orth + "ve"] = [
{ORTH: orth},
{ORTH: orth, LEMMA: word},
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
]
EXC[orth + "'d"] = [
{ORTH: orth},
{ORTH: orth, LEMMA: word},
{ORTH: "'d"}
]
EXC[orth + "d"] = [
{ORTH: orth},
{ORTH: orth, LEMMA: word},
{ORTH: "d"}
]
EXC[orth + "'d've"] = [
{ORTH: orth},
{ORTH: orth, LEMMA: word},
{ORTH: "'d", LEMMA: "would", TAG: "MD"},
{ORTH: "'ve", LEMMA: "have", TAG: "VB"}
]
EXC[orth + "dve"] = [
{ORTH: orth},
{ORTH: orth, LEMMA: word},
{ORTH: "d", LEMMA: "would", TAG: "MD"},
{ORTH: "ve", LEMMA: "have", TAG: "VB"}
]