From 41a322c733808c2ccd6217cd69cb3309df40d2cd Mon Sep 17 00:00:00 2001 From: ines Date: Mon, 8 May 2017 19:57:36 +0200 Subject: [PATCH] Fix LEMMA in exceptions and morph rules --- spacy/de/tokenizer_exceptions.py | 22 ++++++++++------------ spacy/deprecated.py | 1 - spacy/es/tokenizer_exceptions.py | 6 +++--- spacy/nb/morph_rules.py | 12 ++++++------ spacy/sv/morph_rules.py | 12 ++++++------ 5 files changed, 25 insertions(+), 28 deletions(-) diff --git a/spacy/de/tokenizer_exceptions.py b/spacy/de/tokenizer_exceptions.py index a4281c7b0..4bb59c490 100644 --- a/spacy/de/tokenizer_exceptions.py +++ b/spacy/de/tokenizer_exceptions.py @@ -2,13 +2,13 @@ from __future__ import unicode_literals from ..symbols import ORTH, LEMMA, TAG, NORM -from ..deprecated import PRON_LEMMA, DET_LEMMA +from ..deprecated import PRON_LEMMA _exc = { "auf'm": [ {ORTH: "auf", LEMMA: "auf"}, - {ORTH: "'m", LEMMA: DET_LEMMA, NORM: "dem" }], + {ORTH: "'m", LEMMA: "der", NORM: "dem" }], "du's": [ {ORTH: "du", LEMMA: PRON_LEMMA, TAG: "PPER"}, @@ -20,7 +20,7 @@ _exc = { "hinter'm": [ {ORTH: "hinter", LEMMA: "hinter"}, - {ORTH: "'m", LEMMA: DET_LEMMA, NORM: "dem"}], + {ORTH: "'m", LEMMA: "der", NORM: "dem"}], "ich's": [ {ORTH: "ich", LEMMA: PRON_LEMMA, TAG: "PPER"}, @@ -36,11 +36,11 @@ _exc = { "unter'm": [ {ORTH: "unter", LEMMA: "unter"}, - {ORTH: "'m", LEMMA: DET_LEMMA, NORM: "dem"}], + {ORTH: "'m", LEMMA: "der", NORM: "dem"}], "vor'm": [ {ORTH: "vor", LEMMA: "vor"}, - {ORTH: "'m", LEMMA: DET_LEMMA, NORM: "dem"}], + {ORTH: "'m", LEMMA: "der", NORM: "dem"}], "wir's": [ {ORTH: "wir", LEMMA: PRON_LEMMA, TAG: "PPER"}, @@ -48,7 +48,7 @@ _exc = { "über'm": [ {ORTH: "über", LEMMA: "über"}, - {ORTH: "'m", LEMMA: DET_LEMMA, NORM: "dem"}] + {ORTH: "'m", LEMMA: "der", NORM: "dem"}] } @@ -57,12 +57,10 @@ for exc_data in [ {ORTH: "'s", LEMMA: PRON_LEMMA, TAG: "PPER"}, {ORTH: "S'", LEMMA: PRON_LEMMA, TAG: "PPER"}, {ORTH: "s'", LEMMA: PRON_LEMMA, TAG: "PPER"}, - {ORTH: "'n", LEMMA: DET_LEMMA, NORM: "ein"}, - {ORTH: "'ne", LEMMA: DET_LEMMA, NORM: "eine"}, - {ORTH: "'nen", LEMMA: DET_LEMMA, NORM: "einen"}, - {ORTH: "'nem", LEMMA: DET_LEMMA, NORM: "einem"}, - {ORTH: "'s", LEMMA: PRON_LEMMA, TAG: "PPER"}, - + {ORTH: "'n", LEMMA: "ein", NORM: "ein"}, + {ORTH: "'ne", LEMMA: "eine", NORM: "eine"}, + {ORTH: "'nen", LEMMA: "ein", NORM: "einen"}, + {ORTH: "'nem", LEMMA: "ein", NORM: "einem"}, {ORTH: "Abb.", LEMMA: "Abbildung"}, {ORTH: "Abk.", LEMMA: "Abkürzung"}, {ORTH: "Abt.", LEMMA: "Abteilung"}, diff --git a/spacy/deprecated.py b/spacy/deprecated.py index d4409dbf0..b806acad3 100644 --- a/spacy/deprecated.py +++ b/spacy/deprecated.py @@ -7,7 +7,6 @@ from .cli import download PRON_LEMMA = "-PRON-" -DET_LEMMA = "-DET-" def depr_model_download(lang): diff --git a/spacy/es/tokenizer_exceptions.py b/spacy/es/tokenizer_exceptions.py index b66d2cda0..524977c73 100644 --- a/spacy/es/tokenizer_exceptions.py +++ b/spacy/es/tokenizer_exceptions.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals from ..symbols import ORTH, LEMMA, TAG, NORM, ADP, DET -from ..deprecated import PRON_LEMMA, DET_LEMMA +from ..deprecated import PRON_LEMMA _exc = { @@ -32,11 +32,11 @@ _exc = { "pal": [ {ORTH: "pa", LEMMA: "para"}, - {ORTH: "l", LEMMA: DET_LEMMA, NORM: "el"}], + {ORTH: "l", LEMMA: "el"}], "pala": [ {ORTH: "pa", LEMMA: "para"}, - {ORTH: "la", LEMMA: DET_LEMMA}] + {ORTH: "la"}] } diff --git a/spacy/nb/morph_rules.py b/spacy/nb/morph_rules.py index 4cf8efee5..f41d2efe2 100644 --- a/spacy/nb/morph_rules.py +++ b/spacy/nb/morph_rules.py @@ -51,17 +51,17 @@ MORPH_RULES = { }, "VBZ": { - "er": {LEMMA: "be", "VerbForm": "Fin", "Person": "One", "Tense": "Pres", "Mood": "Ind"}, - "er": {LEMMA: "be", "VerbForm": "Fin", "Person": "Two", "Tense": "Pres", "Mood": "Ind"}, - "er": {LEMMA: "be", "VerbForm": "Fin", "Person": "Three", "Tense": "Pres", "Mood": "Ind"}, + "er": {"VerbForm": "Fin", "Person": "One", "Tense": "Pres", "Mood": "Ind"}, + "er": {"VerbForm": "Fin", "Person": "Two", "Tense": "Pres", "Mood": "Ind"}, + "er": {"VerbForm": "Fin", "Person": "Three", "Tense": "Pres", "Mood": "Ind"}, }, "VBP": { - "er": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"} + "er": {"VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"} }, "VBD": { - "var": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Past", "Number": "Sing"}, - "vært": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Past", "Number": "Plur"} + "var": {"VerbForm": "Fin", "Tense": "Past", "Number": "Sing"}, + "vært": {"VerbForm": "Fin", "Tense": "Past", "Number": "Plur"} } } diff --git a/spacy/sv/morph_rules.py b/spacy/sv/morph_rules.py index 5641f43ca..0c3b1804e 100644 --- a/spacy/sv/morph_rules.py +++ b/spacy/sv/morph_rules.py @@ -53,17 +53,17 @@ MORPH_RULES = { }, "VBZ": { - "är": {LEMMA: "be", "VerbForm": "Fin", "Person": "One", "Tense": "Pres", "Mood": "Ind"}, - "är": {LEMMA: "be", "VerbForm": "Fin", "Person": "Two", "Tense": "Pres", "Mood": "Ind"}, - "är": {LEMMA: "be", "VerbForm": "Fin", "Person": "Three", "Tense": "Pres", "Mood": "Ind"}, + "är": {"VerbForm": "Fin", "Person": "One", "Tense": "Pres", "Mood": "Ind"}, + "är": {"VerbForm": "Fin", "Person": "Two", "Tense": "Pres", "Mood": "Ind"}, + "är": {"VerbForm": "Fin", "Person": "Three", "Tense": "Pres", "Mood": "Ind"}, }, "VBP": { - "är": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"} + "är": {"VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"} }, "VBD": { - "var": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Past", "Number": "Sing"}, - "vart": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Past", "Number": "Plur"} + "var": {"VerbForm": "Fin", "Tense": "Past", "Number": "Sing"}, + "vart": {"VerbForm": "Fin", "Tense": "Past", "Number": "Plur"} } }