Fix LEMMA in exceptions and morph rules

This commit is contained in:
ines 2017-05-08 19:57:36 +02:00
parent 2edc0aee12
commit 41a322c733
5 changed files with 25 additions and 28 deletions

View File

@ -2,13 +2,13 @@
from __future__ import unicode_literals
from ..symbols import ORTH, LEMMA, TAG, NORM
from ..deprecated import PRON_LEMMA, DET_LEMMA
from ..deprecated import PRON_LEMMA
_exc = {
"auf'm": [
{ORTH: "auf", LEMMA: "auf"},
{ORTH: "'m", LEMMA: DET_LEMMA, NORM: "dem" }],
{ORTH: "'m", LEMMA: "der", NORM: "dem" }],
"du's": [
{ORTH: "du", LEMMA: PRON_LEMMA, TAG: "PPER"},
@ -20,7 +20,7 @@ _exc = {
"hinter'm": [
{ORTH: "hinter", LEMMA: "hinter"},
{ORTH: "'m", LEMMA: DET_LEMMA, NORM: "dem"}],
{ORTH: "'m", LEMMA: "der", NORM: "dem"}],
"ich's": [
{ORTH: "ich", LEMMA: PRON_LEMMA, TAG: "PPER"},
@ -36,11 +36,11 @@ _exc = {
"unter'm": [
{ORTH: "unter", LEMMA: "unter"},
{ORTH: "'m", LEMMA: DET_LEMMA, NORM: "dem"}],
{ORTH: "'m", LEMMA: "der", NORM: "dem"}],
"vor'm": [
{ORTH: "vor", LEMMA: "vor"},
{ORTH: "'m", LEMMA: DET_LEMMA, NORM: "dem"}],
{ORTH: "'m", LEMMA: "der", NORM: "dem"}],
"wir's": [
{ORTH: "wir", LEMMA: PRON_LEMMA, TAG: "PPER"},
@ -48,7 +48,7 @@ _exc = {
"über'm": [
{ORTH: "über", LEMMA: "über"},
{ORTH: "'m", LEMMA: DET_LEMMA, NORM: "dem"}]
{ORTH: "'m", LEMMA: "der", NORM: "dem"}]
}
@ -57,12 +57,10 @@ for exc_data in [
{ORTH: "'s", LEMMA: PRON_LEMMA, TAG: "PPER"},
{ORTH: "S'", LEMMA: PRON_LEMMA, TAG: "PPER"},
{ORTH: "s'", LEMMA: PRON_LEMMA, TAG: "PPER"},
{ORTH: "'n", LEMMA: DET_LEMMA, NORM: "ein"},
{ORTH: "'ne", LEMMA: DET_LEMMA, NORM: "eine"},
{ORTH: "'nen", LEMMA: DET_LEMMA, NORM: "einen"},
{ORTH: "'nem", LEMMA: DET_LEMMA, NORM: "einem"},
{ORTH: "'s", LEMMA: PRON_LEMMA, TAG: "PPER"},
{ORTH: "'n", LEMMA: "ein", NORM: "ein"},
{ORTH: "'ne", LEMMA: "eine", NORM: "eine"},
{ORTH: "'nen", LEMMA: "ein", NORM: "einen"},
{ORTH: "'nem", LEMMA: "ein", NORM: "einem"},
{ORTH: "Abb.", LEMMA: "Abbildung"},
{ORTH: "Abk.", LEMMA: "Abkürzung"},
{ORTH: "Abt.", LEMMA: "Abteilung"},

View File

@ -7,7 +7,6 @@ from .cli import download
PRON_LEMMA = "-PRON-"
DET_LEMMA = "-DET-"
def depr_model_download(lang):

View File

@ -2,7 +2,7 @@
from __future__ import unicode_literals
from ..symbols import ORTH, LEMMA, TAG, NORM, ADP, DET
from ..deprecated import PRON_LEMMA, DET_LEMMA
from ..deprecated import PRON_LEMMA
_exc = {
@ -32,11 +32,11 @@ _exc = {
"pal": [
{ORTH: "pa", LEMMA: "para"},
{ORTH: "l", LEMMA: DET_LEMMA, NORM: "el"}],
{ORTH: "l", LEMMA: "el"}],
"pala": [
{ORTH: "pa", LEMMA: "para"},
{ORTH: "la", LEMMA: DET_LEMMA}]
{ORTH: "la"}]
}

View File

@ -51,17 +51,17 @@ MORPH_RULES = {
},
"VBZ": {
"er": {LEMMA: "be", "VerbForm": "Fin", "Person": "One", "Tense": "Pres", "Mood": "Ind"},
"er": {LEMMA: "be", "VerbForm": "Fin", "Person": "Two", "Tense": "Pres", "Mood": "Ind"},
"er": {LEMMA: "be", "VerbForm": "Fin", "Person": "Three", "Tense": "Pres", "Mood": "Ind"},
"er": {"VerbForm": "Fin", "Person": "One", "Tense": "Pres", "Mood": "Ind"},
"er": {"VerbForm": "Fin", "Person": "Two", "Tense": "Pres", "Mood": "Ind"},
"er": {"VerbForm": "Fin", "Person": "Three", "Tense": "Pres", "Mood": "Ind"},
},
"VBP": {
"er": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"}
"er": {"VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"}
},
"VBD": {
"var": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Past", "Number": "Sing"},
"vært": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Past", "Number": "Plur"}
"var": {"VerbForm": "Fin", "Tense": "Past", "Number": "Sing"},
"vært": {"VerbForm": "Fin", "Tense": "Past", "Number": "Plur"}
}
}

View File

@ -53,17 +53,17 @@ MORPH_RULES = {
},
"VBZ": {
"är": {LEMMA: "be", "VerbForm": "Fin", "Person": "One", "Tense": "Pres", "Mood": "Ind"},
"är": {LEMMA: "be", "VerbForm": "Fin", "Person": "Two", "Tense": "Pres", "Mood": "Ind"},
"är": {LEMMA: "be", "VerbForm": "Fin", "Person": "Three", "Tense": "Pres", "Mood": "Ind"},
"är": {"VerbForm": "Fin", "Person": "One", "Tense": "Pres", "Mood": "Ind"},
"är": {"VerbForm": "Fin", "Person": "Two", "Tense": "Pres", "Mood": "Ind"},
"är": {"VerbForm": "Fin", "Person": "Three", "Tense": "Pres", "Mood": "Ind"},
},
"VBP": {
"är": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"}
"är": {"VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"}
},
"VBD": {
"var": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Past", "Number": "Sing"},
"vart": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Past", "Number": "Plur"}
"var": {"VerbForm": "Fin", "Tense": "Past", "Number": "Sing"},
"vart": {"VerbForm": "Fin", "Tense": "Past", "Number": "Plur"}
}
}