mirror of https://github.com/explosion/spaCy.git
Fix LEMMA in exceptions and morph rules
This commit is contained in:
parent
2edc0aee12
commit
41a322c733
|
@ -2,13 +2,13 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
from ..symbols import ORTH, LEMMA, TAG, NORM
|
||||
from ..deprecated import PRON_LEMMA, DET_LEMMA
|
||||
from ..deprecated import PRON_LEMMA
|
||||
|
||||
|
||||
_exc = {
|
||||
"auf'm": [
|
||||
{ORTH: "auf", LEMMA: "auf"},
|
||||
{ORTH: "'m", LEMMA: DET_LEMMA, NORM: "dem" }],
|
||||
{ORTH: "'m", LEMMA: "der", NORM: "dem" }],
|
||||
|
||||
"du's": [
|
||||
{ORTH: "du", LEMMA: PRON_LEMMA, TAG: "PPER"},
|
||||
|
@ -20,7 +20,7 @@ _exc = {
|
|||
|
||||
"hinter'm": [
|
||||
{ORTH: "hinter", LEMMA: "hinter"},
|
||||
{ORTH: "'m", LEMMA: DET_LEMMA, NORM: "dem"}],
|
||||
{ORTH: "'m", LEMMA: "der", NORM: "dem"}],
|
||||
|
||||
"ich's": [
|
||||
{ORTH: "ich", LEMMA: PRON_LEMMA, TAG: "PPER"},
|
||||
|
@ -36,11 +36,11 @@ _exc = {
|
|||
|
||||
"unter'm": [
|
||||
{ORTH: "unter", LEMMA: "unter"},
|
||||
{ORTH: "'m", LEMMA: DET_LEMMA, NORM: "dem"}],
|
||||
{ORTH: "'m", LEMMA: "der", NORM: "dem"}],
|
||||
|
||||
"vor'm": [
|
||||
{ORTH: "vor", LEMMA: "vor"},
|
||||
{ORTH: "'m", LEMMA: DET_LEMMA, NORM: "dem"}],
|
||||
{ORTH: "'m", LEMMA: "der", NORM: "dem"}],
|
||||
|
||||
"wir's": [
|
||||
{ORTH: "wir", LEMMA: PRON_LEMMA, TAG: "PPER"},
|
||||
|
@ -48,7 +48,7 @@ _exc = {
|
|||
|
||||
"über'm": [
|
||||
{ORTH: "über", LEMMA: "über"},
|
||||
{ORTH: "'m", LEMMA: DET_LEMMA, NORM: "dem"}]
|
||||
{ORTH: "'m", LEMMA: "der", NORM: "dem"}]
|
||||
}
|
||||
|
||||
|
||||
|
@ -57,12 +57,10 @@ for exc_data in [
|
|||
{ORTH: "'s", LEMMA: PRON_LEMMA, TAG: "PPER"},
|
||||
{ORTH: "S'", LEMMA: PRON_LEMMA, TAG: "PPER"},
|
||||
{ORTH: "s'", LEMMA: PRON_LEMMA, TAG: "PPER"},
|
||||
{ORTH: "'n", LEMMA: DET_LEMMA, NORM: "ein"},
|
||||
{ORTH: "'ne", LEMMA: DET_LEMMA, NORM: "eine"},
|
||||
{ORTH: "'nen", LEMMA: DET_LEMMA, NORM: "einen"},
|
||||
{ORTH: "'nem", LEMMA: DET_LEMMA, NORM: "einem"},
|
||||
{ORTH: "'s", LEMMA: PRON_LEMMA, TAG: "PPER"},
|
||||
|
||||
{ORTH: "'n", LEMMA: "ein", NORM: "ein"},
|
||||
{ORTH: "'ne", LEMMA: "eine", NORM: "eine"},
|
||||
{ORTH: "'nen", LEMMA: "ein", NORM: "einen"},
|
||||
{ORTH: "'nem", LEMMA: "ein", NORM: "einem"},
|
||||
{ORTH: "Abb.", LEMMA: "Abbildung"},
|
||||
{ORTH: "Abk.", LEMMA: "Abkürzung"},
|
||||
{ORTH: "Abt.", LEMMA: "Abteilung"},
|
||||
|
|
|
@ -7,7 +7,6 @@ from .cli import download
|
|||
|
||||
|
||||
PRON_LEMMA = "-PRON-"
|
||||
DET_LEMMA = "-DET-"
|
||||
|
||||
|
||||
def depr_model_download(lang):
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
from ..symbols import ORTH, LEMMA, TAG, NORM, ADP, DET
|
||||
from ..deprecated import PRON_LEMMA, DET_LEMMA
|
||||
from ..deprecated import PRON_LEMMA
|
||||
|
||||
|
||||
_exc = {
|
||||
|
@ -32,11 +32,11 @@ _exc = {
|
|||
|
||||
"pal": [
|
||||
{ORTH: "pa", LEMMA: "para"},
|
||||
{ORTH: "l", LEMMA: DET_LEMMA, NORM: "el"}],
|
||||
{ORTH: "l", LEMMA: "el"}],
|
||||
|
||||
"pala": [
|
||||
{ORTH: "pa", LEMMA: "para"},
|
||||
{ORTH: "la", LEMMA: DET_LEMMA}]
|
||||
{ORTH: "la"}]
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -51,17 +51,17 @@ MORPH_RULES = {
|
|||
},
|
||||
|
||||
"VBZ": {
|
||||
"er": {LEMMA: "be", "VerbForm": "Fin", "Person": "One", "Tense": "Pres", "Mood": "Ind"},
|
||||
"er": {LEMMA: "be", "VerbForm": "Fin", "Person": "Two", "Tense": "Pres", "Mood": "Ind"},
|
||||
"er": {LEMMA: "be", "VerbForm": "Fin", "Person": "Three", "Tense": "Pres", "Mood": "Ind"},
|
||||
"er": {"VerbForm": "Fin", "Person": "One", "Tense": "Pres", "Mood": "Ind"},
|
||||
"er": {"VerbForm": "Fin", "Person": "Two", "Tense": "Pres", "Mood": "Ind"},
|
||||
"er": {"VerbForm": "Fin", "Person": "Three", "Tense": "Pres", "Mood": "Ind"},
|
||||
},
|
||||
|
||||
"VBP": {
|
||||
"er": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"}
|
||||
"er": {"VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"}
|
||||
},
|
||||
|
||||
"VBD": {
|
||||
"var": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Past", "Number": "Sing"},
|
||||
"vært": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Past", "Number": "Plur"}
|
||||
"var": {"VerbForm": "Fin", "Tense": "Past", "Number": "Sing"},
|
||||
"vært": {"VerbForm": "Fin", "Tense": "Past", "Number": "Plur"}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -53,17 +53,17 @@ MORPH_RULES = {
|
|||
},
|
||||
|
||||
"VBZ": {
|
||||
"är": {LEMMA: "be", "VerbForm": "Fin", "Person": "One", "Tense": "Pres", "Mood": "Ind"},
|
||||
"är": {LEMMA: "be", "VerbForm": "Fin", "Person": "Two", "Tense": "Pres", "Mood": "Ind"},
|
||||
"är": {LEMMA: "be", "VerbForm": "Fin", "Person": "Three", "Tense": "Pres", "Mood": "Ind"},
|
||||
"är": {"VerbForm": "Fin", "Person": "One", "Tense": "Pres", "Mood": "Ind"},
|
||||
"är": {"VerbForm": "Fin", "Person": "Two", "Tense": "Pres", "Mood": "Ind"},
|
||||
"är": {"VerbForm": "Fin", "Person": "Three", "Tense": "Pres", "Mood": "Ind"},
|
||||
},
|
||||
|
||||
"VBP": {
|
||||
"är": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"}
|
||||
"är": {"VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"}
|
||||
},
|
||||
|
||||
"VBD": {
|
||||
"var": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Past", "Number": "Sing"},
|
||||
"vart": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Past", "Number": "Plur"}
|
||||
"var": {"VerbForm": "Fin", "Tense": "Past", "Number": "Sing"},
|
||||
"vart": {"VerbForm": "Fin", "Tense": "Past", "Number": "Plur"}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue