Added loops to handle contractions with verbs

This commit is contained in:
Magnus Burton 2017-01-19 14:08:52 +01:00 committed by GitHub
parent aad23ab0b4
commit 69eab727d7
1 changed files with 20 additions and 0 deletions

View File

@@ -4,6 +4,26 @@ from __future__ import unicode_literals
from ..symbols import *
from ..language_data import PRON_LEMMA
# Verbs

# Container for the generated verb-contraction entries, keyed by the fused
# surface form. It is created here because the loop below assigns into it and
# no earlier definition is visible in this part of the file.
# NOTE(review): confirm EXC is merged into TOKENIZER_EXCEPTIONS further down;
# that part of the file is not visible from here.
EXC = {}

# Each verb below may be written fused with a trailing "u" (e.g. "kör" + "u"
# -> "köru"). Every fused form is mapped to two token descriptions: the verb
# itself and a "u" piece whose LEMMA is PRON_LEMMA and whose NORM is "du".
for verb_data in [
    {ORTH: "driver"},
    {ORTH: "kör"},
    {ORTH: "hörr", LEMMA: "hör"},
    {ORTH: "fattar"},
    {ORTH: "hajar", LEMMA: "förstår"},
    {ORTH: "lever"},
    {ORTH: "serr", LEMMA: "ser"},
    {ORTH: "fixar"},
]:
    # Title-cased copy so the contraction is also registered with an initial
    # capital; the LEMMA (when given) is carried over unchanged.
    verb_data_tc = dict(verb_data)
    verb_data_tc[ORTH] = verb_data_tc[ORTH].title()

    for data in [verb_data, verb_data_tc]:
        EXC[data[ORTH] + "u"] = [
            # Copy so each entry owns its dict rather than sharing the
            # loop-level one.
            dict(data),
            {ORTH: "u", LEMMA: PRON_LEMMA, NORM: "du"},
        ]
TOKENIZER_EXCEPTIONS = {
"jan.": [