mirror of https://github.com/explosion/spaCy.git
Add missing EXC variable and combine tokenizer exceptions
This commit is contained in:
parent
30a52d576b
commit
6715615d55
|
@ -4,6 +4,9 @@ from __future__ import unicode_literals
|
|||
from ..symbols import *
|
||||
from ..language_data import PRON_LEMMA
|
||||
|
||||
|
||||
# Module-level dict of tokenizer exceptions; starts empty and is copied into
# TOKENIZER_EXCEPTIONS further down.
# NOTE(review): presumably populated by the loops that follow -- their bodies
# are not visible in this excerpt; confirm.
EXC = {}
|
||||
|
||||
# Verbs
|
||||
|
||||
for verb_data in [
|
||||
|
@ -25,7 +28,8 @@ for verb_data in [
|
|||
{ORTH: "u", LEMMA: PRON_LEMMA, NORM: "du"}
|
||||
]
|
||||
|
||||
TOKENIZER_EXCEPTIONS = {
|
||||
|
||||
ABBREVIATIONS = {
|
||||
"jan.": [
|
||||
{ORTH: "jan.", LEMMA: "januari"}
|
||||
],
|
||||
|
@ -149,6 +153,10 @@ TOKENIZER_EXCEPTIONS = {
|
|||
}
|
||||
|
||||
|
||||
# Build the combined table from a shallow copy of EXC, so that adding keys to
# TOKENIZER_EXCEPTIONS afterwards does not add them to EXC itself.
TOKENIZER_EXCEPTIONS = dict(EXC)
|
||||
# Merge the abbreviation entries into the combined table. dict.update
# overwrites existing keys, so an ABBREVIATIONS entry replaces any entry
# already present under the same key.
TOKENIZER_EXCEPTIONS.update(ABBREVIATIONS)
|
||||
|
||||
|
||||
ORTH_ONLY = [
|
||||
"ang.",
|
||||
"anm.",
|
||||
|
|
Loading…
Reference in New Issue