Merge pull request #2142 from jimregan/polish-more-tokens

more exceptions
This commit is contained in:
Ines Montani 2018-03-24 19:06:44 +01:00 committed by GitHub
commit 68226109f4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 16 additions and 3 deletions

View File

@ -1,7 +1,7 @@
# encoding: utf8
from __future__ import unicode_literals
from ...symbols import ORTH, LEMMA, POS, ADV, ADJ, NOUN
from ...symbols import ORTH, LEMMA, POS, ADV, ADJ, NOUN, ADP
_exc = {}
@ -12,11 +12,24 @@ for exc_data in [
{ORTH: "mgr.", LEMMA: "magister", POS: NOUN},
{ORTH: "tzn.", LEMMA: "to znaczy", POS: ADV},
{ORTH: "tj.", LEMMA: "to jest", POS: ADV},
{ORTH: "tzw.", LEMMA: "tak zwany", POS: ADJ}]:
{ORTH: "tzw.", LEMMA: "tak zwany", POS: ADJ},
{ORTH: "adw.", LEMMA: "adwokat", POS: NOUN},
{ORTH: "afr.", LEMMA: "afrykański", POS: ADJ},
{ORTH: "c.b.d.o.", LEMMA: "co było do okazania", POS: ADV},
{ORTH: "cbdu.", LEMMA: "co było do udowodnienia", POS: ADV},
{ORTH: "mn.w.", LEMMA: "mniej więcej", POS: ADV},
{ORTH: "nt.", LEMMA: "na temat", POS: ADP},
{ORTH: "ok.", LEMMA: "około"},
{ORTH: "n.p.u.", LEMMA: "na psa urok"},
{ORTH: "ww.", LEMMA: "wyżej wymieniony", POS: ADV}]:
_exc[exc_data[ORTH]] = [exc_data]
for orth in [
"w.", "r."]:
"w.", "r.", "br.", "bm.", "b.r.", "amer.", "am.", "bdb.", "św.", "p.", "lit.",
"wym.", "czyt.", "daw.", "d.", "zob.", "gw.", "dn.", "dyr.", "im.", "mł.",
"min.", "dot.", "muz.", "k.k.", "k.p.a.", "k.p.c.", "n.p.m.", "p.p.m.", "nb.",
"ob.", "n.e.", "p.n.e.", "zw.", "zool.", "zach.", "żarg.", "żart.", "wzgl.",
"wyj.", "xx.", "ks.", "x.", "wyd.", "wsch.", "o.o."]:
_exc[orth] = [{ORTH: orth}]