From 28d7f0a672e5286393a71595825d41745c7f80e3 Mon Sep 17 00:00:00 2001
From: Jim O'Regan
Date: Mon, 26 Jun 2017 22:38:28 +0100
Subject: [PATCH 1/2] reference

---
 spacy/lang/pl/__init__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/spacy/lang/pl/__init__.py b/spacy/lang/pl/__init__.py
index 9fad81899..1962c6243 100644
--- a/spacy/lang/pl/__init__.py
+++ b/spacy/lang/pl/__init__.py
@@ -1,6 +1,7 @@
 # coding: utf8
 from __future__ import unicode_literals
 
+from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
 from .stop_words import STOP_WORDS
 from ..tokenizer_exceptions import BASE_EXCEPTIONS
 
@@ -16,7 +17,7 @@ class Polish(Language):
     lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
     lex_attr_getters[LANG] = lambda text: 'pl'
 
-    tokenizer_exceptions = update_exc(BASE_EXCEPTIONS)
+    tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
 
     stop_words = set(STOP_WORDS)
 

From 2f84c735856b1a69e732c1dfcb3842e0458d8781 Mon Sep 17 00:00:00 2001
From: Jim O'Regan
Date: Mon, 26 Jun 2017 22:40:04 +0100
Subject: [PATCH 2/2] a start

---
 spacy/lang/pl/tokenizer_exceptions.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 spacy/lang/pl/tokenizer_exceptions.py

diff --git a/spacy/lang/pl/tokenizer_exceptions.py b/spacy/lang/pl/tokenizer_exceptions.py
new file mode 100644
index 000000000..4dffb6209
--- /dev/null
+++ b/spacy/lang/pl/tokenizer_exceptions.py
@@ -0,0 +1,23 @@
+# encoding: utf8
+from __future__ import unicode_literals
+
+# FIX: was `from ..symbols import ORTH, LEMMA, POS` — from spacy/lang/pl/,
+# `..symbols` resolves to the non-existent spacy.lang.symbols; three levels
+# up (`...symbols`) reaches spacy.symbols. Also imports ADV/NOUN/ADJ, which
+# the exception data below uses but the original patch never imported.
+from ...symbols import ORTH, LEMMA, POS, ADV, NOUN, ADJ
+
+_exc = {}
+
+for exc_data in [
+    {ORTH: "m.in.", LEMMA: "między innymi", POS: ADV},
+    {ORTH: "inż.", LEMMA: "inżynier", POS: NOUN},
+    {ORTH: "mgr.", LEMMA: "magister", POS: NOUN},
+    {ORTH: "tzn.", LEMMA: "to znaczy", POS: ADV},
+    {ORTH: "tj.", LEMMA: "to jest", POS: ADV},
+    {ORTH: "tzw.", LEMMA: "tak zwany", POS: ADJ}]:
+    # FIX: original had a trailing comma, storing a 1-tuple instead of the
+    # list-of-dicts shape that update_exc expects.
+    _exc[exc_data[ORTH]] = [dict(exc_data)]
+
+for orth in [
+    "w.", "r."]:
+    _exc[orth] = [{ORTH: orth}]
+
+
+TOKENIZER_EXCEPTIONS = dict(_exc)