From 42349471bc815ffa76a83b7c1c7174666d71edfe Mon Sep 17 00:00:00 2001
From: Ali Zarezade <ali.zarezade@yahoo.com>
Date: Tue, 23 Jan 2018 18:11:33 +0330
Subject: [PATCH] =?UTF-8?q?add=20=D9=AA=20as=20punctuation?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 spacy/lang/char_classes.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/spacy/lang/char_classes.py b/spacy/lang/char_classes.py
index f91fa43c8..e76ae89d3 100644
--- a/spacy/lang/char_classes.py
+++ b/spacy/lang/char_classes.py
@@ -30,14 +30,14 @@ ALPHA_UPPER = merge_char_classes(_upper + _uncased)
 
 _units = ('km km² km³ m m² m³ dm dm² dm³ cm cm² cm³ mm mm² mm³ ha µm nm yd in ft '
           'kg g mg µg t lb oz m/s km/h kmh mph hPa Pa mbar mb MB kb KB gb GB tb '
-          'TB T G M K % ٪ км км² км³ м м² м³ дм дм² дм³ см см² см³ мм мм² мм³ нм '
+          'TB T G M K % км км² км³ м м² м³ дм дм² дм³ см см² см³ мм мм² мм³ нм '
           'кг г мг м/с км/ч кПа Па мбар Кб КБ кб Мб МБ мб Гб ГБ гб Тб ТБ тб')
 _currency = r'\$ £ € ¥ ฿ US\$ C\$ A\$ ₽ ﷼'
 
 # These expressions contain various unicode variations, including characters
 # used in Chinese (see #1333, #1340, #1351) – unless there are cross-language
 # conflicts, spaCy's base tokenizer should handle all of those by default
-_punct = r'… …… , : ; \! \? ¿ ؟ ¡ \( \) \[ \] \{ \} < > _ # \* & 。 ？ ！ ， 、 ； ： ～ · । ، ؛'
+_punct = r'… …… , : ; \! \? ¿ ؟ ¡ \( \) \[ \] \{ \} < > _ # \* & 。 ？ ！ ， 、 ； ： ～ · । ، ؛ ٪'
 _quotes = r'\' \'\' " ” “ `` ` ‘ ´ ‘‘ ’’ ‚ , „ » « 「 」 『 』 （ ） 〔 〕 【 】 《 》 〈 〉'
 _hyphens = '- – — -- --- —— ~'