From 737816e86eda7d880baee61390c853284f1487b4 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Fri, 4 Nov 2016 15:16:20 +0100
Subject: [PATCH] Fix #368: Tokenizer handled pattern 'unicode close quote,
 period' incorrectly.

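---
Note: the hunk below adds the Unicode closing quote (”) to the look-behind
character class of the period suffix pattern, so that pattern now also
matches a period that directly follows ”. The two continuation string
literals are additionally given the r prefix; the backslash sequences they
contain (\. \] \)) are not recognized Python escapes, so the pattern text
itself is otherwise unchanged.
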
 spacy/en/language_data.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/spacy/en/language_data.py b/spacy/en/language_data.py
index 504103566..4666c0c6f 100644
--- a/spacy/en/language_data.py
+++ b/spacy/en/language_data.py
@@ -102,8 +102,8 @@ TOKENIZER_PREFIXES = r''', " ( [ { * < $ £ “ ' `` ` # US$ C$ A$ a- ‘ .... .
 
 
 TOKENIZER_SUFFIXES = (r''', \" \) \] \} \* \! \? % \$ > : ; ' ” '' 's 'S ’s ’S ’''' 
-                    '''\.\. \.\.\. \.\.\.\. (?<=[a-z0-9)\]"'%\)])\. '''
-                    '''(?<=[0-9])km''').strip().split()
+                      r'''\.\. \.\.\. \.\.\.\. (?<=[a-z0-9)\]”"'%\)])\. '''
+                      r'''(?<=[0-9])km''').strip().split()
 
 
 TOKENIZER_INFIXES = (r'''\.\.\.+ (?<=[a-z])\.(?=[A-Z]) (?<=[a-zA-Z])-(?=[a-zA-z]) '''