From a23504fe07c5d3d55b247e4aa0b185dd0a338ee7 Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Thu, 5 Jan 2017 19:58:07 +0100
Subject: [PATCH] Move abbreviations below other exceptions

---
 spacy/en/tokenizer_exceptions.py | 135 +------------------------------
 1 file changed, 1 insertion(+), 134 deletions(-)

diff --git a/spacy/en/tokenizer_exceptions.py b/spacy/en/tokenizer_exceptions.py
index 2c046c157..49b612d73 100644
--- a/spacy/en/tokenizer_exceptions.py
+++ b/spacy/en/tokenizer_exceptions.py
@@ -505,142 +505,9 @@ ABBREVIATIONS = {
 }
 
 
-# Other exceptions
-
-OTHER = {
-    " ": [
-        {ORTH: " ", TAG: "SP"}
-    ],
-
-    "\u00a0": [
-        {ORTH: "\u00a0", TAG: "SP", LEMMA: " "}
-    ],
-
-    "and/or": [
-        {ORTH: "and/or", LEMMA: "and/or", TAG: "CC"}
-    ],
-
-    "'cause": [
-        {ORTH: "'cause", LEMMA: "because"}
-    ],
-
-    "y'all": [
-        {ORTH: "y'", LEMMA: PRON_LEMMA, NORM: "you"},
-        {ORTH: "all"}
-    ],
-
-    "yall": [
-        {ORTH: "y", LEMMA: PRON_LEMMA, NORM: "you"},
-        {ORTH: "all"}
-    ],
-
-    "'em": [
-        {ORTH: "'em", LEMMA: PRON_LEMMA, NORM: "them"}
-    ],
-
-    "em": [
-        {ORTH: "em", LEMMA: PRON_LEMMA, NORM: "them"}
-    ],
-
-    "nothin'": [
-        {ORTH: "nothin'", LEMMA: "nothing"}
-    ],
-
-    "nuthin'": [
-        {ORTH: "nuthin'", LEMMA: "nothing"}
-    ],
-
-    "'nuff": [
-        {ORTH: "'nuff", LEMMA: "enough"}
-    ],
-
-    "ol'": [
-        {ORTH: "ol'", LEMMA: "old"}
-    ],
-
-    "not've": [
-        {ORTH: "not", LEMMA: "not", TAG: "RB"},
-        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
-    ],
-
-    "notve": [
-        {ORTH: "not", LEMMA: "not", TAG: "RB"},
-        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
-    ],
-
-    "Not've": [
-        {ORTH: "Not", LEMMA: "not", TAG: "RB"},
-        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}
-    ],
-
-    "Notve": [
-        {ORTH: "Not", LEMMA: "not", TAG: "RB"},
-        {ORTH: "ve", LEMMA: "have", TAG: "VB"}
-    ],
-
-    "cannot": [
-        {ORTH: "can", LEMMA: "can", TAG: "MD"},
-        {ORTH: "not", LEMMA: "not", TAG: "RB"}
-    ],
-
-    "Cannot": [
-        {ORTH: "Can", LEMMA: "can", TAG: "MD"},
-        {ORTH: "not", LEMMA: "not", TAG: "RB"}
-    ],
-
-    "gonna": [
-        {ORTH: "gon", LEMMA: "go", NORM: "going"},
-        {ORTH: "na", LEMMA: "to"}
-    ],
-
-    "Gonna": [
-        {ORTH: "Gon", LEMMA: "go", NORM: "going"},
-        {ORTH: "na", LEMMA: "to"}
-    ],
-
-    "let's": [
-        {ORTH: "let"},
-        {ORTH: "'s", LEMMA: PRON_LEMMA, NORM: "us"}
-    ],
-
-    "Let's": [
-        {ORTH: "Let"},
-        {ORTH: "'s", LEMMA: PRON_LEMMA, NORM: "us"}
-    ],
-
-    "'S": [
-        {ORTH: "'S", LEMMA: "'s"}
-    ],
-
-    "'s": [
-        {ORTH: "'s", LEMMA: "'s"}
-    ],
-
-    "\u2018S": [
-        {ORTH: "\u2018S", LEMMA: "'s"}
-    ],
-
-    "\u2018s": [
-        {ORTH: "\u2018s", LEMMA: "'s"}
-    ],
-
-    "\u2014": [
-        {ORTH: "\u2014", TAG: ":", LEMMA: "--"}
-    ],
-
-    "\n": [
-        {ORTH: "\n", TAG: "SP"}
-    ],
-
-    "\t": [
-        {ORTH: "\t", TAG: "SP"}
-    ]
-}
-
-
 TOKENIZER_EXCEPTIONS = dict(EXC)
-TOKENIZER_EXCEPTIONS.update(ABBREVIATIONS)
 TOKENIZER_EXCEPTIONS.update(OTHER)
+TOKENIZER_EXCEPTIONS.update(ABBREVIATIONS)
 
 
 # Remove EXCLUDE_EXC if in exceptions
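
Note: the practical effect of reordering the two update() calls is that entries
from ABBREVIATIONS are now merged in last, so they take precedence over entries
from OTHER (and EXC) whenever the same string key is defined in more than one
dict, because dict.update() overwrites existing keys. A minimal sketch of that
merge behaviour follows; the ORTH/LEMMA placeholders and the "e.g." entries are
made up for illustration and are not the real spaCy exception tables.

    # Simplified stand-ins; in spaCy, ORTH/LEMMA are attribute symbols and the
    # exception dicts are much larger.
    ORTH, LEMMA = "orth", "lemma"

    EXC = {"i'm": [{ORTH: "i"}, {ORTH: "'m"}]}
    OTHER = {"e.g.": [{ORTH: "e.g.", LEMMA: "for example"}]}            # hypothetical entry
    ABBREVIATIONS = {"e.g.": [{ORTH: "e.g.", LEMMA: "exempli gratia"}]}  # hypothetical entry

    TOKENIZER_EXCEPTIONS = dict(EXC)
    TOKENIZER_EXCEPTIONS.update(OTHER)
    TOKENIZER_EXCEPTIONS.update(ABBREVIATIONS)  # applied last, so it wins on key clashes

    # After this patch's ordering, "e.g." resolves to the ABBREVIATIONS entry;
    # with the previous ordering, OTHER would have overridden it.
    print(TOKENIZER_EXCEPTIONS["e.g."])  # [{'orth': 'e.g.', 'lemma': 'exempli gratia'}]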