From df0b68f60eda43865d4b7271c55670784b214ade Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Fri, 4 Sep 2020 13:19:16 +0200
Subject: [PATCH] Remove unicode declarations and update language data

---
 spacy/lang/cs/examples.py                | 10 +++-----
 spacy/lang/cs/lex_attrs.py               |  5 +---
 spacy/lang/he/lex_attrs.py               |  6 ++---
 spacy/lang/ne/stop_words.py              |  4 ---
 spacy/lang/sa/__init__.py                | 10 +-------
 spacy/lang/sa/examples.py                |  4 ---
 spacy/lang/sa/lex_attrs.py               | 32 +++++++++++-------------
 spacy/lang/sa/stop_words.py              |  3 ---
 spacy/tests/lang/cs/test_text.py         |  3 ---
 spacy/tests/lang/ne/test_text.py         |  3 ---
 spacy/tests/lang/sa/test_text.py         |  3 ---
 spacy/tests/regression/test_issue5838.py | 14 +++++------
 spacy/tests/regression/test_issue5918.py |  3 ---
 13 files changed, 27 insertions(+), 73 deletions(-)

diff --git a/spacy/lang/cs/examples.py b/spacy/lang/cs/examples.py
index fe8a9f6d1..a30b5ac14 100644
--- a/spacy/lang/cs/examples.py
+++ b/spacy/lang/cs/examples.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
 """
 Example sentences to test spaCy and its language models.
 >>> from spacy.lang.cs.examples import sentences
@@ -10,9 +6,9 @@ Example sentences to test spaCy and its language models.


 sentences = [
-    "Máma mele maso.",
+    "Máma mele maso.",
     "Příliš žluťoučký kůň úpěl ďábelské ódy.",
-    "ArcGIS je geografický informační systém určený pro práci s prostorovými daty." ,
+    "ArcGIS je geografický informační systém určený pro práci s prostorovými daty.",
     "Může data vytvářet a spravovat, ale především je dokáže analyzovat, najít v nich nové vztahy a vše přehledně vizualizovat.",
     "Dnes je krásné počasí.",
     "Nestihl autobus, protože pozdě vstal z postele.",
@@ -39,4 +35,4 @@ sentences = [
     "Jaké PSČ má Praha 1?",
     "PSČ Prahy 1 je 110 00.",
     "Za 20 minut jede vlak.",
-    ]
+]
diff --git a/spacy/lang/cs/lex_attrs.py b/spacy/lang/cs/lex_attrs.py
index 368cab6c8..530d1d5eb 100644
--- a/spacy/lang/cs/lex_attrs.py
+++ b/spacy/lang/cs/lex_attrs.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
 from ...attrs import LIKE_NUM

 _num_words = [
@@ -43,7 +40,7 @@ _num_words = [
     "kvadrilion",
     "kvadriliarda",
     "kvintilion",
-    ]
+]


 def like_num(text):
diff --git a/spacy/lang/he/lex_attrs.py b/spacy/lang/he/lex_attrs.py
index 9eab93ae4..2953e7592 100644
--- a/spacy/lang/he/lex_attrs.py
+++ b/spacy/lang/he/lex_attrs.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
 from ...attrs import LIKE_NUM

 _num_words = [
@@ -73,6 +70,7 @@
     "עשירי",
 ]

+
 def like_num(text):
     if text.startswith(("+", "-", "±", "~")):
         text = text[1:]
@@ -84,7 +82,7 @@
         num, denom = text.split("/")
         if num.isdigit() and denom.isdigit():
             return True
-
+
     if text in _num_words:
         return True

diff --git a/spacy/lang/ne/stop_words.py b/spacy/lang/ne/stop_words.py
index f008697d0..8470297b9 100644
--- a/spacy/lang/ne/stop_words.py
+++ b/spacy/lang/ne/stop_words.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
 # Source: https://github.com/sanjaalcorps/NepaliStopWords/blob/master/NepaliStopWords.txt

 STOP_WORDS = set(
diff --git a/spacy/lang/sa/__init__.py b/spacy/lang/sa/__init__.py
index 8a4533341..345137817 100644
--- a/spacy/lang/sa/__init__.py
+++ b/spacy/lang/sa/__init__.py
@@ -1,18 +1,10 @@
-# coding: utf8
-from __future__ import unicode_literals
-
 from .stop_words import STOP_WORDS
 from .lex_attrs import LEX_ATTRS
-
 from ...language import Language
-from ...attrs import LANG


 class SanskritDefaults(Language.Defaults):
-    lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
-    lex_attr_getters.update(LEX_ATTRS)
-    lex_attr_getters[LANG] = lambda text: "sa"
-
+    lex_attr_getters = LEX_ATTRS
     stop_words = STOP_WORDS


diff --git a/spacy/lang/sa/examples.py b/spacy/lang/sa/examples.py
index 9d4fa1e49..60243c04c 100644
--- a/spacy/lang/sa/examples.py
+++ b/spacy/lang/sa/examples.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
 """
 Example sentences to test spaCy and its language models.

diff --git a/spacy/lang/sa/lex_attrs.py b/spacy/lang/sa/lex_attrs.py
index c33be2ce4..f2b51650b 100644
--- a/spacy/lang/sa/lex_attrs.py
+++ b/spacy/lang/sa/lex_attrs.py
@@ -1,9 +1,5 @@
-# coding: utf8
-from __future__ import unicode_literals
-
 from ...attrs import LIKE_NUM

-
 # reference 1: https://en.wikibooks.org/wiki/Sanskrit/Numbers

 _num_words = [
@@ -106,26 +102,26 @@ _num_words = [
     "सप्तनवतिः",
     "अष्टनवतिः",
     "एकोनशतम्",
-    "शतम्"
+    "शतम्",
 ]


 def like_num(text):
-    """
+    """
     Check if text resembles a number
     """
-    if text.startswith(("+", "-", "±", "~")):
-        text = text[1:]
-    text = text.replace(",", "").replace(".", "")
-    if text.isdigit():
-        return True
-    if text.count("/") == 1:
-        num, denom = text.split("/")
-        if num.isdigit() and denom.isdigit():
-            return True
-    if text in _num_words:
-        return True
-    return False
+    if text.startswith(("+", "-", "±", "~")):
+        text = text[1:]
+    text = text.replace(",", "").replace(".", "")
+    if text.isdigit():
+        return True
+    if text.count("/") == 1:
+        num, denom = text.split("/")
+        if num.isdigit() and denom.isdigit():
+            return True
+    if text in _num_words:
+        return True
+    return False


 LEX_ATTRS = {LIKE_NUM: like_num}
diff --git a/spacy/lang/sa/stop_words.py b/spacy/lang/sa/stop_words.py
index aa51ceae0..30302a14d 100644
--- a/spacy/lang/sa/stop_words.py
+++ b/spacy/lang/sa/stop_words.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
 # Source: https://gist.github.com/Akhilesh28/fe8b8e180f64b72e64751bc31cb6d323

 STOP_WORDS = set(
diff --git a/spacy/tests/lang/cs/test_text.py b/spacy/tests/lang/cs/test_text.py
index d98961738..b834111b9 100644
--- a/spacy/tests/lang/cs/test_text.py
+++ b/spacy/tests/lang/cs/test_text.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import pytest


diff --git a/spacy/tests/lang/ne/test_text.py b/spacy/tests/lang/ne/test_text.py
index 794f8fbdc..7dd971132 100644
--- a/spacy/tests/lang/ne/test_text.py
+++ b/spacy/tests/lang/ne/test_text.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import pytest


diff --git a/spacy/tests/lang/sa/test_text.py b/spacy/tests/lang/sa/test_text.py
index 7c961bdae..41257a4d8 100644
--- a/spacy/tests/lang/sa/test_text.py
+++ b/spacy/tests/lang/sa/test_text.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import pytest


diff --git a/spacy/tests/regression/test_issue5838.py b/spacy/tests/regression/test_issue5838.py
index c008c5aec..4e4d98beb 100644
--- a/spacy/tests/regression/test_issue5838.py
+++ b/spacy/tests/regression/test_issue5838.py
@@ -1,15 +1,13 @@
-# coding: utf8
-from __future__ import unicode_literals
-
 from spacy.lang.en import English
 from spacy.tokens import Span
 from spacy import displacy

-SAMPLE_TEXT = '''First line
+
+SAMPLE_TEXT = """First line
 Second line, with ent
 Third line
 Fourth line
-'''
+"""


 def test_issue5838():
@@ -18,8 +16,8 @@

     nlp = English()
     doc = nlp(SAMPLE_TEXT)
-    doc.ents = [Span(doc, 7, 8, label='test')]
+    doc.ents = [Span(doc, 7, 8, label="test")]

-    html = displacy.render(doc, style='ent')
-    found = html.count('</br>')
+    html = displacy.render(doc, style="ent")
+    found = html.count("</br>")
     assert found == 4
diff --git a/spacy/tests/regression/test_issue5918.py b/spacy/tests/regression/test_issue5918.py
index 2dee26d82..3b96009a8 100644
--- a/spacy/tests/regression/test_issue5918.py
+++ b/spacy/tests/regression/test_issue5918.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
 from spacy.lang.en import English
 from spacy.pipeline import merge_entities, EntityRuler