From 8199012d26b12caf0a3791676e213c5a29966be0 Mon Sep 17 00:00:00 2001
From: alvations <alvations@gmail.com>
Date: Wed, 30 Sep 2015 20:10:15 +0200
Subject: [PATCH 1/3] Replace deprecated codecs.open with io.open in spacy/util.py

---
 spacy/util.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/spacy/util.py b/spacy/util.py
index 1d48ab7e9..34a660c4c 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -1,5 +1,5 @@
 from os import path
-import codecs
+import io
 import json
 import re
 
@@ -7,7 +7,7 @@ DATA_DIR = path.join(path.dirname(__file__), '..', 'data')
 
 
 def utf8open(loc, mode='r'):
-    return codecs.open(loc, mode, 'utf8')
+    return io.open(loc, mode, encoding='utf8')
 
 
 def read_lang_data(data_dir):

From 764bdc62e7f4e91ef571d6b655da8e53b7839447 Mon Sep 17 00:00:00 2001
From: alvations <alvations@gmail.com>
Date: Wed, 30 Sep 2015 20:16:52 +0200
Subject: [PATCH 2/3] Replace another codecs.open with io.open in bin/parser/train.py

---
 bin/parser/train.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bin/parser/train.py b/bin/parser/train.py
index 267b26275..57889511d 100755
--- a/bin/parser/train.py
+++ b/bin/parser/train.py
@@ -5,7 +5,7 @@ from __future__ import unicode_literals
 import os
 from os import path
 import shutil
-import codecs
+import io
 import random
 
 import plac
@@ -169,7 +169,7 @@ def write_parses(Language, dev_loc, model_dir, out_loc):
     nlp = Language()
     gold_tuples = read_docparse_file(dev_loc)
     scorer = Scorer()
-    out_file = codecs.open(out_loc, 'w', 'utf8')
+    out_file = io.open(out_loc, 'w', encoding='utf8')
     for raw_text, segmented_text, annot_tuples in gold_tuples:
         tokens = nlp(raw_text)
         for t in tokens:

From 8caedba42a5255b9996533a732e17eee3f20a2dd Mon Sep 17 00:00:00 2001
From: alvations <alvations@gmail.com>
Date: Wed, 30 Sep 2015 20:20:09 +0200
Subject: [PATCH 3/3] Replace more codecs.open usages with io.open in bin/, spacy/ and tests/

---
 bin/init_model.py            | 6 +++---
 bin/ner_tag.py               | 4 ++--
 bin/prepare_treebank.py      | 4 ++--
 spacy/en/lemmatizer.py       | 6 +++---
 spacy/gold.pyx               | 2 +-
 spacy/strings.pyx            | 6 +++---
 spacy/vocab.pyx              | 2 +-
 tests/test_parse_navigate.py | 4 ++--
 8 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/bin/init_model.py b/bin/init_model.py
index a75bd9827..ba99808f0 100644
--- a/bin/init_model.py
+++ b/bin/init_model.py
@@ -20,7 +20,7 @@ from pathlib import Path
 
 from shutil import copyfile
 from shutil import copytree
-import codecs
+import io
 
 from spacy.en import get_lex_props
 from spacy.vocab import Vocab
@@ -41,7 +41,7 @@ def setup_tokenizer(lang_data_dir, tok_dir):
 
 def _read_clusters(loc):
     clusters = {}
-    for line in codecs.open(str(loc), 'r', 'utf8'):
+    for line in io.open(str(loc), 'r', encoding='utf8'):
         try:
             cluster, word, freq = line.split()
         except ValueError:
@@ -65,7 +65,7 @@ def _read_clusters(loc):
 
 def _read_probs(loc):
     probs = {}
-    for i, line in enumerate(codecs.open(str(loc), 'r', 'utf8')):
+    for i, line in enumerate(io.open(str(loc), 'r', encoding='utf8')):
         prob, word = line.split()
         prob = float(prob)
         probs[word] = prob
diff --git a/bin/ner_tag.py b/bin/ner_tag.py
index 34588bd12..f990f21a1 100644
--- a/bin/ner_tag.py
+++ b/bin/ner_tag.py
@@ -1,11 +1,11 @@
-import codecs
+import io
 import plac
 
 from spacy.en import English
 
 
 def main(text_loc):
-    with codecs.open(text_loc, 'r', 'utf8') as file_:
+    with io.open(text_loc, 'r', encoding='utf8') as file_:
         text = file_.read()
     NLU = English()
     for paragraph in text.split('\n\n'):
diff --git a/bin/prepare_treebank.py b/bin/prepare_treebank.py
index d13ef7130..f9f4eec21 100644
--- a/bin/prepare_treebank.py
+++ b/bin/prepare_treebank.py
@@ -27,7 +27,7 @@ import json
 from os import path
 import os
 import re
-import codecs
+import io
 from collections import defaultdict
 
 from spacy.munge import read_ptb
@@ -122,7 +122,7 @@ def read_file(*pieces):
     if not path.exists(loc):
         return None
     else:
-        return codecs.open(loc, 'r', 'utf8').read().strip()
+        return io.open(loc, 'r', encoding='utf8').read().strip()
 
 
 def get_file_names(section_dir, subsection):
diff --git a/spacy/en/lemmatizer.py b/spacy/en/lemmatizer.py
index 5883e12c8..a9625f0e9 100644
--- a/spacy/en/lemmatizer.py
+++ b/spacy/en/lemmatizer.py
@@ -1,6 +1,6 @@
 from __future__ import unicode_literals
 from os import path
-import codecs
+import io
 
 
 NOUN_RULES = (
@@ -85,7 +85,7 @@ def lemmatize(string, index, exceptions, rules):
 
 def read_index(loc):
     index = set()
-    for line in codecs.open(loc, 'r', 'utf8'):
+    for line in io.open(loc, 'r', encoding='utf8'):
         if line.startswith(' '):
             continue
         pieces = line.split()
@@ -97,7 +97,7 @@ def read_index(loc):
 
 def read_exc(loc):
     exceptions = {}
-    for line in codecs.open(loc, 'r', 'utf8'):
+    for line in io.open(loc, 'r', encoding='utf8'):
         if line.startswith(' '):
             continue
         pieces = line.split()
diff --git a/spacy/gold.pyx b/spacy/gold.pyx
index cab4ba8a1..4fe5c6b52 100644
--- a/spacy/gold.pyx
+++ b/spacy/gold.pyx
@@ -1,5 +1,5 @@
 import numpy
-import codecs
+import io
 import json
 import ujson
 import random
diff --git a/spacy/strings.pyx b/spacy/strings.pyx
index e15f88837..8cf735bb6 100644
--- a/spacy/strings.pyx
+++ b/spacy/strings.pyx
@@ -1,4 +1,4 @@
-import codecs
+import io
 
 from libc.string cimport memcpy
 from murmurhash.mrmr cimport hash64
@@ -112,11 +112,11 @@ cdef class StringStore:
             string = &self.strings[i]
             py_string = string.chars[:string.length]
             strings.append(py_string.decode('utf8'))
-        with codecs.open(loc, 'w', 'utf8') as file_:
+        with io.open(loc, 'w', encoding='utf8') as file_:
             file_.write(SEPARATOR.join(strings))
 
     def load(self, loc):
-        with codecs.open(loc, 'r', 'utf8') as file_:
+        with io.open(loc, 'r', encoding='utf8') as file_:
             strings = file_.read().split(SEPARATOR)
         cdef unicode string
         cdef bytes byte_string
diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx
index c93e4202f..475b06dd1 100644
--- a/spacy/vocab.pyx
+++ b/spacy/vocab.pyx
@@ -4,7 +4,7 @@ from libc.stdint cimport int32_t
 
 import bz2
 from os import path
-import codecs
+import io
 import math
 
 from .lexeme cimport EMPTY_LEXEME
diff --git a/tests/test_parse_navigate.py b/tests/test_parse_navigate.py
index cf6971c89..1fff0f684 100644
--- a/tests/test_parse_navigate.py
+++ b/tests/test_parse_navigate.py
@@ -1,6 +1,6 @@
 from __future__ import unicode_literals
 from os import path
-import codecs
+import io
 
 from spacy.en import English
 
@@ -9,7 +9,7 @@ import pytest
 
 @pytest.fixture
 def sun_text():
-    with codecs.open(path.join(path.dirname(__file__), 'sun.txt'), 'r', 'utf8') as file_:
+    with io.open(path.join(path.dirname(__file__), 'sun.txt'), 'r', encoding='utf8') as file_:
         text = file_.read()
     return text