From 8199012d26b12caf0a3791676e213c5a29966be0 Mon Sep 17 00:00:00 2001 From: alvations Date: Wed, 30 Sep 2015 20:10:15 +0200 Subject: [PATCH 1/3] changing deprecated codecs.open to io.open =) --- spacy/util.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spacy/util.py b/spacy/util.py index 1d48ab7e9..34a660c4c 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -1,5 +1,5 @@ from os import path -import codecs +import io import json import re @@ -7,7 +7,7 @@ DATA_DIR = path.join(path.dirname(__file__), '..', 'data') def utf8open(loc, mode='r'): - return codecs.open(loc, mode, 'utf8') + return io.open(loc, mode, encoding='utf8') def read_lang_data(data_dir): From 764bdc62e7f4e91ef571d6b655da8e53b7839447 Mon Sep 17 00:00:00 2001 From: alvations Date: Wed, 30 Sep 2015 20:16:52 +0200 Subject: [PATCH 2/3] caught another codecs.open --- bin/parser/train.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/parser/train.py b/bin/parser/train.py index 267b26275..57889511d 100755 --- a/bin/parser/train.py +++ b/bin/parser/train.py @@ -5,7 +5,7 @@ from __future__ import unicode_literals import os from os import path import shutil -import codecs +import io import random import plac @@ -169,7 +169,7 @@ def write_parses(Language, dev_loc, model_dir, out_loc): nlp = Language() gold_tuples = read_docparse_file(dev_loc) scorer = Scorer() - out_file = codecs.open(out_loc, 'w', 'utf8') + out_file = io.open(out_loc, 'w', encoding='utf8') for raw_text, segmented_text, annot_tuples in gold_tuples: tokens = nlp(raw_text) for t in tokens: From 8caedba42a5255b9996533a732e17eee3f20a2dd Mon Sep 17 00:00:00 2001 From: alvations Date: Wed, 30 Sep 2015 20:20:09 +0200 Subject: [PATCH 3/3] caught more codecs.open -> io.open --- bin/init_model.py | 6 +++--- bin/ner_tag.py | 4 ++-- bin/prepare_treebank.py | 4 ++-- spacy/en/lemmatizer.py | 6 +++--- spacy/gold.pyx | 2 +- spacy/strings.pyx | 6 +++--- spacy/vocab.pyx | 2 +- tests/test_parse_navigate.py | 4 ++-- 8 files changed, 17 insertions(+), 17 deletions(-) diff --git a/bin/init_model.py b/bin/init_model.py index a75bd9827..ba99808f0 100644 --- a/bin/init_model.py +++ b/bin/init_model.py @@ -20,7 +20,7 @@ from pathlib import Path from shutil import copyfile from shutil import copytree -import codecs +import io from spacy.en import get_lex_props from spacy.vocab import Vocab @@ -41,7 +41,7 @@ def setup_tokenizer(lang_data_dir, tok_dir): def _read_clusters(loc): clusters = {} - for line in codecs.open(str(loc), 'r', 'utf8'): + for line in io.open(str(loc), 'r', encoding='utf8'): try: cluster, word, freq = line.split() except ValueError: @@ -65,7 +65,7 @@ def _read_clusters(loc): def _read_probs(loc): probs = {} - for i, line in enumerate(codecs.open(str(loc), 'r', 'utf8')): + for i, line in enumerate(io.open(str(loc), 'r', encoding='utf8')): prob, word = line.split() prob = float(prob) probs[word] = prob diff --git a/bin/ner_tag.py b/bin/ner_tag.py index 34588bd12..f990f21a1 100644 --- a/bin/ner_tag.py +++ b/bin/ner_tag.py @@ -1,11 +1,11 @@ -import codecs +import io import plac from spacy.en import English def main(text_loc): - with codecs.open(text_loc, 'r', 'utf8') as file_: + with io.open(text_loc, 'r', encoding='utf8') as file_: text = file_.read() NLU = English() for paragraph in text.split('\n\n'): diff --git a/bin/prepare_treebank.py b/bin/prepare_treebank.py index d13ef7130..f9f4eec21 100644 --- a/bin/prepare_treebank.py +++ b/bin/prepare_treebank.py @@ -27,7 +27,7 @@ import json from os import path import os import re -import codecs +import io from collections import defaultdict from spacy.munge import read_ptb @@ -122,7 +122,7 @@ def read_file(*pieces): if not path.exists(loc): return None else: - return codecs.open(loc, 'r', 'utf8').read().strip() + return io.open(loc, 'r', encoding='utf8').read().strip() def get_file_names(section_dir, subsection): diff --git a/spacy/en/lemmatizer.py b/spacy/en/lemmatizer.py index 5883e12c8..a9625f0e9 100644 --- a/spacy/en/lemmatizer.py +++ b/spacy/en/lemmatizer.py @@ -1,6 +1,6 @@ from __future__ import unicode_literals from os import path -import codecs +import io NOUN_RULES = ( @@ -85,7 +85,7 @@ def lemmatize(string, index, exceptions, rules): def read_index(loc): index = set() - for line in codecs.open(loc, 'r', 'utf8'): + for line in io.open(loc, 'r', encoding='utf8'): if line.startswith(' '): continue pieces = line.split() @@ -97,7 +97,7 @@ def read_index(loc): def read_exc(loc): exceptions = {} - for line in codecs.open(loc, 'r', 'utf8'): + for line in io.open(loc, 'r', encoding='utf8'): if line.startswith(' '): continue pieces = line.split() diff --git a/spacy/gold.pyx b/spacy/gold.pyx index cab4ba8a1..4fe5c6b52 100644 --- a/spacy/gold.pyx +++ b/spacy/gold.pyx @@ -1,5 +1,5 @@ import numpy -import codecs +import io import json import ujson import random diff --git a/spacy/strings.pyx b/spacy/strings.pyx index e15f88837..8cf735bb6 100644 --- a/spacy/strings.pyx +++ b/spacy/strings.pyx @@ -1,4 +1,4 @@ -import codecs +import io from libc.string cimport memcpy from murmurhash.mrmr cimport hash64 @@ -112,11 +112,11 @@ cdef class StringStore: string = &self.strings[i] py_string = string.chars[:string.length] strings.append(py_string.decode('utf8')) - with codecs.open(loc, 'w', 'utf8') as file_: + with io.open(loc, 'w', encoding='utf8') as file_: file_.write(SEPARATOR.join(strings)) def load(self, loc): - with codecs.open(loc, 'r', 'utf8') as file_: + with io.open(loc, 'r', encoding='utf8') as file_: strings = file_.read().split(SEPARATOR) cdef unicode string cdef bytes byte_string diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx index c93e4202f..475b06dd1 100644 --- a/spacy/vocab.pyx +++ b/spacy/vocab.pyx @@ -4,7 +4,7 @@ from libc.stdint cimport int32_t import bz2 from os import path -import codecs +import io import math from .lexeme cimport EMPTY_LEXEME diff --git a/tests/test_parse_navigate.py b/tests/test_parse_navigate.py index cf6971c89..1fff0f684 100644 --- a/tests/test_parse_navigate.py +++ b/tests/test_parse_navigate.py @@ -1,6 +1,6 @@ from __future__ import unicode_literals from os import path -import codecs +import io from spacy.en import English @@ -9,7 +9,7 @@ import pytest @pytest.fixture def sun_text(): - with codecs.open(path.join(path.dirname(__file__), 'sun.txt'), 'r', 'utf8') as file_: + with io.open(path.join(path.dirname(__file__), 'sun.txt'), 'r', encoding='utf8') as file_: text = file_.read() return text