From 8caedba42a5255b9996533a732e17eee3f20a2dd Mon Sep 17 00:00:00 2001
From: alvations
Date: Wed, 30 Sep 2015 20:20:09 +0200
Subject: [PATCH] caught more codecs.open -> io.open

---
 bin/init_model.py            | 6 +++---
 bin/ner_tag.py               | 4 ++--
 bin/prepare_treebank.py      | 4 ++--
 spacy/en/lemmatizer.py       | 6 +++---
 spacy/gold.pyx               | 2 +-
 spacy/strings.pyx            | 6 +++---
 spacy/vocab.pyx              | 2 +-
 tests/test_parse_navigate.py | 4 ++--
 8 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/bin/init_model.py b/bin/init_model.py
index a75bd9827..ba99808f0 100644
--- a/bin/init_model.py
+++ b/bin/init_model.py
@@ -20,7 +20,7 @@ from pathlib import Path
 from shutil import copyfile
 from shutil import copytree
 
-import codecs
+import io
 
 from spacy.en import get_lex_props
 from spacy.vocab import Vocab
@@ -41,7 +41,7 @@ def setup_tokenizer(lang_data_dir, tok_dir):
 
 def _read_clusters(loc):
     clusters = {}
-    for line in codecs.open(str(loc), 'r', 'utf8'):
+    for line in io.open(str(loc), 'r', encoding='utf8'):
         try:
             cluster, word, freq = line.split()
         except ValueError:
@@ -65,7 +65,7 @@ def _read_clusters(loc):
 
 def _read_probs(loc):
     probs = {}
-    for i, line in enumerate(codecs.open(str(loc), 'r', 'utf8')):
+    for i, line in enumerate(io.open(str(loc), 'r', encoding='utf8')):
         prob, word = line.split()
         prob = float(prob)
         probs[word] = prob
diff --git a/bin/ner_tag.py b/bin/ner_tag.py
index 34588bd12..f990f21a1 100644
--- a/bin/ner_tag.py
+++ b/bin/ner_tag.py
@@ -1,11 +1,11 @@
-import codecs
+import io
 
 import plac
 
 from spacy.en import English
 
 def main(text_loc):
-    with codecs.open(text_loc, 'r', 'utf8') as file_:
+    with io.open(text_loc, 'r', encoding='utf8') as file_:
         text = file_.read()
     NLU = English()
     for paragraph in text.split('\n\n'):
diff --git a/bin/prepare_treebank.py b/bin/prepare_treebank.py
index d13ef7130..f9f4eec21 100644
--- a/bin/prepare_treebank.py
+++ b/bin/prepare_treebank.py
@@ -27,7 +27,7 @@ import json
 from os import path
 import os
 import re
-import codecs
+import io
 
 from collections import defaultdict
 from spacy.munge import read_ptb
@@ -122,7 +122,7 @@ def read_file(*pieces):
     if not path.exists(loc):
         return None
     else:
-        return codecs.open(loc, 'r', 'utf8').read().strip()
+        return io.open(loc, 'r', encoding='utf8').read().strip()
 
 
 def get_file_names(section_dir, subsection):
diff --git a/spacy/en/lemmatizer.py b/spacy/en/lemmatizer.py
index 5883e12c8..a9625f0e9 100644
--- a/spacy/en/lemmatizer.py
+++ b/spacy/en/lemmatizer.py
@@ -1,6 +1,6 @@
 from __future__ import unicode_literals
 from os import path
-import codecs
+import io
 
 
 NOUN_RULES = (
@@ -85,7 +85,7 @@ def lemmatize(string, index, exceptions, rules):
 
 def read_index(loc):
     index = set()
-    for line in codecs.open(loc, 'r', 'utf8'):
+    for line in io.open(loc, 'r', encoding='utf8'):
         if line.startswith(' '):
             continue
         pieces = line.split()
@@ -97,7 +97,7 @@ def read_index(loc):
 
 def read_exc(loc):
     exceptions = {}
-    for line in codecs.open(loc, 'r', 'utf8'):
+    for line in io.open(loc, 'r', encoding='utf8'):
         if line.startswith(' '):
             continue
         pieces = line.split()
diff --git a/spacy/gold.pyx b/spacy/gold.pyx
index cab4ba8a1..4fe5c6b52 100644
--- a/spacy/gold.pyx
+++ b/spacy/gold.pyx
@@ -1,5 +1,5 @@
 import numpy
-import codecs
+import io
 import json
 import ujson
 import random
diff --git a/spacy/strings.pyx b/spacy/strings.pyx
index e15f88837..8cf735bb6 100644
--- a/spacy/strings.pyx
+++ b/spacy/strings.pyx
@@ -1,4 +1,4 @@
-import codecs
+import io
 
 from libc.string cimport memcpy
 from murmurhash.mrmr cimport hash64
@@ -112,11 +112,11 @@ cdef class StringStore:
             string = &self.strings[i]
             py_string = string.chars[:string.length]
             strings.append(py_string.decode('utf8'))
-        with codecs.open(loc, 'w', 'utf8') as file_:
+        with io.open(loc, 'w', encoding='utf8') as file_:
             file_.write(SEPARATOR.join(strings))
 
     def load(self, loc):
-        with codecs.open(loc, 'r', 'utf8') as file_:
+        with io.open(loc, 'r', encoding='utf8') as file_:
             strings = file_.read().split(SEPARATOR)
         cdef unicode string
         cdef bytes byte_string
diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx
index c93e4202f..475b06dd1 100644
--- a/spacy/vocab.pyx
+++ b/spacy/vocab.pyx
@@ -4,7 +4,7 @@ from libc.stdint cimport int32_t
 
 import bz2
 from os import path
-import codecs
+import io
 import math
 
 from .lexeme cimport EMPTY_LEXEME
diff --git a/tests/test_parse_navigate.py b/tests/test_parse_navigate.py
index cf6971c89..1fff0f684 100644
--- a/tests/test_parse_navigate.py
+++ b/tests/test_parse_navigate.py
@@ -1,6 +1,6 @@
 from __future__ import unicode_literals
 from os import path
-import codecs
+import io
 
 from spacy.en import English
 
@@ -9,7 +9,7 @@ import pytest
 
 
 @pytest.fixture
 def sun_text():
-    with codecs.open(path.join(path.dirname(__file__), 'sun.txt'), 'r', 'utf8') as file_:
+    with io.open(path.join(path.dirname(__file__), 'sun.txt'), 'r', encoding='utf8') as file_:
         text = file_.read()
     return text
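
A minimal sketch of the migration pattern applied throughout this patch, using a hypothetical 'example.txt'. codecs.open() takes the encoding as its third positional argument, while io.open() takes buffering there, so the encoding must be passed as a keyword. On Python 3, io.open is the same function as the built-in open(), and in text mode it also performs universal newline translation, which codecs.open() does not.

    import io

    # Before: import codecs; codecs.open('example.txt', 'r', 'utf8')
    # After: io.open with an explicit encoding keyword argument.
    # 'example.txt' is a placeholder, not a file from the patch.
    with io.open('example.txt', 'r', encoding='utf8') as file_:
        text = file_.read()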