caught more codecs.open -> io.open

This commit is contained in:
alvations 2015-09-30 20:20:09 +02:00
parent 764bdc62e7
commit 8caedba42a
8 changed files with 17 additions and 17 deletions

View File

@@ -20,7 +20,7 @@ from pathlib import Path
from shutil import copyfile
from shutil import copytree
import codecs
import io
from spacy.en import get_lex_props
from spacy.vocab import Vocab
@@ -41,7 +41,7 @@ def setup_tokenizer(lang_data_dir, tok_dir):
def _read_clusters(loc):
clusters = {}
for line in codecs.open(str(loc), 'r', 'utf8'):
for line in io.open(str(loc), 'r', encoding='utf8'):
try:
cluster, word, freq = line.split()
except ValueError:
@@ -65,7 +65,7 @@ def _read_clusters(loc):
def _read_probs(loc):
probs = {}
for i, line in enumerate(codecs.open(str(loc), 'r', 'utf8')):
for i, line in enumerate(io.open(str(loc), 'r', encoding='utf8')):
prob, word = line.split()
prob = float(prob)
probs[word] = prob

View File

@@ -1,11 +1,11 @@
import codecs
import io
import plac
from spacy.en import English
def main(text_loc):
with codecs.open(text_loc, 'r', 'utf8') as file_:
with io.open(text_loc, 'r', encoding='utf8') as file_:
text = file_.read()
NLU = English()
for paragraph in text.split('\n\n'):

View File

@@ -27,7 +27,7 @@ import json
from os import path
import os
import re
import codecs
import io
from collections import defaultdict
from spacy.munge import read_ptb
@@ -122,7 +122,7 @@ def read_file(*pieces):
if not path.exists(loc):
return None
else:
return codecs.open(loc, 'r', 'utf8').read().strip()
return io.open(loc, 'r', encoding='utf8').read().strip()
def get_file_names(section_dir, subsection):

View File

@@ -1,6 +1,6 @@
from __future__ import unicode_literals
from os import path
import codecs
import io
NOUN_RULES = (
@@ -85,7 +85,7 @@ def lemmatize(string, index, exceptions, rules):
def read_index(loc):
index = set()
for line in codecs.open(loc, 'r', 'utf8'):
for line in io.open(loc, 'r', encoding='utf8'):
if line.startswith(' '):
continue
pieces = line.split()
@@ -97,7 +97,7 @@ def read_index(loc):
def read_exc(loc):
exceptions = {}
for line in codecs.open(loc, 'r', 'utf8'):
for line in io.open(loc, 'r', encoding='utf8'):
if line.startswith(' '):
continue
pieces = line.split()

View File

@@ -1,5 +1,5 @@
import numpy
import codecs
import io
import json
import ujson
import random

View File

@@ -1,4 +1,4 @@
import codecs
import io
from libc.string cimport memcpy
from murmurhash.mrmr cimport hash64
@@ -112,11 +112,11 @@ cdef class StringStore:
string = &self.strings[i]
py_string = string.chars[:string.length]
strings.append(py_string.decode('utf8'))
with codecs.open(loc, 'w', 'utf8') as file_:
with io.open(loc, 'w', encoding='utf8') as file_:
file_.write(SEPARATOR.join(strings))
def load(self, loc):
with codecs.open(loc, 'r', 'utf8') as file_:
with io.open(loc, 'r', encoding='utf8') as file_:
strings = file_.read().split(SEPARATOR)
cdef unicode string
cdef bytes byte_string

View File

@@ -4,7 +4,7 @@ from libc.stdint cimport int32_t
import bz2
from os import path
import codecs
import io
import math
from .lexeme cimport EMPTY_LEXEME

View File

@@ -1,6 +1,6 @@
from __future__ import unicode_literals
from os import path
import codecs
import io
from spacy.en import English
@@ -9,7 +9,7 @@ import pytest
@pytest.fixture
def sun_text():
with codecs.open(path.join(path.dirname(__file__), 'sun.txt'), 'r', 'utf8') as file_:
with io.open(path.join(path.dirname(__file__), 'sun.txt'), 'r', encoding='utf8') as file_:
text = file_.read()
return text