mirror of https://github.com/explosion/spaCy.git
* Fix data reading for lemmatizer
This commit is contained in:
parent
72613a5fca
commit
477e7fbffe
|
@ -1,5 +1,6 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
from os import path
|
from os import path
|
||||||
|
import codecs
|
||||||
|
|
||||||
|
|
||||||
NOUN_RULES = (
|
NOUN_RULES = (
|
||||||
|
@ -84,7 +85,7 @@ def lemmatize(string, index, exceptions, rules):
|
||||||
|
|
||||||
def read_index(loc):
|
def read_index(loc):
|
||||||
index = set()
|
index = set()
|
||||||
for line in open(loc):
|
for line in codecs.open(loc, 'r', 'utf8'):
|
||||||
if line.startswith(' '):
|
if line.startswith(' '):
|
||||||
continue
|
continue
|
||||||
pieces = line.split()
|
pieces = line.split()
|
||||||
|
@ -96,7 +97,7 @@ def read_index(loc):
|
||||||
|
|
||||||
def read_exc(loc):
|
def read_exc(loc):
|
||||||
exceptions = {}
|
exceptions = {}
|
||||||
for line in open(loc):
|
for line in codecs.open(loc, 'r', 'utf8'):
|
||||||
if line.startswith(' '):
|
if line.startswith(' '):
|
||||||
continue
|
continue
|
||||||
pieces = line.split()
|
pieces = line.split()
|
||||||
|
|
Loading…
Reference in New Issue