Remove dependency on ftfy

This commit is contained in:
Matthew Honnibal 2018-03-28 12:07:02 +02:00
parent 6d2c85f428
commit 070b6c6495
1 changed file with 9 additions and 1 deletion

View File

@ -15,6 +15,11 @@ from ..compat import fix_text
from ..vectors import Vectors from ..vectors import Vectors
from ..util import prints, ensure_path, get_lang_class from ..util import prints, ensure_path, get_lang_class
try:
import ftfy
except ImportError:
ftfy = None
@plac.annotations( @plac.annotations(
lang=("model language", "positional", None, str), lang=("model language", "positional", None, str),
@ -140,11 +145,14 @@ def read_freqs(freqs_loc, max_length=100, min_doc_freq=5, min_freq=50):
def read_clusters(clusters_loc): def read_clusters(clusters_loc):
print("Reading clusters...") print("Reading clusters...")
clusters = {} clusters = {}
if ftfy is None:
print("Warning: No text fixing. Run pip install ftfy if necessary")
with clusters_loc.open() as f: with clusters_loc.open() as f:
for line in tqdm(f): for line in tqdm(f):
try: try:
cluster, word, freq = line.split() cluster, word, freq = line.split()
word = fix_text(word) if ftfy is not None:
word = fix_text(word)
except ValueError: except ValueError:
continue continue
# If the clusterer has only seen the word a few times, its # If the clusterer has only seen the word a few times, its