From 070b6c6495f631fc874f66c3b6ee68e60429bad2 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 28 Mar 2018 12:07:02 +0200 Subject: [PATCH] Remove dependency on ftfy --- spacy/cli/init_model.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/spacy/cli/init_model.py b/spacy/cli/init_model.py index 71efe1b2e..a8a359051 100644 --- a/spacy/cli/init_model.py +++ b/spacy/cli/init_model.py @@ -15,6 +15,11 @@ from ..compat import fix_text from ..vectors import Vectors from ..util import prints, ensure_path, get_lang_class +try: + import ftfy +except ImportError: + ftfy = None + @plac.annotations( lang=("model language", "positional", None, str), @@ -140,11 +145,14 @@ def read_freqs(freqs_loc, max_length=100, min_doc_freq=5, min_freq=50): def read_clusters(clusters_loc): print("Reading clusters...") clusters = {} + if ftfy is None: + print("Warning: No text fixing. Run pip install ftfy if necessary") with clusters_loc.open() as f: for line in tqdm(f): try: cluster, word, freq = line.split() - word = fix_text(word) + if ftfy is not None: + word = fix_text(word) except ValueError: continue # If the clusterer has only seen the word a few times, its