mirror of https://github.com/explosion/spaCy.git
* Update get_freqs script
This commit is contained in:
parent
2b7bd46508
commit
105305b4aa
|
@ -40,6 +40,7 @@ def null_props(string):
|
||||||
|
|
||||||
|
|
||||||
def count_freqs(input_loc, output_loc):
|
def count_freqs(input_loc, output_loc):
|
||||||
|
print output_loc
|
||||||
nlp = spacy.en.English(Parser=None, Tagger=None, Entity=None, load_vectors=False)
|
nlp = spacy.en.English(Parser=None, Tagger=None, Entity=None, load_vectors=False)
|
||||||
nlp.vocab.lexeme_props_getter = null_props
|
nlp.vocab.lexeme_props_getter = null_props
|
||||||
|
|
||||||
|
@ -94,8 +95,10 @@ def main(input_loc, freqs_dir, output_loc, n_jobs=2, skip_existing=False):
|
||||||
if not path.exists(output_path) or not skip_existing:
|
if not path.exists(output_path) or not skip_existing:
|
||||||
tasks.append((input_path, output_path))
|
tasks.append((input_path, output_path))
|
||||||
|
|
||||||
|
if tasks:
|
||||||
parallelize(count_freqs, tasks, n_jobs)
|
parallelize(count_freqs, tasks, n_jobs)
|
||||||
|
|
||||||
|
print "Merge"
|
||||||
merge_counts(outputs, output_loc)
|
merge_counts(outputs, output_loc)
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue