mirror of https://github.com/explosion/spaCy.git
* Filter out phrases that consist entirely of common, lower-case words.
This commit is contained in:
parent
4bbc8f45c6
commit
5af4b62fe7
|
@ -45,6 +45,8 @@ def read_gazetteer(tokenizer, loc, n=-1):
|
|||
if i >= n:
|
||||
break
|
||||
phrase = tokenizer(phrase)
|
||||
if all((t.is_lower and t.prob >= -10) for t in phrase):
|
||||
continue
|
||||
if len(phrase) >= 2:
|
||||
yield phrase
|
||||
|
||||
|
|
Loading…
Reference in New Issue