diff --git a/examples/multi_word_matches.py b/examples/multi_word_matches.py index 3c715736e..73f48bf42 100644 --- a/examples/multi_word_matches.py +++ b/examples/multi_word_matches.py @@ -45,6 +45,8 @@ def read_gazetteer(tokenizer, loc, n=-1): if i >= n: break phrase = tokenizer(phrase) + if all((t.is_lower and t.prob >= -10) for t in phrase): + continue if len(phrase) >= 2: yield phrase