* Add multi-word expression RegexMerger

This commit is contained in:
Matthew Honnibal 2015-04-07 03:45:40 +02:00
parent 2fee67cfa3
commit 0ea5af88b6
1 changed files with 8 additions and 0 deletions

8
spacy/multi_words.py Normal file
View File

@ -0,0 +1,8 @@
class RegexMerger(object):
    """Merge regex-matched character spans of a token container.

    Each configured pattern is run over the text rendering of the container,
    and every match is handed to the container's ``merge`` method.
    """

    def __init__(self, regexes):
        """Store the merge rules.

        Args:
            regexes: Iterable of ``(tag, entity_type, regex)`` triples.
                ``regex`` must provide ``finditer`` (e.g. a compiled ``re``
                pattern). ``tag`` and ``entity_type`` are passed through to
                ``merge`` unchanged. The iterable is walked once per call,
                so it should be re-iterable (list/tuple) if the merger is
                reused.
        """
        self.regexes = regexes

    def __call__(self, tokens):
        """Apply every rule to ``tokens``, merging each matched span in place.

        Args:
            tokens: Object whose text conversion yields the string to search
                and which exposes
                ``merge(start, end, tag, matched_text, entity_type)``.
                ``start`` and ``end`` are the character offsets of the match
                within that string.

        Returns:
            None. The effect is whatever ``tokens.merge`` does.
        """
        # ``unicode`` only exists on Python 2; on Python 3 ``str`` is the text
        # type. Resolving it here keeps the class usable on both instead of
        # raising NameError on Python 3.
        try:
            text_type = unicode  # noqa: F821 -- Python 2 builtin
        except NameError:
            text_type = str

        for tag, entity_type, regex in self.regexes:
            # The text is re-rendered for every rule (as the original did),
            # so each pattern sees the container's current text after any
            # merges made by earlier rules.
            for m in regex.finditer(text_type(tokens)):
                tokens.merge(m.start(), m.end(), tag, m.group(), entity_type)