diff --git a/spacy/matcher.pyx b/spacy/matcher.pyx index 41d7029f0..58f88fc1a 100644 --- a/spacy/matcher.pyx +++ b/spacy/matcher.pyx @@ -230,14 +230,27 @@ cdef class Matcher: def add(self, key, on_match, *patterns): """Add a match-rule to the matcher. A match-rule consists of: an ID key, an on_match callback, and one or - more patterns. If the key exists, the patterns are appended to the - previous ones, and the previous on_match callback is replaced. The - `on_match` callback will receive the arguments `(matcher, doc, i, - matches)`. You can also set `on_match` to `None` to not perform any - actions. A pattern consists of one or more `token_specs`, where a - `token_spec` is a dictionary mapping attribute IDs to values. Token - descriptors can also include quantifiers. There are currently important - known problems with the quantifiers – see the docs. + more patterns. + + If the key exists, the patterns are appended to the previous ones, and + the previous on_match callback is replaced. The `on_match` callback will + receive the arguments `(matcher, doc, i, matches)`. You can also set + `on_match` to `None` to not perform any actions. + + A pattern consists of one or more `token_specs`, where a `token_spec` + is a dictionary mapping attribute IDs to values, and optionally a + quantifier operator under the key "op". The available quantifiers are: + + '!': Negate the pattern, by requiring it to match exactly 0 times. + '?': Make the pattern optional, by allowing it to match 0 or 1 times. + '+': Require the pattern to match 1 or more times. + '*': Allow the pattern to match 0 or more times. + + The + and * operators are usually interpreted "greedily", i.e. longer + matches are returned where possible. However, if you specify two '+' + or '*' patterns in a row and their matches overlap, the first + operator will behave non-greedily. This quirk in the semantics + makes the matcher more efficient, by avoiding the need for back-tracking. 
""" for pattern in patterns: if len(pattern) == 0: