mirror of https://github.com/explosion/spaCy.git
80 lines
3.1 KiB
Plaintext
80 lines
3.1 KiB
Plaintext
//- ----------------------------------
|
|
//- 💫 DOCS > API > MATCHER
|
|
//- ----------------------------------
|
|
|
|
+section("matcher")
|
|
+h(2, "matcher", "https://github.com/" + SOCIAL.github + "/spaCy/blob/master/spacy/matcher.pyx")
|
|
| #[+tag class] Matcher
|
|
|
|
p A full example can be found #[a(href="https://github.com/" + SOCIAL.github + "/spaCy/blob/master/examples/matcher_example.py") here].
|
|
|
|
+table(["Usage", "Description"])
|
|
+row
|
|
+cell #[code.lang-python nlp(doc)]
|
|
+cell As part of the annotation pipeline.
|
|
|
|
+row
|
|
+cell #[code.lang-python nlp.matcher(doc)]
|
|
+cell Explicit invocation.
|
|
|
|
+row
|
|
+cell #[code.lang-python nlp.matcher.add(u'FooCorp', u'ORG', {}, [[{u'ORTH': u'Foo'}]])]
|
|
+cell Add a pattern to match.
|
|
|
|
+section("matcher-init")
|
|
+h(3, "matcher-init") __init__(self, vocab, patterns)
|
|
+table(["Name", "Type", "Description"])
|
|
+row
|
|
+cell vocab
|
|
+cell #[code.lang-python spacy.vocab.Vocab]
|
|
+cell Reference to the shared vocabulary object.
|
|
|
|
+row
|
|
+cell patterns
|
|
+cell #[code {entity_key: (etype, attrs, specs)}]
|
|
+cell.
|
|
Initial patterns to match. See #[code Matcher.add].
|
|
|
|
+section("matcher-add")
|
|
+h(3, "matcher-add") add(self, entity_key, etype, attrs, specs)
|
|
+table(["Name", "Type", "Description"])
|
|
+row
|
|
+cell entity_key
|
|
+cell unicode or int
|
|
+cell Your arbitrary ID string (or its integer encoding).
|
|
+row
|
|
+cell etype
|
|
+cell unicode or int
|
|
+cell A pre-registered entity type, e.g. u'PERSON', u'ORG', etc.
|
|
+row
|
|
+cell attrs
|
|
+cell #[code dict]
|
|
+cell Placeholder for future support of entity attributes.
|
|
+row
|
|
+cell specs
|
|
+cell #[code [[{int: unicode}]]]
|
|
+cell A list of surface forms, where each surface form is defined as a list of token definitions, and each token definition is a dictionary mapping attribute IDs to attribute values.
|
|
|
|
+section("matcher-saveload")
|
|
+h(3, "matcher-saveload")
|
|
| Save and Load
|
|
|
|
+section("matcher-saveload-dump")
|
|
+h(4, "matcher-saveload-dump") dump(loc)
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
+row
|
|
+cell loc
|
|
+cell #[+a(link_unicode) unicode]
|
|
+cell Path to save the gazetteer.json file.
|
|
|
|
+section("matcher-saveload-load")
|
|
+h(4, "matcher-saveload-load") load(loc)
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
+row
|
|
+cell loc
|
|
+cell #[+a(link_unicode) unicode]
|
|
+cell.
|
|
Path to load the gazetteer.json file from.
|