spaCy/website/docs/api/language.jade

155 lines
3.7 KiB
Plaintext

//- 💫 DOCS > API > LANGUAGE
include ../../_includes/_mixins
p A text processing pipeline.
+h(2, "attributes") Attributes
+table(["Name", "Type", "Description"])
+row
+cell #[code vocab]
+cell #[code Vocab]
+cell A container for the lexical types.
+row
+cell #[code tokenizer]
+cell #[code Tokenizer]
+cell Find word boundaries and create #[code Doc] object.
+row
+cell #[code tagger]
+cell #[code Tagger]
+cell Annotate #[code Doc] objects with POS tags.
+row
+cell #[code parser]
+cell #[code DependencyParser]
+cell Annotate #[code Doc] objects with syntactic dependencies.
+row
+cell #[code entity]
+cell #[code EntityRecognizer]
+cell Annotate #[code Doc] objects with named entities.
+row
+cell #[code matcher]
+cell #[code Matcher]
+cell Rule-based sequence matcher.
+row
+cell #[code make_doc]
+cell #[code lambda text: Doc]
+cell Create a #[code Doc] object from unicode text.
+row
+cell #[code pipeline]
+cell -
+cell Sequence of annotation functions.
+h(2, "init") Language.__init__
+tag method
p Create or load the pipeline.
+table(["Name", "Type", "Description"])
+row
+cell #[code **overrides]
+cell -
+cell Keyword arguments indicating which defaults to override.
+footrow
+cell return
+cell #[code Language]
+cell The newly constructed object.
+h(2, "call") Language.__call__
+tag method
p Apply the pipeline to a single text.
+aside-code("Example").
from spacy.en import English
nlp = English()
doc = nlp('An example sentence. Another example sentence.')
doc[0].orth_, doc[0].head.tag_
# ('An', 'NN')
+table(["Name", "Type", "Description"])
+row
+cell #[code text]
+cell unicode
+cell The text to be processed.
+row
+cell #[code tag]
+cell bool
+cell Whether to apply the part-of-speech tagger.
+row
+cell #[code parse]
+cell bool
+cell Whether to apply the syntactic dependency parser.
+row
+cell #[code entity]
+cell bool
+cell Whether to apply the named entity recognizer.
+footrow
+cell return
+cell #[code Doc]
+cell A container for accessing the linguistic annotations.
+h(2, "pipe") Language.pipe
+tag method
p
| Process texts as a stream, and yield #[code Doc] objects in order.
| Supports GIL-free multi-threading.
+aside-code("Example").
texts = [u'One document.', u'...', u'Lots of documents']
for doc in nlp.pipe(texts, batch_size=50, n_threads=4):
assert doc.is_parsed
+table(["Name", "Type", "Description"])
+row
+cell #[code texts]
+cell -
+cell A sequence of unicode objects.
+row
+cell #[code n_threads]
+cell int
+cell
| The number of worker threads to use. If #[code -1], OpenMP will
| decide how many to use at run time. Default is #[code 2].
+row
+cell #[code batch_size]
+cell int
+cell The number of texts to buffer.
+footrow
+cell yield
+cell #[code Doc]
+cell Containers for accessing the linguistic annotations.
+h(2, "save_to_directory") Language.save_to_directory
+tag method
p Save the #[code Vocab], #[code StringStore] and pipeline to a directory.
+table(["Name", "Type", "Description"])
+row
+cell #[code path]
+cell string or pathlib path
+cell Path to save the model.
+footrow
+cell return
+cell #[code None]
+cell -