diff --git a/spacy/util.py b/spacy/util.py
index 6028d85b5..bff00b585 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -315,7 +315,7 @@ def read_regex(path):
 
 
 def compile_prefix_regex(entries):
-    """Compile a list of prefix rules into a regex object.
+    """Compile a sequence of prefix rules into a regex object.
 
     entries (tuple): The prefix rules, e.g. spacy.lang.punctuation.TOKENIZER_PREFIXES.
     RETURNS (regex object): The regex object. to be used for Tokenizer.prefix_search.
@@ -332,7 +332,7 @@ def compile_prefix_regex(entries):
 
 
 def compile_suffix_regex(entries):
-    """Compile a list of suffix rules into a regex object.
+    """Compile a sequence of suffix rules into a regex object.
 
     entries (tuple): The suffix rules, e.g. spacy.lang.punctuation.TOKENIZER_SUFFIXES.
     RETURNS (regex object): The regex object. to be used for Tokenizer.suffix_search.
@@ -342,7 +342,7 @@ def compile_suffix_regex(entries):
 
 
 def compile_infix_regex(entries):
-    """Compile a list of infix rules into a regex object.
+    """Compile a sequence of infix rules into a regex object.
 
     entries (tuple): The infix rules, e.g. spacy.lang.punctuation.TOKENIZER_INFIXES.
     RETURNS (regex object): The regex object. to be used for Tokenizer.infix_finditer.
diff --git a/website/docs/usage/processing-pipelines.md b/website/docs/usage/processing-pipelines.md
index a39482c21..ab780485f 100644
--- a/website/docs/usage/processing-pipelines.md
+++ b/website/docs/usage/processing-pipelines.md
@@ -29,10 +29,10 @@ components. spaCy then does the following:
 >
 > ```json
 > {
->   "name": "example_model",
 >   "lang": "en",
+>   "name": "core_web_sm",
 >   "description": "Example model for spaCy",
->   "pipeline": ["tagger", "parser"]
+>   "pipeline": ["tagger", "parser", "ner"]
 > }
 > ```
 
@@ -51,11 +51,11 @@ components. spaCy then does the following:
 So when you call this...
 
 ```python
-nlp = spacy.load("en")
+nlp = spacy.load("en_core_web_sm")
 ```
 
-... the model tells spaCy to use the language `"en"` and the pipeline
-`["tagger", "parser", "ner"]`. spaCy will then initialize
+... the model's `meta.json` tells spaCy to use the language `"en"` and the
+pipeline `["tagger", "parser", "ner"]`. spaCy will then initialize
 `spacy.lang.en.English`, and create each pipeline component and add it to the
 processing pipeline. It'll then load in the model's data from its data directory
 and return the modified `Language` class for you to use as the `nlp` object.
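For context, the three helpers whose docstrings change above each take a sequence of rule strings and return a compiled regex object that plugs into the tokenizer hook named in the docstring (`Tokenizer.prefix_search`, `Tokenizer.suffix_search`, `Tokenizer.infix_finditer`). A minimal sketch of that flow, assuming spaCy v2.1+ with `en_core_web_sm` installed (the extra `"~"` prefix rule is an arbitrary example, not one of the defaults):

```python
import spacy
from spacy.util import (
    compile_infix_regex,
    compile_prefix_regex,
    compile_suffix_regex,
)

nlp = spacy.load("en_core_web_sm")

# The defaults are plain sequences of pattern strings, so any sequence works
# here -- e.g. extend the default prefixes with an example "~" rule.
prefixes = list(nlp.Defaults.prefixes) + ["~"]

# Compile each rule set and wire it into the matching Tokenizer hook.
nlp.tokenizer.prefix_search = compile_prefix_regex(prefixes).search
nlp.tokenizer.suffix_search = compile_suffix_regex(nlp.Defaults.suffixes).search
nlp.tokenizer.infix_finditer = compile_infix_regex(nlp.Defaults.infixes).finditer

print([t.text for t in nlp("~hello world")])
# the leading "~" is now split off as its own token: ['~', 'hello', 'world']
```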