diff --git a/spacy/language.py b/spacy/language.py index c3c49d331..ba244617e 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -1410,7 +1410,9 @@ class Language: kwargs = component_cfg.get(name, {}) # Allow component_cfg to overwrite the top-level kwargs. kwargs.setdefault("batch_size", batch_size) - if hasattr(proc, "pipe"): + # non-trainable components may have a pipe() implementation that refers to dummy + # predict and set_annotations methods + if hasattr(proc, "pipe") and hasattr(proc, "is_trainable") and proc.is_trainable(): f = functools.partial(proc.pipe, **kwargs) else: # Apply the function, but yield the doc diff --git a/spacy/pipeline/entityruler.py b/spacy/pipeline/entityruler.py index cad6dbdbc..6ca586d05 100644 --- a/spacy/pipeline/entityruler.py +++ b/spacy/pipeline/entityruler.py @@ -1,4 +1,4 @@ -from typing import Optional, Union, List, Dict, Tuple, Iterable, Any, Callable +from typing import Optional, Union, List, Dict, Tuple, Iterable, Any, Callable, Sequence from collections import defaultdict from pathlib import Path import srsly @@ -190,19 +190,18 @@ class EntityRuler(Pipe): get_examples: Callable[[], Iterable[Example]], *, nlp: Optional[Language] = None, - patterns_path: Optional[Path] = None + patterns: Optional[Sequence[PatternType]] = None, ): """Initialize the pipe for training. get_examples (Callable[[], Iterable[Example]]): Function that returns a representative sample of gold-standard Example objects. nlp (Language): The current nlp object the component is part of. - patterns_path: Path to serialized patterns. + patterns (Optional[Sequence[PatternType]]): The list of patterns. 
DOCS: https://nightly.spacy.io/api/entityruler#initialize """ - if patterns_path: - patterns = srsly.read_jsonl(patterns_path) + if patterns: self.add_patterns(patterns) diff --git a/spacy/pipeline/transition_parser.pyx b/spacy/pipeline/transition_parser.pyx index 2ad0acd3a..3b4406757 100644 --- a/spacy/pipeline/transition_parser.pyx +++ b/spacy/pipeline/transition_parser.pyx @@ -437,7 +437,9 @@ cdef class Parser(Pipe): for name, component in nlp.pipeline: if component is self: break - if hasattr(component, "pipe"): + # non-trainable components may have a pipe() implementation that refers to dummy + # predict and set_annotations methods + if hasattr(component, "pipe") and hasattr(component, "is_trainable") and component.is_trainable(): doc_sample = list(component.pipe(doc_sample, batch_size=8)) else: doc_sample = [component(doc) for doc in doc_sample] diff --git a/spacy/schemas.py b/spacy/schemas.py index 591b7e134..f4d306fd7 100644 --- a/spacy/schemas.py +++ b/spacy/schemas.py @@ -119,7 +119,7 @@ def validate_init_settings( if types don't match or required values are missing. func (Callable): The initialize method of a given component etc. - settings (Dict[str, Any]): The settings from the repsective [initialize] block. + settings (Dict[str, Any]): The settings from the respective [initialize] block. section (str): Initialize section, for error message. name (str): Name of the block in the section. exclude (Iterable[str]): Parameter names to exclude from schema. 
diff --git a/spacy/tests/pipeline/test_attributeruler.py b/spacy/tests/pipeline/test_attributeruler.py index c967bcdcd..fedeb192f 100644 --- a/spacy/tests/pipeline/test_attributeruler.py +++ b/spacy/tests/pipeline/test_attributeruler.py @@ -121,7 +121,7 @@ def test_attributeruler_init_patterns(nlp, pattern_dicts): assert doc.has_annotation("LEMMA") assert doc.has_annotation("MORPH") nlp.remove_pipe("attribute_ruler") - # initialize with patterns from asset + # initialize with patterns from misc registry nlp.config["initialize"]["components"]["attribute_ruler"] = { "patterns": {"@misc": "attribute_ruler_patterns"} } diff --git a/spacy/tests/pipeline/test_entity_ruler.py b/spacy/tests/pipeline/test_entity_ruler.py index d70d0326e..96deab24b 100644 --- a/spacy/tests/pipeline/test_entity_ruler.py +++ b/spacy/tests/pipeline/test_entity_ruler.py @@ -1,4 +1,6 @@ import pytest + +from spacy import registry from spacy.tokens import Span from spacy.language import Language from spacy.pipeline import EntityRuler @@ -11,6 +13,7 @@ def nlp(): @pytest.fixture +@registry.misc("entity_ruler_patterns") def patterns(): return [ {"label": "HELLO", "pattern": "hello world"}, @@ -42,6 +45,29 @@ def test_entity_ruler_init(nlp, patterns): assert doc.ents[1].label_ == "BYE" +def test_entity_ruler_init_patterns(nlp, patterns): + # initialize with patterns + ruler = nlp.add_pipe("entity_ruler") + assert len(ruler.labels) == 0 + ruler.initialize(lambda: [], patterns=patterns) + assert len(ruler.labels) == 4 + doc = nlp("hello world bye bye") + assert doc.ents[0].label_ == "HELLO" + assert doc.ents[1].label_ == "BYE" + nlp.remove_pipe("entity_ruler") + # initialize with patterns from misc registry + nlp.config["initialize"]["components"]["entity_ruler"] = { + "patterns": {"@misc": "entity_ruler_patterns"} + } + ruler = nlp.add_pipe("entity_ruler") + assert len(ruler.labels) == 0 + nlp.initialize() + assert len(ruler.labels) == 4 + doc = nlp("hello world bye bye") + assert doc.ents[0].label_ 
== "HELLO" + assert doc.ents[1].label_ == "BYE" + + def test_entity_ruler_existing(nlp, patterns): ruler = nlp.add_pipe("entity_ruler") ruler.add_patterns(patterns) diff --git a/website/docs/api/entityruler.md b/website/docs/api/entityruler.md index 052047635..b8aab2f50 100644 --- a/website/docs/api/entityruler.md +++ b/website/docs/api/entityruler.md @@ -82,13 +82,16 @@ Initialize the component with patterns from a file. > > ```python > entity_ruler = nlp.add_pipe("entity_ruler") -> entity_ruler.initialize(lambda: [], nlp=nlp, patterns_path=patterns_path) +> entity_ruler.initialize(lambda: [], nlp=nlp, patterns=patterns) > ``` > > ```ini > ### config.cfg > [initialize.components.entity_ruler] -> patterns_path = "data/patterns/patterns.jsonl" +> +> [initialize.components.entity_ruler.patterns] +> @readers = "srsly.read_jsonl.v1" +> path = "corpus/entity_ruler_patterns.jsonl" > ``` | Name | Description | @@ -96,7 +99,7 @@ Initialize the component with patterns from a file. | `get_examples` | Function that returns gold-standard annotations in the form of [`Example`](/api/example) objects. Not used by the `EntityRuler`. ~~Callable[[], Iterable[Example]]~~ | | _keyword-only_ | | | `nlp` | The current `nlp` object. Defaults to `None`. ~~Optional[Language]~~ | -| `labels` | Path to the .json file holding the serialized patterns. ~~Path~~ | +| `patterns` | The list of patterns. Defaults to `None`. ~~Optional[Sequence[Dict[str, Union[str, List[Dict[str, Any]]]]]]~~ | ## EntityRuler.\_\len\_\_ {#len tag="method"}