mirror of https://github.com/explosion/spaCy.git
read entity_ruler patterns with srsly.read_jsonl.v1
This commit is contained in:
parent
193e0d5a98
commit
ff9ac39c88
|
@ -1410,7 +1410,9 @@ class Language:
|
|||
kwargs = component_cfg.get(name, {})
|
||||
# Allow component_cfg to overwrite the top-level kwargs.
|
||||
kwargs.setdefault("batch_size", batch_size)
|
||||
if hasattr(proc, "pipe"):
|
||||
# non-trainable components may have a pipe() implementation that refers to dummy
|
||||
# predict and set_annotations methods
|
||||
if hasattr(proc, "pipe") and hasattr(proc, "is_trainable") and proc.is_trainable():
|
||||
f = functools.partial(proc.pipe, **kwargs)
|
||||
else:
|
||||
# Apply the function, but yield the doc
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
from typing import Optional, Union, List, Dict, Tuple, Iterable, Any, Callable
|
||||
from typing import Optional, Union, List, Dict, Tuple, Iterable, Any, Callable, Sequence
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
import srsly
|
||||
|
@ -190,19 +190,18 @@ class EntityRuler(Pipe):
|
|||
get_examples: Callable[[], Iterable[Example]],
|
||||
*,
|
||||
nlp: Optional[Language] = None,
|
||||
patterns_path: Optional[Path] = None
|
||||
patterns: Optional[Sequence[PatternType]] = None,
|
||||
):
|
||||
"""Initialize the pipe for training.
|
||||
|
||||
get_examples (Callable[[], Iterable[Example]]): Function that
|
||||
returns a representative sample of gold-standard Example objects.
|
||||
nlp (Language): The current nlp object the component is part of.
|
||||
patterns_path: Path to serialized patterns.
|
||||
patterns Optional[Iterable[PatternType]]: The list of patterns.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/entityruler#initialize
|
||||
"""
|
||||
if patterns_path:
|
||||
patterns = srsly.read_jsonl(patterns_path)
|
||||
if patterns:
|
||||
self.add_patterns(patterns)
|
||||
|
||||
|
||||
|
|
|
@ -437,7 +437,9 @@ cdef class Parser(Pipe):
|
|||
for name, component in nlp.pipeline:
|
||||
if component is self:
|
||||
break
|
||||
if hasattr(component, "pipe"):
|
||||
# non-trainable components may have a pipe() implementation that refers to dummy
|
||||
# predict and set_annotations methods
|
||||
if hasattr(component, "pipe") and hasattr(component, "is_trainable") and component.is_trainable():
|
||||
doc_sample = list(component.pipe(doc_sample, batch_size=8))
|
||||
else:
|
||||
doc_sample = [component(doc) for doc in doc_sample]
|
||||
|
|
|
@ -119,7 +119,7 @@ def validate_init_settings(
|
|||
if types don't match or required values are missing.
|
||||
|
||||
func (Callable): The initialize method of a given component etc.
|
||||
settings (Dict[str, Any]): The settings from the repsective [initialize] block.
|
||||
settings (Dict[str, Any]): The settings from the respective [initialize] block.
|
||||
section (str): Initialize section, for error message.
|
||||
name (str): Name of the block in the section.
|
||||
exclude (Iterable[str]): Parameter names to exclude from schema.
|
||||
|
|
|
@ -121,7 +121,7 @@ def test_attributeruler_init_patterns(nlp, pattern_dicts):
|
|||
assert doc.has_annotation("LEMMA")
|
||||
assert doc.has_annotation("MORPH")
|
||||
nlp.remove_pipe("attribute_ruler")
|
||||
# initialize with patterns from asset
|
||||
# initialize with patterns from misc registry
|
||||
nlp.config["initialize"]["components"]["attribute_ruler"] = {
|
||||
"patterns": {"@misc": "attribute_ruler_patterns"}
|
||||
}
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
import pytest
|
||||
|
||||
from spacy import registry
|
||||
from spacy.tokens import Span
|
||||
from spacy.language import Language
|
||||
from spacy.pipeline import EntityRuler
|
||||
|
@ -11,6 +13,7 @@ def nlp():
|
|||
|
||||
|
||||
@pytest.fixture
|
||||
@registry.misc("entity_ruler_patterns")
|
||||
def patterns():
|
||||
return [
|
||||
{"label": "HELLO", "pattern": "hello world"},
|
||||
|
@ -42,6 +45,29 @@ def test_entity_ruler_init(nlp, patterns):
|
|||
assert doc.ents[1].label_ == "BYE"
|
||||
|
||||
|
||||
def test_entity_ruler_init_patterns(nlp, patterns):
|
||||
# initialize with patterns
|
||||
ruler = nlp.add_pipe("entity_ruler")
|
||||
assert len(ruler.labels) == 0
|
||||
ruler.initialize(lambda: [], patterns=patterns)
|
||||
assert len(ruler.labels) == 4
|
||||
doc = nlp("hello world bye bye")
|
||||
assert doc.ents[0].label_ == "HELLO"
|
||||
assert doc.ents[1].label_ == "BYE"
|
||||
nlp.remove_pipe("entity_ruler")
|
||||
# initialize with patterns from misc registry
|
||||
nlp.config["initialize"]["components"]["entity_ruler"] = {
|
||||
"patterns": {"@misc": "entity_ruler_patterns"}
|
||||
}
|
||||
ruler = nlp.add_pipe("entity_ruler")
|
||||
assert len(ruler.labels) == 0
|
||||
nlp.initialize()
|
||||
assert len(ruler.labels) == 4
|
||||
doc = nlp("hello world bye bye")
|
||||
assert doc.ents[0].label_ == "HELLO"
|
||||
assert doc.ents[1].label_ == "BYE"
|
||||
|
||||
|
||||
def test_entity_ruler_existing(nlp, patterns):
|
||||
ruler = nlp.add_pipe("entity_ruler")
|
||||
ruler.add_patterns(patterns)
|
||||
|
|
|
@ -82,13 +82,16 @@ Initialize the component with patterns from a file.
|
|||
>
|
||||
> ```python
|
||||
> entity_ruler = nlp.add_pipe("entity_ruler")
|
||||
> entity_ruler.initialize(lambda: [], nlp=nlp, patterns_path=patterns_path)
|
||||
> entity_ruler.initialize(lambda: [], nlp=nlp, patterns=patterns)
|
||||
> ```
|
||||
>
|
||||
> ```ini
|
||||
> ### config.cfg
|
||||
> [initialize.components.entity_ruler]
|
||||
> patterns_path = "data/patterns/patterns.jsonl"
|
||||
>
|
||||
> [initialize.components.entity_ruler.patterns]
|
||||
> @readers = "srsly.read_jsonl.v1"
|
||||
> path = "corpus/entity_ruler_patterns.jsonl
|
||||
> ```
|
||||
|
||||
| Name | Description |
|
||||
|
@ -96,7 +99,7 @@ Initialize the component with patterns from a file.
|
|||
| `get_examples` | Function that returns gold-standard annotations in the form of [`Example`](/api/example) objects. Not used by the `EntityRuler`. ~~Callable[[], Iterable[Example]]~~ |
|
||||
| _keyword-only_ | |
|
||||
| `nlp` | The current `nlp` object. Defaults to `None`. ~~Optional[Language]~~ |
|
||||
| `labels` | Path to the .json file holding the serialized patterns. ~~Path~~ |
|
||||
| `patterns` | The list of patterns. Defaults to `None`. ~~Optional[Sequence[Dict[str, Union[str, List[Dict[str, Any]]]]]]~~ |
|
||||
|
||||
## EntityRuler.\_\len\_\_ {#len tag="method"}
|
||||
|
||||
|
|
Loading…
Reference in New Issue