mirror of https://github.com/explosion/spaCy.git
Prefix span getters
This commit is contained in:
parent
5afe6447cd
commit
23b7d9cfa3
|
@ -29,7 +29,7 @@ name = "{{ transformer["name"] }}"
|
|||
tokenizer_config = {"use_fast": true}
|
||||
|
||||
[components.transformer.model.get_spans]
|
||||
@span_getters = "strided_spans.v1"
|
||||
@span_getters = "spacy-transformers.strided_spans.v1"
|
||||
window = 128
|
||||
stride = 96
|
||||
|
||||
|
|
|
@ -320,7 +320,7 @@ for details and system requirements.
|
|||
> tokenizer_config = {"use_fast": true}
|
||||
>
|
||||
> [model.get_spans]
|
||||
> @span_getters = "strided_spans.v1"
|
||||
> @span_getters = "spacy-transformers.strided_spans.v1"
|
||||
> window = 128
|
||||
> stride = 96
|
||||
> ```
|
||||
|
|
|
@ -453,7 +453,7 @@ using the `@spacy.registry.span_getters` decorator.
|
|||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> @spacy.registry.span_getters("sent_spans.v1")
|
||||
> @spacy.registry.span_getters("custom_sent_spans")
|
||||
> def configure_get_sent_spans() -> Callable:
|
||||
> def get_sent_spans(docs: Iterable[Doc]) -> List[List[Span]]:
|
||||
> return [list(doc.sents) for doc in docs]
|
||||
|
@ -472,7 +472,7 @@ using the `@spacy.registry.span_getters` decorator.
|
|||
>
|
||||
> ```ini
|
||||
> [transformer.model.get_spans]
|
||||
> @span_getters = "doc_spans.v1"
|
||||
> @span_getters = "spacy-transformers.doc_spans.v1"
|
||||
> ```
|
||||
|
||||
Create a span getter that uses the whole document as its spans. This is the best
|
||||
|
@ -485,7 +485,7 @@ texts.
|
|||
>
|
||||
> ```ini
|
||||
> [transformer.model.get_spans]
|
||||
> @span_getters = "sent_spans.v1"
|
||||
> @span_getters = "spacy-transformers.sent_spans.v1"
|
||||
> ```
|
||||
|
||||
Create a span getter that uses sentence boundary markers to extract the spans.
|
||||
|
@ -500,7 +500,7 @@ more meaningful windows to attend over.
|
|||
>
|
||||
> ```ini
|
||||
> [transformer.model.get_spans]
|
||||
> @span_getters = "strided_spans.v1"
|
||||
> @span_getters = "spacy-transformers.strided_spans.v1"
|
||||
> window = 128
|
||||
> stride = 96
|
||||
> ```
|
||||
|
|
|
@ -331,7 +331,7 @@ name = "bert-base-cased"
|
|||
tokenizer_config = {"use_fast": true}
|
||||
|
||||
[components.transformer.model.get_spans]
|
||||
@span_getters = "doc_spans.v1"
|
||||
@span_getters = "spacy-transformers.doc_spans.v1"
|
||||
|
||||
[components.transformer.annotation_setter]
|
||||
@annotation_setters = "spacy-transformers.null_annotation_setter.v1"
|
||||
|
@ -369,8 +369,9 @@ all defaults.
|
|||
|
||||
To change any of the settings, you can edit the `config.cfg` and re-run the
|
||||
training. To change any of the functions, like the span getter, you can replace
|
||||
the name of the referenced function – e.g. `@span_getters = "sent_spans.v1"` to
|
||||
process sentences. You can also register your own functions using the
|
||||
the name of the referenced function – e.g.
|
||||
`@span_getters = "spacy-transformers.sent_spans.v1"` to process sentences. You
|
||||
can also register your own functions using the
|
||||
[`span_getters` registry](/api/top-level#registry). For instance, the following
|
||||
custom function returns [`Span`](/api/span) objects following sentence
|
||||
boundaries, unless a sentence exceeds a certain number of tokens, in which case
|
||||
|
|
Loading…
Reference in New Issue