mirror of https://github.com/explosion/spaCy.git
configure_custom_sent_spans example
This commit is contained in:
parent
c68169f83f
commit
4d37ac3f33
|
@ -368,13 +368,17 @@ To change any of the settings, you can edit the `config.cfg` and re-run the
|
||||||
training. To change any of the functions, like the span getter, you can replace
|
training. To change any of the functions, like the span getter, you can replace
|
||||||
the name of the referenced function – e.g. `@span_getters = "sent_spans.v1"` to
|
the name of the referenced function – e.g. `@span_getters = "sent_spans.v1"` to
|
||||||
process sentences. You can also register your own functions using the
|
process sentences. You can also register your own functions using the
|
||||||
`span_getters` registry:
|
`span_getters` registry. For instance, the following custom function returns
|
||||||
|
`Span` objects following sentence boundaries, unless a sentence exceeds a
|
||||||
|
certain number of tokens, in which case subsentences of at most `max_length`
|
||||||
|
tokens are returned.
|
||||||
|
|
||||||
> #### config.cfg
|
> #### config.cfg
|
||||||
>
|
>
|
||||||
> ```ini
|
> ```ini
|
||||||
> [components.transformer.model.get_spans]
|
> [components.transformer.model.get_spans]
|
||||||
> @span_getters = "custom_sent_spans"
|
> @span_getters = "custom_sent_spans"
|
||||||
|
> max_length = 25
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
```python
|
```python
|
||||||
|
@ -382,12 +386,23 @@ process sentences. You can also register your own functions using the
|
||||||
import spacy_transformers
|
import spacy_transformers
|
||||||
|
|
||||||
@spacy_transformers.registry.span_getters("custom_sent_spans")
|
@spacy_transformers.registry.span_getters("custom_sent_spans")
|
||||||
def configure_custom_sent_spans():
|
def configure_custom_sent_spans(max_length: int):
|
||||||
# TODO: write custom example
|
def get_custom_sent_spans(docs):
|
||||||
def get_sent_spans(docs):
|
spans = []
|
||||||
return [list(doc.sents) for doc in docs]
|
for doc in docs:
|
||||||
|
spans.append([])
|
||||||
|
for sent in doc.sents:
|
||||||
|
start = 0
|
||||||
|
end = max_length
|
||||||
|
while end <= len(sent):
|
||||||
|
spans[-1].append(sent[start:end])
|
||||||
|
start += max_length
|
||||||
|
end += max_length
|
||||||
|
if start < len(sent):
|
||||||
|
spans[-1].append(sent[start : len(sent)])
|
||||||
|
return spans
|
||||||
|
|
||||||
return get_sent_spans
|
return get_custom_sent_spans
|
||||||
```
|
```
|
||||||
|
|
||||||
To resolve the config during training, spaCy needs to know about your custom
|
To resolve the config during training, spaCy needs to know about your custom
|
||||||
|
|
Loading…
Reference in New Issue