mirror of https://github.com/explosion/spaCy.git
Document offsets_from_biluo_tags
This commit is contained in:
parent
fb663f9b7d
commit
b078e276e6
|
@ -163,3 +163,41 @@ p
|
||||||
+cell
|
+cell
|
||||||
| Unicode strings, describing the
|
| Unicode strings, describing the
|
||||||
| #[+a("/api/annotation#biluo") BILUO] tags.
|
| #[+a("/api/annotation#biluo") BILUO] tags.
|
||||||
|
|
||||||
|
+h(3, "offsets_from_biluo_tags") gold.offsets_from_biluo_tags
|
||||||
|
|
||||||
|
p
|
||||||
|
| Convert per-token tags following the
|
||||||
|
| #[+a("/api/annotation#biluo") BILUO scheme] into entity offsets.
|
||||||
|
|
||||||
|
+aside-code("Example").
|
||||||
|
from spacy.gold import offsets_from_biluo_tags
|
||||||
|
|
||||||
|
doc = nlp('I like London.')
|
||||||
|
tags = ['O', 'O', 'U-LOC', 'O']
|
||||||
|
entities = offsets_from_biluo_tags(doc, tags)
|
||||||
|
assert entities == [(7, 13, 'LOC')]
|
||||||
|
|
||||||
|
+table(["Name", "Type", "Description"])
|
||||||
|
+row
|
||||||
|
+cell #[code doc]
|
||||||
|
+cell #[code Doc]
|
||||||
|
+cell The document that the BILUO tags refer to.
|
||||||
|
|
||||||
|
+row
|
||||||
|
+cell #[code tags]
|
||||||
|
+cell iterable
|
||||||
|
+cell
|
||||||
|
| A sequence of #[+a("/api/annotation#biluo") BILUO] tags with
|
||||||
|
| each tag describing one token. Each tag string will be of the
|
||||||
|
| form of either #[code ""], #[code "O"] or
|
||||||
|
| #[code "{action}-{label}"], where action is one of #[code "B"],
|
||||||
|
| #[code "I"], #[code "L"], #[code "U"].
|
||||||
|
|
||||||
|
+row("foot")
|
||||||
|
+cell returns
|
||||||
|
+cell list
|
||||||
|
+cell
|
||||||
|
| A sequence of #[code (start, end, label)] triples. #[code start]
|
||||||
|
| and #[code end] will be character-offset integers denoting the
|
||||||
|
| slice into the original string.
|
||||||
|
|
|
@ -21,6 +21,13 @@ p
|
||||||
| #[strong experiment on your data] to find a solution that works best
|
| #[strong experiment on your data] to find a solution that works best
|
||||||
| for you.
|
| for you.
|
||||||
|
|
||||||
|
+aside("Tip: Converting entity annotations", "💡")
|
||||||
|
| You can train the entity recognizer with entity offsets or
|
||||||
|
| annotations in the #[+a("/api/annotation#biluo") BILUO scheme]. The
|
||||||
|
| #[code spacy.gold] module also exposes
|
||||||
|
| #[+a("/api/goldparse#util") two helper functions] to convert offsets to
|
||||||
|
| BILUO tags, and BILUO tags to entity offsets.
|
||||||
|
|
||||||
+h(3, "example-train-ner") Updating the Named Entity Recognizer
|
+h(3, "example-train-ner") Updating the Named Entity Recognizer
|
||||||
|
|
||||||
p
|
p
|
||||||
|
|
Loading…
Reference in New Issue