diff --git a/spacy/training/iob_utils.py b/spacy/training/iob_utils.py index 63deed3a5..03a502912 100644 --- a/spacy/training/iob_utils.py +++ b/spacy/training/iob_utils.py @@ -50,6 +50,10 @@ def _consume_ent(tags): return [start] + middle + [end] +def biluo_tags_from_doc(doc, missing="O"): + return doc_to_biluo_tags(doc, missing) + + def doc_to_biluo_tags(doc, missing="O"): return offsets_to_biluo_tags( doc, @@ -58,6 +62,10 @@ def doc_to_biluo_tags(doc, missing="O"): ) +def biluo_tags_from_offsets(doc, entities, missing="O"): + return offsets_to_biluo_tags(doc, entities, missing) + + def offsets_to_biluo_tags(doc, entities, missing="O"): """Encode labelled spans into per-token tags, using the Begin/In/Last/Unit/Out scheme (BILUO). @@ -143,6 +151,10 @@ def offsets_to_biluo_tags(doc, entities, missing="O"): return biluo +def spans_from_biluo_tags(doc, tags): + return biluo_tags_to_spans(doc, tags) + + def biluo_tags_to_spans(doc, tags): """Encode per-token tags following the BILUO scheme into Span object, e.g. to overwrite the doc.ents. @@ -161,6 +173,10 @@ def biluo_tags_to_spans(doc, tags): return spans +def offsets_from_biluo_tags(doc, tags): + return biluo_tags_to_offsets(doc, tags) + + def biluo_tags_to_offsets(doc, tags): """Encode per-token tags following the BILUO scheme into entity offsets.