mirror of https://github.com/explosion/spaCy.git
Replace pytokenizations with internal alignment (#6293)
* Replace pytokenizations with an internal alignment algorithm that is restricted to only allow differences in whitespace and capitalization
* Rename `spacy.training.align` to `spacy.training.alignment` to contain the `Alignment` dataclass
* Implement `get_alignments` in `spacy.training.align`
* Refactor trailing whitespace handling
* Remove the unnecessary exception for empty docs: allow a non-empty whitespace-only doc to be aligned with an empty doc
* Remove the empty-doc exceptions completely
Parent: a4b32b9552
Commit: 1c4df8fd09
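The headline change: alignment no longer goes through the external pytokenizations package but through a small internal Cython module, and it tolerates exactly two kinds of difference between the texts, whitespace and capitalization. A minimal sketch of the resulting behavior (not part of the diff; the expected values are hand-traced from the algorithm added below):

from spacy.training import Alignment

# Case and token-boundary differences are fine as long as the
# whitespace-stripped, lowercased texts are identical.
align = Alignment.from_strings(["Obama", "'s", "podcasts"], ["obama", "'", "s", "podcasts"])
assert list(align.x2y.lengths) == [1, 2, 1]  # "'s" spans "'" and "s"
assert list(align.x2y.dataXd) == [0, 1, 2, 3]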
pyproject.toml
@@ -8,7 +8,6 @@ requires = [
     "murmurhash>=0.28.0,<1.1.0",
     "thinc>=8.0.0rc0,<8.1.0",
     "blis>=0.4.0,<0.8.0",
-    "pytokenizations",
     "pathy"
 ]
 build-backend = "setuptools.build_meta"
requirements.txt
@@ -15,7 +15,6 @@ numpy>=1.15.0
 requests>=2.13.0,<3.0.0
 tqdm>=4.38.0,<5.0.0
 pydantic>=1.5.0,<1.7.0
-pytokenizations
 # Official Python utilities
 setuptools
 packaging>=20.0
setup.cfg
@@ -52,7 +52,6 @@ install_requires =
     numpy>=1.15.0
     requests>=2.13.0,<3.0.0
     pydantic>=1.5.0,<1.7.0
-    pytokenizations
    jinja2
     # Official Python utilities
     setuptools
setup.py
@@ -49,6 +49,7 @@ MOD_NAMES = [
     "spacy.pipeline._parser_internals.stateclass",
     "spacy.pipeline._parser_internals.transition_system",
     "spacy.tokenizer",
+    "spacy.training.align",
     "spacy.training.gold_io",
     "spacy.tokens.doc",
     "spacy.tokens.span",
spacy/errors.py
@@ -564,7 +564,10 @@ class Errors:
             "a string value from {expected} but got: '{arg}'")
     E948 = ("`Matcher.add` received invalid 'patterns' argument: expected "
             "a list, but got: {arg_type}")
-    E949 = ("Can only create an alignment when the texts are the same.")
+    E949 = ("Unable to align tokens for the predicted and reference docs. It "
+            "is only possible to align the docs when both texts are the same "
+            "except for whitespace and capitalization. The predicted tokens "
+            "start with: {x}. The reference tokens start with: {y}.")
     E952 = ("The section '{name}' is not a valid section in the provided config.")
     E953 = ("Mismatched IDs received by the Tok2Vec listener: {id1} vs. {id2}")
     E954 = ("The Tok2Vec listener did not receive any valid input from an upstream "
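The expanded E949 message is raised by the new `get_alignments` (added below) whenever the concatenated texts differ in anything other than whitespace and capitalization. A quick sketch of the failure mode (not part of the diff):

import pytest
from spacy.training import Alignment

# Same text modulo whitespace and capitalization: aligns fine.
Alignment.from_strings(["Hello", "world"], ["hello world"])
# Any other difference raises a ValueError carrying E949.
with pytest.raises(ValueError):
    Alignment.from_strings(["hello", "world"], ["goodbye", "world"])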
spacy/tests/training/test_training.py
@@ -2,6 +2,7 @@ import numpy
 from spacy.training import offsets_to_biluo_tags, biluo_tags_to_offsets, Alignment
 from spacy.training import biluo_tags_to_spans, iob_to_biluo
 from spacy.training import Corpus, docs_to_json, Example
+from spacy.training.align import get_alignments
 from spacy.training.converters import json_to_docs
 from spacy.lang.en import English
 from spacy.tokens import Doc, DocBin
@@ -492,36 +493,35 @@ def test_roundtrip_docs_to_docbin(doc):
     assert cats["BAKING"] == reloaded_example.reference.cats["BAKING"]
 
 
-@pytest.mark.skip("Outdated")
 @pytest.mark.parametrize(
     "tokens_a,tokens_b,expected",
     [
-        (["a", "b", "c"], ["ab", "c"], (3, [-1, -1, 1], [-1, 2], {0: 0, 1: 0}, {})),
+        (["a", "b", "c"], ["ab", "c"], ([[0], [0], [1]], [[0, 1], [2]])),
         (
             ["a", "b", '"', "c"],
             ['ab"', "c"],
-            (4, [-1, -1, -1, 1], [-1, 3], {0: 0, 1: 0, 2: 0}, {}),
+            ([[0], [0], [0], [1]], [[0, 1, 2], [3]]),
         ),
-        (["a", "bc"], ["ab", "c"], (4, [-1, -1], [-1, -1], {0: 0}, {1: 1})),
+        (["a", "bc"], ["ab", "c"], ([[0], [0, 1]], [[0, 1], [1]])),
         (
             ["ab", "c", "d"],
             ["a", "b", "cd"],
-            (6, [-1, -1, -1], [-1, -1, -1], {1: 2, 2: 2}, {0: 0, 1: 0}),
+            ([[0, 1], [2], [2]], [[0], [0], [1, 2]]),
         ),
         (
             ["a", "b", "cd"],
             ["a", "b", "c", "d"],
-            (3, [0, 1, -1], [0, 1, -1, -1], {}, {2: 2, 3: 2}),
+            ([[0], [1], [2, 3]], [[0], [1], [2], [2]]),
         ),
-        ([" ", "a"], ["a"], (1, [-1, 0], [1], {}, {})),
+        ([" ", "a"], ["a"], ([[], [0]], [[1]])),
     ],
 )
 def test_align(tokens_a, tokens_b, expected):  # noqa
-    cost, a2b, b2a, a2b_multi, b2a_multi = align(tokens_a, tokens_b)  # noqa
-    assert (cost, list(a2b), list(b2a), a2b_multi, b2a_multi) == expected  # noqa
+    a2b, b2a = get_alignments(tokens_a, tokens_b)
+    assert (a2b, b2a) == expected  # noqa
     # check symmetry
-    cost, a2b, b2a, a2b_multi, b2a_multi = align(tokens_b, tokens_a)  # noqa
-    assert (cost, list(b2a), list(a2b), b2a_multi, a2b_multi) == expected  # noqa
+    a2b, b2a = get_alignments(tokens_b, tokens_a)  # noqa
+    assert (b2a, a2b) == expected  # noqa
 
 
 def test_goldparse_startswith_space(en_tokenizer):
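The updated expectations document the new return format: the old pytokenizations-based `align` returned a `(cost, a2b, b2a, a2b_multi, b2a_multi)` tuple with `-1` for unaligned tokens, while `get_alignments` returns two lists of lists, one entry per token, holding the indices of the aligned tokens on the other side. Reading the one-to-many case from the table above (a sketch; values taken straight from the test data):

from spacy.training.align import get_alignments

a2b, b2a = get_alignments(["a", "b", "cd"], ["a", "b", "c", "d"])
assert a2b == [[0], [1], [2, 3]]    # "cd" covers both "c" and "d"
assert b2a == [[0], [1], [2], [2]]  # "c" and "d" each map back to "cd"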
@@ -539,6 +539,21 @@ def test_goldparse_startswith_space(en_tokenizer):
     assert example.get_aligned("DEP", as_string=True) == [None, "ROOT"]
 
 
+def test_goldparse_endswith_space(en_tokenizer):
+    text = "a\n"
+    doc = en_tokenizer(text)
+    gold_words = ["a"]
+    entities = ["U-DATE"]
+    deps = ["ROOT"]
+    heads = [0]
+    example = Example.from_dict(
+        doc, {"words": gold_words, "entities": entities, "deps": deps, "heads": heads}
+    )
+    ner_tags = example.get_aligned_ner()
+    assert ner_tags == ["U-DATE", "O"]
+    assert example.get_aligned("DEP", as_string=True) == ["ROOT", None]
+
+
 def test_gold_constructor():
     """Test that the Example constructor works fine"""
     nlp = English()
@@ -676,6 +691,87 @@ def test_alignment_different_texts():
     Alignment.from_strings(other_tokens, spacy_tokens)
 
 
+def test_alignment_spaces(en_vocab):
+    # single leading whitespace
+    other_tokens = [" ", "i listened to", "obama", "'", "s", "podcasts", "."]
+    spacy_tokens = ["i", "listened", "to", "obama", "'s", "podcasts."]
+    align = Alignment.from_strings(other_tokens, spacy_tokens)
+    assert list(align.x2y.lengths) == [0, 3, 1, 1, 1, 1, 1]
+    assert list(align.x2y.dataXd) == [0, 1, 2, 3, 4, 4, 5, 5]
+    assert list(align.y2x.lengths) == [1, 1, 1, 1, 2, 2,]
+    assert list(align.y2x.dataXd) == [1, 1, 1, 2, 3, 4, 5, 6]
+
+    # multiple leading whitespace tokens
+    other_tokens = [" ", " ", "i listened to", "obama", "'", "s", "podcasts", "."]
+    spacy_tokens = ["i", "listened", "to", "obama", "'s", "podcasts."]
+    align = Alignment.from_strings(other_tokens, spacy_tokens)
+    assert list(align.x2y.lengths) == [0, 0, 3, 1, 1, 1, 1, 1]
+    assert list(align.x2y.dataXd) == [0, 1, 2, 3, 4, 4, 5, 5]
+    assert list(align.y2x.lengths) == [1, 1, 1, 1, 2, 2,]
+    assert list(align.y2x.dataXd) == [2, 2, 2, 3, 4, 5, 6, 7]
+
+    # both with leading whitespace, not identical
+    other_tokens = [" ", " ", "i listened to", "obama", "'", "s", "podcasts", "."]
+    spacy_tokens = [" ", "i", "listened", "to", "obama", "'s", "podcasts."]
+    align = Alignment.from_strings(other_tokens, spacy_tokens)
+    assert list(align.x2y.lengths) == [1, 0, 3, 1, 1, 1, 1, 1]
+    assert list(align.x2y.dataXd) == [0, 1, 2, 3, 4, 5, 5, 6, 6]
+    assert list(align.y2x.lengths) == [1, 1, 1, 1, 1, 2, 2]
+    assert list(align.y2x.dataXd) == [0, 2, 2, 2, 3, 4, 5, 6, 7]
+
+    # same leading whitespace, different tokenization
+    other_tokens = [" ", " ", "i listened to", "obama", "'", "s", "podcasts", "."]
+    spacy_tokens = ["  ", "i", "listened", "to", "obama", "'s", "podcasts."]
+    align = Alignment.from_strings(other_tokens, spacy_tokens)
+    assert list(align.x2y.lengths) == [1, 1, 3, 1, 1, 1, 1, 1]
+    assert list(align.x2y.dataXd) == [0, 0, 1, 2, 3, 4, 5, 5, 6, 6]
+    assert list(align.y2x.lengths) == [2, 1, 1, 1, 1, 2, 2]
+    assert list(align.y2x.dataXd) == [0, 1, 2, 2, 2, 3, 4, 5, 6, 7]
+
+    # only one with trailing whitespace
+    other_tokens = ["i listened to", "obama", "'", "s", "podcasts", ".", " "]
+    spacy_tokens = ["i", "listened", "to", "obama", "'s", "podcasts."]
+    align = Alignment.from_strings(other_tokens, spacy_tokens)
+    assert list(align.x2y.lengths) == [3, 1, 1, 1, 1, 1, 0]
+    assert list(align.x2y.dataXd) == [0, 1, 2, 3, 4, 4, 5, 5]
+    assert list(align.y2x.lengths) == [1, 1, 1, 1, 2, 2]
+    assert list(align.y2x.dataXd) == [0, 0, 0, 1, 2, 3, 4, 5]
+
+    # different trailing whitespace
+    other_tokens = ["i listened to", "obama", "'", "s", "podcasts", ".", " ", " "]
+    spacy_tokens = ["i", "listened", "to", "obama", "'s", "podcasts.", " "]
+    align = Alignment.from_strings(other_tokens, spacy_tokens)
+    assert list(align.x2y.lengths) == [3, 1, 1, 1, 1, 1, 1, 0]
+    assert list(align.x2y.dataXd) == [0, 1, 2, 3, 4, 4, 5, 5, 6]
+    assert list(align.y2x.lengths) == [1, 1, 1, 1, 2, 2, 1]
+    assert list(align.y2x.dataXd) == [0, 0, 0, 1, 2, 3, 4, 5, 6]
+
+    # same trailing whitespace, different tokenization
+    other_tokens = ["i listened to", "obama", "'", "s", "podcasts", ".", " ", " "]
+    spacy_tokens = ["i", "listened", "to", "obama", "'s", "podcasts.", "  "]
+    align = Alignment.from_strings(other_tokens, spacy_tokens)
+    assert list(align.x2y.lengths) == [3, 1, 1, 1, 1, 1, 1, 1]
+    assert list(align.x2y.dataXd) == [0, 1, 2, 3, 4, 4, 5, 5, 6, 6]
+    assert list(align.y2x.lengths) == [1, 1, 1, 1, 2, 2, 2]
+    assert list(align.y2x.dataXd) == [0, 0, 0, 1, 2, 3, 4, 5, 6, 7]
+
+    # differing whitespace is allowed
+    other_tokens = ["a", " \n ", "b", "c"]
+    spacy_tokens = ["a", "b", " ", "c"]
+    align = Alignment.from_strings(other_tokens, spacy_tokens)
+    assert list(align.x2y.dataXd) == [0, 1, 3]
+    assert list(align.y2x.dataXd) == [0, 2, 3]
+
+    # other differences in whitespace are allowed
+    other_tokens = [" ", "a"]
+    spacy_tokens = ["  ", "a", " "]
+    align = Alignment.from_strings(other_tokens, spacy_tokens)
+
+    other_tokens = ["a", " "]
+    spacy_tokens = ["a", "  "]
+    align = Alignment.from_strings(other_tokens, spacy_tokens)
+
+
 def test_retokenized_docs(doc):
     a = doc.to_array(["TAG"])
     doc1 = Doc(doc.vocab, words=[t.text for t in doc]).from_array(["TAG"], a)
spacy/training/__init__.py
@@ -1,6 +1,6 @@
 from .corpus import Corpus  # noqa: F401
 from .example import Example, validate_examples, validate_get_examples  # noqa: F401
-from .align import Alignment  # noqa: F401
+from .alignment import Alignment  # noqa: F401
 from .augment import dont_augment, orth_variants_augmenter  # noqa: F401
 from .iob_utils import iob_to_biluo, biluo_to_iob  # noqa: F401
 from .iob_utils import offsets_to_biluo_tags, biluo_tags_to_offsets  # noqa: F401
spacy/training/align.pyx (new file)
@@ -0,0 +1,66 @@
+from typing import List, Tuple
+from itertools import chain
+import re
+
+from ..errors import Errors
+
+
+def get_alignments(A: List[str], B: List[str]) -> Tuple[List[List[int]], List[List[int]]]:
+    # Create character-to-token mappings
+    char_to_token_a = tuple(chain(*((i,) * len(x) for i, x in enumerate(A))))
+    char_to_token_b = tuple(chain(*((i,) * len(x) for i, x in enumerate(B))))
+    str_a = "".join(A).lower()
+    str_b = "".join(B).lower()
+    cdef int len_str_a = len(str_a)
+    cdef int len_str_b = len(str_b)
+    # Check that the two texts only differ in whitespace and capitalization
+    if re.sub(r"\s+", "", str_a) != re.sub(r"\s+", "", str_b) or \
+            len_str_a != len(char_to_token_a) or \
+            len_str_b != len(char_to_token_b):
+        raise ValueError(Errors.E949.format(x=str(A[:10]), y=str(B[:10])))
+    cdef int char_idx_a = 0
+    cdef int char_idx_b = 0
+    cdef int token_idx_a = 0
+    cdef int token_idx_b = 0
+    cdef int prev_token_idx_a = -1
+    cdef int prev_token_idx_b = -1
+    a2b = []
+    b2a = []
+    while char_idx_a < len_str_a and char_idx_b < len_str_b:
+        # Find the current token position from the character position
+        token_idx_a = char_to_token_a[char_idx_a]
+        token_idx_b = char_to_token_b[char_idx_b]
+        # Add a set for the next token if a token boundary has been crossed
+        if prev_token_idx_a != token_idx_a:
+            a2b.append(set())
+        if prev_token_idx_b != token_idx_b:
+            b2a.append(set())
+        # Process the alignment at the current position
+        if A[token_idx_a] == B[token_idx_b]:
+            # Current tokens are identical
+            a2b[-1].add(token_idx_b)
+            b2a[-1].add(token_idx_a)
+            char_idx_a += len(A[token_idx_a])
+            char_idx_b += len(B[token_idx_b])
+        elif str_a[char_idx_a] == str_b[char_idx_b]:
+            # Current chars are identical
+            a2b[-1].add(token_idx_b)
+            b2a[-1].add(token_idx_a)
+            char_idx_a += 1
+            char_idx_b += 1
+        elif str_a[char_idx_a].isspace():
+            # Skip unaligned whitespace char in A
+            char_idx_a += 1
+        elif str_b[char_idx_b].isspace():
+            # Skip unaligned whitespace char in B
+            char_idx_b += 1
+        else:
+            # This should never happen
+            raise ValueError(Errors.E949.format(x=str(A[:10]), y=str(B[:10])))
+        prev_token_idx_a = token_idx_a
+        prev_token_idx_b = token_idx_b
+    # Process unaligned trailing whitespace
+    a2b.extend([set()] * len(set(char_to_token_a[char_idx_a:])))
+    b2a.extend([set()] * len(set(char_to_token_b[char_idx_b:])))
+    # Return values as sorted lists per token position
+    return [sorted(x) for x in a2b], [sorted(x) for x in b2a]
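The function is a two-pointer walk over the concatenated, lowercased texts: identical tokens (the fast path) or identical characters advance both sides, unaligned whitespace advances one side only, and anything else hits the should-never-happen E949 branch. One consequence worth spelling out: whitespace-only tokens with no counterpart simply end up with empty alignments. A small worked example (not part of the diff; consistent with the whitespace case in `test_alignment_spaces` above):

from spacy.training.align import get_alignments

# The whitespace-only tokens " \n " and " " have no counterpart, so they
# align to nothing; all other tokens align one-to-one.
a2b, b2a = get_alignments(["a", " \n ", "b", "c"], ["a", "b", " ", "c"])
assert a2b == [[0], [], [1], [3]]
assert b2a == [[0], [2], [], [3]]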
spacy/training/alignment.py (renamed from align.py)
@@ -2,9 +2,8 @@ from typing import List
 import numpy
 from thinc.types import Ragged
 from dataclasses import dataclass
-import tokenizations
 
-from ..errors import Errors
+from .align import get_alignments
 
 
 @dataclass
@@ -20,9 +19,7 @@ class Alignment:
 
     @classmethod
     def from_strings(cls, A: List[str], B: List[str]) -> "Alignment":
-        if "".join(A).replace(" ", "").lower() != "".join(B).replace(" ", "").lower():
-            raise ValueError(Errors.E949)
-        x2y, y2x = tokenizations.get_alignments(A, B)
+        x2y, y2x = get_alignments(A, B)
         return Alignment.from_indices(x2y=x2y, y2x=y2x)
 
 
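`from_strings` no longer needs its own validation or the tokenizations import: `get_alignments` performs the check and raises E949 itself, and `from_indices` packs the lists of lists into thinc `Ragged` arrays. A sketch of the resulting structure, reusing the leading-whitespace values from the tests above:

from spacy.training import Alignment

other_tokens = [" ", "i listened to", "obama", "'", "s", "podcasts", "."]
spacy_tokens = ["i", "listened", "to", "obama", "'s", "podcasts."]
align = Alignment.from_strings(other_tokens, spacy_tokens)
# Ragged storage: lengths is the number of aligned tokens per position,
# dataXd the flattened indices; the leading space aligns to nothing.
assert list(align.x2y.lengths) == [0, 3, 1, 1, 1, 1, 1]
assert list(align.x2y.dataXd) == [0, 1, 2, 3, 4, 4, 5, 5]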
spacy/training/example.pyx
@@ -7,7 +7,7 @@ from ..tokens.doc cimport Doc
 from ..tokens.span cimport Span
 from ..tokens.span import Span
 from ..attrs import IDS
-from .align import Alignment
+from .alignment import Alignment
 from .iob_utils import biluo_to_iob, offsets_to_biluo_tags, doc_to_biluo_tags
 from .iob_utils import biluo_tags_to_spans
 from ..errors import Errors, Warnings