diff --git a/spacy/ml/models/entity_linker.py b/spacy/ml/models/entity_linker.py index 4d18d216a..299b6bb52 100644 --- a/spacy/ml/models/entity_linker.py +++ b/spacy/ml/models/entity_linker.py @@ -71,11 +71,10 @@ def span_maker_forward(model, docs: List[Doc], is_train) -> Tuple[Ragged, Callab cands.append((start_token, end_token)) candidates.append(ops.asarray2i(cands)) - candlens = ops.asarray1i([len(cands) for cands in candidates]) - candidates = ops.xp.concatenate(candidates) - outputs = Ragged(candidates, candlens) + lengths = model.ops.asarray1i([len(cands) for cands in candidates]) + out = Ragged(model.ops.flatten(candidates), lengths) # because this is just rearranging docs, the backprop does nothing - return outputs, lambda x: [] + return out, lambda x: [] @registry.misc("spacy.KBFromFile.v1") diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py index 4d683acc5..99f164f15 100644 --- a/spacy/tests/pipeline/test_entity_linker.py +++ b/spacy/tests/pipeline/test_entity_linker.py @@ -9,6 +9,7 @@ from spacy.compat import pickle from spacy.kb import Candidate, InMemoryLookupKB, get_candidates, KnowledgeBase from spacy.lang.en import English from spacy.ml import load_kb +from spacy.ml.models.entity_linker import build_span_maker from spacy.pipeline import EntityLinker from spacy.pipeline.legacy import EntityLinker_v1 from spacy.pipeline.tok2vec import DEFAULT_TOK2VEC_MODEL @@ -715,7 +716,11 @@ TRAIN_DATA = [ ("Russ Cochran was a member of University of Kentucky's golf team.", {"links": {(0, 12): {"Q7381115": 0.0, "Q2146908": 1.0}}, "entities": [(0, 12, "PERSON"), (43, 51, "LOC")], - "sent_starts": [1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}) + "sent_starts": [1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}), + # having a blank instance shouldn't break things + ("The weather is nice today.", + {"links": {}, "entities": [], + "sent_starts": [1, -1, 0, 0, 0, 0]}) ] GOLD_entities = ["Q2146908", "Q7381115", "Q7381115", "Q2146908"] 
# fmt: on
@@ -1196,3 +1201,24 @@ def test_threshold(meet_threshold: bool, config: Dict[str, Any]):
 
     assert len(doc.ents) == 1
     assert doc.ents[0].kb_id_ == entity_id if meet_threshold else EntityLinker.NIL
+
+
+def test_span_maker_forward_with_empty():
+    """The forward pass of the span maker may have a doc with no entities.
+
+    Runs a batch that mixes a doc carrying one entity with a doc carrying
+    none, and checks the per-doc lengths of the returned Ragged.
+    """
+    nlp = English()
+    doc1 = nlp("a b c")
+    ent = doc1[0:1]
+    ent.label_ = "X"
+    doc1.ents = [ent]
+    # no entities
+    doc2 = nlp("x y z")
+
+    # just to get a model
+    span_maker = build_span_maker()
+    spans, _ = span_maker([doc1, doc2], False)
+    # one candidate window for doc1's entity, none for the entity-less doc2
+    assert spans.lengths.tolist() == [1, 0]