diff --git a/spacy/pipeline/pipes.pyx b/spacy/pipeline/pipes.pyx
index da3602b79..6b948e585 100644
--- a/spacy/pipeline/pipes.pyx
+++ b/spacy/pipeline/pipes.pyx
@@ -14,7 +14,6 @@ from thinc.neural.util import to_categorical
 from thinc.neural.util import get_array_module
 
 from spacy.kb import KnowledgeBase
-from ..cli.pretrain import get_cossim_loss
 from .functions import merge_subtokens
 from ..tokens.doc cimport Doc
 from ..syntax.nn_parser cimport Parser
@@ -1164,7 +1163,6 @@ class EntityLinker(Pipe):
 
                 candidates = self.kb.get_candidates(mention)
                 random.shuffle(candidates)
-                nr_neg = 0
                 for c in candidates:
                     kb_id = c.entity_
                     entity_encoding = c.entity_vector
@@ -1180,21 +1178,20 @@ class EntityLinker(Pipe):
                     if kb_id == gold_kb:
                         cats.append([1])
                     else:
-                        nr_neg += 1
                         cats.append([0])
 
         if len(entity_encodings) > 0:
             assert len(priors) == len(entity_encodings) == len(context_docs) == len(cats) == len(type_vectors)
 
-            context_encodings, bp_context = self.model.tok2vec.begin_update(context_docs, drop=drop)
+            cats = self.model.ops.asarray(cats, dtype="float32")
             entity_encodings = self.model.ops.asarray(entity_encodings, dtype="float32")
+            context_encodings, bp_context = self.model.tok2vec.begin_update(context_docs, drop=drop)
 
             mention_encodings = [list(context_encodings[i]) + list(entity_encodings[i]) + priors[i] + type_vectors[i]
                                  for i in range(len(entity_encodings))]
             pred, bp_mention = self.model.begin_update(self.model.ops.asarray(mention_encodings, dtype="float32"), drop=drop)
-            cats = self.model.ops.asarray(cats, dtype="float32")
 
-            loss, d_scores = self.get_loss(prediction=pred, golds=cats, docs=None)
+            loss, d_scores = self.get_loss(scores=pred, golds=cats, docs=docs)
             mention_gradient = bp_mention(d_scores, sgd=sgd)
 
             context_gradients = [list(x[0:self.cfg.get("context_width")]) for x in mention_gradient]
@@ -1205,18 +1202,12 @@ class EntityLinker(Pipe):
             return loss
         return 0
 
-    def get_loss(self, docs, golds, prediction):
-        d_scores = (prediction - golds)
+    def get_loss(self, docs, golds, scores):
+        d_scores = (scores - golds)
         loss = (d_scores ** 2).sum()
         loss = loss / len(golds)
         return loss, d_scores
 
-    def get_loss_old(self, docs, golds, scores):
-        # this loss function assumes we're only using positive examples
-        loss, gradients = get_cossim_loss(yh=scores, y=golds)
-        loss = loss / len(golds)
-        return loss, gradients
-
     def __call__(self, doc):
         entities, kb_ids = self.predict([doc])
         self.set_annotations([doc], entities, kb_ids)
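
For context on the `get_loss` change above: the removed `get_loss_old` delegated to `get_cossim_loss` from `cli.pretrain` and assumed only positive examples, whereas the retained `get_loss` is a plain mean squared error over the candidate scores, so it also covers the negative candidates (`cats.append([0])`). A minimal standalone sketch of that computation, using hypothetical numpy arrays in place of the model's real score matrix:

```python
import numpy

def get_loss(docs, golds, scores):
    # Difference between predicted and gold scores; returned as the
    # backprop signal (proportional to the true MSE gradient).
    d_scores = scores - golds
    # Sum of squared errors, averaged over the number of gold labels.
    loss = (d_scores ** 2).sum()
    loss = loss / len(golds)
    return loss, d_scores

# Hypothetical batch: three candidates for one mention, the first being
# the gold entity (cats == [1]) and the other two negatives (cats == [0]).
golds = numpy.asarray([[1.0], [0.0], [0.0]], dtype="float32")
scores = numpy.asarray([[0.8], [0.3], [0.1]], dtype="float32")
loss, d_scores = get_loss(docs=None, golds=golds, scores=scores)
print(loss)  # (0.04 + 0.09 + 0.01) / 3 ≈ 0.0467
```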