mirror of https://github.com/explosion/spaCy.git
Merge pull request #6571 from adrianeboyd/bugfix/debug-data-missing-vectors
Fix alignment and vector checks in debug data
This commit is contained in:
commit
3f90bffa27
|
@@ -504,13 +504,18 @@ def _compile_gold(
|
||||||
for eg in examples:
|
for eg in examples:
|
||||||
gold = eg.reference
|
gold = eg.reference
|
||||||
doc = eg.predicted
|
doc = eg.predicted
|
||||||
valid_words = [x for x in gold if x is not None]
|
valid_words = [x.text for x in gold]
|
||||||
data["words"].update(valid_words)
|
data["words"].update(valid_words)
|
||||||
data["n_words"] += len(valid_words)
|
data["n_words"] += len(valid_words)
|
||||||
data["n_misaligned_words"] += len(gold) - len(valid_words)
|
align = eg.alignment
|
||||||
|
for token in doc:
|
||||||
|
if token.orth_.isspace():
|
||||||
|
continue
|
||||||
|
if align.x2y.lengths[token.i] != 1:
|
||||||
|
data["n_misaligned_words"] += 1
|
||||||
data["texts"].add(doc.text)
|
data["texts"].add(doc.text)
|
||||||
if len(nlp.vocab.vectors):
|
if len(nlp.vocab.vectors):
|
||||||
for word in valid_words:
|
for word in [t.text for t in doc]:
|
||||||
if nlp.vocab.strings[word] not in nlp.vocab.vectors:
|
if nlp.vocab.strings[word] not in nlp.vocab.vectors:
|
||||||
data["words_missing_vectors"].update([word])
|
data["words_missing_vectors"].update([word])
|
||||||
if "ner" in factory_names:
|
if "ner" in factory_names:
|
||||||
|
|
Loading…
Reference in New Issue