mirror of https://github.com/explosion/spaCy.git
Add assert_docs_equal util to compare two docs
This commit is contained in:
parent
eac3f700fb
commit
442237787c
|
@ -43,3 +43,23 @@ def apply_transition_sequence(parser, doc, sequence):
|
||||||
def get_cosine(vec1, vec2):
    """Get cosine for two given vectors.

    vec1, vec2: Vectors accepted by ``numpy.dot`` and ``numpy.linalg.norm``.
    RETURNS: The dot product of the two vectors divided by the product of
        their norms.
    """
    # NOTE(review): there is no guard for zero-norm input; the division is
    # performed as-is, exactly as before.
    norm_product = numpy.linalg.norm(vec1) * numpy.linalg.norm(vec2)
    return numpy.dot(vec1, vec2) / norm_product
|
||||||
|
|
||||||
|
|
||||||
|
def assert_docs_equal(doc1, doc2):
    """Assert that two docs carry the same annotations.

    The checks run in this order: token ``orth`` values, ``pos`` and ``tag``
    values, dependency heads (``head.i``) and ``dep`` values, sentences
    (only when both docs report ``is_parsed``), and finally entity
    annotations (per-token ``ent_type`` / ``ent_iob`` and the ``ents``
    sequences).

    doc1, doc2: Iterables of tokens exposing ``orth``, ``pos``, ``tag``,
        ``head.i``, ``dep``, ``ent_type`` and ``ent_iob``; the docs
        themselves expose ``is_parsed``, ``sents`` and ``ents``.
    RAISES: AssertionError at the first annotation layer that differs.
    """
    # tokens
    assert [t.orth for t in doc1] == [t.orth for t in doc2]

    # tags
    assert [t.pos for t in doc1] == [t.pos for t in doc2]
    assert [t.tag for t in doc1] == [t.tag for t in doc2]

    # parse
    assert [t.head.i for t in doc1] == [t.head.i for t in doc2]
    assert [t.dep for t in doc1] == [t.dep for t in doc2]
    if doc1.is_parsed and doc2.is_parsed:
        # Sentences are only compared when both docs are flagged as parsed.
        # NOTE(review): items are compared with ``==``, so the outcome
        # depends on how sentence objects from different docs define
        # equality -- confirm against the span type in use.
        assert list(doc1.sents) == list(doc2.sents)

    # entities
    assert [t.ent_type for t in doc1] == [t.ent_type for t in doc2]
    assert [t.ent_iob for t in doc1] == [t.ent_iob for t in doc2]
    assert list(doc1.ents) == list(doc2.ents)
|
||||||
|
|
Loading…
Reference in New Issue