Add assert_docs_equal util to compare two docs

This commit is contained in:
Ines Montani 2017-01-12 21:56:52 +01:00
parent eac3f700fb
commit 442237787c
1 changed file with 20 additions and 0 deletions

View File

@@ -43,3 +43,23 @@ def apply_transition_sequence(parser, doc, sequence):
def get_cosine(vec1, vec2):
    """Get cosine for two given vectors.

    Returns the dot product of ``vec1`` and ``vec2`` divided by the product
    of their Euclidean norms. If either vector has zero norm the cosine is
    undefined; 0.0 is returned in that case instead of dividing by zero
    (which would yield NaN and a RuntimeWarning).
    """
    # Compute the denominator once so it can be checked before dividing.
    norm_product = numpy.linalg.norm(vec1) * numpy.linalg.norm(vec2)
    if norm_product == 0:
        return 0.0
    return numpy.dot(vec1, vec2) / norm_product
def assert_docs_equal(doc1, doc2):
    """Assert that two docs carry the same tokens and annotations.

    Compares, token by token, the ``orth``, ``pos``, ``tag``, head index,
    ``dep``, ``ent_type`` and ``ent_iob`` values of both docs, then the
    sentence spans (only when both docs report ``is_parsed``) and the
    entity spans.

    Spans are compared by their token offsets (plus ``label`` for entities)
    rather than with ``==`` on the span objects themselves: the spans belong
    to two different docs, so object equality would depend on how the span
    type implements comparison.

    Raises AssertionError on the first mismatch.
    """
    # tokens
    assert [t.orth for t in doc1] == [t.orth for t in doc2]
    # tags
    assert [t.pos for t in doc1] == [t.pos for t in doc2]
    assert [t.tag for t in doc1] == [t.tag for t in doc2]
    # parse
    assert [t.head.i for t in doc1] == [t.head.i for t in doc2]
    assert [t.dep for t in doc1] == [t.dep for t in doc2]
    if doc1.is_parsed and doc2.is_parsed:
        sents1 = [(s.start, s.end) for s in doc1.sents]
        sents2 = [(s.start, s.end) for s in doc2.sents]
        assert sents1 == sents2
    # entities
    assert [t.ent_type for t in doc1] == [t.ent_type for t in doc2]
    assert [t.ent_iob for t in doc1] == [t.ent_iob for t in doc2]
    ents1 = [(e.start, e.end, e.label) for e in doc1.ents]
    ents2 = [(e.start, e.end, e.label) for e in doc2.ents]
    assert ents1 == ents2