mirror of https://github.com/explosion/spaCy.git
Merge pull request #935 from ericzhao28/master
Add option to use label=ent_type in doc.merge arguments (Bug fix for issue #862)
This commit is contained in:
commit
0fefdfcbda
|
@ -19,6 +19,15 @@ def test_spans_merge_tokens(en_tokenizer):
|
||||||
assert doc[0].text == 'Los Angeles'
|
assert doc[0].text == 'Los Angeles'
|
||||||
assert doc[0].head.text == 'start'
|
assert doc[0].head.text == 'start'
|
||||||
|
|
||||||
|
doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads)
|
||||||
|
assert len(doc) == 4
|
||||||
|
assert doc[0].head.text == 'Angeles'
|
||||||
|
assert doc[1].head.text == 'start'
|
||||||
|
doc.merge(0, len('Los Angeles'), tag='NNP', lemma='Los Angeles', label='GPE')
|
||||||
|
assert len(doc) == 3
|
||||||
|
assert doc[0].text == 'Los Angeles'
|
||||||
|
assert doc[0].head.text == 'start'
|
||||||
|
assert doc[0].ent_type_ == 'GPE'
|
||||||
|
|
||||||
def test_spans_merge_heads(en_tokenizer):
|
def test_spans_merge_heads(en_tokenizer):
|
||||||
text = "I found a pilates class near work."
|
text = "I found a pilates class near work."
|
||||||
|
@ -114,4 +123,4 @@ def test_spans_subtree_size_check(en_tokenizer):
|
||||||
sent1 = list(doc.sents)[0]
|
sent1 = list(doc.sents)[0]
|
||||||
init_len = len(list(sent1.root.subtree))
|
init_len = len(list(sent1.root.subtree))
|
||||||
doc[0:2].merge('none', 'none', 'none')
|
doc[0:2].merge('none', 'none', 'none')
|
||||||
assert len(list(sent1.root.subtree)) == init_len - 1
|
assert len(list(sent1.root.subtree)) == init_len - 1
|
|
@ -667,6 +667,13 @@ cdef class Doc:
|
||||||
attributes[TAG] = self.vocab.strings[tag]
|
attributes[TAG] = self.vocab.strings[tag]
|
||||||
attributes[LEMMA] = self.vocab.strings[lemma]
|
attributes[LEMMA] = self.vocab.strings[lemma]
|
||||||
attributes[ENT_TYPE] = self.vocab.strings[ent_type]
|
attributes[ENT_TYPE] = self.vocab.strings[ent_type]
|
||||||
|
elif not args:
|
||||||
|
if "label" in attributes and ENT_TYPE not in attributes:
|
||||||
|
if type(attributes["label"]) == int:
|
||||||
|
attributes[ENT_TYPE] = attributes["label"]
|
||||||
|
else:
|
||||||
|
attributes[ENT_TYPE] = self.vocab.strings[attributes["label"]]
|
||||||
|
|
||||||
elif args:
|
elif args:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"Doc.merge received %d non-keyword arguments. "
|
"Doc.merge received %d non-keyword arguments. "
|
||||||
|
|
Loading…
Reference in New Issue