mirror of https://github.com/explosion/spaCy.git
Merge branch 'master' of https://github.com/honnibal/spaCy
This commit is contained in:
commit
25ed7be8f8
|
@ -38,3 +38,15 @@ def test_left_right(EN):
|
||||||
for child in word.rights:
|
for child in word.rights:
|
||||||
assert child.head.i == word.i
|
assert child.head.i == word.i
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.models
|
||||||
|
def test_lemmas(EN):
|
||||||
|
orig = EN(u'The geese are flying')
|
||||||
|
result = Doc(orig.vocab).from_bytes(orig.to_bytes())
|
||||||
|
the, geese, are, flying = result
|
||||||
|
assert the.lemma_ == 'the'
|
||||||
|
assert geese.lemma_ == 'goose'
|
||||||
|
assert are.lemma_ == 'be'
|
||||||
|
assert flying.lemma_ == 'fly'
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -398,7 +398,7 @@ cdef class Doc:
|
||||||
self.is_parsed = True
|
self.is_parsed = True
|
||||||
elif attr_id == TAG:
|
elif attr_id == TAG:
|
||||||
for i in range(length):
|
for i in range(length):
|
||||||
tokens[i].tag = values[i]
|
self.vocab.morphology.assign_tag(&tokens[i], values[i])
|
||||||
if not self.is_tagged and tokens[i].tag != 0:
|
if not self.is_tagged and tokens[i].tag != 0:
|
||||||
self.is_tagged = True
|
self.is_tagged = True
|
||||||
elif attr_id == POS:
|
elif attr_id == POS:
|
||||||
|
@ -413,6 +413,8 @@ cdef class Doc:
|
||||||
elif attr_id == ENT_TYPE:
|
elif attr_id == ENT_TYPE:
|
||||||
for i in range(length):
|
for i in range(length):
|
||||||
tokens[i].ent_type = values[i]
|
tokens[i].ent_type = values[i]
|
||||||
|
else:
|
||||||
|
raise ValueError("Unknown attribute ID: %d" % attr_id)
|
||||||
set_children_from_heads(self.data, self.length)
|
set_children_from_heads(self.data, self.length)
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
@ -469,8 +471,7 @@ cdef class Doc:
|
||||||
# Update fields
|
# Update fields
|
||||||
token.lex = lex
|
token.lex = lex
|
||||||
token.spacy = self.data[end-1].spacy
|
token.spacy = self.data[end-1].spacy
|
||||||
# What to do about morphology??
|
self.vocab.morphology.assign_tag(token, self.vocab.strings[tag])
|
||||||
# TODO: token.morph = ???
|
|
||||||
token.tag = self.vocab.strings[tag]
|
token.tag = self.vocab.strings[tag]
|
||||||
token.lemma = self.vocab.strings[lemma]
|
token.lemma = self.vocab.strings[lemma]
|
||||||
if ent_type == 'O':
|
if ent_type == 'O':
|
||||||
|
|
Loading…
Reference in New Issue