mirror of https://github.com/explosion/spaCy.git
* Fix issue #37: missing check_flag attribute from Token class
This commit is contained in:
parent
5032f2a5c7
commit
0962ffc095
|
@ -89,3 +89,5 @@ cdef class Token:
|
|||
return self
|
||||
|
||||
cdef int take_ownership_of_c_data(self) except -1
|
||||
|
||||
cpdef bint check_flag(self, attr_id_t flag_id) except -1
|
||||
|
|
|
@ -9,6 +9,7 @@ from .typedefs cimport LEMMA
|
|||
from .typedefs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER
|
||||
from .typedefs cimport POS, LEMMA
|
||||
from .parts_of_speech import UNIV_POS_NAMES
|
||||
from .lexeme cimport check_flag
|
||||
|
||||
from unidecode import unidecode
|
||||
|
||||
|
@ -252,6 +253,10 @@ cdef class Token:
|
|||
def __unicode__(self):
|
||||
return self.string
|
||||
|
||||
cpdef bint check_flag(self, attr_id_t flag_id) except -1:
    """Report whether the flag identified by `flag_id` is set for this token.

    The lookup is delegated to the module-level `check_flag` helper
    (cimported from `.lexeme`), applied to the lexeme this token points at
    through `self.c.lex`.
    """
    cdef bint is_set = check_flag(self.c.lex, flag_id)
    return is_set
|
||||
|
||||
|
||||
cdef int take_ownership_of_c_data(self) except -1:
|
||||
owned_data = <TokenC*>PyMem_Malloc(sizeof(TokenC) * self.array_len)
|
||||
memcpy(owned_data, self.c, sizeof(TokenC) * self.array_len)
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
from __future__ import unicode_literals
|
||||
from spacy.en import English
|
||||
from spacy.en.attrs import IS_ALPHA, IS_ASCII, IS_DIGIT, IS_LOWER, IS_PUNCT
|
||||
from spacy.en.attrs import IS_SPACE, IS_TITLE, IS_UPPER, LIKE_URL, LIKE_NUM
|
||||
from spacy.en.attrs import IS_STOP
|
||||
|
||||
import pytest
|
||||
|
||||
@pytest.fixture
def token():
    """Return the first token of a short sample run through `English()`."""
    pipeline = English()
    doc = pipeline(u'Give it back! He pleaded.')
    return doc[0]
|
||||
|
||||
|
||||
def test_strings(token):
    """Check the string-valued attributes exposed by the `token` fixture."""
    # Surface-form attributes.
    assert token.orth_ == 'Give'
    assert token.lower_ == 'give'
    assert token.shape_ == 'Xxxx'
    assert token.prefix_ == 'G'
    assert token.suffix_ == 'ive'

    # Linguistic annotations.
    assert token.lemma_ == 'give'
    assert token.pos_ == 'VERB'
    assert token.tag_ == 'VB'
    assert token.dep_ == 'ROOT'
|
||||
|
||||
|
||||
def test_flags(token):
    """Spot-check `Token.check_flag` against two flag IDs.

    Regression coverage for issue #37, where `check_flag` was missing
    from the `Token` class.
    """
    assert token.check_flag(IS_ALPHA)
    assert not token.check_flag(IS_DIGIT)
    # TODO: Test more of these, esp. if a bug is found
|
Loading…
Reference in New Issue