From 0962ffc0955ead4f377470498e2f026ce46469d0 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Thu, 26 Mar 2015 15:06:26 +0100
Subject: [PATCH] * Fix issue #37: missing check_flag attribute from Token class

---
NOTE(review): the line structure and indentation of this patch were
reconstructed from a whitespace-collapsed copy. Hunk sizes match the hunk
headers and the diffstat, but the leading whitespace of context lines is
inferred -- verify against the target tree before applying.

 spacy/tokens.pxd        |  2 ++
 spacy/tokens.pyx        |  5 +++++
 tests/test_token_api.py | 31 +++++++++++++++++++++++++++++++
 3 files changed, 38 insertions(+)
 create mode 100644 tests/test_token_api.py

diff --git a/spacy/tokens.pxd b/spacy/tokens.pxd
index 1b482f597..fc9a3a784 100644
--- a/spacy/tokens.pxd
+++ b/spacy/tokens.pxd
@@ -89,3 +89,5 @@ cdef class Token:
         return self
 
     cdef int take_ownership_of_c_data(self) except -1
+
+    cpdef bint check_flag(self, attr_id_t flag_id) except -1
diff --git a/spacy/tokens.pyx b/spacy/tokens.pyx
index 55ebec770..f5683780e 100644
--- a/spacy/tokens.pyx
+++ b/spacy/tokens.pyx
@@ -9,6 +9,7 @@ from .typedefs cimport LEMMA
 from .typedefs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER
 from .typedefs cimport POS, LEMMA
 from .parts_of_speech import UNIV_POS_NAMES
+from .lexeme cimport check_flag
 
 from unidecode import unidecode
 
@@ -252,6 +253,10 @@ cdef class Token:
     def __unicode__(self):
         return self.string
 
+    cpdef bint check_flag(self, attr_id_t flag_id) except -1:
+        return check_flag(self.c.lex, flag_id)
+
+
     cdef int take_ownership_of_c_data(self) except -1:
         owned_data = PyMem_Malloc(sizeof(TokenC) * self.array_len)
         memcpy(owned_data, self.c, sizeof(TokenC) * self.array_len)
diff --git a/tests/test_token_api.py b/tests/test_token_api.py
new file mode 100644
index 000000000..6e35701b4
--- /dev/null
+++ b/tests/test_token_api.py
@@ -0,0 +1,31 @@
+from __future__ import unicode_literals
+from spacy.en import English
+from spacy.en.attrs import IS_ALPHA, IS_ASCII, IS_DIGIT, IS_LOWER, IS_PUNCT
+from spacy.en.attrs import IS_SPACE, IS_TITLE, IS_UPPER, LIKE_URL, LIKE_NUM
+from spacy.en.attrs import IS_STOP
+
+import pytest
+
+@pytest.fixture
+def token():
+    nlp = English()
+    tokens = nlp(u'Give it back! He pleaded.')
+    return tokens[0]
+
+
+def test_strings(token):
+    assert token.orth_ == 'Give'
+    assert token.lower_ == 'give'
+    assert token.shape_ == 'Xxxx'
+    assert token.prefix_ == 'G'
+    assert token.suffix_ == 'ive'
+    assert token.lemma_ == 'give'
+    assert token.pos_ == 'VERB'
+    assert token.tag_ == 'VB'
+    assert token.dep_ == 'ROOT'
+
+
+def test_flags(token):
+    assert token.check_flag(IS_ALPHA)
+    assert not token.check_flag(IS_DIGIT)
+    # TODO: Test more of these, esp. if a bug is found