* Add Token.conjuncts property

This commit is contained in:
Matthew Honnibal 2015-04-17 01:40:53 +02:00
parent 4757899370
commit f7ffd94e6a
2 changed files with 56 additions and 0 deletions

View File

@ -11,6 +11,7 @@ from .typedefs cimport LEMMA
from .typedefs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER
from .typedefs cimport POS, LEMMA, TAG, DEP
from .parts_of_speech import UNIV_POS_NAMES
from .parts_of_speech cimport CONJ, PUNCT
from .lexeme cimport check_flag
from .spans import Span
from .structs cimport UniStr
@ -538,6 +539,27 @@ cdef class Token:
return Token.cinit(self.vocab, self._string,
self.c + self.c.head, self.i + self.c.head, self.array_len,
self._seq)
property conjuncts:
    def __get__(self):
        """Get a list of the words conjoined with this token.

        The tokens yielded by ``self.lefts`` are scanned in reverse order.
        Once one tagged CONJ has been seen, every subsequently visited one
        whose part-of-speech equals this token's is collected.  If a CONJ
        was seen, the result is the collected tokens (restored to the order
        ``self.lefts`` yields them) followed by this token itself.

        Otherwise, if this token is not its own head and is contained in
        its head's ``conjuncts``, the head's list is returned.  In every
        other case the result is an empty list.

        Tokens tagged CONJ or PUNCT never collect conjuncts of their own;
        they can only inherit a list through their head.
        """
        cdef Token word
        # Bound before the branch: it used to be assigned only inside the
        # `if` below, so CONJ/PUNCT tokens read an unbound local afterwards.
        seen_conj = False
        conjs = []
        if self.c.pos != CONJ and self.c.pos != PUNCT:
            for word in reversed(list(self.lefts)):
                if word.c.pos == CONJ:
                    seen_conj = True
                elif seen_conj and word.c.pos == self.c.pos:
                    conjs.append(word)
        if seen_conj:
            # Collected while walking backwards; flip back to forward order.
            conjs.reverse()
            conjs.append(self)
            return conjs
        # Evaluate the head and its conjuncts once each: the lookup is
        # recursive, so repeating it doubles the work at every level.
        head = self.head
        if self is not head:
            head_conjs = head.conjuncts
            if self in head_conjs:
                return head_conjs
        return []
property ent_type:
def __get__(self):

34
tests/test_conjuncts.py Normal file
View File

@ -0,0 +1,34 @@
"""Test the Token.conjuncts property"""
from __future__ import unicode_literals
from spacy.en import English
import pytest
# Single pipeline instance created at import time and shared by every test
# in this module.
NLU = English()
def orths(tokens):
    """Return a list with the ``orth_`` attribute of each token, in order."""
    texts = []
    for token in tokens:
        texts.append(token.orth_)
    return texts
def test_simple_two():
    """Both members of a two-word coordination report the same conjuncts."""
    doc = NLU('I lost money and pride.')
    expected = ['money', 'pride']
    # Index 4 is "pride" (the last conjunct), index 2 is "money".
    assert orths(doc[4].conjuncts) == expected
    assert orths(doc[2].conjuncts) == expected
def test_comma_three():
    """A comma-separated list closed by "and" yields all three conjuncts."""
    doc = NLU('I found my wallet, phone and keys.')
    expected = ['wallet', 'phone', 'keys']
    # Index -2 is "keys" (just before the final full stop), index 3 is "wallet".
    assert orths(doc[-2].conjuncts) == expected
    assert orths(doc[3].conjuncts) == expected
def test_and_three():
    """A list joined by repeated "and" yields all three conjuncts."""
    doc = NLU('I found my wallet and phone and keys.')
    expected = ['wallet', 'phone', 'keys']
    # Index -2 is "keys" (just before the final full stop), index 3 is "wallet".
    assert orths(doc[-2].conjuncts) == expected
    assert orths(doc[3].conjuncts) == expected