* Set up an array POS_SENSES that denotes the set of valid senses for each POS tag. This way, we can do bitwise & between a lexeme's senses and the ones available for its POS tag, to get the allowable senses for the token.

2015-07-01 20:12:13 +02:00 · 2015-07-01 20:12:13 +02:00 · e6d828a9af
parent 2b8459d9a8
commit e6d828a9af
2 changed files with 37 additions and 0 deletions
--- a/spacy/senses.pxd
+++ b/spacy/senses.pxd
@ -1,4 +1,6 @@
 # Enum of Wordnet supersenses
 cimport parts_of_speech
 from .typedefs cimport flags_t
 cpdef enum:
    A_behavior
@ -54,3 +56,7 @@ cpdef enum:
    V_social
    V_stative
    V_weather
 # Per-POS-tag sense masks: one flags_t entry per tag index, sized by
 # parts_of_speech.N_UNIV_TAGS. senses.pyx fills it so that bit i of
 # POS_SENSES[tag] is set when sense i (a value of the enum above) is valid
 # for that tag; AND-ing it with a lexeme's sense flags is the intended use.
 cdef flags_t[<int>parts_of_speech.N_UNIV_TAGS] POS_SENSES
--- a/spacy/senses.pyx
+++ b/spacy/senses.pyx
@ -1,4 +1,35 @@
 from __future__ import unicode_literals
 cimport parts_of_speech
 # Build POS_SENSES: for each part-of-speech tag, a bitmask whose bit i is set
 # when sense i (a value of the senses enum) is allowed for that tag.
 #
 # Every listed tag starts with an empty mask; only ADJ, NOUN and VERB are
 # given senses by the loops below, all other tags keep a mask of 0.
 POS_SENSES[<int>parts_of_speech.NO_TAG] = 0
 POS_SENSES[<int>parts_of_speech.ADJ] = 0
 POS_SENSES[<int>parts_of_speech.ADV] = 0
 POS_SENSES[<int>parts_of_speech.ADP] = 0
 POS_SENSES[<int>parts_of_speech.CONJ] = 0
 POS_SENSES[<int>parts_of_speech.DET] = 0
 POS_SENSES[<int>parts_of_speech.NOUN] = 0
 POS_SENSES[<int>parts_of_speech.NUM] = 0
 POS_SENSES[<int>parts_of_speech.PRON] = 0
 POS_SENSES[<int>parts_of_speech.PRT] = 0
 POS_SENSES[<int>parts_of_speech.VERB] = 0
 POS_SENSES[<int>parts_of_speech.X] = 0
 POS_SENSES[<int>parts_of_speech.PUNCT] = 0
 POS_SENSES[<int>parts_of_speech.EOL] = 0
 cdef int _sense = 0
 # NOTE: the shifted constant is cast to flags_t on purpose. A bare `1` is a
 # C int, so `1 << _sense` would be evaluated at int width and overflow
 # (undefined behaviour) as soon as a sense index reaches the width of int.
 # Assumes flags_t is an unsigned type wide enough for the largest sense
 # index -- TODO confirm against typedefs.pxd.
 #
 # Adjective senses occupy the half-open enum range [A_behavior, N_act).
 for _sense in range(A_behavior, N_act):
    POS_SENSES[<int>parts_of_speech.ADJ] |= (<flags_t>1) << _sense
 # Noun senses occupy [N_act, V_body).
 for _sense in range(N_act, V_body):
    POS_SENSES[<int>parts_of_speech.NOUN] |= (<flags_t>1) << _sense
 # Verb senses occupy [V_body, V_weather]; the +1 includes V_weather itself.
 for _sense in range(V_body, V_weather+1):
    POS_SENSES[<int>parts_of_speech.VERB] |= (<flags_t>1) << _sense
 STRINGS = (
    'A_behavior',