* Set up an array POS_SENSES that denotes the set of valid senses for each POS tag. This way, we can do bitwise & between a lexeme's senses and the ones available for its POS tag, to get the allowable senses for the token.

This commit is contained in:
Matthew Honnibal 2015-07-01 20:12:13 +02:00
parent 2b8459d9a8
commit e6d828a9af
2 changed files with 37 additions and 0 deletions

View File

@ -1,4 +1,6 @@
# Enum of Wordnet supersenses # Enum of Wordnet supersenses
cimport parts_of_speech
from .typedefs cimport flags_t
cpdef enum: cpdef enum:
A_behavior A_behavior
@ -54,3 +56,7 @@ cpdef enum:
V_social V_social
V_stative V_stative
V_weather V_weather
# Per-POS-tag bitmask of allowable supersenses. The array has one flags_t slot
# per universal POS tag (indexed by the parts_of_speech tag value); bit i of a
# slot is set when the sense with enum value i is valid for that tag, so a
# bitwise & with a lexeme's sense flags yields the senses allowed for a token.
cdef flags_t[<int>parts_of_speech.N_UNIV_TAGS] POS_SENSES

View File

@ -1,4 +1,35 @@
from __future__ import unicode_literals from __future__ import unicode_literals
cimport parts_of_speech
# Build POS_SENSES: for each universal POS tag, a bitmask whose bit i is set
# when the supersense with enum value i is valid for that tag.

# Start every tag with an empty mask; only ADJ, NOUN and VERB receive senses
# below, so all other tags end up with no allowable senses.
POS_SENSES[<int>parts_of_speech.NO_TAG] = 0
POS_SENSES[<int>parts_of_speech.ADJ] = 0
POS_SENSES[<int>parts_of_speech.ADV] = 0
POS_SENSES[<int>parts_of_speech.ADP] = 0
POS_SENSES[<int>parts_of_speech.CONJ] = 0
POS_SENSES[<int>parts_of_speech.DET] = 0
POS_SENSES[<int>parts_of_speech.NOUN] = 0
POS_SENSES[<int>parts_of_speech.NUM] = 0
POS_SENSES[<int>parts_of_speech.PRON] = 0
POS_SENSES[<int>parts_of_speech.PRT] = 0
POS_SENSES[<int>parts_of_speech.VERB] = 0
POS_SENSES[<int>parts_of_speech.X] = 0
POS_SENSES[<int>parts_of_speech.PUNCT] = 0
POS_SENSES[<int>parts_of_speech.EOL] = 0

cdef int _sense = 0

# The literal 1 is widened to flags_t *before* shifting. A bare `1 << _sense`
# is a C int shift (both operands are int), which is undefined once _sense
# reaches the width of int -- and the sense enum has more members than that.
# NOTE(review): flags_t is cimported in the matching .pxd from .typedefs;
# presumably a 64-bit unsigned type -- confirm it is wide enough for V_weather.

# Senses in the enum range [A_behavior, N_act) are valid for adjectives.
for _sense in range(A_behavior, N_act):
    POS_SENSES[<int>parts_of_speech.ADJ] |= (<flags_t>1) << _sense

# Senses in the enum range [N_act, V_body) are valid for nouns.
for _sense in range(N_act, V_body):
    POS_SENSES[<int>parts_of_speech.NOUN] |= (<flags_t>1) << _sense

# Senses in the enum range [V_body, V_weather] (inclusive) are valid for verbs.
for _sense in range(V_body, V_weather + 1):
    POS_SENSES[<int>parts_of_speech.VERB] |= (<flags_t>1) << _sense
STRINGS = ( STRINGS = (
'A_behavior', 'A_behavior',