mirror of https://github.com/explosion/spaCy.git
* Set up an array POS_SENSES that denotes the set of valid senses for each POS tag. This way, we can do bitwise & between a lexeme's senses and the ones available for its POS tag, to get the allowable senses for the token.
This commit is contained in:
parent
2b8459d9a8
commit
e6d828a9af
|
@ -1,4 +1,6 @@
|
||||||
# Enum of Wordnet supersenses
|
# Enum of Wordnet supersenses
|
||||||
|
cimport parts_of_speech
|
||||||
|
from .typedefs cimport flags_t
|
||||||
|
|
||||||
cpdef enum:
|
cpdef enum:
|
||||||
A_behavior
|
A_behavior
|
||||||
|
@ -54,3 +56,7 @@ cpdef enum:
|
||||||
V_social
|
V_social
|
||||||
V_stative
|
V_stative
|
||||||
V_weather
|
V_weather
|
||||||
|
|
||||||
|
|
||||||
|
# Sense masks indexed by universal POS tag: bit i of POS_SENSES[tag] is set
# when the sense whose enum value is i is permitted for that tag.
cdef flags_t[<int>parts_of_speech.N_UNIV_TAGS] POS_SENSES
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,35 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
cimport parts_of_speech
|
||||||
|
|
||||||
|
|
||||||
|
# Start every universal POS tag with an empty sense mask. Only ADJ, NOUN and
# VERB receive bits below; every other tag listed here keeps a mask of 0.
POS_SENSES[<int>parts_of_speech.NO_TAG] = 0
POS_SENSES[<int>parts_of_speech.ADJ] = 0
POS_SENSES[<int>parts_of_speech.ADV] = 0
POS_SENSES[<int>parts_of_speech.ADP] = 0
POS_SENSES[<int>parts_of_speech.CONJ] = 0
POS_SENSES[<int>parts_of_speech.DET] = 0
POS_SENSES[<int>parts_of_speech.NOUN] = 0
POS_SENSES[<int>parts_of_speech.NUM] = 0
POS_SENSES[<int>parts_of_speech.PRON] = 0
POS_SENSES[<int>parts_of_speech.PRT] = 0
POS_SENSES[<int>parts_of_speech.VERB] = 0
POS_SENSES[<int>parts_of_speech.X] = 0
POS_SENSES[<int>parts_of_speech.PUNCT] = 0
POS_SENSES[<int>parts_of_speech.EOL] = 0


cdef int _sense = 0

# Bit i of a mask stands for the sense whose enum value is i. The literal 1 is
# widened to flags_t *before* shifting: a bare `1 << _sense` may be evaluated
# at C int width, which overflows once _sense reaches the int's sign bit, and
# the sense enum appears to run well past 31 members.
# NOTE(review): assumes flags_t is an unsigned integer type with at least as
# many bits as there are senses -- confirm against its typedef.

# Adjective senses: A_behavior up to, but not including, N_act.
for _sense in range(A_behavior, N_act):
    POS_SENSES[<int>parts_of_speech.ADJ] |= (<flags_t>1) << _sense

# Noun senses: N_act up to, but not including, V_body.
for _sense in range(N_act, V_body):
    POS_SENSES[<int>parts_of_speech.NOUN] |= (<flags_t>1) << _sense

# Verb senses: V_body through V_weather inclusive (hence the +1).
for _sense in range(V_body, V_weather+1):
    POS_SENSES[<int>parts_of_speech.VERB] |= (<flags_t>1) << _sense
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
STRINGS = (
|
STRINGS = (
|
||||||
'A_behavior',
|
'A_behavior',
|
||||||
|
|
Loading…
Reference in New Issue