* Add pos_type features to POS tagger

This commit is contained in:
Matthew Honnibal 2014-12-10 08:08:55 +11:00
parent 42973c4b37
commit df3be14987
2 changed files with 20 additions and 9 deletions

View File

@@ -88,7 +88,8 @@ cpdef enum:
P2_prefix
P2_suffix
P2_pos
P2_sense
P2_lemma
P2_pos_type
P1_sic
P1_cluster
@@ -96,7 +97,8 @@ cpdef enum:
P1_prefix
P1_suffix
P1_pos
P1_sense
P1_lemma
P1_pos_type
W_sic
W_cluster
@@ -104,7 +106,8 @@ cpdef enum:
W_prefix
W_suffix
W_pos
W_sense
W_lemma
W_pos_type
N1_sic
N1_cluster
@@ -112,7 +115,8 @@ cpdef enum:
N1_prefix
N1_suffix
N1_pos
N1_sense
N1_lemma
N1_pos_type
N2_sic
N2_cluster
@@ -120,7 +124,8 @@ cpdef enum:
N2_prefix
N2_suffix
N2_pos
N2_sense
N2_lemma
N2_pos_type
N_CONTEXT_FIELDS

View File

@@ -98,10 +98,10 @@ POS_TAGS = {
POS_TEMPLATES = (
(W_sic,),
(P1_sic,),
(P1_lemma, P1_pos),
(P2_lemma, P2_pos),
(N1_sic,),
(N2_sic,),
(P2_sic,),
(W_suffix,),
(W_prefix,),
@@ -119,6 +119,11 @@ POS_TEMPLATES = (
(N2_cluster,),
(P1_cluster,),
(P2_cluster,),
(W_pos_type,),
(N1_pos_type,),
(N1_pos_type,),
(P1_pos, W_pos_type, N1_pos_type),
)
@@ -159,7 +164,7 @@ cdef class English(Language):
else:
fill_pos_context(context, i, t)
t[i].pos = self.pos_tagger.predict(context)
self.morphologizer.set_morph(i, t)
self.morphologizer.set_morph(i, t)
def train_pos(self, Tokens tokens, golds):
cdef int i
@@ -189,7 +194,8 @@ cdef inline void _fill_from_token(atom_t* context, const TokenC* t) nogil:
context[3] = t.lex.prefix
context[4] = t.lex.suffix
context[5] = t.pos
context[6] = t.sense
context[6] = t.lemma
context[7] = t.lex.pos_type
EN = English('en')