From df3be149871da40b5f15c49d8870220f7fc36b5d Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 10 Dec 2014 08:08:55 +1100 Subject: [PATCH] * Add pos_type features to POS tagger --- spacy/en.pxd | 15 ++++++++++----- spacy/en.pyx | 14 ++++++++++---- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/spacy/en.pxd b/spacy/en.pxd index 4ac8a126d..2ca081e47 100644 --- a/spacy/en.pxd +++ b/spacy/en.pxd @@ -88,7 +88,8 @@ cpdef enum: P2_prefix P2_suffix P2_pos - P2_sense + P2_lemma + P2_pos_type P1_sic P1_cluster @@ -96,7 +97,8 @@ cpdef enum: P1_prefix P1_suffix P1_pos - P1_sense + P1_lemma + P1_pos_type W_sic W_cluster @@ -104,7 +106,8 @@ cpdef enum: W_prefix W_suffix W_pos - W_sense + W_lemma + W_pos_type N1_sic N1_cluster @@ -112,7 +115,8 @@ cpdef enum: N1_prefix N1_suffix N1_pos - N1_sense + N1_lemma + N1_pos_type N2_sic N2_cluster @@ -120,7 +124,8 @@ cpdef enum: N2_prefix N2_suffix N2_pos - N2_sense + N2_lemma + N2_pos_type N_CONTEXT_FIELDS diff --git a/spacy/en.pyx b/spacy/en.pyx index 10773e0e2..3ed0eaaa9 100644 --- a/spacy/en.pyx +++ b/spacy/en.pyx @@ -98,10 +98,10 @@ POS_TAGS = { POS_TEMPLATES = ( (W_sic,), - (P1_sic,), + (P1_lemma, P1_pos), + (P2_lemma, P2_pos), (N1_sic,), (N2_sic,), - (P2_sic,), (W_suffix,), (W_prefix,), @@ -119,6 +119,11 @@ POS_TEMPLATES = ( (N2_cluster,), (P1_cluster,), (P2_cluster,), + + (W_pos_type,), + (N1_pos_type,), + (N1_pos_type,), + (P1_pos, W_pos_type, N1_pos_type), ) @@ -159,7 +164,7 @@ cdef class English(Language): else: fill_pos_context(context, i, t) t[i].pos = self.pos_tagger.predict(context) - self.morphologizer.set_morph(i, t) + self.morphologizer.set_morph(i, t) def train_pos(self, Tokens tokens, golds): cdef int i @@ -189,7 +194,8 @@ cdef inline void _fill_from_token(atom_t* context, const TokenC* t) nogil: context[3] = t.lex.prefix context[4] = t.lex.suffix context[5] = t.pos - context[6] = t.sense + context[6] = t.lemma + context[7] = t.lex.pos_type EN = English('en')