From b4454cf036602c593dbef2099c407101603e4539 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Fri, 7 Nov 2014 04:40:36 +1100
Subject: [PATCH] * Add extra context tokens

Widen the Slots context window from P2..N2 to P4..N4: declare the four
new Token attributes, construct them in Slots.__init__, fill them in
fill_slots, flatten them in fill_flat, and number their fields at module
import time.
---
Review notes (this section is ignored when the patch is applied):

* fill_flat: the N3 line previously passed s.N4 as the values token, so
  the N3 field ids were written with N4's values and N3's own values
  never reached the context array. It now passes s.N3. The post-image
  blob hash on the context.pyx "index" line predates this correction.
* NOTE(review): fill_slots now reads tokens.lex/pos/ner at offsets i-4
  through i+4. This presumably relies on those arrays being padded by at
  least four entries on each side -- TODO confirm against the Tokens
  implementation, which is not part of this patch.

 spacy/context.pxd |  4 ++++
 spacy/context.pyx | 16 ++++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/spacy/context.pxd b/spacy/context.pxd
index 5e36aaa8b..f2d901fc7 100644
--- a/spacy/context.pxd
+++ b/spacy/context.pxd
@@ -46,11 +46,15 @@ cdef class Token:
 
 
 cdef class Slots:
+    cdef readonly Token P4
+    cdef readonly Token P3
     cdef readonly Token P2
     cdef readonly Token P1
     cdef readonly Token N0
     cdef readonly Token N1
     cdef readonly Token N2
+    cdef readonly Token N3
+    cdef readonly Token N4
 
 
 cdef int N_FIELDS
diff --git a/spacy/context.pyx b/spacy/context.pyx
index 5413039cc..7f28c352a 100644
--- a/spacy/context.pyx
+++ b/spacy/context.pyx
@@ -4,11 +4,15 @@ from .lexeme cimport *
 
 cdef class Slots:
     def __init__(self):
+        self.P4 = Token()
+        self.P3 = Token()
         self.P2 = Token()
         self.P1 = Token()
         self.N0 = Token()
         self.N1 = Token()
         self.N2 = Token()
+        self.N3 = Token()
+        self.N4 = Token()
 
 
 cdef void _number_token(Token t, int* n_fields):
@@ -137,26 +141,38 @@ cdef int _flatten_token(atom_t* context, Token ids, Token vals) except -1:
 
 
 cdef hash_t fill_slots(Slots s, int i, Tokens tokens) except 0:
+    fill_token(s.P4, tokens.lex[i-4], tokens.pos[i-4], tokens.ner[i-4])
+    fill_token(s.P3, tokens.lex[i-3], tokens.pos[i-3], tokens.ner[i-3])
     fill_token(s.P2, tokens.lex[i-2], tokens.pos[i-2], tokens.ner[i-2])
     fill_token(s.P1, tokens.lex[i-1], tokens.pos[i-1], tokens.ner[i-1])
     fill_token(s.N0, tokens.lex[i], tokens.pos[i], tokens.ner[i])
     fill_token(s.N1, tokens.lex[i+1], tokens.pos[i+1], tokens.ner[i+1])
     fill_token(s.N2, tokens.lex[i+2], tokens.pos[i+2], tokens.ner[i+2])
+    fill_token(s.N3, tokens.lex[i+3], tokens.pos[i+3], tokens.ner[i+3])
+    fill_token(s.N4, tokens.lex[i+4], tokens.pos[i+4], tokens.ner[i+4])
     return 1
 
 
 cdef int fill_flat(atom_t* context, Slots s) except -1:
+    _flatten_token(context, FIELD_IDS.P4, s.P4)
+    _flatten_token(context, FIELD_IDS.P3, s.P3)
     _flatten_token(context, FIELD_IDS.P2, s.P2)
     _flatten_token(context, FIELD_IDS.P1, s.P1)
     _flatten_token(context, FIELD_IDS.N0, s.N0)
     _flatten_token(context, FIELD_IDS.N1, s.N1)
     _flatten_token(context, FIELD_IDS.N2, s.N2)
+    _flatten_token(context, FIELD_IDS.N3, s.N3)
+    _flatten_token(context, FIELD_IDS.N4, s.N4)
 
 
 N_FIELDS = 0
 FIELD_IDS = Slots()
+_number_token(FIELD_IDS.P4, &N_FIELDS)
+_number_token(FIELD_IDS.P3, &N_FIELDS)
 _number_token(FIELD_IDS.P2, &N_FIELDS)
 _number_token(FIELD_IDS.P1, &N_FIELDS)
 _number_token(FIELD_IDS.N0, &N_FIELDS)
 _number_token(FIELD_IDS.N1, &N_FIELDS)
 _number_token(FIELD_IDS.N2, &N_FIELDS)
+_number_token(FIELD_IDS.N3, &N_FIELDS)
+_number_token(FIELD_IDS.N4, &N_FIELDS)