spaCy/spacy/syntax/_state.pxd

135 lines
3.1 KiB
Cython
Raw Normal View History

2014-12-16 11:44:43 +00:00
from libc.stdint cimport uint32_t
from cymem.cymem cimport Pool
2015-03-09 05:46:22 +00:00
from ..structs cimport TokenC, Entity
2014-12-16 11:44:43 +00:00
cdef struct State:
    # Token array; the accessors in this file index it with `i` and with
    # values read from `stack`.
    TokenC* sent
    # Integer array whose entries are used as indices into `sent`.
    # Accessors read stack[0], stack[-1] and stack[-2].
    int* stack
    # Pointer to an Entity; its `start` and `end` fields are read by
    # get_e0 / get_e1.
    Entity* ent
    # Current position in `sent`.
    int i
    # Upper bound for valid positions in `sent` (see at_eol / get_n1 / get_n2).
    int sent_len
    # NOTE(review): presumably the number of entries reachable via `stack`;
    # not read in this file, confirm against the implementation.
    int stack_len
    # NOTE(review): presumably the number of Entity records; not read in
    # this file, confirm against the implementation.
    int ents_len
2014-12-16 11:44:43 +00:00
2014-12-16 16:19:43 +00:00
# Declaration only; the body is not in this file.
# `except -1`: a return value of -1 tells the caller a Python exception is set.
# NOTE(review): by its name, presumably records `child` as a dependent of
# `head` with `label` -- confirm against the implementation.
cdef int add_dep(const State *s, const int head, const int child, const int label) except -1
2014-12-16 11:44:43 +00:00
2014-12-16 16:19:43 +00:00
# Declaration only; the body is not in this file.
# `except -1`: a return value of -1 tells the caller a Python exception is set.
# NOTE(review): presumably removes the top entry of s.stack -- confirm.
cdef int pop_stack(State *s) except -1
2014-12-16 11:44:43 +00:00
# Declaration only; the body is not in this file.
# `except -1`: a return value of -1 tells the caller a Python exception is set.
# NOTE(review): presumably pushes the current position onto s.stack -- confirm.
cdef int push_stack(State *s) except -1
# Declaration only; the body is not in this file. Callable without the GIL.
# NOTE(review): presumably reports whether token `t` already has a head
# assigned -- confirm against the implementation.
cdef bint has_head(const TokenC* t) nogil
2014-12-16 11:44:43 +00:00
cdef inline int get_idx(const State* s, const TokenC* t) nogil:
    # Offset of `t` from the start of s.sent, i.e. the index of `t` when it
    # points into that array. The pointer difference is narrowed to int.
    cdef int offset = t - s.sent
    return offset
cdef inline TokenC* get_n0(const State* s) nogil:
    # Pointer to the token at the current position s.i.
    # No bounds check is performed here.
    cdef int here = s.i
    return &s.sent[here]
cdef inline TokenC* get_n1(const State* s) nogil:
    # Pointer to the token one position after s.i, or NULL when that
    # position is not below s.sent_len.
    cdef int nxt = s.i + 1
    if nxt < s.sent_len:
        return &s.sent[nxt]
    return NULL
2014-12-16 11:44:43 +00:00
cdef inline TokenC* get_p1(const State* s) nogil:
    # Pointer to the token one position before s.i, or NULL when s.i < 1.
    if s.i >= 1:
        return &s.sent[s.i - 1]
    return NULL
cdef inline TokenC* get_p2(const State* s) nogil:
    # Pointer to the token two positions before s.i, or NULL when s.i < 2.
    if s.i >= 2:
        return &s.sent[s.i - 2]
    return NULL
2015-03-11 01:26:13 +00:00
cdef inline TokenC* get_e0(const State* s) nogil:
    # Pointer to the token at s.ent.start, or NULL when s.ent.end is non-zero.
    # NOTE(review): presumably end == 0 marks an entity that has not been
    # closed yet -- confirm against the code that writes Entity.end.
    if s.ent.end == 0:
        return &s.sent[s.ent.start]
    return NULL
cdef inline TokenC* get_e1(const State* s) nogil:
    # Pointer to the token right after s.ent.start. Returns NULL when
    # s.ent.end is non-zero or when s.ent.start is not below s.i + 1.
    if s.ent.end == 0 and s.ent.start < (s.i + 1):
        return &s.sent[s.ent.start + 1]
    return NULL
2014-12-16 11:44:43 +00:00
cdef inline TokenC* get_n2(const State* s) nogil:
    # Pointer to the token two positions after s.i, or NULL when that
    # position is not below s.sent_len.
    cdef int ahead = s.i + 2
    if ahead < s.sent_len:
        return &s.sent[ahead]
    return NULL
2014-12-16 11:44:43 +00:00
cdef inline TokenC* get_s0(const State *s) nogil:
    # Pointer to the token whose index is stored at s.stack[0].
    cdef int idx = s.stack[0]
    return &s.sent[idx]
2014-12-16 11:44:43 +00:00
cdef inline TokenC* get_s1(const State *s) nogil:
    # Pointer to the token whose index is stored one slot before s.stack[0].
    # There is no bounds check: we rely on our padding to ensure the read
    # at stack[-1] does not go out of bounds.
    cdef int idx = s.stack[-1]
    return &s.sent[idx]
2014-12-16 11:44:43 +00:00
cdef inline TokenC* get_s2(const State *s) nogil:
    # Pointer to the token whose index is stored two slots before s.stack[0].
    # There is no bounds check: we rely on our padding to ensure the read
    # at stack[-2] does not go out of bounds.
    cdef int idx = s.stack[-2]
    return &s.sent[idx]
# Declaration only; the body is not in this file. Callable without the GIL.
# NOTE(review): presumably returns the `idx`-th right-side child of `head`
# -- confirm (including the NULL/none case) against the implementation.
cdef const TokenC* get_right(const State* s, const TokenC* head, const int idx) nogil
2014-12-16 11:44:43 +00:00
2014-12-16 16:19:43 +00:00
# Declaration only; the body is not in this file. Callable without the GIL.
# NOTE(review): presumably returns the `idx`-th left-side child of `head`
# -- confirm (including the NULL/none case) against the implementation.
cdef const TokenC* get_left(const State* s, const TokenC* head, const int idx) nogil
2014-12-16 11:44:43 +00:00
cdef inline bint at_eol(const State *s) nogil:
    # True once the current position s.i is no longer below s.sent_len.
    return not (s.i < s.sent_len)
cdef inline bint is_final(const State *s) nogil:
    # The state counts as final as soon as at_eol(s) holds; the stack is not
    # inspected because it will be attached to root anyway.
    cdef bint finished = at_eol(s)
    return finished
# Declaration only; the body is not in this file.
# `except -1`: a return value of -1 tells the caller a Python exception is set.
# NOTE(review): `gold` is presumably an array of gold-standard head indices,
# and the result a count of `head`'s gold children at or after s.i -- confirm.
cdef int children_in_buffer(const State *s, const int head, const int* gold) except -1
# Declaration only; the body is not in this file.
# `except -1`: a return value of -1 tells the caller a Python exception is set.
# NOTE(review): `gold` is presumably an array of gold-standard head indices,
# and the result says whether `child`'s gold head is at or after s.i -- confirm.
cdef int head_in_buffer(const State *s, const int child, const int* gold) except -1
# Declaration only; the body is not in this file.
# `except -1`: a return value of -1 tells the caller a Python exception is set.
# NOTE(review): `gold` is presumably an array of gold-standard head indices,
# and the result a count of `head`'s gold children held in s.stack -- confirm.
cdef int children_in_stack(const State *s, const int head, const int* gold) except -1
# Declaration only; the body is not in this file.
# `except -1`: a return value of -1 tells the caller a Python exception is set.
# NOTE(review): `gold` is presumably an array of gold-standard head indices,
# and the result says whether `child`'s gold head is held in s.stack -- confirm.
cdef int head_in_stack(const State *s, const int child, const int* gold) except -1
2014-12-16 11:44:43 +00:00
# Declaration only; the body is not in this file.
# `except NULL`: a NULL return tells the caller a Python exception is set.
# NOTE(review): presumably allocates the State from `mem` for the
# `sent_length` tokens in `sent` -- confirm ownership and padding in the
# implementation.
cdef State* new_state(Pool mem, TokenC* sent, const int sent_length) except NULL
2014-12-16 11:44:43 +00:00
# Declaration only; the body is not in this file. Callable without the GIL.
# NOTE(review): presumably returns the number of left-side children
# recorded on `head` -- confirm against the implementation.
cdef int count_left_kids(const TokenC* head) nogil
# Declaration only; the body is not in this file. Callable without the GIL.
# NOTE(review): presumably returns the number of right-side children
# recorded on `head` -- confirm against the implementation.
cdef int count_right_kids(const TokenC* head) nogil
# From https://en.wikipedia.org/wiki/Hamming_weight
cdef inline uint32_t _popcount(uint32_t x) nogil:
    """Return how many bits of x are set to 1."""
    cdef int n_set = 0
    # Each pass clears the lowest set bit, so the loop runs once per set bit.
    while x:
        x = x & (x - 1)
        n_set += 1
    return n_set
2014-12-16 11:44:43 +00:00
cdef inline uint32_t _nth_significant_bit(uint32_t bits, int n) nogil:
    """Return the position of the n-th set bit of `bits`.

    Set bits are counted starting from bit 0 (the least significant bit), so
    n == 1 selects the lowest set bit. Any n below 1 behaves like n == 1.
    Returns 0 when `bits` has fewer than n set bits; note that this is
    indistinguishable from "the answer is bit 0".
    """
    cdef int i
    for i in range(32):
        # Shift the unsigned operand instead of computing `1 << i`: shifting
        # a signed int 1 by 31 is undefined behaviour in C.
        if (bits >> i) & 1:
            n -= 1
            if n < 1:
                return i
    return 0