mirror of https://github.com/explosion/spaCy.git
Let beam forward use minibatches
This commit is contained in:
parent
855872f872
commit
d274d3a3b9
|
@ -17,7 +17,7 @@ from cpython.ref cimport PyObject, Py_XDECREF
|
||||||
from cpython.exc cimport PyErr_CheckSignals, PyErr_SetFromErrno
|
from cpython.exc cimport PyErr_CheckSignals, PyErr_SetFromErrno
|
||||||
from libc.math cimport exp
|
from libc.math cimport exp
|
||||||
from libcpp.vector cimport vector
|
from libcpp.vector cimport vector
|
||||||
from libc.string cimport memset
|
from libc.string cimport memset, memcpy
|
||||||
from libc.stdlib cimport calloc, free
|
from libc.stdlib cimport calloc, free
|
||||||
from cymem.cymem cimport Pool
|
from cymem.cymem cimport Pool
|
||||||
from thinc.typedefs cimport weight_t, class_t, hash_t
|
from thinc.typedefs cimport weight_t, class_t, hash_t
|
||||||
|
@ -485,14 +485,14 @@ cdef class Parser:
|
||||||
cdef np.ndarray scores
|
cdef np.ndarray scores
|
||||||
cdef Doc doc
|
cdef Doc doc
|
||||||
cdef int nr_class = self.moves.n_moves
|
cdef int nr_class = self.moves.n_moves
|
||||||
cdef StateClass stcls, output
|
|
||||||
cuda_stream = util.get_cuda_stream()
|
cuda_stream = util.get_cuda_stream()
|
||||||
(tokvecs, bp_tokvecs), state2vec, vec2scores = self.get_batch_model(
|
(tokvecs, bp_tokvecs), state2vec, vec2scores = self.get_batch_model(
|
||||||
docs, cuda_stream, 0.0)
|
docs, cuda_stream, 0.0)
|
||||||
beams = []
|
|
||||||
cdef int offset = 0
|
cdef int offset = 0
|
||||||
cdef int j = 0
|
cdef int j = 0
|
||||||
cdef int k
|
cdef int k
|
||||||
|
|
||||||
|
beams = []
|
||||||
for doc in docs:
|
for doc in docs:
|
||||||
beam = Beam(nr_class, beam_width, min_density=beam_density)
|
beam = Beam(nr_class, beam_width, min_density=beam_density)
|
||||||
beam.initialize(self.moves.init_beam_state, doc.length, doc.c)
|
beam.initialize(self.moves.init_beam_state, doc.length, doc.c)
|
||||||
|
@ -501,34 +501,43 @@ cdef class Parser:
|
||||||
state.offset = offset
|
state.offset = offset
|
||||||
offset += len(doc)
|
offset += len(doc)
|
||||||
beam.check_done(_check_final_state, NULL)
|
beam.check_done(_check_final_state, NULL)
|
||||||
while not beam.is_done:
|
beams.append(beam)
|
||||||
states = []
|
cdef np.ndarray token_ids
|
||||||
|
token_ids = numpy.zeros((len(docs) * beam_width, self.nr_feature),
|
||||||
|
dtype='i', order='C')
|
||||||
|
todo = [beam for beam in beams if not beam.is_done]
|
||||||
|
|
||||||
|
cdef int* c_ids
|
||||||
|
cdef int nr_feature = self.nr_feature
|
||||||
|
cdef int n_states
|
||||||
|
while todo:
|
||||||
|
todo = [beam for beam in beams if not beam.is_done]
|
||||||
|
token_ids.fill(-1)
|
||||||
|
c_ids = <int*>token_ids.data
|
||||||
|
n_states = 0
|
||||||
|
for beam in todo:
|
||||||
for i in range(beam.size):
|
for i in range(beam.size):
|
||||||
stcls = StateClass.borrow(<StateC*>beam.at(i))
|
state = <StateC*>beam.at(i)
|
||||||
# This way we avoid having to score finalized states
|
# This way we avoid having to score finalized states
|
||||||
# We do have to take care to keep indexes aligned, though
|
# We do have to take care to keep indexes aligned, though
|
||||||
if not stcls.is_final():
|
if not state.is_final():
|
||||||
states.append(stcls)
|
state.set_context_tokens(c_ids, nr_feature)
|
||||||
token_ids = self.get_token_ids(states)
|
c_ids += nr_feature
|
||||||
vectors = state2vec(token_ids)
|
n_states += 1
|
||||||
if self.cfg.get('hist_size', 0):
|
if n_states == 0:
|
||||||
hists = numpy.asarray([st.history[:self.cfg['hist_size']]
|
break
|
||||||
for st in states], dtype='i')
|
vectors = state2vec(token_ids[:n_states])
|
||||||
scores = vec2scores((vectors, hists))
|
|
||||||
else:
|
|
||||||
scores = vec2scores(vectors)
|
scores = vec2scores(vectors)
|
||||||
j = 0
|
|
||||||
c_scores = <float*>scores.data
|
c_scores = <float*>scores.data
|
||||||
|
for beam in todo:
|
||||||
for i in range(beam.size):
|
for i in range(beam.size):
|
||||||
state = <StateC*>beam.at(i)
|
state = <StateC*>beam.at(i)
|
||||||
if not state.is_final():
|
if not state.is_final():
|
||||||
self.moves.set_valid(beam.is_valid[i], state)
|
self.moves.set_valid(beam.is_valid[i], state)
|
||||||
for k in range(nr_class):
|
memcpy(beam.scores[i], c_scores, nr_class * sizeof(float))
|
||||||
beam.scores[i][k] = c_scores[j * scores.shape[1] + k]
|
c_scores += nr_class
|
||||||
j += 1
|
|
||||||
beam.advance(_transition_state, NULL, <void*>self.moves.c)
|
beam.advance(_transition_state, NULL, <void*>self.moves.c)
|
||||||
beam.check_done(_check_final_state, NULL)
|
beam.check_done(_check_final_state, NULL)
|
||||||
beams.append(beam)
|
|
||||||
tokvecs = self.model[0].ops.unflatten(tokvecs,
|
tokvecs = self.model[0].ops.unflatten(tokvecs,
|
||||||
[len(doc) for doc in docs])
|
[len(doc) for doc in docs])
|
||||||
return beams, tokvecs
|
return beams, tokvecs
|
||||||
|
@ -536,7 +545,7 @@ cdef class Parser:
|
||||||
def update(self, docs, golds, drop=0., sgd=None, losses=None):
|
def update(self, docs, golds, drop=0., sgd=None, losses=None):
|
||||||
if not any(self.moves.has_gold(gold) for gold in golds):
|
if not any(self.moves.has_gold(gold) for gold in golds):
|
||||||
return None
|
return None
|
||||||
if self.cfg.get('beam_width', 1) >= 2 and numpy.random.random() >= 0.5:
|
if self.cfg.get('beam_width', 1) >= 2 and numpy.random.random() >= 0.0:
|
||||||
return self.update_beam(docs, golds,
|
return self.update_beam(docs, golds,
|
||||||
self.cfg['beam_width'], self.cfg['beam_density'],
|
self.cfg['beam_width'], self.cfg['beam_density'],
|
||||||
drop=drop, sgd=sgd, losses=losses)
|
drop=drop, sgd=sgd, losses=losses)
|
||||||
|
|
Loading…
Reference in New Issue