mirror of https://github.com/explosion/spaCy.git
Add support for history features in parsing models
This commit is contained in:
parent
ee41e4fea7
commit
b50a359e11
|
@ -51,6 +51,7 @@ from .._ml import zero_init, PrecomputableAffine, PrecomputableMaxouts
|
||||||
from .._ml import Tok2Vec, doc2feats, rebatch, fine_tune
|
from .._ml import Tok2Vec, doc2feats, rebatch, fine_tune
|
||||||
from .._ml import Residual, drop_layer, flatten
|
from .._ml import Residual, drop_layer, flatten
|
||||||
from .._ml import link_vectors_to_models
|
from .._ml import link_vectors_to_models
|
||||||
|
from .._ml import HistoryFeatures
|
||||||
from ..compat import json_dumps
|
from ..compat import json_dumps
|
||||||
|
|
||||||
from . import _parse_features
|
from . import _parse_features
|
||||||
|
@ -68,7 +69,7 @@ from ..gold cimport GoldParse
|
||||||
from ..attrs cimport ID, TAG, DEP, ORTH, NORM, PREFIX, SUFFIX, TAG
|
from ..attrs cimport ID, TAG, DEP, ORTH, NORM, PREFIX, SUFFIX, TAG
|
||||||
from . import _beam_utils
|
from . import _beam_utils
|
||||||
|
|
||||||
USE_FINE_TUNE = True
|
USE_HISTORY = True
|
||||||
|
|
||||||
def get_templates(*args, **kwargs):
|
def get_templates(*args, **kwargs):
|
||||||
return []
|
return []
|
||||||
|
@ -261,18 +262,35 @@ cdef class Parser:
|
||||||
|
|
||||||
with Model.use_device('cpu'):
|
with Model.use_device('cpu'):
|
||||||
if depth == 0:
|
if depth == 0:
|
||||||
upper = chain()
|
hist_size = 8
|
||||||
upper.is_noop = True
|
nr_dim = 8
|
||||||
else:
|
if USE_HISTORY:
|
||||||
|
upper = chain(
|
||||||
|
HistoryFeatures(nr_class=nr_class, hist_size=hist_size,
|
||||||
|
nr_dim=nr_dim),
|
||||||
|
zero_init(Affine(nr_class, nr_class+hist_size*nr_dim,
|
||||||
|
drop_factor=0.0)))
|
||||||
|
upper.is_noop = False
|
||||||
|
else:
|
||||||
|
upper = chain()
|
||||||
|
upper.is_noop = True
|
||||||
|
elif USE_HISTORY:
|
||||||
upper = chain(
|
upper = chain(
|
||||||
clone(Maxout(hidden_width), depth-1),
|
HistoryFeatures(nr_class=nr_class, hist_size=8, nr_dim=8),
|
||||||
|
Maxout(hidden_width, hidden_width+8*8),
|
||||||
zero_init(Affine(nr_class, hidden_width, drop_factor=0.0))
|
zero_init(Affine(nr_class, hidden_width, drop_factor=0.0))
|
||||||
)
|
)
|
||||||
upper.is_noop = False
|
upper.is_noop = False
|
||||||
|
else:
|
||||||
|
upper = chain(
|
||||||
|
Maxout(hidden_width, hidden_width),
|
||||||
|
zero_init(Affine(nr_class, hidden_width, drop_factor=0.0))
|
||||||
|
)
|
||||||
|
upper.is_noop = False
|
||||||
|
|
||||||
# TODO: This is an unfortunate hack atm!
|
# TODO: This is an unfortunate hack atm!
|
||||||
# Used to set input dimensions in network.
|
# Used to set input dimensions in network.
|
||||||
lower.begin_training(lower.ops.allocate((500, token_vector_width)))
|
lower.begin_training(lower.ops.allocate((500, token_vector_width)))
|
||||||
upper.begin_training(upper.ops.allocate((500, hidden_width)))
|
|
||||||
cfg = {
|
cfg = {
|
||||||
'nr_class': nr_class,
|
'nr_class': nr_class,
|
||||||
'depth': depth,
|
'depth': depth,
|
||||||
|
@ -428,12 +446,18 @@ cdef class Parser:
|
||||||
self._parse_step(next_step[i],
|
self._parse_step(next_step[i],
|
||||||
feat_weights, nr_class, nr_feat, nr_piece)
|
feat_weights, nr_class, nr_feat, nr_piece)
|
||||||
else:
|
else:
|
||||||
|
hists = []
|
||||||
for i in range(nr_step):
|
for i in range(nr_step):
|
||||||
st = next_step[i]
|
st = next_step[i]
|
||||||
st.set_context_tokens(&c_token_ids[i*nr_feat], nr_feat)
|
st.set_context_tokens(&c_token_ids[i*nr_feat], nr_feat)
|
||||||
self.moves.set_valid(&c_is_valid[i*nr_class], st)
|
self.moves.set_valid(&c_is_valid[i*nr_class], st)
|
||||||
|
hists.append([st.get_hist(j+1) for j in range(8)])
|
||||||
|
hists = numpy.asarray(hists)
|
||||||
vectors = state2vec(token_ids[:next_step.size()])
|
vectors = state2vec(token_ids[:next_step.size()])
|
||||||
scores = vec2scores(vectors)
|
if USE_HISTORY:
|
||||||
|
scores = vec2scores((vectors, hists))
|
||||||
|
else:
|
||||||
|
scores = vec2scores(vectors)
|
||||||
c_scores = <float*>scores.data
|
c_scores = <float*>scores.data
|
||||||
for i in range(nr_step):
|
for i in range(nr_step):
|
||||||
st = next_step[i]
|
st = next_step[i]
|
||||||
|
@ -441,6 +465,7 @@ cdef class Parser:
|
||||||
&c_scores[i*nr_class], &c_is_valid[i*nr_class], nr_class)
|
&c_scores[i*nr_class], &c_is_valid[i*nr_class], nr_class)
|
||||||
action = self.moves.c[guess]
|
action = self.moves.c[guess]
|
||||||
action.do(st, action.label)
|
action.do(st, action.label)
|
||||||
|
st.push_hist(guess)
|
||||||
this_step, next_step = next_step, this_step
|
this_step, next_step = next_step, this_step
|
||||||
next_step.clear()
|
next_step.clear()
|
||||||
for st in this_step:
|
for st in this_step:
|
||||||
|
@ -551,7 +576,11 @@ cdef class Parser:
|
||||||
if drop != 0:
|
if drop != 0:
|
||||||
mask = vec2scores.ops.get_dropout_mask(vector.shape, drop)
|
mask = vec2scores.ops.get_dropout_mask(vector.shape, drop)
|
||||||
vector *= mask
|
vector *= mask
|
||||||
scores, bp_scores = vec2scores.begin_update(vector, drop=drop)
|
hists = numpy.asarray([st.history for st in states], dtype='i')
|
||||||
|
if USE_HISTORY:
|
||||||
|
scores, bp_scores = vec2scores.begin_update((vector, hists), drop=drop)
|
||||||
|
else:
|
||||||
|
scores, bp_scores = vec2scores.begin_update(vector, drop=drop)
|
||||||
|
|
||||||
d_scores = self.get_batch_loss(states, golds, scores)
|
d_scores = self.get_batch_loss(states, golds, scores)
|
||||||
d_scores /= len(docs)
|
d_scores /= len(docs)
|
||||||
|
@ -570,7 +599,8 @@ cdef class Parser:
|
||||||
else:
|
else:
|
||||||
backprops.append((token_ids, d_vector, bp_vector))
|
backprops.append((token_ids, d_vector, bp_vector))
|
||||||
self.transition_batch(states, scores)
|
self.transition_batch(states, scores)
|
||||||
todo = [st for st in todo if not st[0].is_final()]
|
todo = [(st, gold) for (st, gold) in todo
|
||||||
|
if not st.is_final()]
|
||||||
if losses is not None:
|
if losses is not None:
|
||||||
losses[self.name] += (d_scores**2).sum()
|
losses[self.name] += (d_scores**2).sum()
|
||||||
n_steps += 1
|
n_steps += 1
|
||||||
|
@ -706,12 +736,15 @@ cdef class Parser:
|
||||||
cdef StateClass state
|
cdef StateClass state
|
||||||
cdef int[500] is_valid # TODO: Unhack
|
cdef int[500] is_valid # TODO: Unhack
|
||||||
cdef float* c_scores = &scores[0, 0]
|
cdef float* c_scores = &scores[0, 0]
|
||||||
|
hists = []
|
||||||
for state in states:
|
for state in states:
|
||||||
self.moves.set_valid(is_valid, state.c)
|
self.moves.set_valid(is_valid, state.c)
|
||||||
guess = arg_max_if_valid(c_scores, is_valid, scores.shape[1])
|
guess = arg_max_if_valid(c_scores, is_valid, scores.shape[1])
|
||||||
action = self.moves.c[guess]
|
action = self.moves.c[guess]
|
||||||
action.do(state.c, action.label)
|
action.do(state.c, action.label)
|
||||||
c_scores += scores.shape[1]
|
c_scores += scores.shape[1]
|
||||||
|
hists.append(guess)
|
||||||
|
return hists
|
||||||
|
|
||||||
def get_batch_loss(self, states, golds, float[:, ::1] scores):
|
def get_batch_loss(self, states, golds, float[:, ::1] scores):
|
||||||
cdef StateClass state
|
cdef StateClass state
|
||||||
|
|
Loading…
Reference in New Issue