Fix significant bug in feature calculation -- off by 1

This commit is contained in:
Matthew Honnibal 2017-05-18 06:21:32 -05:00
parent fc8d3a112c
commit a438cef8c5
1 changed file with 5 additions and 4 deletions

View File

@@ -121,7 +121,7 @@ class PrecomputableMaxouts(Model):
return Yfp, backward return Yfp, backward
def Tok2Vec(width, embed_size, preprocess=None): def Tok2Vec(width, embed_size, preprocess=None):
cols = [LOWER, PREFIX, SUFFIX, SHAPE] cols = [ID, LOWER, PREFIX, SUFFIX, SHAPE]
with Model.define_operators({'>>': chain, '|': concatenate, '**': clone, '+': add}): with Model.define_operators({'>>': chain, '|': concatenate, '**': clone, '+': add}):
lower = get_col(cols.index(LOWER)) >> HashEmbed(width, embed_size) lower = get_col(cols.index(LOWER)) >> HashEmbed(width, embed_size)
prefix = get_col(cols.index(PREFIX)) >> HashEmbed(width, embed_size//2) prefix = get_col(cols.index(PREFIX)) >> HashEmbed(width, embed_size//2)
@@ -150,7 +150,7 @@ def get_col(idx):
ops = NumpyOps() ops = NumpyOps()
else: else:
ops = CupyOps() ops = CupyOps()
output = ops.xp.ascontiguousarray(X[:, idx]) output = ops.xp.ascontiguousarray(X[:, idx], dtype=X.dtype)
def backward(y, sgd=None): def backward(y, sgd=None):
dX = ops.allocate(X.shape) dX = ops.allocate(X.shape)
dX[:, idx] += y dX[:, idx] += y
@@ -173,9 +173,10 @@ def doc2feats(cols=None):
for doc in docs: for doc in docs:
if 'cached_feats' not in doc.user_data: if 'cached_feats' not in doc.user_data:
doc.user_data['cached_feats'] = model.ops.asarray( doc.user_data['cached_feats'] = model.ops.asarray(
doc.to_array(cols), doc.to_array(cols),
dtype='uint64') dtype='uint64')
feats.append(doc.user_data['cached_feats']) feats.append(doc.user_data['cached_feats'])
assert feats[-1].dtype == 'uint64'
return feats, None return feats, None
model = layerize(forward) model = layerize(forward)
model.cols = cols model.cols = cols