* Apply regularization to the softmax, not the bias

This commit is contained in:
Matthew Honnibal 2015-06-29 11:45:38 +02:00
parent ca30fe1582
commit 1dff04acb5
1 changed files with 11 additions and 8 deletions

View File

@ -35,6 +35,7 @@ import numpy
from collections import OrderedDict, defaultdict
theano.config.profile = False
theano.config.floatX = 'float32'
floatX = theano.config.floatX
@ -112,8 +113,7 @@ def compile_theano_model(n_classes, n_hidden, n_in, L1_reg, L2_reg):
cost = (
-T.log(T.sum((p_y_given_x[0] + stabilizer) * T.eq(costs, 0)))
+ L1(L1_reg, hidden_W.curr, hidden_b.curr) + L2(L2_reg, maxent_W.curr, hidden_W.curr)
+ L2(L2_reg, hidden_W.curr, hidden_b.curr)
)
debug = theano.function(
@ -143,12 +143,12 @@ def compile_theano_model(n_classes, n_hidden, n_in, L1_reg, L2_reg):
outputs=[
feed_layer(
T.nnet.softmax,
maxent_W.curr, maxent_W.avg,
maxent_b.curr, maxent_b.avg,
feed_layer(
relu,
hidden_W.curr, hidden_W.avg,
hidden_b.curr, hidden_b.avg,
x
)
)[0]
@ -200,7 +200,7 @@ def train(Language, gold_tuples, model_dir, n_iter=15, feat_set=u'basic',
(nv_tag * len(tags)) + \
(nv_label * len(labels))
debug, train_func, predict_func = compile_theano_model(n_classes, nv_hidden,
n_in, 0.0, 0.0001) n_in, 0.0, 0.00)
return TheanoModel(
n_classes,
((nv_word, words), (nv_tag, tags), (nv_label, labels)),
@ -213,7 +213,7 @@ def train(Language, gold_tuples, model_dir, n_iter=15, feat_set=u'basic',
nlp._parser = Parser(nlp.vocab.strings, dep_model_dir, nlp.ParserTransitionSystem,
make_model)
print "Itn.\tP.Loss\tUAS\tNER F.\tTag %\tToken %" print "Itn.\tP.Loss\tUAS\tTag %\tToken %"
log_loc = path.join(model_dir, 'job.log')
for itn in range(n_iter):
scorer = Scorer()
@ -274,6 +274,9 @@ def main(train_loc, dev_loc, model_dir, n_sents=0, n_iter=15, verbose=False,
nv_word=10, nv_tag=10, nv_label=10, nv_hidden=10,
eta=0.1, mu=0.9, eval_only=False):
gold_train = list(read_json_file(train_loc, lambda doc: 'wsj' in doc['id']))
nlp = train(English, gold_train, model_dir,