different architecture / settings

svlandeg 2019-05-14 08:37:52 +02:00
parent 4142e8dd1b
commit 09ed446b20
2 changed files with 22 additions and 23 deletions

(changed file 1 of 2)

@@ -4,18 +4,17 @@ from __future__ import unicode_literals

 import os
 import datetime
 from os import listdir
-import numpy as np
 from random import shuffle

 from examples.pipeline.wiki_entity_linking import run_el, training_set_creator, kb_creator

 from spacy._ml import SpacyVectors, create_default_optimizer, zero_init

-from thinc.api import chain, flatten_add_lengths, with_getitem, clone, with_flatten
-from thinc.v2v import Model, Maxout, Softmax, Affine, ReLu
+from thinc.api import chain, flatten_add_lengths, with_getitem, clone
+from thinc.v2v import Model, Softmax, Maxout, Affine, ReLu
 from thinc.t2v import Pooling, sum_pool, mean_pool
-from thinc.t2t import ExtractWindow, ParametricAttention
-from thinc.misc import Residual, LayerNorm as LN
+from thinc.t2t import ParametricAttention
+from thinc.misc import Residual
 from spacy.tokens import Doc
@@ -35,18 +34,20 @@ class EL_Model():
         self.entity_encoder = self._simple_encoder(in_width=300, out_width=96)
         self.article_encoder = self._simple_encoder(in_width=300, out_width=96)

-    def train_model(self, training_dir, entity_descr_output, limit=None, to_print=True):
+    def train_model(self, training_dir, entity_descr_output, trainlimit=None, devlimit=None, to_print=True):
         Doc.set_extension("entity_id", default=None)

         train_instances, train_pos, train_neg, train_doc = self._get_training_data(training_dir,
                                                                                    entity_descr_output,
                                                                                    False,
-                                                                                   limit, to_print)
+                                                                                   trainlimit,
+                                                                                   to_print)

         dev_instances, dev_pos, dev_neg, dev_doc = self._get_training_data(training_dir,
                                                                            entity_descr_output,
                                                                            True,
-                                                                           limit / 10, to_print)
+                                                                           devlimit,
+                                                                           to_print)

         if to_print:
             print("Training on", len(train_instances.values()), "articles")
@@ -78,7 +79,6 @@ class EL_Model():
         if to_print:
             print("Trained on", instance_count, "instance clusters")
-
     def _test_dev(self, dev_instances, dev_pos, dev_neg, dev_doc):
         predictions = list()
         golds = list()
@@ -129,19 +129,19 @@ class EL_Model():
         conv_depth = 1
         cnn_maxout_pieces = 3

         with Model.define_operators({">>": chain, "**": clone}):
-            # encoder = SpacyVectors \
-            #           >> flatten_add_lengths \
-            #           >> ParametricAttention(in_width)\
-            #           >> Pooling(mean_pool) \
-            #           >> Residual(zero_init(Maxout(in_width, in_width))) \
-            #           >> zero_init(Affine(out_width, in_width, drop_factor=0.0))
             encoder = SpacyVectors \
                       >> flatten_add_lengths \
-                      >> with_getitem(0, Affine(in_width, in_width)) \
-                      >> ParametricAttention(in_width) \
-                      >> Pooling(sum_pool) \
-                      >> Residual(ReLu(in_width, in_width)) ** conv_depth \
+                      >> ParametricAttention(in_width)\
+                      >> Pooling(mean_pool) \
+                      >> Residual(zero_init(Maxout(in_width, in_width))) \
                       >> zero_init(Affine(out_width, in_width, drop_factor=0.0))
+            # encoder = SpacyVectors \
+            #           >> flatten_add_lengths \
+            #           >> with_getitem(0, Affine(in_width, in_width)) \
+            #           >> ParametricAttention(in_width) \
+            #           >> Pooling(sum_pool) \
+            #           >> Residual(ReLu(in_width, in_width)) ** conv_depth \
+            #           >> zero_init(Affine(out_width, in_width, drop_factor=0.0))
             # >> zero_init(Affine(nr_class, width, drop_factor=0.0))
             # >> logistic
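
This is the architecture change the commit title refers to: the windowed sum-pool/ReLu encoder is swapped for a ParametricAttention + mean-pool + Maxout-residual stack, with the old variant kept as a comment. For readers who want to try the new wiring outside the class, a minimal standalone sketch using the same thinc v7-era API (it assumes a spaCy model with word vectors, e.g. en_core_web_md, is installed; the widths mirror the commit's settings):

import spacy
from spacy._ml import SpacyVectors, zero_init
from thinc.api import chain, flatten_add_lengths
from thinc.v2v import Model, Maxout, Affine
from thinc.t2v import Pooling, mean_pool
from thinc.t2t import ParametricAttention
from thinc.misc import Residual

nlp = spacy.load('en_core_web_md')
in_width = 300   # width of the input word vectors
out_width = 96   # width of the final document/entity encoding

# ">>" is overloaded to chain layers, as in the commit's
# Model.define_operators({">>": chain, "**": clone}) block.
with Model.define_operators({">>": chain}):
    encoder = SpacyVectors \
              >> flatten_add_lengths \
              >> ParametricAttention(in_width) \
              >> Pooling(mean_pool) \
              >> Residual(zero_init(Maxout(in_width, in_width))) \
              >> zero_init(Affine(out_width, in_width, drop_factor=0.0))

# The encoder maps a batch of Docs to fixed-size vectors:
docs = [nlp(u"Douglas Adams was an English author.")]
print(encoder(docs).shape)  # expected: (1, 96)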
@@ -178,7 +178,6 @@ class EL_Model():
         # print("encoding dim", len(true_entity_encoding[0]))
         consensus_encoding = self._calculate_consensus(doc_encoding, true_entity_encoding)
-        # consensus_encoding_t = consensus_encoding.transpose()

         doc_mse, doc_diff = self._calculate_similarity(doc_encoding, consensus_encoding)
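
The hunk above only shows the call sites; the bodies of _calculate_consensus and _calculate_similarity are outside this diff. Purely as a hypothetical sketch of the shape of such helpers (names and formulas assumed here, not taken from the commit):

import numpy as np

def calculate_consensus_sketch(doc_encoding, entity_encoding):
    # Hypothetical: average the two encodings into one consensus vector.
    return (doc_encoding + entity_encoding) / 2

def calculate_similarity_sketch(vector, consensus):
    # Hypothetical: mean squared error plus the raw element-wise difference,
    # matching the (mse, diff) pair unpacked at the call site above.
    diff = vector - consensus
    return np.mean(diff ** 2), diff

doc_encoding = np.random.rand(96)
entity_encoding = np.random.rand(96)
consensus = calculate_consensus_sketch(doc_encoding, entity_encoding)
doc_mse, doc_diff = calculate_similarity_sketch(doc_encoding, consensus)
print(doc_mse, doc_diff.shape)  # scalar MSE and a (96,) difference vector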

(changed file 2 of 2)

@@ -111,7 +111,7 @@ if __name__ == "__main__":
     print("STEP 6: training ", datetime.datetime.now())
     my_nlp = spacy.load('en_core_web_md')
     trainer = EL_Model(kb=my_kb, nlp=my_nlp)
-    trainer.train_model(training_dir=TRAINING_DIR, entity_descr_output=ENTITY_DESCR, limit=500)
+    trainer.train_model(training_dir=TRAINING_DIR, entity_descr_output=ENTITY_DESCR, trainlimit=50, devlimit=50)
     print()

     # STEP 7: apply the EL algorithm on the dev dataset