diff --git a/pytorch_lightning/models/model_examples/__init__.py b/pytorch_lightning/models/model_examples/__init__.py
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/pytorch_lightning/models/model_examples/bilstm.py b/pytorch_lightning/models/model_examples/bilstm.py
deleted file mode 100644
index f221a9d778..0000000000
--- a/pytorch_lightning/models/model_examples/bilstm.py
+++ /dev/null
@@ -1,167 +0,0 @@
-import torch.nn as nn
-import numpy as np
-
-from test_tube import HyperOptArgumentParser
-import torch
-from torch.autograd import Variable
-from sklearn.metrics import confusion_matrix, f1_score
-from torch.nn import functional as F
-
-
-class BiLSTMPack(nn.Module):
-    """
-    Sample model to show how to define a template
-    """
-    def __init__(self, hparams):
-        # init superclass
-        super(BiLSTMPack, self).__init__(hparams)
-
-        self.hidden = None
-
-        # trigger tag building
-        self.ner_tagset = {'O': 0, 'I-Bio': 1}
-        self.nb_tags = len(self.ner_tagset)
-
-        # build model
-        print('building model...')
-        if hparams.model_load_weights_path is None:
-            self.__build_model()
-            print('model built')
-        else:
-            self = BiLSTMPack.load(hparams.model_load_weights_path, hparams.on_gpu, hparams)
-            print('model loaded from: {}'.format(hparams.model_load_weights_path))
-
-    def __build_model(self):
-        """
-        Layout model
-        :return:
-        """
-        # design the number of final units
-        self.output_dim = self.hparams.nb_lstm_units
-
-        # when it's bidirectional our weights double
-        if self.hparams.bidirectional:
-            self.output_dim *= 2
-
-        # total number of words
-        total_words = len(self.tng_dataloader.dataset.words_token_to_idx)
-
-        # word embeddings
-        self.word_embedding = nn.Embedding(
-            num_embeddings=total_words + 1,
-            embedding_dim=self.hparams.embedding_dim,
-            padding_idx=0
-        )
-
-        # design the LSTM
-        self.lstm = nn.LSTM(
-            self.hparams.embedding_dim,
-            self.hparams.nb_lstm_units,
-            num_layers=self.hparams.nb_lstm_layers,
-            bidirectional=self.hparams.bidirectional,
-            dropout=self.hparams.drop_prob,
-            batch_first=True,
-        )
-
-        # map to tag space
-        self.fc_out = nn.Linear(self.output_dim, self.out_dim)
-        self.hidden_to_tag = nn.Linear(self.output_dim, self.nb_tags)
-
-
-    def init_hidden(self, batch_size):
-
-        # the weights are of the form (nb_layers * 2 if bidirectional, batch_size, nb_lstm_units)
-        mult = 2 if self.hparams.bidirectional else 1
-        hidden_a = torch.randn(self.hparams.nb_layers * mult, batch_size, self.nb_rnn_units)
-        hidden_b = torch.randn(self.hparams.nb_layers * mult, batch_size, self.nb_rnn_units)
-
-        if self.hparams.on_gpu:
-            hidden_a = hidden_a.cuda()
-            hidden_b = hidden_b.cuda()
-
-        hidden_a = Variable(hidden_a)
-        hidden_b = Variable(hidden_b)
-
-        return (hidden_a, hidden_b)
-
-    def forward(self, model_in):
-        # layout data (expand it, etc...)
-        # x = sequences
-        x, seq_lengths = model_in
-        batch_size, seq_len = x.size()
-
-        # reset RNN hidden state
-        self.hidden = self.init_hidden(batch_size)
-
-        # embed
-        x = self.word_embedding(x)
-
-        # run through rnn using packed sequences
-        x = torch.nn.utils.rnn.pack_padded_sequence(x, seq_lengths, batch_first=True)
-        x, self.hidden = self.lstm(x, self.hidden)
-        x, _ = torch.nn.utils.rnn.pad_packed_sequence(x, batch_first=True)
-
-        # if asked for only last state, use the h_n which is the same as out(t=n)
-        if not self.return_sequence:
-            # pull out hidden states
-            # h_n = (nb_directions * nb_layers, batch_size, emb_size)
-            nb_directions = 2 if self.bidirectional else 1
-            (h_n, _) = self.hidden
-
-            # reshape to make indexing easier
-            # forward = 0, backward = 1 (of nb_directions)
-            h_n = h_n.view(self.nb_layers, nb_directions, batch_size, self.nb_rnn_units)
-
-            # pull out last forward
-            forward_h_n = h_n[-1, 0, :, :]
-            x = forward_h_n
-
-            # if bidirectional, also pull out the last hidden of backward network
-            if self.bidirectional:
-                backward_h_n = h_n[-1, 1, :, :]
-                x = torch.cat([forward_h_n, backward_h_n], dim=1)
-
-        # project to tag space
-        x = x.contiguous()
-        x = x.view(-1, self.output_dim)
-        x = self.hidden_to_tag(x)
-
-        return x
-
-    def loss(self, model_out):
-        # cross entropy loss
-        logits, y = model_out
-        y, y_lens = y
-
-        # flatten y and logits
-        y = y.view(-1)
-        logits = logits.view(-1, self.nb_tags)
-
-        # calculate a mask to remove padding tokens
-        mask = (y >= 0).float()
-
-        # count how many tokens we have
-        num_tokens = int(torch.sum(mask).data[0])
-
-        # pick the correct values and mask out
-        logits = logits[range(logits.shape[0]), y] * mask
-
-        # compute the ce loss
-        ce_loss = -torch.sum(logits)/num_tokens
-
-        return ce_loss
-
-    def pull_out_last_embedding(self, x, seq_lengths, batch_size, on_gpu):
-        # grab only the last activations from the non-padded ouput
-        x_last = torch.zeros([batch_size, 1, x.size(-1)])
-        for i, seq_len in enumerate(seq_lengths):
-            x_last[i, :, :] = x[i, seq_len-1, :]
-
-        # put on gpu when requested
-        if on_gpu:
-            x_last = x_last.cuda()
-
-        # turn into torch var
-        x_last = Variable(x_last)
-
-        return x_last
\ No newline at end of file