attempt at migrating w/o Variables

parent ab4f39de72
commit 76304734b6
@@ -5,7 +5,6 @@ import numpy as np
 import torch
 from torch import nn
 from torch.nn import functional as F
-from torch.autograd import Variable
 
 from cove import MTLSTM
 from allennlp.modules.elmo import Elmo
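Note: from PyTorch 0.4 onward, torch.autograd.Variable is merged into torch.Tensor, which is why the import can simply be dropped. A minimal standalone sketch of the equivalence (not code from this repo):

    import torch

    # Pre-0.4: x = Variable(torch.zeros(2, 3), requires_grad=True)
    # 0.4+: tensors carry autograd state directly.
    x = torch.zeros(2, 3, requires_grad=True)
    y = (x * 2).sum()
    y.backward()      # works without any Variable wrapper
    print(x.grad)     # 2x3 tensor filled with 2.0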
@@ -94,14 +93,12 @@ class MultitaskQuestionAnsweringNetwork(nn.Module):
 
         coattended_context, coattended_question = self.coattention(context_encoded, question_encoded, context_padding, question_padding)
 
-        # context_summary = self.dropout(torch.cat([coattended_context, context_encoded, context_embedded], -1))
         context_summary = torch.cat([coattended_context, context_encoded, context_embedded], -1)
         condensed_context, _ = self.context_bilstm_after_coattention(context_summary, context_lengths)
         self_attended_context = self.self_attentive_encoder_context(condensed_context, padding=context_padding)
         final_context, (context_rnn_h, context_rnn_c) = self.bilstm_context(self_attended_context[-1], context_lengths)
         context_rnn_state = [self.reshape_rnn_state(x) for x in (context_rnn_h, context_rnn_c)]
 
-        # question_summary = self.dropout(torch.cat([coattended_question, question_encoded, question_embedded], -1))
         question_summary = torch.cat([coattended_question, question_encoded, question_embedded], -1)
         condensed_question, _ = self.question_bilstm_after_coattention(question_summary, question_lengths)
         self_attended_question = self.self_attentive_encoder_question(condensed_question, padding=question_padding)
@@ -159,14 +156,14 @@ class MultitaskQuestionAnsweringNetwork(nn.Module):
         effective_vocab_size = self.generative_vocab_size + len(oov_to_limited_idx)
         if self.generative_vocab_size < effective_vocab_size:
             size[-1] = effective_vocab_size - self.generative_vocab_size
-            buff = Variable(scaled_p_vocab.data.new(*size).fill_(EPSILON))
+            buff = scaled_p_vocab.new_full(size, EPSILON, requires_grad=True)
             scaled_p_vocab = torch.cat([scaled_p_vocab, buff], dim=buff.dim()-1)
 
-        p_context_ptr = Variable(scaled_p_vocab.data.new(*scaled_p_vocab.size()).fill_(EPSILON))
+        p_context_ptr = scaled_p_vocab.new_full(scaled_p_vocab.size(), EPSILON, requires_grad=True)
         p_context_ptr.scatter_add_(p_context_ptr.dim()-1, context_indices.unsqueeze(1).expand_as(context_attention), context_attention)
         scaled_p_context_ptr = (context_question_switches * (1 - vocab_pointer_switches)).expand_as(p_context_ptr) * p_context_ptr
 
-        p_question_ptr = Variable(scaled_p_vocab.data.new(*scaled_p_vocab.size()).fill_(EPSILON))
+        p_question_ptr = scaled_p_vocab.new_full(scaled_p_vocab.size(), EPSILON, requires_grad=True)
         p_question_ptr.scatter_add_(p_question_ptr.dim()-1, question_indices.unsqueeze(1).expand_as(question_attention), question_attention)
         scaled_p_question_ptr = ((1 - context_question_switches) * (1 - vocab_pointer_switches)).expand_as(p_question_ptr) * p_question_ptr
 
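Note: the Variable(t.data.new(*size).fill_(v)) idiom becomes t.new_full(size, v), which creates a tensor with the same dtype and device as t. One caveat about the requires_grad=True flag used above: in 0.4+, an in-place op such as scatter_add_ on a leaf tensor that requires grad raises a RuntimeError, so buffers that get mutated in place are normally created without that flag. A standalone sketch with made-up shapes:

    import torch

    probs = torch.randn(2, 5).softmax(-1)        # stands in for scaled_p_vocab

    # Old: buff = Variable(probs.data.new(2, 3).fill_(1e-8))
    buff = probs.new_full((2, 3), 1e-8)          # same dtype/device as probs

    # Buffer to be scatter_add_-ed into: created WITHOUT requires_grad=True,
    # since in-place ops on a leaf tensor that requires grad raise an error.
    ptr = probs.new_full(probs.size(), 1e-8)
    index = torch.tensor([[0, 1, 2, 3, 4], [4, 3, 2, 1, 0]])
    ptr.scatter_add_(ptr.dim() - 1, index, probs)   # accumulate attention mass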
@@ -175,21 +172,20 @@ class MultitaskQuestionAnsweringNetwork(nn.Module):
 
 
     def greedy(self, self_attended_context, context, question, context_indices, question_indices, oov_to_limited_idx, rnn_state=None):
+        with torch.no_grad():
             B, TC, C = context.size()
             T = self.args.max_output_length
-            outs = Variable(context.data.new(B, T).long().fill_(
-                self.field.decoder_stoi['<pad>']), volatile=True)
-            hiddens = [Variable(self_attended_context[0].data.new(B, T, C).zero_(), volatile=True)
+            outs = context.new_full((B, T), self.field.decoder_stoi['<pad>'], dtype=torch.long)
+            hiddens = [self_attended_context[0].new_zeros((B, T, C))
                        for l in range(len(self.self_attentive_decoder.layers) + 1)]
             hiddens[0] = hiddens[0] + positional_encodings_like(hiddens[0])
-            eos_yet = context.data.new(B).byte().zero_()
+            eos_yet = context.new_zeros((B, )).byte()
 
             rnn_output, context_alignment, question_alignment = None, None, None
             for t in range(T):
                 if t == 0:
-                    embedding = self.decoder_embeddings(Variable(
-                        self_attended_context[-1].data.new(B).long().fill_(
-                        self.field.vocab.stoi['<init>']), volatile=True).unsqueeze(1), [1]*B)
+                    embedding = self.decoder_embeddings(
+                        self_attended_context[-1].new_full((B, 1), self.field.vocab.stoi['<init>'], dtype=torch.long), [1]*B)
                 else:
                     embedding = self.decoder_embeddings(outs[:, t - 1].unsqueeze(1), [1]*B)
                 hiddens[0][:, t] = hiddens[0][:, t] + (math.sqrt(self.self_attentive_decoder.d_model) * embedding).squeeze(1)
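Note: volatile=True is replaced by the torch.no_grad() context manager, and the preallocated decoding buffers move to the new_full/new_zeros factory methods. A standalone sketch of the pattern (shapes and names here are illustrative, not the model's):

    import torch

    B, T, pad_idx = 4, 10, 1
    context = torch.randn(B, 7, 16)

    with torch.no_grad():                        # replaces volatile=True
        outs = context.new_full((B, T), pad_idx, dtype=torch.long)
        hidden = context.new_zeros((B, T, 16))   # dtype/device follow context
        eos_yet = context.new_zeros((B,)).byte() # per-example "emitted <eos>" flags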
@@ -209,7 +205,7 @@ class MultitaskQuestionAnsweringNetwork(nn.Module):
                                             oov_to_limited_idx)
                 pred_probs, preds = probs.max(-1)
                 eos_yet = eos_yet | (preds.data == self.field.decoder_stoi['<eos>'])
-                outs[:, t] = Variable(preds.data.cpu().apply_(self.map_to_full), volatile=True)
+                outs[:, t] = preds.data.cpu().apply_(self.map_to_full)
                 if eos_yet.all():
                     break
             return outs
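Note: Tensor.apply_ runs a Python callable on every element in place and is implemented only for CPU tensors, hence the .cpu() call kept above; with Variable gone, the mapped predictions can be assigned into outs directly. A small standalone sketch (the vocabulary map is invented):

    import torch

    limited_to_full = {0: 0, 1: 5, 2: 9}         # illustrative index map

    preds = torch.tensor([2, 0, 1])
    full = preds.cpu().apply_(lambda i: limited_to_full[i])   # in place, CPU only
    print(full)                                  # tensor([9, 0, 5])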
@@ -224,14 +220,14 @@ class CoattentiveLayer(nn.Module):
         self.dropout = nn.Dropout(dropout)
 
     def forward(self, context, question, context_padding, question_padding):
-        context_padding = torch.cat([context.data.new(context.size(0)).long().fill_(0).unsqueeze(1).long()==1, context_padding], 1)
-        question_padding = torch.cat([question.data.new(question.size(0)).long().fill_(0).unsqueeze(1)==1, question_padding], 1)
+        context_padding = torch.cat([context.new_zeros((context.size(0), 1), dtype=torch.long)==1, context_padding], 1)
+        question_padding = torch.cat([question.data.new_zeros((question.size(0), 1), dtype=torch.long)==1, question_padding], 1)
 
-        context_sentinel = self.embed_sentinel(Variable(context.data.new(context.size(0)).long().fill_(0)))
-        context = torch.cat([context_sentinel.unsqueeze(1), self.dropout(context)], 1) # batch_size x (context_length + 1) x features
+        context_sentinel = self.embed_sentinel(context.new_zeros((context.size(0), 1), dtype=torch.long))
+        context = torch.cat([context_sentinel, self.dropout(context)], 1) # batch_size x (context_length + 1) x features
 
-        question_sentinel = self.embed_sentinel(Variable(question.data.new(question.size(0)).long().fill_(1)))
-        question = torch.cat([question_sentinel.unsqueeze(1), question], 1) # batch_size x (question_length + 1) x features
+        question_sentinel = self.embed_sentinel(question.new_ones((question.size(0), 1), dtype=torch.long))
+        question = torch.cat([question_sentinel, question], 1) # batch_size x (question_length + 1) x features
         question = torch.tanh(self.proj(question)) # batch_size x (question_length + 1) x features
 
         affinity = context.bmm(question.transpose(1,2)) # batch_size x (context_length + 1) x (question_length + 1)
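Note: building the sentinel indices with shape (batch, 1) via new_zeros/new_ones and dtype=torch.long makes the later unsqueeze(1) unnecessary, because nn.Embedding preserves the extra dimension. A standalone sketch with toy sizes:

    import torch
    from torch import nn

    embed_sentinel = nn.Embedding(2, 8)          # index 0: context, 1: question
    context = torch.randn(4, 6, 8)               # batch x length x features

    # (batch, 1) long zeros -> embedding output is (batch, 1, features)
    sentinel = embed_sentinel(context.new_zeros((context.size(0), 1), dtype=torch.long))
    context = torch.cat([sentinel, context], 1)  # batch x (length + 1) x features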
@@ -311,7 +307,7 @@ class DualPtrRNNDecoder(nn.Module):
     def make_init_output(self, context):
         batch_size = context.size(0)
         h_size = (batch_size, self.d_hid)
-        return Variable(context.data.new(*h_size).zero_(), requires_grad=False)
+        return context.new_zeros(h_size)
 
     def package_outputs(self, outputs):
         outputs = torch.stack(outputs, dim=1)
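Note: new_zeros defaults to requires_grad=False, so it is a drop-in replacement for the old Variable(context.data.new(*h_size).zero_(), requires_grad=False) while still inheriting dtype and device from context. For example, with toy sizes:

    import torch

    context = torch.randn(4, 7, 16)
    h = context.new_zeros((context.size(0), 32))   # same dtype/device, no grad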
@@ -247,7 +247,6 @@ def get_best(args):
 if __name__ == '__main__':
     args = get_args()
     print(f'Arguments:\n{pformat(vars(args))}')
     os.environ['CUDA_VISIBLE_DEVICES'] = f'{args.gpus}'
-
     np.random.seed(args.seed)
     random.seed(args.seed)
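Note: the diff only shows the NumPy and Python seeds; for a fully reproducible PyTorch run one usually also seeds torch itself. The helper below is a common pattern, not part of this commit:

    import random
    import numpy as np
    import torch

    def set_seed(seed):
        # Seed Python, NumPy, and PyTorch (CPU and all visible GPUs).
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)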
@@ -3,7 +3,6 @@ from copy import deepcopy
 from collections import Counter, OrderedDict
 import six
 import torch
-from torch.autograd import Variable
 from tqdm import tqdm
 
 from .dataset import Dataset
@@ -201,7 +200,6 @@ class Field(RawField):
             tensor = torch.LongTensor(batch)
             if device != -1:
                 tensor = tensor.cuda(device)
-            tensor = Variable(tensor, volatile=not train)
         else:
             padded = self.pad(batch)
             tensor = self.numericalize(padded, device=device, train=train, **kwargs)
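Note: with the Variable(tensor, volatile=not train) wrap deleted, the LongTensor is used as-is. In 0.4+ the device move is often spelled tensor.to(device), though the tensor.cuda(device) form kept here still works. A standalone sketch (the batch is invented):

    import torch

    batch = [[2, 5, 7], [3, 1, 4]]
    tensor = torch.LongTensor(batch)
    device = 0 if torch.cuda.is_available() else -1
    if device != -1:
        tensor = tensor.cuda(device)   # 0.4+ spelling: tensor.to(f'cuda:{device}')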
@@ -370,8 +368,8 @@ class Field(RawField):
         # if self.include_lengths:
         #     lengths = lengths.cuda(device)
         if self.include_lengths:
-            return Variable(arr, volatile=not train), lengths, Variable(lim_arr, volatile=not train)
-        return Variable(arr, volatile=not train)
+            return arr, lengths, lim_arr
+        return arr
 
 
 class ReversibleField(Field):
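Note: since volatile no longer exists in 0.4+, "no autograd during evaluation" is expressed at the call site rather than on every returned tensor, which is what lets numericalize return plain arr/lengths/lim_arr. A minimal sketch of the calling pattern (model and batches are placeholders):

    import torch

    def evaluate(model, batches):
        model.eval()
        with torch.no_grad():          # replaces volatile=not train
            return [model(batch) for batch in batches]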