From 2f976aae0acb68941e45bb168d2d7a5ede91dd7d Mon Sep 17 00:00:00 2001
From: Sergey Edunov
Date: Tue, 27 Feb 2018 11:41:20 -0800
Subject: [PATCH] Making our code compatible with the latest pytorch (#223)

* Making our code compatible with the latest pytorch

* revert

* torch.nn.utils.clip_grad_norm now returns tensor
---
 fairseq/criterions/cross_entropy.py                | 4 ++--
 fairseq/criterions/label_smoothed_cross_entropy.py | 4 ++--
 fairseq/distributed_utils.py                       | 2 +-
 fairseq/trainer.py                                 | 2 +-
 fairseq/utils.py                                   | 7 +++++++
 5 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/fairseq/criterions/cross_entropy.py b/fairseq/criterions/cross_entropy.py
index 85e0195c62..5d35d7f0ba 100644
--- a/fairseq/criterions/cross_entropy.py
+++ b/fairseq/criterions/cross_entropy.py
@@ -9,7 +9,7 @@
 import torch.nn.functional as F
 
 from . import FairseqCriterion, register_criterion
-
+from fairseq import utils
 
 @register_criterion('cross_entropy')
 class CrossEntropyCriterion(FairseqCriterion):
@@ -33,7 +33,7 @@ def forward(self, model, sample, reduce=True):
                                reduce=reduce)
         sample_size = sample['target'].size(0) if self.args.sentence_avg else sample['ntokens']
         logging_output = {
-            'loss': loss.data[0] if reduce else loss.data,
+            'loss': utils.item(loss.data) if reduce else loss.data,
             'ntokens': sample['ntokens'],
             'sample_size': sample_size,
         }
diff --git a/fairseq/criterions/label_smoothed_cross_entropy.py b/fairseq/criterions/label_smoothed_cross_entropy.py
index e36c29624f..ebe698f050 100644
--- a/fairseq/criterions/label_smoothed_cross_entropy.py
+++ b/fairseq/criterions/label_smoothed_cross_entropy.py
@@ -79,8 +79,8 @@ def forward(self, model, sample, reduce=True):
         nll_loss = F.nll_loss(lprobs, target, size_average=False, ignore_index=self.padding_idx, reduce=reduce)
         sample_size = sample['target'].size(0) if self.args.sentence_avg else sample['ntokens']
         logging_output = {
-            'loss': loss.data[0] if reduce else loss.data,
-            'nll_loss': nll_loss.data[0] if reduce else loss.data,
+            'loss': utils.item(loss.data) if reduce else loss.data,
+            'nll_loss': utils.item(nll_loss.data) if reduce else loss.data,
             'ntokens': sample['ntokens'],
             'sample_size': sample_size,
         }
diff --git a/fairseq/distributed_utils.py b/fairseq/distributed_utils.py
index 75aec3f8b4..8d8b279486 100644
--- a/fairseq/distributed_utils.py
+++ b/fairseq/distributed_utils.py
@@ -116,7 +116,7 @@ def all_gather_list(data, max_size=4096):
     if len(enc) >= max_size:
         raise ValueError('encoded data exceeds max_size: {}'.format(len(enc)))
     in_buffer[0] = len(enc)
-    in_buffer[1:len(enc)+1] = torch.ByteTensor(enc)
+    in_buffer[1:len(enc)+1] = torch.ByteTensor(list(enc))
 
     torch.distributed.all_gather(out_buffers, in_buffer.cuda())
 
diff --git a/fairseq/trainer.py b/fairseq/trainer.py
index b4be2400dc..a68f2473a9 100644
--- a/fairseq/trainer.py
+++ b/fairseq/trainer.py
@@ -190,7 +190,7 @@ def _backward_and_opt(self, loss, grad_denom):
 
         # clip grads
         if self.args.clip_norm > 0:
-            grad_norm = torch.nn.utils.clip_grad_norm(self.model.parameters(), self.args.clip_norm)
+            grad_norm = utils.item(torch.nn.utils.clip_grad_norm(self.model.parameters(), self.args.clip_norm))
         else:
             grad_norm = math.sqrt(sum(p.grad.data.norm()**2 for p in self.model.parameters()))
 
diff --git a/fairseq/utils.py b/fairseq/utils.py
index 8215d6cb72..edda6a1c33 100644
--- a/fairseq/utils.py
+++ b/fairseq/utils.py
@@ -304,3 +304,10 @@ def convert_padding_direction(
     else:
         index = torch.remainder(range + num_pads, max_len)
     return src_tokens.gather(1, index)
+
+def item(tensor):
+    if hasattr(tensor, 'item'):
+        return tensor.item()
+    if hasattr(tensor, '__getitem__'):
+        return tensor[0]
+    return tensor
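
Note (not part of the patch): the new utils.item() helper exists because recent PyTorch returns zero-dimensional tensors from reduced losses and from torch.nn.utils.clip_grad_norm, whereas older releases returned values that had to be indexed with [0]. Below is a minimal standalone sketch of how the helper behaves, assuming a reasonably recent PyTorch install; the sample loss is illustrative only, not code from the repository.

    import torch
    import torch.nn.functional as F

    def item(tensor):
        # PyTorch >= 0.4: 0-dim tensors expose .item() to extract a Python scalar
        if hasattr(tensor, 'item'):
            return tensor.item()
        # older releases: 1-element tensors/Variables are indexed with [0]
        if hasattr(tensor, '__getitem__'):
            return tensor[0]
        # plain Python numbers pass through unchanged
        return tensor

    loss = F.mse_loss(torch.randn(3), torch.zeros(3))  # reduced loss: a 0-dim tensor
    print(item(loss))   # plain Python float, safe to store in logging_output
    print(item(2.5))    # non-tensor values are returned as-is

Wrapping values this way keeps the logging_output dictionaries and the reported grad_norm as plain Python numbers regardless of which PyTorch version is installed.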