Support deprecation of volatile Variables in latest PyTorch
myleott committed Jan 22, 2018
1 parent 5637d54 commit 7da4e06
Showing 3 changed files with 25 additions and 14 deletions.
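Background: PyTorch 0.4 deprecates the volatile flag on Variable in favor of gradient-mode context managers, which is the pattern this commit adopts while staying compatible with older releases. A minimal sketch of the replacement pattern, assuming PyTorch 0.4 or later:

    import torch

    # Deprecated style (PyTorch <= 0.3): volatile Variables skipped autograd, e.g.
    #   out = model(Variable(data, volatile=True))
    # Replacement: disable gradient tracking with a context manager instead.
    x = torch.ones(3, requires_grad=True)
    with torch.no_grad():
        y = x * 2
    print(y.requires_grad)  # False: no graph was built inside the block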
29 changes: 15 additions & 14 deletions fairseq/multiprocessing_trainer.py
@@ -227,20 +227,21 @@ def _async_forward(self, rank, device_id, eval=False):
         self.model.train()
         self.optimizer.zero_grad()

-        sample_size, logging_output, oom = 0, {}, False
-        if self._sample is not None:
-            try:
-                # calculate loss and sample size
-                self.loss, sample_size, logging_output = self.criterion(self.model, self._sample)
-            except RuntimeError as e:
-                if not eval and 'out of memory' in str(e):
-                    print('| WARNING: ran out of memory on GPU #{}, skipping batch'.format(device_id))
-                    oom = True
-                    self.loss = None
-                    if hasattr(torch.cuda, 'empty_cache'):
-                        torch.cuda.empty_cache()
-                else:
-                    raise e
+        with utils.maybe_no_grad(eval):
+            sample_size, logging_output, oom = 0, {}, False
+            if self._sample is not None:
+                try:
+                    # calculate loss and sample size
+                    self.loss, sample_size, logging_output = self.criterion(self.model, self._sample)
+                except RuntimeError as e:
+                    if not eval and 'out of memory' in str(e):
+                        print('| WARNING: ran out of memory on GPU #{}, skipping batch'.format(device_id))
+                        oom = True
+                        self.loss = None
+                        if hasattr(torch.cuda, 'empty_cache'):
+                            torch.cuda.empty_cache()
+                    else:
+                        raise e

         return sample_size, logging_output, oom

8 changes: 8 additions & 0 deletions fairseq/utils.py
@@ -6,6 +6,7 @@
 # can be found in the PATENTS file in the same directory.
 #

+import contextlib
 import logging
 import os
 import torch
@@ -244,3 +245,10 @@ def rstrip_pad(tensor, pad):
     if strip > 0:
         return tensor[:-strip]
     return tensor
+
+
+def maybe_no_grad(condition):
+    if hasattr(torch, 'no_grad') and condition:
+        return torch.no_grad()
+    # no-op context manager
+    return contextlib.ExitStack()
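A short usage sketch of the helper above, assuming a PyTorch build that has torch.no_grad (0.4 or later) so both branches can be observed:

    import torch
    from fairseq.utils import maybe_no_grad

    x = torch.ones(3, requires_grad=True)

    with maybe_no_grad(True):
        y = x * 2
    print(y.requires_grad)  # False: the torch.no_grad() branch is active

    with maybe_no_grad(False):
        z = x * 2
    print(z.requires_grad)  # True: the ExitStack branch is a no-op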
2 changes: 2 additions & 0 deletions generate.py
@@ -35,6 +35,8 @@ def main():
     print(args)

     use_cuda = torch.cuda.is_available() and not args.cpu
+    if hasattr(torch, 'set_grad_enabled'):
+        torch.set_grad_enabled(False)

     # Load dataset
     if args.replace_unk is None:
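The same guard in isolation, as a sketch: torch.set_grad_enabled(False) switches autograd off globally, so generation never builds a graph; the hasattr check keeps the script working on older PyTorch releases that lack the function. The tensor check below assumes the 0.4-style tensor API:

    import torch

    # Disable autograd for the whole process when the API exists (PyTorch >= 0.4).
    if hasattr(torch, 'set_grad_enabled'):
        torch.set_grad_enabled(False)

    x = torch.ones(2, requires_grad=True)
    y = x + 1
    print(y.requires_grad)  # False while grad mode is globally disabled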
