Remove variables and add device management inside Model
freud14 committed May 10, 2018
1 parent 130b653 commit e105399
Showing 10 changed files with 211 additions and 136 deletions.
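In short: `torch.autograd.Variable` was merged into `Tensor` in PyTorch 0.4, so the wrapping/unwrapping helpers are removed, gradient bookkeeping is handled with the `torch.enable_grad()`/`torch.no_grad()` context managers, and `Model` now remembers the device it was transferred to so that input batches follow automatically. A minimal usage sketch under that reading (the network, data, and hyperparameters below are illustrative, not taken from the diff; the import path is assumed from the file layout):

    import torch
    import torch.nn as nn
    from pytoune.framework import Model

    network = nn.Linear(1, 1)
    model = Model(network, torch.optim.SGD(network.parameters(), lr=0.01), nn.MSELoss())

    # After this commit, transferring the Model also records the device,
    # so batches passed to fit() are moved there automatically.
    model.to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
    model.fit(torch.rand(128, 1), torch.rand(128, 1), batch_size=32, epochs=5)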
3 changes: 1 addition & 2 deletions docs/source/utils.rst
@@ -14,5 +14,4 @@ These utils functions only support the following basic Python types: tuple,
 list and dict.
 
 .. autofunction:: torch_to_numpy
-.. autofunction:: tensors_to_variables
-.. autofunction:: variables_to_tensors
+.. autofunction:: torch_apply
138 changes: 84 additions & 54 deletions pytoune/framework/model.py
@@ -2,11 +2,10 @@
 import numpy as np
 
 import torch
-from torch.autograd import Variable
 from torch.utils.data import DataLoader, TensorDataset
 
 from .callbacks import CallbackList, ProgressionCallback
-from pytoune import torch_to_numpy, tensors_to_variables, variables_to_tensors
+from pytoune import torch_to_numpy, torch_to
 
 class Model:
     """
@@ -132,6 +131,7 @@ def __init__(self, model, optimizer, loss_function, metrics=[]):
         self.loss_function = loss_function
         self.metrics = metrics
         self.metrics_names = [metric.__name__ for metric in metrics]
+        self.device = None
 
     def fit(self, x, y, validation_x=None, validation_y=None, batch_size=32, epochs=1000, steps_per_epoch=None, validation_steps=None, initial_epoch=1, verbose=True, callbacks=[]):
         """
@@ -315,27 +315,28 @@ def fit_generator(self, train_generator, valid_generator=None, epochs=1000, step
 
         self.model.train(True)
         train_iterator = iter(train_generator)
-        for step in range(1, steps_per_epoch + 1):
-            callback_list.on_batch_begin(step, {})
+        with torch.enable_grad():
+            for step in range(1, steps_per_epoch + 1):
+                callback_list.on_batch_begin(step, {})
 
-            self.model.zero_grad()
+                self.model.zero_grad()
 
-            x, y = next(train_iterator)
-            loss_tensor, metrics_tensors = self._compute_loss_and_metrics(x, y)
+                x, y = next(train_iterator)
+                loss_tensor, metrics_tensors = self._compute_loss_and_metrics(x, y)
 
-            loss_tensor.backward()
-            callback_list.on_backward_end(step)
-            self.optimizer.step()
+                loss_tensor.backward()
+                callback_list.on_backward_end(step)
+                self.optimizer.step()
 
-            loss, metrics = self._loss_and_metrics_tensors_to_numpy(loss_tensor, metrics_tensors)
-            size = self._get_batch_size(x, y)
-            losses_sum += loss * size
-            metrics_sum += metrics * size
-            sizes_sum += size
+                loss, metrics = self._loss_and_metrics_tensors_to_numpy(loss_tensor, metrics_tensors)
+                size = self._get_batch_size(x, y)
+                losses_sum += loss * size
+                metrics_sum += metrics * size
+                sizes_sum += size
 
-            metrics_dict = dict(zip(self.metrics_names, metrics))
-            batch_logs = {'batch': step, 'size': size, 'loss': loss, **metrics_dict}
-            callback_list.on_batch_end(step, batch_logs)
+                metrics_dict = dict(zip(self.metrics_names, metrics))
+                batch_logs = {'batch': step, 'size': size, 'loss': loss, **metrics_dict}
+                callback_list.on_batch_end(step, batch_logs)
 
         val_dict = {}
         if valid_generator is not None:
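The `torch.enable_grad()` wrapper follows the PyTorch 0.4 migration: graph recording is toggled by context managers instead of the old `volatile` flag on `Variable`. A standalone sketch of the two contexts (not PyToune code):

    import torch

    x = torch.rand(4, 1)
    w = torch.rand(1, 1, requires_grad=True)

    with torch.no_grad():            # evaluation: no autograd graph is built
        y = x @ w
        assert not y.requires_grad

    with torch.enable_grad():        # training: operations are recorded
        loss = (x @ w).sum()
        loss.backward()              # populates w.grad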
@@ -362,7 +363,7 @@ def fit_generator(self, train_generator, valid_generator=None, epochs=1000, step
     def predict(self, x, batch_size=32):
         """
         Returns the predictions of the network given a dataset ``x``, where the
-        torch variables are converted into numpy arrays.
+        tensors are converted into numpy arrays.
 
         Args:
             x (Union[Tensor, np.ndarray])): Dataset for which to predict.
@@ -381,7 +382,7 @@ def predict(self, x, batch_size=32):
     def predict_generator(self, generator, steps=None):
         """
         Returns the predictions of the network given a batch of samples ``x``,
-        where the torch variables are converted into numpy arrays.
+        where the tensors are converted into numpy arrays.
 
         generator: Generator-like object for the dataset. The generator must
             yield a batch of samples.
@@ -402,18 +403,18 @@ def predict_generator(self, generator, steps=None):
             entire dataset)
 
         Returns:
-            List of the predictions of each batch with torch variables
+            List of the predictions of each batch with tensors
             converted into numpy arrays.
         """
         self.model.eval()
         if steps is None:
             steps = len(generator)
         pred_y = []
         iterator = iter(generator)
-        for _ in range(steps):
-            x = next(iterator)
-            x = tensors_to_variables(x, volatile=True)
-            pred_y.append(torch_to_numpy(self.model(x)))
+        with torch.no_grad():
+            for _ in range(steps):
+                x = next(iterator)
+                pred_y.append(torch_to_numpy(self.model(x)))
         return pred_y
 
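With `volatile` gone, wrapping the prediction loop in `torch.no_grad()` is what now prevents graph construction during inference. A hedged usage sketch, reusing the `model` from the sketch above (the generator is hypothetical):

    import torch

    def batches():
        while True:
            yield torch.rand(32, 1)

    # Four batches of predictions, each converted to a numpy array.
    pred_y = model.predict_generator(batches(), steps=4)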
@@ -489,8 +490,8 @@ def evaluate_generator(self, generator, steps=None, return_pred=False):
             ``metrics`` is a numpy array of size ``n``, where ``n`` is the
             number of metrics. If ``return_pred`` is true, then this method
             returns a tuple ``(loss, metrics, pred_y)`` where ``pred_y`` is the
-            list of the predictions of each batch with torch variables converted
-            into numpy arrays.
+            list of the predictions of each batch with tensors converted into
+            numpy arrays.
         """
         self.model.eval()
         if steps is None:
@@ -506,20 +507,21 @@ def _validate(self, valid_generator, validation_steps, return_pred=False):
         pred_list = []
 
         valid_iterator = iter(valid_generator)
-        for step in range(validation_steps):
-            x, y = next(valid_iterator)
-            if return_pred:
-                loss_tensor, metrics_tensors, pred_y = self._compute_loss_and_metrics(x, y, return_pred=True)
-                loss, metrics, pred_y = self._loss_and_metrics_tensors_to_numpy(loss_tensor, metrics_tensors, pred_y)
-                pred_list.append(pred_y)
-            else:
-                loss_tensor, metrics_tensors = self._compute_loss_and_metrics(x, y, return_pred=False)
-                loss, metrics = self._loss_and_metrics_tensors_to_numpy(loss_tensor, metrics_tensors)
+        with torch.no_grad():
+            for step in range(validation_steps):
+                x, y = next(valid_iterator)
+                if return_pred:
+                    loss_tensor, metrics_tensors, pred_y = self._compute_loss_and_metrics(x, y, return_pred=True)
+                    loss, metrics, pred_y = self._loss_and_metrics_tensors_to_numpy(loss_tensor, metrics_tensors, pred_y)
+                    pred_list.append(pred_y)
+                else:
+                    loss_tensor, metrics_tensors = self._compute_loss_and_metrics(x, y, return_pred=False)
+                    loss, metrics = self._loss_and_metrics_tensors_to_numpy(loss_tensor, metrics_tensors)
 
-            size = self._get_batch_size(x, y)
-            losses_sum += loss * size
-            metrics_sum += metrics * size
-            sizes_sum += size
+                size = self._get_batch_size(x, y)
+                losses_sum += loss * size
+                metrics_sum += metrics * size
+                sizes_sum += size
 
         loss_mean = losses_sum / sizes_sum
         metrics_mean = metrics_sum / sizes_sum
@@ -529,8 +531,9 @@ def _validate(self, valid_generator, validation_steps, return_pred=False):
         return ret
 
     def _compute_loss_and_metrics(self, x, y, return_pred=False):
-        x = tensors_to_variables(x, volatile=not self.model.training)
-        y = tensors_to_variables(y, volatile=not self.model.training)
+        if self.device is not None:
+            x = torch_to(x, self.device)
+            y = torch_to(y, self.device)
         pred_y = self.model(x)
         loss_tensor = self.loss_function(pred_y, y)
         metrics_tensors = self._compute_metrics(pred_y, y)
@@ -552,9 +555,9 @@ def _loss_and_metrics_tensors_to_numpy(self, loss_tensor, metrics_tensors, pred_
         return ret
 
     def _get_batch_size(self, x, y):
-        if torch.is_tensor(x) or isinstance(x, Variable):
+        if torch.is_tensor(x):
            return len(x)
-        elif torch.is_tensor(y) or isinstance(y, Variable):
+        elif torch.is_tensor(y):
            return len(y)
         else:
            warnings.warn("When 'x' or 'y' are not tensors, the batch size is set to 1 and, thus, the computed loss and metrics at the end of each epoch is the mean of the batches' losses and metrics.")
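The batch size returned here feeds a sample-weighted running mean, so a smaller final batch does not skew the epoch loss; when neither `x` nor `y` is a tensor, every batch counts as size 1 and the result degrades to a plain mean over batches, as the warning says. A small numeric illustration:

    # Two batches of 32 samples and one of 16: the epoch loss is the
    # sample-weighted mean, not the mean of the three batch losses.
    losses, sizes = [0.50, 0.40, 0.80], [32, 32, 16]
    weighted = sum(l * s for l, s in zip(losses, sizes)) / sum(sizes)  # 0.52
    unweighted = sum(losses) / len(losses)                             # ~0.57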
@@ -611,23 +614,50 @@ def set_weights(self, weights):
     def cuda(self, *args, **kwargs):
         """
         Transfers the network to the GPU. The arguments are passed to the
-        ``torch.nn.Module.cuda()`` method. Notice that the method
-        ``torch.Tensor.cuda()`` must be called separately on the tensors given
-        to the network.
+        ``torch.nn.Module.cuda()`` method. Notice that the device is saved so
+        that the batches can be sent to the right device before being passed
+        to the network.
 
         Returns:
-            The return of ``torch.nn.Module.cuda()``.
+            `self`.
         """
-        return self.model.cuda(*args, **kwargs)
+        self.model.cuda(*args, **kwargs)
+        self.device = None
+        for _, p in zip(range(1), self.model.parameters()):
+            self.device = p.device
+        return self
 
     def cpu(self, *args, **kwargs):
         """
         Transfers the network to the CPU. The arguments are passed to the
-        ``torch.nn.Module.cpu()`` method. Notice that the method
-        ``torch.Tensor.cpu()`` must be called separately on the tensors given
-        to the network.
+        ``torch.nn.Module.cpu()`` method. Notice that the device is saved so
+        that the batches can be sent to the right device before being passed
+        to the network.
 
         Returns:
-            The return of ``torch.nn.Module.cpu()``.
+            `self`.
         """
-        return self.model.cpu(*args, **kwargs)
+        self.model.cpu(*args, **kwargs)
+        self.device = None
+        for _, p in zip(range(1), self.model.parameters()):
+            self.device = p.device
+        return self
+
+    def to(self, device):
+        """
+        Transfers the network to the specified device. The device is saved so
+        that the batches can be sent to the right device before being passed
+        to the network.
+
+        Args:
+            device (torch.device): The device to which the network is sent.
+
+        Returns:
+            `self`.
+        """
+        self.device = device
+        self.model.to(self.device)
+        return self
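All three transfer methods now return `self` for chaining, and `cuda()`/`cpu()` recover the stored device from the network's first parameter: `zip(range(1), self.model.parameters())` yields at most one parameter, so `self.device` stays `None` for a parameterless module. A sketch of the idiom in isolation (standalone, not PyToune code):

    import torch
    import torch.nn as nn

    network = nn.Linear(1, 1)

    device = None
    for _, p in zip(range(1), network.parameters()):  # at most one iteration
        device = p.device
    # Equivalent for modules known to have parameters:
    assert device == next(iter(network.parameters())).device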
73 changes: 20 additions & 53 deletions pytoune/utils.py
@@ -1,80 +1,47 @@
 import torch
-from torch.autograd import Variable
 
 
 def torch_to_numpy(obj):
     """
-    Convert to numpy arrays all tensors and variables inside a Python object
-    composed of the supported types.
-
-    Args:
-        obj: The Python object to convert.
-
-    Returns:
-        A new Python object with the same structure as `obj` but where the
-        tensors and variables are now numpy arrays. Not supported type are left
-        as reference in the new object.
-    """
-    if isinstance(obj, Variable):
-        obj = obj.data
-    if isinstance(obj, list) or isinstance(obj, tuple):
-        return type(obj)(torch_to_numpy(el) for el in obj)
-    if isinstance(obj, dict):
-        return {k:torch_to_numpy(el) for k,el in obj.items()}
-    if not torch.is_tensor(obj):
-        return obj
-    return obj.cpu().numpy()
-
-def tensors_to_variables(obj, *args, **kwargs):
-    """
-    Convert to variables all tensors inside a Python object composed of the
+    Convert to numpy arrays all tensors inside a Python object composed of the
     supported types.
 
     Args:
         obj: The Python object to convert.
-        *args: The arguments to pass to the Variable constructor.
-        **kwargs: The keyword arguments to pass to the Variable constructor.
 
     Returns:
         A new Python object with the same structure as `obj` but where the
-        tensors are now variables.
-
-    Raises:
-        ValueError: If a not supported type is inside `obj`.
+        tensors are now numpy arrays. Unsupported types are left as references
+        in the new object.
+
+    See:
+        `pytoune.torch_apply` for supported types.
     """
-    if isinstance(obj, Variable):
-        return obj
-    if torch.is_tensor(obj):
-        return Variable(obj, *args, **kwargs)
-    if isinstance(obj, list) or isinstance(obj, tuple):
-        return type(obj)(tensors_to_variables(el, *args, **kwargs) for el in obj)
-    if isinstance(obj, dict):
-        return {k:tensors_to_variables(el, *args, **kwargs) for k,el in obj.items()}
+    return torch_apply(obj, lambda t: t.cpu().numpy())
 
-    raise ValueError("The type '%s' is not supported by this function." % type(obj).__name__)
+def torch_to(obj, *args, **kargs):
+    return torch_apply(obj, lambda t: t.to(*args, **kargs))
 
-def variables_to_tensors(obj):
+def torch_apply(obj, func):
     """
-    Convert to tensors all variables inside a Python object composed of the
+    Apply a function to all tensors inside a Python object composed of the
     supported types.
 
+    Supported types are: list, tuple and dict.
+
     Args:
         obj: The Python object to convert.
+        func: The function to apply.
 
     Returns:
         A new Python object with the same structure as `obj` but where the
-        variables are now tensors.
-
-    Raises:
-        ValueError: If a not supported type is inside `obj`.
+        function `func` has been applied to the tensors. Unsupported types are
+        left as references in the new object.
     """
-    if torch.is_tensor(obj):
-        return obj
-    if isinstance(obj, Variable):
-        return obj.data
     if isinstance(obj, list) or isinstance(obj, tuple):
-        return type(obj)(variables_to_tensors(el) for el in obj)
+        return type(obj)(torch_apply(el, func) for el in obj)
     if isinstance(obj, dict):
-        return {k:variables_to_tensors(el) for k,el in obj.items()}
-
-    raise ValueError("The type '%s' is not supported by this function." % type(obj).__name__)
+        return {k:torch_apply(el, func) for k,el in obj.items()}
+    if not torch.is_tensor(obj):
+        return obj
+    return func(obj)
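`torch_apply` centralizes the recursive traversal that the removed helpers each duplicated, so `torch_to_numpy` and the new `torch_to` reduce to one-liners over it. A quick illustration of the traversal on a nested batch (the structure is made up for the example):

    import torch
    from pytoune import torch_to_numpy

    batch = {'inputs': (torch.rand(2, 3), torch.rand(2)), 'id': 42}
    out = torch_to_numpy(batch)
    # Tensors become numpy arrays; the dict/tuple structure and the
    # unsupported int are passed through as-is.
    assert type(out['inputs'][0]).__module__ == 'numpy'
    assert out['id'] == 42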
5 changes: 2 additions & 3 deletions tests/framework/callbacks/test_best_model_restore.py
@@ -8,12 +8,11 @@
 
 import torch
 import torch.nn as nn
-from torch.autograd import Variable
 
 def some_data_generator(batch_size):
     while True:
-        x = Variable(torch.rand(batch_size, 1))
-        y = Variable(torch.rand(batch_size, 1))
+        x = torch.rand(batch_size, 1)
+        y = torch.rand(batch_size, 1)
         yield x, y
 
 class BestModelRestoreTest(TestCase):
5 changes: 2 additions & 3 deletions tests/framework/callbacks/test_checkpoint.py
@@ -10,12 +10,11 @@
 
 import torch
 import torch.nn as nn
-from torch.autograd import Variable
 
 def some_data_generator(batch_size):
     while True:
-        x = Variable(torch.rand(batch_size, 1))
-        y = Variable(torch.rand(batch_size, 1))
+        x = torch.rand(batch_size, 1)
+        y = torch.rand(batch_size, 1)
         yield x, y
 
 class ModelCheckpointTest(TestCase):
5 changes: 2 additions & 3 deletions tests/framework/callbacks/test_earlystopping.py
@@ -7,12 +7,11 @@
 
 import torch
 import torch.nn as nn
-from torch.autograd import Variable
 
 def some_data_generator(batch_size):
     while True:
-        x = Variable(torch.rand(batch_size, 1))
-        y = Variable(torch.rand(batch_size, 1))
+        x = torch.rand(batch_size, 1)
+        y = torch.rand(batch_size, 1)
        yield x, y
 
 class EarlyStoppingTest(TestCase):
1 change: 0 additions & 1 deletion tests/framework/callbacks/test_logger.py
@@ -11,7 +11,6 @@
 
 import torch
 import torch.nn as nn
-from torch.autograd import Variable
 
 def some_data_generator(batch_size):
     while True:
(The diff for the 3 remaining changed files was not loaded on this page.)
