Commit
Merge pull request Theano#3275 from abergeron/fix_buildbot
Fix buildbot
lamblin committed Aug 14, 2015
2 parents 15c90dd + 5381bbd commit 079181c
Showing 12 changed files with 91 additions and 55 deletions.
6 changes: 4 additions & 2 deletions theano/compile/pfunc.py
@@ -101,7 +101,8 @@ def clone_v_get_shared_updates(v, copy_inputs_over):
# Do not use default_update if a "real" update was
# provided
if v not in update_d:
- v_update = v.type.filter_variable(v.default_update)
+ v_update = v.type.filter_variable(v.default_update,
+ allow_convert=False)
if v_update.type != v.type:
raise TypeError(
'an update must have the same type as '
@@ -197,7 +198,8 @@ def clone_inputs(i):

# filter_variable ensure smooth conversion of cpu/gpu Types
try:
- update_val = store_into.type.filter_variable(update_val)
+ update_val = store_into.type.filter_variable(update_val,
+ allow_convert=False)
except TypeError:
err_msg = ('An update must have the same type as the'
' original shared variable (shared_var=%s,'
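Both hunks above pass allow_convert=False so that updates keep their strict type check: even though filter_variable (extended in theano/gof/type.py below) can now rebroadcast a variable automatically, an update whose broadcastable pattern does not exactly match its shared variable is still rejected. A minimal sketch of the behaviour this preserves, assuming the Theano 0.7-era API; the variable names are illustrative only:

```python
import numpy as np
import theano
import theano.tensor as T

# A shared "row" whose type guarantees a broadcastable first dimension.
w = theano.shared(np.zeros((1, 4)), broadcastable=(True, False))
m = T.matrix()

# (w + m) has broadcastable pattern (False, False): the row guarantee is lost,
# so the update's type no longer matches w's type exactly.
try:
    theano.function([m], [], updates=[(w, w + m)])
except TypeError as e:
    print("update rejected:", e)
```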
2 changes: 1 addition & 1 deletion theano/gof/null_type.py
@@ -21,7 +21,7 @@ def __init__(self, why_null='(no explanation given)'):
def filter(self, data, strict=False, allow_downcast=None):
raise ValueError("No values may be assigned to a NullType")

- def filter_variable(self, other):
+ def filter_variable(self, other, allow_convert=True):
raise ValueError("No values may be assigned to a NullType")

def may_share_memory(a, b):
1 change: 0 additions & 1 deletion theano/gof/opt.py
@@ -1288,7 +1288,6 @@ def build(pattern, u):
return pattern.clone()
p = self.out_pattern
new = build(p, u)
- # print "PatternSub matched:", new
return [new]
else:
return False
42 changes: 24 additions & 18 deletions theano/gof/type.py
@@ -263,7 +263,7 @@ def filter(self, data, strict=False, allow_downcast=None):

# def filter_inplace(value, storage, strict=False, allow_downcast=None)

- def filter_variable(self, other):
+ def filter_variable(self, other, allow_convert=True):
"""Convert a symbolic variable into this Type, if compatible.
For the moment, the only Types compatible with one another are
@@ -277,6 +277,11 @@ def filter_variable(self, other):
# a Constant of the appropriate Type.
other = self.Constant(type=self, data=other)

+ if other.type != self and allow_convert:
+ other2 = self.convert_variable(other)
+ if other2 is not None:
+ return other2

if other.type != self:
raise TypeError(
'Cannot convert Type %(othertype)s '
@@ -285,6 +290,24 @@
% dict(othertype=other.type, other=other, self=self))
return other

+ def convert_variable(self, var):
+ """Patch variable so that its type will match self, if possible.
+ If the variable can't be converted, this should return None.
+ The conversion can only happen if the following implication is
+ true for all possible `val`.
+ self.is_valid_value(val) => var.type.is_valid_value(val)
+ For the majority of types this means that you can only have
+ non-broadcastable dimensions become broadcastable and not the
+ inverse.
+ The default is to not convert anything which is always safe.
+ """
+ return None

def is_valid_value(self, a):
"""Required: Return True for any python object `a` that would be a
legal value for a Variable of this Type"""
@@ -404,23 +427,6 @@ class Type(object2, PureType, CLinkerType):
do type-checking in pattern-based optimizations.
"""
- def convert_variable(self, var):
- """Patch variable so that its type will match self, if possible.
- If the variable can't be converted, this should return None.
- The conversion can only happen if the following implication is
- true for all possible `val`.
- self.is_valid_value(val) => var.type.is_valid_value(val)
- For the majority of types this means that you can only have
- non-broadcastable dimensions become broadcastable and not the
- inverse.
- The default is to not convert anything which is always safe.
- """
- return None


class SingletonType(Type):
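The contract documented in convert_variable (moved from Type to PureType in the hunks above) reduces to a rule on broadcastable patterns. A standalone sketch of that rule, assuming TensorType-style tuples of booleans; the helper name is ours, not Theano's:

```python
def can_convert(target_bcast, source_bcast):
    """Could a variable with pattern `source_bcast` be patched to `target_bcast`?

    A dimension may go from non-broadcastable (False) to broadcastable (True),
    never the reverse: every value that is valid for the target type must also
    be valid for the source type.
    """
    if len(target_bcast) != len(source_bcast):
        return False
    return all(t or not s for t, s in zip(target_bcast, source_bcast))


print(can_convert((True, False), (False, False)))  # True: dim 0 gains a broadcast guarantee
print(can_convert((False, False), (True, False)))  # False: dim 0 would lose one
```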
2 changes: 1 addition & 1 deletion theano/sandbox/cuda/tests/test_memory.py
@@ -138,7 +138,7 @@ def test_memory_lazy():
# When dtype is float64, only the shared is on the gpu and it is transferred
# to the cpu for computation. So no extra alloc after compilation.
# more_alloc1 is after the first compilation, more_alloc2 after the second.
- for dtype, more_alloc1 in [("float32", 2),
+ for dtype, more_alloc1 in [("float32", 1),
("float64", 0)]:
print(dtype)
test_params = np.asarray(np.random.randn(np.prod(shapes)), dtype)
17 changes: 12 additions & 5 deletions theano/sandbox/cuda/type.py
@@ -119,7 +119,7 @@ def filter_inplace(self, data, old_data, strict=False,
% (self, self.dtype, data, converted_data, self.dtype),
data)

- def filter_variable(self, other):
+ def filter_variable(self, other, allow_convert=True):
"""Convert a Variable into a CudaNdarrayType, if compatible.
This Variable should either already be a CudaNdarrayType, or be
@@ -146,10 +146,17 @@ def filter_variable(self, other):
raise TypeError('Incompatible number of dimensions.'
' Expected %d, got %d.' % (self.ndim, other.ndim))
if other.type.broadcastable != self.broadcastable:
- raise TypeError('Incompatible broadcastable dimensions.'
- ' Expected %s, got %s.' %
- (str(other.type.broadcastable),
- str(self.broadcastable)))
+ if allow_convert:
+ type2 = other.type.clone(broadcastable=self.broadcastable)
+ other2 = type2.convert_variable(other)
+ else:
+ other2 = None
+ if other2 is None:
+ raise TypeError('Incompatible broadcastable dimensions.'
+ ' Expected %s, got %s.' %
+ (str(other.type.broadcastable),
+ str(self.broadcastable)))
+ other = other2

return theano.sandbox.cuda.basic_ops.GpuFromHost()(other)

17 changes: 12 additions & 5 deletions theano/sandbox/gpuarray/type.py
@@ -89,7 +89,7 @@ def filter(self, data, strict=False, allow_downcast=None):
" dimension.", shp, self.broadcastable)
return data

- def filter_variable(self, other):
+ def filter_variable(self, other, allow_convert=True):
if hasattr(other, '_as_GpuArrayVariable'):
other = other._as_GpuArrayVariable()

@@ -108,10 +108,17 @@ def filter_variable(self, other):
raise TypeError('Incompatible number of dimensions.'
' Expected %d, got %d.' % (self.ndim, other.ndim))
if other.type.broadcastable != self.broadcastable:
- raise TypeError('Incompatible broadcastable dimensions.'
- ' Expected %s, got %s.' %
- (str(other.type.broadcastable),
- str(self.broadcastable)))
+ if allow_convert:
+ type2 = other.type.clone(broadcastable=self.broadcastable)
+ other2 = type2.convert_variable(other)
+ else:
+ other2 = None
+ if other2 is None:
+ raise TypeError('Incompatible broadcastable dimensions.'
+ ' Expected %s, got %s.' %
+ (str(other.type.broadcastable),
+ str(self.broadcastable)))
+ other = other2

return theano.sandbox.gpuarray.basic_ops.gpu_from_host(other)

7 changes: 1 addition & 6 deletions theano/scan_module/scan_utils.py
@@ -400,12 +400,7 @@ def expand(tensor_var, size):
shapes = [tensor_var.shape[x] for x in xrange(tensor_var.ndim)]
zeros_shape = [size + shapes[0]] + shapes[1:]
empty = tensor.zeros(zeros_shape,
- dtype=tensor_var.dtype)
-
- # Make sure to reuse the broadcast pattern of the original tensor for
- # every dimension but the first one.
- broadcastable = (False,) + tensor_var.broadcastable[1:]
- empty = tensor.patternbroadcast(empty, broadcastable)
+ dtype=tensor_var.dtype)

return tensor.set_subtensor(empty[:shapes[0]], tensor_var)

32 changes: 23 additions & 9 deletions theano/scan_module/tests/test_scan.py
@@ -43,6 +43,11 @@
else:
mode_with_opt = theano.compile.mode.get_default_mode()
mode_with_gpu = mode_with_opt.including('gpu', 'scan')
+ if theano.config.mode in ('DEBUG_MODE', 'DebugMode'):
+ mode_nodebug = theano.compile.mode.get_mode('FAST_RUN')
+ else:
+ mode_nodebug = mode_with_opt
+ mode_with_gpu_nodebug = mode_nodebug.including('gpu', 'scan')


type_eps = {'float64': 1e-7,
@@ -1772,8 +1777,16 @@ def reset_rng_fn(fn, *args):
analytic_grad = reset_rng_grad_fn(v_u, v_x0, vW_in)
utt.assert_allclose(analytic_grad[0][:2], numpy.zeros((2, 2)))

@attr('slow')
def test_grad_multiple_outs_some_disconnected(self):
+ final_cost = self._grad_mout_helper(100, mode_nodebug)
+ assert final_cost < 0.02
+
+ def test_grad_multiple_outs_some_disconnected_2(self):
+ # This is to try the network in DEBUG_MODE, but not fully
+ # train it since that would take 3 hours
+ self._grad_mout_helper(1, None)
+
+ def _grad_mout_helper(self, n_iters, mode):
# Created on Tue Oct 07 13:28:51 2014
# @author: vaneetke
rng = numpy.random.RandomState(utt.fetch_seed())
@@ -1815,7 +1828,8 @@ def one_step(x_t, h_tm2, h_tm1, W_ih, W_hh, b_h, W_ho, b_o):
sequences=dict(input=x),
# corresponds to the return type of one_step
outputs_info=[dict(initial=h0, taps=[-2, -1]), None],
- non_sequences=[W_ih, W_hh, b_h, W_ho, b_o])
+ non_sequences=[W_ih, W_hh, b_h, W_ho, b_o],
+ mode=mode)

# target values
t = tensor.matrix()
@@ -1830,8 +1844,6 @@ def one_step(x_t, h_tm2, h_tm1, W_ih, W_hh, b_h, W_ho, b_o):
gparams = theano.grad(cost, params)
updates = [(param, param - gparam * learning_rate)
for param, gparam in zip(params, gparams)]
- mode = copy.copy(theano.compile.get_default_mode())
- mode.check_py_code = False
learn_rnn_fn = theano.function(inputs=[x, t],
outputs=cost,
updates=updates,
@@ -1846,10 +1858,10 @@ def one_step(x_t, h_tm2, h_tm1, W_ih, W_hh, b_h, W_ho, b_o):
s_v = numpy.sin(x_v)
t_v = numpy.roll(s_v, -1)[:-1]
s_v = s_v[:-1]
- for i in xrange(100):
+ for i in xrange(n_iters):
cost = learn_rnn_fn(s_v, t_v)
pred = eval_rnn_fn(s_v)
- assert cost < 0.02
+ return cost

def test_draw_as_input_to_scan(self):
trng = theano.tensor.shared_randomstreams.RandomStreams(123)
@@ -4602,7 +4614,7 @@ def scan_l(baseline, last_step):

l1_out, _ = theano.scan(scan_l, sequences=[l1_base],
outputs_info=[zero_output],
- mode=self.mode_with_gpu)
+ mode=self.mode_with_gpu_nodebug)

l2_out = tensor.dot(l1_out, W)

@@ -4613,7 +4625,7 @@ def scan_l(baseline, last_step):

# Compile the theano function
feval_backprop = theano.function([xin, yout], cost, updates=updates,
- mode=self.mode_with_gpu)
+ mode=self.mode_with_gpu_nodebug)

# Validate that the PushOutScanOutput optimization has been applied
# by checking the number of outputs of the grad Scan node in the
@@ -4676,7 +4688,8 @@ class ScanGpuTests and runs them using the cuda backend. It also adds
def __init__(self, *args, **kwargs):
from theano.sandbox import cuda
self.gpu_backend = cuda
- self.mode_with_gpu = mode_with_opt.including('gpu', 'scan')
+ self.mode_with_gpu = mode_with_gpu
+ self.mode_with_gpu_nodebug = mode_with_gpu_nodebug
super(T_Scan_Cuda, self).__init__(*args, **kwargs)

def setUp(self):
@@ -4737,6 +4750,7 @@ def __init__(self, *args, **kwargs):
from theano.sandbox import gpuarray
self.gpu_backend = gpuarray
self.mode_with_gpu = mode_with_opt.including('gpuarray', 'scan')
+ self.mode_with_gpu_nodebug = mode_nodebug.including('gpuarray', 'scan')
super(T_Scan_Gpuarray, self).__init__(*args, **kwargs)

def setUp(self):
8 changes: 3 additions & 5 deletions theano/tensor/basic.py
@@ -712,11 +712,9 @@ def get_scalar_constant_value(orig_v, elemwise=True,
ndim = grandparent.type.ndim
if grandparent.owner and isinstance(grandparent.owner.op,
Rebroadcast):
- l = []
- for idx, (b1, b2) in enumerate(
- zip(grandparent.owner.inputs[0].broadcastable,
- gp_broadcastable)):
- l.append(b1 or b2)
+ ggp_broadcastable = grandparent.owner.inputs[0].broadcastable
+ l = [b1 or b2 for b1, b2 in zip(ggp_broadcastable,
+ gp_broadcastable)]
gp_broadcastable = tuple(l)

assert ndim == len(gp_broadcastable)
4 changes: 3 additions & 1 deletion theano/tensor/elemwise.py
@@ -1467,6 +1467,8 @@ def _c_all(self, node, name, inames, onames, sub):
odtype = output.type.dtype_specs()[1]

if hasattr(self, 'acc_dtype') and self.acc_dtype is not None:
+ if self.acc_dtype == 'float16':
+ raise theano.gof.utils.MethodNotDefined("no c_code for float16")
acc_type = TensorType(
broadcastable=node.outputs[0].broadcastable,
dtype=self.acc_dtype)
@@ -1632,7 +1634,7 @@ def c_headers(self):
return ['<vector>', '<algorithm>']

def c_code_cache_version_apply(self, node):
- version = [5] # the version corresponding to the c code in this Op
+ version = (6,) # the version corresponding to the c code in this Op

# now we insert versions for the ops on which we depend...
scalar_node = Apply(
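Some context for the float16 guard and the cache-version bump above: raising MethodNotDefined from c_code tells Theano's linker that no C implementation is available, so the node falls back to the Op's Python perform() method, and any change to the generated C code needs a new c_code_cache_version so previously compiled modules are not reused. A toy Op sketching that fallback; it is not Theano's CAReduce:

```python
import theano
import theano.tensor as T
from theano.gof.utils import MethodNotDefined


class ToyDouble(theano.gof.Op):
    """Toy Op that doubles its input and has no C implementation at all."""
    __props__ = ()

    def make_node(self, x):
        x = T.as_tensor_variable(x)
        return theano.gof.Apply(self, [x], [x.type()])

    def perform(self, node, inputs, output_storage):
        # Pure-Python path, used whenever c_code is unavailable.
        output_storage[0][0] = 2 * inputs[0]

    def c_code(self, node, name, inputs, outputs, sub):
        # Same mechanism as the float16 guard: MethodNotDefined means
        # "generate no C code for this node", not "fail".
        raise MethodNotDefined("no c_code for ToyDouble")


x = T.vector('x')
f = theano.function([x], ToyDouble()(x))  # compiles; the node runs via perform()
```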
8 changes: 7 additions & 1 deletion theano/tensor/type.py
@@ -190,7 +190,7 @@ def filter(self, data, strict=False, allow_downcast=None):
raise ValueError("non-finite elements not allowed")
return data

- def filter_variable(self, other):
+ def filter_variable(self, other, allow_convert=True):
"""Convert a symbolic Variable into a TensorType, if compatible.
For the moment, only a TensorType or CudaNdarrayType will be
@@ -208,6 +208,12 @@ def filter_variable(self, other):
if other.type == self:
return other

+ if allow_convert:
+ # Attempt safe broadcast conversion.
+ other2 = self.convert_variable(other)
+ if other2 is not None and other2.type == self:
+ return other2

raise TypeError(
'Cannot convert Type %(othertype)s '
'(of Variable %(other)s) into Type %(self)s. '
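A hedged illustration of the two code paths added above. It assumes, as the self.convert_variable call implies, that TensorType provides a convert_variable that rebroadcasts when it is safe; that method is not shown in this diff:

```python
import theano.tensor as T

row_type = T.TensorType('float64', broadcastable=(True, False))  # a 1 x N "row" type
x = T.matrix('x', dtype='float64')                               # pattern (False, False)

# Default allow_convert=True: x can be safely rebroadcast, so y.type == row_type.
y = row_type.filter_variable(x)

try:
    # allow_convert=False skips the conversion attempt and reports the mismatch,
    # mirroring the strict update check in theano/compile/pfunc.py above.
    row_type.filter_variable(x, allow_convert=False)
except TypeError as e:
    print("strict:", e)
```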