Fix crash in new cudnn R2 conv code in full mode
nouiz authored and abergeron committed Dec 19, 2014
1 parent 20be5a2 commit b2d72c7
Showing 1 changed file with 34 additions and 9 deletions.
43 changes: 34 additions & 9 deletions theano/sandbox/cuda/dnn.py
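
The change replaces the unconditional call to cudnnGetConvolution2dForwardOutputDim with a manual output-shape computation for the full (gradient-of-image) path, and lets callers pass the original image height and width when the convolution is strided. Below is a minimal sketch, in plain Python and not taken from the diff, of the shape arithmetic involved: with a unit stride the image size can be reconstructed from the gradient's size, but with a larger stride several image sizes map to the same output size, so the shape has to be supplied explicitly.

def forward_out_size(img, kern, pad, stride):
    # height/width of a forward convolution with symmetric zero padding
    return (img + 2 * pad - kern) // stride + 1

def gradi_out_size(top, kern, pad, stride, img_size=None):
    # height/width of the gradient w.r.t. the image (the "full" path)
    if stride != 1:
        if img_size is None:
            raise ValueError('shape must be given if subsample != (1, 1)')
        return img_size
    return (top - 1) * stride + kern - 2 * pad

# stride 1: the image size round-trips exactly
assert gradi_out_size(forward_out_size(28, 5, 0, 1), 5, 0, 1) == 28
# stride 2: images of height 9 and 10 give the same forward size of 4,
# so the original size cannot be recovered from the gradient alone
assert forward_out_size(9, 3, 0, 2) == forward_out_size(10, 3, 0, 2) == 4
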
@@ -2,6 +2,7 @@

import theano
from theano import Apply, gof, tensor
from theano.gradient import DisconnectedType
from theano.gof import Optimizer, local_optimizer
from theano.gof.type import CDataType, Generic
from theano.compat import PY3
@@ -373,6 +374,7 @@ def c_set_tensor4d(self, *arg):

def c_code(self, node, name, inputs, outputs, sub):
desc = inputs[2]
height, width = inputs[3:] or (-1, -1)
out, = outputs

checks = []
@@ -425,11 +427,24 @@ def c_code(self, node, name, inputs, outputs, sub):
out_dims[3] = dd[6];
}
#else
cudnnGetConvolution2dForwardOutputDim(
if (!%(full)d){
cudnnGetConvolution2dForwardOutputDim(
%(desc)s,
input%(id)d,
kerns%(id)d,
&out_dims[0], &out_dims[1],&out_dims[2], &out_dims[3]);
}else{
int padH, padW, dH, dW, upscalex, upscaley;
cudnnConvolutionMode_t mode;
cudnnGetConvolution2dDescriptor(
%(desc)s, &padW, &padH, &dH, &dW,
&upscalex, &upscaley, &mode);
out_dims[0] = CudaNdarray_HOST_DIMS(%(input2)s)[0];
out_dims[1] = CudaNdarray_HOST_DIMS(%(input1)s)[1];
out_dims[2] = (dH != 1) ? %(height)s : (CudaNdarray_HOST_DIMS(%(input1)s)[2] - 1) * dH + CudaNdarray_HOST_DIMS(%(input2)s)[2] - 2*padH;
out_dims[3] = (dW != 1) ? %(width)s : (CudaNdarray_HOST_DIMS(%(input1)s)[3] - 1) * dW + CudaNdarray_HOST_DIMS(%(input2)s)[3] - 2*padW;
}
#endif
if (CudaNdarray_prep_output(&%(out)s, 4, out_dims) != 0) {
%(fail)s
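
For reference, a hedged Python mirror of the new #else branch above; the names are illustrative, and %(input1)s / %(input2)s are read as the kernels and the incoming gradient respectively, which is the only assignment under which the batch and channel dimensions line up.

def full_mode_out_dims(kern_shape, top_shape, pad, stride, height=-1, width=-1):
    # kern_shape/top_shape are 4-tuples (as returned by CudaNdarray_HOST_DIMS)
    padH, padW = pad
    dH, dW = stride
    return [
        top_shape[0],    # batch size, taken from the incoming gradient
        kern_shape[1],   # image channels, taken from the kernels
        # unit stride: reconstruct the size; otherwise use the size passed in
        height if dH != 1 else (kern_shape[2] - 1) * dH + top_shape[2] - 2 * padH,
        width if dW != 1 else (kern_shape[3] - 1) * dW + top_shape[3] - 2 * padW,
    ]
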
@@ -475,7 +490,7 @@ def c_code(self, node, name, inputs, outputs, sub):
}
#endif
if (err%(name)s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "error doing operation: %%s",
PyErr_Format(PyExc_RuntimeError, "%(cls)s, error doing operation: %%s",
cudnnGetErrorString(err%(name)s));
%(fail)s
}
@@ -485,10 +500,13 @@ def c_code(self, node, name, inputs, outputs, sub):
input1_desc=self.conv_inputs[0]+name,
input2_desc=self.conv_inputs[1]+name,
output_desc=self.conv_output+name,
height=height, width=width,
cls=self.__class__.__name__,
full=int("GpuDnnConvGradI" == self.__class__.__name__),
method=self.conv_op, path=self.path_flag, algo=self.algo)

def c_code_cache_version(self):
return (8, version())
return (9, version())


class GpuDnnConv(GpuDnnConvBase):
@@ -533,7 +551,7 @@ def grad(self, inp, grads):

top = gpu_contiguous(top)

d_img = GpuDnnConvGradI()(kerns, top, desc)
d_img = GpuDnnConvGradI()(kerns, top, desc, img.shape[-2:])
d_kerns = GpuDnnConvGradW()(img, top, desc)

return d_img, d_kerns, theano.gradient.DisconnectedType()()
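
Only the gradient w.r.t. the image changes here: GpuDnnConv.grad already has the original image in scope, so it forwards the symbolic spatial shape img.shape[-2:] to GpuDnnConvGradI. As a quick plain-Python check, reusing forward_out_size and gradi_out_size from the sketch near the top of this page, the shape is recoverable anyway when the subsample is (1, 1), which is why the argument used to be omitted:

for img_h in (7, 16, 28):
    top_h = forward_out_size(img_h, kern=5, pad=2, stride=1)
    assert gradi_out_size(top_h, kern=5, pad=2, stride=1) == img_h
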
@@ -619,14 +637,14 @@ def grad(self, inp, grads):

d_kerns = GpuDnnConvGradW()(img, top, desc)
d_top = GpuDnnConv()(img, kerns, desc)

return d_kerns, d_top, theano.gradient.DisconnectedType()()
d_height_width = (DisconnectedType()(),) * 2 if len(inp) == 5 else ()
return (d_kerns, d_top, DisconnectedType()()) + d_height_width

def connection_pattern(self, node):
# not connected to desc
return [[1], [1], [0]]

def make_node(self, kern, topgrad, desc):
def make_node(self, kern, topgrad, desc, shape=None):
kern = as_cuda_ndarray_variable(kern)
topgrad = as_cuda_ndarray_variable(topgrad)
if kern.type.ndim != 4:
@@ -637,11 +655,18 @@ def make_node(self, kern, topgrad, desc):
if not isinstance(desc.type, CDataType) \
or desc.type.ctype != 'cudnnConvolutionDescriptor_t':
raise TypeError('desc must be cudnnConvolutionDescriptor_t')

if shape is None:
if not (desc.owner and
isinstance(desc.owner.op, GpuDnnConvDesc) and
desc.owner.op.subsample == (1, 1)):
raise ValueError('shape must be given if subsample != (1, 1)')
height_width = []
else:
height_width = [shape[0], shape[1]]
broadcastable = [topgrad.type.broadcastable[0],
kern.type.broadcastable[1],
False, False]
return Apply(self, [kern, topgrad, desc],
return Apply(self, [kern, topgrad, desc] + height_width,
[CudaNdarrayType(broadcastable)()])
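
A hedged usage sketch of the extended interface, not taken from the diff: kerns, top and img are assumed to be CUDA tensor variables, and desc_1x1 / desc_2x2 stand for convolution descriptors built by GpuDnnConvDesc with subsample (1, 1) and (2, 2) respectively.

from theano.sandbox.cuda.dnn import GpuDnnConvGradI

# subsample (1, 1): the spatial shape can be recomputed, so it may be omitted
d_img = GpuDnnConvGradI()(kerns, top, desc_1x1)

# subsample (2, 2): the original image shape must be passed explicitly;
# omitting it makes make_node raise
#     ValueError: shape must be given if subsample != (1, 1)
d_img_strided = GpuDnnConvGradI()(kerns, top, desc_2x2, img.shape[-2:])

# The optional shape becomes two extra symbolic scalar inputs (height, width)
# on the Apply node; their gradients are DisconnectedType, matching grad() above.
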


