start impl

memimo · Feb 9, 2016 · 2176d21 · 2176d21
1 parent e9c56c3
commit 2176d21
Show file tree

Hide file tree

Showing 2 changed files with 112 additions and 0 deletions.
diff --git a/theano/sandbox/multinomial.py b/theano/sandbox/multinomial.py
@@ -192,6 +192,72 @@ def perform(self, node, ins, outs):
                             z[0][n, m] += 1
                             break
 
+class WeightedSelectionFromUniform(Op):
+    """
+    Converts samples from a uniform into sample from a multinomial.
+
+    """
+
+    __props__ = ("odtype",)
+
+    def __init__(self, odtype):
+        self.odtype = odtype
+
+    def __str__(self):
+        return '%s{%s}' % (self.__class__.__name__, self.odtype)
+
+    def __setstate__(self, dct):
+        self.__dict__.update(dct)
+        try:
+            self.odtype
+        except AttributeError:
+            self.odtype = 'auto'
+
+    def make_node(self, pvals, unis, n=1):
+        pvals = T.as_tensor_variable(pvals)
+        unis = T.as_tensor_variable(unis)
+        if pvals.ndim != 2:
+            raise NotImplementedError('pvals ndim should be 2', pvals.ndim)
+        if unis.ndim != 1:
+            raise NotImplementedError('unis ndim should be 1', unis.ndim)
+        if self.odtype == 'auto':
+            odtype = pvals.dtype
+        else:
+            odtype = self.odtype
+        out = T.tensor(dtype=odtype, broadcastable=pvals.type.broadcastable)
+        return Apply(self, [pvals, unis, as_scalar(n)], [out])
+
+    def grad(self, ins, outgrads):
+        pvals, unis, n = ins
+        (gz,) = outgrads
+        return [T.zeros_like(x) for x in ins]
+
+    def perform(self, node, ins, outs):
+        (pvals, unis, n_samples) = ins
+        (z,) = outs
+
+        if unis.shape[0] != pvals.shape[0] * n_samples:
+            raise ValueError("unis.shape[0] != pvals.shape[0] * n_samples",
+                             unis.shape[0], pvals.shape[0], n_samples)
+        if z[0] is None or numpy.any(z[0].shape != [pvals.shape[0], n_samples]):
+            z[0] = numpy.zeros((pvals.shape[0], n_samples), dtype=node.outputs[0].dtype)
+
+        nb_multi = pvals.shape[0]
+        nb_outcomes = pvals.shape[1]
+
+        # For each multinomial, loop over each possible outcome,
+        # and set selected pval to 0 after being selected
+        for c in range(n_samples):
+            for n in range(nb_multi):
+                cummul = 0
+                unis_n = unis[c*nb_multi+n]
+                for m in range(nb_outcomes):
+                    cummul += pvals[n, m]
+                    if (cummul > unis_n):
+                        z[0][n, c] = m
+                        # set to zero so that it's not selected again
+                        pvals[n, m] = 0.
+
 
 class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp):
     """

diff --git a/theano/sandbox/rng_mrg.py b/theano/sandbox/rng_mrg.py
@@ -1363,6 +1363,52 @@ def multinomial(self, size=None, n=1, pvals=None, ndim=None, dtype='int64',
             raise NotImplementedError(("MRG_RandomStreams.multinomial only"
                                        " implemented for pvals.ndim = 2"))
 
+    def weighted_selection(self, size=None, n=1, pvals=None, ndim=None, dtype='int64',
+                           nstreams=None):
+        """
+        Sample `n` times (`n` needs to be in [1, m], where m is pvals.shape[1], default 1)
+        *WITHOUT replacement* from a multinomial distribution defined by probabilities pvals.
+        
+        Example : WRITEME
+        
+        Notes
+        -----
+        -`size` and `ndim` are only there keep the same signature as other
+        uniform, binomial, normal, etc.
+        TODO : adapt multinomial to take that into account
+
+        -Does not do any value checking on pvals, i.e. there is no
+        check that the elements are non-negative, less than 1, or
+        sum to 1. passing pvals = [[-2., 2.]] will result in
+        sampling [[0, 0]]
+
+        """
+        if pvals is None:
+            raise TypeError("You have to specify pvals")
+        pvals = as_tensor_variable(pvals)
+
+        if n > pvals.shape[1]:
+            raise ValueError("Cannot sample without replacement n samples bigger "
+                             "than the size of the distribution.")
+        if size is not None:
+            raise ValueError("Provided a size argument to "
+                             "MRG_RandomStreams.weighted_selection, which does not use "
+                             "the size argument.")
+        if ndim is not None:
+            raise ValueError("Provided an ndim argument to "
+                             "MRG_RandomStreams.weighted_selection, which does not use "
+                             "the ndim argument.")
+        if pvals.ndim == 2:
+            # size = [pvals.shape[0], as_tensor_variable(n)]
+            size = pvals[:,0].shape * n
+            unis = self.uniform(size=size, ndim=1, nstreams=nstreams)
+            op = multinomial.WeightedSelectionFromUniform(dtype)
+            n_samples = as_tensor_variable(n)
+            return op(pvals, unis, n_samples)
+        else:
+            raise NotImplementedError(("MRG_RandomStreams.weighted_selection only"
+                                       " implemented for pvals.ndim = 2"))
+
     def normal(self, size, avg=0.0, std=1.0, ndim=None,
                dtype=None, nstreams=None):
         """