From 9b90a00b90e632cfa81f01c868d8d46037c9fc73 Mon Sep 17 00:00:00 2001
From: Dave Moore
Date: Fri, 12 Jul 2019 09:24:37 -0700
Subject: [PATCH] Give `sample_size` a valid default in
 monte_carlo_variational_loss.

The current behavior is broken: `sample_size` defaults to `None`, but
`None` is not a valid sample size. This was an oversight in the previous
CL; default it to `1` instead, and align `fit_surrogate_posterior`
accordingly.

PiperOrigin-RevId: 257815557
---
 tensorflow_probability/python/vi/csiszar_divergence.py | 8 +++++---
 tensorflow_probability/python/vi/optimization.py        | 4 ++--
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/tensorflow_probability/python/vi/csiszar_divergence.py b/tensorflow_probability/python/vi/csiszar_divergence.py
index 65b658f8ab..ceb560c1f0 100644
--- a/tensorflow_probability/python/vi/csiszar_divergence.py
+++ b/tensorflow_probability/python/vi/csiszar_divergence.py
@@ -790,8 +790,8 @@ def symmetrized_csiszar_function(logu, csiszar_function, name=None):
 
 def monte_carlo_variational_loss(target_log_prob_fn,
                                  surrogate_posterior,
+                                 sample_size=1,
                                  discrepancy_fn=kl_reverse,
-                                 sample_size=None,
                                  use_reparametrization=None,
                                  seed=None,
                                  name=None):
@@ -831,12 +831,14 @@ def monte_carlo_variational_loss(target_log_prob_fn,
       this is to use `tfp.util.DeferredTensor` to represent any parameters
       defined as transformations of unconstrained variables, so that the
       transformations execute at runtime instead of at distribution creation.
+    sample_size: Integer scalar number of Monte Carlo samples used to
+      approximate the variational divergence. Larger values may stabilize
+      the optimization, but at higher cost per step in time and memory.
+      Default value: `1`.
     discrepancy_fn: Python `callable` representing a Csiszar `f` function
       in log-space. That is, `discrepancy_fn(log(u)) = f(u)`, where `f` is
       convex in `u`.
       Default value: `tfp.vi.kl_reverse`.
-    sample_size: Integer scalar number of Monte Carlo samples used to
-      approximate the variational divergence.
     use_reparametrization: Python `bool`. When `None` (the default),
       automatically set to:
       `surrogate_posterior.reparameterization_type ==
diff --git a/tensorflow_probability/python/vi/optimization.py b/tensorflow_probability/python/vi/optimization.py
index da397f3ea3..b83d4c9f06 100644
--- a/tensorflow_probability/python/vi/optimization.py
+++ b/tensorflow_probability/python/vi/optimization.py
@@ -40,7 +40,7 @@ def fit_surrogate_posterior(target_log_prob_fn,
                             num_steps,
                             trace_fn=_trace_loss,
                             variational_loss_fn=_reparameterized_elbo,
-                            sample_size=10,
+                            sample_size=1,
                             trainable_variables=None,
                             seed=None,
                             name='fit_surrogate_posterior'):
@@ -108,7 +108,7 @@ def fit_surrogate_posterior(target_log_prob_fn,
     sample_size: Python `int` number of Monte Carlo samples to use in
       estimating the variational divergence. Larger values may stabilize
       the optimization, but at higher cost per step in time and memory.
-      Default value: 10.
+      Default value: `1`.
     trainable_variables: Optional list of `tf.Variable` instances to optimize
       with respect to. If `None`, defaults to the set of all variables accessed
       during the computation of the variational bound, i.e., those defining
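
Reviewer note (not part of the patch): a minimal sketch of the API after this
change. The toy `target_log_prob_fn`, the surrogate construction, and the
optimizer settings below are illustrative assumptions, written against the
post-patch signatures and the present-day `tfp.util.DeferredTensor` argument
order.

    import tensorflow as tf
    import tensorflow_probability as tfp

    tfd = tfp.distributions

    # Toy target: an unnormalized standard-normal log density
    # (assumed here purely for illustration).
    def target_log_prob_fn(x):
      return -0.5 * x**2

    # Trainable surrogate. The scale is wrapped in a `DeferredTensor` so
    # the softplus transform runs at call time, per the docstring guidance
    # in the patch above.
    surrogate_posterior = tfd.Normal(
        loc=tf.Variable(0., name='loc'),
        scale=tfp.util.DeferredTensor(
            tf.Variable(0.5, name='raw_scale'), tf.math.softplus))

    # `sample_size` now defaults to 1; passing a larger value buys a
    # lower-variance (but costlier) Monte Carlo estimate of the loss.
    loss = tfp.vi.monte_carlo_variational_loss(
        target_log_prob_fn, surrogate_posterior, sample_size=10)

    # The same knob on the high-level driver, whose default also moves to 1.
    losses = tfp.vi.fit_surrogate_posterior(
        target_log_prob_fn, surrogate_posterior,
        optimizer=tf.optimizers.Adam(learning_rate=0.1),
        num_steps=200, sample_size=10)

Defaulting to 1 keeps the cheapest possible step as the baseline and makes
the variance/cost trade-off an explicit caller decision rather than a hidden
one.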