Merge branch 'master' into fix-libhdfs-path
skavulya committed Nov 28, 2016
2 parents d8a9016 + 5657d0d commit f806cfe
Showing 366 changed files with 18,750 additions and 4,976 deletions.
4 changes: 4 additions & 0 deletions RELEASE.md
@@ -2,6 +2,10 @@

## Breaking Changes to the API

* Division and modulus operators (/, //, %) now match Python (flooring)
semantics. tf.div is renamed to tf.division. New operators tf.truncatediv and
tf.truncatemod are available for achieving the previous C++ (truncation)
division/modulus semantics (see the sketch after this list).
* `BusAdjacency` enum replaced with a protocol buffer `DeviceLocality`. PCI bus
indexing now starts from 1 instead of 0, and bus_id==0 is used where previously
BUS_ANY was used.
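
As a quick illustration of the flooring-versus-truncation change in the first bullet, here is a minimal pure-Python sketch (editorial, not part of the diff); the tf.truncatediv/tf.truncatemod ops named above correspond to the truncating line:

import math

a, b = -7, 2
print(a // b, a % b)                            # flooring: -4 1
print(math.trunc(a / b), int(math.fmod(a, b)))  # truncating: -3 -1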
10 changes: 5 additions & 5 deletions configure
@@ -130,11 +130,11 @@ done
## Set up Cuda-related environment settings

while [ "$TF_NEED_CUDA" == "" ]; do
read -p "Do you wish to build TensorFlow with GPU support? [y/N] " INPUT
read -p "Do you wish to build TensorFlow with CUDA support? [y/N] " INPUT
case $INPUT in
[Yy]* ) echo "GPU support will be enabled for TensorFlow"; TF_NEED_CUDA=1;;
[Nn]* ) echo "No GPU support will be enabled for TensorFlow"; TF_NEED_CUDA=0;;
"" ) echo "No GPU support will be enabled for TensorFlow"; TF_NEED_CUDA=0;;
[Yy]* ) echo "CUDA support will be enabled for TensorFlow"; TF_NEED_CUDA=1;;
[Nn]* ) echo "No CUDA support will be enabled for TensorFlow"; TF_NEED_CUDA=0;;
"" ) echo "No CUDA support will be enabled for TensorFlow"; TF_NEED_CUDA=0;;
* ) echo "Invalid selection: " $INPUT;;
esac
done
@@ -178,7 +178,7 @@ OSNAME=`uname -s`
while true; do
# Configure the Cuda SDK version to use.
if [ -z "$TF_CUDA_VERSION" ]; then
read -p "Please specify the Cuda SDK version you want to use, e.g. 7.0. [Leave empty to use system default]: " TF_CUDA_VERSION
read -p "Please specify the CUDA SDK version you want to use, e.g. 7.0. [Leave empty to use system default]: " TF_CUDA_VERSION
fi

fromuser=""
3 changes: 3 additions & 0 deletions grpc.BUILD
@@ -1559,6 +1559,7 @@ cc_library(
"include/grpc++/impl/codegen/create_auth_context.h",
"include/grpc++/impl/codegen/grpc_library.h",
"include/grpc++/impl/codegen/method_handler_impl.h",
"include/grpc++/impl/codegen/proto_utils.h",
"include/grpc++/impl/codegen/rpc_method.h",
"include/grpc++/impl/codegen/rpc_service_method.h",
"include/grpc++/impl/codegen/security/auth_context.h",
@@ -1764,10 +1765,12 @@ cc_library(
"include/grpc++/impl/codegen/completion_queue.h",
"include/grpc++/impl/codegen/completion_queue_tag.h",
"include/grpc++/impl/codegen/config.h",
"include/grpc++/impl/codegen/config_protobuf.h",
"include/grpc++/impl/codegen/core_codegen_interface.h",
"include/grpc++/impl/codegen/create_auth_context.h",
"include/grpc++/impl/codegen/grpc_library.h",
"include/grpc++/impl/codegen/method_handler_impl.h",
"include/grpc++/impl/codegen/proto_utils.h",
"include/grpc++/impl/codegen/rpc_method.h",
"include/grpc++/impl/codegen/rpc_service_method.h",
"include/grpc++/impl/codegen/security/auth_context.h",
2 changes: 2 additions & 0 deletions tensorflow/BUILD
@@ -111,6 +111,7 @@ filegroup(
"//tensorflow/contrib/layers/kernels:all_files",
"//tensorflow/contrib/learn:all_files",
"//tensorflow/contrib/learn/python/learn/datasets:all_files",
"//tensorflow/contrib/linalg:all_files",
"//tensorflow/contrib/linear_optimizer:all_files",
"//tensorflow/contrib/lookup:all_files",
"//tensorflow/contrib/losses:all_files",
@@ -140,6 +141,7 @@ filegroup(
"//tensorflow/core/distributed_runtime:all_files",
"//tensorflow/core/distributed_runtime/rpc:all_files",
"//tensorflow/core/kernels:all_files",
"//tensorflow/core/kernels/cloud:all_files",
"//tensorflow/core/kernels/hexagon:all_files",
"//tensorflow/core/ops/compat:all_files",
"//tensorflow/core/platform/cloud:all_files",
2 changes: 1 addition & 1 deletion tensorflow/c/checkpoint_reader.h
@@ -43,7 +43,7 @@ class CheckpointReader {
bool HasTensor(const string& name) const;
const string DebugString() const;

// Returns a map from variable namaes to its shape. Slices of a partitioned
// Returns a map from variable names to its shape. Slices of a partitioned
// tensor are combined into a single entry.
const TensorSliceReader::VarToShapeMap& GetVariableToShapeMap() const;

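Editorial aside: the accessor documented in the fixed comment above is surfaced in Python as tf.train.NewCheckpointReader. A hedged usage sketch (the checkpoint path is hypothetical):

import tensorflow as tf

reader = tf.train.NewCheckpointReader("/tmp/model.ckpt")  # hypothetical checkpoint path
for name, shape in reader.get_variable_to_shape_map().items():
  print(name, shape)  # slices of a partitioned tensor appear as one combined entry
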
2 changes: 1 addition & 1 deletion tensorflow/cc/saved_model/loader.cc
@@ -35,7 +35,7 @@ auto* load_attempt_count = monitoring::Counter<2>::New(
"The number of times a SavedModel was successfully loaded.");
auto* load_latency = monitoring::Counter<1>::New(
"/tensorflow/cc/saved_model/load_latency", "model_path",
"Latency in microseconds for SavedModels that were succesfully loaded.");
"Latency in microseconds for SavedModels that were successfully loaded.");
constexpr char kLoadAttemptFail[] = "fail";
constexpr char kLoadAttemptSuccess[] = "success";

2 changes: 2 additions & 0 deletions tensorflow/contrib/BUILD
@@ -17,6 +17,7 @@ py_library(
"//tensorflow/contrib/copy_graph:copy_graph_py",
"//tensorflow/contrib/crf:crf_py",
"//tensorflow/contrib/cudnn_rnn:cudnn_rnn_py",
"//tensorflow/contrib/deprecated:deprecated_py",
"//tensorflow/contrib/distributions:distributions_py",
"//tensorflow/contrib/factorization:factorization_py",
"//tensorflow/contrib/ffmpeg:ffmpeg_ops_py",
@@ -27,6 +28,7 @@ py_library(
"//tensorflow/contrib/labeled_tensor",
"//tensorflow/contrib/layers:layers_py",
"//tensorflow/contrib/learn",
"//tensorflow/contrib/linalg:linalg_py",
"//tensorflow/contrib/linear_optimizer:sdca_ops_py",
"//tensorflow/contrib/lookup:lookup_py",
"//tensorflow/contrib/losses:losses_py",
2 changes: 2 additions & 0 deletions tensorflow/contrib/__init__.py
@@ -23,6 +23,7 @@
from tensorflow.contrib import copy_graph
from tensorflow.contrib import crf
from tensorflow.contrib import cudnn_rnn
from tensorflow.contrib import deprecated
from tensorflow.contrib import distributions
from tensorflow.contrib import factorization
from tensorflow.contrib import framework
@@ -32,6 +33,7 @@
from tensorflow.contrib import labeled_tensor
from tensorflow.contrib import layers
from tensorflow.contrib import learn
from tensorflow.contrib import linalg
from tensorflow.contrib import linear_optimizer
from tensorflow.contrib import lookup
from tensorflow.contrib import losses
tensorflow/contrib/bayesflow/python/kernel_tests/stochastic_gradient_estimators_test.py
@@ -18,13 +18,39 @@
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf

st = tf.contrib.bayesflow.stochastic_tensor
sge = tf.contrib.bayesflow.stochastic_gradient_estimators
dists = tf.contrib.distributions


def _vimco(loss):
"""Python implementation of VIMCO."""
n = loss.shape[0]
log_loss = np.log(loss)
geometric_mean = []
for j in range(n):
geometric_mean.append(
np.exp(np.mean([log_loss[i, :] for i in range(n) if i != j], 0)))
geometric_mean = np.array(geometric_mean)

learning_signal = []
for j in range(n):
learning_signal.append(
np.sum([loss[i, :] for i in range(n) if i != j], 0))
learning_signal = np.array(learning_signal)

local_learning_signal = np.log(1/n * (learning_signal + geometric_mean))

# log_mean - local_learning_signal
log_mean = np.log(np.mean(loss, 0))
advantage = log_mean - local_learning_signal

return advantage
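
Editorial restatement of what `_vimco` computes (read off the code above, with $f_i = \texttt{loss}[i, :]$ elementwise over the batch dimensions): the advantage for sample $j$ is

$$
A_j \;=\; \log\Big(\frac{1}{n}\sum_{i=1}^{n} f_i\Big)
\;-\; \log\frac{1}{n}\Big(\sum_{i \neq j} f_i + \Big(\prod_{i \neq j} f_i\Big)^{1/(n-1)}\Big),
$$

i.e. the log of the sample mean minus a local learning signal in which $f_j$ is replaced by the geometric mean of the other $n-1$ samples.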


class StochasticGradientEstimatorsTest(tf.test.TestCase):

def setUp(self):
@@ -97,6 +123,56 @@ def advantage_fn(stoch_tensor, loss):
self._testScoreFunction(
sge.get_score_function_with_advantage(advantage_fn), expected)

def testVIMCOAdvantageFn(self):
# simple_loss: (3, 2) with 3 samples, batch size 2
simple_loss = np.array(
[[1.0, 1.5],
[1e-6, 1e4],
[2.0, 3.0]])
# random_loss: (100, 50, 64) with 100 samples, batch shape (50, 64)
random_loss = 100*np.random.rand(100, 50, 64)

advantage_fn = sge.get_vimco_advantage_fn(have_log_loss=False)

with self.test_session() as sess:
for loss in [simple_loss, random_loss]:
expected = _vimco(loss)
loss_t = tf.constant(loss, dtype=tf.float32)
advantage_t = advantage_fn(None, loss_t) # ST is not used
advantage = sess.run(advantage_t)
self.assertEqual(expected.shape, advantage_t.get_shape())
self.assertAllClose(expected, advantage, atol=5e-5)

def testVIMCOAdvantageGradients(self):
loss = np.log(
[[1.0, 1.5],
[1e-6, 1e4],
[2.0, 3.0]])
advantage_fn = sge.get_vimco_advantage_fn(have_log_loss=True)

with self.test_session():
loss_t = tf.constant(loss, dtype=tf.float64)
advantage_t = advantage_fn(None, loss_t) # ST is not used
gradient_error = tf.test.compute_gradient_error(
loss_t, loss_t.get_shape().as_list(),
advantage_t, advantage_t.get_shape().as_list(),
x_init_value=loss)
self.assertLess(gradient_error, 1e-3)

def testVIMCOAdvantageWithSmallProbabilities(self):
theta_value = np.random.rand(10, 100000)
# Test with float16 dtype to ensure stability even in this extreme case.
theta = tf.constant(theta_value, dtype=tf.float16)
advantage_fn = sge.get_vimco_advantage_fn(have_log_loss=True)

with self.test_session() as sess:
log_loss = -tf.reduce_sum(theta, [1])
advantage_t = advantage_fn(None, log_loss)
grad_t = tf.gradients(advantage_t, theta)[0]
advantage, grad = sess.run((advantage_t, grad_t))
self.assertTrue(np.all(np.isfinite(advantage)))
self.assertTrue(np.all(np.isfinite(grad)))

def testScoreFunctionWithMeanBaselineHasUniqueVarScope(self):
ema_decay = 0.8
x = st.StochasticTensor(
tensorflow/contrib/bayesflow/python/ops/stochastic_gradient_estimators.py
@@ -56,6 +56,8 @@
from __future__ import division
from __future__ import print_function

import numpy as np

from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
@@ -194,4 +196,122 @@ def mean_baseline(_, loss):
return mean_baseline


def get_vimco_advantage_fn(have_log_loss=False):
"""VIMCO (Variational Inference for Monte Carlo Objectives) baseline.
Implements VIMCO baseline from the article of the same name:
https://arxiv.org/pdf/1602.06725v2.pdf
Given a `loss` tensor (containing non-negative probabilities or ratios),
calculates the advantage VIMCO advantage via Eq. 9 of the above paper.
The tensor `loss` should be shaped `[n, ...]`, with rank at least 1. Here,
the first axis is considered the single sampling dimension and `n` must
be at least 2. Specifically, the `StochasticTensor` is assumed to have
used the `SampleValue(n)` value type with `n > 1`.
Args:
have_log_loss: Python `Boolean`. If `True`, the loss is assumed to be the
log loss. If `False` (the default), it is assumed to be a nonnegative
probability or probability ratio.
Returns:
Callable baseline function that takes the `StochasticTensor` (unused) and
the downstream `loss`, and returns the VIMCO baseline for the loss.
"""
def vimco_advantage_fn(_, loss, name=None):
"""Internal VIMCO function.
Args:
_: ignored `StochasticTensor`.
loss: The loss `Tensor`.
name: Python string, the name scope to use.
Returns:
The advantage `Tensor`.
"""
with ops.name_scope(name, "VIMCOAdvantage", values=[loss]):
loss = ops.convert_to_tensor(loss)
loss_shape = loss.get_shape()
loss_num_elements = loss_shape[0].value
n = math_ops.cast(
loss_num_elements or array_ops.shape(loss)[0], dtype=loss.dtype)

if have_log_loss:
log_loss = loss
else:
log_loss = math_ops.log(loss)

# Calculate L_hat, Eq. (4) -- stably
log_mean = math_ops.reduce_logsumexp(log_loss, [0]) - math_ops.log(n)

# expand_dims: Expand shape [a, b, c] to [a, 1, b, c]
log_loss_expanded = array_ops.expand_dims(log_loss, [1])

# divide: log_loss_sub with shape [a, a, b, c], where
#
# log_loss_sub[i] = log_loss - log_loss[i]
#
# = [ log_loss[j] - log_loss[i] for rows j = 0 ... i - 1 ]
# [ zeros ]
# [ log_loss[j] - log_loss[i] for rows j = i + 1 ... a - 1 ]
#
log_loss_sub = log_loss - log_loss_expanded

# reduce_sum: Sums each row across all the sub[i]'s; result is:
# reduce_sum[j] = (n - 1) * log_loss[j] - (sum_{i != j} loss[i])
# divide by (n - 1) to get:
# geometric_reduction[j] =
# log_loss[j] - (sum_{i != j} log_loss[i]) / (n - 1)
geometric_reduction = math_ops.reduce_sum(log_loss_sub, [0]) / (n - 1)

# subtract this from the original log_loss to get the baseline:
# geometric_mean[j] = exp((sum_{i != j} log_loss[i]) / (n - 1))
log_geometric_mean = log_loss - geometric_reduction

## Equation (9)

# Calculate sum_{i != j} loss[i] -- via exp(reduce_logsumexp(.))
# reduce_logsumexp: log-sum-exp each row across all the
# -sub[i]'s, result is:
#
# exp(reduce_logsumexp[j]) =
# 1 + sum_{i != j} exp(log_loss[i] - log_loss[j])
log_local_learning_reduction = math_ops.reduce_logsumexp(
-log_loss_sub, [0])

# convert local_learning_reduction to the sum-exp of the log-sum-exp
# (local_learning_reduction[j] - 1) * exp(log_loss[j])
# = sum_{i != j} exp(log_loss[i])
local_learning_log_sum = (
_logexpm1(log_local_learning_reduction) + log_loss)

# Add (logaddexp) the local learning signals (Eq. 9)
local_learning_signal = (
math_ops.reduce_logsumexp(
array_ops.stack((local_learning_log_sum, log_geometric_mean)),
[0])
- math_ops.log(n))

advantage = log_mean - local_learning_signal

return advantage

return vimco_advantage_fn


def _logexpm1(x):
"""Stably calculate log(exp(x)-1)."""
with ops.name_scope("logsumexp1"):
eps = np.finfo(x.dtype.as_numpy_dtype).eps
# Choose a small offset that makes gradient calculations stable for
# float16, float32, and float64.
safe_log = lambda y: math_ops.log(y + eps / 1e8) # For gradient stability
return array_ops.where(
math_ops.abs(x) < eps,
safe_log(x) + x/2 + x*x/24, # small x approximation to log(expm1(x))
safe_log(math_ops.exp(x) - 1))
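
Editorial note on the small-$|x|$ branch above: it follows from the series expansion

$$
\log(e^{x} - 1) \;=\; \log x + \frac{x}{2} + \frac{x^{2}}{24} + O(x^{4}),
$$

which is exactly what `safe_log(x) + x/2 + x*x/24` evaluates; the tiny offset inside `safe_log` keeps the gradient finite at $x = 0$.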


__all__ = make_all(__name__)
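
Editorial usage sketch (not part of the diff), wiring the new baseline the way the tests above exercise it; the contrib paths are those introduced by this commit:

import numpy as np
import tensorflow as tf

sge = tf.contrib.bayesflow.stochastic_gradient_estimators

# n = 5 samples along axis 0, batch of 2; strictly positive losses.
loss = tf.constant(np.random.rand(5, 2) + 0.1, dtype=tf.float32)
advantage_fn = sge.get_vimco_advantage_fn(have_log_loss=False)
advantage = advantage_fn(None, loss)  # the StochasticTensor argument is unused

with tf.Session() as sess:
  print(sess.run(advantage))  # shape (5, 2) advantage values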
4 changes: 2 additions & 2 deletions tensorflow/contrib/cmake/tf_core_direct_session.cmake
@@ -4,8 +4,6 @@
file(GLOB tf_core_direct_session_srcs
"${tensorflow_source_dir}/tensorflow/core/common_runtime/direct_session.cc"
"${tensorflow_source_dir}/tensorflow/core/common_runtime/direct_session.h"
"${tensorflow_source_dir}/tensorflow/core/debug/*.h"
"${tensorflow_source_dir}/tensorflow/core/debug/*.cc"
)

file(GLOB_RECURSE tf_core_direct_session_test_srcs
@@ -18,3 +16,5 @@ list(REMOVE_ITEM tf_core_direct_session_srcs ${tf_core_direct_session_test_srcs}
add_library(tf_core_direct_session OBJECT ${tf_core_direct_session_srcs})

add_dependencies(tf_core_direct_session tf_core_cpu)

add_definitions(-DNOTFDBG)
6 changes: 1 addition & 5 deletions tensorflow/contrib/cmake/tf_core_framework.cmake
@@ -209,11 +209,7 @@ file(GLOB_RECURSE tf_core_framework_test_srcs
"${tensorflow_source_dir}/tensorflow/core/util/*main.cc"
)

list(REMOVE_ITEM tf_core_framework_srcs ${tf_core_framework_test_srcs}
"${tensorflow_source_dir}/tensorflow/core/util/memmapped_file_system.cc"
"${tensorflow_source_dir}/tensorflow/core/util/memmapped_file_system.h"
"${tensorflow_source_dir}/tensorflow/core/util/memmapped_file_system_writer.cc"
)
list(REMOVE_ITEM tf_core_framework_srcs ${tf_core_framework_test_srcs})

add_library(tf_core_framework OBJECT
${tf_core_framework_srcs}