davek44
diff --git a/‎WORKSPACE
+1-1 b/‎WORKSPACE
+1-1
diff --git a/‎tensorflow/contrib/layers/python/layers/feature_column.py
+81 b/‎tensorflow/contrib/layers/python/layers/feature_column.py
+81
diff --git a/‎tensorflow/contrib/learn/BUILD
+12 b/‎tensorflow/contrib/learn/BUILD
+12
diff --git a/‎tensorflow/contrib/learn/python/learn/dataframe/__init__.py
+3-2 b/‎tensorflow/contrib/learn/python/learn/dataframe/__init__.py
+3-2
diff --git a/‎tensorflow/contrib/learn/python/learn/dataframe/dataframe.py
-43 b/‎tensorflow/contrib/learn/python/learn/dataframe/dataframe.py
-43
diff --git a/‎tensorflow/contrib/learn/python/learn/dataframe/estimator_utils.py
+172 b/‎tensorflow/contrib/learn/python/learn/dataframe/estimator_utils.py
+172
@@ -364,7 +364,7 @@ new_git_repository(
   name = "paper_styles",
   build_file = "bower.BUILD",
   remote = "https://github.com/polymerelements/paper-styles.git",
-  tag = "v1.1.1",
+  tag = "v1.1.4",
 )
 
 new_git_repository(
 
@@ -1257,6 +1257,87 @@ def crossed_column(columns, hash_bucket_size, combiner="sum"):
   return _CrossedColumn(columns, hash_bucket_size, combiner=combiner)
 
 
+class DataFrameColumn(_FeatureColumn,
+                      collections.namedtuple("DataFrameColumn",
+                                             ["name", "series"])):
+  """Represents a feature column produced from a `DataFrame`.
+
+  Instances of this class are immutable.  A `DataFrame` column may be dense or
+  sparse, and may have any shape, with the constraint that dimension 0 is
+  batch_size.
+
+  Equality and hashing are both defined over the `(name, series)` fields, so
+  instances can be used as dictionary keys (as `insert_transformed_feature`
+  does).
+
+  Args:
+    name: a name for this column
+    series: a `Series` to be wrapped, which has already had its base features
+      substituted with `PredefinedSeries`.
+  """
+
+  def __new__(cls, name, series):
+    return super(DataFrameColumn, cls).__new__(cls, name, series)
+
+  @property
+  def config(self):
+    """Returns `self.series.required_base_features()`."""
+    return self.series.required_base_features()
+
+  @property
+  def key(self):
+    """Returns a string which will be used as a key when we do sorting."""
+    return self.name
+
+  def insert_transformed_feature(self, columns_to_tensors):
+    """Builds the wrapped series and stores the result under this column.
+
+    Args:
+      columns_to_tensors: a `dict` that is used both as the build cache for
+        `self.series.build` and as the destination of the result, which is
+        stored keyed by this `DataFrameColumn` instance.
+    """
+    # The cache must already contain mappings from the expected base feature
+    # names to Tensors.
+
+    # Passing columns_to_tensors as the cache here means that multiple outputs
+    # of the transform will be cached, keyed by the repr of their associated
+    # TransformedSeries.
+    # The specific requested output ends up in columns_to_tensors twice: once
+    # keyed by the TransformedSeries repr, and once keyed by this
+    # DataFrameColumn instance.
+    columns_to_tensors[self] = self.series.build(columns_to_tensors)
+
+  # pylint: disable=unused-argument
+  def to_dnn_input_layer(self,
+                         input_tensor,
+                         weight_collections=None,
+                         trainable=True):
+    """Returns `input_tensor` unchanged; the other arguments are ignored."""
+    return input_tensor
+
+  # TODO(soergel): This mirrors RealValuedColumn for now, but should become
+  # better abstracted with less code duplication when we add other kinds.
+  def to_weighted_sum(self,
+                      input_tensor,
+                      num_outputs=1,
+                      weight_collections=None,
+                      trainable=True):
+    """Multiplies `input_tensor` by a zero-initialized weight variable.
+
+    Args:
+      input_tensor: the tensor to be multiplied by the weight.
+      num_outputs: the second dimension of the weight variable.
+      weight_collections: passed through `_add_variable_collection` to choose
+        the collections of the weight variable.
+      trainable: currently unused by this method.
+
+    Returns:
+      A tuple `(log_odds_by_dim, [weight])` of the matmul result and a
+      single-element list holding the weight variable.
+    """
+    def _weight(name):
+      # NOTE(review): `dimension` is not one of this namedtuple's fields;
+      # presumably it has to be provided by `_FeatureColumn` -- confirm.
+      return variable_scope.get_variable(
+          name,
+          shape=[self.dimension, num_outputs],
+          initializer=array_ops.zeros_initializer,
+          collections=_add_variable_collection(weight_collections))
+
+    if self.name:
+      with variable_scope.variable_op_scope([input_tensor], None, self.name):
+        weight = _weight("weight")
+    else:
+      # Old behavior to support a subset of old checkpoints.
+      weight = _weight("_weight")
+
+    # The weight is [self.dimension, num_outputs], so the matmul below needs
+    # input_tensor to be [batch_size, self.dimension].
+    log_odds_by_dim = math_ops.matmul(input_tensor, weight)
+    return log_odds_by_dim, [weight]
+
+  def __eq__(self, other):
+    # Compare the namedtuple fields.  They are stored in the tuple itself, not
+    # in the instance `__dict__`, so comparing `__dict__`s would report any
+    # two DataFrameColumns as equal.
+    if isinstance(other, self.__class__):
+      return tuple(self) == tuple(other)
+    return False
+
+  def __ne__(self, other):
+    return not self.__eq__(other)
+
+  def __hash__(self):
+    # Defining __eq__ clears the inherited __hash__ on Python 3.  Restore a
+    # field-based hash that agrees with __eq__ so instances remain usable as
+    # keys of `columns_to_tensors`.
+    return hash(tuple(self))
+
+
 def _get_feature_config(feature_column):
   """Returns configuration for the base feature defined in feature_column."""
   if not isinstance(feature_column, _FeatureColumn):
 
@@ -88,6 +88,18 @@ py_test(
     ],
 )
 
+py_test(
+    name = "estimator_utils_test",
+    size = "medium",
+    srcs = ["python/learn/tests/dataframe/estimator_utils_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":learn",
+        "//tensorflow:tensorflow_py",
+        "//tensorflow/python:framework_test_lib",
+    ],
+)
+
 py_test(
     name = "series_test",
     size = "small",
 
@@ -20,6 +20,7 @@
 from __future__ import print_function
 
 from tensorflow.contrib.learn.python.learn.dataframe.dataframe import DataFrame
+from tensorflow.contrib.learn.python.learn.dataframe.series import PredefinedSeries
 from tensorflow.contrib.learn.python.learn.dataframe.series import Series
 from tensorflow.contrib.learn.python.learn.dataframe.series import TransformedSeries
 from tensorflow.contrib.learn.python.learn.dataframe.tensorflow_dataframe import TensorFlowDataFrame
@@ -45,5 +46,5 @@
 for ct_def in _cmp.COMPARISON_TRANSFORMS:
   _cmp.register_comparison_ops(*ct_def)
 
-__all__ = ['DataFrame', 'Series', 'TransformedSeries', 'TensorFlowDataFrame',
-           'parameter', 'Transform']
+__all__ = ['DataFrame', 'Series', 'PredefinedSeries', 'TransformedSeries',
+           'TensorFlowDataFrame', 'parameter', 'Transform']
@@ -124,46 +124,3 @@ def build(self):
     cache = {}
     tensors = {name: c.build(cache) for name, c in self._columns.items()}
     return tensors
-
-  def to_input_fn(self, feature_keys=None, target_keys=None):
-    """Build an input_fn suitable for use with Estimator.
-
-    Args:
-      feature_keys: the names of columns to be used as features.  If None, all
-        columns except those in target_keys are used.
-      target_keys: the names of columns to be used as targets.  None is
-        acceptable for unsupervised learning.
-
-    Returns:
-      A function that returns a pair of dicts (features, targets), each mapping
-        string names to Tensors.
-
-    Raises:
-      ValueError: when the feature and target key sets are non-disjoint
-    """
-    if target_keys is None:
-      target_keys = []
-
-    if feature_keys is None:
-      feature_keys = self.columns() - set(target_keys)
-    else:
-      in_both = set(feature_keys) & set(target_keys)
-      if in_both:
-        raise ValueError(
-            "Columns cannot be used for both features and targets: %s" %
-            ", ".join(in_both))
-
-    def input_fn():
-      # It's important to build all the tensors together in one DataFrame.
-      # If we did df.select() for both key sets and then build those, the two
-      # resulting DataFrames would be shuffled independently.
-      tensors = self.build()
-
-      # Note that (for now at least) we provide our columns to Estimator keyed
-      # by strings, so they are base features as far as Estimator is concerned.
-      # TODO(soergel): reconcile with FeatureColumn keys, Transformer etc.
-      features = {key: tensors[key] for key in feature_keys}
-      targets = {key: tensors[key] for key in target_keys}
-      return features, targets
-
-    return input_fn
@@ -0,0 +1,172 @@
+# pylint: disable=g-bad-file-header
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Utility functions relating DataFrames to Estimators."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.layers import feature_column
+from tensorflow.contrib.learn.python.learn.dataframe import series as ss
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import parsing_ops
+
+
+def _to_feature_spec(tensor, default_value=None):
+  """Derives a parsing feature spec from an already-built tensor.
+
+  Args:
+    tensor: a `Tensor` or `SparseTensor`.
+    default_value: passed through as the `default_value` of the
+      `FixedLenFeature`; not used when `tensor` is a `SparseTensor`.
+
+  Returns:
+    A `VarLenFeature` carrying the tensor's dtype if `tensor` is a
+    `SparseTensor`; otherwise a `FixedLenFeature` carrying the shape reported
+    by `tensor.get_shape()`, the tensor's dtype and `default_value`.
+  """
+  if not isinstance(tensor, ops.SparseTensor):
+    return parsing_ops.FixedLenFeature(shape=tensor.get_shape(),
+                                       dtype=tensor.dtype,
+                                       default_value=default_value)
+  return parsing_ops.VarLenFeature(dtype=tensor.dtype)
+
+
+def _infer_feature_specs(dataframe, keys_with_defaults):
+  """Builds `dataframe` in a fresh graph and derives a spec for each column.
+
+  Args:
+    dataframe: the `DataFrame` to build.
+    keys_with_defaults: a `dict` from column names to default values; columns
+      that are absent from it get a default of None.
+
+  Returns:
+    A `dict` mapping every name returned by `dataframe.build()` to the feature
+    spec produced by `_to_feature_spec`.
+  """
+  feature_specs = {}
+  # The tensors are only inspected for their type, shape and dtype, so they
+  # are built inside a separate Graph rather than the caller's default graph.
+  with ops.Graph().as_default():
+    for name, tensor in dataframe.build().items():
+      default = keys_with_defaults.get(name)
+      feature_specs[name] = _to_feature_spec(tensor, default)
+  return feature_specs
+
+
+def _build_alternate_universe(
+    dataframe, base_input_keys_with_defaults, feature_keys):
+  """Create an alternate universe assuming that the base series are defined.
+
+  The resulting graph will be used with an `input_fn` that provides exactly
+  those features.
+
+  Args:
+    dataframe: the underlying `DataFrame`
+    base_input_keys_with_defaults: a `dict` from the names of columns to be
+      considered base features to their default values.
+    feature_keys: the names of columns to be used as features (including base
+      features and derived features).
+
+  Returns:
+    A tuple `(new_feature_series_dict, feature_specs)`:
+    * `new_feature_series_dict`: a `dict` mapping each name in `feature_keys`
+      to its rebuilt `Series`.
+    * `feature_specs`: the `dict` of feature specs returned by
+      `_infer_feature_specs` for `dataframe`.
+  """
+  feature_specs = _infer_feature_specs(dataframe, base_input_keys_with_defaults)
+
+  # Seed the memo table: every base series is replaced by a PredefinedSeries
+  # carrying its inferred spec.  Rebuilt derived series are added below.
+  alternate_universe_map = {
+      dataframe[name]: ss.PredefinedSeries(name, feature_specs[name])
+      for name in base_input_keys_with_defaults
+  }
+
+  def _in_alternate_universe(orig_series):
+    """Maps a Series of the original DataFrame to its rebuilt counterpart."""
+    # pylint: disable=protected-access
+    try:
+      return alternate_universe_map[orig_series]
+    except KeyError:
+      pass
+
+    # Not seen yet: rebuild the inputs first (recursively), then re-apply the
+    # same transform on top of them.  This runs outside the `except` block so
+    # that an error raised here is not chained to the KeyError above.
+    rebuilt_inputs = [_in_alternate_universe(i)
+                      for i in orig_series._input_series]
+    rebuilt_series = ss.TransformedSeries(rebuilt_inputs,
+                                          orig_series._transform,
+                                          orig_series._output_name)
+    alternate_universe_map[orig_series] = rebuilt_series
+    return rebuilt_series
+
+  new_feature_series_dict = {
+      fk: _in_alternate_universe(dataframe[fk]) for fk in feature_keys}
+  return new_feature_series_dict, feature_specs
+
+
+def to_feature_columns_and_input_fn(dataframe,
+                                    base_input_keys_with_defaults,
+                                    feature_keys,
+                                    target_keys=None):
+  """Build a list of FeatureColumns and an input_fn for use with Estimator.
+
+  Args:
+    dataframe: the underlying dataframe
+    base_input_keys_with_defaults: a dict from the names of columns to be
+      considered base features to their default values.  These columns will be
+      fed via input_fn.
+    feature_keys: the names of columns from which to generate FeatureColumns.
+      These may include base features and/or derived features.
+    target_keys: the names of columns to be used as targets.  None is
+      acceptable for unsupervised learning.
+
+  Returns:
+    A tuple of two elements:
+    * A list of `FeatureColumn`s to be used when constructing an Estimator
+    * An input_fn, i.e. a function that returns a pair (features, targets).
+      The feature dict maps string names to Tensors and provides mappings for
+      all the base columns required by the FeatureColumns.  Targets is a dict
+      mapping string names to Tensors, except that when there is exactly one
+      target key it is that single Tensor instead.
+
+  Raises:
+    ValueError: when `feature_keys` is None or empty; when the feature and
+      target key sets are non-disjoint, or the base_input and target sets are
+      non-disjoint; or when a requested feature column is not a dense
+      (`FixedLenFeature`) column of integer or floating point dtype.
+  """
+  if not feature_keys:
+    raise ValueError("feature_keys must be specified.")
+
+  if target_keys is None:
+    target_keys = []
+
+  # Snapshot the keys: on Python 3 `dict.keys()` is a live view, and input_fn
+  # below must not observe later changes to the caller's dict.
+  base_input_keys = list(base_input_keys_with_defaults)
+
+  # Report every offending column at once, in a deterministic order.
+  in_two = (set(feature_keys) | set(base_input_keys)) & set(target_keys)
+  if in_two:
+    raise ValueError("Columns cannot be used for both features and targets: %s"
+                     % ", ".join(sorted(in_two)))
+
+  # Obtain the feature series in the alternate universe
+  new_feature_series_dict, feature_specs = _build_alternate_universe(
+      dataframe, base_input_keys_with_defaults, feature_keys)
+
+  # TODO(soergel): Allow non-real, non-dense DataFrameColumns
+  for key in new_feature_series_dict:
+    spec = feature_specs[key]
+    if not (
+        isinstance(spec, parsing_ops.FixedLenFeature)
+        and (spec.dtype.is_integer or spec.dtype.is_floating)):
+      raise ValueError("For now, only real dense columns can be passed from "
+                       "DataFrame to Estimator.  %s is %s of %s" % (
+                           (key, type(spec).__name__, spec.dtype)))
+
+  # Make FeatureColumns from these
+  feature_columns = [feature_column.DataFrameColumn(name, s)
+                     for name, s in new_feature_series_dict.items()]
+
+  # Make a new DataFrame with only the Series needed for input_fn.
+  # This is important to avoid starting queue feeders that won't be used.
+  limited_dataframe = dataframe.select_columns(
+      base_input_keys + list(target_keys))
+
+  # Build an input_fn suitable for use with Estimator.
+  def input_fn():
+    # It's important to build all the tensors together in one DataFrame.
+    # If we did df.select() for both key sets and then build those, the two
+    # resulting DataFrames would be shuffled independently.
+    tensors = limited_dataframe.build()
+
+    base_input_features = {key: tensors[key] for key in base_input_keys}
+    targets = {key: tensors[key] for key in target_keys}
+
+    # TODO(soergel): Remove this special case when b/30367437 is fixed.
+    if len(targets) == 1:
+      targets = list(targets.values())[0]
+
+    return base_input_features, targets
+
+  return feature_columns, input_fn
Original file line number	Diff line number	Diff line change
`@@ -364,7 +364,7 @@ new_git_repository(`
`364`	`364`	`name = "paper_styles",`
`365`	`365`	`build_file = "bower.BUILD",`
`366`	`366`	`remote = "https://github.com/polymerelements/paper-styles.git",`
`367`		`- tag = "v1.1.1",`
	`367`	`+ tag = "v1.1.4",`
`368`	`368`	`)`
`369`	`369`
`370`	`370`	`new_git_repository(`