From 04933363e818bd03aee4c5478be038c8dc18eb72 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Thu, 21 Sep 2017 21:01:14 -0700 Subject: [PATCH 01/14] eager: Fix bug in tfe.gradients_function(). The generated function was not always treating function parameters as distinct tensors. Added test would fail (the calls to np_g() would return [2., 2.] instead of [1., 1.]) without the corresponding change to backprop.py PiperOrigin-RevId: 169642367 --- tensorflow/python/eager/backprop.py | 38 +++++++++++++++++++++--- tensorflow/python/eager/backprop_test.py | 24 +++++++++++++++ tensorflow/python/eager/function_test.py | 13 ++++++++ tensorflow/python/framework/ops.py | 8 +++++ 4 files changed, 79 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index cb7171dea8f289..a83d02151bc751 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -714,6 +714,38 @@ def decorated(*args, **kwds): return decorated +def _ensure_unique_tensor_objects(parameter_positions, args): + """Make each of the parameter_positions in args a unique ops.Tensor object. + + Ensure that each parameter is treated independently. + For example: + + def f(x, y): return x * y + g = gradients_function(f) + one = tf.constant(1.) + + g(one, one) should return [1., 1.] + (even though the two arguments are the same Tensor object). + + Args: + parameter_positions: List of indices into args defining the arguments to + differentiate against. + args: A list of arguments to the function to be differentiated. + + Returns: + args, possibly edited in-place. + """ + s = set() + for (i, t) in enumerate(args): + if i in parameter_positions: + tid = ops.tensor_id(t) + if tid in s: + args[i] = args[i]._dup() # pylint: disable=protected-access + else: + s.add(tid) + return args + + def val_and_grad_function(f, params=None): """Returns a function that computes f and is derivative w.r.t. params. @@ -778,13 +810,11 @@ def decorated(*args, **kwds): ops.convert_to_tensor(args[i]) if i in parameter_positions else args[i] for i in range(len(args)) ] + args = _ensure_unique_tensor_objects(parameter_positions, args) for i in parameter_positions: sources.append(args[i]) tape.watch(args[i]) result = f(*args) - return result, imperative_grad( - result, - sources, - output_gradients=dy) + return result, imperative_grad(result, sources, output_gradients=dy) return decorated diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py index 483b28791a7e09..599cf4fdca3057 100644 --- a/tensorflow/python/eager/backprop_test.py +++ b/tensorflow/python/eager/backprop_test.py @@ -373,6 +373,30 @@ def tfe_conv2d(timage, tkernel, conv2dstrides): grad = backprop.gradients_function(tfe_conv2d, params=(0,))(i, k, s)[0] self.assertAllEqual([[[[2.0]]]], grad.numpy()) + def testSameObjectForMultipleArguments(self): + + def f(x, y): + return math_ops.multiply(x, y) + + g = backprop.gradients_function(f) + + def np_g(x, y): + dx, dy = g(x, y) + return [dx.numpy(), dy.numpy()] + + x = constant_op.constant(1.) + self.assertAllEqual([1., 1.], np_g(x, x)) + x = 1. 
+ self.assertAllEqual([1., 1.], np_g(x, x)) + x = constant_op.constant([[1.]]) + self.assertAllEqual([[[1.]], [[1.]]], np_g(x, x)) + x = [[1.]] + self.assertAllEqual([[[1.]], [[1.]]], np_g(x, x)) + + v = resource_variable_ops.ResourceVariable( + initial_value=1., name='testSameObjectForMultipleArguments.Variable') + self.assertAllEqual([1., 1.], np_g(v, v)) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index 611a1c88cf1225..899b6d59b7096f 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -99,6 +99,19 @@ def g(x): self.assertAllEqual(2, g(constant_op.constant(2)).numpy()) + def testDefunCallBackpropUsingSameObjectForMultipleArguments(self): + + @function.defun + def g(x): + return backprop.gradients_function(math_ops.multiply, [0, 1])(x, x) + + def np_g(x): + return [d.numpy() for d in g(x)] + + x = constant_op.constant(1.) + self.assertAllEqual([1., 1.], np_g(x)) + self.assertAllEqual([1., 1.], np_g(1.)) + def testCallShape(self): @function.defun diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 1ddeaa0f719543..db9aa1e06173a9 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -578,6 +578,11 @@ def eval(self, feed_dict=None, session=None): """ return _eval_using_default_session(self, feed_dict, self.graph, session) + def _dup(self): + ret = copy.copy(self) + ret._id = uid() # pylint: disable=protected-access + return ret + def _eager_cast(tensor_handle, src_type_enum, dest_type_enum, ctx): """Cast tensor_handle from src_type_enum to dest_type_enum.""" @@ -772,6 +777,9 @@ def grad_fun(dresult): return new_tensor # pylint: enable=protected-access + def _dup(self): + return self._copy(device_name=self.device) + @property def device(self): return c_api.TFE_TensorHandleDeviceName(self._handle) From da2606f4963744f6940f8060ea5ca1f7f18181cc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 21 Sep 2017 21:40:23 -0700 Subject: [PATCH 02/14] Fix typos in the API documentation for tensorflow::PendingCounts. PiperOrigin-RevId: 169644786 --- tensorflow/core/common_runtime/pending_counts.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/common_runtime/pending_counts.h b/tensorflow/core/common_runtime/pending_counts.h index 198eb896afc76d..9e39b6b7b93a8e 100644 --- a/tensorflow/core/common_runtime/pending_counts.h +++ b/tensorflow/core/common_runtime/pending_counts.h @@ -24,7 +24,7 @@ limitations under the License. namespace tensorflow { -// PendingCounts is internal helper class to keep track of pending and +// PendingCounts is an internal helper class to keep track of pending and // dead counts for nodes, for use in the ExecutorState module. It // holds a map from Handles to various counts for that handle. This // information is needed per frame iteration. The amount of memory @@ -39,7 +39,7 @@ namespace tensorflow { // } // // When we actually want to start an iteration we first create a -// nPendingCounts object and then index into it using the precomputed +// PendingCounts object and then index into it using the precomputed // handles: // PendingCounts counts(layout); From 3a38f48a32f1190520b3647d99752c1d805ffb42 Mon Sep 17 00:00:00 2001 From: Yao Zhang Date: Thu, 21 Sep 2017 21:42:52 -0700 Subject: [PATCH 03/14] Fix memory leak if a node evaluation fails due to a divide by zero exception. 
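The leak: EvaluateOneFoldable() heap-allocates the tensors produced while
folding a node, but the early return triggered by a failed EvaluateNode()
call (e.g. a division by zero during evaluation) skipped the per-output
deletes, leaking everything produced up to that point. The fix below
registers output_tensors with the same cleanup object that already guards
the inputs. For context, a standalone sketch of the scope-guard idiom the
fix relies on (illustrative only; ScopeGuard is a hand-rolled stand-in,
not TensorFlow's gtl::MakeCleanup):

    // Minimal scope guard: the destructor runs on every exit path,
    // early error returns included. Requires C++17 for class template
    // argument deduction.
    #include <utility>

    template <typename F>
    class ScopeGuard {
     public:
      explicit ScopeGuard(F f) : f_(std::move(f)) {}
      ~ScopeGuard() { f_(); }
     private:
      F f_;
    };

    bool Evaluate(int divisor) {
      int* scratch = new int[16];
      ScopeGuard guard([&] { delete[] scratch; });
      if (divisor == 0) {
        return false;  // early return: guard still frees scratch
      }
      scratch[0] = 100 / divisor;
      return true;
    }

    int main() { return Evaluate(0) ? 1 : 0; }  // exercises the error path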
PiperOrigin-RevId: 169644909
---
 .../core/grappler/optimizers/constant_folding.cc | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index 6ed5c36b1ff041..88babfdc5f8e9c 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -414,10 +414,16 @@ Status ConstantFolding::EvaluateNode(const NodeDef& node,
 Status ConstantFolding::EvaluateOneFoldable(const NodeDef& node,
 std::vector<NodeDef>* outputs) {
 TensorVector inputs;
- auto inputs_cleanup = gtl::MakeCleanup([&inputs] {
+ TensorVector output_tensors;
+ auto inputs_cleanup = gtl::MakeCleanup([&inputs, &output_tensors] {
 for (const auto& input : inputs) {
 delete input.tensor;
 }
+ for (const auto& output : output_tensors) {
+ if (output.tensor) {
+ delete output.tensor;
+ }
+ }
 });

 for (const auto& input : node.input()) {
@@ -439,7 +445,6 @@ Status ConstantFolding::EvaluateOneFoldable(const NodeDef& node,
 inputs.emplace_back(value);
 }

- TensorVector output_tensors;
 TF_RETURN_IF_ERROR(EvaluateNode(node, inputs, &output_tensors));
 if (output_tensors.empty()) {
 return Status(error::INVALID_ARGUMENT, "Expected at least one output.");
@@ -452,7 +457,6 @@ Status ConstantFolding::EvaluateOneFoldable(const NodeDef& node,
 }
 if (output_tensors[i].tensor) {
 outputs->push_back(CreateNodeDef(node_name, output_tensors[i]));
- delete output_tensors[i].tensor;
 } else {
 // Create an empty NodeDef to identify dead outputs (e.g. the output of a
 // switch that's not selected by the switch predicate).

From abf90c9e0669039055bc1b0ae656e58d0a031d49 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Thu, 21 Sep 2017 21:52:39 -0700
Subject: [PATCH 04/14] Increase tolerance on nn_grad_test.

PiperOrigin-RevId: 169645639
---
 tensorflow/cc/gradients/nn_grad_test.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc
index 23545f75ac1d4f..156a070acfb4ca 100644
--- a/tensorflow/cc/gradients/nn_grad_test.cc
+++ b/tensorflow/cc/gradients/nn_grad_test.cc
@@ -36,7 +36,7 @@ class NNGradTest : public ::testing::Test {
 float max_error;
 TF_ASSERT_OK((ComputeGradientError<float, float, float>(
 scope_, {x}, {x_shape}, {y}, {y_shape}, &max_error)));
- EXPECT_LT(max_error, 2e-4);
+ EXPECT_LT(max_error, 2.2e-4);
 }

 void RunTest(const Output& x, const Tensor& x_init_value, const Output& y,
@@ -44,7 +44,7 @@ class NNGradTest : public ::testing::Test {
 float max_error;
 TF_ASSERT_OK((ComputeGradientError<float, float, float>(
 scope_, x, x_init_value, y, y_shape, &max_error)));
- EXPECT_LT(max_error, 2e-4);
+ EXPECT_LT(max_error, 2.2e-4);
 }

 void RunTest(const OutputList& xs, const std::vector<TensorShape>& x_shapes,
@@ -53,7 +53,7 @@ class NNGradTest : public ::testing::Test {
 float max_error;
 TF_ASSERT_OK((ComputeGradientError<float, float, float>(
 scope_, xs, x_shapes, ys, y_shapes, &max_error)));
- EXPECT_LT(max_error, 2e-4);
+ EXPECT_LT(max_error, 2.2e-4);
 }

 Scope scope_;

From d101b3c4cc9c2ca3e26697fdfbd4225aefe87abc Mon Sep 17 00:00:00 2001
From: Benoit Steiner
Date: Fri, 22 Sep 2017 07:06:06 -0700
Subject: [PATCH 05/14] Run constant folding iteratively until it converges.
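A single folding pass can materialize constants that are themselves inputs
to foldable nodes, so one pass is not enough. Optimize() now re-runs
RunOptimizationPass() until a pass stops shrinking the graph. A standalone
sketch of that fixed-point loop (illustrative only; the vector of ints
stands in for the GraphDef, and FoldOnce for RunOptimizationPass):

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // Stand-in for one folding pass: folds each adjacent pair of "nodes"
    // into one, so the graph shrinks whenever progress is possible.
    std::vector<int> FoldOnce(const std::vector<int>& graph) {
      std::vector<int> folded;
      for (std::size_t i = 0; i < graph.size(); i += 2) {
        folded.push_back(i + 1 < graph.size() ? graph[i] + graph[i + 1]
                                              : graph[i]);
      }
      return folded;
    }

    int main() {
      std::vector<int> graph;                              // previous iteration
      std::vector<int> output = {1, 2, 3, 4, 5, 6, 7, 8};  // current graph
      do {
        graph.swap(output);        // mirrors graph_.Swap(output)
        output = FoldOnce(graph);  // mirrors RunOptimizationPass(...)
      } while (output.size() < graph.size());
      std::printf("converged at %zu node(s)\n", output.size());
      return 0;
    }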
PiperOrigin-RevId: 169685214
---
 .../grappler/optimizers/constant_folding.cc | 64 ++++++++++++-------
 .../grappler/optimizers/constant_folding.h | 6 +-
 .../python/grappler/memory_optimizer_test.py | 2 +
 3 files changed, 48 insertions(+), 24 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index 88babfdc5f8e9c..faea843c69bff0 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -466,7 +466,7 @@ Status ConstantFolding::EvaluateOneFoldable(const NodeDef& node,
 return Status::OK();
 }

-Status ConstantFolding::FoldNode(NodeDef* node) {
+Status ConstantFolding::FoldNode(NodeDef* node, GraphDef* output_graph) {
 if (IsMerge(*node)) {
 // Merge nodes are special, in the sense that they execute as soon as one of
 // their input is ready. We can therefore fold a merge node iff it has at
@@ -515,7 +515,7 @@ Status ConstantFolding::FoldNode(NodeDef* node) {
 "already present in the graph"));
 }

- NodeDef* const_out = added_graph_.add_node();
+ NodeDef* const_out = output_graph->add_node();
 *const_out = *input_node;
 const_out->set_name(const_out_name);
 const_out->set_device(node->device());
@@ -523,7 +523,7 @@ Status ConstantFolding::FoldNode(NodeDef* node) {
 node_map_->AddNode(const_out->name(), const_out);
 node_map_->AddOutput(node->name(), const_out->name());

- NodeDef* const_index = added_graph_.add_node();
+ NodeDef* const_index = output_graph->add_node();
 const_index->set_op("Const");
 Tensor index(DT_INT32, TensorShape({}));
 index.flat<int32>()(0) = input_index;
@@ -612,7 +612,7 @@ Status ConstantFolding::FoldNode(NodeDef* node) {
 return errors::AlreadyExists(strings::StrCat(
 const_node->name(), "already present in the graph"));
 }
- NodeDef* added_node = added_graph_.add_node();
+ NodeDef* added_node = output_graph->add_node();
 *added_node = *const_node;
 added_node->set_device(node->device());
 node_map_->AddNode(added_node->name(), added_node);
@@ -683,7 +683,7 @@ Status ConstantFolding::FoldGraph(GraphDef* output) {
 }
 // We need to record a copy of output nodes before FoldNode() modifies it.
 std::set<NodeDef*> outputs = node_map_->GetOutputs(node->name());
- Status s = FoldNode(node);
+ Status s = FoldNode(node, output);
 processed_nodes.insert(node->name());
 if (!s.ok()) {
 VLOG(1) << "Failed to fold node " << node->name() << ": " << s;
@@ -696,14 +696,19 @@ Status ConstantFolding::FoldGraph(GraphDef* output) {
 }
 }

- // Build the graph after constant folding.
- for (const auto& node : added_graph_.node()) {
+ // Delete the newly created nodes that don't feed anything. 
+ int last = output->node_size() - 1; + for (int i = output->node_size() - 1; i >= 0; --i) { + const NodeDef& node = output->node(i); auto outputs = node_map_->GetOutputs(node.name()); - if (!outputs.empty()) { - auto added_node = output->add_node(); - *added_node = node; + if (outputs.empty()) { + output->mutable_node()->SwapElements(i, last); + last--; } } + output->mutable_node()->DeleteSubrange(last + 1, + output->node_size() - last - 1); + for (const auto& node : graph_.node()) { // If no fetch nodes is provided, we conservatively // keep all nodes in the original graph in case users need to fetch @@ -847,11 +852,11 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, return Status::OK(); } -Status ConstantFolding::Optimize(Cluster* cluster, const GrapplerItem& item, - GraphDef* output) { - graph_ = item.graph; +Status ConstantFolding::RunOptimizationPass(Cluster* cluster, + const GrapplerItem& item, + GraphDef* output) { node_map_.reset(new NodeMap(&graph_)); - nodes_to_preserve_ = item.NodesToPreserve(); + nodes_whitelist_.clear(); // Fold fetch nodes iff it has a single fanout. Note that if a fetch node // has a single fanout, it would be rewritten as a constant with the same // node name, and therefore users are still able to fetch it. This is not @@ -865,16 +870,9 @@ Status ConstantFolding::Optimize(Cluster* cluster, const GrapplerItem& item, nodes_whitelist_.insert(fetch_node->name()); } } - *output = GraphDef(); - if (cpu_device_ == nullptr) { - owned_device_.reset(new DeviceSimple()); - cpu_device_ = owned_device_.get(); - } - - bool has_feed = !item.feed.empty(); - has_fetch_ = !item.fetch.empty(); GraphProperties properties(item); + bool has_feed = !item.feed.empty(); if (!has_feed) { // Only use static shape information when there is no feed in the // graph. 
That's because it's possible to feed a placeholder with a tensor
@@ -893,6 +891,28 @@
 if (!has_feed) {
 TF_RETURN_IF_ERROR(SimplifyGraph(output, properties));
 }
+ return Status::OK();
+}
+
+Status ConstantFolding::Optimize(Cluster* cluster, const GrapplerItem& item,
+ GraphDef* output) {
+ nodes_to_preserve_ = item.NodesToPreserve();
+
+ if (cpu_device_ == nullptr) {
+ owned_device_.reset(new DeviceSimple());
+ cpu_device_ = owned_device_.get();
+ }
+
+ has_fetch_ = !item.fetch.empty();
+
+ GrapplerItem item_to_optimize = item;
+ *output = item.graph;
+ do {
+ graph_.Swap(output);
+ item_to_optimize.graph = graph_;
+ *output = GraphDef();
+ TF_RETURN_IF_ERROR(RunOptimizationPass(cluster, item_to_optimize, output));
+ } while (output->node_size() < graph_.node_size());
 *output->mutable_library() = item.graph.library();
 *output->mutable_versions() = item.graph.versions();
diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h
index ea99c33cf89e78..b115e51dbfda3a 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.h
+++ b/tensorflow/core/grappler/optimizers/constant_folding.h
@@ -60,7 +60,7 @@ class ConstantFolding : public GraphOptimizer {
 Status EvaluateOneFoldable(const NodeDef& node,
 std::vector<NodeDef>* outputs);

- Status FoldNode(NodeDef* node);
+ Status FoldNode(NodeDef* node, GraphDef* output_graph);

 Status FoldGraph(GraphDef* output);

@@ -69,6 +69,9 @@ class ConstantFolding : public GraphOptimizer {
 const GraphProperties& properties) const;
 Status SimplifyGraph(GraphDef* output, const GraphProperties& properties);

+ Status RunOptimizationPass(Cluster* cluster, const GrapplerItem& item,
+ GraphDef* output);
+
 // Points to an externally provided device or to owned_device_;
 DeviceBase* cpu_device_;
 std::unique_ptr<DeviceSimple> owned_device_;
@@ -78,7 +81,6 @@ class ConstantFolding : public GraphOptimizer {
 std::unique_ptr<NodeMap> node_map_;
 std::unordered_set<string> nodes_to_preserve_;
 std::unordered_set<string> nodes_whitelist_;
- GraphDef added_graph_;
 bool has_fetch_;
 };
diff --git a/tensorflow/python/grappler/memory_optimizer_test.py b/tensorflow/python/grappler/memory_optimizer_test.py
index 46f03952fa9127..aea7f7c57e7d20 100644
--- a/tensorflow/python/grappler/memory_optimizer_test.py
+++ b/tensorflow/python/grappler/memory_optimizer_test.py
@@ -127,6 +127,7 @@ def testRewritingDefaultGradientNames(self):
 rewritten_graph_def = tf_optimizer.OptimizeGraph(
 rewriter_config_pb2.RewriterConfig(
 disable_model_pruning=True,
+ constant_folding=rewriter_config_pb2.RewriterConfig.OFF,
 arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF,
 memory_optimization=rewriter_config_pb2.RewriterConfig.HEURISTICS),
 original_metagraph)
@@ -149,6 +150,7 @@ def testRewritingNameScopedGradientNames(self):
 rewritten_graph_def = tf_optimizer.OptimizeGraph(
 rewriter_config_pb2.RewriterConfig(
 disable_model_pruning=True,
+ constant_folding=rewriter_config_pb2.RewriterConfig.OFF,
 arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF,
 memory_optimization=rewriter_config_pb2.RewriterConfig.HEURISTICS,
 memory_optimizer_target_node_name_prefix='optimizer/gradients/'),

From ef39f0b86bd173256f16a94563999e44b5268e22 Mon Sep 17 00:00:00 2001
From: Igor Saprykin
Date: Fri, 22 Sep 2017 08:00:55 -0700
Subject: [PATCH 06/14] Move the ExportStrategy API from .contrib to .core.

This is the first of the three steps to support export strategies in
_TrainingExecutor. 
PiperOrigin-RevId: 169689743 --- .../python/estimator/export_strategy.py | 83 +++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 tensorflow/python/estimator/export_strategy.py diff --git a/tensorflow/python/estimator/export_strategy.py b/tensorflow/python/estimator/export_strategy.py new file mode 100644 index 00000000000000..bfcd20d7796862 --- /dev/null +++ b/tensorflow/python/estimator/export_strategy.py @@ -0,0 +1,83 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""ExportStrategy class represents different flavors of model export.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from tensorflow.python.estimator import util + +__all__ = ['ExportStrategy'] + + +class ExportStrategy( + collections.namedtuple('ExportStrategy', ['name', 'export_fn'])): + """A class representing a type of model export. + + Typically constructed by a utility function specific to the exporter, such as + `saved_model_export_utils.make_export_strategy()`. + + The fields are: + name: The directory name under the export base directory where exports of + this type will be written. + export_fn: A function that writes an export, given an estimator, a + destination path, and optionally a checkpoint path and an evaluation + result for that checkpoint. Note the export_fn() may choose whether or + not to export based on the eval result or based on an internal timer or + any other criterion, if exports are not desired for every checkpoint. + + The signature of this function must be one of: + + * `(estimator, export_path) -> export_path` + * `(estimator, export_path, checkpoint_path) -> export_path` + * `(estimator, export_path, checkpoint_path, eval_result) -> export_path` + """ + + def export(self, + estimator, + export_path, + checkpoint_path=None, + eval_result=None): + """Exports the given Estimator to a specific format. + + Args: + estimator: the Estimator to export. + export_path: A string containing a directory where to write the export. + checkpoint_path: The checkpoint path to export. If None (the default), + the strategy may locate a checkpoint (e.g. the most recent) by itself. + eval_result: The output of Estimator.evaluate on this checkpoint. This + should be set only if checkpoint_path is provided (otherwise it is + unclear which checkpoint this eval refers to). + + Returns: + The string path to the exported directory. + + Raises: + ValueError: if the export_fn does not have the required signature. 
+ """ + export_fn_args = util.fn_args(self.export_fn) + kwargs = {} + if 'checkpoint_path' in export_fn_args: + kwargs['checkpoint_path'] = checkpoint_path + if 'eval_result' in export_fn_args: + if 'checkpoint_path' not in export_fn_args: + raise ValueError('An export_fn accepting eval_result must also accept ' + 'checkpoint_path.') + kwargs['eval_result'] = eval_result + + return self.export_fn(estimator, export_path, **kwargs) From 00f9e5adcec888478f2939558d9addb664e83081 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 22 Sep 2017 08:55:59 -0700 Subject: [PATCH 07/14] Replace dependencies on core:tensorflow_opensource with :all_kernels in compiler/tf2xla/kernels. :tensorflow_opensource pulls in unconditional GPU dependencies that we don't need for the XLA compiler. PiperOrigin-RevId: 169694943 --- tensorflow/compiler/tf2xla/kernels/BUILD | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD index 6e6c5dc17f5364..d3898db42f2584 100644 --- a/tensorflow/compiler/tf2xla/kernels/BUILD +++ b/tensorflow/compiler/tf2xla/kernels/BUILD @@ -83,10 +83,10 @@ tf_kernel_library( "//tensorflow/compiler/xla/client:client_library", "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client/lib:arithmetic", + "//tensorflow/core:all_kernels", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", - "//tensorflow/core:tensorflow_opensource", "//tensorflow/core/kernels:bounds_check", "//tensorflow/core/kernels:concat_lib", "//tensorflow/core/kernels:conv_ops", @@ -112,7 +112,6 @@ tf_kernel_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", - "//tensorflow/core:tensorflow_opensource", ], ) @@ -137,9 +136,9 @@ tf_kernel_library( "//tensorflow/compiler/xla/client:client_library", "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client/lib:arithmetic", + "//tensorflow/core:all_kernels", "//tensorflow/core:framework", "//tensorflow/core:lib", - "//tensorflow/core:tensorflow_opensource", "//tensorflow/core/kernels:bounds_check", ], ) From 766e8a5da91efb11279ea03f2cd2bb5685769b49 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Fri, 22 Sep 2017 09:03:23 -0700 Subject: [PATCH 08/14] Include device name in XlaDevice::GetMetadata() error message. PiperOrigin-RevId: 169695795 --- tensorflow/compiler/jit/xla_device.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/jit/xla_device.cc b/tensorflow/compiler/jit/xla_device.cc index 66ec96d54a4b57..02cc6654c870be 100644 --- a/tensorflow/compiler/jit/xla_device.cc +++ b/tensorflow/compiler/jit/xla_device.cc @@ -161,8 +161,10 @@ const DeviceType& XlaDevice::Metadata::jit_device_type() const { XlaDevice* xla_device = dynamic_cast(ctx->device()); if (xla_device == nullptr) { return errors::Internal( - "GetMetadata should be called on an XLA device. This usually means an " - "internal bug or Op is placed on the wrong device."); + "Cannot get XLA metadata from non-XLA device \"", ctx->device()->name(), + "\". GetMetadata must only be called on an XLA device. 
Either an " + "internal bug has been triggered, or an XLA-specific op has been " + "placed on the wrong device."); } *metadata = &(xla_device->xla_metadata_); return Status::OK(); From 0fccb5d8384d2fd6e0c8d57fc1ebfd094a5c19af Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Fri, 22 Sep 2017 10:02:04 -0700 Subject: [PATCH 09/14] Replace assert_called() with called to fix python3 test failures PiperOrigin-RevId: 169702185 --- tensorflow/python/estimator/training_test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/estimator/training_test.py b/tensorflow/python/estimator/training_test.py index e99d945b42a3da..35398a929b437a 100644 --- a/tensorflow/python/estimator/training_test.py +++ b/tensorflow/python/estimator/training_test.py @@ -253,7 +253,7 @@ def test_train_with_train_spec(self, mock_server, unused_mock_sleep): config=test.mock.ANY, start=False) - mock_server_instance.start.assert_called() + self.assertTrue(mock_server_instance.start.called) mock_est.train.assert_called_with(input_fn=train_spec.input_fn, max_steps=train_spec.max_steps, @@ -365,7 +365,7 @@ def test_delay_for_worker(self, _): with test.mock.patch.object(time, 'sleep') as mock_sleep: mock_sleep.side_effect = lambda s: self.assertEqual(expected_secs, s) self._run_task(executor) - mock_sleep.assert_called() + self.assertTrue(mock_sleep.called) class TrainingExecutorRunChiefTest(_TrainingExecutorTrainingTest, @@ -546,8 +546,8 @@ def test_std_server(self, mock_server): config=test.mock.ANY, start=False) - mock_server_instance.start.assert_called() - mock_server_instance.join.assert_called() + self.assertTrue(mock_server_instance.start.called) + self.assertTrue(mock_server_instance.join.called) def test_fail_with_empty_cluster_spec(self): mock_est = test.mock.Mock(spec=estimator_lib.Estimator) From a1a5ed233ccbc05250020b3518f8405b32d6822e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 22 Sep 2017 10:27:53 -0700 Subject: [PATCH 10/14] [XLA] Add show_metadata argument to hlo_graph_dumper::DumpGraph. 
PiperOrigin-RevId: 169705577 --- tensorflow/compiler/xla/service/hlo_graph_dumper.cc | 13 +++++++------ tensorflow/compiler/xla/service/hlo_graph_dumper.h | 3 ++- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index f75c82006f5abc..bba6fbfae04a87 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -1223,7 +1223,8 @@ XLA_REGISTER_GRAPH_RENDERER(FileGraphRenderer, 0); string DumpGraph(const HloComputation& computation, const string& label, const DebugOptions& debug_options, - const HloExecutionProfile* hlo_execution_profile) { + const HloExecutionProfile* hlo_execution_profile, + bool show_metadata) { string graph; string graph_url; if (debug_options.xla_hlo_dump_as_graphdef()) { @@ -1237,11 +1238,11 @@ string DumpGraph(const HloComputation& computation, const string& label, graph_url = FileGraphRenderer().RenderGraph( graph, GraphRendererInterface::TF_GRAPHDEF, debug_options); } else { - graph = HloDotDumper( - &computation, label, - /*show_addresses=*/debug_options.xla_hlo_graph_addresses(), - /*show_metadata=*/false, hlo_execution_profile, NodeFilter()) - .Dump(); + graph = + HloDotDumper(&computation, label, + /*show_addresses=*/debug_options.xla_hlo_graph_addresses(), + show_metadata, hlo_execution_profile, NodeFilter()) + .Dump(); graph_url = GetGraphRenderer()->RenderGraph( graph, GraphRendererInterface::DOT_GRAPH, debug_options); } diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.h b/tensorflow/compiler/xla/service/hlo_graph_dumper.h index a17ede7f0a0e71..dd304ec76cd903 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.h +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.h @@ -55,7 +55,8 @@ string MaybeDumpHloModule(const HloModule& module, const string& label, // registry is used. string DumpGraph(const HloComputation& computation, const string& label, const DebugOptions& debug_options, - const HloExecutionProfile* hlo_execution_profile = nullptr); + const HloExecutionProfile* hlo_execution_profile = nullptr, + bool show_metadata = false); // Like DumpGraph, but renders only nodes "near" the given node in the graph. // From f69e32f2e4e544ee29cbfccfd448341c01fe1355 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Fri, 22 Sep 2017 10:32:13 -0700 Subject: [PATCH 11/14] Hide "re" from the docs system. PiperOrigin-RevId: 169706174 --- .../contrib/meta_graph_transform/meta_graph_transform.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/meta_graph_transform/meta_graph_transform.py b/tensorflow/contrib/meta_graph_transform/meta_graph_transform.py index f914391eccd2cb..ff4afbb4ce13c4 100644 --- a/tensorflow/contrib/meta_graph_transform/meta_graph_transform.py +++ b/tensorflow/contrib/meta_graph_transform/meta_graph_transform.py @@ -20,7 +20,7 @@ from __future__ import print_function -import re +import re as _re from tensorflow.core.framework import graph_pb2 as _graph_pb2 from tensorflow.core.protobuf import meta_graph_pb2 as _meta_graph_pb2 @@ -644,7 +644,7 @@ def _is_removed_mentioned(s, removed_op_names): # /foo/bar. This regex ensures that we handle these two nodes # as separate entities. It matches on nodes having names in the form of # '/foo/bar_x' as well as nodes having names in the form of 'foo.' 
- s_names = re.findall(r'((?:[\/]?[a-zA-Z0-9\_]*)*)', compat.as_str_any(s)) + s_names = _re.findall(r'((?:[\/]?[a-zA-Z0-9\_]*)*)', compat.as_str_any(s)) for removed_op_name in removed_op_names: for s_name in s_names: if s_name.endswith(removed_op_name): From 2a7a3a3e4ec7904ee4303b380141eaef87ff617e Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Fri, 22 Sep 2017 10:34:47 -0700 Subject: [PATCH 12/14] Add libtensorflow_framework.so to a bunch of genrules Fixes hermetic builds PiperOrigin-RevId: 169706567 --- tensorflow/java/src/gen/gen_ops.bzl | 13 ++++------ tensorflow/python/eager/gen_op.bzl | 3 ++- tensorflow/tensorflow.bzl | 39 +++++++++++++---------------- tensorflow/tools/lib_package/BUILD | 3 ++- 4 files changed, 27 insertions(+), 31 deletions(-) diff --git a/tensorflow/java/src/gen/gen_ops.bzl b/tensorflow/java/src/gen/gen_ops.bzl index 1d62e5bacb6a19..28f0908ec4a7a0 100644 --- a/tensorflow/java/src/gen/gen_ops.bzl +++ b/tensorflow/java/src/gen/gen_ops.bzl @@ -1,9 +1,9 @@ # -*- Python -*- -load("//tensorflow:tensorflow.bzl", "tf_cc_binary", "tf_copts") -load( - "//tensorflow/core:platform/default/build_config_root.bzl", - "if_static") +load("//tensorflow:tensorflow.bzl", + "tf_binary_additional_srcs", + "tf_cc_binary", + "tf_copts") # Given a list of "ops_libs" (a list of files in the core/ops directory # without their .cc extensions), generate Java wrapper code for all operations @@ -54,10 +54,7 @@ def tf_java_op_gen_srcjar(name, gen_srcjar = out_dir + name + ".srcjar" gen_cmds += ["$(location @local_jdk//:jar) cMf $(location :" + gen_srcjar + ") -C $(@D) ."] gen_tools += ["@local_jdk//:jar"] + ["@local_jdk//:jdk"] - gen_tools += if_static( - extra_deps=[], - otherwise=["//tensorflow:libtensorflow_framework.so"] - ) + gen_tools += tf_binary_additional_srcs() native.genrule( name=name, outs=[gen_srcjar], diff --git a/tensorflow/python/eager/gen_op.bzl b/tensorflow/python/eager/gen_op.bzl index cb725dbf8cb749..1c99d342befaf0 100644 --- a/tensorflow/python/eager/gen_op.bzl +++ b/tensorflow/python/eager/gen_op.bzl @@ -2,6 +2,7 @@ load("//tensorflow:tensorflow.bzl", "clean_dep", + "tf_binary_additional_srcs", "tf_copts", "tf_cc_binary") @@ -32,7 +33,7 @@ def tfe_gen_op_wrapper_py(name, native.genrule( name=name + "_pygenrule", outs=[out], - tools=[tool_name], + tools=[tool_name] + tf_binary_additional_srcs(), cmd=("$(location " + tool_name + ") > $@")) # Make a py_library out of the generated python file. diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 8eb3e661c0c779..a308688790ce9a 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -238,14 +238,22 @@ def _rpath_linkopts(name): }) +# Bazel-generated shared objects which must be linked into TensorFlow binaries +# to define symbols from //tensorflow/core:framework and //tensorflow/core:lib. +def tf_binary_additional_srcs(): + return if_static( + extra_deps=[], + otherwise=[ + clean_dep("//tensorflow:libtensorflow_framework.so"), + ]) + + def tf_cc_shared_object( name, srcs=[], deps=[], linkopts=[], - framework_so=if_static( - extra_deps=[], - otherwise=["//tensorflow:libtensorflow_framework.so"]), + framework_so=tf_binary_additional_srcs(), **kwargs): native.cc_binary( name=name, @@ -262,16 +270,6 @@ def tf_cc_shared_object( **kwargs) -# Bazel-generated shared objects which must be linked into TensorFlow binaries -# to define symbols from //tensorflow/core:framework and //tensorflow/core:lib. 
-def _binary_additional_srcs(): - return if_static( - extra_deps=[], - otherwise=[ - clean_dep("//tensorflow:libtensorflow_framework.so"), - ]) - - # Links in the framework shared object # (//third_party/tensorflow:libtensorflow_framework.so) when not building # statically. Also adds linker options (rpaths) so that the framework shared @@ -283,7 +281,7 @@ def tf_cc_binary(name, **kwargs): native.cc_binary( name=name, - srcs=srcs + _binary_additional_srcs(), + srcs=srcs + tf_binary_additional_srcs(), deps=deps + if_mkl( [ "//third_party/mkl:intel_binary_blob", @@ -292,7 +290,6 @@ def tf_cc_binary(name, linkopts=linkopts + _rpath_linkopts(name), **kwargs) - def tf_gen_op_wrapper_cc(name, out_ops_file, pkg="", @@ -324,7 +321,7 @@ def tf_gen_op_wrapper_cc(name, out_ops_file + "_internal.h", out_ops_file + "_internal.cc" ], srcs=srcs, - tools=[":" + tool] + _binary_additional_srcs(), + tools=[":" + tool] + tf_binary_additional_srcs(), cmd=("$(location :" + tool + ") $(location :" + out_ops_file + ".h) " + "$(location :" + out_ops_file + ".cc) " + override_arg + " " + str(include_internal_ops))) @@ -488,14 +485,14 @@ def tf_gen_op_wrapper_py(name, name=name + "_pygenrule", outs=[out], srcs=[hidden_file], - tools=[tool_name], + tools=[tool_name] + tf_binary_additional_srcs(), cmd=("$(location " + tool_name + ") @$(location " + hidden_file + ") " + ("1" if require_shape_functions else "0") + " > $@")) else: native.genrule( name=name + "_pygenrule", outs=[out], - tools=[tool_name], + tools=[tool_name] + tf_binary_additional_srcs(), cmd=("$(location " + tool_name + ") " + op_list_arg + " " + ("1" if require_shape_functions else "0") + " " + ("1" if op_list_is_whitelist else "0") + " > $@")) @@ -532,7 +529,7 @@ def tf_cc_test(name, **kwargs): native.cc_test( name="%s%s" % (name, suffix), - srcs=srcs + _binary_additional_srcs(), + srcs=srcs + tf_binary_additional_srcs(), copts=tf_copts() + extra_copts, linkopts=["-lpthread", "-lm"] + linkopts + _rpath_linkopts(name), deps=deps + if_mkl( @@ -626,7 +623,7 @@ def tf_cuda_only_cc_test(name, linkopts=[]): native.cc_test( name="%s%s" % (name, "_gpu"), - srcs=srcs + _binary_additional_srcs(), + srcs=srcs + tf_binary_additional_srcs(), size=size, args=args, copts= _cuda_copts() + tf_copts(), @@ -712,7 +709,7 @@ def tf_java_test(name, native.java_test( name=name, srcs=srcs, - deps=deps + _binary_additional_srcs(), + deps=deps + tf_binary_additional_srcs(), *args, **kwargs) diff --git a/tensorflow/tools/lib_package/BUILD b/tensorflow/tools/lib_package/BUILD index d522c9d395afb4..845bad5e499025 100644 --- a/tensorflow/tools/lib_package/BUILD +++ b/tensorflow/tools/lib_package/BUILD @@ -4,6 +4,7 @@ package(default_visibility = ["//visibility:private"]) load("@bazel_tools//tools/build_defs/pkg:pkg.bzl", "pkg_tar") +load("//tensorflow:tensorflow.bzl", "tf_binary_additional_srcs") load("//third_party/mkl:build_defs.bzl", "if_mkl") genrule( @@ -48,7 +49,7 @@ pkg_tar( # Shared objects that all TensorFlow libraries depend on. pkg_tar( name = "common_deps", - files = ["//tensorflow:libtensorflow_framework.so"], + files = tf_binary_additional_srcs(), tags = ["manual"], ) From 7b2f14a73d336dc0a98bbb69ecef9338568c839c Mon Sep 17 00:00:00 2001 From: Michael Case Date: Fri, 22 Sep 2017 10:46:42 -0700 Subject: [PATCH 13/14] Fix sometimes overwriting the default bazel config arguments in Jenkins build script. 
PiperOrigin-RevId: 169708266 --- tensorflow/tools/ci_build/ci_parameterized_build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh index a12b80de16d420..7a1479c150488d 100755 --- a/tensorflow/tools/ci_build/ci_parameterized_build.sh +++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh @@ -351,7 +351,7 @@ if [[ "${TF_BUILD_APPEND_ARGUMENTS}" == *"--test_tag_filters="* ]]; then fi done else - EXTRA_ARGS="${TF_BUILD_APPEND_ARGUMENTS} --test_tag_filters=-no_oss,-oss_serial,-benchmark-test" + EXTRA_ARGS="${EXTRA_ARGS} ${TF_BUILD_APPEND_ARGUMENTS} --test_tag_filters=-no_oss,-oss_serial,-benchmark-test" if [[ ${IS_MAC} == "1" ]]; then EXTRA_ARGS="${EXTRA_ARGS},-nomac" fi From f927a72031ce563d65cf8864fe142ceb173444f5 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Fri, 22 Sep 2017 11:15:02 -0700 Subject: [PATCH 14/14] Add ltensorflow_framework to the test_user_ops GPU build PiperOrigin-RevId: 169712715 --- tensorflow/tools/ci_build/builds/test_user_ops.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/tools/ci_build/builds/test_user_ops.sh b/tensorflow/tools/ci_build/builds/test_user_ops.sh index 479b1931ca15c2..4f1c61b8e9a799 100755 --- a/tensorflow/tools/ci_build/builds/test_user_ops.sh +++ b/tensorflow/tools/ci_build/builds/test_user_ops.sh @@ -203,8 +203,8 @@ else USER_OP_SO="add_one.so" "${GPP_BIN}" -std=c++11 ${EXTRA_GPP_FLAGS} \ -shared -o "${USER_OP_SO}" "${OP_KERNEL_CC}" \ - "${OP_KERNEL_O}" ${TF_INCLUDE_PATH} -L "${CUDA_LIB_DIR}" \ - -fPIC -lcudart || \ + "${OP_KERNEL_O}" ${TF_INCLUDE_PATH} -L "${CUDA_LIB_DIR}" -L "${TF_LIB}" \ + -fPIC -lcudart -ltensorflow_framework || \ die "g++ compilation of ${OP_KERNEL_CC}" FAILED fi