From 04933363e818bd03aee4c5478be038c8dc18eb72 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Thu, 21 Sep 2017 21:01:14 -0700 Subject: [PATCH 01/14] eager: Fix bug in tfe.gradients_function(). The generated function was not always treating function parameters as distinct tensors. Added test would fail (the calls to np_g() would return [2., 2.] instead of [1., 1.]) without the corresponding change to backprop.py PiperOrigin-RevId: 169642367 --- tensorflow/python/eager/backprop.py | 38 +++++++++++++++++++++--- tensorflow/python/eager/backprop_test.py | 24 +++++++++++++++ tensorflow/python/eager/function_test.py | 13 ++++++++ tensorflow/python/framework/ops.py | 8 +++++ 4 files changed, 79 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index cb7171dea8f289..a83d02151bc751 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -714,6 +714,38 @@ def decorated(*args, **kwds): return decorated +def _ensure_unique_tensor_objects(parameter_positions, args): + """Make each of the parameter_positions in args a unique ops.Tensor object. + + Ensure that each parameter is treated independently. + For example: + + def f(x, y): return x * y + g = gradients_function(f) + one = tf.constant(1.) + + g(one, one) should return [1., 1.] + (even though the two arguments are the same Tensor object). + + Args: + parameter_positions: List of indices into args defining the arguments to + differentiate against. + args: A list of arguments to the function to be differentiated. + + Returns: + args, possibly edited in-place. + """ + s = set() + for (i, t) in enumerate(args): + if i in parameter_positions: + tid = ops.tensor_id(t) + if tid in s: + args[i] = args[i]._dup() # pylint: disable=protected-access + else: + s.add(tid) + return args + + def val_and_grad_function(f, params=None): """Returns a function that computes f and is derivative w.r.t. params. @@ -778,13 +810,11 @@ def decorated(*args, **kwds): ops.convert_to_tensor(args[i]) if i in parameter_positions else args[i] for i in range(len(args)) ] + args = _ensure_unique_tensor_objects(parameter_positions, args) for i in parameter_positions: sources.append(args[i]) tape.watch(args[i]) result = f(*args) - return result, imperative_grad( - result, - sources, - output_gradients=dy) + return result, imperative_grad(result, sources, output_gradients=dy) return decorated diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py index 483b28791a7e09..599cf4fdca3057 100644 --- a/tensorflow/python/eager/backprop_test.py +++ b/tensorflow/python/eager/backprop_test.py @@ -373,6 +373,30 @@ def tfe_conv2d(timage, tkernel, conv2dstrides): grad = backprop.gradients_function(tfe_conv2d, params=(0,))(i, k, s)[0] self.assertAllEqual([[[[2.0]]]], grad.numpy()) + def testSameObjectForMultipleArguments(self): + + def f(x, y): + return math_ops.multiply(x, y) + + g = backprop.gradients_function(f) + + def np_g(x, y): + dx, dy = g(x, y) + return [dx.numpy(), dy.numpy()] + + x = constant_op.constant(1.) + self.assertAllEqual([1., 1.], np_g(x, x)) + x = 1. 
+ self.assertAllEqual([1., 1.], np_g(x, x)) + x = constant_op.constant([[1.]]) + self.assertAllEqual([[[1.]], [[1.]]], np_g(x, x)) + x = [[1.]] + self.assertAllEqual([[[1.]], [[1.]]], np_g(x, x)) + + v = resource_variable_ops.ResourceVariable( + initial_value=1., name='testSameObjectForMultipleArguments.Variable') + self.assertAllEqual([1., 1.], np_g(v, v)) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index 611a1c88cf1225..899b6d59b7096f 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -99,6 +99,19 @@ def g(x): self.assertAllEqual(2, g(constant_op.constant(2)).numpy()) + def testDefunCallBackpropUsingSameObjectForMultipleArguments(self): + + @function.defun + def g(x): + return backprop.gradients_function(math_ops.multiply, [0, 1])(x, x) + + def np_g(x): + return [d.numpy() for d in g(x)] + + x = constant_op.constant(1.) + self.assertAllEqual([1., 1.], np_g(x)) + self.assertAllEqual([1., 1.], np_g(1.)) + def testCallShape(self): @function.defun diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 1ddeaa0f719543..db9aa1e06173a9 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -578,6 +578,11 @@ def eval(self, feed_dict=None, session=None): """ return _eval_using_default_session(self, feed_dict, self.graph, session) + def _dup(self): + ret = copy.copy(self) + ret._id = uid() # pylint: disable=protected-access + return ret + def _eager_cast(tensor_handle, src_type_enum, dest_type_enum, ctx): """Cast tensor_handle from src_type_enum to dest_type_enum.""" @@ -772,6 +777,9 @@ def grad_fun(dresult): return new_tensor # pylint: enable=protected-access + def _dup(self): + return self._copy(device_name=self.device) + @property def device(self): return c_api.TFE_TensorHandleDeviceName(self._handle) From da2606f4963744f6940f8060ea5ca1f7f18181cc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 21 Sep 2017 21:40:23 -0700 Subject: [PATCH 02/14] Fix typos in the API documentation for tensorflow::PendingCounts. PiperOrigin-RevId: 169644786 --- tensorflow/core/common_runtime/pending_counts.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/common_runtime/pending_counts.h b/tensorflow/core/common_runtime/pending_counts.h index 198eb896afc76d..9e39b6b7b93a8e 100644 --- a/tensorflow/core/common_runtime/pending_counts.h +++ b/tensorflow/core/common_runtime/pending_counts.h @@ -24,7 +24,7 @@ limitations under the License. namespace tensorflow { -// PendingCounts is internal helper class to keep track of pending and +// PendingCounts is an internal helper class to keep track of pending and // dead counts for nodes, for use in the ExecutorState module. It // holds a map from Handles to various counts for that handle. This // information is needed per frame iteration. The amount of memory @@ -39,7 +39,7 @@ namespace tensorflow { // } // // When we actually want to start an iteration we first create a -// nPendingCounts object and then index into it using the precomputed +// PendingCounts object and then index into it using the precomputed // handles: // PendingCounts counts(layout); From 3a38f48a32f1190520b3647d99752c1d805ffb42 Mon Sep 17 00:00:00 2001 From: Yao Zhang Date: Thu, 21 Sep 2017 21:42:52 -0700 Subject: [PATCH 03/14] Fix memory leak if a node evaluation fails due to a divide by zero exception. 
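The leak: EvaluateOneFoldable() heap-allocates the tensors produced while
folding a node, but the early return triggered by a failed EvaluateNode()
call (e.g. a division by zero during evaluation) skipped the per-output
deletes, leaking everything produced up to that point. The fix below
registers output_tensors with the same cleanup object that already guards
the inputs. For context, a standalone sketch of the scope-guard idiom the
fix relies on (illustrative only; ScopeGuard is a hand-rolled stand-in,
not TensorFlow's gtl::MakeCleanup):

    // Minimal scope guard: the destructor runs on every exit path,
    // early error returns included. Requires C++17 for class template
    // argument deduction.
    #include <utility>

    template <typename F>
    class ScopeGuard {
     public:
      explicit ScopeGuard(F f) : f_(std::move(f)) {}
      ~ScopeGuard() { f_(); }
     private:
      F f_;
    };

    bool Evaluate(int divisor) {
      int* scratch = new int[16];
      ScopeGuard guard([&] { delete[] scratch; });
      if (divisor == 0) {
        return false;  // early return: guard still frees scratch
      }
      scratch[0] = 100 / divisor;
      return true;
    }

    int main() { return Evaluate(0) ? 1 : 0; }  // exercises the error path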
PiperOrigin-RevId: 169644909
---
 .../core/grappler/optimizers/constant_folding.cc | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index 6ed5c36b1ff041..88babfdc5f8e9c 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -414,10 +414,16 @@ Status ConstantFolding::EvaluateNode(const NodeDef& node,
 Status ConstantFolding::EvaluateOneFoldable(const NodeDef& node,
 std::vector<NodeDef>* outputs) {
 TensorVector inputs;
- auto inputs_cleanup = gtl::MakeCleanup([&inputs] {
+ TensorVector output_tensors;
+ auto inputs_cleanup = gtl::MakeCleanup([&inputs, &output_tensors] {
 for (const auto& input : inputs) {
 delete input.tensor;
 }
+ for (const auto& output : output_tensors) {
+ if (output.tensor) {
+ delete output.tensor;
+ }
+ }
 });

 for (const auto& input : node.input()) {
@@ -439,7 +445,6 @@ Status ConstantFolding::EvaluateOneFoldable(const NodeDef& node,
 inputs.emplace_back(value);
 }

- TensorVector output_tensors;
 TF_RETURN_IF_ERROR(EvaluateNode(node, inputs, &output_tensors));
 if (output_tensors.empty()) {
 return Status(error::INVALID_ARGUMENT, "Expected at least one output.");
@@ -452,7 +457,6 @@ Status ConstantFolding::EvaluateOneFoldable(const NodeDef& node,
 }
 if (output_tensors[i].tensor) {
 outputs->push_back(CreateNodeDef(node_name, output_tensors[i]));
- delete output_tensors[i].tensor;
 } else {
 // Create an empty NodeDef to identify dead outputs (e.g. the output of a
 // switch that's not selected by the switch predicate).

From abf90c9e0669039055bc1b0ae656e58d0a031d49 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Thu, 21 Sep 2017 21:52:39 -0700
Subject: [PATCH 04/14] Increase tolerance on nn_grad_test.

PiperOrigin-RevId: 169645639
---
 tensorflow/cc/gradients/nn_grad_test.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc
index 23545f75ac1d4f..156a070acfb4ca 100644
--- a/tensorflow/cc/gradients/nn_grad_test.cc
+++ b/tensorflow/cc/gradients/nn_grad_test.cc
@@ -36,7 +36,7 @@ class NNGradTest : public ::testing::Test {
 float max_error;
 TF_ASSERT_OK((ComputeGradientError<float, float, float>(
 scope_, {x}, {x_shape}, {y}, {y_shape}, &max_error)));
- EXPECT_LT(max_error, 2e-4);
+ EXPECT_LT(max_error, 2.2e-4);
 }

 void RunTest(const Output& x, const Tensor& x_init_value, const Output& y,
@@ -44,7 +44,7 @@ class NNGradTest : public ::testing::Test {
 float max_error;
 TF_ASSERT_OK((ComputeGradientError<float, float, float>(
 scope_, x, x_init_value, y, y_shape, &max_error)));
- EXPECT_LT(max_error, 2e-4);
+ EXPECT_LT(max_error, 2.2e-4);
 }

 void RunTest(const OutputList& xs, const std::vector<TensorShape>& x_shapes,
@@ -53,7 +53,7 @@ class NNGradTest : public ::testing::Test {
 float max_error;
 TF_ASSERT_OK((ComputeGradientError<float, float, float>(
 scope_, xs, x_shapes, ys, y_shapes, &max_error)));
- EXPECT_LT(max_error, 2e-4);
+ EXPECT_LT(max_error, 2.2e-4);
 }

 Scope scope_;

From d101b3c4cc9c2ca3e26697fdfbd4225aefe87abc Mon Sep 17 00:00:00 2001
From: Benoit Steiner
Date: Fri, 22 Sep 2017 07:06:06 -0700
Subject: [PATCH 05/14] Run constant folding iteratively until it converges.
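A single folding pass can materialize constants that are themselves inputs
to foldable nodes, so one pass is not enough. Optimize() now re-runs
RunOptimizationPass() until a pass stops shrinking the graph. A standalone
sketch of that fixed-point loop (illustrative only; the vector of ints
stands in for the GraphDef, and FoldOnce for RunOptimizationPass):

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // Stand-in for one folding pass: folds each adjacent pair of "nodes"
    // into one, so the graph shrinks whenever progress is possible.
    std::vector<int> FoldOnce(const std::vector<int>& graph) {
      std::vector<int> folded;
      for (std::size_t i = 0; i < graph.size(); i += 2) {
        folded.push_back(i + 1 < graph.size() ? graph[i] + graph[i + 1]
                                              : graph[i]);
      }
      return folded;
    }

    int main() {
      std::vector<int> graph;                              // previous iteration
      std::vector<int> output = {1, 2, 3, 4, 5, 6, 7, 8};  // current graph
      do {
        graph.swap(output);        // mirrors graph_.Swap(output)
        output = FoldOnce(graph);  // mirrors RunOptimizationPass(...)
      } while (output.size() < graph.size());
      std::printf("converged at %zu node(s)\n", output.size());
      return 0;
    }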
PiperOrigin-RevId: 169685214
---
 .../grappler/optimizers/constant_folding.cc | 64 ++++++++++++-------
 .../grappler/optimizers/constant_folding.h | 6 +-
 .../python/grappler/memory_optimizer_test.py | 2 +
 3 files changed, 48 insertions(+), 24 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index 88babfdc5f8e9c..faea843c69bff0 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -466,7 +466,7 @@ Status ConstantFolding::EvaluateOneFoldable(const NodeDef& node,
 return Status::OK();
 }

-Status ConstantFolding::FoldNode(NodeDef* node) {
+Status ConstantFolding::FoldNode(NodeDef* node, GraphDef* output_graph) {
 if (IsMerge(*node)) {
 // Merge nodes are special, in the sense that they execute as soon as one of
 // their input is ready. We can therefore fold a merge node iff it has at
@@ -515,7 +515,7 @@ Status ConstantFolding::FoldNode(NodeDef* node) {
 "already present in the graph"));
 }

- NodeDef* const_out = added_graph_.add_node();
+ NodeDef* const_out = output_graph->add_node();
 *const_out = *input_node;
 const_out->set_name(const_out_name);
 const_out->set_device(node->device());
@@ -523,7 +523,7 @@ Status ConstantFolding::FoldNode(NodeDef* node) {
 node_map_->AddNode(const_out->name(), const_out);
 node_map_->AddOutput(node->name(), const_out->name());

- NodeDef* const_index = added_graph_.add_node();
+ NodeDef* const_index = output_graph->add_node();
 const_index->set_op("Const");
 Tensor index(DT_INT32, TensorShape({}));
 index.flat<int32>()(0) = input_index;
@@ -612,7 +612,7 @@ Status ConstantFolding::FoldNode(NodeDef* node) {
 return errors::AlreadyExists(strings::StrCat(
 const_node->name(), "already present in the graph"));
 }
- NodeDef* added_node = added_graph_.add_node();
+ NodeDef* added_node = output_graph->add_node();
 *added_node = *const_node;
 added_node->set_device(node->device());
 node_map_->AddNode(added_node->name(), added_node);
@@ -683,7 +683,7 @@ Status ConstantFolding::FoldGraph(GraphDef* output) {
 }
 // We need to record a copy of output nodes before FoldNode() modifies it.
 std::set<NodeDef*> outputs = node_map_->GetOutputs(node->name());
- Status s = FoldNode(node);
+ Status s = FoldNode(node, output);
 processed_nodes.insert(node->name());
 if (!s.ok()) {
 VLOG(1) << "Failed to fold node " << node->name() << ": " << s;
@@ -696,14 +696,19 @@ Status ConstantFolding::FoldGraph(GraphDef* output) {
 }
 }

- // Build the graph after constant folding.
- for (const auto& node : added_graph_.node()) {
+ // Delete the newly created nodes that don't feed anything. 
+ int last = output->node_size() - 1; + for (int i = output->node_size() - 1; i >= 0; --i) { + const NodeDef& node = output->node(i); auto outputs = node_map_->GetOutputs(node.name()); - if (!outputs.empty()) { - auto added_node = output->add_node(); - *added_node = node; + if (outputs.empty()) { + output->mutable_node()->SwapElements(i, last); + last--; } } + output->mutable_node()->DeleteSubrange(last + 1, + output->node_size() - last - 1); + for (const auto& node : graph_.node()) { // If no fetch nodes is provided, we conservatively // keep all nodes in the original graph in case users need to fetch @@ -847,11 +852,11 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, return Status::OK(); } -Status ConstantFolding::Optimize(Cluster* cluster, const GrapplerItem& item, - GraphDef* output) { - graph_ = item.graph; +Status ConstantFolding::RunOptimizationPass(Cluster* cluster, + const GrapplerItem& item, + GraphDef* output) { node_map_.reset(new NodeMap(&graph_)); - nodes_to_preserve_ = item.NodesToPreserve(); + nodes_whitelist_.clear(); // Fold fetch nodes iff it has a single fanout. Note that if a fetch node // has a single fanout, it would be rewritten as a constant with the same // node name, and therefore users are still able to fetch it. This is not @@ -865,16 +870,9 @@ Status ConstantFolding::Optimize(Cluster* cluster, const GrapplerItem& item, nodes_whitelist_.insert(fetch_node->name()); } } - *output = GraphDef(); - if (cpu_device_ == nullptr) { - owned_device_.reset(new DeviceSimple()); - cpu_device_ = owned_device_.get(); - } - - bool has_feed = !item.feed.empty(); - has_fetch_ = !item.fetch.empty(); GraphProperties properties(item); + bool has_feed = !item.feed.empty(); if (!has_feed) { // Only use static shape information when there is no feed in the // graph. 
That's because it's possible to feed a placeholder with a tensor
@@ -893,6 +891,28 @@
 if (!has_feed) {
 TF_RETURN_IF_ERROR(SimplifyGraph(output, properties));
 }
+ return Status::OK();
+}
+
+Status ConstantFolding::Optimize(Cluster* cluster, const GrapplerItem& item,
+ GraphDef* output) {
+ nodes_to_preserve_ = item.NodesToPreserve();
+
+ if (cpu_device_ == nullptr) {
+ owned_device_.reset(new DeviceSimple());
+ cpu_device_ = owned_device_.get();
+ }
+
+ has_fetch_ = !item.fetch.empty();
+
+ GrapplerItem item_to_optimize = item;
+ *output = item.graph;
+ do {
+ graph_.Swap(output);
+ item_to_optimize.graph = graph_;
+ *output = GraphDef();
+ TF_RETURN_IF_ERROR(RunOptimizationPass(cluster, item_to_optimize, output));
+ } while (output->node_size() < graph_.node_size());
 *output->mutable_library() = item.graph.library();
 *output->mutable_versions() = item.graph.versions();
diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h
index ea99c33cf89e78..b115e51dbfda3a 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.h
+++ b/tensorflow/core/grappler/optimizers/constant_folding.h
@@ -60,7 +60,7 @@ class ConstantFolding : public GraphOptimizer {
 Status EvaluateOneFoldable(const NodeDef& node,
 std::vector<NodeDef>* outputs);

- Status FoldNode(NodeDef* node);
+ Status FoldNode(NodeDef* node, GraphDef* output_graph);

 Status FoldGraph(GraphDef* output);

@@ -69,6 +69,9 @@ class ConstantFolding : public GraphOptimizer {
 const GraphProperties& properties) const;
 Status SimplifyGraph(GraphDef* output, const GraphProperties& properties);

+ Status RunOptimizationPass(Cluster* cluster, const GrapplerItem& item,
+ GraphDef* output);
+
 // Points to an externally provided device or to owned_device_;
 DeviceBase* cpu_device_;
 std::unique_ptr<DeviceSimple> owned_device_;
@@ -78,7 +81,6 @@ class ConstantFolding : public GraphOptimizer {
 std::unique_ptr<NodeMap> node_map_;
 std::unordered_set<string> nodes_to_preserve_;
 std::unordered_set<string> nodes_whitelist_;
- GraphDef added_graph_;
 bool has_fetch_;
 };
diff --git a/tensorflow/python/grappler/memory_optimizer_test.py b/tensorflow/python/grappler/memory_optimizer_test.py
index 46f03952fa9127..aea7f7c57e7d20 100644
--- a/tensorflow/python/grappler/memory_optimizer_test.py
+++ b/tensorflow/python/grappler/memory_optimizer_test.py
@@ -127,6 +127,7 @@ def testRewritingDefaultGradientNames(self):
 rewritten_graph_def = tf_optimizer.OptimizeGraph(
 rewriter_config_pb2.RewriterConfig(
 disable_model_pruning=True,
+ constant_folding=rewriter_config_pb2.RewriterConfig.OFF,
 arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF,
 memory_optimization=rewriter_config_pb2.RewriterConfig.HEURISTICS),
 original_metagraph)
@@ -149,6 +150,7 @@ def testRewritingNameScopedGradientNames(self):
 rewritten_graph_def = tf_optimizer.OptimizeGraph(
 rewriter_config_pb2.RewriterConfig(
 disable_model_pruning=True,
+ constant_folding=rewriter_config_pb2.RewriterConfig.OFF,
 arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF,
 memory_optimization=rewriter_config_pb2.RewriterConfig.HEURISTICS,
 memory_optimizer_target_node_name_prefix='optimizer/gradients/'),

From ef39f0b86bd173256f16a94563999e44b5268e22 Mon Sep 17 00:00:00 2001
From: Igor Saprykin
Date: Fri, 22 Sep 2017 08:00:55 -0700
Subject: [PATCH 06/14] Move the ExportStrategy API from .contrib to .core.

This is the first of the three steps to support export strategies in
_TrainingExecutor. 
PiperOrigin-RevId: 169689743 --- .../python/estimator/export_strategy.py | 83 +++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 tensorflow/python/estimator/export_strategy.py diff --git a/tensorflow/python/estimator/export_strategy.py b/tensorflow/python/estimator/export_strategy.py new file mode 100644 index 00000000000000..bfcd20d7796862 --- /dev/null +++ b/tensorflow/python/estimator/export_strategy.py @@ -0,0 +1,83 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""ExportStrategy class represents different flavors of model export.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from tensorflow.python.estimator import util + +__all__ = ['ExportStrategy'] + + +class ExportStrategy( + collections.namedtuple('ExportStrategy', ['name', 'export_fn'])): + """A class representing a type of model export. + + Typically constructed by a utility function specific to the exporter, such as + `saved_model_export_utils.make_export_strategy()`. + + The fields are: + name: The directory name under the export base directory where exports of + this type will be written. + export_fn: A function that writes an export, given an estimator, a + destination path, and optionally a checkpoint path and an evaluation + result for that checkpoint. Note the export_fn() may choose whether or + not to export based on the eval result or based on an internal timer or + any other criterion, if exports are not desired for every checkpoint. + + The signature of this function must be one of: + + * `(estimator, export_path) -> export_path` + * `(estimator, export_path, checkpoint_path) -> export_path` + * `(estimator, export_path, checkpoint_path, eval_result) -> export_path` + """ + + def export(self, + estimator, + export_path, + checkpoint_path=None, + eval_result=None): + """Exports the given Estimator to a specific format. + + Args: + estimator: the Estimator to export. + export_path: A string containing a directory where to write the export. + checkpoint_path: The checkpoint path to export. If None (the default), + the strategy may locate a checkpoint (e.g. the most recent) by itself. + eval_result: The output of Estimator.evaluate on this checkpoint. This + should be set only if checkpoint_path is provided (otherwise it is + unclear which checkpoint this eval refers to). + + Returns: + The string path to the exported directory. + + Raises: + ValueError: if the export_fn does not have the required signature. 
+ """ + export_fn_args = util.fn_args(self.export_fn) + kwargs = {} + if 'checkpoint_path' in export_fn_args: + kwargs['checkpoint_path'] = checkpoint_path + if 'eval_result' in export_fn_args: + if 'checkpoint_path' not in export_fn_args: + raise ValueError('An export_fn accepting eval_result must also accept ' + 'checkpoint_path.') + kwargs['eval_result'] = eval_result + + return self.export_fn(estimator, export_path, **kwargs) From 00f9e5adcec888478f2939558d9addb664e83081 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 22 Sep 2017 08:55:59 -0700 Subject: [PATCH 07/14] Replace dependencies on core:tensorflow_opensource with :all_kernels in compiler/tf2xla/kernels. :tensorflow_opensource pulls in unconditional GPU dependencies that we don't need for the XLA compiler. PiperOrigin-RevId: 169694943 --- tensorflow/compiler/tf2xla/kernels/BUILD | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD index 6e6c5dc17f5364..d3898db42f2584 100644 --- a/tensorflow/compiler/tf2xla/kernels/BUILD +++ b/tensorflow/compiler/tf2xla/kernels/BUILD @@ -83,10 +83,10 @@ tf_kernel_library( "//tensorflow/compiler/xla/client:client_library", "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client/lib:arithmetic", + "//tensorflow/core:all_kernels", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", - "//tensorflow/core:tensorflow_opensource", "//tensorflow/core/kernels:bounds_check", "//tensorflow/core/kernels:concat_lib", "//tensorflow/core/kernels:conv_ops", @@ -112,7 +112,6 @@ tf_kernel_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", - "//tensorflow/core:tensorflow_opensource", ], ) @@ -137,9 +136,9 @@ tf_kernel_library( "//tensorflow/compiler/xla/client:client_library", "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client/lib:arithmetic", + "//tensorflow/core:all_kernels", "//tensorflow/core:framework", "//tensorflow/core:lib", - "//tensorflow/core:tensorflow_opensource", "//tensorflow/core/kernels:bounds_check", ], ) From 766e8a5da91efb11279ea03f2cd2bb5685769b49 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Fri, 22 Sep 2017 09:03:23 -0700 Subject: [PATCH 08/14] Include device name in XlaDevice::GetMetadata() error message. PiperOrigin-RevId: 169695795 --- tensorflow/compiler/jit/xla_device.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/jit/xla_device.cc b/tensorflow/compiler/jit/xla_device.cc index 66ec96d54a4b57..02cc6654c870be 100644 --- a/tensorflow/compiler/jit/xla_device.cc +++ b/tensorflow/compiler/jit/xla_device.cc @@ -161,8 +161,10 @@ const DeviceType& XlaDevice::Metadata::jit_device_type() const { XlaDevice* xla_device = dynamic_cast(ctx->device()); if (xla_device == nullptr) { return errors::Internal( - "GetMetadata should be called on an XLA device. This usually means an " - "internal bug or Op is placed on the wrong device."); + "Cannot get XLA metadata from non-XLA device \"", ctx->device()->name(), + "\". GetMetadata must only be called on an XLA device. 
Either an " + "internal bug has been triggered, or an XLA-specific op has been " + "placed on the wrong device."); } *metadata = &(xla_device->xla_metadata_); return Status::OK(); From 0fccb5d8384d2fd6e0c8d57fc1ebfd094a5c19af Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Fri, 22 Sep 2017 10:02:04 -0700 Subject: [PATCH 09/14] Replace assert_called() with called to fix python3 test failures PiperOrigin-RevId: 169702185 --- tensorflow/python/estimator/training_test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/estimator/training_test.py b/tensorflow/python/estimator/training_test.py index e99d945b42a3da..35398a929b437a 100644 --- a/tensorflow/python/estimator/training_test.py +++ b/tensorflow/python/estimator/training_test.py @@ -253,7 +253,7 @@ def test_train_with_train_spec(self, mock_server, unused_mock_sleep): config=test.mock.ANY, start=False) - mock_server_instance.start.assert_called() + self.assertTrue(mock_server_instance.start.called) mock_est.train.assert_called_with(input_fn=train_spec.input_fn, max_steps=train_spec.max_steps, @@ -365,7 +365,7 @@ def test_delay_for_worker(self, _): with test.mock.patch.object(time, 'sleep') as mock_sleep: mock_sleep.side_effect = lambda s: self.assertEqual(expected_secs, s) self._run_task(executor) - mock_sleep.assert_called() + self.assertTrue(mock_sleep.called) class TrainingExecutorRunChiefTest(_TrainingExecutorTrainingTest, @@ -546,8 +546,8 @@ def test_std_server(self, mock_server): config=test.mock.ANY, start=False) - mock_server_instance.start.assert_called() - mock_server_instance.join.assert_called() + self.assertTrue(mock_server_instance.start.called) + self.assertTrue(mock_server_instance.join.called) def test_fail_with_empty_cluster_spec(self): mock_est = test.mock.Mock(spec=estimator_lib.Estimator) From a1a5ed233ccbc05250020b3518f8405b32d6822e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 22 Sep 2017 10:27:53 -0700 Subject: [PATCH 10/14] [XLA] Add show_metadata argument to hlo_graph_dumper::DumpGraph. 
PiperOrigin-RevId: 169705577 --- tensorflow/compiler/xla/service/hlo_graph_dumper.cc | 13 +++++++------ tensorflow/compiler/xla/service/hlo_graph_dumper.h | 3 ++- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index f75c82006f5abc..bba6fbfae04a87 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -1223,7 +1223,8 @@ XLA_REGISTER_GRAPH_RENDERER(FileGraphRenderer, 0); string DumpGraph(const HloComputation& computation, const string& label, const DebugOptions& debug_options, - const HloExecutionProfile* hlo_execution_profile) { + const HloExecutionProfile* hlo_execution_profile, + bool show_metadata) { string graph; string graph_url; if (debug_options.xla_hlo_dump_as_graphdef()) { @@ -1237,11 +1238,11 @@ string DumpGraph(const HloComputation& computation, const string& label, graph_url = FileGraphRenderer().RenderGraph( graph, GraphRendererInterface::TF_GRAPHDEF, debug_options); } else { - graph = HloDotDumper( - &computation, label, - /*show_addresses=*/debug_options.xla_hlo_graph_addresses(), - /*show_metadata=*/false, hlo_execution_profile, NodeFilter()) - .Dump(); + graph = + HloDotDumper(&computation, label, + /*show_addresses=*/debug_options.xla_hlo_graph_addresses(), + show_metadata, hlo_execution_profile, NodeFilter()) + .Dump(); graph_url = GetGraphRenderer()->RenderGraph( graph, GraphRendererInterface::DOT_GRAPH, debug_options); } diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.h b/tensorflow/compiler/xla/service/hlo_graph_dumper.h index a17ede7f0a0e71..dd304ec76cd903 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.h +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.h @@ -55,7 +55,8 @@ string MaybeDumpHloModule(const HloModule& module, const string& label, // registry is used. string DumpGraph(const HloComputation& computation, const string& label, const DebugOptions& debug_options, - const HloExecutionProfile* hlo_execution_profile = nullptr); + const HloExecutionProfile* hlo_execution_profile = nullptr, + bool show_metadata = false); // Like DumpGraph, but renders only nodes "near" the given node in the graph. // From f69e32f2e4e544ee29cbfccfd448341c01fe1355 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Fri, 22 Sep 2017 10:32:13 -0700 Subject: [PATCH 11/14] Hide "re" from the docs system. PiperOrigin-RevId: 169706174 --- .../contrib/meta_graph_transform/meta_graph_transform.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/meta_graph_transform/meta_graph_transform.py b/tensorflow/contrib/meta_graph_transform/meta_graph_transform.py index f914391eccd2cb..ff4afbb4ce13c4 100644 --- a/tensorflow/contrib/meta_graph_transform/meta_graph_transform.py +++ b/tensorflow/contrib/meta_graph_transform/meta_graph_transform.py @@ -20,7 +20,7 @@ from __future__ import print_function -import re +import re as _re from tensorflow.core.framework import graph_pb2 as _graph_pb2 from tensorflow.core.protobuf import meta_graph_pb2 as _meta_graph_pb2 @@ -644,7 +644,7 @@ def _is_removed_mentioned(s, removed_op_names): # /foo/bar. This regex ensures that we handle these two nodes # as separate entities. It matches on nodes having names in the form of # '/foo/bar_x' as well as nodes having names in the form of 'foo.' 
- s_names = re.findall(r'((?:[\/]?[a-zA-Z0-9\_]*)*)', compat.as_str_any(s)) + s_names = _re.findall(r'((?:[\/]?[a-zA-Z0-9\_]*)*)', compat.as_str_any(s)) for removed_op_name in removed_op_names: for s_name in s_names: if s_name.endswith(removed_op_name): From 2a7a3a3e4ec7904ee4303b380141eaef87ff617e Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Fri, 22 Sep 2017 10:34:47 -0700 Subject: [PATCH 12/14] Add libtensorflow_framework.so to a bunch of genrules Fixes hermetic builds PiperOrigin-RevId: 169706567 --- tensorflow/java/src/gen/gen_ops.bzl | 13 ++++------ tensorflow/python/eager/gen_op.bzl | 3 ++- tensorflow/tensorflow.bzl | 39 +++++++++++++---------------- tensorflow/tools/lib_package/BUILD | 3 ++- 4 files changed, 27 insertions(+), 31 deletions(-) diff --git a/tensorflow/java/src/gen/gen_ops.bzl b/tensorflow/java/src/gen/gen_ops.bzl index 1d62e5bacb6a19..28f0908ec4a7a0 100644 --- a/tensorflow/java/src/gen/gen_ops.bzl +++ b/tensorflow/java/src/gen/gen_ops.bzl @@ -1,9 +1,9 @@ # -*- Python -*- -load("//tensorflow:tensorflow.bzl", "tf_cc_binary", "tf_copts") -load( - "//tensorflow/core:platform/default/build_config_root.bzl", - "if_static") +load("//tensorflow:tensorflow.bzl", + "tf_binary_additional_srcs", + "tf_cc_binary", + "tf_copts") # Given a list of "ops_libs" (a list of files in the core/ops directory # without their .cc extensions), generate Java wrapper code for all operations @@ -54,10 +54,7 @@ def tf_java_op_gen_srcjar(name, gen_srcjar = out_dir + name + ".srcjar" gen_cmds += ["$(location @local_jdk//:jar) cMf $(location :" + gen_srcjar + ") -C $(@D) ."] gen_tools += ["@local_jdk//:jar"] + ["@local_jdk//:jdk"] - gen_tools += if_static( - extra_deps=[], - otherwise=["//tensorflow:libtensorflow_framework.so"] - ) + gen_tools += tf_binary_additional_srcs() native.genrule( name=name, outs=[gen_srcjar], diff --git a/tensorflow/python/eager/gen_op.bzl b/tensorflow/python/eager/gen_op.bzl index cb725dbf8cb749..1c99d342befaf0 100644 --- a/tensorflow/python/eager/gen_op.bzl +++ b/tensorflow/python/eager/gen_op.bzl @@ -2,6 +2,7 @@ load("//tensorflow:tensorflow.bzl", "clean_dep", + "tf_binary_additional_srcs", "tf_copts", "tf_cc_binary") @@ -32,7 +33,7 @@ def tfe_gen_op_wrapper_py(name, native.genrule( name=name + "_pygenrule", outs=[out], - tools=[tool_name], + tools=[tool_name] + tf_binary_additional_srcs(), cmd=("$(location " + tool_name + ") > $@")) # Make a py_library out of the generated python file. diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 8eb3e661c0c779..a308688790ce9a 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -238,14 +238,22 @@ def _rpath_linkopts(name): }) +# Bazel-generated shared objects which must be linked into TensorFlow binaries +# to define symbols from //tensorflow/core:framework and //tensorflow/core:lib. +def tf_binary_additional_srcs(): + return if_static( + extra_deps=[], + otherwise=[ + clean_dep("//tensorflow:libtensorflow_framework.so"), + ]) + + def tf_cc_shared_object( name, srcs=[], deps=[], linkopts=[], - framework_so=if_static( - extra_deps=[], - otherwise=["//tensorflow:libtensorflow_framework.so"]), + framework_so=tf_binary_additional_srcs(), **kwargs): native.cc_binary( name=name, @@ -262,16 +270,6 @@ def tf_cc_shared_object( **kwargs) -# Bazel-generated shared objects which must be linked into TensorFlow binaries -# to define symbols from //tensorflow/core:framework and //tensorflow/core:lib. 
-def _binary_additional_srcs(): - return if_static( - extra_deps=[], - otherwise=[ - clean_dep("//tensorflow:libtensorflow_framework.so"), - ]) - - # Links in the framework shared object # (//third_party/tensorflow:libtensorflow_framework.so) when not building # statically. Also adds linker options (rpaths) so that the framework shared @@ -283,7 +281,7 @@ def tf_cc_binary(name, **kwargs): native.cc_binary( name=name, - srcs=srcs + _binary_additional_srcs(), + srcs=srcs + tf_binary_additional_srcs(), deps=deps + if_mkl( [ "//third_party/mkl:intel_binary_blob", @@ -292,7 +290,6 @@ def tf_cc_binary(name, linkopts=linkopts + _rpath_linkopts(name), **kwargs) - def tf_gen_op_wrapper_cc(name, out_ops_file, pkg="", @@ -324,7 +321,7 @@ def tf_gen_op_wrapper_cc(name, out_ops_file + "_internal.h", out_ops_file + "_internal.cc" ], srcs=srcs, - tools=[":" + tool] + _binary_additional_srcs(), + tools=[":" + tool] + tf_binary_additional_srcs(), cmd=("$(location :" + tool + ") $(location :" + out_ops_file + ".h) " + "$(location :" + out_ops_file + ".cc) " + override_arg + " " + str(include_internal_ops))) @@ -488,14 +485,14 @@ def tf_gen_op_wrapper_py(name, name=name + "_pygenrule", outs=[out], srcs=[hidden_file], - tools=[tool_name], + tools=[tool_name] + tf_binary_additional_srcs(), cmd=("$(location " + tool_name + ") @$(location " + hidden_file + ") " + ("1" if require_shape_functions else "0") + " > $@")) else: native.genrule( name=name + "_pygenrule", outs=[out], - tools=[tool_name], + tools=[tool_name] + tf_binary_additional_srcs(), cmd=("$(location " + tool_name + ") " + op_list_arg + " " + ("1" if require_shape_functions else "0") + " " + ("1" if op_list_is_whitelist else "0") + " > $@")) @@ -532,7 +529,7 @@ def tf_cc_test(name, **kwargs): native.cc_test( name="%s%s" % (name, suffix), - srcs=srcs + _binary_additional_srcs(), + srcs=srcs + tf_binary_additional_srcs(), copts=tf_copts() + extra_copts, linkopts=["-lpthread", "-lm"] + linkopts + _rpath_linkopts(name), deps=deps + if_mkl( @@ -626,7 +623,7 @@ def tf_cuda_only_cc_test(name, linkopts=[]): native.cc_test( name="%s%s" % (name, "_gpu"), - srcs=srcs + _binary_additional_srcs(), + srcs=srcs + tf_binary_additional_srcs(), size=size, args=args, copts= _cuda_copts() + tf_copts(), @@ -712,7 +709,7 @@ def tf_java_test(name, native.java_test( name=name, srcs=srcs, - deps=deps + _binary_additional_srcs(), + deps=deps + tf_binary_additional_srcs(), *args, **kwargs) diff --git a/tensorflow/tools/lib_package/BUILD b/tensorflow/tools/lib_package/BUILD index d522c9d395afb4..845bad5e499025 100644 --- a/tensorflow/tools/lib_package/BUILD +++ b/tensorflow/tools/lib_package/BUILD @@ -4,6 +4,7 @@ package(default_visibility = ["//visibility:private"]) load("@bazel_tools//tools/build_defs/pkg:pkg.bzl", "pkg_tar") +load("//tensorflow:tensorflow.bzl", "tf_binary_additional_srcs") load("//third_party/mkl:build_defs.bzl", "if_mkl") genrule( @@ -48,7 +49,7 @@ pkg_tar( # Shared objects that all TensorFlow libraries depend on. pkg_tar( name = "common_deps", - files = ["//tensorflow:libtensorflow_framework.so"], + files = tf_binary_additional_srcs(), tags = ["manual"], ) From 7b2f14a73d336dc0a98bbb69ecef9338568c839c Mon Sep 17 00:00:00 2001 From: Michael Case Date: Fri, 22 Sep 2017 10:46:42 -0700 Subject: [PATCH 13/14] Fix sometimes overwriting the default bazel config arguments in Jenkins build script. 
PiperOrigin-RevId: 169708266 --- tensorflow/tools/ci_build/ci_parameterized_build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh index a12b80de16d420..7a1479c150488d 100755 --- a/tensorflow/tools/ci_build/ci_parameterized_build.sh +++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh @@ -351,7 +351,7 @@ if [[ "${TF_BUILD_APPEND_ARGUMENTS}" == *"--test_tag_filters="* ]]; then fi done else - EXTRA_ARGS="${TF_BUILD_APPEND_ARGUMENTS} --test_tag_filters=-no_oss,-oss_serial,-benchmark-test" + EXTRA_ARGS="${EXTRA_ARGS} ${TF_BUILD_APPEND_ARGUMENTS} --test_tag_filters=-no_oss,-oss_serial,-benchmark-test" if [[ ${IS_MAC} == "1" ]]; then EXTRA_ARGS="${EXTRA_ARGS},-nomac" fi From f927a72031ce563d65cf8864fe142ceb173444f5 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Fri, 22 Sep 2017 11:15:02 -0700 Subject: [PATCH 14/14] Add ltensorflow_framework to the test_user_ops GPU build PiperOrigin-RevId: 169712715 --- tensorflow/tools/ci_build/builds/test_user_ops.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/tools/ci_build/builds/test_user_ops.sh b/tensorflow/tools/ci_build/builds/test_user_ops.sh index 479b1931ca15c2..4f1c61b8e9a799 100755 --- a/tensorflow/tools/ci_build/builds/test_user_ops.sh +++ b/tensorflow/tools/ci_build/builds/test_user_ops.sh @@ -203,8 +203,8 @@ else USER_OP_SO="add_one.so" "${GPP_BIN}" -std=c++11 ${EXTRA_GPP_FLAGS} \ -shared -o "${USER_OP_SO}" "${OP_KERNEL_CC}" \ - "${OP_KERNEL_O}" ${TF_INCLUDE_PATH} -L "${CUDA_LIB_DIR}" \ - -fPIC -lcudart || \ + "${OP_KERNEL_O}" ${TF_INCLUDE_PATH} -L "${CUDA_LIB_DIR}" -L "${TF_LIB}" \ + -fPIC -lcudart -ltensorflow_framework || \ die "g++ compilation of ${OP_KERNEL_CC}" FAILED fi