From 10632be3bedb5b8e196d98429795ca9108cace5a Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Fri, 21 Jul 2017 10:06:29 -1000 Subject: [PATCH 1/8] fix buf address for tuples (#11625) --- tensorflow/compiler/plugin/executor/executable.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/plugin/executor/executable.cc b/tensorflow/compiler/plugin/executor/executable.cc index 4673a90e0a9251..4f1f0d99f97304 100644 --- a/tensorflow/compiler/plugin/executor/executable.cc +++ b/tensorflow/compiler/plugin/executor/executable.cc @@ -47,13 +47,14 @@ static se::DeviceMemoryBase AllocateOutputBuffer(sep::ExecutorExecutor* executor } else { int64 size(xla::ShapeUtil::ByteSizeOf(shape, sizeof(void*))); void** buf = reinterpret_cast(executor->Allocate(size)); + void** buf_rc = buf; for (int64 n = 0; n < xla::ShapeUtil::TupleElementCount(shape); n++) { se::DeviceMemoryBase out = AllocateSingleOutput(executor, literal.tuple_literals(n)); *buf++ = out.opaque(); } - return se::DeviceMemoryBase(buf, size); + return se::DeviceMemoryBase(buf_rc, size); } } From 557f4b8654503a9e281f7e36b6f0680d1cb29d02 Mon Sep 17 00:00:00 2001 From: Fred Reiss Date: Fri, 21 Jul 2017 23:06:42 +0300 Subject: [PATCH 2/8] Added comment to clarify semantics of opt_level. (#11607) --- tensorflow/core/protobuf/config.proto | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/core/protobuf/config.proto b/tensorflow/core/protobuf/config.proto index 1bcabf8e1d79ea..69311e3a7f31f6 100644 --- a/tensorflow/core/protobuf/config.proto +++ b/tensorflow/core/protobuf/config.proto @@ -102,6 +102,8 @@ message OptimizerOptions { L0 = -1; } + // Overall optimization level. The actual optimizations applied will be the + // logical OR of the flags that this level implies and any flags already set. Level opt_level = 3; // Control the use of the compiler/jit. Experimental. 
From 107e30732bace4099072d916bf5c794d7eeeceb5 Mon Sep 17 00:00:00 2001 From: resec Date: Sat, 22 Jul 2017 04:07:40 +0800 Subject: [PATCH 3/8] Add logical_or and less_equal ops to the list (#11631) Add the following op implementations to the list: tensorflow/core/kernels/cwise_op_logical_or.cc tensorflow/core/kernels/cwise_op_less_equal.cc When using the Android and iOS libraries, these ops are not registered, whereas they are available in the full version. --- tensorflow/contrib/makefile/tf_op_files.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/contrib/makefile/tf_op_files.txt b/tensorflow/contrib/makefile/tf_op_files.txt index 4a3b3e77628ac8..d3cc61ce290da5 100644 --- a/tensorflow/contrib/makefile/tf_op_files.txt +++ b/tensorflow/contrib/makefile/tf_op_files.txt @@ -143,8 +143,10 @@ tensorflow/core/kernels/cwise_op_minimum.cc tensorflow/core/kernels/cwise_op_maximum.cc tensorflow/core/kernels/cwise_op_logical_not.cc tensorflow/core/kernels/cwise_op_logical_and.cc +tensorflow/core/kernels/cwise_op_logical_or.cc tensorflow/core/kernels/cwise_op_log.cc tensorflow/core/kernels/cwise_op_less.cc +tensorflow/core/kernels/cwise_op_less_equal.cc tensorflow/core/kernels/cwise_op_isfinite.cc tensorflow/core/kernels/cwise_op_invert.cc tensorflow/core/kernels/cwise_op_greater_equal.cc From 20669f62db607d60fed52c70aa1dcd1c7b3c53d5 Mon Sep 17 00:00:00 2001 From: resec Date: Sat, 22 Jul 2017 04:08:21 +0800 Subject: [PATCH 4/8] Add interface for long(int64) datatype (#11632) Added the following methods to TensorFlowInferenceInterface: public void feed(String inputName, long[] src, long... dims) public void feed(String inputName, LongBuffer src, long... dims) public void fetch(String outputName, long[] dst) public void fetch(String outputName, LongBuffer dst) They are useful when handling int64 Tensors (e.g. the output of tf.argmax). I could not find a unit test file for TensorFlowInferenceInterface.java, so I did not add a test case for the change.
But this change is very straightforward. Please let me know if I missed anything. Thanks. --- .../android/TensorFlowInferenceInterface.java | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/tensorflow/contrib/android/java/org/tensorflow/contrib/android/TensorFlowInferenceInterface.java b/tensorflow/contrib/android/java/org/tensorflow/contrib/android/TensorFlowInferenceInterface.java index b1d18d2faf8d57..587f2941e5f0ef 100644 --- a/tensorflow/contrib/android/java/org/tensorflow/contrib/android/TensorFlowInferenceInterface.java +++ b/tensorflow/contrib/android/java/org/tensorflow/contrib/android/TensorFlowInferenceInterface.java @@ -27,6 +27,7 @@ import java.nio.DoubleBuffer; import java.nio.FloatBuffer; import java.nio.IntBuffer; +import java.nio.LongBuffer; import java.util.ArrayList; import java.util.List; import org.tensorflow.DataType; @@ -226,6 +227,16 @@ public void feed(String inputName, int[] src, long... dims) { addFeed(inputName, Tensor.create(dims, IntBuffer.wrap(src))); } + /** + * Given a source array with shape {@link dims} and content {@link src}, copy the contents into + * the input Tensor with name {@link inputName}. The source array {@link src} must have at least + * as many elements as that of the destination Tensor. If {@link src} has more elements than the + * destination has capacity, the copy is truncated. + */ + public void feed(String inputName, long[] src, long... dims) { + addFeed(inputName, Tensor.create(dims, LongBuffer.wrap(src))); + } + /** * Given a source array with shape {@link dims} and content {@link src}, copy the contents into * the input Tensor with name {@link inputName}. The source array {@link src} must have at least @@ -270,6 +281,17 @@ public void feed(String inputName, IntBuffer src, long...
dims) { addFeed(inputName, Tensor.create(dims, src)); } + /** + * Given a source buffer with shape {@link dims} and content {@link src}, both stored as + * direct and native ordered java.nio buffers, copy the contents into the input + * Tensor with name {@link inputName}. The source buffer {@link src} must have at least as many + * elements as that of the destination Tensor. If {@link src} has more elements than the + * destination has capacity, the copy is truncated. + */ + public void feed(String inputName, LongBuffer src, long... dims) { + addFeed(inputName, Tensor.create(dims, src)); + } + /** * Given a source buffer with shape {@link dims} and content {@link src}, both stored as * direct and native ordered java.nio buffers, copy the contents into the input @@ -310,6 +332,15 @@ public void fetch(String outputName, int[] dst) { fetch(outputName, IntBuffer.wrap(dst)); } + /** + * Read from a Tensor named {@link outputName} and copy the contents into a Java array. {@link + * dst} must have length greater than or equal to that of the source Tensor. This operation will + * not affect dst's content past the source Tensor's size. + */ + public void fetch(String outputName, long[] dst) { + fetch(outputName, LongBuffer.wrap(dst)); + } + /** * Read from a Tensor named {@link outputName} and copy the contents into a Java array. {@link * dst} must have length greater than or equal to that of the source Tensor. This operation will @@ -348,6 +379,16 @@ public void fetch(String outputName, IntBuffer dst) { getTensor(outputName).writeTo(dst); } + /** + * Read from a Tensor named {@link outputName} and copy the contents into the direct and + * native ordered java.nio buffer {@link dst}. {@link dst} must have capacity greater than + * or equal to that of the source Tensor. This operation will not affect dst's content past the + * source Tensor's size. 
+ */ + public void fetch(String outputName, LongBuffer dst) { + getTensor(outputName).writeTo(dst); + } + /** * Read from a Tensor named {@link outputName} and copy the contents into the direct and * native ordered java.nio buffer {@link dst}. {@link dst} must have capacity greater than From b65d9ec2b78c7c23e368ed4eec7b4deb89dcd712 Mon Sep 17 00:00:00 2001 From: Batchu Venkat Vishal Date: Sat, 22 Jul 2017 01:59:29 +0530 Subject: [PATCH 5/8] Fix value error generated on is_scalar check (#10391) * Fix value error generated on is_scalar check `is_scalar = shape is not None and not shape` raises a value error when shape is a scalar, "ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()" * Update variable_scope.py * Fix --- tensorflow/python/ops/variable_scope.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py index c2193a24d0505e..7077f679adae63 100644 --- a/tensorflow/python/ops/variable_scope.py +++ b/tensorflow/python/ops/variable_scope.py @@ -300,7 +300,8 @@ def _true_getter(name, shape=None, dtype=dtypes.float32, # pylint: disable=miss initializer=None, regularizer=None, reuse=None, trainable=True, collections=None, caching_device=None, partitioner=None, validate_shape=True, use_resource=None): - is_scalar = shape is not None and not shape + is_scalar = (shape is not None and isinstance(shape, collections_lib.Sequence) + and len(shape) == 0) # Partitioned variable case if partitioner is not None and not is_scalar: if not callable(partitioner): From eaa7a8ed93ccac595eab412dc7e021202c155f5d Mon Sep 17 00:00:00 2001 From: abenmao Date: Sat, 22 Jul 2017 12:40:33 +0800 Subject: [PATCH 6/8] grappler swap_to_host bug fix (#11283) --- tensorflow/core/grappler/optimizers/memory_optimizer.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer.cc 
b/tensorflow/core/grappler/optimizers/memory_optimizer.cc index 31e525fb4a8d58..ae5b8123c6c2cc 100644 --- a/tensorflow/core/grappler/optimizers/memory_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/memory_optimizer.cc @@ -490,6 +490,9 @@ std::pair BuildSwapPair(NodeDef* node, int input_to_swap, (*swap_in_node->mutable_attr())["_class"].mutable_list()->add_s(coloc_group); (*node->mutable_attr())["_class"].mutable_list()->add_s(coloc_group); + const DataType input_type = node->attr().at("T").type(); + (*swap_in_node->mutable_attr())["T"].set_type(input_type); + (*swap_out_node->mutable_attr())["T"].set_type(input_type); return std::make_pair(swap_out_node, swap_in_node); } From 2b47c6b3390cc333a340afa40b567cb4a2dde1b6 Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Sat, 22 Jul 2017 14:44:43 -0700 Subject: [PATCH 7/8] More fixes to arm build scripts; move tools/arm_compiler -> third_party/toolchains/cpus/arm. --- tensorflow/workspace.bzl | 13 ++++++++----- .../toolchains/cpus/arm}/BUILD | 0 .../toolchains/cpus/arm}/CROSSTOOL.tpl | 0 .../toolchains/cpus/arm}/arm_compiler_configure.bzl | 2 +- .../toolchains/cpus/arm}/build_raspberry_pi.sh | 4 +--- 5 files changed, 10 insertions(+), 9 deletions(-) rename {tools/arm_compiler => third_party/toolchains/cpus/arm}/BUILD (100%) rename {tools/arm_compiler => third_party/toolchains/cpus/arm}/CROSSTOOL.tpl (100%) rename {tools/arm_compiler => third_party/toolchains/cpus/arm}/arm_compiler_configure.bzl (91%) rename {tools/arm_compiler => third_party/toolchains/cpus/arm}/build_raspberry_pi.sh (99%) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index e031f9b89ab889..7b9f5d74cf86fb 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -2,10 +2,12 @@ load("//third_party/gpus:cuda_configure.bzl", "cuda_configure") load("//third_party/sycl:sycl_configure.bzl", "sycl_configure") -load("@io_bazel_rules_closure//closure/private:java_import_external.bzl", "java_import_external") 
+load("@io_bazel_rules_closure//closure/private:java_import_external.bzl", + "java_import_external") load("@io_bazel_rules_closure//closure:defs.bzl", "filegroup_external") load("//third_party/py:python_configure.bzl", "python_configure") -load("//tools/arm_compiler:arm_compiler_configure.bzl", "arm_compiler_configure") +load("//third_party/toolchains/cpus/arm:arm_compiler_configure.bzl", + "arm_compiler_configure") def _is_windows(repository_ctx): @@ -142,9 +144,10 @@ def tf_workspace(path_prefix="", tf_repo_name=""): python_configure(name="local_config_python") # Point //external/local_config_arm_compiler to //external/arm_compiler - arm_compiler_configure(name="local_config_arm_compiler", - remote_config_repo="../arm_compiler", - build_file = str(Label("//tools/arm_compiler:BUILD"))) + arm_compiler_configure( + name="local_config_arm_compiler", + remote_config_repo="../arm_compiler", + build_file = str(Label("//third_party/toolchains/cpus/arm:BUILD"))) if path_prefix: print("path_prefix was specified to tf_workspace but is no longer used " + diff --git a/tools/arm_compiler/BUILD b/third_party/toolchains/cpus/arm/BUILD similarity index 100% rename from tools/arm_compiler/BUILD rename to third_party/toolchains/cpus/arm/BUILD diff --git a/tools/arm_compiler/CROSSTOOL.tpl b/third_party/toolchains/cpus/arm/CROSSTOOL.tpl similarity index 100% rename from tools/arm_compiler/CROSSTOOL.tpl rename to third_party/toolchains/cpus/arm/CROSSTOOL.tpl diff --git a/tools/arm_compiler/arm_compiler_configure.bzl b/third_party/toolchains/cpus/arm/arm_compiler_configure.bzl similarity index 91% rename from tools/arm_compiler/arm_compiler_configure.bzl rename to third_party/toolchains/cpus/arm/arm_compiler_configure.bzl index 9fa7fc54e87ca2..5eb3b7bb1c6467 100644 --- a/tools/arm_compiler/arm_compiler_configure.bzl +++ b/third_party/toolchains/cpus/arm/arm_compiler_configure.bzl @@ -6,7 +6,7 @@ def _tpl(repository_ctx, tpl, substitutions={}, out=None): out = tpl 
repository_ctx.template( out, - Label("//tools/arm_compiler:%s.tpl" % tpl), + Label("//third_party/toolchains/cpus/arm:%s.tpl" % tpl), substitutions) diff --git a/tools/arm_compiler/build_raspberry_pi.sh b/third_party/toolchains/cpus/arm/build_raspberry_pi.sh similarity index 99% rename from tools/arm_compiler/build_raspberry_pi.sh rename to third_party/toolchains/cpus/arm/build_raspberry_pi.sh index 97034c68bef44b..270499d387f683 100755 --- a/tools/arm_compiler/build_raspberry_pi.sh +++ b/third_party/toolchains/cpus/arm/build_raspberry_pi.sh @@ -7,7 +7,7 @@ set -e # To install the architecture includes for python on ubuntu trusty; # run: # sudo dpkg --add-architecture armhf -# echo "deb [arch=armhf] http://ports.ubuntu.com/ trusty main universe" \ +# echo "deb [arch=armhf] http://ports.ubuntu.com/ trusty main universe" | \ # sudo tee -a /etc/apt/sources.list.d/armhf.list # # Ignore errors about missing armhf packages in other repos. # sudo aptitude update @@ -40,5 +40,3 @@ cp bazel-bin/tensorflow/tools/benchmark/benchmark_model "${TMPDIR}" echo "Output can be found here:" find "${TMPDIR}" - - From cd4008f8865cb1b1d0876fa1b3595b22a246961f Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Sat, 22 Jul 2017 14:50:44 -0700 Subject: [PATCH 8/8] Update comments in build_raspberry_pi: get ubuntu ports for your lsb codename. 
--- third_party/toolchains/cpus/arm/build_raspberry_pi.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/toolchains/cpus/arm/build_raspberry_pi.sh b/third_party/toolchains/cpus/arm/build_raspberry_pi.sh index 270499d387f683..251a53472a2e6b 100755 --- a/third_party/toolchains/cpus/arm/build_raspberry_pi.sh +++ b/third_party/toolchains/cpus/arm/build_raspberry_pi.sh @@ -7,7 +7,7 @@ set -e # To install the architecture includes for python on ubuntu trusty; # run: # sudo dpkg --add-architecture armhf -# echo "deb [arch=armhf] http://ports.ubuntu.com/ trusty main universe" | \ +# echo "deb [arch=armhf] http://ports.ubuntu.com/ $(lsb_release -s -c) main universe" | \ # sudo tee -a /etc/apt/sources.list.d/armhf.list # # Ignore errors about missing armhf packages in other repos. # sudo aptitude update