Merge remote-tracking branch 'upstream/tf_master' into tf_master
petewarden committed Jul 23, 2017
2 parents 298cf40 + cd4008f commit 3c379a6
Showing 11 changed files with 62 additions and 11 deletions.
3 changes: 2 additions & 1 deletion tensorflow/compiler/plugin/executor/executable.cc
@@ -47,13 +47,14 @@ static se::DeviceMemoryBase AllocateOutputBuffer(sep::ExecutorExecutor* executor
   } else {
     int64 size(xla::ShapeUtil::ByteSizeOf(shape, sizeof(void*)));
     void** buf = reinterpret_cast<void**>(executor->Allocate(size));
+    void** buf_rc = buf;
     for (int64 n = 0; n < xla::ShapeUtil::TupleElementCount(shape); n++) {
       se::DeviceMemoryBase out =
           AllocateSingleOutput(executor, literal.tuple_literals(n));
       *buf++ = out.opaque();
     }
 
-    return se::DeviceMemoryBase(buf, size);
+    return se::DeviceMemoryBase(buf_rc, size);
   }
 }

@@ -27,6 +27,7 @@
 import java.nio.DoubleBuffer;
 import java.nio.FloatBuffer;
 import java.nio.IntBuffer;
+import java.nio.LongBuffer;
 import java.util.ArrayList;
 import java.util.List;
 import org.tensorflow.DataType;
@@ -226,6 +227,16 @@ public void feed(String inputName, int[] src, long... dims) {
     addFeed(inputName, Tensor.create(dims, IntBuffer.wrap(src)));
   }
 
+  /**
+   * Given a source array with shape {@link dims} and content {@link src}, copy the contents into
+   * the input Tensor with name {@link inputName}. The source array {@link src} must have at least
+   * as many elements as that of the destination Tensor. If {@link src} has more elements than the
+   * destination has capacity, the copy is truncated.
+   */
+  public void feed(String inputName, long[] src, long... dims) {
+    addFeed(inputName, Tensor.create(dims, LongBuffer.wrap(src)));
+  }
+
   /**
    * Given a source array with shape {@link dims} and content {@link src}, copy the contents into
    * the input Tensor with name {@link inputName}. The source array {@link src} must have at least
@@ -270,6 +281,17 @@ public void feed(String inputName, IntBuffer src, long... dims) {
     addFeed(inputName, Tensor.create(dims, src));
   }
 
+  /**
+   * Given a source buffer with shape {@link dims} and content {@link src}, both stored as
+   * <b>direct</b> and <b>native ordered</b> java.nio buffers, copy the contents into the input
+   * Tensor with name {@link inputName}. The source buffer {@link src} must have at least as many
+   * elements as that of the destination Tensor. If {@link src} has more elements than the
+   * destination has capacity, the copy is truncated.
+   */
+  public void feed(String inputName, LongBuffer src, long... dims) {
+    addFeed(inputName, Tensor.create(dims, src));
+  }
+
   /**
    * Given a source buffer with shape {@link dims} and content {@link src}, both stored as
    * <b>direct</b> and <b>native ordered</b> java.nio buffers, copy the contents into the input
@@ -310,6 +332,15 @@ public void fetch(String outputName, int[] dst) {
     fetch(outputName, IntBuffer.wrap(dst));
   }
 
+  /**
+   * Read from a Tensor named {@link outputName} and copy the contents into a Java array. {@link
+   * dst} must have length greater than or equal to that of the source Tensor. This operation will
+   * not affect dst's content past the source Tensor's size.
+   */
+  public void fetch(String outputName, long[] dst) {
+    fetch(outputName, LongBuffer.wrap(dst));
+  }
+
   /**
    * Read from a Tensor named {@link outputName} and copy the contents into a Java array. {@link
    * dst} must have length greater than or equal to that of the source Tensor. This operation will
@@ -348,6 +379,16 @@ public void fetch(String outputName, IntBuffer dst) {
     getTensor(outputName).writeTo(dst);
   }
 
+  /**
+   * Read from a Tensor named {@link outputName} and copy the contents into the <b>direct</b> and
+   * <b>native ordered</b> java.nio buffer {@link dst}. {@link dst} must have capacity greater than
+   * or equal to that of the source Tensor. This operation will not affect dst's content past the
+   * source Tensor's size.
+   */
+  public void fetch(String outputName, LongBuffer dst) {
+    getTensor(outputName).writeTo(dst);
+  }
+
   /**
    * Read from a Tensor named {@link outputName} and copy the contents into the <b>direct</b> and
    * <b>native ordered</b> java.nio buffer {@link dst}. {@link dst} must have capacity greater than
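Taken together, these four additions give the Android Java inference class first-class int64 support, mirroring the existing int/float/double overloads. A minimal usage sketch, not part of the diff, assuming these methods live on the Android TensorFlowInferenceInterface class with its constructor-based API; the asset path, the tensor names "input" and "output", and the shapes are hypothetical:

    import android.content.res.AssetManager;
    import org.tensorflow.contrib.android.TensorFlowInferenceInterface;

    public final class Int64FeedFetchExample {
      // Sketch only: model path, tensor names, and shapes are hypothetical.
      public static long[] runOnce(AssetManager assets) {
        TensorFlowInferenceInterface inference =
            new TensorFlowInferenceInterface(assets, "file:///android_asset/model.pb");

        long[] input = {1L, 2L, 3L, 4L};
        inference.feed("input", input, 2, 2);    // copied into a 2x2 int64 Tensor

        inference.run(new String[] {"output"});  // run the graph up to "output"

        long[] output = new long[4];             // at least as large as the output Tensor
        inference.fetch("output", output);       // copy the int64 result back out
        return output;
      }
    }

Callers that already hold a direct, native-ordered LongBuffer can use the buffer overloads and skip the array wrap.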
2 changes: 2 additions & 0 deletions tensorflow/contrib/makefile/tf_op_files.txt
@@ -143,8 +143,10 @@ tensorflow/core/kernels/cwise_op_minimum.cc
 tensorflow/core/kernels/cwise_op_maximum.cc
 tensorflow/core/kernels/cwise_op_logical_not.cc
 tensorflow/core/kernels/cwise_op_logical_and.cc
+tensorflow/core/kernels/cwise_op_logical_or.cc
 tensorflow/core/kernels/cwise_op_log.cc
 tensorflow/core/kernels/cwise_op_less.cc
+tensorflow/core/kernels/cwise_op_less_equal.cc
 tensorflow/core/kernels/cwise_op_isfinite.cc
 tensorflow/core/kernels/cwise_op_invert.cc
 tensorflow/core/kernels/cwise_op_greater_equal.cc
3 changes: 3 additions & 0 deletions tensorflow/core/grappler/optimizers/memory_optimizer.cc
@@ -490,6 +490,9 @@ std::pair<NodeDef*, NodeDef*> BuildSwapPair(NodeDef* node, int input_to_swap,
   (*swap_in_node->mutable_attr())["_class"].mutable_list()->add_s(coloc_group);
   (*node->mutable_attr())["_class"].mutable_list()->add_s(coloc_group);
 
+  const DataType input_type = node->attr().at("T").type();
+  (*swap_in_node->mutable_attr())["T"].set_type(input_type);
+  (*swap_out_node->mutable_attr())["T"].set_type(input_type);
   return std::make_pair(swap_out_node, swap_in_node);
 }
 
2 changes: 2 additions & 0 deletions tensorflow/core/protobuf/config.proto
@@ -102,6 +102,8 @@ message OptimizerOptions {
     L0 = -1;
   }
 
+  // Overall optimization level. The actual optimizations applied will be the
+  // logical OR of the flags that this level implies and any flags already set.
   Level opt_level = 3;
 
   // Control the use of the compiler/jit. Experimental.
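The new comment pins down a subtlety: opt_level is additive, so selecting a level does not clear individually set flags. As an illustration, not part of the diff, a config choosing L1 plus one explicit flag might be built from Java like this, assuming the generated org.tensorflow.framework protobuf bindings that ship with the TensorFlow Java API:

    import org.tensorflow.framework.ConfigProto;
    import org.tensorflow.framework.GraphOptions;
    import org.tensorflow.framework.OptimizerOptions;

    public final class OptimizerConfigExample {
      // Sketch only: builds a ConfigProto requesting level L1 plus function inlining.
      public static ConfigProto build() {
        return ConfigProto.newBuilder()
            .setGraphOptions(
                GraphOptions.newBuilder()
                    .setOptimizerOptions(
                        OptimizerOptions.newBuilder()
                            .setOptLevel(OptimizerOptions.Level.L1)
                            // ORed with whatever flags L1 already implies.
                            .setDoFunctionInlining(true)))
            .build();
      }
    }

The serialized form (config.toByteArray()) can then be passed wherever a ConfigProto is accepted, e.g. a Session constructed with explicit config bytes.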
3 changes: 2 additions & 1 deletion tensorflow/python/ops/variable_scope.py
@@ -300,7 +300,8 @@ def _true_getter(name, shape=None, dtype=dtypes.float32,  # pylint: disable=miss
                      initializer=None, regularizer=None, reuse=None,
                      trainable=True, collections=None, caching_device=None,
                      partitioner=None, validate_shape=True, use_resource=None):
-      is_scalar = shape is not None and not shape
+      is_scalar = (shape is not None and isinstance(shape, collections_lib.Sequence)
+                   and len(shape) == 0)
       # Partitioned variable case
       if partitioner is not None and not is_scalar:
         if not callable(partitioner):
13 changes: 8 additions & 5 deletions tensorflow/workspace.bzl
@@ -2,10 +2,12 @@

load("//third_party/gpus:cuda_configure.bzl", "cuda_configure")
load("//third_party/sycl:sycl_configure.bzl", "sycl_configure")
load("@io_bazel_rules_closure//closure/private:java_import_external.bzl", "java_import_external")
load("@io_bazel_rules_closure//closure/private:java_import_external.bzl",
"java_import_external")
load("@io_bazel_rules_closure//closure:defs.bzl", "filegroup_external")
load("//third_party/py:python_configure.bzl", "python_configure")
load("//tools/arm_compiler:arm_compiler_configure.bzl", "arm_compiler_configure")
load("//third_party/toolchains/cpus/arm:arm_compiler_configure.bzl",
"arm_compiler_configure")


def _is_windows(repository_ctx):
@@ -142,9 +144,10 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
   python_configure(name="local_config_python")
 
   # Point //external/local_config_arm_compiler to //external/arm_compiler
-  arm_compiler_configure(name="local_config_arm_compiler",
-      remote_config_repo="../arm_compiler",
-      build_file = str(Label("//tools/arm_compiler:BUILD")))
+  arm_compiler_configure(
+      name="local_config_arm_compiler",
+      remote_config_repo="../arm_compiler",
+      build_file = str(Label("//third_party/toolchains/cpus/arm:BUILD")))
 
   if path_prefix:
     print("path_prefix was specified to tf_workspace but is no longer used " +
File renamed without changes.
File renamed without changes.
@@ -6,7 +6,7 @@ def _tpl(repository_ctx, tpl, substitutions={}, out=None):
     out = tpl
   repository_ctx.template(
       out,
-      Label("//tools/arm_compiler:%s.tpl" % tpl),
+      Label("//third_party/toolchains/cpus/arm:%s.tpl" % tpl),
       substitutions)


@@ -7,7 +7,7 @@ set -e
 # To install the architecture includes for python on ubuntu trusty;
 # run:
 # sudo dpkg --add-architecture armhf
-# echo "deb [arch=armhf] http://ports.ubuntu.com/ trusty main universe" \
+# echo "deb [arch=armhf] http://ports.ubuntu.com/ $(lsb_release -s -c) main universe" | \
 # sudo tee -a /etc/apt/sources.list.d/armhf.list
 # # Ignore errors about missing armhf packages in other repos.
 # sudo aptitude update
@@ -40,5 +40,3 @@ cp bazel-bin/tensorflow/tools/benchmark/benchmark_model "${TMPDIR}"

echo "Output can be found here:"
find "${TMPDIR}"

