From 45be2eab777fb58256c0f730d11cec8f0436f067 Mon Sep 17 00:00:00 2001 From: Po-Hsien Chu Date: Wed, 29 Sep 2021 01:38:45 +0800 Subject: [PATCH] Add Pixel backend (#77) * Add Pixel backend * add license header to resize_argmax_op* --- Makefile | 7 +- android/java/org/mlperf/inference/BUILD | 42 +- .../org/mlperf/inference/Backends.java.in | 2 +- mobile_back_pixel/BUILD | 9 + mobile_back_pixel/Makefile | 111 ++++ mobile_back_pixel/README.md | 14 + mobile_back_pixel/cpp/BUILD | 18 + mobile_back_pixel/cpp/backend_tflite/BUILD | 84 +++ .../cpp/backend_tflite/resize_argmax_op.cc | 362 +++++++++++++ .../cpp/backend_tflite/resize_argmax_op.h | 22 + .../cpp/backend_tflite/tflite_pixel.cc | 484 ++++++++++++++++++ .../backend_tflite/tflite_settings_pixel.h | 134 +++++ .../cpp/backend_tflite/thread_pool.h | 110 ++++ mobile_back_pixel/pixel_backend.mk | 21 + mobile_back_pixel/third_party/BUILD.bazel | 19 + .../tf_gpu_delegate_fix_from_tf_master.diff | 37 ++ .../third_party/tf_grappler_cost.diff | 19 + .../cpp/backend_tflite/tflite_settings.h | 4 +- 18 files changed, 1480 insertions(+), 19 deletions(-) create mode 100644 mobile_back_pixel/BUILD create mode 100644 mobile_back_pixel/Makefile create mode 100644 mobile_back_pixel/README.md create mode 100644 mobile_back_pixel/cpp/BUILD create mode 100644 mobile_back_pixel/cpp/backend_tflite/BUILD create mode 100644 mobile_back_pixel/cpp/backend_tflite/resize_argmax_op.cc create mode 100644 mobile_back_pixel/cpp/backend_tflite/resize_argmax_op.h create mode 100644 mobile_back_pixel/cpp/backend_tflite/tflite_pixel.cc create mode 100644 mobile_back_pixel/cpp/backend_tflite/tflite_settings_pixel.h create mode 100644 mobile_back_pixel/cpp/backend_tflite/thread_pool.h create mode 100644 mobile_back_pixel/pixel_backend.mk create mode 100644 mobile_back_pixel/third_party/BUILD.bazel create mode 100644 mobile_back_pixel/third_party/tf_gpu_delegate_fix_from_tf_master.diff create mode 100644 mobile_back_pixel/third_party/tf_grappler_cost.diff diff --git a/Makefile b/Makefile index ed3db4953..945ae3103 100644 --- a/Makefile +++ b/Makefile @@ -22,6 +22,7 @@ SAMSUNG_BACKEND= all: app include mobile_back_tflite/tflite_backend.mk +include mobile_back_pixel/pixel_backend.mk include mobile_back_qti/make/qti_backend.mk @@ -95,7 +96,7 @@ app: output/mlperf_mobile_docker_1_0.stamp ${QTI_DEPS} @mkdir -p output/home/mlperf/cache && chmod 777 output/home/mlperf/cache @docker run \ ${COMMON_DOCKER_FLAGS} \ - ${QTI_BACKEND} ${SAMSUNG_BACKEND} ${MEDIATEK_BACKEND} \ + ${QTI_BACKEND} ${SAMSUNG_BACKEND} ${MEDIATEK_BACKEND} ${PIXEL_BACKEND} \ --fat_apk_cpu=arm64-v8a \ //android/java/org/mlperf/inference:mlperf_app @cp output/`readlink bazel-bin`/android/java/org/mlperf/inference/mlperf_app.apk output/mlperf_app.apk @@ -106,7 +107,7 @@ app_x86_64: output/mlperf_mobile_docker_1_0.stamp @mkdir -p output/home/mlperf/cache && chmod 777 output/home/mlperf/cache @docker run \ ${COMMON_DOCKER_FLAGS} \ - ${QTI_BACKEND} ${SAMSUNG_BACKEND} ${MEDIATEK_BACKEND} \ + ${QTI_BACKEND} ${SAMSUNG_BACKEND} ${MEDIATEK_BACKEND} ${PIXEL_BACKEND} \ --fat_apk_cpu=x86_64 \ //android/java/org/mlperf/inference:mlperf_app @cp output/`readlink bazel-bin`/android/java/org/mlperf/inference/mlperf_app.apk output/mlperf_app_x86_64.apk @@ -117,7 +118,7 @@ test_app: output/mlperf_mobile_docker_1_0.stamp @mkdir -p output/home/mlperf/cache && chmod 777 output/home/mlperf/cache @docker run \ ${COMMON_DOCKER_FLAGS} \ - ${QTI_BACKEND} ${SAMSUNG_BACKEND} ${MEDIATEK_BACKEND} \ + ${QTI_BACKEND} ${SAMSUNG_BACKEND} 
${MEDIATEK_BACKEND} ${PIXEL_BACKEND} \ --fat_apk_cpu=x86_64,arm64-v8a \ //androidTest:mlperf_test_app @cp output/`readlink bazel-bin`/android/androidTest/mlperf_test_app.apk output/mlperf_test_app.apk diff --git a/android/java/org/mlperf/inference/BUILD b/android/java/org/mlperf/inference/BUILD index 490a2f004..3d3c28001 100644 --- a/android/java/org/mlperf/inference/BUILD +++ b/android/java/org/mlperf/inference/BUILD @@ -58,6 +58,18 @@ config_setting( }, ) +string_flag( + name = "with_pixel", + build_setting_default = "0", +) + +config_setting( + name = "use_pixel", + flag_values = { + ":with_pixel": "1", + }, +) + java_lite_proto_library( name = "mlperf_task_java_proto_lite", deps = ["//android/cpp/proto:mlperf_task_proto"], @@ -72,16 +84,19 @@ genrule( "Backends.java", ], cmd = "cat $(location Backends.java.in) " + - select({ - ":use_qti": "| sed -e 's/QTI/\"qti\",/' ", - "//conditions:default": "| sed -e 's/QTI//' ", - }) + select({ - ":use_samsung": "| sed -e 's/SAMSUNG/\"samsung\",/' ", - "//conditions:default": "| sed -e 's/SAMSUNG//'", - }) + select({ - ":use_mediatek": "| sed -e 's/MEDIATEK/\"tfliteneuron\",/' ", - "//conditions:default": "| sed -e 's/MEDIATEK//' ", - }) + " > $(@)", + select({ + ":use_pixel": "| sed -e 's/PIXEL/\"tflitepixel\",/' ", + "//conditions:default": "| sed -e 's/PIXEL//' ", + }) + select({ + ":use_qti": "| sed -e 's/QTI/\"qti\",/' ", + "//conditions:default": "| sed -e 's/QTI//' ", + }) + select({ + ":use_samsung": "| sed -e 's/SAMSUNG/\"samsung\",/' ", + "//conditions:default": "| sed -e 's/SAMSUNG//'", + }) + select({ + ":use_mediatek": "| sed -e 's/MEDIATEK/\"tfliteneuron\",/' ", + "//conditions:default": "| sed -e 's/MEDIATEK//' ", + }) + " > $(@)", ) android_library( @@ -182,13 +197,14 @@ cc_library( deps = [ "//mobile_back_tflite:tflitebackend", ] + select({ - ":use_samsung": [ - "//mobile_back_samsung:samsungbackend", - ], + ":use_samsung": ["//mobile_back_samsung:samsungbackend"], "//conditions:default": [], }) + select({ ":use_mediatek": ["//mobile_back_tflite:tfliteneuronbackend"], "//conditions:default": [], + }) + select({ + ":use_pixel": ["//mobile_back_pixel:tflitepixelbackend"], + "//conditions:default": [], }), ) diff --git a/android/java/org/mlperf/inference/Backends.java.in b/android/java/org/mlperf/inference/Backends.java.in index 5258258d3..953eac354 100644 --- a/android/java/org/mlperf/inference/Backends.java.in +++ b/android/java/org/mlperf/inference/Backends.java.in @@ -5,5 +5,5 @@ public final class Backends { private Backends() { } - public static final String[] BACKEND_LIST = {QTI SAMSUNG MEDIATEK "tflite"}; + public static final String[] BACKEND_LIST = {QTI SAMSUNG MEDIATEK PIXEL "tflite"}; } diff --git a/mobile_back_pixel/BUILD b/mobile_back_pixel/BUILD new file mode 100644 index 000000000..70c579e08 --- /dev/null +++ b/mobile_back_pixel/BUILD @@ -0,0 +1,9 @@ +package( + default_visibility = ["//visibility:public"], + licenses = ["notice"], # Apache 2.0 +) + +cc_library( + name = "tflitepixelbackend", + srcs = ["//mobile_back_pixel/cpp/backend_tflite:libtflitepixelbackend.so"], +) diff --git a/mobile_back_pixel/Makefile b/mobile_back_pixel/Makefile new file mode 100644 index 000000000..52e7cb201 --- /dev/null +++ b/mobile_back_pixel/Makefile @@ -0,0 +1,111 @@ +# Copyright 2020-2021 The MLPerf Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +########################################################################## + +this_mkfile := $(abspath $(lastword $(MAKEFILE_LIST))) +TOPDIR := $(shell dirname ${this_mkfile}) +BUILDDIR=${TOPDIR}/build + +USERID=$(shell id -u) +GROUPID=$(shell id -g) + +SRCFILES=\ +Makefile \ +cpp/BUILD \ +cpp/c/BUILD \ +cpp/c/version_script.lds \ +cpp/c/backend_c.h \ +cpp/c/type.h \ +cpp/backend_tflite/tflite_c.cc \ +cpp/backend_tflite/tflite_settings.h \ +cpp/backend_tflite/utils.h \ +cpp/backend_tflite/BUILD + +ifeq (${USE_PROXY_WORKAROUND},1) + export PROXY_WORKAROUND1=\ + -v /etc/ssl/certs:/etc/ssl/certs \ + -v /usr/share/ca-certificates:/usr/share/ca-certificates \ + -v /usr/share/ca-certificates-java:/usr/share/ca-certificates-java + + export PROXY_WORKAROUND2=--host_jvm_args -Djavax.net.ssl.trustStore=/etc/ssl/certs/java/cacerts +else + export PROXY_WORKAROUND1= + export PROXY_WORKAROUND2= +endif + +all: ${BUILDDIR}/lib_x86_64/libtflitebackend.so ${BUILDDIR}/lib_arm64/libtflitebackend.so + +.PHONY: clean docker_image + +USER_ID=$(shell id -u) + +${BUILDDIR}/mlperf_mobile_image.stamp: ${TOPDIR}/docker/mlperf_mobile/Dockerfile + @mkdir -p ${BUILDDIR} + @docker image build -t mlcommons/mlperf_mobile:1.0 docker/mlperf_mobile + @touch $@ + +docker_image: ${BUILDDIR}/mlperf_mobile_image.stamp + +${BUILDDIR}/lib_arm64/libtflitebackend.so: ${SRCFILES} docker_image + @echo "Building arm64 libtflitebackend.so" + @mkdir -p ${BUILDDIR}/lib_arm64 + @mkdir -p ${BUILDDIR}/cache + @docker run \ + -e USER=mlperf \ + ${PROXY_WORKAROUND1} \ + -v $(CURDIR):/tflite_backend \ + -v ${BUILDDIR}/cache:/cache \ + -w /tflite_backend \ + -u ${USERID}:${GROUPID} \ + mlcommons/mlperf_mobile:1.0 bazel-3.7.2 ${PROXY_WORKAROUND2} \ + --output_user_root=/cache/bazel build \ + -c opt --cxxopt='--std=c++14' \ + --cxxopt='-Wno-deprecated-declarations' \ + --cxxopt='-Wno-unknown-attributes' \ + --host_cxxopt='--std=c++14' \ + --host_cxxopt='-Wno-deprecated-declarations' \ + --host_cxxopt='-Wno-class-memaccess' \ + --config=android_arm64 \ + :tflitebackend + @cp build/`readlink bazel-out`/arm64-v8a-opt/bin/cpp/backend_tflite/libtflitebackend.so $@ + @chmod 777 $@ + +${BUILDDIR}/lib_x86_64/libtflitebackend.so: ${SRCFILES} docker_image + @echo "Building x86_64 libtflitebackend.so" + @mkdir -p ${BUILDDIR}/lib_x86_64 + @mkdir -p ${BUILDDIR}/cache + docker run \ + -e USER=mlperf \ + ${PROXY_WORKAROUND1} \ + -v $(CURDIR):/tflite_backend \ + -v ${BUILDDIR}/cache:/cache \ + -w /tflite_backend \ + -u ${USERID}:${GROUPID} \ + mlcommons/mlperf_mobile:1.0 bazel-3.7.2 ${PROXY_WORKAROUND2} \ + --output_user_root=/cache/bazel build \ + -c opt --cxxopt='--std=c++14' \ + --cxxopt='-Wno-deprecated-declarations' \ + --cxxopt='-Wno-unknown-attributes' \ + --host_cxxopt='--std=c++14' \ + --host_cxxopt='-Wno-deprecated-declarations' \ + --host_cxxopt='-Wno-class-memaccess' \ + --config=android_x86_64 \ + :tflitebackend + @cp build/`readlink bazel-out`/x86_64-opt/bin/cpp/backend_tflite/libtflitebackend.so $@ + @chmod 777 $@ + +clean: + @rm -rf bazel-bin bazel-testlogs bazel-out bazel-tflite_backend + @rm -rf ${BUILDDIR} + diff --git 
a/mobile_back_pixel/README.md b/mobile_back_pixel/README.md
new file mode 100644
index 000000000..6063383b9
--- /dev/null
+++ b/mobile_back_pixel/README.md
@@ -0,0 +1,14 @@
+# TFLite backend for Pixel
+
+Build the shared library for the selected architecture:
+```bash
+bazel build -c opt \
+  --cxxopt='--std=c++14' \
+  --host_cxxopt='--std=c++14' \
+  --host_cxxopt='-Wno-deprecated-declarations' \
+  --host_cxxopt='-Wno-class-memaccess' \
+  --cxxopt='-Wno-deprecated-declarations' \
+  --cxxopt='-Wno-unknown-attributes' \
+  --fat_apk_cpu={x86_64|arm64-v8a|armeabi-v7a} \
+  //cpp/backend_tflite:libtflitepixelbackend.so
+```
diff --git a/mobile_back_pixel/cpp/BUILD b/mobile_back_pixel/cpp/BUILD
new file mode 100644
index 000000000..cb9c72e13
--- /dev/null
+++ b/mobile_back_pixel/cpp/BUILD
@@ -0,0 +1,18 @@
+# Copyright 2019 The MLPerf Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+package(
+    default_visibility = ["//visibility:public"],
+    licenses = ["notice"],  # Apache 2.0
+)
diff --git a/mobile_back_pixel/cpp/backend_tflite/BUILD b/mobile_back_pixel/cpp/backend_tflite/BUILD
new file mode 100644
index 000000000..874b71441
--- /dev/null
+++ b/mobile_back_pixel/cpp/backend_tflite/BUILD
@@ -0,0 +1,84 @@
+# Copyright 2019-2021 The MLPerf Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================== +load( + "@org_tensorflow//tensorflow/lite:build_def.bzl", + "tflite_copts", + "tflite_jni_binary", +) + +package( + default_visibility = ["//visibility:public"], + licenses = ["notice"], # Apache 2.0 +) + +cc_library( + name = "resize_bilinear_op", + srcs = [ + "resize_argmax_op.cc", + ], + hdrs = [ + "resize_argmax_op.h", + ], + deps = [ + "@org_tensorflow//tensorflow/lite/c:common", + "@org_tensorflow//tensorflow/lite/kernels:kernel_util", + "@org_tensorflow//tensorflow/lite/core/api", + "@org_tensorflow//tensorflow/lite/kernels:cpu_backend_context", + "@org_tensorflow//tensorflow/lite/kernels:cpu_backend_threadpool", + ], +) + +cc_library( + name = "tflite_pixel", + srcs = [ + "tflite_pixel.cc", + ], + hdrs = [ + "tflite_settings_pixel.h", + "thread_pool.h", + ], + copts = tflite_copts() + select({ + "//android/commonlibs:use_asan": [ + "-fsanitize=address", + "-g", + "-O1", + "-fno-omit-frame-pointer", + ], + "//conditions:default": [], + }), + deps = [ + ":resize_bilinear_op", + "//android/cpp/c:headers", + "@org_tensorflow//tensorflow/lite/c:c_api", + "@org_tensorflow//tensorflow/lite/c:c_api_experimental", + "@org_tensorflow//tensorflow/lite/c:common", + "@org_tensorflow//tensorflow/core:tflite_portable_logging", + ] + select({ + "@org_tensorflow//tensorflow:android": [ + "@org_tensorflow//tensorflow/lite/delegates/gpu:delegate", + ], + "//conditions:default": [], + }), + alwayslink = 1, +) + +tflite_jni_binary( + name = "libtflitepixelbackend.so", + exported_symbols = "//android/cpp/c:exported_symbols.lds", + linkscript = "//android/cpp/c:version_script.lds", + deps = [ + ":tflite_pixel", + ], +) diff --git a/mobile_back_pixel/cpp/backend_tflite/resize_argmax_op.cc b/mobile_back_pixel/cpp/backend_tflite/resize_argmax_op.cc new file mode 100644 index 000000000..f18578e5a --- /dev/null +++ b/mobile_back_pixel/cpp/backend_tflite/resize_argmax_op.cc @@ -0,0 +1,362 @@ +/* Copyright 2021 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/
+#include <string.h>
+
+#include <algorithm>
+#include <atomic>
+#include <vector>
+#ifdef __ARM_NEON
+#include <arm_neon.h>
+#endif
+
+#include "resize_argmax_op.h"
+#include "tensorflow/lite/core/api/profiler.h"
+#include "tensorflow/lite/kernels/cpu_backend_context.h"
+#include "tensorflow/lite/kernels/cpu_backend_threadpool.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+
+constexpr int kInputTensor = 0;
+constexpr int kSizeTensor = 1;
+constexpr int kAxis = 2;
+constexpr int kOutputTensor = 0;
+
+using namespace tflite;
+
+namespace {
+
+inline int ArgMaxVector(const uint8_t* input_data, int size) {
+  int32_t max_index = 0;
+  uint8_t max_value = input_data[0];
+  int32_t i = 0;
+#ifdef __ARM_NEON
+  constexpr int VECTOR_SIZE = 16;
+  if (size >= VECTOR_SIZE) {
+    uint8x16_t max_value_u8x16;
+    for (; i <= size - VECTOR_SIZE; i += VECTOR_SIZE) {
+      max_value_u8x16 = vld1q_u8(input_data + i);
+      uint8_t max_from_vec;
+      max_from_vec = vmaxvq_u8(max_value_u8x16);
+      if (max_from_vec > max_value) {
+        max_value = max_from_vec;
+        max_index = i;
+      }
+    }
+    // max_index marks the 16-lane block holding the maximum; scan that block
+    // for the exact element index.
+    for (int start_idx = max_index; start_idx < max_index + VECTOR_SIZE;
+         start_idx++) {
+      if (input_data[start_idx] == max_value) {
+        max_index = start_idx;
+        break;
+      }
+    }
+  }
+#endif  // __ARM_NEON
+  // Leftover loop.
+  for (; i < size; ++i) {
+    const uint8_t curr_value = input_data[i];
+    if (curr_value > max_value) {
+      max_value = curr_value;
+      max_index = i;
+    }
+  }
+
+  return max_index;
+}
+
+inline void fill_bilinear_row(uint32_t* row, uint32_t left_val,
+                              uint32_t right_val) {
+  row[0] = left_val;
+  row[1] = (left_val * 3 + right_val) / 4;
+  row[2] = (left_val + right_val) / 2;
+  row[3] = (left_val + right_val * 3) / 4;
+}
+
+#ifdef __ARM_NEON
+constexpr uint32_t left_multipliers[] = {4, 3, 2, 1};
+constexpr uint32_t right_multipliers[] = {0, 1, 2, 3};
+
+inline uint32x4_t fill_bilinear_row_simd(uint32_t left_val,
+                                         uint32_t right_val) {
+  const uint32x4_t left_multipliers_simd = vld1q_u32(left_multipliers);
+  const uint32x4_t right_multipliers_simd = vld1q_u32(right_multipliers);
+
+  uint32x4_t left_top_simd = vmulq_n_u32(left_multipliers_simd, left_val);
+  uint32x4_t top_row_simd =
+      vmlaq_n_u32(left_top_simd, right_multipliers_simd, right_val);
+  // shift by 2 = divide by 4
+  return vshrq_n_u32(top_row_simd, 2);
+}
+
+#endif
+
+void resize_argmax_task_4x(uint8_t* input, int input_width, int input_height,
+                           int input_argmax_depth, int32_t* output,
+                           int output_width, int output_height, int start_row,
+                           int row_count, int move,
+                           tflite::Profiler* profiler) {
+  TFLITE_SCOPED_TAGGED_DEFAULT_PROFILE(profiler, "resize_argmax_task_4x");
+  uint8_t* in_ptr = input + start_row * input_width * input_argmax_depth;
+  int32_t* out_ptr = output + start_row * 4 * output_width;
+  int cnt = 0;
+  for (int row = start_row; row < start_row + row_count; row++) {
+    for (int col = 0; col < input_width; col++) {
+      int top_left;
+      int top_right;
+      int bottom_left;
+      int bottom_right;
+      top_left = *out_ptr;
+      if (row != input_height - 1 && col != input_width - 1) {
+        if ((row + 1 % move) > 0) {
+          if (col == 0) {
+            *(out_ptr + output_width * 4) = ArgMaxVector(
+                in_ptr + input_width * input_argmax_depth, input_argmax_depth);
+          }
+          *(out_ptr + output_width * 4 + 4) = ArgMaxVector(
+              in_ptr + input_width * input_argmax_depth + input_argmax_depth,
+              input_argmax_depth);
+        }
+
+        top_right = *(out_ptr + 4);
+        bottom_left = *(out_ptr + output_width * 4);
+        bottom_right = *(out_ptr + output_width * 4 + 4);
+      }
+      if ((row == input_height - 1 || col
== input_width - 1) || + (top_left == top_right && top_right == bottom_left && + bottom_left == bottom_right)) { + cnt++; +#ifdef __ARM_NEON + int32_t* temp_out = out_ptr; + int32x4_t f = vdupq_n_s32(top_left); + for (int square_row = 0; square_row < 4; square_row++) { + vst1q_s32(temp_out, f); + temp_out += output_width; + } +#else + for (int i = 1; i <= 3; ++i) { + out_ptr[i] = top_left; + } + for (int i = 1; i <= 3; ++i) { + memcpy(out_ptr + i * output_width, out_ptr, sizeof(int32_t) * 4); + } +#endif + } else { + uint8_t* top_right_col = in_ptr + input_argmax_depth; + uint8_t* top_left_col = in_ptr; + uint8_t* bot_right_col = + in_ptr + input_width * input_argmax_depth + input_argmax_depth; + uint8_t* bot_left_col = in_ptr + input_width * input_argmax_depth; + +#ifdef __ARM_NEON + uint32x4_t max_values[4]; + for (int i = 0; i < 4; i++) { + max_values[i] = vdupq_n_u32(0); + } + int32x4_t max_indices[4]; + for (int i = 0; i < 4; i++) { + max_indices[i] = vdupq_n_s32(0); + } +#else + uint32_t max_values[16] = {0}; + int32_t max_indices[16] = {0}; +#endif + for (int depth = 0; depth < input_argmax_depth; depth++) { +#ifdef __ARM_NEON + uint32x4_t simd_rows[5]; + simd_rows[0] = fill_bilinear_row_simd(*top_left_col, *top_right_col); + simd_rows[4] = fill_bilinear_row_simd(*bot_left_col, *bot_right_col); + + uint32x4_t row2 = vmlaq_n_u32(simd_rows[4], simd_rows[0], 3); + simd_rows[1] = vshrq_n_u32(row2, 2); + + uint32x4_t row3 = vaddq_u32(simd_rows[0], simd_rows[4]); + simd_rows[2] = vshrq_n_u32(row3, 1); + + uint32x4_t row4 = vmlaq_n_u32(simd_rows[0], simd_rows[4], 3); + simd_rows[3] = vshrq_n_u32(row4, 2); + + int32x4_t depth_simd = vdupq_n_s32(depth); + for (int i = 0; i < 4; i++) { + uint32x4_t mask = vcgtq_u32(simd_rows[i], max_values[i]); + max_values[i] = vbslq_u32(mask, simd_rows[i], max_values[i]); + max_indices[i] = vbslq_s32(mask, depth_simd, max_indices[i]); + } +#else + uint32_t bilinear_values[20]; + fill_bilinear_row(bilinear_values, *top_left_col, *top_right_col); + fill_bilinear_row(bilinear_values + 16, *bot_left_col, + *bot_right_col); + + for (int i = 0; i < 4; i++) { + bilinear_values[i + 4] = + (bilinear_values[i] * 3 + bilinear_values[i + 16]) / 4; + bilinear_values[i + 8] = + (bilinear_values[i] + bilinear_values[i + 16]) / 2; + bilinear_values[i + 12] = + (bilinear_values[i] + bilinear_values[i + 16] * 3) / 4; + } + + for (int i = 0; i < 16; i++) { + if (bilinear_values[i] > max_values[i]) { + max_indices[i] = depth; + max_values[i] = bilinear_values[i]; + } + } +#endif + top_right_col++; + top_left_col++; + bot_right_col++; + bot_left_col++; + } // for depth +#ifdef __ARM_NEON + int32_t* temp_out_ptr = out_ptr; + for (int square_row = 0; square_row < 4; square_row++) { + vst1q_s32(temp_out_ptr, max_indices[square_row]); + temp_out_ptr += output_width; + } +#else + int32_t* temp_out_ptr = out_ptr; + int index = 0; + for (int square_row = 0; square_row < 4; square_row++) { + for (int square_col = 0; square_col < 4; square_col++) { + temp_out_ptr[square_col] = max_indices[index]; + index++; + } + temp_out_ptr += output_width; + } +#endif + } // end regular bilinear resize + + in_ptr += input_argmax_depth; + out_ptr += 4; + } // for col + out_ptr += 3 * output_width; + } // for row +} + +} // namespace + +TfLiteStatus ResizeArgmax_Prepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, NumInputs(node), 3); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + const TfLiteTensor* input; + TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 
kInputTensor, &input));
+  const TfLiteTensor* size;
+  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kSizeTensor, &size));
+  const TfLiteTensor* axis;
+  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kAxis, &axis));
+  TF_LITE_ENSURE_EQ(context, *axis->data.i32, input->dims->size - 1);
+  TfLiteTensor* output;
+  TF_LITE_ENSURE_OK(context,
+                    GetOutputSafe(context, node, kOutputTensor, &output));
+
+  return kTfLiteOk;
+}
+
+struct ResizeArgmaxTask : cpu_backend_threadpool::Task {
+  ResizeArgmaxTask(uint8_t* input, int input_width, int input_height,
+                   int input_argmax_depth, int32_t* output, int output_width,
+                   int output_height, std::atomic<int>& start_row,
+                   int row_count, int move, tflite::Profiler* profiler)
+      : input_(input),
+        input_width_(input_width),
+        input_height_(input_height),
+        input_argmax_depth_(input_argmax_depth),
+        output_(output),
+        output_width_(output_width),
+        output_height_(output_height),
+        start_row_(start_row),
+        row_count_(row_count),
+        move_(move),
+        profiler_(profiler) {}
+
+  void Run() override {
+    int start_row;
+    while ((start_row = start_row_ += move_) - move_ < row_count_) {
+      resize_argmax_task_4x(input_, input_width_, input_height_,
+                            input_argmax_depth_, output_, output_width_,
+                            output_height_, start_row - move_, move_, move_,
+                            profiler_);
+    }
+  }
+
+  uint8_t* input_;
+  int input_width_;
+  int input_height_;
+  int input_argmax_depth_;
+  int32_t* output_;
+  int output_width_;
+  int output_height_;
+  std::atomic<int>& start_row_;
+  int row_count_;
+  int move_;
+  tflite::Profiler* profiler_;
+};
+
+TfLiteStatus ResizeArgmax_Invoke(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteTensor* input;
+  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
+  TfLiteTensor* output;
+  TF_LITE_ENSURE_OK(context,
+                    GetOutputSafe(context, node, kOutputTensor, &output));
+  int input_width = input->dims->data[1];
+  int input_height = input->dims->data[2];
+  int input_argmax_depth = input->dims->data[3];
+
+  int output_width = output->dims->data[1];
+  int output_height = output->dims->data[2];
+  tflite::CpuBackendContext* cpu_backend_context =
+      tflite::CpuBackendContext::GetFromContext(context);
+  const int thread_count = std::min(2, cpu_backend_context->max_num_threads());
+
+  int move = (thread_count == 1) ? 1 : 16;
+  uint8_t* argmax_input_ptr = input->data.uint8;
+  int32_t* argmax_output_ptr = output->data.i32;
+  for (int row = 0; row < input_height; row += move) {
+    for (int col = 0; col < input_width; col++) {
+      *argmax_output_ptr = ArgMaxVector(argmax_input_ptr, input_argmax_depth);
+      argmax_input_ptr += input_argmax_depth;
+      argmax_output_ptr += 4;
+    }
+    argmax_input_ptr += input_argmax_depth * input_width * (move - 1);
+    argmax_output_ptr += output_width * (4 * move - 1);
+  }
+  if (thread_count == 1) {
+    resize_argmax_task_4x(input->data.uint8, input_width, input_height,
+                          input_argmax_depth, output->data.i32, output_width,
+                          output_height, 0, input_height, 1,
+                          (tflite::Profiler*)context->profiler);
+  } else {
+    std::vector<ResizeArgmaxTask> tasks;
+    tasks.reserve(thread_count);
+    std::atomic<int> start_row;
+    start_row = 0;
+    for (int i = 0; i < thread_count; i++) {
+      tasks.emplace_back(input->data.uint8, input_width, input_height,
+                         input_argmax_depth, output->data.i32, output_width,
+                         output_height, start_row, input->dims->data[2], move,
+                         (tflite::Profiler*)context->profiler);
+    }
+    cpu_backend_threadpool::Execute(tasks.size(), tasks.data(),
+                                    cpu_backend_context);
+  }
+
+  return kTfLiteOk;
+}
+
+TfLiteRegistration* Register_ResizeArgmax() {
+  static TfLiteRegistration r = {nullptr, nullptr, ResizeArgmax_Prepare,
+                                 ResizeArgmax_Invoke};
+  return &r;
+}
diff --git a/mobile_back_pixel/cpp/backend_tflite/resize_argmax_op.h b/mobile_back_pixel/cpp/backend_tflite/resize_argmax_op.h
new file mode 100644
index 000000000..9c6e139ba
--- /dev/null
+++ b/mobile_back_pixel/cpp/backend_tflite/resize_argmax_op.h
@@ -0,0 +1,22 @@
+/* Copyright 2021 Google LLC
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef RESIZE_ARGMAX_OP_H_
+#define RESIZE_ARGMAX_OP_H_
+
+#include "tensorflow/lite/c/common.h"
+
+TfLiteRegistration* Register_ResizeArgmax();
+
+#endif
\ No newline at end of file
diff --git a/mobile_back_pixel/cpp/backend_tflite/tflite_pixel.cc b/mobile_back_pixel/cpp/backend_tflite/tflite_pixel.cc
new file mode 100644
index 000000000..e78835316
--- /dev/null
+++ b/mobile_back_pixel/cpp/backend_tflite/tflite_pixel.cc
@@ -0,0 +1,484 @@
+/* Copyright 2021 The MLPerf Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "android/cpp/c/backend_c.h"
+#include "android/cpp/c/type.h"
+#include "tensorflow/lite/c/c_api.h"
+#include "tensorflow/lite/c/c_api_experimental.h"
+#include "tensorflow/lite/c/common.h"
+#if __ANDROID__
+#include <sys/system_properties.h>
+
+#include "tensorflow/lite/delegates/gpu/delegate.h"
+#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
+#include "tensorflow/core/platform/logging.h"
+#endif
+#include "tflite_settings_pixel.h"
+#include "resize_argmax_op.h"
+#include "thread_pool.h"
+
+#define N_OFFLINE_INTEPRETERS 8
+
+struct TFLiteBackendData {
+  const char* name = "TFLite";
+  const char* vendor = "Google";
+  TfLiteModel* model{nullptr};
+  TfLiteInterpreterOptions* options[N_OFFLINE_INTEPRETERS] = {};
+  TfLiteInterpreter* interpreter[N_OFFLINE_INTEPRETERS] = {};
+  TfLiteInterpreter* interpreter8[N_OFFLINE_INTEPRETERS] = {};
+  uint32_t batch_size = 64;
+  int32_t input_tensor_count;
+  void** acc_data[N_OFFLINE_INTEPRETERS] = {};
+  std::unique_ptr<Threadpool> executer;
+  bool use_shard = false;
+  bool has_temp_data = false;
+  int32_t original_tensor_size = 0;
+
+  std::future<TfLiteStatus> status[N_OFFLINE_INTEPRETERS];
+};
+
+static bool backendExists = false;
+
+inline mlperf_data_t::Type TfType2Type(TfLiteType type) {
+  switch (type) {
+    case kTfLiteFloat32:
+      return mlperf_data_t::Float32;
+    case kTfLiteUInt8:
+      return mlperf_data_t::Uint8;
+    case kTfLiteInt8:
+      return mlperf_data_t::Int8;
+    case kTfLiteFloat16:
+      return mlperf_data_t::Float16;
+    case kTfLiteInt32:
+      return mlperf_data_t::Int32;
+    case kTfLiteInt64:
+      return mlperf_data_t::Int64;
+    default:
+      printf("TfLiteType %d not supported", type);
+      return mlperf_data_t::Float32;
+  }
+}
+
+size_t TFLiteNumElements(const TfLiteTensor* tensor) {
+  size_t result = 1;
+  for (int i = 0; i < TfLiteTensorNumDims(tensor); ++i) {
+    result *= TfLiteTensorDim(tensor, i);
+  }
+  return result;
+}
+
+// TFLite is the standard backend for all hardware.
+bool mlperf_backend_matches_hardware(const char** not_allowed_message,
+                                     const char** settings,
+                                     const mlperf_device_info_t* device_info) {
+  *not_allowed_message = nullptr;
+  *settings = tflite_settings.c_str();
+  printf("TFLite backend matches hardware");
+  return true;
+}
+
+#if __ANDROID__
+bool is_emulator() {
+  char ro_build_characteristics[PROP_VALUE_MAX + 1];
+  if (__system_property_get("ro.build.characteristics",
+                            ro_build_characteristics)) {
+    char* ptr;
+    ptr = strstr(ro_build_characteristics, "emulator");
+    if (ptr) return true;
+  }
+  return false;
+}
+#endif
+
+// Create a new backend and return the pointer to it.
+mlperf_backend_ptr_t mlperf_backend_create(
+    const char* model_path, mlperf_backend_configuration_t* configs,
+    const char* native_lib_path) {
+  // Verify only one instance of the backend exists at any time
+  if (backendExists) {
+    printf("Error: Only one backend instance should exist at a time");
+    return nullptr;
+  }
+
+  TFLiteBackendData* backend_data = new TFLiteBackendData();
+
+  backendExists = true;
+
+  // Load the model.
+  backend_data->model = TfLiteModelCreateFromFile(model_path);
+  if (!backend_data->model) {
+    printf("Failed to load model: %s", model_path);
+    mlperf_backend_delete(backend_data);
+    return nullptr;
+  }
+
+  backend_data->executer =
+      std::unique_ptr<Threadpool>(new Threadpool(N_OFFLINE_INTEPRETERS));
+
+  // Create interpreter options.
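+  // The lambda below builds the per-shard TfLiteInterpreterOptions: it
+  // registers the custom ResizeArgmax op, applies the num_threads value from
+  // the benchmark configs, and, on Android hardware (not the emulator),
+  // attaches the GPU or NNAPI delegate selected by configs->accelerator.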
+ auto create_option = [&](TfLiteInterpreterOptions*& option_ptr) -> void { + option_ptr = TfLiteInterpreterOptionsCreate(); + TfLiteInterpreterOptionsAddCustomOp(option_ptr, + "ResizeArgmax", + Register_ResizeArgmax(), + 1, + 999); + TfLiteDelegate* delegate = nullptr; + + for (int i = 0; i < configs->count; ++i) { + if (strcmp(configs->keys[i], "num_threads") == 0) { + TfLiteInterpreterOptionsSetNumThreads(option_ptr, + atoi(configs->values[i])); + } + #if __ANDROID__ + if (!is_emulator() && ((strcmp(configs->accelerator, "gpu_f16") == 0) || + (strcmp(configs->accelerator, "gpu") == 0))) { + auto options = TfLiteGpuDelegateOptionsV2Default(); + if (strcmp(configs->accelerator, "gpu_f16") == 0) + options.inference_priority1 = + TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY; + delegate = TfLiteGpuDelegateV2Create(&options); + } else if (strcmp(configs->accelerator, "nnapi") == 0) { + auto options = tflite::StatefulNnApiDelegate::Options(); + options.allow_fp16 = true; + options.disallow_nnapi_cpu = true; + options.accelerator_name = "google-edgetpu"; + options.use_burst_computation = true; + delegate = new tflite::StatefulNnApiDelegate(options); + } + if (delegate != nullptr) { + TfLiteInterpreterOptionsAddDelegate(option_ptr, delegate); + } + #endif + } + }; + + for (int k = 0; k < N_OFFLINE_INTEPRETERS; k++) { + // Create Backend Option + create_option(backend_data->options[k]); + + // Create the interpreter. + backend_data->interpreter[k] = + TfLiteInterpreterCreate(backend_data->model, backend_data->options[k]); + if (!backend_data->interpreter[k]) { + // create a vanilla interpreter + backend_data->interpreter[k] = TfLiteInterpreterCreate( + backend_data->model, TfLiteInterpreterOptionsCreate()); + if (!backend_data->interpreter[k]) { + printf("Failed to create the interpreter"); + mlperf_backend_delete(backend_data); + return nullptr; + } + } + + // Create the interpreter. 
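+    // interpreter8[k] mirrors interpreter[k] (same model and options) but is
+    // resized further below to a batch of 8 per shard for the offline
+    // scenario.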
+    backend_data->interpreter8[k] =
+        TfLiteInterpreterCreate(backend_data->model, backend_data->options[k]);
+    if (!backend_data->interpreter8[k]) {
+      // create a vanilla interpreter
+      backend_data->interpreter8[k] = TfLiteInterpreterCreate(
+          backend_data->model, TfLiteInterpreterOptionsCreate());
+      if (!backend_data->interpreter8[k]) {
+        printf("Failed to create the interpreter");
+        mlperf_backend_delete(backend_data);
+        return nullptr;
+      }
+    }
+  }
+
+  backend_data->input_tensor_count =
+      TfLiteInterpreterGetInputTensorCount(backend_data->interpreter[0]);
+  for (int i = 0; i < N_OFFLINE_INTEPRETERS; i++) {
+    backend_data->acc_data[i] =
+        (void**)malloc(sizeof(void*) * backend_data->input_tensor_count);
+    memset(backend_data->acc_data[i], 0,
+           sizeof(void*) * backend_data->input_tensor_count);
+  }
+
+  for (int k = 0; k < N_OFFLINE_INTEPRETERS; k++) {
+    for (int i = 0; i < backend_data->input_tensor_count; ++i) {
+      TfLiteTensor* tensor =
+          TfLiteInterpreterGetInputTensor(backend_data->interpreter[k], i);
+      int32_t* dims = (int32_t*)malloc(sizeof(int32_t) * tensor->dims->size);
+      dims[0] = 1;  // batch size 1 for the single-stream interpreter
+      for (int d = 1; d < tensor->dims->size; d++) {
+        dims[d] = tensor->dims->data[d];
+      }
+      TfLiteInterpreterResizeInputTensor(backend_data->interpreter[k], i,
+                                         dims, tensor->dims->size);
+      free(dims);
+    }
+    if (kTfLiteOk !=
+        TfLiteInterpreterAllocateTensors(backend_data->interpreter[k])) {
+      printf("Failed to allocate tensors");
+      return nullptr;
+    }
+  }
+
+  for (int k = 0; k < N_OFFLINE_INTEPRETERS; k++) {
+    for (int i = 0; i < backend_data->input_tensor_count; ++i) {
+      TfLiteTensor* tensor =
+          TfLiteInterpreterGetInputTensor(backend_data->interpreter8[k], i);
+      int32_t* dims = (int32_t*)malloc(sizeof(int32_t) * tensor->dims->size);
+      dims[0] = 8;  // batch of 8 per shard for the offline scenario
+      for (int d = 1; d < tensor->dims->size; d++) {
+        dims[d] = tensor->dims->data[d];
+      }
+      TfLiteInterpreterResizeInputTensor(backend_data->interpreter8[k], i,
+                                         dims, tensor->dims->size);
+      free(dims);
+    }
+    if (kTfLiteOk !=
+        TfLiteInterpreterAllocateTensors(backend_data->interpreter8[k])) {
+      printf("Failed to allocate tensors");
+      break;
+    }
+  }
+
+  return backend_data;
+}
+
+// Name of the vendor who created this backend.
+const char* mlperf_backend_vendor_name(mlperf_backend_ptr_t backend_ptr) {
+  TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr;
+  return backend_data->vendor;
+}
+
+// Return the name of this backend.
+const char* mlperf_backend_name(mlperf_backend_ptr_t backend_ptr) {
+  TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr;
+  return backend_data->name;
+}
+
+// Destroy the backend pointer and its data.
+void mlperf_backend_delete(mlperf_backend_ptr_t backend_ptr) {
+  TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr;
+  for (int i = 0; i < N_OFFLINE_INTEPRETERS; i++) {
+    if (backend_data->use_shard) {
+      free(backend_data->acc_data[i]);
+    }
+    backend_data->acc_data[i] = nullptr;
+  }
+  TfLiteModelDelete(backend_data->model);
+  for (int i = 0; i < N_OFFLINE_INTEPRETERS; i++) {
+    TfLiteInterpreterOptionsDelete(backend_data->options[i]);
+    TfLiteInterpreterDelete(backend_data->interpreter[i]);
+    TfLiteInterpreterDelete(backend_data->interpreter8[i]);
+  }
+  delete backend_data;
+  backendExists = false;
+}
+
+// Run the inference for a sample.
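+// Note: in the sharded offline path the per-shard invocations are dispatched
+// from mlperf_backend_set_input once a shard's batch is fully staged, so this
+// entry point only runs interpreter[0], pinned to the two big cores.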
+mlperf_status_t mlperf_backend_issue_query(mlperf_backend_ptr_t backend_ptr) { + TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr; + + // main thread for batch_size == 1 + if (!backend_data->use_shard) { + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(6, &cpuset); + CPU_SET(7, &cpuset); + sched_setaffinity(0, sizeof(cpu_set_t), &cpuset); + if (TfLiteInterpreterInvoke(backend_data->interpreter[0]) != kTfLiteOk) { + printf("Failed to run the inference"); + return MLPERF_FAILURE; + } + } + + return MLPERF_SUCCESS; +} + +// Flush the staged queries immediately. +mlperf_status_t mlperf_backend_flush_queries(mlperf_backend_ptr_t backend_ptr) { + return MLPERF_SUCCESS; +} + +// Return the number of inputs of the model. +int32_t mlperf_backend_get_input_count(mlperf_backend_ptr_t backend_ptr) { + TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr; + return TfLiteInterpreterGetInputTensorCount(backend_data->interpreter[0]); +} + +// Return the type of the ith input. +mlperf_data_t mlperf_backend_get_input_type(mlperf_backend_ptr_t backend_ptr, + int32_t i) { + TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr; + const TfLiteTensor* tensor = + TfLiteInterpreterGetInputTensor(backend_data->interpreter[0], i); + mlperf_data_t type; + type.type = TfType2Type(TfLiteTensorType(tensor)); + type.size = TFLiteNumElements(tensor); + return type; +} + +// Set the data for ith input. +mlperf_status_t mlperf_backend_set_input(mlperf_backend_ptr_t backend_ptr, + int32_t batch_index, int32_t i, + void* data) { + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(6, &cpuset); + CPU_SET(7, &cpuset); + sched_setaffinity(0, sizeof(cpu_set_t), &cpuset); + + TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr; + const int real_batch_size = backend_data->batch_size / N_OFFLINE_INTEPRETERS; + const int shard = batch_index / (real_batch_size); + if (shard == 0 && batch_index == 0 && backend_data->use_shard == false) { + backend_data->use_shard = false; + } else { + backend_data->use_shard = true; + } + int real_batch_index = batch_index % real_batch_size; + + TfLiteTensor* tensor = nullptr; + if (backend_data->use_shard) { + tensor = TfLiteInterpreterGetInputTensor(backend_data->interpreter8[shard], i); + } else { + tensor = TfLiteInterpreterGetInputTensor(backend_data->interpreter[shard], i); + } + if (real_batch_index == 0 && backend_data->use_shard == false) { + if (backend_data->original_tensor_size == 0) { + backend_data->original_tensor_size = tensor->bytes; + memcpy((char*)tensor->data.raw, (char*)data, backend_data->original_tensor_size); + backend_data->has_temp_data = true; + } else { + tensor->data.raw = (char*)data; + } + } + + if (backend_data->use_shard) { + if (backend_data->acc_data[shard][i] == nullptr) { + backend_data->acc_data[shard][i] = malloc(real_batch_size * backend_data->original_tensor_size); + } + if (backend_data->has_temp_data) { + TfLiteTensor* tensor = TfLiteInterpreterGetInputTensor(backend_data->interpreter[shard], i); + memcpy( + (char*)backend_data->acc_data[shard][i], + (char*)tensor->data.raw, backend_data->original_tensor_size); + backend_data->has_temp_data = false; + } + memcpy( + ((char*)backend_data->acc_data[shard][i] + + ((batch_index % real_batch_size) * backend_data->original_tensor_size)), + data, backend_data->original_tensor_size); + if (real_batch_index == (real_batch_size - 1)) { + tensor->data.raw = (char*)backend_data->acc_data[shard][i]; + } + } + + // Allocate tensors. 
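+  // Once the last input tensor of the shard's batch has been staged, submit
+  // that shard's invocation to the thread pool.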
+ if (((batch_index+1) % real_batch_size) == 0 && i == (backend_data->input_tensor_count - 1)) { + auto task = [](TFLiteBackendData* backend_data, int index) -> TfLiteStatus { + if (backend_data->use_shard) { + return TfLiteInterpreterInvoke(backend_data->interpreter8[index]); + } else { + return TfLiteInterpreterInvoke(backend_data->interpreter[index]); + } + }; + + // dispatch workers + if (backend_data->use_shard) { + backend_data->status[shard] = backend_data->executer->submit(task, backend_data, shard); + } + } + + return MLPERF_SUCCESS; +} + +// Return the number of outputs for the model. +int32_t mlperf_backend_get_output_count(mlperf_backend_ptr_t backend_ptr) { + TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr; + if (backend_data->use_shard) { + return TfLiteInterpreterGetOutputTensorCount(backend_data->interpreter8[0]); + } else { + return TfLiteInterpreterGetOutputTensorCount(backend_data->interpreter[0]); + } +} + +// Return the type of ith output. +mlperf_data_t mlperf_backend_get_output_type(mlperf_backend_ptr_t backend_ptr, + int32_t i) { + TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr; + const TfLiteTensor* tensor; + if (backend_data->use_shard) { + tensor = TfLiteInterpreterGetOutputTensor(backend_data->interpreter8[0], i); + } else { + tensor = TfLiteInterpreterGetOutputTensor(backend_data->interpreter[0], i); + } + mlperf_data_t type; + type.type = TfType2Type(TfLiteTensorType(tensor)); + type.size = TFLiteNumElements(tensor); + return type; +} + +// Get the data from ith output. +mlperf_status_t mlperf_backend_get_output(mlperf_backend_ptr_t backend_ptr, + uint32_t batch_index, int32_t i, + void** data) { + TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr; + const int real_batch_size = + (backend_data->use_shard) ? 
backend_data->batch_size / N_OFFLINE_INTEPRETERS : 1;
+  const int shard = batch_index / (real_batch_size);
+
+  if (backend_data->use_shard) {
+    if (backend_data->status[shard].valid()) {
+      if (backend_data->status[shard].get() != kTfLiteOk) {
+        printf("Failed to get output: %d", shard);
+        return MLPERF_FAILURE;
+      }
+    }
+  }
+
+  const TfLiteTensor* output_tensor;
+  if (backend_data->use_shard) {
+    output_tensor =
+        TfLiteInterpreterGetOutputTensor(backend_data->interpreter8[shard], i);
+  } else {
+    output_tensor =
+        TfLiteInterpreterGetOutputTensor(backend_data->interpreter[shard], i);
+  }
+  batch_index %= (real_batch_size);
+  int non_batch_size = 1;
+  for (int d = 1; d < output_tensor->dims->size; d++) {
+    non_batch_size *= output_tensor->dims->data[d];
+  }
+  switch (output_tensor->type) {
+    case kTfLiteFloat32:
+      *data = (output_tensor->data.f + (batch_index * non_batch_size));
+      break;
+    case kTfLiteUInt8:
+      *data = (output_tensor->data.uint8 + (batch_index * non_batch_size));
+      break;
+    case kTfLiteInt8:
+      *data = (output_tensor->data.int8 + (batch_index * non_batch_size));
+      break;
+    case kTfLiteFloat16:
+      *data = (output_tensor->data.f16 + (batch_index * non_batch_size));
+      break;
+    case kTfLiteInt32:
+      *data = (output_tensor->data.i32 + (batch_index * non_batch_size));
+      break;
+    case kTfLiteInt64:
+      *data = (output_tensor->data.i64 + (batch_index * non_batch_size));
+      break;
+    default:
+      printf("Data type not yet supported");
+      return MLPERF_FAILURE;
+  }
+  return MLPERF_SUCCESS;
+}
diff --git a/mobile_back_pixel/cpp/backend_tflite/tflite_settings_pixel.h b/mobile_back_pixel/cpp/backend_tflite/tflite_settings_pixel.h
new file mode 100644
index 000000000..297f39921
--- /dev/null
+++ b/mobile_back_pixel/cpp/backend_tflite/tflite_settings_pixel.h
@@ -0,0 +1,134 @@
+/* Copyright 2020-2021 The MLPerf Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <string>
+
+#ifndef TFLITE_SETTINGS_H
+#define TFLITE_SETTINGS_H
+
+const std::string tflite_settings = R"SETTINGS(
+common_setting {
+  id: "num_threads"
+  name: "Number of threads"
+  value {
+    value: "2"
+    name: "2 threads"
+  }
+  acceptable_value {
+    value: "1"
+    name: "Single thread"
+  }
+  acceptable_value {
+    value: "2"
+    name: "2 threads"
+  }
+  acceptable_value {
+    value: "4"
+    name: "4 threads"
+  }
+  acceptable_value {
+    value: "8"
+    name: "8 threads"
+  }
+  acceptable_value {
+    value: "16"
+    name: "16 threads"
+  }
+}
+
+common_setting {
+  id: "configuration"
+  name: "Configuration"
+  value {
+    value: "TFLite backend using NNAPI, GPU delegate or CPU delegate."
+ name: "Default" + } +} + +common_setting { + id: "share_results" + name: "Share results" + value { + value: "0" + name: "false" + } + acceptable_value { + value: "1" + name: "true" + } + acceptable_value { + value: "0" + name: "false" + } +} + +common_setting { + id: "cooldown" + name: "Cooldown" + value { + value: "0" + name: "false" + } + acceptable_value { + value: "1" + name: "true" + } + acceptable_value { + value: "0" + name: "false" + } +} + +benchmark_setting { + benchmark_id: "IC_tpu_uint8" + accelerator: "nnapi" + accelerator_desc: "NNAPI" + configuration: "TFLite" + src: "https://github.com/mlcommons/mobile_models/raw/main/v0_7/tflite/mobilenet_edgetpu_224_1.0_uint8.tflite" +} + +benchmark_setting { + benchmark_id: "IC_tpu_uint8_offline" + accelerator: "nnapi" + accelerator_desc: "NNAPI" + configuration: "TFLite" + batch_size: 64 + src: "https://github.com/mlcommons/mobile_models/raw/main/v0_7/tflite/mobilenet_edgetpu_224_1.0_uint8.tflite" +} + +benchmark_setting { + benchmark_id: "OD_uint8" + accelerator: "nnapi" + accelerator_desc: "NNAPI" + configuration: "TFLite" + src: "https://github.com/mlcommons/mobile_models/raw/main/v1_0/tflite/mobiledet_qat.tflite" +} + +benchmark_setting { + benchmark_id: "LU_float32" + accelerator: "nnapi" + accelerator_desc: "NNAPI" + configuration: "TFLite" + src: "https://github.com/mlcommons/mobile_models/raw/main/v0_7/tflite/mobilebert_int8_384_nnapi.tflite" +} + +benchmark_setting { + benchmark_id: "IS_uint8" + accelerator: "nnapi" + accelerator_desc: "NNAPI" + configuration: "TFLite" + src: "https://github.com/mlcommons/mobile_models/raw/Google/v1_0/Google/deeplabv3.tflite" +})SETTINGS"; + +#endif diff --git a/mobile_back_pixel/cpp/backend_tflite/thread_pool.h b/mobile_back_pixel/cpp/backend_tflite/thread_pool.h new file mode 100644 index 000000000..dd1693693 --- /dev/null +++ b/mobile_back_pixel/cpp/backend_tflite/thread_pool.h @@ -0,0 +1,110 @@ +/* Copyright 2021 The MLPerf Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +/* Copyright (c) 2012 Jakob Progsch, Václav Zeman + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + + 3. This notice may not be removed or altered from any source + distribution. 
+==============================================================================*/
+// reference:
+// https://github.com/progschj/ThreadPool
+
+#ifndef THREAD_POOL_H_
+#define THREAD_POOL_H_
+
+#include <atomic>
+#include <cassert>
+#include <condition_variable>
+#include <functional>
+#include <future>
+#include <iostream>
+#include <memory>
+#include <mutex>
+#include <queue>
+#include <thread>
+#include <vector>
+
+class Threadpool {
+ private:
+  std::vector<std::thread> pool;
+  std::queue<std::function<void()>> tasks;
+  // synchronization variable
+  std::mutex m_lock;
+  std::condition_variable cond_var;
+  std::atomic<bool> available;
+
+ public:
+  inline Threadpool(size_t thread_num) : available(true) {
+    for (size_t i = 0; i < thread_num; i++) {
+      pool.emplace_back([this] {
+        while (available.load()) {
+          std::function<void()> picked_task;
+          // consumer in critical section
+          {
+            std::unique_lock<std::mutex> lock(this->m_lock);
+            auto wait_until = [this]() -> bool {
+              return !this->available.load() || !this->tasks.empty();
+            };
+            this->cond_var.wait(lock, wait_until);
+            if (!this->available && this->tasks.empty()) return;
+            picked_task = std::move(this->tasks.front());
+            this->tasks.pop();
+          }
+          // invoke picked task
+          picked_task();
+        }
+      });
+    }
+  }
+  ~Threadpool() {
+    available.store(false);
+    cond_var.notify_all();
+    for (auto& worker : pool) {
+      if (worker.joinable()) worker.join();
+    }
+  }
+
+  template <typename Fn, typename... Args>
+  auto submit(Fn&& fn, Args&&... args) -> std::future<decltype(fn(args...))> {
+    if (!available.load()) {
+      std::cerr << "don't accept new tasks after stop." << std::endl;
+      assert(false);
+    }
+    using FnReturnType = decltype(fn(args...));
+    auto enq_task = std::make_shared<std::packaged_task<FnReturnType()>>(
+        std::bind(std::forward<Fn>(fn), std::forward<Args>(args)...));
+    std::future<FnReturnType> ret = enq_task->get_future();
+    // producer in critical section
+    {
+      std::lock_guard<std::mutex> lock(m_lock);
+      tasks.emplace([enq_task]() { (*enq_task)(); });
+    }
+    cond_var.notify_one();
+    return ret;
+  }
+};
+#endif  // THREAD_POOL_H_
diff --git a/mobile_back_pixel/pixel_backend.mk b/mobile_back_pixel/pixel_backend.mk
new file mode 100644
index 000000000..0cfeeb75b
--- /dev/null
+++ b/mobile_back_pixel/pixel_backend.mk
@@ -0,0 +1,21 @@
+# Copyright 2021 The MLPerf Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+##########################################################################
+
+# Set the included backends
+WITH_PIXEL=0
+
+ifeq (${WITH_PIXEL},1)
+  PIXEL_BACKEND=--//android/java/org/mlperf/inference:with_pixel="1"
+endif
diff --git a/mobile_back_pixel/third_party/BUILD.bazel b/mobile_back_pixel/third_party/BUILD.bazel
new file mode 100644
index 000000000..ab723dce9
--- /dev/null
+++ b/mobile_back_pixel/third_party/BUILD.bazel
@@ -0,0 +1,19 @@
+# Copyright 2020 The MLPerf Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. +########################################################################## + +licenses(["notice"]) # Apache 2.0 + +package(default_visibility = ["//visibility:public"]) + diff --git a/mobile_back_pixel/third_party/tf_gpu_delegate_fix_from_tf_master.diff b/mobile_back_pixel/third_party/tf_gpu_delegate_fix_from_tf_master.diff new file mode 100644 index 000000000..05a6002d1 --- /dev/null +++ b/mobile_back_pixel/third_party/tf_gpu_delegate_fix_from_tf_master.diff @@ -0,0 +1,37 @@ +diff --git a/tensorflow/lite/delegates/gpu/cl/BUILD b/tensorflow/lite/delegates/gpu/cl/BUILD +index b37629a97aa..4d5e9682789 100644 +--- a/tensorflow/lite/delegates/gpu/cl/BUILD ++++ b/tensorflow/lite/delegates/gpu/cl/BUILD +@@ -3,6 +3,7 @@ load( + "//tensorflow/core/platform:build_config_root.bzl", + "tf_gpu_tests_tags", + ) ++load("//tensorflow:tensorflow.bzl", "workspace_root") + + package( + default_visibility = ["//visibility:public"], +@@ -490,7 +491,7 @@ flatbuffer_cc_library( + srcs = ["serialization.fbs"], + flatc_args = [ + "--scoped-enums", +- "-I ./", ++ "-I ./" + workspace_root, + ], + includes = [ + "//tensorflow/lite/delegates/gpu/common/task:serialization_base_cc_fbs_includes", +diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl +index a9da708bb53..d130169d7ad 100644 +--- a/tensorflow/tensorflow.bzl ++++ b/tensorflow/tensorflow.bzl +@@ -52,6 +52,11 @@ VERSION = "2.5.0" + VERSION_MAJOR = VERSION.split(".")[0] + two_gpu_tags = ["requires-gpu-nvidia:2", "notap", "manual", "no_pip"] + ++# The workspace root, to be used to set workspace 'include' paths in a way that ++# will still work correctly when TensorFlow is included as a dependency of an ++# external project. ++workspace_root = Label("//:WORKSPACE").workspace_root or "." ++ + def clean_dep(target): + """Returns string to 'target' in @org_tensorflow repository. + diff --git a/mobile_back_pixel/third_party/tf_grappler_cost.diff b/mobile_back_pixel/third_party/tf_grappler_cost.diff new file mode 100644 index 000000000..58391ea3e --- /dev/null +++ b/mobile_back_pixel/third_party/tf_grappler_cost.diff @@ -0,0 +1,19 @@ +diff --git a/tensorflow/core/grappler/costs/BUILD b/tensorflow/core/grappler/costs/BUILD +index b205f5c3e5..b64f4c7dc9 100644 +--- a/tensorflow/core/grappler/costs/BUILD ++++ b/tensorflow/core/grappler/costs/BUILD +@@ -31,7 +31,13 @@ tf_proto_library( + srcs = ["op_performance_data.proto"], + cc_api_version = 2, + make_default_target_header_only = True, +- protodeps = tf_additional_all_protos(), ++ protodeps = [ ++ "//tensorflow/core/framework:attr_value_proto", ++ "//tensorflow/core/framework:resource_handle_proto", ++ "//tensorflow/core/framework:tensor_proto", ++ "//tensorflow/core/framework:tensor_shape_proto", ++ "//tensorflow/core/protobuf:for_core_protos", ++ ], + visibility = ["//visibility:public"], + ) + diff --git a/mobile_back_tflite/cpp/backend_tflite/tflite_settings.h b/mobile_back_tflite/cpp/backend_tflite/tflite_settings.h index 3e7586866..da812a93d 100644 --- a/mobile_back_tflite/cpp/backend_tflite/tflite_settings.h +++ b/mobile_back_tflite/cpp/backend_tflite/tflite_settings.h @@ -12,11 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/
-#include <string>
-
 #ifndef TFLITE_SETTINGS_H
 #define TFLITE_SETTINGS_H
 
+#include <string>
+
 const std::string tflite_settings = R"SETTINGS(
 common_setting {
   id: "num_threads"
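
---

The backend stays off by default: `pixel_backend.mk` sets `WITH_PIXEL=0`, and only `WITH_PIXEL=1` adds the `--//android/java/org/mlperf/inference:with_pixel="1"` flag to the bazel invocation. A minimal sketch of building the app with the Pixel backend enabled, assuming the Docker-based `app` target from the top-level Makefile shown above:

```bash
# Command-line variables override the WITH_PIXEL=0 default in pixel_backend.mk,
# so this selects the use_pixel config_setting, links
# //mobile_back_pixel:tflitepixelbackend into the app, and adds "tflitepixel"
# to BACKEND_LIST in the generated Backends.java.
make WITH_PIXEL=1 app
```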