diff --git a/easybuild/easyconfigs/t/TensorFlow/TensorFlow-2.5.0_fix-arm-vector-intrinsics.patch b/easybuild/easyconfigs/t/TensorFlow/TensorFlow-2.5.0_fix-arm-vector-intrinsics.patch index 5d80e47df8f..eaed0decf23 100644 --- a/easybuild/easyconfigs/t/TensorFlow/TensorFlow-2.5.0_fix-arm-vector-intrinsics.patch +++ b/easybuild/easyconfigs/t/TensorFlow/TensorFlow-2.5.0_fix-arm-vector-intrinsics.patch @@ -1,37 +1,42 @@ -The comment is not true, the function actually takes the arguments as it should -Hence just redefine the function +Fix compile error on ARM: +> ./tensorflow/lite/kernels/internal/optimized/depthwiseconv_3x3_filter_common.h:132:58: error: cannot convert 'int32x2_t' to 'int8x8_t' -Author: Alexander Grund (TU Dresden) +From https://github.com/tensorflow/tensorflow/pull/53782 + +From 4463f25d1622d162f870ff685da20f2c6df5bc6a Mon Sep 17 00:00:00 2001 +From: Stephan Hartmann +Date: Sat, 15 Jan 2022 21:06:27 +0100 +Subject: [PATCH] Fix casting in vdotq_four_lane_s32() in TFLite + +When building with GCC and dotprod ARM extension enabled, +vreinterpret_s32_s8() casts int8x8_t to int32x2_t. However, third +argument of vdotq_lane_s32() expects parameter of type int8x8_t. +--- + .../optimized/depthwiseconv_3x3_filter_common.h | 10 ++++------ + 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tensorflow/lite/kernels/internal/optimized/depthwiseconv_3x3_filter_common.h b/tensorflow/lite/kernels/internal/optimized/depthwiseconv_3x3_filter_common.h -index 916edd561ff..9c8025dac49 100644 +index 916edd561ff32..c519a81bc864d 100644 --- a/tensorflow/lite/kernels/internal/optimized/depthwiseconv_3x3_filter_common.h +++ b/tensorflow/lite/kernels/internal/optimized/depthwiseconv_3x3_filter_common.h -@@ -122,26 +122,7 @@ inline int32x4_t vpaddq_s32(int32x4_t a, int32x4_t b) { - #endif // !__aarch64__ - - #ifdef __ARM_FEATURE_DOTPROD --// The vdotq_lane_s32 takes int8x8t for the rhs parameter, whereas the actual --// instruction selects from between 4 32-bit (4x8-bit packed) sub-registers, an --// unusual interpretation of "lane". --inline int32x4_t vdotq_four_lane_s32(int32x4_t acc, int8x16_t lhs, -- int8x16_t rhs, const int lane) { -- switch (lane) { -- case 0: +@@ -129,16 +129,14 @@ inline int32x4_t vdotq_four_lane_s32(int32x4_t acc, int8x16_t lhs, + int8x16_t rhs, const int lane) { + switch (lane) { + case 0: - return vdotq_lane_s32(acc, lhs, vreinterpret_s32_s8(vget_low_s8(rhs)), 0); -- case 1: ++ return vdotq_lane_s32(acc, lhs, vget_low_s8(rhs), 0); + case 1: - return vdotq_lane_s32(acc, lhs, vreinterpret_s32_s8(vget_low_s8(rhs)), 1); -- case 2: ++ return vdotq_lane_s32(acc, lhs, vget_low_s8(rhs), 1); + case 2: - return vdotq_lane_s32(acc, lhs, vreinterpret_s32_s8(vget_high_s8(rhs)), - 0); -- case 3: -- default: ++ return vdotq_lane_s32(acc, lhs, vget_high_s8(rhs), 0); + case 3: + default: - return vdotq_lane_s32(acc, lhs, vreinterpret_s32_s8(vget_high_s8(rhs)), - 1); -- } --} -- -+#define vdotq_four_lane_s32 vdotq_lane_s32 - #else ++ return vdotq_lane_s32(acc, lhs, vget_high_s8(rhs), 1); + } + } - inline int32x4_t vdotq_s32(int32x4_t acc, int8x16_t lhs, int8x16_t rhs) { diff --git a/easybuild/easyconfigs/t/TensorFlow/TensorFlow-2.9.1-foss-2022a-CUDA-11.7.0.eb b/easybuild/easyconfigs/t/TensorFlow/TensorFlow-2.9.1-foss-2022a-CUDA-11.7.0.eb index 8b7d6051102..7d8fc26cca1 100644 --- a/easybuild/easyconfigs/t/TensorFlow/TensorFlow-2.9.1-foss-2022a-CUDA-11.7.0.eb +++ b/easybuild/easyconfigs/t/TensorFlow/TensorFlow-2.9.1-foss-2022a-CUDA-11.7.0.eb @@ -193,7 +193,7 @@ exts_list = [ {'TensorFlow-2.5.0-fix-alias-violation-in-absl.patch': '12454fda3330fb45cd380377e283f04488b40e0b8ae7378e786ddf731a581f75'}, {'TensorFlow-2.5.0_fix-arm-vector-intrinsics.patch': - '6abfadc0f67ff3b510d70430843201cb46d7bd65db045ec9b482af70e0c8c0c8'}, + '5edea55ce87d5adb14f6ed6996f308879e268b8cec760cf11288e3a56179a029'}, {'TensorFlow-2.5.0_fix-crash-on-shutdown.patch': '578c7493221ebd3dc25ca43d63a72cbb28fdf4112b1e2baa7390f25781bd78fd'}, {'TensorFlow-2.7.1_fix_cpu_count.patch': diff --git a/easybuild/easyconfigs/t/TensorFlow/TensorFlow-2.9.1-foss-2022a.eb b/easybuild/easyconfigs/t/TensorFlow/TensorFlow-2.9.1-foss-2022a.eb index 361164c8f26..434de47a58b 100644 --- a/easybuild/easyconfigs/t/TensorFlow/TensorFlow-2.9.1-foss-2022a.eb +++ b/easybuild/easyconfigs/t/TensorFlow/TensorFlow-2.9.1-foss-2022a.eb @@ -189,7 +189,7 @@ exts_list = [ {'TensorFlow-2.5.0-fix-alias-violation-in-absl.patch': '12454fda3330fb45cd380377e283f04488b40e0b8ae7378e786ddf731a581f75'}, {'TensorFlow-2.5.0_fix-arm-vector-intrinsics.patch': - '6abfadc0f67ff3b510d70430843201cb46d7bd65db045ec9b482af70e0c8c0c8'}, + '5edea55ce87d5adb14f6ed6996f308879e268b8cec760cf11288e3a56179a029'}, {'TensorFlow-2.5.0_fix-crash-on-shutdown.patch': '578c7493221ebd3dc25ca43d63a72cbb28fdf4112b1e2baa7390f25781bd78fd'}, {'TensorFlow-2.7.1_fix_cpu_count.patch':