-
Notifications
You must be signed in to change notification settings - Fork 706
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Use updated patch from #17101
- Loading branch information
Showing
3 changed files
with
32 additions
and
27 deletions.
There are no files selected for viewing
55 changes: 30 additions & 25 deletions
55
easybuild/easyconfigs/t/TensorFlow/TensorFlow-2.5.0_fix-arm-vector-intrinsics.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,37 +1,42 @@ | ||
The comment is not true; the function actually takes the arguments as it should. | ||
Hence, just redefine the function. | ||
Fix compile error on ARM: | ||
> ./tensorflow/lite/kernels/internal/optimized/depthwiseconv_3x3_filter_common.h:132:58: error: cannot convert 'int32x2_t' to 'int8x8_t' | ||
|
||
Author: Alexander Grund (TU Dresden) | ||
From https://github.com/tensorflow/tensorflow/pull/53782 | ||
|
||
From 4463f25d1622d162f870ff685da20f2c6df5bc6a Mon Sep 17 00:00:00 2001 | ||
From: Stephan Hartmann <[email protected]> | ||
Date: Sat, 15 Jan 2022 21:06:27 +0100 | ||
Subject: [PATCH] Fix casting in vdotq_four_lane_s32() in TFLite | ||
|
||
When building with GCC and the dotprod ARM extension enabled, | ||
vreinterpret_s32_s8() casts int8x8_t to int32x2_t. However, the third | ||
argument of vdotq_lane_s32() expects a parameter of type int8x8_t. | ||
--- | ||
.../optimized/depthwiseconv_3x3_filter_common.h | 10 ++++------ | ||
1 file changed, 4 insertions(+), 6 deletions(-) | ||
|
||
diff --git a/tensorflow/lite/kernels/internal/optimized/depthwiseconv_3x3_filter_common.h b/tensorflow/lite/kernels/internal/optimized/depthwiseconv_3x3_filter_common.h | ||
index 916edd561ff..9c8025dac49 100644 | ||
index 916edd561ff32..c519a81bc864d 100644 | ||
--- a/tensorflow/lite/kernels/internal/optimized/depthwiseconv_3x3_filter_common.h | ||
+++ b/tensorflow/lite/kernels/internal/optimized/depthwiseconv_3x3_filter_common.h | ||
@@ -122,26 +122,7 @@ inline int32x4_t vpaddq_s32(int32x4_t a, int32x4_t b) { | ||
#endif // !__aarch64__ | ||
|
||
#ifdef __ARM_FEATURE_DOTPROD | ||
-// The vdotq_lane_s32 takes int8x8t for the rhs parameter, whereas the actual | ||
-// instruction selects from between 4 32-bit (4x8-bit packed) sub-registers, an | ||
-// unusual interpretation of "lane". | ||
-inline int32x4_t vdotq_four_lane_s32(int32x4_t acc, int8x16_t lhs, | ||
- int8x16_t rhs, const int lane) { | ||
- switch (lane) { | ||
- case 0: | ||
@@ -129,16 +129,14 @@ inline int32x4_t vdotq_four_lane_s32(int32x4_t acc, int8x16_t lhs, | ||
int8x16_t rhs, const int lane) { | ||
switch (lane) { | ||
case 0: | ||
- return vdotq_lane_s32(acc, lhs, vreinterpret_s32_s8(vget_low_s8(rhs)), 0); | ||
- case 1: | ||
+ return vdotq_lane_s32(acc, lhs, vget_low_s8(rhs), 0); | ||
case 1: | ||
- return vdotq_lane_s32(acc, lhs, vreinterpret_s32_s8(vget_low_s8(rhs)), 1); | ||
- case 2: | ||
+ return vdotq_lane_s32(acc, lhs, vget_low_s8(rhs), 1); | ||
case 2: | ||
- return vdotq_lane_s32(acc, lhs, vreinterpret_s32_s8(vget_high_s8(rhs)), | ||
- 0); | ||
- case 3: | ||
- default: | ||
+ return vdotq_lane_s32(acc, lhs, vget_high_s8(rhs), 0); | ||
case 3: | ||
default: | ||
- return vdotq_lane_s32(acc, lhs, vreinterpret_s32_s8(vget_high_s8(rhs)), | ||
- 1); | ||
- } | ||
-} | ||
- | ||
+#define vdotq_four_lane_s32 vdotq_lane_s32 | ||
#else | ||
+ return vdotq_lane_s32(acc, lhs, vget_high_s8(rhs), 1); | ||
} | ||
} | ||
|
||
inline int32x4_t vdotq_s32(int32x4_t acc, int8x16_t lhs, int8x16_t rhs) { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters