diff --git a/Makefile.am b/Makefile.am
index 838407aa78..85ff31a081 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -199,6 +199,15 @@ libtesseract_la_LIBADD += libtesseract_neon.la
 noinst_LTLIBRARIES += libtesseract_neon.la
 endif
 
+if HAVE_RVV
+libtesseract_rvv_la_CXXFLAGS = $(RVV_CXXFLAGS)
+libtesseract_rvv_la_CXXFLAGS += -O3
+libtesseract_rvv_la_CXXFLAGS += -I$(top_srcdir)/src/ccutil
+libtesseract_rvv_la_SOURCES = src/arch/intsimdmatrixrvv.cpp
+libtesseract_la_LIBADD += libtesseract_rvv.la
+noinst_LTLIBRARIES += libtesseract_rvv.la
+endif
+
 libtesseract_la_SOURCES += src/arch/intsimdmatrix.cpp
 libtesseract_la_SOURCES += src/arch/simddetect.cpp
 
diff --git a/configure.ac b/configure.ac
index 139e2352da..0dc0db00af 100644
--- a/configure.ac
+++ b/configure.ac
@@ -131,6 +131,7 @@ AM_CONDITIONAL([HAVE_AVX512F], false)
 AM_CONDITIONAL([HAVE_FMA], false)
 AM_CONDITIONAL([HAVE_SSE4_1], false)
 AM_CONDITIONAL([HAVE_NEON], false)
+AM_CONDITIONAL([HAVE_RVV], false)
 
 case "${host_cpu}" in
 
@@ -188,6 +189,16 @@ case "${host_cpu}" in
 
     ;;
 
+  riscv*)
+
+    AX_CHECK_COMPILE_FLAG([-march=rv64gcv], [rvv=true], [rvv=false], [$WERROR])
+    AM_CONDITIONAL([HAVE_RVV], [$rvv])
+    if $rvv; then
+      AC_DEFINE([HAVE_RVV], [1], [Enable RVV instructions])
+      check_for_rvv=1
+    fi
+    ;;
+
   *)
 
     AC_MSG_WARN([No compiler options for $host_cpu])
@@ -207,6 +218,16 @@ if test x$check_for_neon = x1; then
   fi
 fi
 
+# additional checks for RVV targets
+if test x$check_for_rvv = x1; then
+  AC_MSG_NOTICE([checking how to detect RVV availability])
+  AC_CHECK_FUNCS([getauxval])
+
+  if test $ac_cv_func_getauxval = no; then
+    AC_MSG_WARN([RVV is available, but we don't know how to check for it. Will not be able to use RVV.])
+  fi
+fi
+
 AX_CHECK_COMPILE_FLAG([-fopenmp-simd], [openmp_simd=true], [openmp_simd=false], [$WERROR])
 AM_CONDITIONAL([OPENMP_SIMD], $openmp_simd)
 
diff --git a/src/arch/intsimdmatrix.h b/src/arch/intsimdmatrix.h
index d93f928dbc..af88ab49c7 100644
--- a/src/arch/intsimdmatrix.h
+++ b/src/arch/intsimdmatrix.h
@@ -115,6 +115,8 @@ struct TESS_API IntSimdMatrix {
   static const IntSimdMatrix *intSimdMatrix;
   // Only available with NEON.
   static const IntSimdMatrix intSimdMatrixNEON;
+  // Only available with RVV.
+  static const IntSimdMatrix intSimdMatrixRVV;
   // Only available with AVX2 / AVX / FMA / SSE.
   static const IntSimdMatrix intSimdMatrixAVX2;
   static const IntSimdMatrix intSimdMatrixSSE;
diff --git a/src/arch/intsimdmatrixrvv.cpp b/src/arch/intsimdmatrixrvv.cpp
new file mode 100644
index 0000000000..cd0ee68098
--- /dev/null
+++ b/src/arch/intsimdmatrixrvv.cpp
@@ -0,0 +1,96 @@
+///////////////////////////////////////////////////////////////////////
+// File:        intsimdmatrixrvv.cpp
+// Description: matrix-vector product for 8-bit data on rvv.
+// Author:      sunyuechi
+//
+// Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+///////////////////////////////////////////////////////////////////////
+
+#ifdef HAVE_CONFIG_H
+#  include "config_auto.h" // for HAVE_RVV, ...
+#endif
+
+#if HAVE_RVV
+#  include "intsimdmatrix.h"
+#  include "tesstypes.h"
+
+namespace tesseract {
+
+// Computes the dot product of the int8 vectors u and v, each holding num
+// elements, using RVV instructions: 8-bit lanes are multiplied into 16-bit
+// products, which are accumulated into 32-bit lanes and finally reduced to
+// a single 32-bit sum.
+static int DotProduct(const int8_t *u, const int8_t *v, int num) {
+  int total = 0;
+
+  asm __volatile__ (
+    "  .option       arch, +v                   \n\t" // allow V instructions in this asm
+    "  vsetvli t0,zero,e32,m8,ta,ma             \n\t"
+    "  vmv.v.i v0,0                             \n\t" // clear the 32-bit accumulators
+    "1:                                         \n\t"
+    "  vsetvli t0,%[num],e8,m2,ta,ma            \n\t" // t0 = elements handled this pass
+    "  vle8.v v16,0(%[u])                       \n\t"
+    "  vle8.v v24,0(%[v])                       \n\t"
+    "  sub %[num],%[num],t0                     \n\t"
+    "  vwmul.vv v8,v24,v16                      \n\t" // 8-bit x 8-bit -> 16-bit products
+    "  add %[u],%[u],t0                         \n\t"
+    "  add %[v],%[v],t0                         \n\t"
+    "  vsetvli zero,zero,e16,m4,tu,ma           \n\t" // tu: keep sums in lanes past vl
+    "  vwadd.wv v0,v0,v8                        \n\t" // 32-bit accumulators += products
+    "  bnez %[num],1b                           \n\t"
+    "  vsetvli t0,zero,e32,m8,ta,ma             \n\t"
+    "  vmv.s.x v8,zero                          \n\t"
+    "  vredsum.vs v0,v0,v8                      \n\t" // sum all 32-bit lanes
+    "  vmv.x.s %[total],v0                      \n\t"
+    : [u] "+r" (u),
+      [v] "+r" (v),
+      [num] "+r" (num),
+      [total] "+r" (total)
+    :
+    // t0 is written by vsetvli; vector registers are left unlisted (see review notes).
+    : "cc", "memory", "t0"
+  );
+
+  return total;
+}
+
+// Computes v[i] = (W[i] . u + bias[i] * INT8_MAX) * scales[i] for each of the
+// dim1 output rows. Every row of wi holds dim2 int8 values: dim2 - 1 weights
+// followed by one bias value.
+static void matrixDotVector(int dim1, int dim2, const int8_t *wi, const TFloat *scales,
+                            const int8_t *u, TFloat *v) {
+  int num_out = dim1;
+  int num_in = dim2 - 1;
+  for (int i = 0; i < num_out; ++i) {
+    const int8_t *wi_start = wi + i * dim2;
+    int total = DotProduct(wi_start, u, num_in);
+    // Add in the bias and apply scaling.
+    v[i] = (total + wi_start[num_in] * INT8_MAX) * scales[i];
+  }
+}
+
+const IntSimdMatrix IntSimdMatrix::intSimdMatrixRVV = {
+    // Function.
+    matrixDotVector,
+    // Number of 32 bit outputs held in each register.
+    1,
+    // Maximum number of registers that we will use to hold outputs.
+    1,
+    // Number of 8 bit inputs in the inputs register.
+    1,
+    // Number of inputs in each weight group.
+    1
+};
+
+} // namespace tesseract.
+
+#endif /* HAVE_RVV */
diff --git a/src/arch/simddetect.cpp b/src/arch/simddetect.cpp
index 1afe5a5d81..9acd78a886 100644
--- a/src/arch/simddetect.cpp
+++ b/src/arch/simddetect.cpp
@@ -65,6 +65,14 @@
 #  endif
 #endif
 
+#if defined(HAVE_RVV)
+#  if defined(HAVE_GETAUXVAL)
+#    include <sys/auxv.h>
+// AT_HWCAP bit for a single-letter RISC-V ISA extension, e.g. HWCAP_RV('V').
+#    define HWCAP_RV(letter) (1ul << ((letter) - 'A'))
+#  endif
+#endif
+
 namespace tesseract {
 
 // Computes and returns the dot product of the two n-vectors u and v.
@@ -89,6 +97,9 @@ bool SIMDDetect::neon_available_ = true;
 #elif defined(HAVE_NEON)
 // If true, then Neon has been detected.
 bool SIMDDetect::neon_available_;
+#elif defined(HAVE_RVV)
+// If true, then RVV has been detected.
+bool SIMDDetect::rvv_available_;
 #else
 // If true, then AVX has been detected.
 bool SIMDDetect::avx_available_;
@@ -229,6 +240,13 @@ SIMDDetect::SIMDDetect() {
     elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap);
     neon_available_ = hwcap & HWCAP_NEON;
 #  endif
+#endif
+
+#if defined(HAVE_RVV)
+#  if defined(HAVE_GETAUXVAL)
+  const unsigned long hwcap = getauxval(AT_HWCAP);
+  rvv_available_ = hwcap & HWCAP_RV('V');
+#  endif
 #endif
 
   // Select code for calculation of dot product based on autodetection.
@@ -258,6 +276,11 @@ SIMDDetect::SIMDDetect() {
   } else if (neon_available_) {
     // NEON detected.
     SetDotProduct(DotProductNEON, &IntSimdMatrix::intSimdMatrixNEON);
+#endif
+#if defined(HAVE_RVV)
+  } else if (rvv_available_) {
+    // RVV detected.
+    SetDotProduct(DotProductGeneric, &IntSimdMatrix::intSimdMatrixRVV);
 #endif
   }
 
diff --git a/src/arch/simddetect.h b/src/arch/simddetect.h
index fcb0f53eca..5d4eb33880 100644
--- a/src/arch/simddetect.h
+++ b/src/arch/simddetect.h
@@ -63,6 +63,10 @@ class SIMDDetect {
   static inline bool IsNEONAvailable() {
     return detector.neon_available_;
   }
+  // Returns true if RVV is available on this system.
+  static inline bool IsRVVAvailable() {
+    return detector.rvv_available_;
+  }
 
   // Update settings after config variable was set.
   static TESS_API void Update();
@@ -86,6 +90,8 @@ class SIMDDetect {
   static TESS_API bool sse_available_;
   // If true, then NEON has been detected.
   static TESS_API bool neon_available_;
+  // If true, then RVV has been detected.
+  static TESS_API bool rvv_available_;
 };
 
 } // namespace tesseract
diff --git a/src/tesseract.cpp b/src/tesseract.cpp
index 3ed330d8ca..4558ca388a 100644
--- a/src/tesseract.cpp
+++ b/src/tesseract.cpp
@@ -112,6 +112,9 @@ static void PrintVersionInfo() {
 #if defined(HAVE_NEON) || defined(__aarch64__)
   if (tesseract::SIMDDetect::IsNEONAvailable())
     printf(" Found NEON\n");
+#elif defined(HAVE_RVV)
+  if (tesseract::SIMDDetect::IsRVVAvailable())
+    printf(" Found RVV\n");
 #else
   if (tesseract::SIMDDetect::IsAVX512BWAvailable()) {
     printf(" Found AVX512BW\n");