From b0035538af1209abbe405bf2ab45921169620513 Mon Sep 17 00:00:00 2001 From: mayeut Date: Mon, 27 Jul 2015 20:12:48 +0200 Subject: [PATCH] Use __emul under msvc x86 for fast 64 = 32 * 32 Update uclouvain/openjpeg#220 --- src/lib/openjp2/opj_includes.h | 8 ++++++-- src/lib/openjp2/opj_intmath.h | 8 ++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/lib/openjp2/opj_includes.h b/src/lib/openjp2/opj_includes.h index bd9ae09a0..22b2f62a7 100644 --- a/src/lib/openjp2/opj_includes.h +++ b/src/lib/openjp2/opj_includes.h @@ -162,12 +162,16 @@ static INLINE long opj_lrintf(float f) { } #endif - - #if defined(_MSC_VER) && (_MSC_VER < 1400) #define vsnprintf _vsnprintf #endif +/* MSVC x86 is really bad at doing int64 = int32 * int32 on its own. Use intrinsic. */ +#if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(__INTEL_COMPILER) && defined(_M_IX86) +# include <intrin.h> +# pragma intrinsic(__emul) +#endif + #include "opj_inttypes.h" #include "opj_clock.h" #include "opj_malloc.h" diff --git a/src/lib/openjp2/opj_intmath.h b/src/lib/openjp2/opj_intmath.h index 188a09a78..f39f68431 100644 --- a/src/lib/openjp2/opj_intmath.h +++ b/src/lib/openjp2/opj_intmath.h @@ -184,7 +184,11 @@ Multiply two fixed-precision rational numbers. 
@return Returns a * b */ static INLINE OPJ_INT32 opj_int_fix_mul(OPJ_INT32 a, OPJ_INT32 b) { +#if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(__INTEL_COMPILER) && defined(_M_IX86) + OPJ_INT64 temp = __emul(a, b); +#else OPJ_INT64 temp = (OPJ_INT64) a * (OPJ_INT64) b ; +#endif temp += 4096; assert((temp >> 13) <= (OPJ_INT64)0x7FFFFFFF); assert((temp >> 13) >= (-(OPJ_INT64)0x7FFFFFFF - (OPJ_INT64)1)); @@ -192,7 +196,11 @@ static INLINE OPJ_INT32 opj_int_fix_mul(OPJ_INT32 a, OPJ_INT32 b) { } static INLINE OPJ_INT32 opj_int_fix_mul_t1(OPJ_INT32 a, OPJ_INT32 b) { +#if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(__INTEL_COMPILER) && defined(_M_IX86) + OPJ_INT64 temp = __emul(a, b); +#else OPJ_INT64 temp = (OPJ_INT64) a * (OPJ_INT64) b ; +#endif temp += 4096; assert((temp >> (13 + 11 - T1_NMSEDEC_FRACBITS)) <= (OPJ_INT64)0x7FFFFFFF); assert((temp >> (13 + 11 - T1_NMSEDEC_FRACBITS)) >= (-(OPJ_INT64)0x7FFFFFFF - (OPJ_INT64)1));