From 86a684de7d44ba18694892afce3a74622a76a3a7 Mon Sep 17 00:00:00 2001
From: bab2min
Date: Sat, 1 Jun 2024 00:57:11 +0900
Subject: [PATCH] Update BitUtils.h and search.cpp for Windows ARM64 support (#168)

---
Review notes (this section is ignored by git-am):
 * BitUtils.h: the fallback branch tested `_MSV_VER`, a misspelling of
   `_MSC_VER`, so non-ARM64 MSVC builds of popcount(uint32_t) fell through
   to `throw ""`. Spelling corrected.
 * BitUtils.h: on MSVC/ARM64 `neon_cnt` produces one bit count per byte, and
   returning `n64_u64[0]` handed back those eight counts packed together
   rather than their sum (e.g. 0x0101 gave 257, not 2). Both overloads now
   reduce the per-byte counts with `vaddv_u8`.
 * search.cpp: the MSVC variants of INIT_U16x8 / INIT_U32x4 wrapped both
   64-bit halves in a single pair of parentheses, turning the separator into
   the comma operator and dropping the low half of the mask. They now expand
   to two initializers, like INIT_U8x16.
 * Line counts per hunk are unchanged; the blob ids on the `index` lines are
   those of the original submission.

 include/kiwi/BitUtils.h | 10 +++++++++-
 src/search.cpp          | 30 ++++++++++++++++++++++++++----
 2 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/include/kiwi/BitUtils.h b/include/kiwi/BitUtils.h
index a6361460..26a76a91 100644
--- a/include/kiwi/BitUtils.h
+++ b/include/kiwi/BitUtils.h
@@ -118,7 +118,11 @@ namespace kiwi
 	{
 #if defined(__GNUC__)
 		return __builtin_popcount(v);
-#elif defined(_MSC_VER)
+#elif defined(_MSC_VER) && defined(KIWI_ARCH_ARM64)
+		uint64x1_t w = { v };
+		w = neon_cnt(w); // per-byte bit counts
+		return vaddv_u8(w); // sum the eight byte counts
+#elif defined(_MSC_VER)
 		return __popcnt(v);
 #else
 		throw "";
@@ -129,6 +133,10 @@ namespace kiwi
 	{
 #if defined(__GNUC__)
 		return __builtin_popcountll(v);
+#elif defined(_MSC_VER) && defined(KIWI_ARCH_ARM64)
+		uint64x1_t w = { v };
+		w = neon_cnt(w); // per-byte bit counts
+		return vaddv_u8(w); // sum the eight byte counts
 #elif defined(_MSC_VER) && defined(_M_X64)
 		return __popcnt64(v);
 #else
diff --git a/src/search.cpp b/src/search.cpp
index 6f14f0ef..987616ed 100644
--- a/src/search.cpp
+++ b/src/search.cpp
@@ -893,6 +893,28 @@ namespace kiwi
 #endif
 
 #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 || KIWI_ARCH_ARM64
+
+#if defined(_MSC_VER)
+#define ALIGNED_(x) __declspec(align(x))
+#define _MERGE_U8x8(a, b, c, d, e, f, g, h) (uint64_t(a) | (uint64_t(b) << 8) | (uint64_t(c) << 16) | (uint64_t(d) << 24) | (uint64_t(e) << 32) | (uint64_t(f) << 40) | (uint64_t(g) << 48) | (uint64_t(h) << 56))
+#define _MERGE_U16x4(a, b, c, d) (uint64_t(a) | (uint64_t(b) << 16) | (uint64_t(c) << 32) | (uint64_t(d) << 48))
+#define _MERGE_U32x2(a, b) (uint64_t(a) | (uint64_t(b) << 32))
+
+#define INIT_U8x16(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) (_MERGE_U8x8(a, b, c, d, e, f, g, h)), (_MERGE_U8x8(i, j, k, l, m, n, o, p))
+#define INIT_U16x8(a, b, c, d, e, f, g, h) (_MERGE_U16x4(a, b, c, d)), (_MERGE_U16x4(e, f, g, h))
+#define INIT_U32x4(a, b, c, d) (_MERGE_U32x2(a, b)), (_MERGE_U32x2(c, d))
+
+#else
+#if defined(__GNUC__)
+#define ALIGNED_(x) __attribute__ ((aligned(x)))
+
+#define INIT_U8x16(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p
+#define INIT_U16x8(a, b, c, d, e, f, g, h) a, b, c, d, e, f, g, h
+#define INIT_U32x4(a, b, c, d) a, b, c, d
+
+#endif
+#endif
+
 namespace kiwi
 {
 	namespace nst
@@ -905,7 +927,7 @@ namespace kiwi
 			int8x16_t ptarget = vdupq_n_s8(target), pkey;
 			uint8x16_t peq, pgt, pmasked;
 
-			static const uint8x16_t __attribute__((aligned(16))) mask = { 1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128 };
+			static const uint8x16_t ALIGNED_(16) mask = { INIT_U8x16(1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128) };
 
 			while (i < size)
 			{
@@ -938,7 +960,7 @@ namespace kiwi
 			int16x8_t ptarget = vdupq_n_s16(target), pkey;
 			uint16x8_t peq, pgt;
 
-			static const uint16x8_t __attribute__((aligned(16))) mask = { 1, 2, 4, 8, 16, 32, 64, 128 };
+			static const uint16x8_t ALIGNED_(16) mask = { INIT_U16x8(1, 2, 4, 8, 16, 32, 64, 128) };
 
 			while (i < size)
 			{
@@ -968,7 +990,7 @@ namespace kiwi
 			int32x4_t ptarget = vdupq_n_s32(target), pkey;
 			uint32x4_t peq, pgt;
 
-			static const uint32x4_t __attribute__((aligned(16))) mask = { 1, 2, 4, 8 };
+			static const uint32x4_t ALIGNED_(16) mask = { INIT_U32x4(1, 2, 4, 8) };
 
 			while (i < size)
 			{
@@ -998,7 +1020,7 @@ namespace kiwi
 			int64x2_t ptarget = vdupq_n_s64(target), pkey;
 			uint64x2_t peq, pgt;
 
-			static const uint64x2_t __attribute__((aligned(16))) mask = { 1, 2 };
+			static const uint64x2_t ALIGNED_(16) mask = { 1, 2 };
 
 			while (i < size)
 			{