Skip to content

Commit

Permalink
arm64 crc prefetch optimise (facebook#5773)
Browse files Browse the repository at this point in the history
Summary:
Prefetch data for the following block to avoid cache misses while calculating the CRC.

I ran a performance test on a Kunpeng-920 server (ARMv8, [email protected]):
./db_bench --benchmarks=crc32c --block_size=500000000
before optimise : 587313.500 micros/op 1 ops/sec;  811.9 MB/s (500000000 per op)
after optimise  : 289248.500 micros/op 3 ops/sec; 1648.5 MB/s (500000000 per op)
Pull Request resolved: facebook#5773

Differential Revision: D17347339

fbshipit-source-id: bfcd74f0f0eb4b322b959be68019ddcaae1e3341
  • Loading branch information
HouBingjian authored and Yi Wu committed Mar 16, 2021
1 parent bb23947 commit 081c276
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 1 deletion.
5 changes: 4 additions & 1 deletion util/crc32c_arm64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,10 @@ uint32_t crc32c_arm64(uint32_t crc, unsigned char const *data,
*/
uint32_t k0 = 0xe417f38a, k1 = 0x8f158014;

/* First 8 bytei for better pipelining */
/* Prefetch data for following block to avoid cache miss */
PREF1KL1((uint8_t *)buf64, 1024);

/* First 8 byte for better pipelining */
crc0 = crc32c_u64(crc, *buf64++);

/* 3 blocks crc32c parallel computation
Expand Down
11 changes: 11 additions & 0 deletions util/crc32c_arm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,17 @@
#define crc32c_u16(crc, v) __crc32ch(crc, v)
#define crc32c_u32(crc, v) __crc32cw(crc, v)
#define crc32c_u64(crc, v) __crc32cd(crc, v)
/*
 * PREF4X64L1(buffer, PREF_OFFSET, ITR)
 *
 * Emits four "PRFM PLDL1KEEP" prefetch hints for the addresses
 *     buffer + PREF_OFFSET + (ITR + k) * 64,   k = 0, 1, 2, 3
 *
 * buffer       base address; it is placed in a register ("r" constraint).
 *              NOTE: the argument is expanded four times, so it must not
 *              have side effects.
 * PREF_OFFSET  byte offset added to the base.
 * ITR          index of the first 64-byte step to prefetch.
 *
 * PREF_OFFSET and ITR must be compile-time constants: the resulting byte
 * offset is handed to the assembler as an immediate operand ("I"
 * constraint) and so must also fit the range that constraint accepts.
 *
 * The body is wrapped in do { ... } while (0) so that the macro expands to
 * exactly one statement and is safe inside unbraced if/else and loops.
 * The caller supplies the terminating semicolon.
 */
#define PREF4X64L1(buffer, PREF_OFFSET, ITR)                          \
  do {                                                                \
    __asm__("PRFM PLDL1KEEP, [%x[v],%[c]]" ::[v] "r"(buffer),         \
            [c] "I"((PREF_OFFSET) + ((ITR) + 0) * 64));               \
    __asm__("PRFM PLDL1KEEP, [%x[v],%[c]]" ::[v] "r"(buffer),         \
            [c] "I"((PREF_OFFSET) + ((ITR) + 1) * 64));               \
    __asm__("PRFM PLDL1KEEP, [%x[v],%[c]]" ::[v] "r"(buffer),         \
            [c] "I"((PREF_OFFSET) + ((ITR) + 2) * 64));               \
    __asm__("PRFM PLDL1KEEP, [%x[v],%[c]]" ::[v] "r"(buffer),         \
            [c] "I"((PREF_OFFSET) + ((ITR) + 3) * 64));               \
  } while (0)

/*
 * PREF1KL1(buffer, PREF_OFFSET)
 *
 * Prefetches 1 KiB starting at buffer + PREF_OFFSET: four PREF4X64L1
 * groups (ITR = 0, 4, 8, 12) give 16 hints at 64-byte steps, i.e.
 * 16 * 64 = 1024 bytes. The same argument restrictions as PREF4X64L1
 * apply. Expands to a single statement; the caller supplies the
 * terminating semicolon.
 */
#define PREF1KL1(buffer, PREF_OFFSET)          \
  do {                                         \
    PREF4X64L1(buffer, (PREF_OFFSET), 0);      \
    PREF4X64L1(buffer, (PREF_OFFSET), 4);      \
    PREF4X64L1(buffer, (PREF_OFFSET), 8);      \
    PREF4X64L1(buffer, (PREF_OFFSET), 12);     \
  } while (0)

extern uint32_t crc32c_arm64(uint32_t crc, unsigned char const *data, size_t len);
extern uint32_t crc32c_runtime_check(void);
Expand Down

0 comments on commit 081c276

Please sign in to comment.