-
-
Notifications
You must be signed in to change notification settings - Fork 127
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
liblzma: Speed up CRC32 calculation on 64-bit LoongArch
The crc.w.{b/h/w/d}.w instructions in LoongArch can calculate the CRC32 result for 1/2/4/8 bytes in a single operation. Using these is much faster compared to the generic method. Optimized CRC32 is enabled unconditionally on 64-bit LoongArch because the LoongArch specification says that CRC32 instructions shall be implemented for 64-bit processors. Optimized CRC32 isn't enabled for 32-bit LoongArch processors because not enough information is available about them. Co-authored-by: Lasse Collin <[email protected]> Closes: #86
- Loading branch information
Showing
6 changed files
with
149 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
// SPDX-License-Identifier: 0BSD | ||
|
||
/////////////////////////////////////////////////////////////////////////////// | ||
// | ||
/// \file crc32_loongarch.h | ||
/// \brief CRC32 calculation with LoongArch optimization | ||
// | ||
// Authors: Xi Ruoyao | ||
// Lasse Collin | ||
// | ||
/////////////////////////////////////////////////////////////////////////////// | ||
|
||
#ifndef LZMA_CRC32_LOONGARCH_H | ||
#define LZMA_CRC32_LOONGARCH_H | ||
|
||
#include <larchintrin.h> | ||
|
||
|
||
// Calculate CRC32 over buf[0] .. buf[size - 1] using the LoongArch CRC
// intrinsics from <larchintrin.h> (__crc_w_b_w, __crc_w_h_w, __crc_w_w_w
// and __crc_w_d_w consume 1, 2, 4 and 8 input bytes per call, respectively).
//
// buf           Input bytes. No alignment is required from the caller.
// size          Number of bytes available at buf.
// crc_unsigned  Running CRC value supplied by the caller. It is
//               bit-inverted on entry and the result is bit-inverted
//               again before it is returned.
//
// Returns the updated CRC value.
static uint32_t
crc32_arch_optimized(const uint8_t *buf, size_t size, uint32_t crc_unsigned)
{
	// The intrinsics are called with signed operands in this file,
	// so the running value is kept in a signed variable and the input
	// is cast to the matching signed type at every call.
	int32_t crc = (int32_t)~crc_unsigned;

	if (size < 8) {
		// Short input: the alignment prologue below is not run, so buf
		// may have any alignment here. The aligned_read*() helpers are
		// presumably only valid for suitably aligned addresses (they
		// are defined outside this file), so consume these few bytes
		// one at a time instead. The resulting CRC is the same.
		for (; size > 0; --size)
			crc = __crc_w_b_w((int8_t)*buf++, crc);

		return (uint32_t)~crc;
	}

	// Number of bytes (0-7) needed to advance buf to the next multiple
	// of eight. Since size >= 8, all of them are available.
	const size_t align = (0 - (uintptr_t)buf) & 7;

	// Consume 1, 2 and 4 bytes as needed. After each step buf is aligned
	// for the next, wider read.
	if (align & 1)
		crc = __crc_w_b_w((int8_t)*buf++, crc);

	if (align & 2) {
		crc = __crc_w_h_w((int16_t)aligned_read16le(buf), crc);
		buf += 2;
	}

	if (align & 4) {
		crc = __crc_w_w_w((int32_t)aligned_read32le(buf), crc);
		buf += 4;
	}

	size -= align;

	// Main loop: eight bytes per iteration from an 8-byte-aligned buf.
	for (const uint8_t *limit = buf + (size & ~(size_t)7);
			buf < limit; buf += 8)
		crc = __crc_w_d_w((int64_t)aligned_read64le(buf), crc);

	// 0-7 bytes remain. buf is a multiple of eight at this point, so the
	// 4-byte read and the 2-byte read that may follow it are both aligned.
	size &= 7;

	if (size & 4) {
		crc = __crc_w_w_w((int32_t)aligned_read32le(buf), crc);
		buf += 4;
	}

	if (size & 2) {
		crc = __crc_w_h_w((int16_t)aligned_read16le(buf), crc);
		buf += 2;
	}

	if (size & 1)
		crc = __crc_w_b_w((int8_t)*buf, crc);

	return (uint32_t)~crc;
}
|
||
#endif // LZMA_CRC32_LOONGARCH_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters