-
Notifications
You must be signed in to change notification settings - Fork 317
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
zstd: Add amd64 match length assembly (#824)
* zstd: Add amd64 match length assembly Copied from the S2 implementation. 5-10% faster.
- Loading branch information
Showing
5 changed files
with
118 additions
and
23 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
//go:build amd64 && !appengine && !noasm && gc | ||
// +build amd64,!appengine,!noasm,gc | ||
|
||
// Copyright 2019+ Klaus Post. All rights reserved. | ||
// License information can be found in the LICENSE file. | ||
|
||
package zstd | ||
|
||
// matchLen returns how many leading bytes of a and b are equal, | ||
// examining at most len(a) bytes. | ||
// | ||
// The body is provided in assembly. Only the length of a is consulted there; | ||
// the length of b is never read, so the caller must uphold the contract below. | ||
// | ||
// It assumes that: | ||
// | ||
// len(a) <= len(b) and len(a) > 0 | ||
// | ||
//go:noescape | ||
func matchLen(a []byte, b []byte) int |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
// Copied from S2 implementation. | ||
|
||
//go:build !appengine && !noasm && gc && !noasm | ||
|
||
#include "textflag.h" | ||
|
||
// func matchLen(a []byte, b []byte) int | ||
// Requires: BMI | ||
// (TZCNTQ is only assembled when GOAMD64_v3 is defined; otherwise BSFQ is used.) | ||
// | ||
// Returns the number of leading bytes that are equal in a and b, looking at | ||
// no more than len(a) bytes. The length of b is never loaded. | ||
// | ||
// Register use: | ||
//   AX = base pointer of a | ||
//   CX = base pointer of b | ||
//   DX = bytes of a still to compare | ||
//   SI = bytes matched so far (also the current offset into a and b) | ||
//   BX = scratch | ||
// | ||
// Frame $0-56: no locals; the offsets used below place the two slice headers | ||
// at 0 and 24 and the result at 48. | ||
TEXT ·matchLen(SB), NOSPLIT, $0-56 | ||
MOVQ a_base+0(FP), AX | ||
MOVQ b_base+24(FP), CX | ||
MOVQ a_len+8(FP), DX | ||
|
||
// matchLen | ||
// Start with zero matched bytes; writing the 32-bit register clears all of SI. | ||
XORL SI, SI | ||
// NOTE(review): the remaining length is compared with 32-bit CMPL throughout, | ||
// so only the low 32 bits of len(a) are considered - presumably callers never | ||
// pass slices of 4 GiB or more; confirm. | ||
CMPL DX, $0x08 | ||
// Fewer than 8 bytes: skip the 8-byte loop and go to the 4/2/1-byte tail. | ||
JB matchlen_match4_standalone | ||
|
||
// Main loop: compare 8 bytes per iteration. | ||
matchlen_loopback_standalone: | ||
MOVQ (AX)(SI*1), BX | ||
// XOR leaves a non-zero bit wherever the two 8-byte words differ. | ||
XORQ (CX)(SI*1), BX | ||
TESTQ BX, BX | ||
// Zero means all 8 bytes are equal: advance and keep looping. | ||
JZ matchlen_loop_standalone | ||
|
||
// A difference was found. BX is non-zero here, so both instructions below | ||
// produce the index of its lowest set bit. | ||
#ifdef GOAMD64_v3 | ||
TZCNTQ BX, BX | ||
#else | ||
BSFQ BX, BX | ||
#endif | ||
// Bit index / 8 = index of the first differing byte (loads are little-endian). | ||
SARQ $0x03, BX | ||
// SI += BX, giving the final match length. | ||
LEAL (SI)(BX*1), SI | ||
JMP gen_match_len_end | ||
|
||
// All 8 bytes matched: consume them and loop while at least 8 bytes remain. | ||
matchlen_loop_standalone: | ||
LEAL -8(DX), DX | ||
LEAL 8(SI), SI | ||
CMPL DX, $0x08 | ||
JAE matchlen_loopback_standalone | ||
|
||
// Tail, 4 bytes: if at least 4 bytes remain and they are equal, consume them. | ||
// On a mismatch, fall through so the narrower compares retry from the same offset. | ||
matchlen_match4_standalone: | ||
CMPL DX, $0x04 | ||
JB matchlen_match2_standalone | ||
MOVL (AX)(SI*1), BX | ||
CMPL (CX)(SI*1), BX | ||
JNE matchlen_match2_standalone | ||
LEAL -4(DX), DX | ||
LEAL 4(SI), SI | ||
|
||
// Tail, 2 bytes: same scheme with a 16-bit compare. | ||
matchlen_match2_standalone: | ||
CMPL DX, $0x02 | ||
JB matchlen_match1_standalone | ||
MOVW (AX)(SI*1), BX | ||
CMPW (CX)(SI*1), BX | ||
JNE matchlen_match1_standalone | ||
LEAL -2(DX), DX | ||
LEAL 2(SI), SI | ||
|
||
// Tail, 1 byte: count one final byte if one remains and it is equal. | ||
matchlen_match1_standalone: | ||
CMPL DX, $0x01 | ||
JB gen_match_len_end | ||
MOVB (AX)(SI*1), BL | ||
CMPB (CX)(SI*1), BL | ||
JNE gen_match_len_end | ||
INCL SI | ||
|
||
// Store the match length as the function result. | ||
gen_match_len_end: | ||
MOVQ SI, ret+48(FP) | ||
RET |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
//go:build !amd64 || appengine || !gc || noasm | ||
// +build !amd64 appengine !gc noasm | ||
|
||
// Copyright 2019+ Klaus Post. All rights reserved. | ||
// License information can be found in the LICENSE file. | ||
|
||
package zstd | ||
|
||
import ( | ||
"encoding/binary" | ||
"math/bits" | ||
) | ||
|
||
// matchLen returns the maximum common prefix length of a and b.
// a must be the shortest of the two.
//
// Most of the work is done 8 bytes at a time: both slices are read as
// little-endian 64-bit words and XORed, so the lowest set bit of the
// result identifies the first byte that differs.
func matchLen(a, b []byte) (n int) {
	for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] {
		diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b)
		if diff != 0 {
			// With a little-endian load, byte i occupies bits 8i..8i+7, so
			// the trailing zero count divided by 8 is the index of the first
			// mismatching byte within this word.
			return n + bits.TrailingZeros64(diff)>>3
		}
		n += 8
	}

	// Compare whatever is left byte by byte. The loop is bounded by a only,
	// which is why a must not be longer than b.
	for i := range a {
		if a[i] != b[i] {
			break
		}
		n++
	}
	return n
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters