This repository has been archived by the owner on Jul 27, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 26
/
Copy pathfind_match_length.h
108 lines (94 loc) · 3.57 KB
/
find_match_length.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
// Copyright 2011 Google Inc. All Rights Reserved.
//
// Modified from flate/snappy code from jyrki/sesse. The key
// difference between this code and that is the support for
// compare over two disjoint blocks.
#ifndef UTIL_COMPRESSION_GIPFELI_INTERNAL_FIND_MATCH_LENGTH_H_
#define UTIL_COMPRESSION_GIPFELI_INTERNAL_FIND_MATCH_LENGTH_H_
#include "stubs-internal.h"
namespace util {
namespace compression {
// Return the largest match of n bytes starting at s1 and s2.
//
// Assumptions
//
// a) s1 and s2 are in two separate blocks next to each other.
// i.e. once you hit the end of s1, we continue matching from
// start of s2.
//
// b) For peformance, we copy the first 8 bytes of s2 also at the end of
// s1. This allows us to just do UNALIGNED_LOAD64 at the end of the
// block.
//
// Separate implementation for x86_64, for speed.
//
#if defined(__GNUC__) && defined(ARCH_K8)
static inline int MultiBlockFindMatchLength(const uint8* s1,
const uint8* s1_limit,
const uint8* s2_base,
const uint8* s2,
const uint8* s2_limit) {
const uint8* s2_start = s2;
// Find out how long the match is. We loop over the data 64 bits at a
// time until we find a 64-bit block that doesn't match; then we find
// the first non-matching bit and use that to calculate the total
// length of the match.
while (PREDICT_TRUE(s2 <= s2_limit - 8 && s1 <= s1_limit - 8)) {
if (PREDICT_FALSE(UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1))) {
s2 += 8;
s1 += 8;
} else {
int matched = s2 - s2_start;
// On current (mid-2008) Opteron models there is a 3% more
// efficient code sequence to find the first non-matching byte.
// However, what follows is ~10% better on Intel Core 2 and newer,
// and we expect AMD's bsf instruction to improve.
uint64 x = UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1);
int matching_bits = Bits::FindLSBSetNonZero64(x);
matched += matching_bits >> 3;
return matched;
}
// If we hit end of s1 block, switch to start of s2. Note that we
// assume first 8 bytes of s2 are copied at end of s1 for ease of
// comparison.
if (PREDICT_FALSE(s1 >= s1_limit)) {
s1 = s2_base + (s1 - s1_limit);
s1_limit = s2_limit;
}
}
while (PREDICT_TRUE((s2 < s2_limit) && (s1 < s1_limit) && (*s1 == *s2))) {
s1++;
s2++;
}
return s2 - s2_start;
}
#else
static inline int MultiBlockFindMatchLength(const uint8* s1,
const uint8* s1_limit,
const uint8* s2_base,
const uint8* s2,
const uint8* s2_limit) {
const uint8 *s2_start = s2;
// Find out how long the match is. We loop over the data 32 bits at a
// time until we find a 32-bit block that doesn't match; then we find
// the first non-matching bit and use that to calculate the total
// length of the match.
while (s2 <= s2_limit - 4 && s1 <= s1_limit - 4 &&
UNALIGNED_LOAD32(s2) == UNALIGNED_LOAD32(s1)) {
if (PREDICT_FALSE(s1 >= s1_limit)) {
s1 = s2_base + (s1 - s1_limit);
s1_limit = s2_limit;
}
s2 += 4;
s1 += 4;
}
while ((s2 < s2_limit) && (s1 < s1_limit) && (*s1 == *s2)) {
s1++;
s2++;
}
return s2 - s2_start;
}
#endif
} // namespace compression
} // namespace util
#endif // UTIL_COMPRESSION_GIPFELI_INTERNAL_FIND_MATCH_LENGTH_H_