// Copyright 2023 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package objstorage

const (
	// Constants for dynamic readahead of data blocks. Note that the size values
	// make sense as some multiple of the default block size; and they should
	// both be larger than the default block size.
	minFileReadsForReadahead = 2
	// TODO(bilal): Have the initial size value be a factor of the block size,
	// as opposed to a hardcoded value.
	initialReadaheadSize = 64 << 10  /* 64KB */
	maxReadaheadSize     = 256 << 10 /* 256KB */
)
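
// Worked example of the constants above (added for illustration): the
// readahead size doubles on each qualifying sequential read, so it grows
// 64KB -> 128KB -> 256KB and is then capped at maxReadaheadSize (256KB).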

// readaheadState contains state variables related to readahead. Updated on
// file reads.
type readaheadState struct {
	// Number of sequential reads.
	numReads int64
	// Size issued to the next call to Prefetch. Starts at or above
	// initialReadaheadSize and grows exponentially until maxReadaheadSize.
	size int64
	// prevSize is the size used in the last Prefetch call.
	prevSize int64
	// The byte offset up to which the OS has been asked to read ahead / cached.
	// When reading ahead, reads up to this limit should not incur an IO
	// operation. Reads after this limit can benefit from a new call to
	// Prefetch.
	limit int64
}
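
// newReadaheadState is an illustrative constructor sketch (the name is an
// assumption and not part of the original file); it shows the initial values
// implied by the field comments above: size begins at initialReadaheadSize,
// while the zero values of the remaining fields mean no reads have been
// observed yet and no readahead limit has been established.
func newReadaheadState() readaheadState {
	return readaheadState{size: initialReadaheadSize}
}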

func (rs *readaheadState) recordCacheHit(offset, blockLength int64) {
	currentReadEnd := offset + blockLength
	if rs.numReads >= minFileReadsForReadahead {
		if currentReadEnd >= rs.limit && offset <= rs.limit+maxReadaheadSize {
			// This is a read that would have resulted in a readahead, had it
			// not been a cache hit.
			rs.limit = currentReadEnd
			return
		}
		if currentReadEnd < rs.limit-rs.prevSize || offset > rs.limit+maxReadaheadSize {
			// We read too far away from rs.limit to benefit from readahead in
			// any scenario. Reset all variables.
			rs.numReads = 1
			rs.limit = currentReadEnd
			rs.size = initialReadaheadSize
			rs.prevSize = 0
			return
		}
		// Reads in the range [rs.limit - rs.prevSize, rs.limit] end up
		// here. This is a read that is potentially benefitting from a past
		// readahead.
		return
	}
	if currentReadEnd >= rs.limit && offset <= rs.limit+maxReadaheadSize {
		// Blocks are being read sequentially and would benefit from readahead
		// down the line.
		rs.numReads++
		return
	}
	// We read too far ahead of the last read, or before it. This indicates
	// a random read, where readahead is not desirable. Reset all variables.
	rs.numReads = 1
	rs.limit = currentReadEnd
	rs.size = initialReadaheadSize
	rs.prevSize = 0
}

// maybeReadahead updates state and determines whether to issue a readahead /
// prefetch call for a block read at offset for blockLength bytes.
// Returns a size value (greater than 0) that should be prefetched if readahead
// would be beneficial.
func (rs *readaheadState) maybeReadahead(offset, blockLength int64) int64 {
	currentReadEnd := offset + blockLength
	if rs.numReads >= minFileReadsForReadahead {
		// The minimum threshold of sequential reads to justify reading ahead
		// has been reached.
		// There are two intervals: the interval being read:
		// [offset, currentReadEnd]
		// as well as the interval where a read would benefit from read ahead:
		// [rs.limit, rs.limit + rs.size]
		// We increase the latter interval to
		// [rs.limit, rs.limit + maxReadaheadSize] to account for cases where
		// readahead may not be beneficial with a small readahead size, but over
		// time the readahead size would increase exponentially to make it
		// beneficial.
		if currentReadEnd >= rs.limit && offset <= rs.limit+maxReadaheadSize {
			// We are doing a read in the interval ahead of
			// the last readahead range. In the diagrams below, ++++ is the last
			// readahead range, ==== is the range represented by
			// [rs.limit, rs.limit + maxReadaheadSize], and ---- is the range
			// being read.
			//
			//            rs.limit           rs.limit + maxReadaheadSize
			//  ++++++++++|===========================|
			//
			//                    |-------------|
			//                  offset      currentReadEnd
			//
			// This case is also possible, as are all cases with an overlap
			// between [rs.limit, rs.limit + maxReadaheadSize] and [offset,
			// currentReadEnd]:
			//
			//            rs.limit           rs.limit + maxReadaheadSize
			//  ++++++++++|===========================|
			//
			//                                  |-------------|
			//                                offset      currentReadEnd
			//
			//
			rs.numReads++
			rs.limit = offset + rs.size
			rs.prevSize = rs.size
			// Increase rs.size for the next read.
			rs.size *= 2
			if rs.size > maxReadaheadSize {
				rs.size = maxReadaheadSize
			}
			return rs.prevSize
		}
		if currentReadEnd < rs.limit-rs.prevSize || offset > rs.limit+maxReadaheadSize {
			// The above conditional has rs.limit > rs.prevSize to confirm that
			// rs.limit - rs.prevSize would not underflow.
			// We read too far away from rs.limit to benefit from readahead in
			// any scenario. Reset all variables.
			// The case where we read too far ahead:
			//
			// (rs.limit - rs.prevSize)  (rs.limit)  (rs.limit + maxReadaheadSize)
			//               |+++++++++++++|=============|
			//
			//                                              |-------------|
			//                                            offset   currentReadEnd
			//
			// Or too far behind:
			//
			// (rs.limit - rs.prevSize)  (rs.limit)  (rs.limit + maxReadaheadSize)
			//               |+++++++++++++|=============|
			//
			//    |-------------|
			//  offset   currentReadEnd
			//
			rs.numReads = 1
			rs.limit = currentReadEnd
			rs.size = initialReadaheadSize
			rs.prevSize = 0
			return 0
		}
		// Reads in the range [rs.limit - rs.prevSize, rs.limit] end up
		// here. This is a read that is potentially benefitting from a past
		// readahead, but there's no reason to issue a readahead call at the
		// moment.
		//
		// (rs.limit - rs.prevSize)         (rs.limit + maxReadaheadSize)
		//               |+++++++++++++|===============|
		//                        (rs.limit)
		//
		//                   |-------|
		//                 offset   currentReadEnd
		//
		rs.numReads++
		return 0
	}
	if currentReadEnd >= rs.limit && offset <= rs.limit+maxReadaheadSize {
		// Blocks are being read sequentially and would benefit from readahead
		// down the line.
		//
		//                 (rs.limit)  (rs.limit + maxReadaheadSize)
		//                     |=============|
		//
		//                |-------|
		//              offset   currentReadEnd
		//
		rs.numReads++
		return 0
	}
	// We read too far ahead of the last read, or before it. This indicates
	// a random read, where readahead is not desirable. Reset all variables.
	//
	// (rs.limit - maxReadaheadSize)  (rs.limit)  (rs.limit + maxReadaheadSize)
	//                   |+++++++++++++|=============|
	//
	//  |-------|
	// offset   currentReadEnd
	//
	rs.numReads = 1
	rs.limit = currentReadEnd
	rs.size = initialReadaheadSize
	rs.prevSize = 0
	return 0
}
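
// prefetcher is a minimal illustrative interface (an assumption, not part of
// the original file) for anything that can issue an OS-level readahead hint,
// such as a wrapper around a file handle.
type prefetcher interface {
	Prefetch(offset, length int64) error
}

// readBlockWithReadahead sketches, under the assumptions above, how a caller
// might drive readaheadState around a block read: on a cache hit it only
// updates the sequential-read heuristics; on a cache miss it asks
// maybeReadahead whether a prefetch is worthwhile and, if so, issues a
// best-effort hint starting at the block offset.
func readBlockWithReadahead(rs *readaheadState, p prefetcher, offset, length int64, cacheHit bool) {
	if cacheHit {
		// The block was served from cache; no I/O hint is needed.
		rs.recordCacheHit(offset, length)
		return
	}
	if prefetchSize := rs.maybeReadahead(offset, length); prefetchSize > 0 {
		// A failed prefetch hint is not fatal to the read itself.
		_ = p.Prefetch(offset, prefetchSize)
	}
}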