forked from cockroachdb/pebble
-
Notifications
You must be signed in to change notification settings - Fork 0
/
level_iter.go
433 lines (395 loc) · 11.6 KB
/
level_iter.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.
package pebble
import (
"sort"
)
// tableNewIters creates a new point and range-del iterator for the given file
// number. If bytesIterated is specified, it is incremented as the given file is
// iterated through.
type tableNewIters func(
meta *fileMetadata, opts *IterOptions, bytesIterated *uint64,
) (internalIterator, internalIterator, error)
// levelIter provides a merged view of the sstables in a level.
//
// levelIter is used during compaction and as part of the Iterator
// implementation. When used as part of the Iterator implementation, level
// iteration needs to "pause" at sstable boundaries if a range deletion
// tombstone is the source of that boundary. We know if a range tombstone is
// the smallest or largest key in a file because the kind will be
// InternalKeyKindRangeDeletion. If the boundary key is a range deletion
// tombstone, we materialize a fake entry to return from levelIter. This
// prevents mergingIter from advancing past the sstable until the sstable
// contains the smallest (or largest for reverse iteration) key in the merged
// heap. Note that Iterator treat a range deletion tombstone as a no-op and
// processes range deletions via mergingIter.
type levelIter struct {
opts *IterOptions
tableOpts IterOptions
cmp Compare
index int
// The key to return when iterating past an sstable boundary and that
// boundary is a range deletion tombstone. Note that if boundary != nil, then
// iter == nil, and if iter != nil, then boundary == nil.
boundary *InternalKey
iter internalIterator
newIters tableNewIters
rangeDelIter *internalIterator
files []fileMetadata
err error
// Pointer into this level's entry in `mergingIter::largestUserKeys`. We populate it
// with the largest user key for the currently opened file. It is used to limit the optimization
// that seeks lower-level iterators past keys shadowed by a range tombstone. Limiting this
// seek to the file upper-bound is necessary since range tombstones are stored untruncated,
// while they only apply to keys within their containing file's boundaries. For a detailed
// example, see comment above `mergingIter`.
//
// This field differs from the `boundary` field in a few ways:
// - `boundary` is only populated when the iterator is positioned exactly on the sentinel key.
// - `boundary` can hold either the lower- or upper-bound, depending on the iterator direction.
// - `boundary` is not exposed to the next higher-level iterator, i.e., `mergingIter`.
largestUserKey *[]byte
// bytesIterated keeps track of the number of bytes iterated during compaction.
bytesIterated *uint64
}
// levelIter implements the internalIterator interface.
var _ internalIterator = (*levelIter)(nil)
func newLevelIter(
opts *IterOptions,
cmp Compare,
newIters tableNewIters,
files []fileMetadata,
bytesIterated *uint64,
) *levelIter {
l := &levelIter{}
l.init(opts, cmp, newIters, files, bytesIterated)
return l
}
func (l *levelIter) init(
opts *IterOptions,
cmp Compare,
newIters tableNewIters,
files []fileMetadata,
bytesIterated *uint64,
) {
l.opts = opts
if l.opts != nil {
l.tableOpts.TableFilter = l.opts.TableFilter
}
l.cmp = cmp
l.index = -1
l.newIters = newIters
l.files = files
l.bytesIterated = bytesIterated
}
func (l *levelIter) initRangeDel(rangeDelIter *internalIterator) {
l.rangeDelIter = rangeDelIter
}
func (l *levelIter) initLargestUserKey(largestUserKey *[]byte) {
l.largestUserKey = largestUserKey
}
func (l *levelIter) findFileGE(key []byte) int {
// Find the earliest file whose largest key is >= ikey. Note that the range
// deletion sentinel key is handled specially and a search for K will not
// find a table where K<range-del-sentinel> is the largest key. This prevents
// loading untruncated range deletions from a table which can't possibly
// contain the target key and is required for correctness by DB.Get.
//
// TODO(peter): inline the binary search.
return sort.Search(len(l.files), func(i int) bool {
largest := &l.files[i].largest
c := l.cmp(largest.UserKey, key)
if c > 0 {
return true
}
return c == 0 && largest.Trailer != InternalKeyRangeDeleteSentinel
})
}
func (l *levelIter) findFileLT(key []byte) int {
// Find the last file whose smallest key is < ikey.
index := sort.Search(len(l.files), func(i int) bool {
return l.cmp(l.files[i].smallest.UserKey, key) >= 0
})
return index - 1
}
func (l *levelIter) loadFile(index, dir int) bool {
l.boundary = nil
if l.index == index {
return l.iter != nil
}
if l.iter != nil {
l.err = l.iter.Close()
if l.err != nil {
return false
}
l.iter = nil
}
if l.rangeDelIter != nil {
*l.rangeDelIter = nil
}
for ; ; index += dir {
l.index = index
if l.index < 0 || l.index >= len(l.files) {
return false
}
f := &l.files[l.index]
l.tableOpts.LowerBound = l.opts.GetLowerBound()
if l.tableOpts.LowerBound != nil {
if l.cmp(f.largest.UserKey, l.tableOpts.LowerBound) < 0 {
// The largest key in the sstable is smaller than the lower bound.
if dir < 0 {
return false
}
continue
}
if l.cmp(l.tableOpts.LowerBound, f.smallest.UserKey) < 0 {
// The lower bound is smaller than the smallest key in the
// table. Iteration within the table does not need to check the lower
// bound.
l.tableOpts.LowerBound = nil
}
}
l.tableOpts.UpperBound = l.opts.GetUpperBound()
if l.tableOpts.UpperBound != nil {
if l.cmp(f.smallest.UserKey, l.tableOpts.UpperBound) >= 0 {
// The smallest key in the sstable is greater than or equal to the
// lower bound.
if dir > 0 {
return false
}
continue
}
if l.cmp(l.tableOpts.UpperBound, f.largest.UserKey) > 0 {
// The upper bound is greater than the largest key in the
// table. Iteration within the table does not need to check the upper
// bound.
l.tableOpts.UpperBound = nil
}
}
var rangeDelIter internalIterator
l.iter, rangeDelIter, l.err = l.newIters(f, &l.tableOpts, l.bytesIterated)
if l.err != nil || l.iter == nil {
return false
}
if l.rangeDelIter != nil {
*l.rangeDelIter = rangeDelIter
}
if l.largestUserKey != nil {
*l.largestUserKey = f.largest.UserKey
}
return true
}
}
func (l *levelIter) SeekGE(key []byte) (*InternalKey, []byte) {
// NB: the top-level Iterator has already adjusted key based on
// IterOptions.LowerBound.
if !l.loadFile(l.findFileGE(key), 1) {
return nil, nil
}
if key, val := l.iter.SeekGE(key); key != nil {
return key, val
}
return l.skipEmptyFileForward()
}
func (l *levelIter) SeekPrefixGE(prefix, key []byte) (*InternalKey, []byte) {
// NB: the top-level Iterator has already adjusted key based on
// IterOptions.LowerBound.
if !l.loadFile(l.findFileGE(key), 1) {
return nil, nil
}
if key, val := l.iter.SeekPrefixGE(prefix, key); key != nil {
return key, val
}
return l.skipEmptyFileForward()
}
func (l *levelIter) SeekLT(key []byte) (*InternalKey, []byte) {
// NB: the top-level Iterator has already adjusted key based on
// IterOptions.UpperBound.
if !l.loadFile(l.findFileLT(key), -1) {
return nil, nil
}
if key, val := l.iter.SeekLT(key); key != nil {
return key, val
}
return l.skipEmptyFileBackward()
}
func (l *levelIter) First() (*InternalKey, []byte) {
// NB: the top-level Iterator will call SeekGE if IterOptions.LowerBound is
// set.
if !l.loadFile(0, 1) {
return nil, nil
}
if key, val := l.iter.First(); key != nil {
return key, val
}
return l.skipEmptyFileForward()
}
func (l *levelIter) Last() (*InternalKey, []byte) {
// NB: the top-level Iterator will call SeekLT if IterOptions.UpperBound is
// set.
if !l.loadFile(len(l.files)-1, -1) {
return nil, nil
}
if key, val := l.iter.Last(); key != nil {
return key, val
}
return l.skipEmptyFileBackward()
}
func (l *levelIter) Next() (*InternalKey, []byte) {
if l.err != nil {
return nil, nil
}
if l.iter == nil {
if l.boundary != nil {
if l.loadFile(l.index+1, 1) {
if key, val := l.iter.First(); key != nil {
return key, val
}
return l.skipEmptyFileForward()
}
return nil, nil
}
if l.index == -1 && l.loadFile(0, 1) {
// The iterator was positioned off the beginning of the level. Position
// at the first entry.
if key, val := l.iter.First(); key != nil {
return key, val
}
return l.skipEmptyFileForward()
}
return nil, nil
}
if key, val := l.iter.Next(); key != nil {
return key, val
}
return l.skipEmptyFileForward()
}
func (l *levelIter) Prev() (*InternalKey, []byte) {
if l.err != nil {
return nil, nil
}
if l.iter == nil {
if l.boundary != nil {
if l.loadFile(l.index-1, -1) {
if key, val := l.iter.Last(); key != nil {
return key, val
}
return l.skipEmptyFileBackward()
}
return nil, nil
}
if n := len(l.files); l.index == n && l.loadFile(n-1, -1) {
// The iterator was positioned off the end of the level. Position at the
// last entry.
if key, val := l.iter.Last(); key != nil {
return key, val
}
return l.skipEmptyFileBackward()
}
return nil, nil
}
if key, val := l.iter.Prev(); key != nil {
return key, val
}
return l.skipEmptyFileBackward()
}
func (l *levelIter) skipEmptyFileForward() (*InternalKey, []byte) {
var key *InternalKey
var val []byte
for ; key == nil; key, val = l.iter.First() {
if l.err = l.iter.Close(); l.err != nil {
return nil, nil
}
l.iter = nil
if l.rangeDelIter != nil {
// We're being used as part of an Iterator and we've reached the end of
// the sstable. If the boundary is a range deletion tombstone, return
// that key.
if f := &l.files[l.index]; f.largest.Kind() == InternalKeyKindRangeDelete {
l.boundary = &f.largest
return l.boundary, nil
}
*l.rangeDelIter = nil
}
// Current file was exhausted. Move to the next file.
if !l.loadFile(l.index+1, 1) {
return nil, nil
}
}
return key, val
}
func (l *levelIter) skipEmptyFileBackward() (*InternalKey, []byte) {
var key *InternalKey
var val []byte
for ; key == nil; key, val = l.iter.Last() {
if l.err = l.iter.Close(); l.err != nil {
return nil, nil
}
l.iter = nil
if l.rangeDelIter != nil {
// We're being used as part of an Iterator and we've reached the end of
// the sstable. If the boundary is a range deletion tombstone, return
// that key.
if f := &l.files[l.index]; f.smallest.Kind() == InternalKeyKindRangeDelete {
l.boundary = &f.smallest
return l.boundary, nil
}
*l.rangeDelIter = nil
}
// Current file was exhausted. Move to the previous file.
if !l.loadFile(l.index-1, -1) {
return nil, nil
}
}
return key, val
}
func (l *levelIter) Key() *InternalKey {
if l.iter == nil {
if l.boundary != nil {
return l.boundary
}
return nil
}
return l.iter.Key()
}
func (l *levelIter) Value() []byte {
if l.iter == nil {
return nil
}
return l.iter.Value()
}
func (l *levelIter) Valid() bool {
if l.iter == nil {
return l.boundary != nil
}
return l.iter.Valid()
}
func (l *levelIter) Error() error {
if l.err != nil || l.iter == nil {
return l.err
}
return l.iter.Error()
}
func (l *levelIter) Close() error {
if l.iter != nil {
l.err = l.iter.Close()
l.iter = nil
}
if l.rangeDelIter != nil {
if t := *l.rangeDelIter; t != nil {
if err := t.Close(); err != nil && l.err == nil {
l.err = err
}
}
*l.rangeDelIter = nil
}
return l.err
}
func (l *levelIter) SetBounds(lower, upper []byte) {
l.opts.LowerBound = lower
l.opts.UpperBound = upper
if l.iter != nil {
l.iter.SetBounds(lower, upper)
}
}