properties.go
// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package sstable

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"math"
	"reflect"
	"sort"
	"unsafe"

	"github.com/cockroachdb/pebble/internal/intern"
)

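// propertiesBlockRestartInterval effectively disables restart points in the
// properties block. propGlobalSeqnumName is the one property whose value is
// stored as a fixed-width uint64 rather than a varint (see load and save).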
const propertiesBlockRestartInterval = math.MaxInt32
const propGlobalSeqnumName = "rocksdb.external_sst_file.global_seqno"

var propTagMap = make(map[string]reflect.StructField)
var propBoolTrue = []byte{'1'}
var propBoolFalse = []byte{'0'}

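// columnFamilyIDField caches the reflect.StructField for ColumnFamilyID so
// that String can special-case a column family ID of math.MaxInt32.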
var columnFamilyIDField = func() reflect.StructField {
	f, ok := reflect.TypeOf(Properties{}).FieldByName("ColumnFamilyID")
	if !ok {
		panic("Properties.ColumnFamilyID field not found")
	}
	return f
}()

var propOffsetTagMap = make(map[uintptr]string)

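// init builds propTagMap (property tag -> struct field) and propOffsetTagMap
// (field byte offset -> property tag) from the `prop` struct tags on
// Properties, panicking if a tagged field has an unsupported type.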
func init() {
	t := reflect.TypeOf(Properties{})
	for i := 0; i < t.NumField(); i++ {
		f := t.Field(i)
		if tag := f.Tag.Get("prop"); tag != "" {
			switch f.Type.Kind() {
			case reflect.Bool:
			case reflect.Uint32:
			case reflect.Uint64:
			case reflect.String:
			default:
				panic(fmt.Sprintf("unsupported property field type: %s %s", f.Name, f.Type))
			}
			propTagMap[tag] = f
			propOffsetTagMap[f.Offset] = tag
		}
	}
}

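// As an illustration of the mappings built above: propTagMap["rocksdb.data.size"]
// describes the DataSize field, and propOffsetTagMap keyed by DataSize's offset
// yields "rocksdb.data.size", so a property can be looked up either by its
// on-disk tag (load) or by its field offset (save).
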
// Properties holds the sstable property values. The properties are
// automatically populated during sstable creation and loaded from the
// properties meta block when an sstable is opened.
type Properties struct {
	// ID of column family for this SST file, corresponding to the CF identified
	// by column_family_name.
	ColumnFamilyID uint64 `prop:"rocksdb.column.family.id"`
	// Name of the column family with which this SST file is associated. Empty if
	// the column family is unknown.
	ColumnFamilyName string `prop:"rocksdb.column.family.name"`
	// The name of the comparer used in this table.
	ComparerName string `prop:"rocksdb.comparator"`
	// The compression algorithm used to compress blocks.
	CompressionName string `prop:"rocksdb.compression"`
	// The compression options used to compress blocks.
	CompressionOptions string `prop:"rocksdb.compression_options"`
	// The time when the SST file was created. Since SST files are immutable,
	// this is equivalent to the last modified time.
	CreationTime uint64 `prop:"rocksdb.creation.time"`
	// The total size of all data blocks.
	DataSize uint64 `prop:"rocksdb.data.size"`
	// The external sstable version format. Version 2 is the one RocksDB has been
	// using since 5.13. RocksDB only uses the global sequence number for an
	// sstable if this property has been set.
	ExternalFormatVersion uint32 `prop:"rocksdb.external_sst_file.version"`
	// Actual SST file creation time. 0 means unknown.
	FileCreationTime uint64 `prop:"rocksdb.file.creation.time"`
	// The name of the filter policy used in this table. Empty if no filter
	// policy is used.
	FilterPolicyName string `prop:"rocksdb.filter.policy"`
	// The size of the filter block.
	FilterSize uint64 `prop:"rocksdb.filter.size"`
	// If 0, keys are variable length. Otherwise, the number of bytes in each key.
	FixedKeyLen uint64 `prop:"rocksdb.fixed.key.length"`
	// Format version, reserved for backward compatibility.
	FormatVersion uint64 `prop:"rocksdb.format.version"`
	// The global sequence number to use for all entries in the table. Present if
	// the table was created externally and ingested whole.
	GlobalSeqNum uint64 `prop:"rocksdb.external_sst_file.global_seqno"`
	// Whether the index key is a user key or an internal key.
	IndexKeyIsUserKey uint64 `prop:"rocksdb.index.key.is.user.key"`
	// Total number of index partitions if kTwoLevelIndexSearch is used.
	IndexPartitions uint64 `prop:"rocksdb.index.partitions"`
	// The size of the index block.
	IndexSize uint64 `prop:"rocksdb.index.size"`
	// The index type. TODO(peter): add a more detailed description.
	IndexType uint32 `prop:"rocksdb.block.based.table.index.type"`
	// Whether delta encoding is used to encode the index values.
	IndexValueIsDeltaEncoded uint64 `prop:"rocksdb.index.value.is.delta.encoded"`
	// The name of the merger used in this table. Empty if no merger is used.
	MergerName string `prop:"rocksdb.merge.operator"`
	// The number of data blocks in this table.
	NumDataBlocks uint64 `prop:"rocksdb.num.data.blocks"`
	// The number of deletion entries in this table, including both point and
	// range deletions.
	NumDeletions uint64 `prop:"rocksdb.deleted.keys"`
	// The number of entries in this table.
	NumEntries uint64 `prop:"rocksdb.num.entries"`
	// The number of merge operands in the table.
	NumMergeOperands uint64 `prop:"rocksdb.merge.operands"`
	// The number of range deletions in this table.
	NumRangeDeletions uint64 `prop:"rocksdb.num.range-deletions"`
	// The number of RANGEKEYDELs in this table.
	NumRangeKeyDels uint64 `prop:"pebble.num.range-key-dels"`
	// The number of range keys in this table.
	NumRangeKeys uint64 `prop:"pebble.num.range-keys"`
	// The number of RANGEKEYSETs in this table.
	NumRangeKeySets uint64 `prop:"pebble.num.range-key-sets"`
	// The number of RANGEKEYUNSETs in this table.
	NumRangeKeyUnsets uint64 `prop:"pebble.num.range-key-unsets"`
	// Timestamp of the earliest key. 0 if unknown.
	OldestKeyTime uint64 `prop:"rocksdb.oldest.key.time"`
	// The name of the prefix extractor used in this table. Empty if no prefix
	// extractor is used.
	PrefixExtractorName string `prop:"rocksdb.prefix.extractor.name"`
	// If filtering is enabled, was the filter created on the key prefix.
	PrefixFiltering bool `prop:"rocksdb.block.based.table.prefix.filtering"`
	// A comma-separated list of the names of the property collectors used in
	// this table.
	PropertyCollectorNames string `prop:"rocksdb.property.collectors"`
	// Total raw key size.
	RawKeySize uint64 `prop:"rocksdb.raw.key.size"`
	// Total raw rangekey key size.
	RawRangeKeyKeySize uint64 `prop:"pebble.raw.rangekey.key.size"`
	// Total raw rangekey value size.
	RawRangeKeyValueSize uint64 `prop:"pebble.raw.rangekey.value.size"`
	// Total raw value size.
	RawValueSize uint64 `prop:"rocksdb.raw.value.size"`
	// Size of the top-level index if kTwoLevelIndexSearch is used.
	TopLevelIndexSize uint64 `prop:"rocksdb.top-level.index.size"`
	// User collected properties.
	UserProperties map[string]string
	// If filtering is enabled, was the filter created on the whole key.
	WholeKeyFiltering bool `prop:"rocksdb.block.based.table.whole.key.filtering"`
	// Loaded is a set indicating which fields have been loaded from disk,
	// indexed by the field's byte offset within the struct
	// (reflect.StructField.Offset). Only set if the properties have been loaded
	// from a file. Only exported for testing purposes.
	Loaded map[uintptr]struct{}
}

// NumPointDeletions returns the number of point deletions in this table.
func (p *Properties) NumPointDeletions() uint64 {
	return p.NumDeletions - p.NumRangeDeletions
}

func (p *Properties) String() string {
	var buf bytes.Buffer
	v := reflect.ValueOf(*p)
	vt := v.Type()
	for i := 0; i < v.NumField(); i++ {
		ft := vt.Field(i)
		tag := ft.Tag.Get("prop")
		if tag == "" {
			continue
		}
		f := v.Field(i)
		// TODO(peter): Use f.IsZero() when we can rely on go1.13.
		if zero := reflect.Zero(f.Type()); zero.Interface() == f.Interface() {
			// Skip printing of zero values which were not loaded from disk.
			if _, ok := p.Loaded[ft.Offset]; !ok {
				continue
			}
		}
		fmt.Fprintf(&buf, "%s: ", tag)
		switch ft.Type.Kind() {
		case reflect.Bool:
			fmt.Fprintf(&buf, "%t\n", f.Bool())
		case reflect.Uint32:
			fmt.Fprintf(&buf, "%d\n", f.Uint())
		case reflect.Uint64:
			u := f.Uint()
			if ft.Offset == columnFamilyIDField.Offset && u == math.MaxInt32 {
				fmt.Fprintf(&buf, "-\n")
			} else {
				fmt.Fprintf(&buf, "%d\n", u)
			}
		case reflect.String:
			fmt.Fprintf(&buf, "%s\n", f.String())
		default:
			panic("not reached")
		}
	}
	keys := make([]string, 0, len(p.UserProperties))
	for key := range p.UserProperties {
		keys = append(keys, key)
	}
	sort.Strings(keys)
	for _, key := range keys {
		fmt.Fprintf(&buf, "%s: %s\n", key, p.UserProperties[key])
	}
	return buf.String()
}

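// load decodes the properties from the raw properties block b. Keys matching a
// registered `prop` tag are decoded into the corresponding struct field and
// recorded in Loaded; all other keys are preserved verbatim in UserProperties.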
func (p *Properties) load(b block, blockOffset uint64) error {
	i, err := newRawBlockIter(bytes.Compare, b)
	if err != nil {
		return err
	}
	p.Loaded = make(map[uintptr]struct{})
	v := reflect.ValueOf(p).Elem()
	for valid := i.First(); valid; valid = i.Next() {
		tag := intern.Bytes(i.Key().UserKey)
		if f, ok := propTagMap[tag]; ok {
			p.Loaded[f.Offset] = struct{}{}
			field := v.FieldByIndex(f.Index)
			switch f.Type.Kind() {
			case reflect.Bool:
				field.SetBool(bytes.Equal(i.Value(), propBoolTrue))
			case reflect.Uint32:
				field.SetUint(uint64(binary.LittleEndian.Uint32(i.Value())))
			case reflect.Uint64:
				var n uint64
				if tag == propGlobalSeqnumName {
					n = binary.LittleEndian.Uint64(i.Value())
				} else {
					n, _ = binary.Uvarint(i.Value())
				}
				field.SetUint(n)
			case reflect.String:
				field.SetString(intern.Bytes(i.Value()))
			default:
				panic("not reached")
			}
			continue
		}
		if p.UserProperties == nil {
			p.UserProperties = make(map[string]string)
		}
		p.UserProperties[tag] = string(i.Value())
	}
	return nil
}

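// saveBool, and the save* helpers that follow, encode a single value into m
// under the tag registered in propOffsetTagMap for the given field offset.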
func (p *Properties) saveBool(m map[string][]byte, offset uintptr, value bool) {
	tag := propOffsetTagMap[offset]
	if value {
		m[tag] = propBoolTrue
	} else {
		m[tag] = propBoolFalse
	}
}

func (p *Properties) saveUint32(m map[string][]byte, offset uintptr, value uint32) {
	var buf [4]byte
	binary.LittleEndian.PutUint32(buf[:], value)
	m[propOffsetTagMap[offset]] = buf[:]
}

func (p *Properties) saveUint64(m map[string][]byte, offset uintptr, value uint64) {
	var buf [8]byte
	binary.LittleEndian.PutUint64(buf[:], value)
	m[propOffsetTagMap[offset]] = buf[:]
}

func (p *Properties) saveUvarint(m map[string][]byte, offset uintptr, value uint64) {
	var buf [10]byte
	n := binary.PutUvarint(buf[:], value)
	m[propOffsetTagMap[offset]] = buf[:n]
}

func (p *Properties) saveString(m map[string][]byte, offset uintptr, value string) {
	m[propOffsetTagMap[offset]] = []byte(value)
}

func (p *Properties) save(w *rawBlockWriter) {
	m := make(map[string][]byte)
	for k, v := range p.UserProperties {
		m[k] = []byte(v)
	}
	p.saveUvarint(m, unsafe.Offsetof(p.ColumnFamilyID), p.ColumnFamilyID)
	if p.ColumnFamilyName != "" {
		p.saveString(m, unsafe.Offsetof(p.ColumnFamilyName), p.ColumnFamilyName)
	}
	if p.ComparerName != "" {
		p.saveString(m, unsafe.Offsetof(p.ComparerName), p.ComparerName)
	}
	if p.CompressionName != "" {
		p.saveString(m, unsafe.Offsetof(p.CompressionName), p.CompressionName)
	}
	if p.CompressionOptions != "" {
		p.saveString(m, unsafe.Offsetof(p.CompressionOptions), p.CompressionOptions)
	}
	p.saveUvarint(m, unsafe.Offsetof(p.CreationTime), p.CreationTime)
	p.saveUvarint(m, unsafe.Offsetof(p.DataSize), p.DataSize)
	if p.ExternalFormatVersion != 0 {
		p.saveUint32(m, unsafe.Offsetof(p.ExternalFormatVersion), p.ExternalFormatVersion)
		p.saveUint64(m, unsafe.Offsetof(p.GlobalSeqNum), p.GlobalSeqNum)
	}
	if p.FileCreationTime > 0 {
		p.saveUvarint(m, unsafe.Offsetof(p.FileCreationTime), p.FileCreationTime)
	}
	if p.FilterPolicyName != "" {
		p.saveString(m, unsafe.Offsetof(p.FilterPolicyName), p.FilterPolicyName)
	}
	p.saveUvarint(m, unsafe.Offsetof(p.FilterSize), p.FilterSize)
	p.saveUvarint(m, unsafe.Offsetof(p.FixedKeyLen), p.FixedKeyLen)
	p.saveUvarint(m, unsafe.Offsetof(p.FormatVersion), p.FormatVersion)
	p.saveUvarint(m, unsafe.Offsetof(p.IndexKeyIsUserKey), p.IndexKeyIsUserKey)
	if p.IndexPartitions != 0 {
		p.saveUvarint(m, unsafe.Offsetof(p.IndexPartitions), p.IndexPartitions)
		p.saveUvarint(m, unsafe.Offsetof(p.TopLevelIndexSize), p.TopLevelIndexSize)
	}
	p.saveUvarint(m, unsafe.Offsetof(p.IndexSize), p.IndexSize)
	p.saveUint32(m, unsafe.Offsetof(p.IndexType), p.IndexType)
	p.saveUvarint(m, unsafe.Offsetof(p.IndexValueIsDeltaEncoded), p.IndexValueIsDeltaEncoded)
	if p.MergerName != "" {
		p.saveString(m, unsafe.Offsetof(p.MergerName), p.MergerName)
	}
	p.saveUvarint(m, unsafe.Offsetof(p.NumDataBlocks), p.NumDataBlocks)
	p.saveUvarint(m, unsafe.Offsetof(p.NumEntries), p.NumEntries)
	p.saveUvarint(m, unsafe.Offsetof(p.NumDeletions), p.NumDeletions)
	p.saveUvarint(m, unsafe.Offsetof(p.NumMergeOperands), p.NumMergeOperands)
	p.saveUvarint(m, unsafe.Offsetof(p.NumRangeDeletions), p.NumRangeDeletions)
	if p.NumRangeKeys > 0 {
		p.saveUvarint(m, unsafe.Offsetof(p.NumRangeKeyDels), p.NumRangeKeyDels)
		p.saveUvarint(m, unsafe.Offsetof(p.NumRangeKeys), p.NumRangeKeys)
		p.saveUvarint(m, unsafe.Offsetof(p.NumRangeKeySets), p.NumRangeKeySets)
		p.saveUvarint(m, unsafe.Offsetof(p.NumRangeKeyUnsets), p.NumRangeKeyUnsets)
		p.saveUvarint(m, unsafe.Offsetof(p.RawRangeKeyKeySize), p.RawRangeKeyKeySize)
		p.saveUvarint(m, unsafe.Offsetof(p.RawRangeKeyValueSize), p.RawRangeKeyValueSize)
	}
	p.saveUvarint(m, unsafe.Offsetof(p.OldestKeyTime), p.OldestKeyTime)
	if p.PrefixExtractorName != "" {
		p.saveString(m, unsafe.Offsetof(p.PrefixExtractorName), p.PrefixExtractorName)
	}
	p.saveBool(m, unsafe.Offsetof(p.PrefixFiltering), p.PrefixFiltering)
	if p.PropertyCollectorNames != "" {
		p.saveString(m, unsafe.Offsetof(p.PropertyCollectorNames), p.PropertyCollectorNames)
	}
	p.saveUvarint(m, unsafe.Offsetof(p.RawKeySize), p.RawKeySize)
	p.saveUvarint(m, unsafe.Offsetof(p.RawValueSize), p.RawValueSize)
	p.saveBool(m, unsafe.Offsetof(p.WholeKeyFiltering), p.WholeKeyFiltering)

	keys := make([]string, 0, len(m))
	for key := range m {
		keys = append(keys, key)
	}
	sort.Strings(keys)
	for _, key := range keys {
		w.add(InternalKey{UserKey: []byte(key)}, m[key])
	}
}
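
// The sketch below illustrates how save and load pair up: properties written
// into a raw block by save can be recovered by load. It is only illustrative;
// it assumes that blockWriter (embedded in rawBlockWriter) exposes a
// restartInterval field and a finish() []byte method, as this package's other
// block-writing code does.
func propertiesRoundTrip(props *Properties) (*Properties, error) {
	var w rawBlockWriter
	// Restarts are unnecessary: the properties block is read sequentially by
	// load rather than binary searched.
	w.restartInterval = propertiesBlockRestartInterval
	props.save(&w)

	var loaded Properties
	if err := loaded.load(w.finish(), 0); err != nil {
		return nil, err
	}
	return &loaded, nil
}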