diff --git a/sstable/writer.go b/sstable/writer.go
index e99a8b30ee2..6147b509809 100644
--- a/sstable/writer.go
+++ b/sstable/writer.go
@@ -152,12 +152,22 @@ type Writer struct {
 	xxHasher *xxhash.Digest
 
 	topLevelIndexBlock blockWriter
-	indexPartitions    []indexBlockWriterAndBlockProperties
+	indexPartitions    []indexBlockAndBlockProperties
+	// keyAlloc and indexBlockAlloc are buffers that are carved up
+	// incrementally to back cloned keys and finished index partition blocks.
+	keyAlloc        []byte
+	indexBlockAlloc []byte
 }
 
-type indexBlockWriterAndBlockProperties struct {
-	writer     blockWriter
+// indexBlockAndBlockProperties records a finished index partition of a
+// two-level index: the partition's entry count (nEntries), a copy of the
+// index block's current key when it was finished (sep), the encoded block
+// properties, and a copy of the finished block bytes (block).
+type indexBlockAndBlockProperties struct {
+	nEntries   int
+	sep        InternalKey
 	properties []byte
+	block      []byte
 }
 
 // Set sets the value for the given key. The sequence number is set to
@@ -267,7 +277,7 @@ func (w *Writer) addPoint(key InternalKey, value []byte) error {
 		// semantically identical, because we need to ensure that SmallestPoint.UserKey
 		// is not nil. This is required by WriterMetadata.Smallest in order to
 		// distinguish between an unset SmallestPoint and a zero-length one.
-		w.meta.SmallestPoint = w.meta.LargestPoint.Clone()
+		w.keyAlloc, w.meta.SmallestPoint = cloneKeyWithBuf(w.meta.LargestPoint, w.keyAlloc)
 	}
 
 	w.props.NumEntries++
@@ -432,14 +442,25 @@ func (w *Writer) addIndexEntry(key InternalKey, bhp BlockHandleWithProperties) e
 	prevKey := base.DecodeInternalKey(w.block.curKey)
 	var sep InternalKey
+	// Both branches build the separator in w.keyAlloc rather than in a freshly
+	// allocated buffer, so make sure it can hold a key of prevKey's length.
+	if len(w.keyAlloc) < len(prevKey.UserKey) {
+		w.keyAlloc = make([]byte, len(prevKey.UserKey)+keyAllocSize)
+	}
 	if key.UserKey == nil && key.Trailer == 0 {
-		sep = prevKey.Successor(w.compare, w.successor, nil)
+		sep = prevKey.Successor(w.compare, w.successor, w.keyAlloc[:0])
 	} else {
-		sep = prevKey.Separator(w.compare, w.separator, nil, key)
+		sep = prevKey.Separator(w.compare, w.separator, w.keyAlloc[:0], key)
 	}
+	// Advance past the bytes sep may occupy so they are not handed out again.
+	w.keyAlloc = w.keyAlloc[len(sep.UserKey):]
 	encoded := encodeBlockHandleWithProperties(w.tmp[:], bhp)
 
 	if supportsTwoLevelIndex(w.tableFormat) &&
 		shouldFlush(sep, encoded, &w.indexBlock, w.indexBlockSize, w.indexBlockSizeThreshold) {
+		// Lazily allocate the partition slice with capacity for 32 entries.
+		if cap(w.indexPartitions) == 0 {
+			w.indexPartitions = make([]indexBlockAndBlockProperties, 0, 32)
+		}
 		// Enable two level indexes if there is more than one index block.
 		w.twoLevelIndex = true
 		if err := w.finishIndexBlock(); err != nil {
@@ -484,6 +505,27 @@ func shouldFlush(
 	return newSize > blockSize
 }
 
+// keyAllocSize is the extra capacity, in bytes, added on top of the key being
+// stored whenever a key buffer has to be reallocated.
+const keyAllocSize = 256 << 10
+
+// cloneKeyWithBuf copies k.UserKey into the front of buf and returns the
+// unused remainder of buf along with a key whose UserKey refers to the copy.
+// If buf is too small for the user key, a new buffer of len(k.UserKey) +
+// keyAllocSize bytes is allocated first. A key with an empty UserKey is
+// returned as is, without copying.
+func cloneKeyWithBuf(k InternalKey, buf []byte) ([]byte, InternalKey) {
+	if len(k.UserKey) == 0 {
+		return buf, k
+	}
+	if len(buf) < len(k.UserKey) {
+		buf = make([]byte, len(k.UserKey)+keyAllocSize)
+	}
+	n := copy(buf, k.UserKey)
+	// Cap the clone at n so an append to it cannot spill into the remainder.
+	return buf[n:], InternalKey{UserKey: buf[:n:n], Trailer: k.Trailer}
+}
+
 // finishIndexBlock finishes the current index block and adds it to the top
 // level index block. This is only used when two level indexes are enabled.
 func (w *Writer) finishIndexBlock() error {
@@ -498,14 +540,22 @@ func (w *Writer) finishIndexBlock() error {
 			w.blockPropsEncoder.addProp(shortID(i), scratch)
 		}
 	}
-	w.indexPartitions = append(w.indexPartitions,
-		indexBlockWriterAndBlockProperties{
-			writer:     w.indexBlock,
-			properties: w.blockPropsEncoder.props(),
-		})
-	w.indexBlock = blockWriter{
-		restartInterval: 1,
+	part := indexBlockAndBlockProperties{nEntries: w.indexBlock.nEntries, properties: w.blockPropsEncoder.props()}
+	w.keyAlloc, part.sep = cloneKeyWithBuf(base.DecodeInternalKey(w.indexBlock.curKey), w.keyAlloc)
+	bk := w.indexBlock.finish()
+	// Copy the finished block into indexBlockAlloc, replacing the buffer with a
+	// larger one when the block does not fit in what is left of it.
+	if len(w.indexBlockAlloc) < len(bk) {
+		if cap(w.indexBlockAlloc) == 0 {
+			w.indexBlockAlloc = make([]byte, len(bk)*16)
+		} else {
+			w.indexBlockAlloc = make([]byte, len(bk)*32)
+		}
 	}
+	n := copy(w.indexBlockAlloc, bk)
+	part.block = w.indexBlockAlloc[:n:n]
+	w.indexBlockAlloc = w.indexBlockAlloc[n:]
+	w.indexPartitions = append(w.indexPartitions, part)
 	return nil
 }
 
@@ -517,9 +567,9 @@ func (w *Writer) writeTwoLevelIndex() (BlockHandle, error) {
 
 	for i := range w.indexPartitions {
 		b := &w.indexPartitions[i]
-		w.props.NumDataBlocks += uint64(b.writer.nEntries)
-		sep := base.DecodeInternalKey(b.writer.curKey)
-		data := b.writer.finish()
+		w.props.NumDataBlocks += uint64(b.nEntries)
+
+		data := b.block
 		w.props.IndexSize += uint64(len(data))
 		bh, err := w.writeBlock(data, w.compression)
 		if err != nil {
@@ -530,7 +580,7 @@ func (w *Writer) writeTwoLevelIndex() (BlockHandle, error) {
 			Props:       b.properties,
 		}
 		encoded := encodeBlockHandleWithProperties(w.tmp[:], bhp)
-		w.topLevelIndexBlock.add(sep, encoded)
+		w.topLevelIndexBlock.add(b.sep, encoded)
 	}
 
 	// NB: RocksDB includes the block trailer length in the index size