From 0011c057c4af3bb2e2be9df26fe5f434a1f67e96 Mon Sep 17 00:00:00 2001 From: Jackson Owens Date: Fri, 20 May 2022 18:42:22 -0400 Subject: [PATCH] db: fix data race in SeekPrefixGE Fix a data race in SeekPrefixGE. The merging iterator's findNextEntry method has an optimization for prefix iteration: When the merging iterator observes a range deletion and seeks lower-level iterators to the deletion's end boundary, the merging iterator checks whether the current iteration prefix is exhausted. Previously, if the entire iterator was exhausted, this optimization would accidentally read the heap root key `m.heap.items[0].key` for a heap that is empty. The iterator that surfaced `m.heap.items[0].key` would have already been exhausted, and the key could point into memory already released (eg, a sstable iterator already returned to a pool). This data race is not likely to have caused any inconsistencies in production, because under the conditions the race was possible the iterator was already exhausted and would be recognized as exhausted regardless of the result of the comparison. This commit fixes this data race by waiting until the beginning of the next iteration of the findNextEntry loop (if there is another iteration) to compare the heap root's prefix against the search prefix. Fix #1698. --- merging_iter.go | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/merging_iter.go b/merging_iter.go index 2ac14a71f3..3f6aa439a2 100644 --- a/merging_iter.go +++ b/merging_iter.go @@ -649,27 +649,35 @@ func (m *mergingIter) isNextEntryDeleted(item *mergingIterItem) bool { // Starting from the current entry, finds the first (next) entry that can be returned. 
func (m *mergingIter) findNextEntry() (*InternalKey, []byte) { + var reseeked bool for m.heap.len() > 0 && m.err == nil { item := &m.heap.items[0] if m.levels[item.index].isSyntheticIterBoundsKey { break } + // For prefix iteration, stop if we already seeked the iterator due to a + // range tombstone and are now past the prefix. We could amortize the + // cost of this comparison, by doing it only after we have iterated in + // this for loop a few times. But unless we find a performance benefit + // to that, we do the simple thing and compare each time. Note that + // isNextEntryDeleted already did at least 4 key comparisons in order to + // return true, and additionally at least one heap comparison to step to + // the next entry. + // + // Note that we cannot move this comparison into the isNextEntryDeleted + // branch. Once isNextEntryDeleted determines a key is deleted and seeks + // the level's iterator, item.key's memory is potentially invalid. If + // the iterator is now exhausted, item.key may be garbage. + if m.prefix != nil && reseeked { + if n := m.split(item.key.UserKey); !bytes.Equal(m.prefix, item.key.UserKey[:n]) { + return nil, nil + } + } + m.addItemStats(item) if m.isNextEntryDeleted(item) { m.stats.PointsCoveredByRangeTombstones++ - // For prefix iteration, stop if we are past the prefix. We could - // amortize the cost of this comparison, by doing it only after we - // have iterated in this for loop a few times. But unless we find - // a performance benefit to that, we do the simple thing and - // compare each time. Note that isNextEntryDeleted already did at - // least 4 key comparisons in order to return true, and - // additionally at least one heap comparison to step to the next - // entry. - if m.prefix != nil { - if n := m.split(item.key.UserKey); !bytes.Equal(m.prefix, item.key.UserKey[:n]) { - return nil, nil - } - } + reseeked = true continue } if item.key.Visible(m.snapshot) &&