store: Setting the start and end to prior posting changes (#837)
* setting the start and end to prior posting changes

* really need some test data but this may also be the fix

* moving the start and end inside the loop, so they are not updated as we iterate over items

* Added regression tests for #829.

Moved bucket e2e tests to table test.

Signed-off-by: Bartek Plotka <[email protected]>

* Fixed overestimation for fetching chunks and series.

Signed-off-by: Bartek Plotka <[email protected]>

* Removed wrong comment.

Signed-off-by: Bartek Plotka <[email protected]>

* changing func to match interface
domgreen authored Feb 13, 2019
1 parent 8622888 commit 1b6f6da
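Background for the diff below: per the commit message, the start and end offsets were being updated as the loop iterated because the closures handed to run.Group captured the shared range variable p, so every goroutine saw the values of the last partition. The fix copies them into per-iteration locals (s, e := p.start, p.end) before registering each closure. A minimal, self-contained sketch of the before/after pattern — hypothetical names, not the Thanos code, and assuming a pre-1.22 Go toolchain where the range variable is shared across iterations:

package main

import "fmt"

type part struct{ start, end uint64 }

func main() {
    parts := []part{{0, 10}, {20, 30}, {40, 50}}

    var deferred []func() // closures run only after the loop, the way run.Group runs its actors
    for _, p := range parts {
        // Buggy: the closure captures the shared loop variable p, so with Go < 1.22
        // every closure ends up observing the last element (40, 50).
        deferred = append(deferred, func() { fmt.Println("buggy:", p.start, p.end) })

        // Fixed (the pattern this commit applies): copy the values into
        // per-iteration locals before the closure is registered.
        s, e := p.start, p.end
        deferred = append(deferred, func() { fmt.Println("fixed:", s, e) })
    }
    for _, f := range deferred {
        f()
    }
}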
Showing 3 changed files with 167 additions and 112 deletions.
71 changes: 45 additions & 26 deletions pkg/store/bucket.go
@@ -172,6 +172,8 @@ type BucketStore struct {
debugLogging bool
// Number of goroutines to use when syncing blocks from object storage.
blockSyncConcurrency int

+ partitioner partitioner
}

// NewBucketStore creates a new bucket backed store that implements the store API against
@@ -197,6 +199,9 @@ func NewBucketStore(
if err != nil {
return nil, errors.Wrap(err, "create chunk pool")
}

+ const maxGapSize = 512 * 1024

s := &BucketStore{
logger: logger,
bucket: bucket,
@@ -207,6 +212,7 @@
blockSets: map[uint64]*bucketBlockSet{},
debugLogging: debugLogging,
blockSyncConcurrency: blockSyncConcurrency,
+ partitioner: gapBasedPartitioner{maxGapSize: maxGapSize},
}
s.metrics = newBucketStoreMetrics(reg)

@@ -353,6 +359,7 @@ func (s *BucketStore) addBlock(ctx context.Context, id ulid.ULID) (err error) {
dir,
s.indexCache,
s.chunkPool,
+ s.partitioner,
)
if err != nil {
return errors.Wrap(err, "new bucket block")
@@ -981,6 +988,8 @@ type bucketBlock struct {
chunkObjs []string

pendingReaders sync.WaitGroup

+ partitioner partitioner
}

func newBucketBlock(
@@ -991,14 +1000,16 @@
dir string,
indexCache *indexCache,
chunkPool *pool.BytesPool,
+ p partitioner,
) (b *bucketBlock, err error) {
b = &bucketBlock{
- logger: logger,
- bucket: bkt,
- indexObj: path.Join(id.String(), block.IndexFilename),
- indexCache: indexCache,
- chunkPool: chunkPool,
- dir: dir,
+ logger: logger,
+ bucket: bkt,
+ indexObj: path.Join(id.String(), block.IndexFilename),
+ indexCache: indexCache,
+ chunkPool: chunkPool,
+ dir: dir,
+ partitioner: p,
}
if err = b.loadMeta(ctx, id); err != nil {
return nil, errors.Wrap(err, "load meta")
@@ -1243,8 +1254,6 @@ type postingPtr struct {

// fetchPostings returns sorted slice of postings that match the selected labels.
func (r *bucketIndexReader) fetchPostings(keys labels.Labels) (index.Postings, error) {
- const maxGapSize = 512 * 1024

var (
ptrs []postingPtr
postings = make([]index.Postings, 0, len(keys))
@@ -1282,9 +1291,9 @@ func (r *bucketIndexReader) fetchPostings(keys labels.Labels) (index.Postings, e

// TODO(bwplotka): Asses how large in worst case scenario this can be. (e.g fetch for AllPostingsKeys)
// Consider sub split if too big.
- parts := partitionRanges(len(ptrs), func(i int) (start, end uint64) {
+ parts := r.block.partitioner.Partition(len(ptrs), func(i int) (start, end uint64) {
return uint64(ptrs[i].ptr.Start), uint64(ptrs[i].ptr.End)
- }, maxGapSize)
+ })

var g run.Group
for _, p := range parts {
@@ -1346,7 +1355,6 @@ func (r *bucketIndexReader) fetchPostings(keys labels.Labels) (index.Postings, e

func (r *bucketIndexReader) PreloadSeries(ids []uint64) error {
const maxSeriesSize = 64 * 1024
- const maxGapSize = 512 * 1024

var newIDs []uint64

@@ -1359,17 +1367,18 @@
}
ids = newIDs

- parts := partitionRanges(len(ids), func(i int) (start, end uint64) {
+ parts := r.block.partitioner.Partition(len(ids), func(i int) (start, end uint64) {
return ids[i], ids[i] + maxSeriesSize
- }, maxGapSize)
+ })
var g run.Group

for _, p := range parts {
ctx, cancel := context.WithCancel(r.ctx)
+ s, e := p.start, p.end
i, j := p.elemRng[0], p.elemRng[1]

g.Add(func() error {
- return r.loadSeries(ctx, ids[i:j], p.start, p.end+maxSeriesSize)
+ return r.loadSeries(ctx, ids[i:j], s, e)
}, func(err error) {
if err != nil {
cancel()
@@ -1419,12 +1428,22 @@ type part struct {
elemRng [2]int
}

- // partitionRanges partitions length entries into n <= length ranges that cover all
- // input ranges.
- // It combines entries that are separated by reasonably small gaps.
- // It supports overlapping ranges.
- // NOTE: It expects range to be sorted by start time.
- func partitionRanges(length int, rng func(int) (uint64, uint64), maxGapSize uint64) (parts []part) {
+ type partitioner interface {
+ // Partition partitions length entries into n <= length ranges that cover all
+ // input ranges
+ // It supports overlapping ranges.
+ // NOTE: It expects range to be sorted by start time.
+ Partition(length int, rng func(int) (uint64, uint64)) []part
+ }
+
+ type gapBasedPartitioner struct {
+ maxGapSize uint64
+ }
+
+ // Partition partitions length entries into n <= length ranges that cover all
+ // input ranges by combining entries that are separated by reasonably small gaps.
+ // It is used to combine multiple small ranges from object storage into bigger, more efficient/cheaper ones.
+ func (g gapBasedPartitioner) Partition(length int, rng func(int) (uint64, uint64)) (parts []part) {
j := 0
k := 0
for k < length {
@@ -1438,7 +1457,7 @@ func partitionRanges(length int, rng func(int) (uint64, uint64), maxGapSize uint
for ; k < length; k++ {
s, e := rng(k)

- if p.end+maxGapSize < s {
+ if p.end+g.maxGapSize < s {
break
}

@@ -1521,27 +1540,27 @@ func (r *bucketChunkReader) addPreload(id uint64) error {
// preload all added chunk IDs. Must be called before the first call to Chunk is made.
func (r *bucketChunkReader) preload() error {
const maxChunkSize = 16000
- const maxGapSize = 512 * 1024

var g run.Group

for seq, offsets := range r.preloads {
sort.Slice(offsets, func(i, j int) bool {
return offsets[i] < offsets[j]
})
- parts := partitionRanges(len(offsets), func(i int) (start, end uint64) {
+ parts := r.block.partitioner.Partition(len(offsets), func(i int) (start, end uint64) {
return uint64(offsets[i]), uint64(offsets[i]) + maxChunkSize
- }, maxGapSize)
+ })

seq := seq
offsets := offsets

for _, p := range parts {
ctx, cancel := context.WithCancel(r.ctx)
+ s, e := uint32(p.start), uint32(p.end)
m, n := p.elemRng[0], p.elemRng[1]

g.Add(func() error {
- return r.loadChunks(ctx, offsets[m:n], seq, uint32(p.start), uint32(p.end)+maxChunkSize)
+ return r.loadChunks(ctx, offsets[m:n], seq, s, e)
}, func(err error) {
if err != nil {
cancel()
Expand All @@ -1559,11 +1578,11 @@ func (r *bucketChunkReader) loadChunks(ctx context.Context, offs []uint32, seq i
if err != nil {
return errors.Wrapf(err, "read range for %d", seq)
}
- r.chunkBytes = append(r.chunkBytes, b)

r.mtx.Lock()
defer r.mtx.Unlock()

+ r.chunkBytes = append(r.chunkBytes, b)
r.stats.chunksFetchCount++
r.stats.chunksFetched += len(offs)
r.stats.chunksFetchDurationSum += time.Since(begin)
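For context on the partitioner extracted in the hunks above: gapBasedPartitioner merges sorted byte ranges whose gaps are at most maxGapSize (512 * 1024 in NewBucketStore), so that many small object-storage reads for postings, series, and chunks collapse into a few larger range requests. A rough standalone sketch of the same gap-merging idea — simplified, with hypothetical names, and not the exact Thanos implementation (part of which is collapsed above):

package main

import "fmt"

type part struct {
    start, end uint64
    elemRng    [2]int // [first, one-past-last) indices of the merged input ranges
}

// partition merges sorted ranges separated by at most maxGapSize bytes,
// mirroring (in simplified form) the gap-based partitioner introduced here.
func partition(length int, rng func(int) (uint64, uint64), maxGapSize uint64) []part {
    var parts []part
    for k := 0; k < length; {
        j := k
        start, end := rng(k)
        p := part{start: start, end: end}
        for ; k < length; k++ {
            s, e := rng(k)
            if p.end+maxGapSize < s {
                break // gap too large: start a new part
            }
            if e > p.end {
                p.end = e
            }
        }
        p.elemRng = [2]int{j, k}
        parts = append(parts, p)
    }
    return parts
}

func main() {
    offsets := []uint64{0, 100, 5000}
    got := partition(len(offsets), func(i int) (uint64, uint64) {
        return offsets[i], offsets[i] + 16 // pretend every element is 16 bytes long
    }, 512)
    fmt.Println(got) // [{0 116 [0 2]} {5000 5016 [2 3]}]
}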