From c0451ff54191f7cfb54c5f44e97ecf5cedba4e6b Mon Sep 17 00:00:00 2001 From: Bartlomiej Plotka Date: Mon, 20 Apr 2020 09:25:31 +0100 Subject: [PATCH] Reverted addition of deletion mark for partial uploads. Fixes https://github.com/thanos-io/thanos/issues/2459 (quick fix). This keeps the logic from the 0.11.0 which was good enough. Some improvement for future: https://github.com/thanos-io/thanos/issues/2470 Signed-off-by: Bartlomiej Plotka --- cmd/thanos/compact.go | 2 +- pkg/compact/clean.go | 10 +++++++--- pkg/compact/clean_test.go | 13 ++++--------- test/e2e/compact_test.go | 7 ++++--- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index 1bad45f773e..2213535eaf7 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -420,7 +420,7 @@ func runCompact( } // No need to resync before partial uploads and delete marked blocks. Last sync should be valid. - compact.BestEffortCleanAbortedPartialUploads(ctx, logger, sy.Partial(), bkt, partialUploadDeleteAttempts, blocksMarkedForDeletion) + compact.BestEffortCleanAbortedPartialUploads(ctx, logger, sy.Partial(), bkt, partialUploadDeleteAttempts, blockCleanupFailures) if err := blocksCleaner.DeleteMarkedBlocks(ctx); err != nil { return errors.Wrap(err, "error cleaning blocks") } diff --git a/pkg/compact/clean.go b/pkg/compact/clean.go index 53cfb169083..6951ef328e0 100644 --- a/pkg/compact/clean.go +++ b/pkg/compact/clean.go @@ -27,7 +27,7 @@ func BestEffortCleanAbortedPartialUploads( partial map[ulid.ULID]error, bkt objstore.Bucket, deleteAttempts prometheus.Counter, - blocksMarkedForDeletion prometheus.Counter, + blockCleanupFailures prometheus.Counter, ) { level.Info(logger).Log("msg", "started cleaning of aborted partial uploads") @@ -45,10 +45,14 @@ func BestEffortCleanAbortedPartialUploads( deleteAttempts.Inc() level.Info(logger).Log("msg", "found partially uploaded block; marking for deletion", "block", id) - if err := block.MarkForDeletion(ctx, logger, bkt, id, blocksMarkedForDeletion); err != nil { - level.Warn(logger).Log("msg", "failed to delete aborted partial upload; skipping", "block", id, "thresholdAge", PartialUploadThresholdAge, "err", err) + // We don't gather any information about deletion marks for partial blocks, so let's simply remove it. We waited + // long PartialUploadThresholdAge already. + // TODO(bwplotka): Fix some edge cases: https://github.com/thanos-io/thanos/issues/2470 . + if err := block.Delete(ctx, logger, bkt, id); err != nil { + level.Warn(logger).Log("msg", "failed to delete aborted partial upload; will retry in next iteration", "block", id, "thresholdAge", PartialUploadThresholdAge, "err", err) return } + blockCleanupFailures.Inc() level.Info(logger).Log("msg", "deleted aborted partial upload", "block", id, "thresholdAge", PartialUploadThresholdAge) } level.Info(logger).Log("msg", "cleaning of aborted partial uploads done") diff --git a/pkg/compact/clean_test.go b/pkg/compact/clean_test.go index 1321c888ba6..51d603799bc 100644 --- a/pkg/compact/clean_test.go +++ b/pkg/compact/clean_test.go @@ -59,22 +59,17 @@ func TestBestEffortCleanAbortedPartialUploads(t *testing.T) { testutil.Ok(t, bkt.Upload(ctx, path.Join(shouldIgnoreID2.String(), "chunks", "000001"), &fakeChunk)) deleteAttempts := promauto.With(nil).NewCounter(prometheus.CounterOpts{}) - blocksMarkedForDeletion := promauto.With(nil).NewCounter(prometheus.CounterOpts{}) - + blockCleanupFailures := promauto.With(nil).NewCounter(prometheus.CounterOpts{}) _, partial, err := metaFetcher.Fetch(ctx) testutil.Ok(t, err) - BestEffortCleanAbortedPartialUploads(ctx, logger, partial, bkt, deleteAttempts, blocksMarkedForDeletion) + BestEffortCleanAbortedPartialUploads(ctx, logger, partial, bkt, deleteAttempts, blockCleanupFailures) testutil.Equals(t, 1.0, promtest.ToFloat64(deleteAttempts)) + testutil.Equals(t, 1.0, promtest.ToFloat64(blockCleanupFailures)) exists, err := bkt.Exists(ctx, path.Join(shouldDeleteID.String(), "chunks", "000001")) testutil.Ok(t, err) - testutil.Equals(t, true, exists) - - exists, err = bkt.Exists(ctx, path.Join(shouldDeleteID.String(), metadata.DeletionMarkFilename)) - testutil.Ok(t, err) - testutil.Equals(t, true, exists) - testutil.Equals(t, 1.0, promtest.ToFloat64(blocksMarkedForDeletion)) + testutil.Equals(t, false, exists) exists, err = bkt.Exists(ctx, path.Join(shouldIgnoreID1.String(), "chunks", "000001")) testutil.Ok(t, err) diff --git a/test/e2e/compact_test.go b/test/e2e/compact_test.go index 9524b960dd7..6212259b6b9 100644 --- a/test/e2e/compact_test.go +++ b/test/e2e/compact_test.go @@ -504,11 +504,11 @@ func TestCompactWithStoreGateway(t *testing.T) { // NOTE: We cannot assert on intermediate `thanos_blocks_meta_` metrics as those are gauge and change dynamically due to many // compaction groups. Wait for at least first compaction iteration (next is in 5m). testutil.Ok(t, c.WaitSumMetrics(e2e.Greater(0), "thanos_compactor_iterations_total")) - testutil.Ok(t, c.WaitSumMetrics(e2e.Equals(0), "thanos_compactor_blocks_cleaned_total")) // This should be 1 [BUG no 1]. TODO: fix. + testutil.Ok(t, c.WaitSumMetrics(e2e.Equals(1), "thanos_compactor_blocks_cleaned_total")) testutil.Ok(t, c.WaitSumMetrics(e2e.Equals(2*4+2+2*3+1), "thanos_compactor_blocks_marked_for_deletion_total")) // 17. testutil.Ok(t, c.WaitSumMetrics(e2e.Equals(2), "thanos_compactor_aborted_partial_uploads_deletion_attempts_total")) // We should have 1. testutil.Ok(t, c.WaitSumMetrics(e2e.Equals(5), "thanos_compact_group_compactions_total")) - // TODO(bwplotka): This is confusing, should be either normal compaction or vertical not both (?) + // TODO(bwplotka): This is confusing, should be either normal compaction or vertical not both (https://github.com/thanos-io/thanos/issues/2469). testutil.Ok(t, c.WaitSumMetrics(e2e.Equals(3), "thanos_compact_group_vertical_compactions_total")) testutil.Ok(t, c.WaitSumMetrics(e2e.Equals(0), "thanos_compact_group_compactions_failures_total")) testutil.Ok(t, c.WaitSumMetrics(e2e.Equals(12), "thanos_compact_group_compaction_runs_started_total")) @@ -519,7 +519,8 @@ func TestCompactWithStoreGateway(t *testing.T) { testutil.Ok(t, str.WaitSumMetrics(e2e.Equals(float64( len(rawBlockIDs)+7+ - 5, // 5 compactions, 5 newly added blocks. + 5+ // 5 compactions, 5 newly added blocks. + -1, // Partial block removed. )), "thanos_blocks_meta_synced")) testutil.Ok(t, str.WaitSumMetrics(e2e.Equals(0), "thanos_blocks_meta_sync_failures_total"))