diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index b0ea2f346cd..8fc6629f990 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -121,6 +121,9 @@ func registerCompact(m map[string]setupFunc, app *kingpin.Application) { compactionConcurrency := cmd.Flag("compact.concurrency", "Number of goroutines to use when compacting groups."). Default("1").Int() + deleteDelay := modelDuration(cmd.Flag("delete-delay", fmt.Sprintf("Time before a block marked for deletion is deleted from bucket")). + Default("15m")) + selectorRelabelConf := regSelectorRelabelFlags(cmd) m[component.Compact.String()] = func(g *run.Group, logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, _ <-chan struct{}, _ bool) error { @@ -130,6 +133,7 @@ func registerCompact(m map[string]setupFunc, app *kingpin.Application) { *dataDir, objStoreConfig, time.Duration(*consistencyDelay), + time.Duration(*deleteDelay), *haltOnError, *acceptMalformedIndex, *wait, @@ -158,6 +162,7 @@ func runCompact( dataDir string, objStoreConfig *extflag.PathOrContent, consistencyDelay time.Duration, + deleteDelay time.Duration, haltOnError bool, acceptMalformedIndex bool, wait bool, @@ -187,7 +192,13 @@ func runCompact( Name: "thanos_compactor_aborted_partial_uploads_deletion_attempts_total", Help: "Total number of started deletions of blocks that are assumed aborted and only partially uploaded.", }) - reg.MustRegister(halted, retried, iterations, partialUploadDeleteAttempts) + deleteDelayMetric := prometheus.NewGaugeFunc(prometheus.GaugeOpts{ + Name: "thanos_delete_delay_seconds", + Help: "Configured delete delay in seconds.", + }, func() float64 { + return deleteDelay.Seconds() + }) + reg.MustRegister(halted, retried, iterations, partialUploadDeleteAttempts, deleteDelayMetric) downsampleMetrics := newDownsampleMetrics(reg) @@ -275,9 +286,10 @@ func runCompact( } var ( - compactDir = path.Join(dataDir, "compact") - downsamplingDir = path.Join(dataDir, "downsample") - indexCacheDir = path.Join(dataDir, "index_cache") + compactDir = path.Join(dataDir, "compact") + compactorMetaDir = path.Join(dataDir, "compactor-metas") + downsamplingDir = path.Join(dataDir, "downsample") + indexCacheDir = path.Join(dataDir, "index_cache") ) if err := os.RemoveAll(downsamplingDir); err != nil { @@ -285,6 +297,7 @@ func runCompact( return errors.Wrap(err, "clean working downsample directory") } + blockDeletionScheduler := compact.NewScheduleBlockDelete(logger, compactorMetaDir, bkt, deleteDelay) compactor, err := compact.NewBucketCompactor(logger, sy, comp, compactDir, bkt, concurrency) if err != nil { cancel() @@ -330,10 +343,28 @@ func runCompact( return errors.Wrap(err, fmt.Sprintf("retention failed")) } - compact.BestEffortCleanAbortedPartialUploads(ctx, logger, metaFetcher, bkt, partialUploadDeleteAttempts) + compact.BestEffortCleanAbortedPartialUploads(ctx, logger, metaFetcher, blockDeletionScheduler, partialUploadDeleteAttempts) return nil } + g.Add(func() error { + if !wait { + return blockDeletionScheduler.ScheduleDelete(ctx) + } + + // --wait=true is specified. + return runutil.Repeat(5*time.Minute, ctx.Done(), func() error { + err := blockDeletionScheduler.ScheduleDelete(ctx) + if err == nil { + return nil + } + + return errors.Wrap(err, "error cleaning blocks") + }) + }, func(error) { + cancel() + }) + g.Add(func() error { defer runutil.CloseWithLogOnErr(logger, bkt, "bucket client") diff --git a/docs/components/compact.md b/docs/components/compact.md index edd71eb7b11..fbff1264773 100644 --- a/docs/components/compact.md +++ b/docs/components/compact.md @@ -11,8 +11,8 @@ It is generally not semantically concurrency safe and must be deployed as a sing It is also responsible for downsampling of data: -* creating 5m downsampling for blocks larger than **40 hours** (2d, 2w) -* creating 1h downsampling for blocks larger than **10 days** (2w). +- creating 5m downsampling for blocks larger than **40 hours** (2d, 2w) +- creating 1h downsampling for blocks larger than **10 days** (2w). Example: @@ -35,9 +35,9 @@ On-disk data is safe to delete between restarts and should be the first attempt Resolution - distance between data points on your graphs. E.g. -* raw - the same as scrape interval at the moment of data ingestion -* 5m - data point is every 5 minutes -* 1h - data point is every 1h +- raw - the same as scrape interval at the moment of data ingestion +- 5m - data point is every 5 minutes +- 1h - data point is every 1h Keep in mind, that the initial goal of downsampling is not saving disk space (Read further for elaboration on storage space consumption). The goal of downsampling is providing an opportunity to get fast results for range queries of big time intervals like months or years. In other words, if you set `--retention.resolution-raw` less then `--retention.resolution-5m` and `--retention.resolution-1h` - you might run into a problem of not being able to "zoom in" to your historical data. @@ -66,7 +66,8 @@ compacting blocks from an instance even when a Prometheus instance goes down for ## Flags -[embedmd]:# (flags/compact.txt $) +[embedmd]: # "flags/compact.txt $" + ```$ usage: thanos compact [] @@ -144,5 +145,5 @@ Flags: selecting blocks. It follows native Prometheus relabel-config syntax. See format details: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config - + --delete-delay=15m Time before a block marked for deletion is deleted from bucket. ``` diff --git a/pkg/compact/clean.go b/pkg/compact/clean.go index af681a940e4..fb2926fd329 100644 --- a/pkg/compact/clean.go +++ b/pkg/compact/clean.go @@ -12,7 +12,6 @@ import ( "github.com/oklog/ulid" "github.com/prometheus/client_golang/prometheus" "github.com/thanos-io/thanos/pkg/block" - "github.com/thanos-io/thanos/pkg/objstore" ) const ( @@ -21,7 +20,7 @@ const ( PartialUploadThresholdAge = 2 * 24 * time.Hour ) -func BestEffortCleanAbortedPartialUploads(ctx context.Context, logger log.Logger, fetcher block.MetadataFetcher, bkt objstore.Bucket, deleteAttempts prometheus.Counter) { +func BestEffortCleanAbortedPartialUploads(ctx context.Context, logger log.Logger, fetcher block.MetadataFetcher, blockDeletionScheduler *ScheduleBlockDelete, deleteAttempts prometheus.Counter) { level.Info(logger).Log("msg", "started cleaning of aborted partial uploads") _, partial, err := fetcher.Fetch(ctx) if err != nil { @@ -41,7 +40,7 @@ func BestEffortCleanAbortedPartialUploads(ctx context.Context, logger log.Logger } deleteAttempts.Inc() - if err := block.Delete(ctx, logger, bkt, id); err != nil { + if err := blockDeletionScheduler.MarkBlockForDeletion(id); err != nil { level.Warn(logger).Log("msg", "failed to delete aborted partial upload; skipping", "block", id, "thresholdAge", PartialUploadThresholdAge, "err", err) return } diff --git a/pkg/compact/clean_test.go b/pkg/compact/clean_test.go index 85654f8c8a3..7eb923b0d05 100644 --- a/pkg/compact/clean_test.go +++ b/pkg/compact/clean_test.go @@ -7,6 +7,8 @@ import ( "bytes" "context" "encoding/json" + "io/ioutil" + "os" "path" "testing" "time" @@ -28,6 +30,12 @@ func TestBestEffortCleanAbortedPartialUploads(t *testing.T) { bkt := inmem.NewBucket() logger := log.NewNopLogger() + // Create fresh, empty directory for actual test. + dir, err := ioutil.TempDir("", "test-clean") + testutil.Ok(t, err) + defer func() { testutil.Ok(t, os.RemoveAll(dir)) }() + + blockDeletionScheduler := NewScheduleBlockDelete(logger, dir, nil, 15*time.Minute) metaFetcher, err := block.NewMetaFetcher(nil, 32, bkt, "", nil) testutil.Ok(t, err) @@ -58,7 +66,7 @@ func TestBestEffortCleanAbortedPartialUploads(t *testing.T) { testutil.Ok(t, bkt.Upload(ctx, path.Join(shouldIgnoreID2.String(), "chunks", "000001"), &fakeChunk)) deleteAttempts := prometheus.NewCounter(prometheus.CounterOpts{}) - BestEffortCleanAbortedPartialUploads(ctx, logger, metaFetcher, bkt, deleteAttempts) + BestEffortCleanAbortedPartialUploads(ctx, logger, metaFetcher, blockDeletionScheduler, deleteAttempts) testutil.Equals(t, 1.0, promtest.ToFloat64(deleteAttempts)) exists, err := bkt.Exists(ctx, path.Join(shouldDeleteID.String(), "chunks", "000001")) diff --git a/pkg/compact/schedule_delete.go b/pkg/compact/schedule_delete.go new file mode 100644 index 00000000000..32716f81550 --- /dev/null +++ b/pkg/compact/schedule_delete.go @@ -0,0 +1,143 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +package compact + +import ( + "context" + "encoding/json" + "fmt" + "io/ioutil" + "os" + "path" + "path/filepath" + "strings" + "time" + + "github.com/go-kit/kit/log" + "github.com/go-kit/kit/log/level" + "github.com/oklog/ulid" + "github.com/pkg/errors" + "github.com/thanos-io/thanos/pkg/block" + "github.com/thanos-io/thanos/pkg/objstore" + "github.com/thanos-io/thanos/pkg/runutil" +) + +// DebugCompactorMetas is folder to store compactor metadata +// about when the block is scheduled to be deleted. +const DebugCompactorMetas = "debug/compactor-metas" + +// CompactorMeta stores block id and when block was marked for deletion. +type CompactorMeta struct { + ID ulid.ULID `json:"id"` + DeletionTime int64 `json:"deletion_time"` +} + +// ScheduleBlockDelete marks the block to be deleted. +type ScheduleBlockDelete struct { + dir string + logger log.Logger + deleteDelay time.Duration + bkt objstore.Bucket +} + +// NewScheduleBlockDelete creates a new ScheduleBlockDelete. +func NewScheduleBlockDelete(logger log.Logger, dir string, bkt objstore.Bucket, deleteDelay time.Duration) *ScheduleBlockDelete { + return &ScheduleBlockDelete{ + dir: dir, + logger: logger, + deleteDelay: deleteDelay, + bkt: bkt, + } +} + +// ScheduleDelete deletes blocks from bucket +// deleteDelay duration after block is marked for deletion. +func (s *ScheduleBlockDelete) ScheduleDelete(ctx context.Context) error { + compactorMetaPath := filepath.Join(s.dir, "download") + defer func() { + if err := os.RemoveAll(compactorMetaPath); err != nil { + level.Error(s.logger).Log("msg", "failed to remove compactor meta dir", compactorMetaPath, "err", err) + } + }() + + if err := os.RemoveAll(compactorMetaPath); err != nil { + return errors.Wrap(err, "clean compaction compactor meta dir") + } + + if err := objstore.DownloadDir(ctx, s.logger, s.bkt, DebugCompactorMetas, compactorMetaPath); err != nil { + return errors.Wrap(err, "downlad compactor-metas dir") + } + + return filepath.Walk(compactorMetaPath, func(path string, fileInfo os.FileInfo, err error) error { + _, file := filepath.Split(path) + if _, err := ulid.Parse(strings.TrimSuffix(file, filepath.Ext(file))); err != nil { + compactorMetaBytes, err := ioutil.ReadFile(path) + if err != nil { + return errors.Wrap(err, "read compactor meta") + } + + compactorMeta := CompactorMeta{} + + if err := json.Unmarshal([]byte(compactorMetaBytes), &compactorMeta); err != nil { + return errors.Wrap(err, "unmarshal compactor meta") + } + + if time.Now().Unix()-compactorMeta.DeletionTime > s.deleteDelay.Milliseconds() { + if err := block.Delete(ctx, s.logger, s.bkt, compactorMeta.ID); err != nil { + return errors.Wrap(err, "delete block") + } + + if err := os.RemoveAll(filepath.Join(s.dir, compactorMeta.ID.String())); err != nil { + return errors.Wrap(err, "delete compactor-meta.json") + } + } + } + return nil + }) +} + +// MarkBlockForDeletion creates a file +// which stores information about when the block was marked for deletion. +func (s *ScheduleBlockDelete) MarkBlockForDeletion(ctx context.Context, id ulid.ULID) error { + compactorMetaExists, err := objstore.Exists(ctx, s.bkt, path.Join(DebugCompactorMetas, fmt.Sprintf("%s.json", id.String()))) + if err != nil { + return errors.Wrap(err, fmt.Sprint("check compactor meta for id %s in bucket", id.String())) + } + if compactorMetaExists { + level.Info(s.logger).Log("msg", "compactor-meta already exists for block id", id.String()) + return nil + } + + compactorMetaPath := filepath.Join(s.dir, "upload", fmt.Sprintf("%s.json", id.String())) + compactorMeta := &CompactorMeta{ + ID: id, + DeletionTime: time.Now().Unix(), + } + + f, err := os.Create(compactorMetaPath) + if err != nil { + return err + } + + enc := json.NewEncoder(f) + enc.SetIndent("", "\t") + + if err := enc.Encode(compactorMeta); err != nil { + runutil.CloseWithLogOnErr(s.logger, f, "write meta file close") + return err + } + if err := f.Close(); err != nil { + return err + } + + if err := objstore.UploadFile(ctx, s.logger, s.bkt, compactorMetaPath, path.Join(DebugCompactorMetas, fmt.Sprintf("%s.json", id.String()))); err != nil { + return errors.Wrap(err, "upload meta file to debug dir") + } + + if err := os.Remove(compactorMetaPath); err != nil { + return errors.Wrap(err, "delete compactor-meta file") + } + + return nil +}