diff --git a/pkg/block/index.go b/pkg/block/index.go index eb0a3689c0..d844b6593f 100644 --- a/pkg/block/index.go +++ b/pkg/block/index.go @@ -11,6 +11,8 @@ import ( "strings" "time" + "github.com/prometheus/common/model" + "github.com/improbable-eng/thanos/pkg/block/metadata" "github.com/prometheus/tsdb/fileutil" @@ -345,13 +347,17 @@ func GatherIndexIssueStats(logger log.Logger, fn string, minTime int64, maxTime if lastLset != nil && labels.Compare(lastLset, lset) >= 0 { return stats, errors.Errorf("series %v out of order; previous %v", lset, lastLset) } - l0 := lset[0] - for _, l := range lset[1:] { - if l.Name <= l0.Name { - return stats, errors.Errorf("out-of-order label set %s for series %d", lset, id) - } - l0 = l + + // Ensure that __name__ comes first. + if lset[0].Name != model.MetricNameLabel { + return stats, errors.Errorf("label set %s does not start with %s for series %d", lset, model.MetricNameLabel, id) } + + // Ensure all but the first value (__name__) are sorted. + if !sort.SliceIsSorted(lset[1:], func(i, j int) bool { return lset[i+1].Name <= lset[j+1].Name }) { + return stats, errors.Errorf("out-of-order label set %s for series %d", lset, id) + } + if len(chks) == 0 { return stats, errors.Errorf("empty chunks for series %d", id) } diff --git a/pkg/block/index_test.go b/pkg/block/index_test.go index 80c10e8e6e..2c69079333 100644 --- a/pkg/block/index_test.go +++ b/pkg/block/index_test.go @@ -1,11 +1,15 @@ package block import ( + "fmt" "io/ioutil" "os" "path/filepath" "testing" + "github.com/prometheus/tsdb/index" + "github.com/prometheus/tsdb/tsdbutil" + "github.com/go-kit/kit/log" "github.com/improbable-eng/thanos/pkg/testutil" "github.com/prometheus/tsdb/labels" @@ -44,3 +48,195 @@ func TestWriteReadIndexCache(t *testing.T) { testutil.Equals(t, []string{"1"}, vals) testutil.Equals(t, 6, len(postings)) } + +func TestGatherIndexIssueStats(t *testing.T) { + tmpDir, err := ioutil.TempDir("", "test-gather-stats") + testutil.Ok(t, err) + // Cleans up all temp files. + defer func() { testutil.Ok(t, os.RemoveAll(tmpDir)) }() + + // Non existent file. + _, err = GatherIndexIssueStats(log.NewNopLogger(), "", 0, 0) + testutil.NotOk(t, err) + + // Real but empty file. + tmpfile, err := ioutil.TempFile(tmpDir, "empty-file") + testutil.Ok(t, err) + _, err = GatherIndexIssueStats(log.NewNopLogger(), tmpfile.Name(), 0, 0) + testutil.NotOk(t, err) + + // Working test cases. + testCases := []struct { + name string + lsets []labels.Labels + expected Stats + startTime int64 + endTime int64 + numSamples int + }{ + { + name: "empty case", + }, + { + name: "1 labelset name only", + lsets: []labels.Labels{ + { + {Name: "__name__", Value: "metric_name"}, + }, + }, + expected: Stats{TotalSeries: 1}, + }, + { + name: "2 labelsets name only", + lsets: []labels.Labels{ + { + {Name: "__name__", Value: "metric_name"}, + }, { + {Name: "__name__", Value: "metric_name_2"}, + }, + }, + expected: Stats{TotalSeries: 2}, + }, + { + name: "2 labelsets with adtl labels", + lsets: []labels.Labels{ + { + {Name: "__name__", Value: "metric_name"}, + {Name: "label", Value: "value1"}, + }, { + {Name: "__name__", Value: "metric_name"}, + {Name: "label", Value: "value2"}, + }, + }, + expected: Stats{TotalSeries: 2}, + }, + { + name: "1 labelset with lots of labels", + lsets: []labels.Labels{ + { + {Name: "__name__", Value: "metric_name"}, + {Name: "a", Value: "1"}, + {Name: "b", Value: "2"}, + {Name: "c", Value: "3"}, + {Name: "d", Value: "4"}, + {Name: "e", Value: "5"}, + }, + }, + expected: Stats{TotalSeries: 1}, + }, + { + // Name always comes first and prometheus doesn't sort it. + // https://github.com/prometheus/prometheus/blob/9f903fb3f7841f55d1b634d5fb02986a8bfc5e0c/pkg/textparse/promparse.go#L230 + name: "1 labelset with capitalized label", + lsets: []labels.Labels{ + { + {Name: "__name__", Value: "metric_name"}, + {Name: "CapitalizedLabel", Value: "1"}, + {Name: "CapitalizedLabel2", Value: "2"}, + {Name: "more_common_label", Value: "3"}, + }, + }, + expected: Stats{TotalSeries: 1}, + }, + } + + for caseNum, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + tmpfile, err := ioutil.TempFile(tmpDir, fmt.Sprintf("test-case-%d", caseNum)) + testutil.Ok(t, err) + + testutil.Ok(t, writeTestIndex(tmpfile.Name(), tc.startTime, tc.numSamples, tc.lsets...)) + + stats, err := GatherIndexIssueStats(log.NewNopLogger(), tmpfile.Name(), tc.startTime, tc.endTime) + + testutil.Ok(t, err) + // == works for now since the struct is only ints (no pointers or nested values) + testutil.Assert(t, tc.expected == stats, + "index stats did not have an expected value. \nExpected: %+v \nActual: %+v", tc.expected, stats) + }) + } +} + +func TestGatherIndexIssueStatsBadLabelSet(t *testing.T) { + tmpDir, err := ioutil.TempDir("", "test-gather-stats") + testutil.Ok(t, err) + // Cleans up all temp files. + defer func() { testutil.Ok(t, os.RemoveAll(tmpDir)) }() + + // All of these label sets should produce an error. + testCases := []struct { + name string + lsets []labels.Labels + }{ + { + name: "no __name__ label", + lsets: []labels.Labels{ + { + {Name: "no_name", Value: "we're missing __name__"}, + }, + }, + }, + { + // Should be illegal according to https://github.com/prometheus/tsdb/issues/32#issuecomment-292771463 + name: "capital letter before name label", + lsets: []labels.Labels{ + { + {Name: "ABC", Value: "onetwothree"}, + {Name: "__name__", Value: "metric_name"}, + }, + }, + }, + } + + for caseNum, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + tmpfile, err := ioutil.TempFile(tmpDir, fmt.Sprintf("test-case-%d", caseNum)) + testutil.Ok(t, err) + + testutil.Ok(t, writeTestIndex(tmpfile.Name(), 0, 0, tc.lsets...)) + + _, err = GatherIndexIssueStats(log.NewNopLogger(), tmpfile.Name(), 0, 0) + testutil.NotOk(t, err) + }) + } +} + +// writeTestIndex is a helper function that creates an index file with dummy data for testing +func writeTestIndex(filename string, startTime int64, numSamples int, lsets ...labels.Labels) error { + chunks := tsdbutil.PopulatedChunk(numSamples, startTime) + + symbols := make(map[string]struct{}) + for _, lset := range lsets { + for _, l := range lset { + symbols[l.Name] = struct{}{} + symbols[l.Value] = struct{}{} + } + } + + memPostings := index.NewMemPostings() + for i, lset := range lsets { + memPostings.Add(uint64(i), lset) + } + + w, err := index.NewWriter(filename) + if err != nil { + return err + } + + if err = w.AddSymbols(symbols); err != nil { + return err + } + + for i, lset := range lsets { + if err = w.AddSeries(uint64(i), lset, chunks); err != nil { + return err + } + } + + pKey, pVal := index.AllPostingsKey() + if err = w.WritePostings(pKey, pVal, memPostings.All()); err != nil { + return err + } + + return w.Close() +}