From a1fbd92d3ef60fc8169bbf09d720c0d3e3358b12 Mon Sep 17 00:00:00 2001 From: Steve Mitchell Date: Sat, 9 Feb 2019 09:55:09 -0500 Subject: [PATCH 1/2] change GatherIndexIssueStats validation logic on sorted labels deliberately exclude the first element when checking that a label set is in sorted order. The first element is always "__name__" for prometheus and while '_' is lexographically before all lowercase letters, it does not precede upper case letters. This made compaction fail on valid metrics such as up{Z="one", a="two"} since the expected order was [Z, __name__, a]. This change brings the validation more in line with what prometheus is using at https://github.com/prometheus/prometheus/blob/9f903fb3f7841f55d1b634d5fb02986a8bfc5e0c/pkg/textparse/promparse.go#L230 Add additional check may also be needed to make sure that the first value is always __name__ --- pkg/block/index.go | 11 ++- pkg/block/index_test.go | 155 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 160 insertions(+), 6 deletions(-) diff --git a/pkg/block/index.go b/pkg/block/index.go index eb0a3689c0..f867861a53 100644 --- a/pkg/block/index.go +++ b/pkg/block/index.go @@ -345,13 +345,12 @@ func GatherIndexIssueStats(logger log.Logger, fn string, minTime int64, maxTime if lastLset != nil && labels.Compare(lastLset, lset) >= 0 { return stats, errors.Errorf("series %v out of order; previous %v", lset, lastLset) } - l0 := lset[0] - for _, l := range lset[1:] { - if l.Name <= l0.Name { - return stats, errors.Errorf("out-of-order label set %s for series %d", lset, id) - } - l0 = l + + // Ensure all but the first value (__name__) are sorted + if !sort.SliceIsSorted(lset[1:], func(i, j int) bool { return lset[i+1].Name <= lset[j+1].Name }) { + return stats, errors.Errorf("out-of-order label set %s for series %d", lset, id) } + if len(chks) == 0 { return stats, errors.Errorf("empty chunks for series %d", id) } diff --git a/pkg/block/index_test.go b/pkg/block/index_test.go index 80c10e8e6e..8e69383392 100644 --- a/pkg/block/index_test.go +++ b/pkg/block/index_test.go @@ -1,11 +1,15 @@ package block import ( + "fmt" "io/ioutil" "os" "path/filepath" "testing" + "github.com/prometheus/tsdb/index" + "github.com/prometheus/tsdb/tsdbutil" + "github.com/go-kit/kit/log" "github.com/improbable-eng/thanos/pkg/testutil" "github.com/prometheus/tsdb/labels" @@ -44,3 +48,154 @@ func TestWriteReadIndexCache(t *testing.T) { testutil.Equals(t, []string{"1"}, vals) testutil.Equals(t, 6, len(postings)) } + +func TestGatherIndexIssueStats(t *testing.T) { + tmpDir, err := ioutil.TempDir("", "test-gather-stats") + testutil.Ok(t, err) + // Cleans up all temp files + defer func() { testutil.Ok(t, os.RemoveAll(tmpDir)) }() + + l := log.NewNopLogger() + + // Non existent file + _, err = GatherIndexIssueStats(l, "", 0, 0) + testutil.NotOk(t, err) + + // Real but empty file + tmpfile, err := ioutil.TempFile(tmpDir, "empty-file") + testutil.Ok(t, err) + _, err = GatherIndexIssueStats(l, tmpfile.Name(), 0, 0) + testutil.NotOk(t, err) + + // Working test cases + testCases := []struct { + name string + lsets []labels.Labels + expected Stats + startTime int64 + endTime int64 + numSamples int + }{ + { + name: "empty case", + }, + { + name: "1 labelset name only", + lsets: []labels.Labels{ + { + {Name: "__name__", Value: "metric_name"}, + }, + }, + expected: Stats{TotalSeries: 1}, + }, + { + name: "2 labelsets name only", + lsets: []labels.Labels{ + { + {Name: "__name__", Value: "metric_name"}, + }, { + {Name: "__name__", Value: "metric_name_2"}, + }, + }, + expected: Stats{TotalSeries: 2}, + }, + { + name: "2 labelsets with adtl labels", + lsets: []labels.Labels{ + { + {Name: "__name__", Value: "metric_name"}, + {Name: "label", Value: "value1"}, + }, { + {Name: "__name__", Value: "metric_name"}, + {Name: "label", Value: "value2"}, + }, + }, + expected: Stats{TotalSeries: 2}, + }, + { + name: "1 labelset with lots of labels", + lsets: []labels.Labels{ + { + {Name: "__name__", Value: "metric_name"}, + {Name: "a", Value: "1"}, + {Name: "b", Value: "2"}, + {Name: "c", Value: "3"}, + {Name: "d", Value: "4"}, + {Name: "e", Value: "5"}, + }, + }, + expected: Stats{TotalSeries: 1}, + }, + { + // Name always comes first and prometheus doesn't sort it. + // https://github.com/prometheus/prometheus/blob/9f903fb3f7841f55d1b634d5fb02986a8bfc5e0c/pkg/textparse/promparse.go#L230 + name: "1 labelset with capitalized label", + lsets: []labels.Labels{ + { + {Name: "__name__", Value: "metric_name"}, + {Name: "CapitalizedLabel", Value: "1"}, + {Name: "CapitalizedLabel2", Value: "2"}, + {Name: "more_common_label", Value: "3"}, + }, + }, + expected: Stats{TotalSeries: 1}, + }, + } + + for caseNum, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + tmpfile, err := ioutil.TempFile(tmpDir, fmt.Sprintf("test-case-%d", caseNum)) + testutil.Ok(t, err) + + testutil.Ok(t, writeTestIndex(tmpfile.Name(), tc.startTime, tc.numSamples, tc.lsets...)) + + stats, err := GatherIndexIssueStats(l, tmpfile.Name(), tc.startTime, tc.endTime) + + testutil.Ok(t, err) + // == works for now since the struct is only ints (no pointers or nested values) + testutil.Assert(t, tc.expected == stats, + "index stats did not have an expected value. \nExpected: %+v \nActual: %+v", tc.expected, stats) + }) + } + +} + +// writeTestIndex is a helper function that creates an index file with dummy data for testing +func writeTestIndex(filename string, startTime int64, numSamples int, lsets ...labels.Labels) error { + chunks := tsdbutil.PopulatedChunk(numSamples, startTime) + + symbols := make(map[string]struct{}) + for _, lset := range lsets { + for _, l := range lset { + symbols[l.Name] = struct{}{} + symbols[l.Value] = struct{}{} + } + } + + memPostings := index.NewMemPostings() + for i, lset := range lsets { + memPostings.Add(uint64(i), lset) + } + + w, err := index.NewWriter(filename) + if err != nil { + return err + } + + if err = w.AddSymbols(symbols); err != nil { + return err + } + + for i, lset := range lsets { + if err = w.AddSeries(uint64(i), lset, chunks); err != nil { + return err + } + } + + pKey, pVal := index.AllPostingsKey() + if err = w.WritePostings(pKey, pVal, memPostings.All()); err != nil { + return err + } + + return w.Close() +} From 275fd1e288ad75d95490ecb0c6e8e64d7659830f Mon Sep 17 00:00:00 2001 From: Steve Mitchell Date: Tue, 12 Feb 2019 13:48:03 -0500 Subject: [PATCH 2/2] add check to ensure __name__ comes first and negative test cases --- pkg/block/index.go | 9 ++++++- pkg/block/index_test.go | 59 ++++++++++++++++++++++++++++++++++------- 2 files changed, 58 insertions(+), 10 deletions(-) diff --git a/pkg/block/index.go b/pkg/block/index.go index f867861a53..d844b6593f 100644 --- a/pkg/block/index.go +++ b/pkg/block/index.go @@ -11,6 +11,8 @@ import ( "strings" "time" + "github.com/prometheus/common/model" + "github.com/improbable-eng/thanos/pkg/block/metadata" "github.com/prometheus/tsdb/fileutil" @@ -346,7 +348,12 @@ func GatherIndexIssueStats(logger log.Logger, fn string, minTime int64, maxTime return stats, errors.Errorf("series %v out of order; previous %v", lset, lastLset) } - // Ensure all but the first value (__name__) are sorted + // Ensure that __name__ comes first. + if lset[0].Name != model.MetricNameLabel { + return stats, errors.Errorf("label set %s does not start with %s for series %d", lset, model.MetricNameLabel, id) + } + + // Ensure all but the first value (__name__) are sorted. if !sort.SliceIsSorted(lset[1:], func(i, j int) bool { return lset[i+1].Name <= lset[j+1].Name }) { return stats, errors.Errorf("out-of-order label set %s for series %d", lset, id) } diff --git a/pkg/block/index_test.go b/pkg/block/index_test.go index 8e69383392..2c69079333 100644 --- a/pkg/block/index_test.go +++ b/pkg/block/index_test.go @@ -52,22 +52,20 @@ func TestWriteReadIndexCache(t *testing.T) { func TestGatherIndexIssueStats(t *testing.T) { tmpDir, err := ioutil.TempDir("", "test-gather-stats") testutil.Ok(t, err) - // Cleans up all temp files + // Cleans up all temp files. defer func() { testutil.Ok(t, os.RemoveAll(tmpDir)) }() - l := log.NewNopLogger() - - // Non existent file - _, err = GatherIndexIssueStats(l, "", 0, 0) + // Non existent file. + _, err = GatherIndexIssueStats(log.NewNopLogger(), "", 0, 0) testutil.NotOk(t, err) - // Real but empty file + // Real but empty file. tmpfile, err := ioutil.TempFile(tmpDir, "empty-file") testutil.Ok(t, err) - _, err = GatherIndexIssueStats(l, tmpfile.Name(), 0, 0) + _, err = GatherIndexIssueStats(log.NewNopLogger(), tmpfile.Name(), 0, 0) testutil.NotOk(t, err) - // Working test cases + // Working test cases. testCases := []struct { name string lsets []labels.Labels @@ -149,7 +147,7 @@ func TestGatherIndexIssueStats(t *testing.T) { testutil.Ok(t, writeTestIndex(tmpfile.Name(), tc.startTime, tc.numSamples, tc.lsets...)) - stats, err := GatherIndexIssueStats(l, tmpfile.Name(), tc.startTime, tc.endTime) + stats, err := GatherIndexIssueStats(log.NewNopLogger(), tmpfile.Name(), tc.startTime, tc.endTime) testutil.Ok(t, err) // == works for now since the struct is only ints (no pointers or nested values) @@ -157,7 +155,50 @@ func TestGatherIndexIssueStats(t *testing.T) { "index stats did not have an expected value. \nExpected: %+v \nActual: %+v", tc.expected, stats) }) } +} + +func TestGatherIndexIssueStatsBadLabelSet(t *testing.T) { + tmpDir, err := ioutil.TempDir("", "test-gather-stats") + testutil.Ok(t, err) + // Cleans up all temp files. + defer func() { testutil.Ok(t, os.RemoveAll(tmpDir)) }() + // All of these label sets should produce an error. + testCases := []struct { + name string + lsets []labels.Labels + }{ + { + name: "no __name__ label", + lsets: []labels.Labels{ + { + {Name: "no_name", Value: "we're missing __name__"}, + }, + }, + }, + { + // Should be illegal according to https://github.com/prometheus/tsdb/issues/32#issuecomment-292771463 + name: "capital letter before name label", + lsets: []labels.Labels{ + { + {Name: "ABC", Value: "onetwothree"}, + {Name: "__name__", Value: "metric_name"}, + }, + }, + }, + } + + for caseNum, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + tmpfile, err := ioutil.TempFile(tmpDir, fmt.Sprintf("test-case-%d", caseNum)) + testutil.Ok(t, err) + + testutil.Ok(t, writeTestIndex(tmpfile.Name(), 0, 0, tc.lsets...)) + + _, err = GatherIndexIssueStats(log.NewNopLogger(), tmpfile.Name(), 0, 0) + testutil.NotOk(t, err) + }) + } } // writeTestIndex is a helper function that creates an index file with dummy data for testing