From 17418a2b1bc7da242b3a47ee8805815857884934 Mon Sep 17 00:00:00 2001 From: Vilius Pranckaitis Date: Tue, 5 Oct 2021 23:41:53 +1100 Subject: [PATCH] [query] Log summary of time series labels upon "literal too long" error (#3801) --- .../api/v1/handler/prometheus/remote/write.go | 43 +++++++++++++++++++ .../handler/prometheus/remote/write_test.go | 42 +++++++++++++++--- src/query/models/options.go | 38 +++++++++++++--- src/query/models/options_test.go | 14 +++++- src/query/models/tags.go | 12 ++++-- src/query/models/tags_test.go | 30 +++++++++++++ src/query/models/types.go | 6 +++ src/x/serialize/limits.go | 8 ++-- 8 files changed, 173 insertions(+), 20 deletions(-) diff --git a/src/query/api/v1/handler/prometheus/remote/write.go b/src/query/api/v1/handler/prometheus/remote/write.go index 9a123b38f6..a3f507c136 100644 --- a/src/query/api/v1/handler/prometheus/remote/write.go +++ b/src/query/api/v1/handler/prometheus/remote/write.go @@ -29,6 +29,7 @@ import ( "io/ioutil" "net/http" "strings" + "sync/atomic" "time" "github.com/m3db/m3/src/cmd/services/m3coordinator/ingest" @@ -70,6 +71,13 @@ const ( // defaultForwardingTimeout is the default forwarding timeout. defaultForwardingTimeout = 15 * time.Second + + // maxLiteralIsTooLongLogCount is the number of times the time series labels should be logged + // upon "literal is too long" error. + maxLiteralIsTooLongLogCount = 10 + // literalPrefixLength is the length of the label literal prefix that is logged upon + // "literal is too long" error. + literalPrefixLength = 100 ) var ( @@ -119,6 +127,9 @@ type PromWriteHandler struct { nowFn clock.NowFn instrumentOpts instrument.Options metrics promWriteMetrics + + // Counting the number of times of "literal is too long" error for log sampling purposes. + numLiteralIsTooLong uint32 } // NewPromWriteHandler returns a new instance of handler. @@ -481,6 +492,20 @@ func (h *PromWriteHandler) parseRequest( } } + // Check if any of the labels exceed literal length limits and occasionally print them + // in a log message for debugging purposes. + maxTagLiteralLength := int(h.tagOptions.MaxTagLiteralLength()) + for _, ts := range req.Timeseries { + for _, l := range ts.Labels { + if len(l.Name) > maxTagLiteralLength || len(l.Value) > maxTagLiteralLength { + h.maybeLogLabelsWithTooLongLiterals(h.instrumentOpts.Logger(), l) + err := fmt.Errorf("label literal is too long: nameLength=%d, valueLength=%d, maxLength=%d", + len(l.Name), len(l.Value), maxTagLiteralLength) + return parseRequestResult{}, err + } + } + } + return parseRequestResult{ Request: &req, Options: opts, @@ -555,6 +580,24 @@ func (h *PromWriteHandler) forward( return nil } +func (h *PromWriteHandler) maybeLogLabelsWithTooLongLiterals(logger *zap.Logger, label prompb.Label) { + if atomic.AddUint32(&h.numLiteralIsTooLong, 1) > maxLiteralIsTooLongLogCount { + return + } + + safePrefix := func(b []byte, l int) []byte { + if len(b) <= l { + return b + } + return b[:l] + } + + logger.Warn("label exceeds literal length limits", + zap.String("namePrefix", string(safePrefix(label.Name, literalPrefixLength))), + zap.String("valuePrefix", string(safePrefix(label.Value, literalPrefixLength))), + ) +} + func newPromTSIter( timeseries []prompb.TimeSeries, tagOpts models.TagOptions, diff --git a/src/query/api/v1/handler/prometheus/remote/write_test.go b/src/query/api/v1/handler/prometheus/remote/write_test.go index 3f71305f35..4f6545bfb4 100644 --- a/src/query/api/v1/handler/prometheus/remote/write_test.go +++ b/src/query/api/v1/handler/prometheus/remote/write_test.go @@ -29,9 +29,15 @@ import ( "math" "net/http" "net/http/httptest" + "strings" "testing" "time" + "github.com/golang/mock/gomock" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/uber-go/tally" + "github.com/m3db/m3/src/cmd/services/m3coordinator/ingest" "github.com/m3db/m3/src/cmd/services/m3query/config" "github.com/m3db/m3/src/dbnode/generated/proto/annotation" @@ -47,11 +53,6 @@ import ( "github.com/m3db/m3/src/x/headers" "github.com/m3db/m3/src/x/instrument" xtest "github.com/m3db/m3/src/x/test" - - "github.com/golang/mock/gomock" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "github.com/uber-go/tally" ) func makeOptions(ds ingest.DownsamplerAndWriter) options.HandlerOptions { @@ -418,6 +419,37 @@ func TestPromWriteDisabledMetricsTypes(t *testing.T) { require.NoError(t, capturedIter.Error()) } +func TestPromWriteLiteralIsTooLongError(t *testing.T) { + ctrl := xtest.NewController(t) + defer ctrl.Finish() + + opts := makeOptions(ingest.NewMockDownsamplerAndWriter(ctrl)) + handler, err := NewPromWriteHandler(opts) + require.NoError(t, err) + + veryLongLiteral := strings.Repeat("x", int(opts.TagOptions().MaxTagLiteralLength())+1) + promReq := &prompb.WriteRequest{ + Timeseries: []prompb.TimeSeries{ + { + Labels: []prompb.Label{ + {Name: []byte("name1"), Value: []byte("value1")}, + {Name: []byte("name2"), Value: []byte(veryLongLiteral)}, + }, + }, + }, + } + + for i := 0; i < maxLiteralIsTooLongLogCount*2; i++ { + promReqBody := test.GeneratePromWriteRequestBody(t, promReq) + req := httptest.NewRequest(PromWriteHTTPMethod, PromWriteURL, promReqBody) + writer := httptest.NewRecorder() + handler.ServeHTTP(writer, req) + resp := writer.Result() + require.Equal(t, http.StatusBadRequest, resp.StatusCode) + require.NoError(t, resp.Body.Close()) + } +} + func BenchmarkWriteDatapoints(b *testing.B) { ctrl := xtest.NewController(b) defer ctrl.Finish() diff --git a/src/query/models/options.go b/src/query/models/options.go index 794c91d9f5..32074ee614 100644 --- a/src/query/models/options.go +++ b/src/query/models/options.go @@ -25,16 +25,23 @@ import ( "errors" "github.com/prometheus/common/model" + + "github.com/m3db/m3/src/x/serialize" ) -var ( - defaultMetricName = []byte(model.MetricNameLabel) - defaultBucketName = []byte("le") +const ( defaultAllowTagNameDuplicates = false defaultAllowTagValueEmpty = false + defaultMaxTagLiteralLength = serialize.DefaultMaxTagLiteralLength +) - errNoName = errors.New("metric name is missing or empty") - errNoBucket = errors.New("bucket name is missing or empty") +var ( + defaultMetricName = []byte(model.MetricNameLabel) + defaultBucketName = []byte("le") + + errNoName = errors.New("metric name is missing or empty") + errNoBucket = errors.New("bucket name is missing or empty") + errNonPositiveLiteralLength = errors.New("max literal length should be positive") ) type tagOptions struct { @@ -45,6 +52,7 @@ type tagOptions struct { filters Filters allowTagNameDuplicates bool allowTagValueEmpty bool + maxTagLiteralLength uint16 } // NewTagOptions builds a new tag options with default values. @@ -56,6 +64,7 @@ func NewTagOptions() TagOptions { idScheme: TypeQuoted, allowTagNameDuplicates: defaultAllowTagNameDuplicates, allowTagValueEmpty: defaultAllowTagValueEmpty, + maxTagLiteralLength: defaultMaxTagLiteralLength, } } @@ -68,6 +77,10 @@ func (o *tagOptions) Validate() error { return errNoBucket } + if o.maxTagLiteralLength == 0 { + return errNonPositiveLiteralLength + } + return o.idScheme.Validate() } @@ -131,10 +144,23 @@ func (o *tagOptions) AllowTagValueEmpty() bool { return o.allowTagValueEmpty } +// SetMaxTagLiteralLength sets the maximum length of a tag Name/Value. +func (o *tagOptions) SetMaxTagLiteralLength(value uint16) TagOptions { + opts := *o + opts.maxTagLiteralLength = value + return &opts +} + +// MaxTagLiteralLength returns the maximum length of a tag Name/Value. +func (o *tagOptions) MaxTagLiteralLength() uint16 { + return o.maxTagLiteralLength +} + func (o *tagOptions) Equals(other TagOptions) bool { return o.idScheme == other.IDSchemeType() && bytes.Equal(o.metricName, other.MetricName()) && bytes.Equal(o.bucketName, other.BucketName()) && o.allowTagNameDuplicates == other.AllowTagNameDuplicates() && - o.allowTagValueEmpty == other.AllowTagValueEmpty() + o.allowTagValueEmpty == other.AllowTagValueEmpty() && + o.maxTagLiteralLength == other.MaxTagLiteralLength() } diff --git a/src/query/models/options_test.go b/src/query/models/options_test.go index ccc65641b3..a2c0f2d6fc 100644 --- a/src/query/models/options_test.go +++ b/src/query/models/options_test.go @@ -31,18 +31,21 @@ func TestDefaultTagOptions(t *testing.T) { assert.NoError(t, opts.Validate()) assert.Equal(t, defaultMetricName, opts.MetricName()) assert.Equal(t, TypeQuoted, opts.IDSchemeType()) + assert.Equal(t, defaultMaxTagLiteralLength, opts.MaxTagLiteralLength()) } func TestValidTagOptions(t *testing.T) { opts := NewTagOptions(). SetIDSchemeType(TypePrependMeta). SetMetricName([]byte("name")). - SetBucketName([]byte("bucket")) + SetBucketName([]byte("bucket")). + SetMaxTagLiteralLength(42) assert.NoError(t, opts.Validate()) assert.Equal(t, []byte("name"), opts.MetricName()) assert.Equal(t, []byte("bucket"), opts.BucketName()) assert.Equal(t, TypePrependMeta, opts.IDSchemeType()) + assert.Equal(t, uint16(42), opts.MaxTagLiteralLength()) } func TestBadNameTagOptions(t *testing.T) { @@ -75,6 +78,12 @@ func TestBadSchemeTagOptions(t *testing.T) { assert.EqualError(t, opts.Validate(), msg) } +func TestBadMaxLiteralLength(t *testing.T) { + opts := NewTagOptions(). + SetMaxTagLiteralLength(0) + assert.EqualError(t, opts.Validate(), errNonPositiveLiteralLength.Error()) +} + func TestOptionsEquals(t *testing.T) { opts, other := NewTagOptions(), NewTagOptions() assert.True(t, opts.Equals(other)) @@ -92,4 +101,7 @@ func TestOptionsEquals(t *testing.T) { opts = opts.SetMetricName(n) opts = opts.SetIDSchemeType(IDSchemeType(10)) assert.False(t, opts.Equals(other)) + + opts = NewTagOptions().SetMaxTagLiteralLength(42) + assert.False(t, opts.Equals(other)) } diff --git a/src/query/models/tags.go b/src/query/models/tags.go index 03658757cd..c0a849ca7c 100644 --- a/src/query/models/tags.go +++ b/src/query/models/tags.go @@ -315,16 +315,20 @@ func (t Tags) validate() error { var ( allowTagNameDuplicates = t.Opts.AllowTagNameDuplicates() allowTagValueEmpty = t.Opts.AllowTagValueEmpty() + maxTagLiteralLength = int(t.Opts.MaxTagLiteralLength()) ) // Sorted alphanumerically otherwise, use bytes.Compare once for // both order and unique test. for i, tag := range t.Tags { - if len(tag.Name) == 0 { + if nameLen := len(tag.Name); nameLen == 0 { return fmt.Errorf("tag name empty: index=%d", i) + } else if nameLen > maxTagLiteralLength { + return fmt.Errorf("tag name too long: index=%d, length=%d, maxLength=%d", i, nameLen, maxTagLiteralLength) } - if !allowTagValueEmpty && len(tag.Value) == 0 { - return fmt.Errorf("tag value empty: index=%d, name=%s", - i, t.Tags[i].Name) + if valueLen := len(tag.Value); !allowTagValueEmpty && valueLen == 0 { + return fmt.Errorf("tag value empty: index=%d, name=%s", i, tag.Name) + } else if valueLen > maxTagLiteralLength { + return fmt.Errorf("tag value too long: index=%d, length=%d, maxLength=%d", i, valueLen, maxTagLiteralLength) } if i == 0 { continue // Don't check order/unique attributes. diff --git a/src/query/models/tags_test.go b/src/query/models/tags_test.go index 859b51bd5e..15769142bc 100644 --- a/src/query/models/tags_test.go +++ b/src/query/models/tags_test.go @@ -492,6 +492,36 @@ func TestTagsValidateDuplicateQuotedWithAllowTagNameDuplicates(t *testing.T) { require.NoError(t, err) } +func TestTagsValidateTagNameTooLong(t *testing.T) { + tags := NewTags(0, NewTagOptions(). + SetIDSchemeType(TypeQuoted). + SetMaxTagLiteralLength(10)) + tags = tags.AddTag(Tag{ + Name: []byte("name_longer_than_10_characters"), + Value: []byte("baz"), + }) + err := tags.Validate() + require.EqualError(t, err, "tag name too long: index=0, length=30, maxLength=10") + require.True(t, xerrors.IsInvalidParams(err)) +} + +func TestTagsValidateTagValueTooLong(t *testing.T) { + tags := NewTags(0, NewTagOptions(). + SetIDSchemeType(TypeQuoted). + SetMaxTagLiteralLength(10)) + tags = tags.AddTag(Tag{ + Name: []byte("bar"), + Value: []byte("baz"), + }) + tags = tags.AddTag(Tag{ + Name: []byte("foo"), + Value: []byte("value_longer_than_10_characters"), + }) + err := tags.Validate() + require.EqualError(t, err, "tag value too long: index=1, length=31, maxLength=10") + require.True(t, xerrors.IsInvalidParams(err)) +} + func TestTagsValidateEmptyNameGraphite(t *testing.T) { tags := NewTags(0, NewTagOptions().SetIDSchemeType(TypeGraphite)) tags = tags.AddTag(Tag{Name: nil, Value: []byte("bar")}) diff --git a/src/query/models/types.go b/src/query/models/types.go index d2d42dfe72..494a70e4ee 100644 --- a/src/query/models/types.go +++ b/src/query/models/types.go @@ -113,6 +113,12 @@ type TagOptions interface { // AllowTagValueEmpty returns the value to allow empty tag values to appear. AllowTagValueEmpty() bool + // SetMaxTagLiteralLength sets the maximum length of a tag Name/Value. + SetMaxTagLiteralLength(value uint16) TagOptions + + // MaxTagLiteralLength returns the maximum length of a tag Name/Value. + MaxTagLiteralLength() uint16 + // Equals determines if two tag options are equivalent. Equals(other TagOptions) bool } diff --git a/src/x/serialize/limits.go b/src/x/serialize/limits.go index 4a623adce0..19322fd505 100644 --- a/src/x/serialize/limits.go +++ b/src/x/serialize/limits.go @@ -22,12 +22,12 @@ package serialize import "math" -var ( +const ( // defaultMaxNumberTags is the maximum number of tags that can be encoded. defaultMaxNumberTags uint16 = math.MaxUint16 - // defaultMaxTagLiteralLength is the maximum length of a tag Name/Value. - defaultMaxTagLiteralLength uint16 = math.MaxUint16 + // DefaultMaxTagLiteralLength is the maximum length of a tag Name/Value. + DefaultMaxTagLiteralLength uint16 = math.MaxUint16 ) type limits struct { @@ -39,7 +39,7 @@ type limits struct { func NewTagSerializationLimits() TagSerializationLimits { return &limits{ maxNumberTags: defaultMaxNumberTags, - maxTagLiteralLength: defaultMaxTagLiteralLength, + maxTagLiteralLength: DefaultMaxTagLiteralLength, } }