From 142d35b84c30723ce563ce41db81d40de98b948a Mon Sep 17 00:00:00 2001 From: Wesley Kim Date: Tue, 19 Jan 2021 17:53:06 -0500 Subject: [PATCH 01/13] [m3db] Check bloom filter before stream request allocation (#3103) * [m3db] Check bloom filter before stream request allocation * Add test assertions for bloom filer misses metric * Remove redundant series-read metric --- src/dbnode/persist/fs/retriever.go | 128 +++++++++++++----------- src/dbnode/persist/fs/retriever_test.go | 32 +++--- 2 files changed, 91 insertions(+), 69 deletions(-) diff --git a/src/dbnode/persist/fs/retriever.go b/src/dbnode/persist/fs/retriever.go index 23622ab484..9fd1f353f2 100644 --- a/src/dbnode/persist/fs/retriever.go +++ b/src/dbnode/persist/fs/retriever.go @@ -89,12 +89,12 @@ const ( type blockRetriever struct { sync.RWMutex - opts BlockRetrieverOptions - fsOpts Options - logger *zap.Logger - queryLimits limits.QueryLimits - bytesReadLimit limits.LookbackLimit - seriesReadCount tally.Counter + opts BlockRetrieverOptions + fsOpts Options + logger *zap.Logger + queryLimits limits.QueryLimits + bytesReadLimit limits.LookbackLimit + seriesBloomFilterMisses tally.Counter newSeekerMgrFn newSeekerMgrFn @@ -126,18 +126,18 @@ func NewBlockRetriever( scope := fsOpts.InstrumentOptions().MetricsScope().SubScope("retriever") return &blockRetriever{ - opts: opts, - fsOpts: fsOpts, - logger: fsOpts.InstrumentOptions().Logger(), - queryLimits: opts.QueryLimits(), - bytesReadLimit: opts.QueryLimits().BytesReadLimit(), - seriesReadCount: scope.Counter("series-read"), - newSeekerMgrFn: NewSeekerManager, - reqPool: opts.RetrieveRequestPool(), - bytesPool: opts.BytesPool(), - idPool: opts.IdentifierPool(), - status: blockRetrieverNotOpen, - notifyFetch: make(chan struct{}, 1), + opts: opts, + fsOpts: fsOpts, + logger: fsOpts.InstrumentOptions().Logger(), + queryLimits: opts.QueryLimits(), + bytesReadLimit: opts.QueryLimits().BytesReadLimit(), + seriesBloomFilterMisses: 
scope.Counter("series-bloom-filter-misses"), + newSeekerMgrFn: NewSeekerManager, + reqPool: opts.RetrieveRequestPool(), + bytesPool: opts.BytesPool(), + idPool: opts.IdentifierPool(), + status: blockRetrieverNotOpen, + notifyFetch: make(chan struct{}, 1), // We just close this channel when the fetchLoops should shutdown, so no // buffering is required fetchLoopsShouldShutdownCh: make(chan struct{}), @@ -560,6 +560,33 @@ func (r *blockRetriever) fetchBatch( } } +func (r *blockRetriever) seriesPresentInBloomFilter( + id ident.ID, + shard uint32, + startTime time.Time, +) (bool, error) { + // Capture variable and RLock() because this slice can be modified in the + // Open() method + r.RLock() + // This should never happen unless caller tries to use Stream() before Open() + if r.seekerMgr == nil { + r.RUnlock() + return false, errNoSeekerMgr + } + r.RUnlock() + + idExists, err := r.seekerMgr.Test(id, shard, startTime) + if err != nil { + return false, err + } + + if !idExists { + r.seriesBloomFilterMisses.Inc(1) + } + + return idExists, nil +} + // streamRequest returns a bool indicating if the ID was found, and any errors. 
func (r *blockRetriever) streamRequest( ctx context.Context, @@ -568,11 +595,10 @@ func (r *blockRetriever) streamRequest( id ident.ID, startTime time.Time, nsCtx namespace.Context, -) (bool, error) { +) error { req.resultWg.Add(1) - r.seriesReadCount.Inc(1) if err := r.queryLimits.DiskSeriesReadLimit().Inc(1, req.source); err != nil { - return false, err + return err } req.shard = shard @@ -592,29 +618,9 @@ func (r *blockRetriever) streamRequest( // Ensure to finalize at the end of request ctx.RegisterFinalizer(req) - // Capture variable and RLock() because this slice can be modified in the - // Open() method - r.RLock() - // This should never happen unless caller tries to use Stream() before Open() - if r.seekerMgr == nil { - r.RUnlock() - return false, errNoSeekerMgr - } - r.RUnlock() - - idExists, err := r.seekerMgr.Test(id, shard, startTime) - if err != nil { - return false, err - } - - // If the ID is not in the seeker's bloom filter, then it's definitely not on - // disk and we can return immediately. - if !idExists { - return false, nil - } reqs, err := r.shardRequests(shard) if err != nil { - return false, err + return err } reqs.Lock() @@ -633,7 +639,7 @@ func (r *blockRetriever) streamRequest( // the data. This means that even though we're returning nil for error // here, the caller may still encounter an error when they attempt to // read the data. - return true, nil + return nil } func (r *blockRetriever) Stream( @@ -644,6 +650,16 @@ func (r *blockRetriever) Stream( onRetrieve block.OnRetrieveBlock, nsCtx namespace.Context, ) (xio.BlockReader, error) { + found, err := r.seriesPresentInBloomFilter(id, shard, startTime) + if err != nil { + return xio.EmptyBlockReader, err + } + // If the ID is not in the seeker's bloom filter, then it's definitely not on + // disk and we can return immediately. 
+ if !found { + return xio.EmptyBlockReader, nil + } + req := r.reqPool.Get() req.onRetrieve = onRetrieve req.streamReqType = streamDataReq @@ -655,18 +671,12 @@ func (r *blockRetriever) Stream( } } - found, err := r.streamRequest(ctx, req, shard, id, startTime, nsCtx) + err = r.streamRequest(ctx, req, shard, id, startTime, nsCtx) if err != nil { req.resultWg.Done() return xio.EmptyBlockReader, err } - if !found { - req.onRetrieved(ts.Segment{}, namespace.Context{}) - req.success = true - req.onDone() - } - // The request may not have completed yet, but it has an internal // waitgroup which the caller will have to wait for before retrieving // the data. This means that even though we're returning nil for error @@ -683,22 +693,26 @@ func (r *blockRetriever) StreamWideEntry( filter schema.WideEntryFilter, nsCtx namespace.Context, ) (block.StreamedWideEntry, error) { + found, err := r.seriesPresentInBloomFilter(id, shard, startTime) + if err != nil { + return block.EmptyStreamedWideEntry, err + } + // If the ID is not in the seeker's bloom filter, then it's definitely not on + // disk and we can return immediately. + if !found { + return block.EmptyStreamedWideEntry, nil + } + req := r.reqPool.Get() req.streamReqType = streamWideEntryReq req.wideFilter = filter - found, err := r.streamRequest(ctx, req, shard, id, startTime, nsCtx) + err = r.streamRequest(ctx, req, shard, id, startTime, nsCtx) if err != nil { req.resultWg.Done() return block.EmptyStreamedWideEntry, err } - if !found { - req.wideEntry = xio.WideEntry{} - req.success = true - req.onDone() - } - // The request may not have completed yet, but it has an internal // waitgroup which the caller will have to wait for before retrieving // the data. 
This means that even though we're returning nil for error diff --git a/src/dbnode/persist/fs/retriever_test.go b/src/dbnode/persist/fs/retriever_test.go index 54d57c0947..5e7178050a 100644 --- a/src/dbnode/persist/fs/retriever_test.go +++ b/src/dbnode/persist/fs/retriever_test.go @@ -127,7 +127,7 @@ type streamResult struct { shard uint32 id string blockStart time.Time - stream xio.SegmentReader + stream xio.BlockReader } // TestBlockRetrieverHighConcurrentSeeks tests the retriever with high @@ -395,6 +395,14 @@ func testBlockRetrieverHighConcurrentSeeks(t *testing.T, shouldCacheShardIndices } for _, r := range results { + compare.Head = shardData[r.shard][r.id][xtime.ToUnixNano(r.blockStart)] + + // If the stream is empty, assert that the expected result is also nil + if r.stream.IsEmpty() { + require.Nil(t, compare.Head) + continue + } + seg, err := r.stream.Segment() if err != nil { fmt.Printf("\nstream seg err: %v\n", err) @@ -404,7 +412,6 @@ func testBlockRetrieverHighConcurrentSeeks(t *testing.T, shouldCacheShardIndices } require.NoError(t, err) - compare.Head = shardData[r.shard][r.id][xtime.ToUnixNano(r.blockStart)] require.True( t, seg.Equal(&compare), @@ -538,6 +545,8 @@ func testBlockRetrieverHighConcurrentSeeks(t *testing.T, shouldCacheShardIndices // on the retriever in the case where the requested ID does not exist. In that // case, Stream() should return an empty segment. 
func TestBlockRetrieverIDDoesNotExist(t *testing.T) { + scope := tally.NewTestScope("test", nil) + // Make sure reader/writer are looking at the same test directory dir, err := ioutil.TempDir("", "testdb") require.NoError(t, err) @@ -555,7 +564,7 @@ func TestBlockRetrieverIDDoesNotExist(t *testing.T) { // Setup the reader opts := testBlockRetrieverOptions{ retrieverOpts: defaultTestBlockRetrieverOptions, - fsOpts: fsOpts, + fsOpts: fsOpts.SetInstrumentOptions(instrument.NewOptions().SetMetricsScope(scope)), shards: []uint32{shard}, } retriever, cleanup := newOpenTestBlockRetriever(t, testNs1Metadata(t), opts) @@ -572,17 +581,18 @@ func TestBlockRetrieverIDDoesNotExist(t *testing.T) { assert.NoError(t, err) closer() - // Make sure we return the correct error if the ID does not exist ctx := context.NewContext() defer ctx.Close() segmentReader, err := retriever.Stream(ctx, shard, ident.StringID("not-exists"), blockStart, nil, nsCtx) assert.NoError(t, err) - segment, err := segmentReader.Segment() - assert.NoError(t, err) - assert.Equal(t, nil, segment.Head) - assert.Equal(t, nil, segment.Tail) + assert.True(t, segmentReader.IsEmpty()) + + // Check that the bloom filter miss metric was incremented + snapshot := scope.Snapshot() + seriesRead := snapshot.Counters()["test.retriever.series-bloom-filter-misses+"] + require.Equal(t, int64(1), seriesRead.Value()) } // TestBlockRetrieverOnlyCreatesTagItersIfTagsExists verifies that the block retriever @@ -823,14 +833,12 @@ func TestLimitSeriesReadFromDisk(t *testing.T) { require.NoError(t, err) req := &retrieveRequest{} retriever := publicRetriever.(*blockRetriever) - _, _ = retriever.streamRequest(context.NewContext(), req, 0, ident.StringID("id"), time.Now(), namespace.Context{}) - _, err = retriever.streamRequest(context.NewContext(), req, 0, ident.StringID("id"), time.Now(), namespace.Context{}) + _ = retriever.streamRequest(context.NewContext(), req, 0, ident.StringID("id"), time.Now(), namespace.Context{}) + err = 
retriever.streamRequest(context.NewContext(), req, 0, ident.StringID("id"), time.Now(), namespace.Context{}) require.Error(t, err) require.Contains(t, err.Error(), "query aborted due to limit") snapshot := scope.Snapshot() - seriesRead := snapshot.Counters()["test.retriever.series-read+"] - require.Equal(t, int64(2), seriesRead.Value()) seriesLimit := snapshot.Counters()["test.query-limit.exceeded+limit=disk-series-read"] require.Equal(t, int64(1), seriesLimit.Value()) } From bc5c2a3561c2435222e63357f28b173c7dc7939a Mon Sep 17 00:00:00 2001 From: Ryan Hall Date: Tue, 19 Jan 2021 16:54:01 -0800 Subject: [PATCH 02/13] Capture seekerMgr instead Rlock (#3104) seekerMgr could change outside the lock, which might result in a nil pointer --- src/dbnode/persist/fs/retriever.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/dbnode/persist/fs/retriever.go b/src/dbnode/persist/fs/retriever.go index 9fd1f353f2..535ca71df9 100644 --- a/src/dbnode/persist/fs/retriever.go +++ b/src/dbnode/persist/fs/retriever.go @@ -568,14 +568,15 @@ func (r *blockRetriever) seriesPresentInBloomFilter( // Capture variable and RLock() because this slice can be modified in the // Open() method r.RLock() + seekerMgr := r.seekerMgr + r.RUnlock() + // This should never happen unless caller tries to use Stream() before Open() - if r.seekerMgr == nil { - r.RUnlock() + if seekerMgr == nil { return false, errNoSeekerMgr } - r.RUnlock() - idExists, err := r.seekerMgr.Test(id, shard, startTime) + idExists, err := seekerMgr.Test(id, shard, startTime) if err != nil { return false, err } From 99ea1f5ed79ba2bea2b86de709c11f8a44f58cbd Mon Sep 17 00:00:00 2001 From: Vilius Pranckaitis Date: Wed, 20 Jan 2021 18:44:08 +1100 Subject: [PATCH 03/13] Replace bytes.Compare() == 0 with bytes.Equal() (#3101) --- src/dbnode/storage/index.go | 4 ++-- src/query/models/tags.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/dbnode/storage/index.go 
b/src/dbnode/storage/index.go index df35e4dd80..64d285f701 100644 --- a/src/dbnode/storage/index.go +++ b/src/dbnode/storage/index.go @@ -692,8 +692,8 @@ func (i *nsIndex) writeBatches( for _, matchField := range i.doNotIndexWithFields { matchedField := false for _, actualField := range d.Fields { - if bytes.Compare(actualField.Name, matchField.Name) == 0 { - matchedField = bytes.Compare(actualField.Value, matchField.Value) == 0 + if bytes.Equal(actualField.Name, matchField.Name) { + matchedField = bytes.Equal(actualField.Value, matchField.Value) break } } diff --git a/src/query/models/tags.go b/src/query/models/tags.go index 5f20e84884..03658757cd 100644 --- a/src/query/models/tags.go +++ b/src/query/models/tags.go @@ -306,7 +306,7 @@ func (t Tags) validate() error { } prev := tags.Tags[i-1] - if bytes.Compare(prev.Name, tag.Name) == 0 { + if bytes.Equal(prev.Name, tag.Name) { return fmt.Errorf("tags duplicate: '%s' appears more than once", tags.Tags[i-1].Name) } From a8e136867521003787a91ddeedb60e14e5fc3234 Mon Sep 17 00:00:00 2001 From: Vilius Pranckaitis Date: Wed, 20 Jan 2021 19:12:19 +1100 Subject: [PATCH 04/13] [dbnode] Faster search of tag bytes in convert.FromSeriesIDAndTags (#3075) --- src/dbnode/storage/index/convert/convert.go | 72 +++--- .../index/convert/convert_benchmark_test.go | 214 ++++++++++++++++++ .../storage/index/convert/convert_test.go | 116 +++++++++- 3 files changed, 366 insertions(+), 36 deletions(-) create mode 100644 src/dbnode/storage/index/convert/convert_benchmark_test.go diff --git a/src/dbnode/storage/index/convert/convert.go b/src/dbnode/storage/index/convert/convert.go index e6013362ee..023589a51e 100644 --- a/src/dbnode/storage/index/convert/convert.go +++ b/src/dbnode/storage/index/convert/convert.go @@ -32,6 +32,20 @@ import ( "github.com/m3db/m3/src/x/pool" ) +const ( + // NB: this assumes that series ID has a format: + // {tag1="value1",tag2="value2",...} + // + // Thus firstTagBytesPosition points to the 't' immediately 
after curly brace '{' + firstTagBytesPosition int = 1 + // distanceBetweenTagNameAndValue corresponds to '="' in series ID that separates tag name from + // its value + distanceBetweenTagNameAndValue int = 2 + // distanceBetweenTagValueAndNextName corresponds to '",' in series ID that separates + // tag's value from the following tag name + distanceBetweenTagValueAndNextName int = 2 +) + var ( // ReservedFieldNameID is the field name used to index the ID in the // m3ninx subsytem. @@ -108,22 +122,19 @@ func ValidateSeriesTag(tag ident.Tag) error { // FromSeriesIDAndTags converts the provided series id+tags into a document. func FromSeriesIDAndTags(id ident.ID, tags ident.Tags) (doc.Metadata, error) { - clonedID := clone(id) - fields := make([]doc.Field, 0, len(tags.Values())) + var ( + clonedID = clone(id.Bytes()) + fields = make([]doc.Field, 0, len(tags.Values())) + expectedStart = firstTagBytesPosition + ) for _, tag := range tags.Values() { nameBytes, valueBytes := tag.Name.Bytes(), tag.Value.Bytes() var clonedName, clonedValue []byte - if idx := bytes.Index(clonedID, nameBytes); idx != -1 { - clonedName = clonedID[idx : idx+len(nameBytes)] - } else { - clonedName = append([]byte(nil), nameBytes...) - } - if idx := bytes.Index(clonedID, valueBytes); idx != -1 { - clonedValue = clonedID[idx : idx+len(valueBytes)] - } else { - clonedValue = append([]byte(nil), valueBytes...) - } + clonedName, expectedStart = findSliceOrClone(clonedID, nameBytes, expectedStart, + distanceBetweenTagNameAndValue) + clonedValue, expectedStart = findSliceOrClone(clonedID, valueBytes, expectedStart, + distanceBetweenTagValueAndNextName) fields = append(fields, doc.Field{ Name: clonedName, @@ -143,23 +154,20 @@ func FromSeriesIDAndTags(id ident.ID, tags ident.Tags) (doc.Metadata, error) { // FromSeriesIDAndTagIter converts the provided series id+tags into a document. 
func FromSeriesIDAndTagIter(id ident.ID, tags ident.TagIterator) (doc.Metadata, error) { - clonedID := clone(id) - fields := make([]doc.Field, 0, tags.Remaining()) + var ( + clonedID = clone(id.Bytes()) + fields = make([]doc.Field, 0, tags.Remaining()) + expectedStart = firstTagBytesPosition + ) for tags.Next() { tag := tags.Current() nameBytes, valueBytes := tag.Name.Bytes(), tag.Value.Bytes() var clonedName, clonedValue []byte - if idx := bytes.Index(clonedID, nameBytes); idx != -1 { - clonedName = clonedID[idx : idx+len(nameBytes)] - } else { - clonedName = append([]byte(nil), nameBytes...) - } - if idx := bytes.Index(clonedID, valueBytes); idx != -1 { - clonedValue = clonedID[idx : idx+len(valueBytes)] - } else { - clonedValue = append([]byte(nil), valueBytes...) - } + clonedName, expectedStart = findSliceOrClone(clonedID, nameBytes, expectedStart, + distanceBetweenTagNameAndValue) + clonedValue, expectedStart = findSliceOrClone(clonedID, valueBytes, expectedStart, + distanceBetweenTagValueAndNextName) fields = append(fields, doc.Field{ Name: clonedName, @@ -180,6 +188,19 @@ func FromSeriesIDAndTagIter(id ident.ID, tags ident.TagIterator) (doc.Metadata, return d, nil } +func findSliceOrClone(id, tag []byte, expectedStart, nextPositionDistance int) ([]byte, int) { //nolint:unparam + n := len(tag) + expectedEnd := expectedStart + n + if expectedStart != -1 && expectedEnd <= len(id) && + bytes.Equal(id[expectedStart:expectedEnd], tag) { + return id[expectedStart:expectedEnd], expectedEnd + nextPositionDistance + } else if idx := bytes.Index(id, tag); idx != -1 { + return id[idx : idx+n], expectedEnd + nextPositionDistance + } else { + return clone(tag), -1 + } +} + // TagsFromTagsIter returns an ident.Tags from a TagIterator. It also tries // to re-use bytes from the seriesID if they're also present in the tags // instead of re-allocating them. 
This requires that the ident.Tags that is @@ -252,8 +273,7 @@ func TagsFromTagsIter( // NB(prateek): we take an independent copy of the bytes underlying // any ids provided, as we need to maintain the lifecycle of the indexed // bytes separately from the rest of the storage subsystem. -func clone(id ident.ID) []byte { - original := id.Bytes() +func clone(original []byte) []byte { clone := make([]byte, len(original)) copy(clone, original) return clone diff --git a/src/dbnode/storage/index/convert/convert_benchmark_test.go b/src/dbnode/storage/index/convert/convert_benchmark_test.go new file mode 100644 index 0000000000..0847e557e3 --- /dev/null +++ b/src/dbnode/storage/index/convert/convert_benchmark_test.go @@ -0,0 +1,214 @@ +// Copyright (c) 2021 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +package convert + +import ( + "encoding/base64" + "math/rand" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/m3db/m3/src/x/checked" + "github.com/m3db/m3/src/x/ident" + "github.com/m3db/m3/src/x/pool" + "github.com/m3db/m3/src/x/serialize" +) + +type idWithEncodedTags struct { + id ident.ID + encodedTags []byte +} + +type idWithTags struct { + id ident.ID + tags ident.Tags +} + +// Samples of series IDs with corresponding tags. Taken from metrics generated by promremotebench. +//nolint:lll +var samples = []struct { + id string + tags string +}{ + { + id: `{__name__="diskio",arch="x64",datacenter="us-west-2c",hostname="host_78",measurement="reads",os="Ubuntu15.10",rack="87",region="us-west-2",service="11",service_environment="production",service_version="1",team="SF"}`, + tags: "dScMAAgAX19uYW1lX18GAGRpc2tpbwQAYXJjaAMAeDY0CgBkYXRhY2VudGVyCgB1cy13ZXN0LTJjCABob3N0bmFtZQcAaG9zdF83OAsAbWVhc3VyZW1lbnQFAHJlYWRzAgBvcwsAVWJ1bnR1MTUuMTAEAHJhY2sCADg3BgByZWdpb24JAHVzLXdlc3QtMgcAc2VydmljZQIAMTETAHNlcnZpY2VfZW52aXJvbm1lbnQKAHByb2R1Y3Rpb24PAHNlcnZpY2VfdmVyc2lvbgEAMQQAdGVhbQIAU0Y=", + }, + { + id: `{__name__="nginx",arch="x64",datacenter="us-west-1a",hostname="host_37",measurement="active",os="Ubuntu16.10",rack="78",region="us-west-1",service="10",service_environment="test",service_version="0",team="LON"}`, + tags: "dScMAAgAX19uYW1lX18FAG5naW54BABhcmNoAwB4NjQKAGRhdGFjZW50ZXIKAHVzLXdlc3QtMWEIAGhvc3RuYW1lBwBob3N0XzM3CwBtZWFzdXJlbWVudAYAYWN0aXZlAgBvcwsAVWJ1bnR1MTYuMTAEAHJhY2sCADc4BgByZWdpb24JAHVzLXdlc3QtMQcAc2VydmljZQIAMTATAHNlcnZpY2VfZW52aXJvbm1lbnQEAHRlc3QPAHNlcnZpY2VfdmVyc2lvbgEAMAQAdGVhbQMATE9O", + }, + { + id: `{__name__="disk",arch="x64",datacenter="sa-east-1b",hostname="host_54",measurement="inodes_total",os="Ubuntu16.10",rack="88",region="sa-east-1",service="15",service_environment="production",service_version="0",team="CHI"}`, + tags: 
"dScMAAgAX19uYW1lX18EAGRpc2sEAGFyY2gDAHg2NAoAZGF0YWNlbnRlcgoAc2EtZWFzdC0xYggAaG9zdG5hbWUHAGhvc3RfNTQLAG1lYXN1cmVtZW50DABpbm9kZXNfdG90YWwCAG9zCwBVYnVudHUxNi4xMAQAcmFjawIAODgGAHJlZ2lvbgkAc2EtZWFzdC0xBwBzZXJ2aWNlAgAxNRMAc2VydmljZV9lbnZpcm9ubWVudAoAcHJvZHVjdGlvbg8Ac2VydmljZV92ZXJzaW9uAQAwBAB0ZWFtAwBDSEk=", + }, + { + id: `{__name__="net",arch="x86",datacenter="us-east-1b",hostname="host_93",measurement="err_in",os="Ubuntu15.10",rack="37",region="us-east-1",service="12",service_environment="production",service_version="1",team="CHI"}`, + tags: "dScMAAgAX19uYW1lX18DAG5ldAQAYXJjaAMAeDg2CgBkYXRhY2VudGVyCgB1cy1lYXN0LTFiCABob3N0bmFtZQcAaG9zdF85MwsAbWVhc3VyZW1lbnQGAGVycl9pbgIAb3MLAFVidW50dTE1LjEwBAByYWNrAgAzNwYAcmVnaW9uCQB1cy1lYXN0LTEHAHNlcnZpY2UCADEyEwBzZXJ2aWNlX2Vudmlyb25tZW50CgBwcm9kdWN0aW9uDwBzZXJ2aWNlX3ZlcnNpb24BADEEAHRlYW0DAENISQ==", + }, + { + id: `{__name__="redis",arch="x86",datacenter="eu-central-1a",hostname="host_70",measurement="keyspace_misses",os="Ubuntu16.04LTS",rack="47",region="eu-central-1",service="12",service_environment="staging",service_version="1",team="LON"}`, + tags: "dScMAAgAX19uYW1lX18FAHJlZGlzBABhcmNoAwB4ODYKAGRhdGFjZW50ZXINAGV1LWNlbnRyYWwtMWEIAGhvc3RuYW1lBwBob3N0XzcwCwBtZWFzdXJlbWVudA8Aa2V5c3BhY2VfbWlzc2VzAgBvcw4AVWJ1bnR1MTYuMDRMVFMEAHJhY2sCADQ3BgByZWdpb24MAGV1LWNlbnRyYWwtMQcAc2VydmljZQIAMTITAHNlcnZpY2VfZW52aXJvbm1lbnQHAHN0YWdpbmcPAHNlcnZpY2VfdmVyc2lvbgEAMQQAdGVhbQMATE9O", + }, + { + id: `{__name__="nginx",arch="x86",datacenter="us-east-1b",hostname="host_84",measurement="requests",os="Ubuntu16.04LTS",rack="90",region="us-east-1",service="13",service_environment="test",service_version="0",team="NYC"}`, + tags: "dScMAAgAX19uYW1lX18FAG5naW54BABhcmNoAwB4ODYKAGRhdGFjZW50ZXIKAHVzLWVhc3QtMWIIAGhvc3RuYW1lBwBob3N0Xzg0CwBtZWFzdXJlbWVudAgAcmVxdWVzdHMCAG9zDgBVYnVudHUxNi4wNExUUwQAcmFjawIAOTAGAHJlZ2lvbgkAdXMtZWFzdC0xBwBzZXJ2aWNlAgAxMxMAc2VydmljZV9lbnZpcm9ubWVudAQAdGVzdA8Ac2VydmljZV92ZXJzaW9uAQAwBAB0ZWFtAwBOWUM=", + }, + { + id: 
`{__name__="mem",arch="x64",datacenter="eu-central-1b",hostname="host_27",measurement="buffered",os="Ubuntu16.04LTS",rack="58",region="eu-central-1",service="0",service_environment="test",service_version="0",team="NYC"}`, + tags: "dScMAAgAX19uYW1lX18DAG1lbQQAYXJjaAMAeDY0CgBkYXRhY2VudGVyDQBldS1jZW50cmFsLTFiCABob3N0bmFtZQcAaG9zdF8yNwsAbWVhc3VyZW1lbnQIAGJ1ZmZlcmVkAgBvcw4AVWJ1bnR1MTYuMDRMVFMEAHJhY2sCADU4BgByZWdpb24MAGV1LWNlbnRyYWwtMQcAc2VydmljZQEAMBMAc2VydmljZV9lbnZpcm9ubWVudAQAdGVzdA8Ac2VydmljZV92ZXJzaW9uAQAwBAB0ZWFtAwBOWUM=", + }, + { + id: `{__name__="kernel",arch="x86",datacenter="us-west-2a",hostname="host_80",measurement="disk_pages_in",os="Ubuntu16.10",rack="42",region="us-west-2",service="13",service_environment="test",service_version="1",team="SF"}`, + tags: "dScMAAgAX19uYW1lX18GAGtlcm5lbAQAYXJjaAMAeDg2CgBkYXRhY2VudGVyCgB1cy13ZXN0LTJhCABob3N0bmFtZQcAaG9zdF84MAsAbWVhc3VyZW1lbnQNAGRpc2tfcGFnZXNfaW4CAG9zCwBVYnVudHUxNi4xMAQAcmFjawIANDIGAHJlZ2lvbgkAdXMtd2VzdC0yBwBzZXJ2aWNlAgAxMxMAc2VydmljZV9lbnZpcm9ubWVudAQAdGVzdA8Ac2VydmljZV92ZXJzaW9uAQAxBAB0ZWFtAgBTRg==", + }, + { + id: `{__name__="disk",arch="x64",datacenter="ap-northeast-1c",hostname="host_77",measurement="inodes_used",os="Ubuntu16.04LTS",rack="84",region="ap-northeast-1",service="5",service_environment="production",service_version="0",team="LON"}`, + tags: "dScMAAgAX19uYW1lX18EAGRpc2sEAGFyY2gDAHg2NAoAZGF0YWNlbnRlcg8AYXAtbm9ydGhlYXN0LTFjCABob3N0bmFtZQcAaG9zdF83NwsAbWVhc3VyZW1lbnQLAGlub2Rlc191c2VkAgBvcw4AVWJ1bnR1MTYuMDRMVFMEAHJhY2sCADg0BgByZWdpb24OAGFwLW5vcnRoZWFzdC0xBwBzZXJ2aWNlAQA1EwBzZXJ2aWNlX2Vudmlyb25tZW50CgBwcm9kdWN0aW9uDwBzZXJ2aWNlX3ZlcnNpb24BADAEAHRlYW0DAExPTg==", + }, + { + id: `{__name__="postgresl",arch="x64",datacenter="eu-central-1b",hostname="host_27",measurement="xact_rollback",os="Ubuntu16.04LTS",rack="58",region="eu-central-1",service="0",service_environment="test",service_version="0",team="NYC"}`, + tags: 
"dScMAAgAX19uYW1lX18JAHBvc3RncmVzbAQAYXJjaAMAeDY0CgBkYXRhY2VudGVyDQBldS1jZW50cmFsLTFiCABob3N0bmFtZQcAaG9zdF8yNwsAbWVhc3VyZW1lbnQNAHhhY3Rfcm9sbGJhY2sCAG9zDgBVYnVudHUxNi4wNExUUwQAcmFjawIANTgGAHJlZ2lvbgwAZXUtY2VudHJhbC0xBwBzZXJ2aWNlAQAwEwBzZXJ2aWNlX2Vudmlyb25tZW50BAB0ZXN0DwBzZXJ2aWNlX3ZlcnNpb24BADAEAHRlYW0DAE5ZQw==", + }, + { + id: `{__name__="cpu",arch="x64",datacenter="sa-east-1b",hostname="host_43",measurement="usage_nice",os="Ubuntu16.10",rack="95",region="sa-east-1",service="4",service_environment="test",service_version="0",team="SF"}`, + tags: "dScMAAgAX19uYW1lX18DAGNwdQQAYXJjaAMAeDY0CgBkYXRhY2VudGVyCgBzYS1lYXN0LTFiCABob3N0bmFtZQcAaG9zdF80MwsAbWVhc3VyZW1lbnQKAHVzYWdlX25pY2UCAG9zCwBVYnVudHUxNi4xMAQAcmFjawIAOTUGAHJlZ2lvbgkAc2EtZWFzdC0xBwBzZXJ2aWNlAQA0EwBzZXJ2aWNlX2Vudmlyb25tZW50BAB0ZXN0DwBzZXJ2aWNlX3ZlcnNpb24BADAEAHRlYW0CAFNG", + }, + { + id: `{__name__="disk",arch="x64",datacenter="ap-northeast-1c",hostname="host_17",measurement="inodes_total",os="Ubuntu16.10",rack="94",region="ap-northeast-1",service="9",service_environment="staging",service_version="0",team="SF"}`, + tags: "dScMAAgAX19uYW1lX18EAGRpc2sEAGFyY2gDAHg2NAoAZGF0YWNlbnRlcg8AYXAtbm9ydGhlYXN0LTFjCABob3N0bmFtZQcAaG9zdF8xNwsAbWVhc3VyZW1lbnQMAGlub2Rlc190b3RhbAIAb3MLAFVidW50dTE2LjEwBAByYWNrAgA5NAYAcmVnaW9uDgBhcC1ub3J0aGVhc3QtMQcAc2VydmljZQEAORMAc2VydmljZV9lbnZpcm9ubWVudAcAc3RhZ2luZw8Ac2VydmljZV92ZXJzaW9uAQAwBAB0ZWFtAgBTRg==", + }, + { + id: `{__name__="redis",arch="x86",datacenter="us-west-2a",hostname="host_80",measurement="sync_partial_err",os="Ubuntu16.10",rack="42",region="us-west-2",service="13",service_environment="test",service_version="1",team="SF"}`, + tags: "dScMAAgAX19uYW1lX18FAHJlZGlzBABhcmNoAwB4ODYKAGRhdGFjZW50ZXIKAHVzLXdlc3QtMmEIAGhvc3RuYW1lBwBob3N0XzgwCwBtZWFzdXJlbWVudBAAc3luY19wYXJ0aWFsX2VycgIAb3MLAFVidW50dTE2LjEwBAByYWNrAgA0MgYAcmVnaW9uCQB1cy13ZXN0LTIHAHNlcnZpY2UCADEzEwBzZXJ2aWNlX2Vudmlyb25tZW50BAB0ZXN0DwBzZXJ2aWNlX3ZlcnNpb24BADEEAHRlYW0CAFNG", + }, + { + id: 
`{__name__="net",arch="x86",datacenter="us-east-1a",hostname="host_79",measurement="drop_out",os="Ubuntu16.04LTS",rack="17",region="us-east-1",service="17",service_environment="staging",service_version="1",team="SF"}`, + tags: "dScMAAgAX19uYW1lX18DAG5ldAQAYXJjaAMAeDg2CgBkYXRhY2VudGVyCgB1cy1lYXN0LTFhCABob3N0bmFtZQcAaG9zdF83OQsAbWVhc3VyZW1lbnQIAGRyb3Bfb3V0AgBvcw4AVWJ1bnR1MTYuMDRMVFMEAHJhY2sCADE3BgByZWdpb24JAHVzLWVhc3QtMQcAc2VydmljZQIAMTcTAHNlcnZpY2VfZW52aXJvbm1lbnQHAHN0YWdpbmcPAHNlcnZpY2VfdmVyc2lvbgEAMQQAdGVhbQIAU0Y=", + }, + { + id: `{__name__="redis",arch="x86",datacenter="ap-southeast-2b",hostname="host_100",measurement="used_cpu_user_children",os="Ubuntu16.04LTS",rack="40",region="ap-southeast-2",service="14",service_environment="staging",service_version="1",team="NYC"}`, + tags: "dScMAAgAX19uYW1lX18FAHJlZGlzBABhcmNoAwB4ODYKAGRhdGFjZW50ZXIPAGFwLXNvdXRoZWFzdC0yYggAaG9zdG5hbWUIAGhvc3RfMTAwCwBtZWFzdXJlbWVudBYAdXNlZF9jcHVfdXNlcl9jaGlsZHJlbgIAb3MOAFVidW50dTE2LjA0TFRTBAByYWNrAgA0MAYAcmVnaW9uDgBhcC1zb3V0aGVhc3QtMgcAc2VydmljZQIAMTQTAHNlcnZpY2VfZW52aXJvbm1lbnQHAHN0YWdpbmcPAHNlcnZpY2VfdmVyc2lvbgEAMQQAdGVhbQMATllD", + }, + { + id: `{__name__="disk",arch="x64",datacenter="ap-southeast-1a",hostname="host_87",measurement="inodes_total",os="Ubuntu15.10",rack="0",region="ap-southeast-1",service="11",service_environment="staging",service_version="0",team="LON"}`, + tags: "dScMAAgAX19uYW1lX18EAGRpc2sEAGFyY2gDAHg2NAoAZGF0YWNlbnRlcg8AYXAtc291dGhlYXN0LTFhCABob3N0bmFtZQcAaG9zdF84NwsAbWVhc3VyZW1lbnQMAGlub2Rlc190b3RhbAIAb3MLAFVidW50dTE1LjEwBAByYWNrAQAwBgByZWdpb24OAGFwLXNvdXRoZWFzdC0xBwBzZXJ2aWNlAgAxMRMAc2VydmljZV9lbnZpcm9ubWVudAcAc3RhZ2luZw8Ac2VydmljZV92ZXJzaW9uAQAwBAB0ZWFtAwBMT04=", + }, + { + id: `{__name__="cpu",arch="x64",datacenter="us-west-2a",hostname="host_6",measurement="usage_idle",os="Ubuntu16.10",rack="10",region="us-west-2",service="6",service_environment="test",service_version="0",team="CHI"}`, + tags: 
"dScMAAgAX19uYW1lX18DAGNwdQQAYXJjaAMAeDY0CgBkYXRhY2VudGVyCgB1cy13ZXN0LTJhCABob3N0bmFtZQYAaG9zdF82CwBtZWFzdXJlbWVudAoAdXNhZ2VfaWRsZQIAb3MLAFVidW50dTE2LjEwBAByYWNrAgAxMAYAcmVnaW9uCQB1cy13ZXN0LTIHAHNlcnZpY2UBADYTAHNlcnZpY2VfZW52aXJvbm1lbnQEAHRlc3QPAHNlcnZpY2VfdmVyc2lvbgEAMAQAdGVhbQMAQ0hJ", + }, + { + id: `{__name__="nginx",arch="x86",datacenter="us-east-1a",hostname="host_44",measurement="handled",os="Ubuntu16.04LTS",rack="61",region="us-east-1",service="2",service_environment="staging",service_version="1",team="NYC"}`, + tags: "dScMAAgAX19uYW1lX18FAG5naW54BABhcmNoAwB4ODYKAGRhdGFjZW50ZXIKAHVzLWVhc3QtMWEIAGhvc3RuYW1lBwBob3N0XzQ0CwBtZWFzdXJlbWVudAcAaGFuZGxlZAIAb3MOAFVidW50dTE2LjA0TFRTBAByYWNrAgA2MQYAcmVnaW9uCQB1cy1lYXN0LTEHAHNlcnZpY2UBADITAHNlcnZpY2VfZW52aXJvbm1lbnQHAHN0YWdpbmcPAHNlcnZpY2VfdmVyc2lvbgEAMQQAdGVhbQMATllD", + }, + { + id: `{__name__="nginx",arch="x86",datacenter="us-west-1a",hostname="host_29",measurement="waiting",os="Ubuntu15.10",rack="15",region="us-west-1",service="4",service_environment="test",service_version="1",team="NYC"}`, + tags: "dScMAAgAX19uYW1lX18FAG5naW54BABhcmNoAwB4ODYKAGRhdGFjZW50ZXIKAHVzLXdlc3QtMWEIAGhvc3RuYW1lBwBob3N0XzI5CwBtZWFzdXJlbWVudAcAd2FpdGluZwIAb3MLAFVidW50dTE1LjEwBAByYWNrAgAxNQYAcmVnaW9uCQB1cy13ZXN0LTEHAHNlcnZpY2UBADQTAHNlcnZpY2VfZW52aXJvbm1lbnQEAHRlc3QPAHNlcnZpY2VfdmVyc2lvbgEAMQQAdGVhbQMATllD", + }, + { + id: `{__name__="diskio",arch="x64",datacenter="ap-northeast-1c",hostname="host_38",measurement="write_time",os="Ubuntu15.10",rack="20",region="ap-northeast-1",service="0",service_environment="staging",service_version="0",team="SF"}`, + tags: "dScMAAgAX19uYW1lX18GAGRpc2tpbwQAYXJjaAMAeDY0CgBkYXRhY2VudGVyDwBhcC1ub3J0aGVhc3QtMWMIAGhvc3RuYW1lBwBob3N0XzM4CwBtZWFzdXJlbWVudAoAd3JpdGVfdGltZQIAb3MLAFVidW50dTE1LjEwBAByYWNrAgAyMAYAcmVnaW9uDgBhcC1ub3J0aGVhc3QtMQcAc2VydmljZQEAMBMAc2VydmljZV9lbnZpcm9ubWVudAcAc3RhZ2luZw8Ac2VydmljZV92ZXJzaW9uAQAwBAB0ZWFtAgBTRg==", + }, +} + +// BenchmarkFromSeriesIDAndTagIter-12 772224 1649 ns/op +func 
BenchmarkFromSeriesIDAndTagIter(b *testing.B) { + testData, err := prepareIDAndTags(b) + require.NoError(b, err) + + b.ResetTimer() + for i := range testData { + _, err := FromSeriesIDAndTagIter(testData[i].id, ident.NewTagsIterator(testData[i].tags)) + require.NoError(b, err) + } +} + +// BenchmarkFromSeriesIDAndTags-12 1000000 1311 ns/op +func BenchmarkFromSeriesIDAndTags(b *testing.B) { + testData, err := prepareIDAndTags(b) + require.NoError(b, err) + + b.ResetTimer() + for i := range testData { + _, err := FromSeriesIDAndTags(testData[i].id, testData[i].tags) + require.NoError(b, err) + } +} + +func prepareIDAndEncodedTags(b *testing.B) ([]idWithEncodedTags, error) { + var ( + rnd = rand.New(rand.NewSource(42)) //nolint:gosec + b64 = base64.StdEncoding + result = make([]idWithEncodedTags, 0, b.N) + ) + + for i := 0; i < b.N; i++ { + k := rnd.Intn(len(samples)) + id := clone([]byte(samples[k].id)) + tags, err := b64.DecodeString(samples[k].tags) + if err != nil { + return nil, err + } + + result = append(result, idWithEncodedTags{ + id: ident.BytesID(id), + encodedTags: tags, + }) + } + + return result, nil +} + +func prepareIDAndTags(b *testing.B) ([]idWithTags, error) { + testData, err := prepareIDAndEncodedTags(b) + if err != nil { + return nil, err + } + + decoderPool := serialize.NewTagDecoderPool( + serialize.NewTagDecoderOptions(serialize.TagDecoderOptionsConfig{}), + pool.NewObjectPoolOptions(), + ) + decoderPool.Init() + + bytesPool := pool.NewCheckedBytesPool(nil, nil, func(s []pool.Bucket) pool.BytesPool { + return pool.NewBytesPool(s, nil) + }) + bytesPool.Init() + + identPool := ident.NewPool(bytesPool, ident.PoolOptions{}) + + tagDecoder := decoderPool.Get() + defer tagDecoder.Close() + + result := make([]idWithTags, 0, len(testData)) + for i := range testData { + tagDecoder.Reset(checked.NewBytes(testData[i].encodedTags, nil)) + tags, err := TagsFromTagsIter(testData[i].id, tagDecoder, identPool) + if err != nil { + return nil, err + } + result = 
append(result, idWithTags{id: testData[i].id, tags: tags}) + } + return result, nil +} diff --git a/src/dbnode/storage/index/convert/convert_test.go b/src/dbnode/storage/index/convert/convert_test.go index 07c5d37873..aa72449797 100644 --- a/src/dbnode/storage/index/convert/convert_test.go +++ b/src/dbnode/storage/index/convert/convert_test.go @@ -20,6 +20,7 @@ package convert_test import ( + "bytes" "encoding/hex" "testing" "unicode/utf8" @@ -29,6 +30,7 @@ import ( "github.com/m3db/m3/src/x/checked" "github.com/m3db/m3/src/x/ident" "github.com/m3db/m3/src/x/pool" + "github.com/m3db/m3/src/x/test" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -74,10 +76,48 @@ func TestFromSeriesIDAndTagsValid(t *testing.T) { ) d, err := convert.FromSeriesIDAndTags(id, tags) assert.NoError(t, err) - assert.Equal(t, "foo", string(d.ID)) - assert.Len(t, d.Fields, 1) - assert.Equal(t, "bar", string(d.Fields[0].Name)) - assert.Equal(t, "baz", string(d.Fields[0].Value)) + assertContentsMatch(t, id, tags.Values(), d) +} + +func TestFromSeriesIDAndTagsReuseBytesFromSeriesId(t *testing.T) { + tests := []struct { + name string + id string + }{ + { + name: "tags in ID", + id: "bar=baz,quip=quix", + }, + { + name: "tags in ID with specific format", + id: `{bar="baz",quip="quix"}`, + }, + { + name: "tags in ID with specific format reverse order", + id: `{quip="quix",bar="baz"}`, + }, + { + name: "inexact tag occurrence in ID", + id: "quixquip_bazillion_barometers", + }, + } + tags := ident.NewTags( + ident.StringTag("bar", "baz"), + ident.StringTag("quip", "quix"), + ) + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + seriesID := ident.StringID(tt.id) + d, err := convert.FromSeriesIDAndTags(seriesID, tags) + assert.NoError(t, err) + assertContentsMatch(t, seriesID, tags.Values(), d) + for i := range d.Fields { + assertBackedBySameData(t, d.ID, d.Fields[i].Name) + assertBackedBySameData(t, d.ID, d.Fields[i].Value) + } + }) + } } func 
TestFromSeriesIDAndTagIterValid(t *testing.T) { @@ -87,18 +127,56 @@ func TestFromSeriesIDAndTagIterValid(t *testing.T) { ) d, err := convert.FromSeriesIDAndTagIter(id, ident.NewTagsIterator(tags)) assert.NoError(t, err) - assert.Equal(t, "foo", string(d.ID)) - assert.Len(t, d.Fields, 1) - assert.Equal(t, "bar", string(d.Fields[0].Name)) - assert.Equal(t, "baz", string(d.Fields[0].Value)) + assertContentsMatch(t, id, tags.Values(), d) +} + +func TestFromSeriesIDAndTagIterReuseBytesFromSeriesId(t *testing.T) { + tests := []struct { + name string + id string + }{ + { + name: "tags in ID", + id: "bar=baz,quip=quix", + }, + { + name: "tags in ID with specific format", + id: `{bar="baz",quip="quix"}`, + }, + { + name: "tags in ID with specific format reverse order", + id: `{quip="quix",bar="baz"}`, + }, + { + name: "inexact tag occurrence in ID", + id: "quixquip_bazillion_barometers", + }, + } + tags := ident.NewTags( + ident.StringTag("bar", "baz"), + ident.StringTag("quip", "quix"), + ) + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + seriesID := ident.StringID(tt.id) + d, err := convert.FromSeriesIDAndTagIter(seriesID, ident.NewTagsIterator(tags)) + assert.NoError(t, err) + assertContentsMatch(t, seriesID, tags.Values(), d) + for i := range d.Fields { + assertBackedBySameData(t, d.ID, d.Fields[i].Name) + assertBackedBySameData(t, d.ID, d.Fields[i].Value) + } + }) + } } func TestToSeriesValid(t *testing.T) { d := doc.Metadata{ ID: []byte("foo"), Fields: []doc.Field{ - doc.Field{Name: []byte("bar"), Value: []byte("baz")}, - doc.Field{Name: []byte("some"), Value: []byte("others")}, + {Name: []byte("bar"), Value: []byte("baz")}, + {Name: []byte("some"), Value: []byte("others")}, }, } id, tags, err := convert.ToSeries(d, testOpts) @@ -215,3 +293,21 @@ func TestValidateSeries(t *testing.T) { } // TODO(prateek): add a test to ensure we're interacting with the Pools as expected + +func assertContentsMatch(t *testing.T, seriesID ident.ID, tags 
[]ident.Tag, doc doc.Metadata) { + assert.Equal(t, seriesID.String(), string(doc.ID)) + assert.Len(t, doc.Fields, len(tags)) + for i, f := range doc.Fields { //nolint:gocritic + assert.Equal(t, tags[i].Name.String(), string(f.Name)) + assert.Equal(t, tags[i].Value.String(), string(f.Value)) + } +} + +func assertBackedBySameData(t *testing.T, outer, inner []byte) { + if idx := bytes.Index(outer, inner); idx != -1 { + subslice := outer[idx : idx+len(inner)] + assert.True(t, test.ByteSlicesBackedBySameData(subslice, inner)) + } else { + assert.Fail(t, "inner byte sequence wasn't found") + } +} From 24cbe1c0c9c665df37e141c25a5df530eab97189 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linas=20Med=C5=BEi=C5=ABnas?= Date: Wed, 20 Jan 2021 11:52:07 +0200 Subject: [PATCH 05/13] [tests] Skip flaky TestWatchNoLeader (#3106) --- src/cluster/etcd/watchmanager/manager_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cluster/etcd/watchmanager/manager_test.go b/src/cluster/etcd/watchmanager/manager_test.go index 0a28d232df..69153a4362 100644 --- a/src/cluster/etcd/watchmanager/manager_test.go +++ b/src/cluster/etcd/watchmanager/manager_test.go @@ -165,6 +165,7 @@ func TestWatchRecreate(t *testing.T) { } func TestWatchNoLeader(t *testing.T) { + t.Skip("flaky, started to fail very consistently on CI") const ( watchInitAndRetryDelay = 200 * time.Millisecond watchCheckInterval = 50 * time.Millisecond From 611166b842b2e4a340539a21134ba6eabe100b38 Mon Sep 17 00:00:00 2001 From: Vilius Pranckaitis Date: Wed, 20 Jan 2021 21:11:41 +1100 Subject: [PATCH 06/13] [dbnode] Direct conversion of encoded tags to doc.Metadata (#3087) --- src/dbnode/storage/index/convert/convert.go | 72 ++++++++++++ .../index/convert/convert_benchmark_test.go | 36 +++++- .../storage/index/convert/convert_test.go | 110 +++++++++++++++++- src/x/serialize/decoder.go | 10 +- src/x/serialize/decoder_fast.go | 14 +-- src/x/serialize/encoder.go | 18 +-- src/x/serialize/types.go | 6 +- 7 files changed, 240 
insertions(+), 26 deletions(-) diff --git a/src/dbnode/storage/index/convert/convert.go b/src/dbnode/storage/index/convert/convert.go index 023589a51e..a6178b3ac9 100644 --- a/src/dbnode/storage/index/convert/convert.go +++ b/src/dbnode/storage/index/convert/convert.go @@ -26,10 +26,12 @@ import ( "fmt" "unicode/utf8" + "github.com/m3db/m3/src/dbnode/ts" "github.com/m3db/m3/src/m3ninx/doc" "github.com/m3db/m3/src/query/graphite/graphite" "github.com/m3db/m3/src/x/ident" "github.com/m3db/m3/src/x/pool" + "github.com/m3db/m3/src/x/serialize" ) const ( @@ -188,6 +190,76 @@ func FromSeriesIDAndTagIter(id ident.ID, tags ident.TagIterator) (doc.Metadata, return d, nil } +// FromSeriesIDAndEncodedTags converts the provided series id and encoded tags into a doc.Metadata. +func FromSeriesIDAndEncodedTags(id ident.BytesID, encodedTags ts.EncodedTags) (doc.Metadata, error) { + var ( + byteOrder = serialize.ByteOrder + total = len(encodedTags) + ) + if total < 4 { + return doc.Metadata{}, fmt.Errorf("encoded tags too short: size=%d, need=%d", total, 4) + } + + header := byteOrder.Uint16(encodedTags[:2]) + encodedTags = encodedTags[2:] + if header != serialize.HeaderMagicNumber { + return doc.Metadata{}, serialize.ErrIncorrectHeader + } + + length := int(byteOrder.Uint16(encodedTags[:2])) + encodedTags = encodedTags[2:] + + var ( + clonedID = clone(id.Bytes()) + fields = make([]doc.Field, 0, length) + expectedStart = firstTagBytesPosition + ) + + for i := 0; i < length; i++ { + if len(encodedTags) < 2 { + return doc.Metadata{}, fmt.Errorf("missing size for tag name: index=%d", i) + } + numBytesName := int(byteOrder.Uint16(encodedTags[:2])) + if numBytesName == 0 { + return doc.Metadata{}, serialize.ErrEmptyTagNameLiteral + } + encodedTags = encodedTags[2:] + + bytesName := encodedTags[:numBytesName] + encodedTags = encodedTags[numBytesName:] + + if len(encodedTags) < 2 { + return doc.Metadata{}, fmt.Errorf("missing size for tag value: index=%d", i) + } + + numBytesValue := 
int(byteOrder.Uint16(encodedTags[:2])) + encodedTags = encodedTags[2:] + + bytesValue := encodedTags[:numBytesValue] + encodedTags = encodedTags[numBytesValue:] + + var clonedName, clonedValue []byte + clonedName, expectedStart = findSliceOrClone(clonedID, bytesName, expectedStart, + distanceBetweenTagNameAndValue) + clonedValue, expectedStart = findSliceOrClone(clonedID, bytesValue, expectedStart, + distanceBetweenTagValueAndNextName) + + fields = append(fields, doc.Field{ + Name: clonedName, + Value: clonedValue, + }) + } + + d := doc.Metadata{ + ID: clonedID, + Fields: fields, + } + if err := Validate(d); err != nil { + return doc.Metadata{}, err + } + return d, nil +} + func findSliceOrClone(id, tag []byte, expectedStart, nextPositionDistance int) ([]byte, int) { //nolint:unparam n := len(tag) expectedEnd := expectedStart + n diff --git a/src/dbnode/storage/index/convert/convert_benchmark_test.go b/src/dbnode/storage/index/convert/convert_benchmark_test.go index 0847e557e3..90781ed7ac 100644 --- a/src/dbnode/storage/index/convert/convert_benchmark_test.go +++ b/src/dbnode/storage/index/convert/convert_benchmark_test.go @@ -34,12 +34,12 @@ import ( ) type idWithEncodedTags struct { - id ident.ID + id ident.BytesID encodedTags []byte } type idWithTags struct { - id ident.ID + id ident.BytesID tags ident.Tags } @@ -155,6 +155,38 @@ func BenchmarkFromSeriesIDAndTags(b *testing.B) { } } +func BenchmarkFromSeriesIDAndEncodedTags(b *testing.B) { + testData, err := prepareIDAndEncodedTags(b) + require.NoError(b, err) + + b.ResetTimer() + for i := range testData { + _, err := FromSeriesIDAndEncodedTags(testData[i].id, testData[i].encodedTags) + require.NoError(b, err) + } +} + +func BenchmarkFromSeriesIDAndTagIter_TagDecoder(b *testing.B) { + testData, err := prepareIDAndEncodedTags(b) + require.NoError(b, err) + + decoderPool := serialize.NewTagDecoderPool( + serialize.NewTagDecoderOptions(serialize.TagDecoderOptionsConfig{}), + pool.NewObjectPoolOptions(), + ) + 
decoderPool.Init() + + decoder := decoderPool.Get() + defer decoder.Close() + + b.ResetTimer() + for i := range testData { + decoder.Reset(checked.NewBytes(testData[i].encodedTags, nil)) + _, err := FromSeriesIDAndTagIter(testData[i].id, decoder) + require.NoError(b, err) + } +} + func prepareIDAndEncodedTags(b *testing.B) ([]idWithEncodedTags, error) { var ( rnd = rand.New(rand.NewSource(42)) //nolint:gosec diff --git a/src/dbnode/storage/index/convert/convert_test.go b/src/dbnode/storage/index/convert/convert_test.go index aa72449797..8aa6c727d8 100644 --- a/src/dbnode/storage/index/convert/convert_test.go +++ b/src/dbnode/storage/index/convert/convert_test.go @@ -30,6 +30,7 @@ import ( "github.com/m3db/m3/src/x/checked" "github.com/m3db/m3/src/x/ident" "github.com/m3db/m3/src/x/pool" + "github.com/m3db/m3/src/x/serialize" "github.com/m3db/m3/src/x/test" "github.com/stretchr/testify/assert" @@ -171,6 +172,101 @@ func TestFromSeriesIDAndTagIterReuseBytesFromSeriesId(t *testing.T) { } } +func TestFromSeriesIDAndEncodedTags(t *testing.T) { + tests := []struct { + name string + id string + }{ + { + name: "no tags in ID", + id: "foo", + }, + { + name: "tags in ID", + id: "bar=baz,quip=quix", + }, + { + name: "tags in ID with specific format", + id: `{bar="baz",quip="quix"}`, + }, + { + name: "tags in ID with specific format reverse order", + id: `{quip="quix",bar="baz"}`, + }, + { + name: "inexact tag occurrence in ID", + id: "quixquip_bazillion_barometers", + }, + } + var ( + tags = ident.NewTags( + ident.StringTag("bar", "baz"), + ident.StringTag("quip", "quix"), + ) + encodedTags = toEncodedTags(t, tags) + ) + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + seriesID := ident.BytesID(tt.id) + d, err := convert.FromSeriesIDAndEncodedTags(seriesID, encodedTags) + assert.NoError(t, err) + assertContentsMatch(t, seriesID, tags.Values(), d) + for i := range d.Fields { + assertBackedBySameData(t, d.ID, d.Fields[i].Name) + assertBackedBySameData(t, 
d.ID, d.Fields[i].Value) + } + }) + } +} + +func TestFromSeriesIDAndEncodedTagsInvalid(t *testing.T) { + var ( + validEncodedTags = []byte{117, 39, 1, 0, 3, 0, 98, 97, 114, 3, 0, 98, 97, 122} + tagsWithReservedName = toEncodedTags(t, ident.NewTags( + ident.StringTag(string(convert.ReservedFieldNameID), "some_value"), + )) + ) + + tests := []struct { + name string + encodedTags []byte + }{ + { + name: "reserved tag name", + encodedTags: tagsWithReservedName, + }, + { + name: "incomplete header", + encodedTags: validEncodedTags[:3], + }, + { + name: "incomplete tag name length", + encodedTags: validEncodedTags[:5], + }, + { + name: "incomplete tag value length", + encodedTags: validEncodedTags[:10], + }, + { + name: "invalid magic number", + encodedTags: []byte{42, 42, 0, 0}, + }, + { + name: "empty tag name", + encodedTags: []byte{117, 39, 1, 0, 0, 0, 3, 0, 98, 97, 122}, + }, + } + seriesID := ident.BytesID("foo") + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, err := convert.FromSeriesIDAndEncodedTags(seriesID, tt.encodedTags) + assert.Error(t, err) + }) + } +} + func TestToSeriesValid(t *testing.T) { d := doc.Metadata{ ID: []byte("foo"), @@ -307,7 +403,17 @@ func assertBackedBySameData(t *testing.T, outer, inner []byte) { if idx := bytes.Index(outer, inner); idx != -1 { subslice := outer[idx : idx+len(inner)] assert.True(t, test.ByteSlicesBackedBySameData(subslice, inner)) - } else { - assert.Fail(t, "inner byte sequence wasn't found") } } + +func toEncodedTags(t *testing.T, tags ident.Tags) []byte { + pool := serialize.NewTagEncoderPool(serialize.NewTagEncoderOptions(), nil) + pool.Init() + encoder := pool.Get() + defer encoder.Finalize() + + require.NoError(t, encoder.Encode(ident.NewTagsIterator(tags))) + data, ok := encoder.Data() + require.True(t, ok) + return append([]byte(nil), data.Bytes()...) 
+} diff --git a/src/x/serialize/decoder.go b/src/x/serialize/decoder.go index 9d1b5d9dce..574b4c1109 100644 --- a/src/x/serialize/decoder.go +++ b/src/x/serialize/decoder.go @@ -29,7 +29,9 @@ import ( ) var ( - errIncorrectHeader = errors.New("header magic number does not match expected value") + // ErrIncorrectHeader is an error when encoded tag byte sequence doesn't start with + // an expected magic number. + ErrIncorrectHeader = errors.New("header magic number does not match expected value") errInvalidByteStreamIDDecoding = errors.New("internal error, invalid byte stream while decoding ID") errInvalidByteStreamUintDecoding = errors.New("internal error, invalid byte stream while decoding uint") ) @@ -78,8 +80,8 @@ func (d *decoder) Reset(b checked.Bytes) { return } - if header != headerMagicNumber { - d.err = errIncorrectHeader + if header != HeaderMagicNumber { + d.err = ErrIncorrectHeader return } @@ -129,7 +131,7 @@ func (d *decoder) decodeTag() error { // safe to call Bytes() as d.current.Name has inc'd a ref if len(d.currentTagName.Bytes()) == 0 { d.releaseCurrent() - return errEmptyTagNameLiteral + return ErrEmptyTagNameLiteral } if err := d.decodeIDInto(d.currentTagValue); err != nil { diff --git a/src/x/serialize/decoder_fast.go b/src/x/serialize/decoder_fast.go index e9846ac8d1..064f99d4c1 100644 --- a/src/x/serialize/decoder_fast.go +++ b/src/x/serialize/decoder_fast.go @@ -37,22 +37,22 @@ func TagValueFromEncodedTagsFast( "encoded tags too short: size=%d, need=%d", total, 4) } - header := byteOrder.Uint16(encodedTags[:2]) + header := ByteOrder.Uint16(encodedTags[:2]) encodedTags = encodedTags[2:] - if header != headerMagicNumber { - return nil, false, errIncorrectHeader + if header != HeaderMagicNumber { + return nil, false, ErrIncorrectHeader } - length := int(byteOrder.Uint16(encodedTags[:2])) + length := int(ByteOrder.Uint16(encodedTags[:2])) encodedTags = encodedTags[2:] for i := 0; i < length; i++ { if len(encodedTags) < 2 { return nil, false, 
fmt.Errorf("missing size for tag name: index=%d", i) } - numBytesName := int(byteOrder.Uint16(encodedTags[:2])) + numBytesName := int(ByteOrder.Uint16(encodedTags[:2])) if numBytesName == 0 { - return nil, false, errEmptyTagNameLiteral + return nil, false, ErrEmptyTagNameLiteral } encodedTags = encodedTags[2:] @@ -63,7 +63,7 @@ func TagValueFromEncodedTagsFast( return nil, false, fmt.Errorf("missing size for tag value: index=%d", i) } - numBytesValue := int(byteOrder.Uint16(encodedTags[:2])) + numBytesValue := int(ByteOrder.Uint16(encodedTags[:2])) encodedTags = encodedTags[2:] bytesValue := encodedTags[:numBytesValue] diff --git a/src/x/serialize/encoder.go b/src/x/serialize/encoder.go index a682a639ee..19bd884a25 100644 --- a/src/x/serialize/encoder.go +++ b/src/x/serialize/encoder.go @@ -51,18 +51,20 @@ import ( */ var ( - byteOrder binary.ByteOrder = binary.LittleEndian + // ByteOrder is the byte order used for encoding tags into a byte sequence. + ByteOrder binary.ByteOrder = binary.LittleEndian headerMagicBytes = make([]byte, 2) ) func init() { - encodeUInt16(headerMagicNumber, headerMagicBytes) + encodeUInt16(HeaderMagicNumber, headerMagicBytes) } var ( - errTagEncoderInUse = errors.New("encoder already in use") - errTagLiteralTooLong = errors.New("literal is too long") - errEmptyTagNameLiteral = xerrors.NewInvalidParamsError(errors.New("tag name cannot be empty")) + errTagEncoderInUse = errors.New("encoder already in use") + errTagLiteralTooLong = errors.New("literal is too long") + // ErrEmptyTagNameLiteral is an error when encoded tag name is empty. 
+ ErrEmptyTagNameLiteral = xerrors.NewInvalidParamsError(errors.New("tag name cannot be empty")) ) type newCheckedBytesFn func([]byte, checked.BytesOptions) checked.Bytes @@ -166,7 +168,7 @@ func (e *encoder) Finalize() { func (e *encoder) encodeTag(t ident.Tag) error { if len(t.Name.Bytes()) == 0 { - return errEmptyTagNameLiteral + return ErrEmptyTagNameLiteral } if err := e.encodeID(t.Name); err != nil { @@ -204,10 +206,10 @@ func (e *encoder) encodeUInt16(v uint16) []byte { } func encodeUInt16(v uint16, dest []byte) []byte { - byteOrder.PutUint16(dest, v) + ByteOrder.PutUint16(dest, v) return dest } func decodeUInt16(b []byte) uint16 { - return byteOrder.Uint16(b) + return ByteOrder.Uint16(b) } diff --git a/src/x/serialize/types.go b/src/x/serialize/types.go index a694ae7dc7..e3b31e6e7f 100644 --- a/src/x/serialize/types.go +++ b/src/x/serialize/types.go @@ -27,10 +27,10 @@ import ( "github.com/m3db/m3/src/x/ident" ) -var ( - // headerMagicNumber is an internal header used to denote the beginning of +const ( + // HeaderMagicNumber is an internal header used to denote the beginning of // an encoded stream. - headerMagicNumber uint16 = 10101 + HeaderMagicNumber uint16 = 10101 ) // TagEncoder encodes provided Tag iterators. 
From 6c4ed008c59f73b5a010ea0f90078c82e39c8d07 Mon Sep 17 00:00:00 2001 From: Dean Wahle <60762514+DeanWahle@users.noreply.github.com> Date: Wed, 20 Jan 2021 19:49:44 -0500 Subject: [PATCH 07/13] [query] Implemented Graphite's pow function (#3048) --- .../graphite/native/builtin_functions.go | 23 ++++++++++ .../graphite/native/builtin_functions_test.go | 42 +++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/src/query/graphite/native/builtin_functions.go b/src/query/graphite/native/builtin_functions.go index 2d5073208f..61beb1ff90 100644 --- a/src/query/graphite/native/builtin_functions.go +++ b/src/query/graphite/native/builtin_functions.go @@ -1143,6 +1143,28 @@ func exclude(_ *common.Context, input singlePathSpec, pattern string) (ts.Series return r, nil } +// pow takes one metric or a wildcard seriesList followed by a constant, +// and raises the datapoint by the power of the constant provided at each point +// nolint: gocritic +func pow(ctx *common.Context, input singlePathSpec, factor float64) (ts.SeriesList, error) { + results := make([]*ts.Series, 0, len(input.Values)) + + for _, series := range input.Values { + numSteps := series.Len() + millisPerStep := series.MillisPerStep() + vals := ts.NewValues(ctx, millisPerStep, numSteps) + for i := 0; i < numSteps; i++ { + vals.SetValueAt(i, math.Pow(series.ValueAt(i), factor)) + } + newName := fmt.Sprintf("pow(%s, %f)", series.Name(), factor) + results = append(results, ts.NewSeries(ctx, newName, series.StartTime(), vals)) + } + + r := ts.SeriesList(input) + r.Values = results + return r, nil +} + // logarithm takes one metric or a wildcard seriesList, and draws the y-axis in // logarithmic format. 
func logarithm(ctx *common.Context, input singlePathSpec, base int) (ts.SeriesList, error) { @@ -2471,6 +2493,7 @@ func init() { MustRegisterFunction(perSecond).WithDefaultParams(map[uint8]interface{}{ 2: math.NaN(), // maxValue }) + MustRegisterFunction(pow) MustRegisterFunction(powSeries) MustRegisterFunction(rangeOfSeries) MustRegisterFunction(randomWalkFunction).WithDefaultParams(map[uint8]interface{}{ diff --git a/src/query/graphite/native/builtin_functions_test.go b/src/query/graphite/native/builtin_functions_test.go index 41dfcd3a36..5521e93645 100644 --- a/src/query/graphite/native/builtin_functions_test.go +++ b/src/query/graphite/native/builtin_functions_test.go @@ -3193,6 +3193,47 @@ func TestConsolidateBy(t *testing.T) { require.NotNil(t, err) } +func TestPow(t *testing.T) { + var ( + ctx = common.NewTestContext() + millisPerStep = 10000 + output = []float64{1.0, 4.0, 9.0, 16.0, 25.0} + output2 = []float64{0.0, 4.0, 16.0, 36.0, 64.0} + ) + + defer func() { _ = ctx.Close() }() + + series := ts.NewSeries( + ctx, + "foo", + ctx.StartTime, + common.NewTestSeriesValues(ctx, millisPerStep, []float64{1.0, 2.0, 3.0, 4.0, 5.0}), + ) + results, err := pow(ctx, singlePathSpec{ + Values: []*ts.Series{series}, + }, 2) + require.Nil(t, err) + expected := common.TestSeries{Name: `pow(foo, 2.000000)`, Data: output} + require.Nil(t, err) + common.CompareOutputsAndExpected(t, millisPerStep, ctx.StartTime, + []common.TestSeries{expected}, results.Values) + + series2 := ts.NewSeries( + ctx, + "foo", + ctx.StartTime, + common.NewTestSeriesValues(ctx, millisPerStep, []float64{0.0, 2.0, 4.0, 6.0, 8.0}), + ) + results2, err := pow(ctx, singlePathSpec{ + Values: []*ts.Series{series, series2}, + }, 2) + require.Nil(t, err) + expected2 := common.TestSeries{Name: `pow(foo, 2.000000)`, Data: output2} + require.Nil(t, err) + common.CompareOutputsAndExpected(t, millisPerStep, ctx.StartTime, + []common.TestSeries{expected, expected2}, results2.Values) +} + func TestCumulative(t 
*testing.T) { ctx := common.NewTestContext() defer ctx.Close() @@ -3528,6 +3569,7 @@ func TestFunctionsRegistered(t *testing.T) { "offset", "offsetToZero", "perSecond", + "pow", "powSeries", "randomWalk", "randomWalkFunction", From 938782685af444855e4f0044d17cb26164bf58fa Mon Sep 17 00:00:00 2001 From: Gediminas Guoba Date: Thu, 21 Jan 2021 10:22:50 +0200 Subject: [PATCH 08/13] [tests] test setups exported to allow us to use it from other packages (#3042) --- .../integration/cluster_add_one_node_test.go | 12 ++-- .../commitlog_bootstrap_unowned_shard_test.go | 8 +-- src/dbnode/integration/integration.go | 58 +++++++++++-------- src/dbnode/integration/options.go | 17 ++++++ .../peers_bootstrap_high_concurrency_test.go | 12 ++-- .../peers_bootstrap_index_aggregate_test.go | 8 +-- .../integration/peers_bootstrap_index_test.go | 8 +-- .../peers_bootstrap_merge_local_test.go | 14 ++--- .../peers_bootstrap_merge_peer_blocks_test.go | 10 ++-- .../peers_bootstrap_node_down_test.go | 10 ++-- .../peers_bootstrap_none_available_test.go | 12 ++-- .../peers_bootstrap_select_best_test.go | 10 ++-- .../peers_bootstrap_simple_test.go | 8 +-- .../peers_bootstrap_single_node_test.go | 6 +- src/dbnode/integration/repair_test.go | 10 ++-- src/dbnode/integration/setup.go | 24 +++++--- 16 files changed, 131 insertions(+), 96 deletions(-) diff --git a/src/dbnode/integration/cluster_add_one_node_test.go b/src/dbnode/integration/cluster_add_one_node_test.go index d2b4c1beb9..1d04c92d25 100644 --- a/src/dbnode/integration/cluster_add_one_node_test.go +++ b/src/dbnode/integration/cluster_add_one_node_test.go @@ -112,17 +112,17 @@ func testClusterAddOneNode(t *testing.T, verifyCommitlogCanBootstrapAfterNodeJoi topoOpts := topology.NewDynamicOptions(). 
SetConfigServiceClient(fake.NewM3ClusterClient(svcs, nil)) topoInit := topology.NewDynamicInitializer(topoOpts) - setupOpts := []bootstrappableTestSetupOptions{ + setupOpts := []BootstrappableTestSetupOptions{ { - disablePeersBootstrapper: true, - topologyInitializer: topoInit, + DisablePeersBootstrapper: true, + TopologyInitializer: topoInit, }, { - disablePeersBootstrapper: false, - topologyInitializer: topoInit, + DisablePeersBootstrapper: false, + TopologyInitializer: topoInit, }, } - setups, closeFn := newDefaultBootstrappableTestSetups(t, opts, setupOpts) + setups, closeFn := NewDefaultBootstrappableTestSetups(t, opts, setupOpts) defer closeFn() // Write test data for first node. diff --git a/src/dbnode/integration/commitlog_bootstrap_unowned_shard_test.go b/src/dbnode/integration/commitlog_bootstrap_unowned_shard_test.go index 74c4b19162..fc062f5b26 100644 --- a/src/dbnode/integration/commitlog_bootstrap_unowned_shard_test.go +++ b/src/dbnode/integration/commitlog_bootstrap_unowned_shard_test.go @@ -89,12 +89,12 @@ func TestCommitLogBootstrapUnownedShard(t *testing.T) { opts := NewTestOptions(t). SetNamespaces([]namespace.Metadata{ns1}). 
SetNumShards(numShards) - setupOpts := []bootstrappableTestSetupOptions{ - {disablePeersBootstrapper: true, topologyInitializer: topoInit}, - {disablePeersBootstrapper: true, topologyInitializer: topoInit}, + setupOpts := []BootstrappableTestSetupOptions{ + {DisablePeersBootstrapper: true, TopologyInitializer: topoInit}, + {DisablePeersBootstrapper: true, TopologyInitializer: topoInit}, } - setups, closeFn := newDefaultBootstrappableTestSetups(t, opts, setupOpts) + setups, closeFn := NewDefaultBootstrappableTestSetups(t, opts, setupOpts) defer closeFn() // Only set this up for the first setup because we're only writing commit diff --git a/src/dbnode/integration/integration.go b/src/dbnode/integration/integration.go index 1aa2d56d13..cdd307dbb1 100644 --- a/src/dbnode/integration/integration.go +++ b/src/dbnode/integration/integration.go @@ -93,6 +93,7 @@ func newMultiAddrAdminClient( topologyInitializer topology.Initializer, origin topology.Host, instrumentOpts instrument.Options, + customOpts ...client.CustomAdminOption, ) client.AdminClient { if adminOpts == nil { adminOpts = client.NewAdminOptions() @@ -105,22 +106,28 @@ func newMultiAddrAdminClient( SetTopologyInitializer(topologyInitializer). SetClusterConnectTimeout(time.Second).(client.AdminOptions) + for _, o := range customOpts { + adminOpts = o(adminOpts) + } + adminClient, err := client.NewAdminClient(adminOpts) require.NoError(t, err) return adminClient } -type bootstrappableTestSetupOptions struct { - finalBootstrapper string - bootstrapBlocksBatchSize int - bootstrapBlocksConcurrency int - bootstrapConsistencyLevel topology.ReadConsistencyLevel - topologyInitializer topology.Initializer - testStatsReporter xmetrics.TestStatsReporter - disablePeersBootstrapper bool - useTChannelClientForWriting bool - enableRepairs bool +// BootstrappableTestSetupOptions defines options for test setups. 
+type BootstrappableTestSetupOptions struct { + FinalBootstrapper string + BootstrapBlocksBatchSize int + BootstrapBlocksConcurrency int + BootstrapConsistencyLevel topology.ReadConsistencyLevel + TopologyInitializer topology.Initializer + TestStatsReporter xmetrics.TestStatsReporter + DisablePeersBootstrapper bool + UseTChannelClientForWriting bool + EnableRepairs bool + AdminClientCustomOpts []client.CustomAdminOption } type closeFn func() @@ -135,10 +142,11 @@ func newDefaulTestResultOptions( SetSeriesCachePolicy(storageOpts.SeriesCachePolicy()) } -func newDefaultBootstrappableTestSetups( +// NewDefaultBootstrappableTestSetups creates dbnode test setups. +func NewDefaultBootstrappableTestSetups( // nolint:gocyclo t *testing.T, opts TestOptions, - setupOpts []bootstrappableTestSetupOptions, + setupOpts []BootstrappableTestSetupOptions, ) (testSetups, closeFn) { var ( replicas = len(setupOpts) @@ -158,17 +166,18 @@ func newDefaultBootstrappableTestSetups( for i := 0; i < replicas; i++ { var ( instance = i - usingPeersBootstrapper = !setupOpts[i].disablePeersBootstrapper - finalBootstrapperToUse = setupOpts[i].finalBootstrapper - useTChannelClientForWriting = setupOpts[i].useTChannelClientForWriting - bootstrapBlocksBatchSize = setupOpts[i].bootstrapBlocksBatchSize - bootstrapBlocksConcurrency = setupOpts[i].bootstrapBlocksConcurrency - bootstrapConsistencyLevel = setupOpts[i].bootstrapConsistencyLevel - topologyInitializer = setupOpts[i].topologyInitializer - testStatsReporter = setupOpts[i].testStatsReporter - enableRepairs = setupOpts[i].enableRepairs + usingPeersBootstrapper = !setupOpts[i].DisablePeersBootstrapper + finalBootstrapperToUse = setupOpts[i].FinalBootstrapper + useTChannelClientForWriting = setupOpts[i].UseTChannelClientForWriting + bootstrapBlocksBatchSize = setupOpts[i].BootstrapBlocksBatchSize + bootstrapBlocksConcurrency = setupOpts[i].BootstrapBlocksConcurrency + bootstrapConsistencyLevel = setupOpts[i].BootstrapConsistencyLevel + 
topologyInitializer = setupOpts[i].TopologyInitializer + testStatsReporter = setupOpts[i].TestStatsReporter + enableRepairs = setupOpts[i].EnableRepairs origin topology.Host instanceOpts = newMultiAddrTestOptions(opts, instance) + adminClientCustomOpts = setupOpts[i].AdminClientCustomOpts ) if finalBootstrapperToUse == "" { @@ -211,7 +220,7 @@ func newDefaultBootstrappableTestSetups( // claim manager instances after the initial node. persistfs.ResetIndexClaimsManagersUnsafe() } - setup, err := NewTestSetup(t, instanceOpts, nil) + setup, err := NewTestSetup(t, instanceOpts, nil, opts.StorageOptsFn()) require.NoError(t, err) topologyInitializer = setup.TopologyInitializer() @@ -260,8 +269,11 @@ func newDefaultBootstrappableTestSetups( adminOpts = adminOpts.SetFetchSeriesBlocksBatchConcurrency(bootstrapBlocksConcurrency) } adminOpts = adminOpts.SetStreamBlocksRetrier(retrier) + adminClient := newMultiAddrAdminClient( - t, adminOpts, topologyInitializer, origin, instrumentOpts) + t, adminOpts, topologyInitializer, origin, instrumentOpts, adminClientCustomOpts...) + setup.SetStorageOpts(setup.StorageOpts().SetAdminClient(adminClient)) + storageIdxOpts := setup.StorageOpts().IndexOptions() fsOpts := setup.StorageOpts().CommitLogOptions().FilesystemOptions() if usingPeersBootstrapper { diff --git a/src/dbnode/integration/options.go b/src/dbnode/integration/options.go index 355b745c59..f2432bd0b4 100644 --- a/src/dbnode/integration/options.go +++ b/src/dbnode/integration/options.go @@ -293,6 +293,12 @@ type TestOptions interface { // ReportInterval returns the time between reporting metrics within the system. ReportInterval() time.Duration + + // SetStorageOptsFn sets the StorageOpts modifier. + SetStorageOptsFn(StorageOption) TestOptions + + // StorageOptsFn returns the StorageOpts modifier. 
+ StorageOptsFn() StorageOption } type options struct { @@ -327,6 +333,7 @@ type options struct { assertEqual assertTestDataEqual nowFn func() time.Time reportInterval time.Duration + storageOptsFn StorageOption } // NewTestOptions returns a new set of integration test options. @@ -676,3 +683,13 @@ func (o *options) SetReportInterval(value time.Duration) TestOptions { func (o *options) ReportInterval() time.Duration { return o.reportInterval } + +func (o *options) SetStorageOptsFn(storageOptsFn StorageOption) TestOptions { + opts := *o + opts.storageOptsFn = storageOptsFn + return &opts +} + +func (o *options) StorageOptsFn() StorageOption { + return o.storageOptsFn +} diff --git a/src/dbnode/integration/peers_bootstrap_high_concurrency_test.go b/src/dbnode/integration/peers_bootstrap_high_concurrency_test.go index 0394153c95..98e9ffca8e 100644 --- a/src/dbnode/integration/peers_bootstrap_high_concurrency_test.go +++ b/src/dbnode/integration/peers_bootstrap_high_concurrency_test.go @@ -102,17 +102,17 @@ func testPeersBootstrapHighConcurrency( batchSize := 16 concurrency := 64 - setupOpts := []bootstrappableTestSetupOptions{ + setupOpts := []BootstrappableTestSetupOptions{ { - disablePeersBootstrapper: true, + DisablePeersBootstrapper: true, }, { - disablePeersBootstrapper: false, - bootstrapBlocksBatchSize: batchSize, - bootstrapBlocksConcurrency: concurrency, + DisablePeersBootstrapper: false, + BootstrapBlocksBatchSize: batchSize, + BootstrapBlocksConcurrency: concurrency, }, } - setups, closeFn := newDefaultBootstrappableTestSetups(t, opts, setupOpts) + setups, closeFn := NewDefaultBootstrappableTestSetups(t, opts, setupOpts) defer closeFn() // Write test data for first node diff --git a/src/dbnode/integration/peers_bootstrap_index_aggregate_test.go b/src/dbnode/integration/peers_bootstrap_index_aggregate_test.go index 10a2d2890b..498fc538d6 100644 --- a/src/dbnode/integration/peers_bootstrap_index_aggregate_test.go +++ 
b/src/dbnode/integration/peers_bootstrap_index_aggregate_test.go @@ -65,11 +65,11 @@ func TestPeersBootstrapIndexAggregateQuery(t *testing.T) { SetUseTChannelClientForWriting(true). SetUseTChannelClientForReading(true) - setupOpts := []bootstrappableTestSetupOptions{ - {disablePeersBootstrapper: true}, - {disablePeersBootstrapper: false}, + setupOpts := []BootstrappableTestSetupOptions{ + {DisablePeersBootstrapper: true}, + {DisablePeersBootstrapper: false}, } - setups, closeFn := newDefaultBootstrappableTestSetups(t, opts, setupOpts) + setups, closeFn := NewDefaultBootstrappableTestSetups(t, opts, setupOpts) defer closeFn() // Write test data for first node diff --git a/src/dbnode/integration/peers_bootstrap_index_test.go b/src/dbnode/integration/peers_bootstrap_index_test.go index d5907d9dc1..5b5e58761e 100644 --- a/src/dbnode/integration/peers_bootstrap_index_test.go +++ b/src/dbnode/integration/peers_bootstrap_index_test.go @@ -70,11 +70,11 @@ func TestPeersBootstrapIndexWithIndexingEnabled(t *testing.T) { SetUseTChannelClientForWriting(true). 
SetUseTChannelClientForReading(true) - setupOpts := []bootstrappableTestSetupOptions{ - {disablePeersBootstrapper: true}, - {disablePeersBootstrapper: false}, + setupOpts := []BootstrappableTestSetupOptions{ + {DisablePeersBootstrapper: true}, + {DisablePeersBootstrapper: false}, } - setups, closeFn := newDefaultBootstrappableTestSetups(t, opts, setupOpts) + setups, closeFn := NewDefaultBootstrappableTestSetups(t, opts, setupOpts) defer closeFn() // Write test data for first node diff --git a/src/dbnode/integration/peers_bootstrap_merge_local_test.go b/src/dbnode/integration/peers_bootstrap_merge_local_test.go index b6638d7476..845ab95497 100644 --- a/src/dbnode/integration/peers_bootstrap_merge_local_test.go +++ b/src/dbnode/integration/peers_bootstrap_merge_local_test.go @@ -73,15 +73,15 @@ func testPeersBootstrapMergeLocal(t *testing.T, setTestOpts setTestOptions, upda // Enable useTchannelClientForWriting because this test relies upon being // able to write data to a single node, and the M3DB client does not support // that, but we can accomplish it by using an individual nodes TChannel endpoints. 
- setupOpts = []bootstrappableTestSetupOptions{ + setupOpts = []BootstrappableTestSetupOptions{ { - disablePeersBootstrapper: true, - useTChannelClientForWriting: true, + DisablePeersBootstrapper: true, + UseTChannelClientForWriting: true, }, { - disablePeersBootstrapper: false, - useTChannelClientForWriting: true, - testStatsReporter: reporter, + DisablePeersBootstrapper: false, + UseTChannelClientForWriting: true, + TestStatsReporter: reporter, }, } ) @@ -91,7 +91,7 @@ func testPeersBootstrapMergeLocal(t *testing.T, setTestOpts setTestOptions, upda namesp = opts.Namespaces()[0] } - setups, closeFn := newDefaultBootstrappableTestSetups(t, opts, setupOpts) + setups, closeFn := NewDefaultBootstrappableTestSetups(t, opts, setupOpts) defer closeFn() // Write test data for first node, ensure to overflow past diff --git a/src/dbnode/integration/peers_bootstrap_merge_peer_blocks_test.go b/src/dbnode/integration/peers_bootstrap_merge_peer_blocks_test.go index 40f1daca66..52e671f958 100644 --- a/src/dbnode/integration/peers_bootstrap_merge_peer_blocks_test.go +++ b/src/dbnode/integration/peers_bootstrap_merge_peer_blocks_test.go @@ -69,12 +69,12 @@ func testPeersBootstrapMergePeerBlocks(t *testing.T, setTestOpts setTestOptions, opts = setTestOpts(t, opts) namesp = opts.Namespaces()[0] } - setupOpts := []bootstrappableTestSetupOptions{ - {disablePeersBootstrapper: true}, - {disablePeersBootstrapper: true}, - {disablePeersBootstrapper: false}, + setupOpts := []BootstrappableTestSetupOptions{ + {DisablePeersBootstrapper: true}, + {DisablePeersBootstrapper: true}, + {DisablePeersBootstrapper: false}, } - setups, closeFn := newDefaultBootstrappableTestSetups(t, opts, setupOpts) + setups, closeFn := NewDefaultBootstrappableTestSetups(t, opts, setupOpts) defer closeFn() // Write test data alternating missing data for left/right nodes diff --git a/src/dbnode/integration/peers_bootstrap_node_down_test.go b/src/dbnode/integration/peers_bootstrap_node_down_test.go index 
5518b1bb44..45c0f3753f 100644 --- a/src/dbnode/integration/peers_bootstrap_node_down_test.go +++ b/src/dbnode/integration/peers_bootstrap_node_down_test.go @@ -57,12 +57,12 @@ func TestPeersBootstrapNodeDown(t *testing.T) { SetUseTChannelClientForWriting(true). SetUseTChannelClientForReading(true) - setupOpts := []bootstrappableTestSetupOptions{ - {disablePeersBootstrapper: true}, - {disablePeersBootstrapper: true}, - {disablePeersBootstrapper: false}, + setupOpts := []BootstrappableTestSetupOptions{ + {DisablePeersBootstrapper: true}, + {DisablePeersBootstrapper: true}, + {DisablePeersBootstrapper: false}, } - setups, closeFn := newDefaultBootstrappableTestSetups(t, opts, setupOpts) + setups, closeFn := NewDefaultBootstrappableTestSetups(t, opts, setupOpts) defer closeFn() // Write test data for first node diff --git a/src/dbnode/integration/peers_bootstrap_none_available_test.go b/src/dbnode/integration/peers_bootstrap_none_available_test.go index b4d929c391..17a41c4c49 100644 --- a/src/dbnode/integration/peers_bootstrap_none_available_test.go +++ b/src/dbnode/integration/peers_bootstrap_none_available_test.go @@ -89,17 +89,17 @@ func TestPeersBootstrapNoneAvailable(t *testing.T) { SetShardSet(shardSet) topoInit := topology.NewStaticInitializer(topoOpts) - setupOpts := []bootstrappableTestSetupOptions{ + setupOpts := []BootstrappableTestSetupOptions{ { - disablePeersBootstrapper: false, - topologyInitializer: topoInit, + DisablePeersBootstrapper: false, + TopologyInitializer: topoInit, }, { - disablePeersBootstrapper: false, - topologyInitializer: topoInit, + DisablePeersBootstrapper: false, + TopologyInitializer: topoInit, }, } - setups, closeFn := newDefaultBootstrappableTestSetups(t, opts, setupOpts) + setups, closeFn := NewDefaultBootstrappableTestSetups(t, opts, setupOpts) defer closeFn() serversAreUp := &sync.WaitGroup{} diff --git a/src/dbnode/integration/peers_bootstrap_select_best_test.go b/src/dbnode/integration/peers_bootstrap_select_best_test.go index 
62efd41497..58caec15f2 100644 --- a/src/dbnode/integration/peers_bootstrap_select_best_test.go +++ b/src/dbnode/integration/peers_bootstrap_select_best_test.go @@ -56,12 +56,12 @@ func TestPeersBootstrapSelectBest(t *testing.T) { SetUseTChannelClientForWriting(true). SetUseTChannelClientForReading(true) - setupOpts := []bootstrappableTestSetupOptions{ - {disablePeersBootstrapper: true}, - {disablePeersBootstrapper: true}, - {disablePeersBootstrapper: false}, + setupOpts := []BootstrappableTestSetupOptions{ + {DisablePeersBootstrapper: true}, + {DisablePeersBootstrapper: true}, + {DisablePeersBootstrapper: false}, } - setups, closeFn := newDefaultBootstrappableTestSetups(t, opts, setupOpts) + setups, closeFn := NewDefaultBootstrappableTestSetups(t, opts, setupOpts) defer closeFn() // Write test data alternating missing data for left/right nodes diff --git a/src/dbnode/integration/peers_bootstrap_simple_test.go b/src/dbnode/integration/peers_bootstrap_simple_test.go index 620bdc0b15..38bba77f9c 100644 --- a/src/dbnode/integration/peers_bootstrap_simple_test.go +++ b/src/dbnode/integration/peers_bootstrap_simple_test.go @@ -67,11 +67,11 @@ func testPeersBootstrapSimple(t *testing.T, setTestOpts setTestOptions, updateIn namesp = opts.Namespaces()[0] } - setupOpts := []bootstrappableTestSetupOptions{ - {disablePeersBootstrapper: true}, - {disablePeersBootstrapper: false}, + setupOpts := []BootstrappableTestSetupOptions{ + {DisablePeersBootstrapper: true}, + {DisablePeersBootstrapper: false}, } - setups, closeFn := newDefaultBootstrappableTestSetups(t, opts, setupOpts) + setups, closeFn := NewDefaultBootstrappableTestSetups(t, opts, setupOpts) defer closeFn() // Write test data for first node diff --git a/src/dbnode/integration/peers_bootstrap_single_node_test.go b/src/dbnode/integration/peers_bootstrap_single_node_test.go index 288d3c293c..7e48530f8c 100644 --- a/src/dbnode/integration/peers_bootstrap_single_node_test.go +++ 
b/src/dbnode/integration/peers_bootstrap_single_node_test.go @@ -57,10 +57,10 @@ func TestPeersBootstrapSingleNode(t *testing.T) { SetUseTChannelClientForWriting(true). SetUseTChannelClientForReading(true) - setupOpts := []bootstrappableTestSetupOptions{ - {disablePeersBootstrapper: false}, + setupOpts := []BootstrappableTestSetupOptions{ + {DisablePeersBootstrapper: false}, } - setups, closeFn := newDefaultBootstrappableTestSetups(t, opts, setupOpts) + setups, closeFn := NewDefaultBootstrappableTestSetups(t, opts, setupOpts) defer closeFn() // Write test data diff --git a/src/dbnode/integration/repair_test.go b/src/dbnode/integration/repair_test.go index b3a93e7bca..d469773777 100644 --- a/src/dbnode/integration/repair_test.go +++ b/src/dbnode/integration/repair_test.go @@ -196,12 +196,12 @@ func testRepair( SetUseTChannelClientForWriting(true). SetUseTChannelClientForReading(true) - setupOpts := []bootstrappableTestSetupOptions{ - {disablePeersBootstrapper: true, enableRepairs: true}, - {disablePeersBootstrapper: true, enableRepairs: true}, - {disablePeersBootstrapper: true, enableRepairs: true}, + setupOpts := []BootstrappableTestSetupOptions{ + {DisablePeersBootstrapper: true, EnableRepairs: true}, + {DisablePeersBootstrapper: true, EnableRepairs: true}, + {DisablePeersBootstrapper: true, EnableRepairs: true}, } - setups, closeFn := newDefaultBootstrappableTestSetups(t, opts, setupOpts) + setups, closeFn := NewDefaultBootstrappableTestSetups(t, opts, setupOpts) defer closeFn() // Ensure that the current time is set such that the previous block is flushable. 
diff --git a/src/dbnode/integration/setup.go b/src/dbnode/integration/setup.go index e4d27478cf..3c4c05ce6c 100644 --- a/src/dbnode/integration/setup.go +++ b/src/dbnode/integration/setup.go @@ -186,14 +186,15 @@ type TestSetup interface { InitializeBootstrappers(opts InitializeBootstrappersOptions) error } -type storageOption func(storage.Options) storage.Options +// StorageOption is a reference to storage options function. +type StorageOption func(storage.Options) storage.Options // NewTestSetup returns a new test setup for non-dockerized integration tests. func NewTestSetup( t *testing.T, opts TestOptions, fsOpts fs.Options, - storageOptFns ...storageOption, + storageOptFns ...StorageOption, ) (TestSetup, error) { if opts == nil { opts = NewTestOptions(t) @@ -465,7 +466,12 @@ func NewTestSetup( } for _, fn := range storageOptFns { - storageOpts = fn(storageOpts) + if fn != nil { + storageOpts = fn(storageOpts) + } + } + if storageOpts != nil && storageOpts.AdminClient() == nil { + storageOpts = storageOpts.SetAdminClient(adminClient) } return &testSetup{ @@ -1098,18 +1104,18 @@ func newNodes( SetConfigServiceClient(fake.NewM3ClusterClient(svcs, nil)) topoInit := topology.NewDynamicInitializer(topoOpts) - nodeOpt := bootstrappableTestSetupOptions{ - disablePeersBootstrapper: true, - finalBootstrapper: bootstrapper.NoOpAllBootstrapperName, - topologyInitializer: topoInit, + nodeOpt := BootstrappableTestSetupOptions{ + DisablePeersBootstrapper: true, + FinalBootstrapper: bootstrapper.NoOpAllBootstrapperName, + TopologyInitializer: topoInit, } - nodeOpts := make([]bootstrappableTestSetupOptions, len(instances)) + nodeOpts := make([]BootstrappableTestSetupOptions, len(instances)) for i := range instances { nodeOpts[i] = nodeOpt } - nodes, closeFn := newDefaultBootstrappableTestSetups(t, opts, nodeOpts) + nodes, closeFn := NewDefaultBootstrappableTestSetups(t, opts, nodeOpts) nodeClose := func() { // Clean up running servers at end of test log.Debug("servers closing") 
From 188655d18212ec24f08c47edc620c27ba6f209f5 Mon Sep 17 00:00:00 2001 From: Ryan Allen Date: Thu, 21 Jan 2021 11:04:37 -0500 Subject: [PATCH 09/13] Add support for dynamic query limit overriding (#3090) --- go.mod | 1 + .../operational_guide/resource_limits.md | 63 + src/cluster/generated/proto/kvpb/kv.pb.go | 1097 +++++++++++++++++ src/cluster/generated/proto/kvpb/kv.proto | 46 + src/dbnode/generated/thrift/rpc/rpc.go | 2 +- src/dbnode/kvconfig/keys.go | 3 + src/dbnode/persist/fs/retriever.go | 5 +- src/dbnode/persist/fs/retriever_test.go | 6 +- src/dbnode/server/server.go | 96 ++ .../storage/limits/noop_query_limits.go | 8 + src/dbnode/storage/limits/query_limits.go | 154 ++- .../storage/limits/query_limits_test.go | 70 +- src/dbnode/storage/limits/types.go | 7 + src/query/api/v1/handler/database/common.go | 10 + src/query/api/v1/handler/database/kvstore.go | 204 +++ .../api/v1/handler/database/kvstore_test.go | 186 +++ 16 files changed, 1902 insertions(+), 56 deletions(-) create mode 100644 src/cluster/generated/proto/kvpb/kv.pb.go create mode 100644 src/cluster/generated/proto/kvpb/kv.proto create mode 100644 src/query/api/v1/handler/database/kvstore.go create mode 100644 src/query/api/v1/handler/database/kvstore_test.go diff --git a/go.mod b/go.mod index 7cec7ccf03..b223fe63be 100644 --- a/go.mod +++ b/go.mod @@ -110,6 +110,7 @@ require ( golang.org/x/sys v0.0.0-20201009025420-dfb3f7c4e634 golang.org/x/tools v0.0.0-20201013201025-64a9e34f3752 google.golang.org/grpc v1.29.1 + google.golang.org/protobuf v1.23.0 gopkg.in/go-ini/ini.v1 v1.57.0 // indirect gopkg.in/go-playground/assert.v1 v1.2.1 // indirect gopkg.in/go-playground/validator.v9 v9.7.0 diff --git a/site/content/operational_guide/resource_limits.md b/site/content/operational_guide/resource_limits.md index f19268009f..3b6041388c 100644 --- a/site/content/operational_guide/resource_limits.md +++ b/site/content/operational_guide/resource_limits.md @@ -53,6 +53,15 @@ per second safely with your deployment 
and you want to use the default lookback of `15s` then you would multiply 10,000 by 15 to get 150,000 as a max value with a 15s lookback. +The third limit `maxRecentlyQueriedSeriesDiskRead` caps the series IDs matched by incoming +queries. This originally was distinct from the limit `maxRecentlyQueriedSeriesBlocks`, which +also limits the memory cost of specific series matched, because of an inefficiency +in how allocations would occur even for series known to not be present on disk for a given +shard. This inefficiency has been resolved in https://github.com/m3db/m3/pull/3103 and therefore +this limit should be tracking memory cost linearly relative to `maxRecentlyQueriedSeriesBlocks`. +It is recommended to defer to using `maxRecentlyQueriedSeriesBlocks` over +`maxRecentlyQueriedSeriesDiskRead` given both should cap the resources similarly. + ### Annotated configuration ```yaml @@ -82,6 +91,18 @@ limits: # and read until the lookback period resets. lookback: 15s + # If set, will enforce a maximum on the series read from disk. + # This limit can be used to ensure queries that match an extremely high + # volume of series can be limited before even reading the underlying series data from disk. + maxRecentlyQueriedSeriesDiskRead: + # Value sets the maximum number of series read from disk. + value: 0 + # Lookback sets the time window that this limit is enforced over, every + # lookback period the global count is reset to zero and when the limit + # is reached it will reject any further time series blocks being matched + # and read until the lookback period resets. + lookback: 15s + # If set then will limit the number of parallel write batch requests to the # database and return errors if hit. maxOutstandingWriteRequests: 0 @@ -94,6 +115,48 @@ limits: maxOutstandingReadRequests: 0 ``` +### Dynamic configuration + +Query limits can be dynamically driven by etcd to adjust limits without redeploying.
By updating the `m3db.query.limits` key in etcd, specific limits can be overridden. M3Coordinator exposes an API for updating etcd key/value pairs and so this API can be used for modifying these dynamic overrides. For example, + +``` +curl -vvvsSf -X POST 0.0.0.0:7201/api/v1/kvstore -d '{ + "key": "m3db.query.limits", + "value":{ + "maxRecentlyQueriedSeriesDiskBytesRead": { + "limit":0, + "lookbackSeconds":15, + "forceExceeded":false + }, + "maxRecentlyQueriedSeriesBlocks": { + "limit":0, + "lookbackSeconds":15, + "forceExceeded":false + }, + "maxRecentlyQueriedSeriesDiskRead": { + "limit":0, + "lookbackSeconds":15, + "forceExceeded":false + } + }, + "commit":true +}' +``` + +To remove all overrides, omit all limits from the `value`: +``` +curl -vvvsSf -X POST 0.0.0.0:7201/api/v1/kvstore -d '{ + "key": "m3db.query.limits", + "value":{}, + "commit":true +}' +``` + +Usage notes: +- Setting the `commit` flag to false allows for dry-run API calls to see the old and new limits that would be applied. +- Omitting a limit from the `value` results in that limit being driven by the config-based settings. +- The `forceExceeded` flag makes the limit behave as though it is permanently exceeded, thus failing all queries. This is useful for dynamically shutting down all queries in cases where load may be exceeding provisioned resources. + ## M3 Query and M3 Coordinator ### Deployment diff --git a/src/cluster/generated/proto/kvpb/kv.pb.go b/src/cluster/generated/proto/kvpb/kv.pb.go new file mode 100644 index 0000000000..d3c7c43f87 --- /dev/null +++ b/src/cluster/generated/proto/kvpb/kv.pb.go @@ -0,0 +1,1097 @@ +// Code generated by protoc-gen-gogo. DO NOT EDIT. +// source: github.com/m3db/m3/src/cluster/generated/proto/kvpb/kv.proto + +// Copyright (c) 2021 Uber Technologies, Inc.
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +/* + Package kvpb is a generated protocol buffer package. + + It is generated from these files: + github.com/m3db/m3/src/cluster/generated/proto/kvpb/kv.proto + + It has these top-level messages: + KeyValueUpdate + KeyValueUpdateResult + QueryLimits + QueryLimit +*/ +package kvpb + +import proto "github.com/gogo/protobuf/proto" +import fmt "fmt" +import math "math" + +import io "io" + +// Reference imports to suppress errors if they are not otherwise used. +var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. +// A compilation error at this line likely means your copy of the +// proto package needs to be updated. 
+const _ = proto.GoGoProtoPackageIsVersion2 // please upgrade the proto package + +type KeyValueUpdate struct { + Key string `protobuf:"bytes,1,opt,name=key,proto3" json:"key,omitempty"` + Value string `protobuf:"bytes,2,opt,name=value,proto3" json:"value,omitempty"` + Commit bool `protobuf:"varint,3,opt,name=commit,proto3" json:"commit,omitempty"` +} + +func (m *KeyValueUpdate) Reset() { *m = KeyValueUpdate{} } +func (m *KeyValueUpdate) String() string { return proto.CompactTextString(m) } +func (*KeyValueUpdate) ProtoMessage() {} +func (*KeyValueUpdate) Descriptor() ([]byte, []int) { return fileDescriptorKv, []int{0} } + +func (m *KeyValueUpdate) GetKey() string { + if m != nil { + return m.Key + } + return "" +} + +func (m *KeyValueUpdate) GetValue() string { + if m != nil { + return m.Value + } + return "" +} + +func (m *KeyValueUpdate) GetCommit() bool { + if m != nil { + return m.Commit + } + return false +} + +type KeyValueUpdateResult struct { + Key string `protobuf:"bytes,1,opt,name=key,proto3" json:"key,omitempty"` + Old string `protobuf:"bytes,2,opt,name=old,proto3" json:"old,omitempty"` + New string `protobuf:"bytes,3,opt,name=new,proto3" json:"new,omitempty"` +} + +func (m *KeyValueUpdateResult) Reset() { *m = KeyValueUpdateResult{} } +func (m *KeyValueUpdateResult) String() string { return proto.CompactTextString(m) } +func (*KeyValueUpdateResult) ProtoMessage() {} +func (*KeyValueUpdateResult) Descriptor() ([]byte, []int) { return fileDescriptorKv, []int{1} } + +func (m *KeyValueUpdateResult) GetKey() string { + if m != nil { + return m.Key + } + return "" +} + +func (m *KeyValueUpdateResult) GetOld() string { + if m != nil { + return m.Old + } + return "" +} + +func (m *KeyValueUpdateResult) GetNew() string { + if m != nil { + return m.New + } + return "" +} + +type QueryLimits struct { + MaxRecentlyQueriedSeriesBlocks *QueryLimit `protobuf:"bytes,1,opt,name=maxRecentlyQueriedSeriesBlocks" json:"maxRecentlyQueriedSeriesBlocks,omitempty"` + 
MaxRecentlyQueriedSeriesDiskBytesRead *QueryLimit `protobuf:"bytes,2,opt,name=maxRecentlyQueriedSeriesDiskBytesRead" json:"maxRecentlyQueriedSeriesDiskBytesRead,omitempty"` + MaxRecentlyQueriedSeriesDiskRead *QueryLimit `protobuf:"bytes,3,opt,name=maxRecentlyQueriedSeriesDiskRead" json:"maxRecentlyQueriedSeriesDiskRead,omitempty"` +} + +func (m *QueryLimits) Reset() { *m = QueryLimits{} } +func (m *QueryLimits) String() string { return proto.CompactTextString(m) } +func (*QueryLimits) ProtoMessage() {} +func (*QueryLimits) Descriptor() ([]byte, []int) { return fileDescriptorKv, []int{2} } + +func (m *QueryLimits) GetMaxRecentlyQueriedSeriesBlocks() *QueryLimit { + if m != nil { + return m.MaxRecentlyQueriedSeriesBlocks + } + return nil +} + +func (m *QueryLimits) GetMaxRecentlyQueriedSeriesDiskBytesRead() *QueryLimit { + if m != nil { + return m.MaxRecentlyQueriedSeriesDiskBytesRead + } + return nil +} + +func (m *QueryLimits) GetMaxRecentlyQueriedSeriesDiskRead() *QueryLimit { + if m != nil { + return m.MaxRecentlyQueriedSeriesDiskRead + } + return nil +} + +type QueryLimit struct { + Limit int64 `protobuf:"varint,1,opt,name=limit,proto3" json:"limit,omitempty"` + LookbackSeconds int64 `protobuf:"varint,2,opt,name=lookbackSeconds,proto3" json:"lookbackSeconds,omitempty"` + ForceExceeded bool `protobuf:"varint,3,opt,name=forceExceeded,proto3" json:"forceExceeded,omitempty"` +} + +func (m *QueryLimit) Reset() { *m = QueryLimit{} } +func (m *QueryLimit) String() string { return proto.CompactTextString(m) } +func (*QueryLimit) ProtoMessage() {} +func (*QueryLimit) Descriptor() ([]byte, []int) { return fileDescriptorKv, []int{3} } + +func (m *QueryLimit) GetLimit() int64 { + if m != nil { + return m.Limit + } + return 0 +} + +func (m *QueryLimit) GetLookbackSeconds() int64 { + if m != nil { + return m.LookbackSeconds + } + return 0 +} + +func (m *QueryLimit) GetForceExceeded() bool { + if m != nil { + return m.ForceExceeded + } + return false +} + +func init() { + 
proto.RegisterType((*KeyValueUpdate)(nil), "kvpb.KeyValueUpdate") + proto.RegisterType((*KeyValueUpdateResult)(nil), "kvpb.KeyValueUpdateResult") + proto.RegisterType((*QueryLimits)(nil), "kvpb.QueryLimits") + proto.RegisterType((*QueryLimit)(nil), "kvpb.QueryLimit") +} +func (m *KeyValueUpdate) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *KeyValueUpdate) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if len(m.Key) > 0 { + dAtA[i] = 0xa + i++ + i = encodeVarintKv(dAtA, i, uint64(len(m.Key))) + i += copy(dAtA[i:], m.Key) + } + if len(m.Value) > 0 { + dAtA[i] = 0x12 + i++ + i = encodeVarintKv(dAtA, i, uint64(len(m.Value))) + i += copy(dAtA[i:], m.Value) + } + if m.Commit { + dAtA[i] = 0x18 + i++ + if m.Commit { + dAtA[i] = 1 + } else { + dAtA[i] = 0 + } + i++ + } + return i, nil +} + +func (m *KeyValueUpdateResult) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *KeyValueUpdateResult) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if len(m.Key) > 0 { + dAtA[i] = 0xa + i++ + i = encodeVarintKv(dAtA, i, uint64(len(m.Key))) + i += copy(dAtA[i:], m.Key) + } + if len(m.Old) > 0 { + dAtA[i] = 0x12 + i++ + i = encodeVarintKv(dAtA, i, uint64(len(m.Old))) + i += copy(dAtA[i:], m.Old) + } + if len(m.New) > 0 { + dAtA[i] = 0x1a + i++ + i = encodeVarintKv(dAtA, i, uint64(len(m.New))) + i += copy(dAtA[i:], m.New) + } + return i, nil +} + +func (m *QueryLimits) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *QueryLimits) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int 
+ _ = l + if m.MaxRecentlyQueriedSeriesBlocks != nil { + dAtA[i] = 0xa + i++ + i = encodeVarintKv(dAtA, i, uint64(m.MaxRecentlyQueriedSeriesBlocks.Size())) + n1, err := m.MaxRecentlyQueriedSeriesBlocks.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n1 + } + if m.MaxRecentlyQueriedSeriesDiskBytesRead != nil { + dAtA[i] = 0x12 + i++ + i = encodeVarintKv(dAtA, i, uint64(m.MaxRecentlyQueriedSeriesDiskBytesRead.Size())) + n2, err := m.MaxRecentlyQueriedSeriesDiskBytesRead.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n2 + } + if m.MaxRecentlyQueriedSeriesDiskRead != nil { + dAtA[i] = 0x1a + i++ + i = encodeVarintKv(dAtA, i, uint64(m.MaxRecentlyQueriedSeriesDiskRead.Size())) + n3, err := m.MaxRecentlyQueriedSeriesDiskRead.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n3 + } + return i, nil +} + +func (m *QueryLimit) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *QueryLimit) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if m.Limit != 0 { + dAtA[i] = 0x8 + i++ + i = encodeVarintKv(dAtA, i, uint64(m.Limit)) + } + if m.LookbackSeconds != 0 { + dAtA[i] = 0x10 + i++ + i = encodeVarintKv(dAtA, i, uint64(m.LookbackSeconds)) + } + if m.ForceExceeded { + dAtA[i] = 0x18 + i++ + if m.ForceExceeded { + dAtA[i] = 1 + } else { + dAtA[i] = 0 + } + i++ + } + return i, nil +} + +func encodeVarintKv(dAtA []byte, offset int, v uint64) int { + for v >= 1<<7 { + dAtA[offset] = uint8(v&0x7f | 0x80) + v >>= 7 + offset++ + } + dAtA[offset] = uint8(v) + return offset + 1 +} +func (m *KeyValueUpdate) Size() (n int) { + var l int + _ = l + l = len(m.Key) + if l > 0 { + n += 1 + l + sovKv(uint64(l)) + } + l = len(m.Value) + if l > 0 { + n += 1 + l + sovKv(uint64(l)) + } + if m.Commit { + n += 2 + } + return n +} + +func (m *KeyValueUpdateResult) Size() (n int) { + var 
l int + _ = l + l = len(m.Key) + if l > 0 { + n += 1 + l + sovKv(uint64(l)) + } + l = len(m.Old) + if l > 0 { + n += 1 + l + sovKv(uint64(l)) + } + l = len(m.New) + if l > 0 { + n += 1 + l + sovKv(uint64(l)) + } + return n +} + +func (m *QueryLimits) Size() (n int) { + var l int + _ = l + if m.MaxRecentlyQueriedSeriesBlocks != nil { + l = m.MaxRecentlyQueriedSeriesBlocks.Size() + n += 1 + l + sovKv(uint64(l)) + } + if m.MaxRecentlyQueriedSeriesDiskBytesRead != nil { + l = m.MaxRecentlyQueriedSeriesDiskBytesRead.Size() + n += 1 + l + sovKv(uint64(l)) + } + if m.MaxRecentlyQueriedSeriesDiskRead != nil { + l = m.MaxRecentlyQueriedSeriesDiskRead.Size() + n += 1 + l + sovKv(uint64(l)) + } + return n +} + +func (m *QueryLimit) Size() (n int) { + var l int + _ = l + if m.Limit != 0 { + n += 1 + sovKv(uint64(m.Limit)) + } + if m.LookbackSeconds != 0 { + n += 1 + sovKv(uint64(m.LookbackSeconds)) + } + if m.ForceExceeded { + n += 2 + } + return n +} + +func sovKv(x uint64) (n int) { + for { + n++ + x >>= 7 + if x == 0 { + break + } + } + return n +} +func sozKv(x uint64) (n int) { + return sovKv(uint64((x << 1) ^ uint64((int64(x) >> 63)))) +} +func (m *KeyValueUpdate) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowKv + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: KeyValueUpdate: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: KeyValueUpdate: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Key", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift 
+= 7 { + if shift >= 64 { + return ErrIntOverflowKv + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthKv + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Key = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Value", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowKv + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthKv + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Value = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 3: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Commit", wireType) + } + var v int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowKv + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + v |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + m.Commit = bool(v != 0) + default: + iNdEx = preIndex + skippy, err := skipKv(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthKv + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *KeyValueUpdateResult) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowKv + } + if 
iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: KeyValueUpdateResult: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: KeyValueUpdateResult: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Key", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowKv + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthKv + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Key = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Old", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowKv + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthKv + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Old = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 3: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field New", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowKv + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen 
|= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthKv + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.New = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipKv(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthKv + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *QueryLimits) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowKv + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: QueryLimits: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: QueryLimits: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field MaxRecentlyQueriedSeriesBlocks", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowKv + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthKv + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.MaxRecentlyQueriedSeriesBlocks == nil { + m.MaxRecentlyQueriedSeriesBlocks = &QueryLimit{} + } + if err := m.MaxRecentlyQueriedSeriesBlocks.Unmarshal(dAtA[iNdEx:postIndex]); err != nil 
{ + return err + } + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field MaxRecentlyQueriedSeriesDiskBytesRead", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowKv + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthKv + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.MaxRecentlyQueriedSeriesDiskBytesRead == nil { + m.MaxRecentlyQueriedSeriesDiskBytesRead = &QueryLimit{} + } + if err := m.MaxRecentlyQueriedSeriesDiskBytesRead.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 3: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field MaxRecentlyQueriedSeriesDiskRead", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowKv + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthKv + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.MaxRecentlyQueriedSeriesDiskRead == nil { + m.MaxRecentlyQueriedSeriesDiskRead = &QueryLimit{} + } + if err := m.MaxRecentlyQueriedSeriesDiskRead.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipKv(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthKv + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *QueryLimit) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + 
preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowKv + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: QueryLimit: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: QueryLimit: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Limit", wireType) + } + m.Limit = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowKv + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Limit |= (int64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 2: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field LookbackSeconds", wireType) + } + m.LookbackSeconds = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowKv + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.LookbackSeconds |= (int64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 3: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field ForceExceeded", wireType) + } + var v int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowKv + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + v |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + m.ForceExceeded = bool(v != 0) + default: + iNdEx = preIndex + skippy, err := skipKv(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthKv + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return 
io.ErrUnexpectedEOF + } + return nil +} +func skipKv(dAtA []byte) (n int, err error) { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowKv + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + wireType := int(wire & 0x7) + switch wireType { + case 0: + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowKv + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + iNdEx++ + if dAtA[iNdEx-1] < 0x80 { + break + } + } + return iNdEx, nil + case 1: + iNdEx += 8 + return iNdEx, nil + case 2: + var length int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowKv + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + length |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + iNdEx += length + if length < 0 { + return 0, ErrInvalidLengthKv + } + return iNdEx, nil + case 3: + for { + var innerWire uint64 + var start int = iNdEx + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowKv + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + innerWire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + innerWireType := int(innerWire & 0x7) + if innerWireType == 4 { + break + } + next, err := skipKv(dAtA[start:]) + if err != nil { + return 0, err + } + iNdEx = start + next + } + return iNdEx, nil + case 4: + return iNdEx, nil + case 5: + iNdEx += 4 + return iNdEx, nil + default: + return 0, fmt.Errorf("proto: illegal wireType %d", wireType) + } + } + panic("unreachable") +} + +var ( + ErrInvalidLengthKv = fmt.Errorf("proto: negative length found during unmarshaling") + ErrIntOverflowKv = fmt.Errorf("proto: integer overflow") +) + +func init() { + 
proto.RegisterFile("github.com/m3db/m3/src/cluster/generated/proto/kvpb/kv.proto", fileDescriptorKv) +} + +var fileDescriptorKv = []byte{ + // 361 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x52, 0xcf, 0x6e, 0xda, 0x30, + 0x18, 0x5f, 0xc8, 0x86, 0xc6, 0x87, 0xb6, 0x45, 0x16, 0x9a, 0x38, 0x45, 0x28, 0xda, 0x24, 0x4e, + 0xb1, 0x34, 0xae, 0x3b, 0xa1, 0xed, 0x54, 0x0e, 0xad, 0x51, 0xab, 0x1e, 0x7a, 0x49, 0xec, 0x0f, + 0x1a, 0xc5, 0x89, 0x51, 0xec, 0x50, 0xf2, 0x16, 0x7d, 0x91, 0xbe, 0x47, 0x8f, 0x7d, 0x84, 0x8a, + 0xbe, 0x48, 0x65, 0x83, 0x84, 0xa8, 0xa0, 0xf4, 0x12, 0x7d, 0xbf, 0x5f, 0x7e, 0x7f, 0xe2, 0x7c, + 0x86, 0xbf, 0xf3, 0xcc, 0xdc, 0xd6, 0x69, 0xcc, 0x55, 0x41, 0x8b, 0x91, 0x48, 0x69, 0x31, 0xa2, + 0xba, 0xe2, 0x94, 0xcb, 0x5a, 0x1b, 0xac, 0xe8, 0x1c, 0x4b, 0xac, 0x12, 0x83, 0x82, 0x2e, 0x2a, + 0x65, 0x14, 0xcd, 0x97, 0x8b, 0x94, 0xe6, 0xcb, 0xd8, 0x21, 0xf2, 0xd9, 0xc2, 0xe8, 0x1c, 0xbe, + 0x9f, 0x61, 0x73, 0x95, 0xc8, 0x1a, 0x2f, 0x17, 0x22, 0x31, 0x48, 0x02, 0xf0, 0x73, 0x6c, 0xfa, + 0xde, 0xc0, 0x1b, 0x76, 0x98, 0x1d, 0x49, 0x0f, 0xbe, 0x2c, 0xad, 0xa0, 0xdf, 0x72, 0xdc, 0x06, + 0x90, 0x9f, 0xd0, 0xe6, 0xaa, 0x28, 0x32, 0xd3, 0xf7, 0x07, 0xde, 0xf0, 0x2b, 0xdb, 0xa2, 0x68, + 0x02, 0xbd, 0xfd, 0x44, 0x86, 0xba, 0x96, 0xe6, 0x40, 0x6e, 0x00, 0xbe, 0x92, 0x62, 0x9b, 0x6a, + 0x47, 0xcb, 0x94, 0x78, 0xe7, 0x02, 0x3b, 0xcc, 0x8e, 0xd1, 0x43, 0x0b, 0xba, 0x17, 0x35, 0x56, + 0xcd, 0x24, 0x2b, 0x32, 0xa3, 0xc9, 0x35, 0x84, 0x45, 0xb2, 0x62, 0xc8, 0xb1, 0x34, 0xb2, 0xb1, + 0x6f, 0x32, 0x14, 0x53, 0xfb, 0xd4, 0x63, 0xa9, 0x78, 0xae, 0x5d, 0x41, 0xf7, 0x4f, 0x10, 0xdb, + 0xe3, 0xc5, 0x3b, 0x2b, 0x3b, 0xe1, 0x23, 0x33, 0xf8, 0x7d, 0x4c, 0xf1, 0x2f, 0xd3, 0xf9, 0xb8, + 0x31, 0xa8, 0x19, 0x26, 0x9b, 0xef, 0x3d, 0x54, 0xf0, 0x31, 0x3b, 0xb9, 0x81, 0xc1, 0x7b, 0x42, + 0x57, 0xe1, 0x1f, 0xa9, 0x38, 0xe9, 0x8c, 0x2a, 0x80, 0x9d, 0xde, 0x6e, 0x4e, 0xda, 0xc1, 0xfd, + 0x14, 0x9f, 0x6d, 0x00, 0x19, 0xc2, 0x0f, 0xa9, 
0x54, 0x9e, 0x26, 0x3c, 0x9f, 0x22, 0x57, 0xa5, + 0xd0, 0xee, 0x4c, 0x3e, 0x7b, 0x4b, 0x93, 0x5f, 0xf0, 0x6d, 0xa6, 0x2a, 0x8e, 0xff, 0x57, 0x1c, + 0x51, 0xa0, 0xd8, 0xae, 0x7a, 0x9f, 0x1c, 0x07, 0x8f, 0xeb, 0xd0, 0x7b, 0x5a, 0x87, 0xde, 0xf3, + 0x3a, 0xf4, 0xee, 0x5f, 0xc2, 0x4f, 0x69, 0xdb, 0x5d, 0xb1, 0xd1, 0x6b, 0x00, 0x00, 0x00, 0xff, + 0xff, 0x4e, 0xb0, 0xcd, 0x62, 0xa2, 0x02, 0x00, 0x00, +} diff --git a/src/cluster/generated/proto/kvpb/kv.proto b/src/cluster/generated/proto/kvpb/kv.proto new file mode 100644 index 0000000000..ef2b2f60d5 --- /dev/null +++ b/src/cluster/generated/proto/kvpb/kv.proto @@ -0,0 +1,46 @@ +// Copyright (c) 2021 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+syntax = "proto3"; + +package kvpb; + +message KeyValueUpdate { + string key = 1; + string value = 2; + bool commit = 3; +} + +message KeyValueUpdateResult { + string key = 1; + string old = 2; + string new = 3; +} + +message QueryLimits { + QueryLimit maxRecentlyQueriedSeriesBlocks = 1; + QueryLimit maxRecentlyQueriedSeriesDiskBytesRead = 2; + QueryLimit maxRecentlyQueriedSeriesDiskRead = 3; +} + +message QueryLimit { + int64 limit = 1; + int64 lookbackSeconds = 2; + bool forceExceeded = 3; +} diff --git a/src/dbnode/generated/thrift/rpc/rpc.go b/src/dbnode/generated/thrift/rpc/rpc.go index 678aa3b183..ea8421cec3 100644 --- a/src/dbnode/generated/thrift/rpc/rpc.go +++ b/src/dbnode/generated/thrift/rpc/rpc.go @@ -1,4 +1,4 @@ -// Copyright (c) 2020 Uber Technologies, Inc. +// Copyright (c) 2021 Uber Technologies, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/src/dbnode/kvconfig/keys.go b/src/dbnode/kvconfig/keys.go index e607be8d38..7d90d48cab 100644 --- a/src/dbnode/kvconfig/keys.go +++ b/src/dbnode/kvconfig/keys.go @@ -49,4 +49,7 @@ const ( // ClientWriteConsistencyLevel is the KV config key for the runtime // configuration specifying the client write consistency level ClientWriteConsistencyLevel = "m3db.client.write-consistency-level" + + // QueryLimits is the KV config key for query limits enforced on each dbnode. 
+ QueryLimits = "m3db.query.limits" ) diff --git a/src/dbnode/persist/fs/retriever.go b/src/dbnode/persist/fs/retriever.go index 535ca71df9..93dcce34e8 100644 --- a/src/dbnode/persist/fs/retriever.go +++ b/src/dbnode/persist/fs/retriever.go @@ -595,7 +595,6 @@ func (r *blockRetriever) streamRequest( shard uint32, id ident.ID, startTime time.Time, - nsCtx namespace.Context, ) error { req.resultWg.Add(1) if err := r.queryLimits.DiskSeriesReadLimit().Inc(1, req.source); err != nil { @@ -672,7 +671,7 @@ func (r *blockRetriever) Stream( } } - err = r.streamRequest(ctx, req, shard, id, startTime, nsCtx) + err = r.streamRequest(ctx, req, shard, id, startTime) if err != nil { req.resultWg.Done() return xio.EmptyBlockReader, err @@ -708,7 +707,7 @@ func (r *blockRetriever) StreamWideEntry( req.streamReqType = streamWideEntryReq req.wideFilter = filter - err = r.streamRequest(ctx, req, shard, id, startTime, nsCtx) + err = r.streamRequest(ctx, req, shard, id, startTime) if err != nil { req.resultWg.Done() return block.EmptyStreamedWideEntry, err diff --git a/src/dbnode/persist/fs/retriever_test.go b/src/dbnode/persist/fs/retriever_test.go index 5e7178050a..0fdca0196c 100644 --- a/src/dbnode/persist/fs/retriever_test.go +++ b/src/dbnode/persist/fs/retriever_test.go @@ -820,7 +820,7 @@ func TestLimitSeriesReadFromDisk(t *testing.T) { SetBytesReadLimitOpts(limits.DefaultLookbackLimitOptions()). SetDocsLimitOpts(limits.DefaultLookbackLimitOptions()). 
SetDiskSeriesReadLimitOpts(limits.LookbackLimitOptions{ - Limit: 1, + Limit: 2, Lookback: time.Second * 1, }) queryLimits, err := limits.NewQueryLimits(limitOpts) @@ -833,8 +833,8 @@ func TestLimitSeriesReadFromDisk(t *testing.T) { require.NoError(t, err) req := &retrieveRequest{} retriever := publicRetriever.(*blockRetriever) - _ = retriever.streamRequest(context.NewContext(), req, 0, ident.StringID("id"), time.Now(), namespace.Context{}) - err = retriever.streamRequest(context.NewContext(), req, 0, ident.StringID("id"), time.Now(), namespace.Context{}) + _ = retriever.streamRequest(context.NewContext(), req, 0, ident.StringID("id"), time.Now()) + err = retriever.streamRequest(context.NewContext(), req, 0, ident.StringID("id"), time.Now()) require.Error(t, err) require.Contains(t, err.Error(), "query aborted due to limit") diff --git a/src/dbnode/server/server.go b/src/dbnode/server/server.go index 7b50475ec1..1a9e5cd378 100644 --- a/src/dbnode/server/server.go +++ b/src/dbnode/server/server.go @@ -39,6 +39,7 @@ import ( clusterclient "github.com/m3db/m3/src/cluster/client" "github.com/m3db/m3/src/cluster/client/etcd" "github.com/m3db/m3/src/cluster/generated/proto/commonpb" + "github.com/m3db/m3/src/cluster/generated/proto/kvpb" "github.com/m3db/m3/src/cluster/kv" "github.com/m3db/m3/src/cmd/services/m3dbnode/config" queryconfig "github.com/m3db/m3/src/cmd/services/m3query/config" @@ -993,6 +994,7 @@ func Run(runOpts RunOptions) { runtimeOptsMgr, cfg.Limits.WriteNewSeriesPerSecond) kvWatchEncodersPerBlockLimit(syncCfg.KVStore, logger, runtimeOptsMgr, cfg.Limits.MaxEncodersPerBlock) + kvWatchQueryLimit(syncCfg.KVStore, logger, queryLimits, limitOpts) }() // Wait for process interrupt. 
@@ -1165,6 +1167,100 @@ func kvWatchEncodersPerBlockLimit( }() } +func kvWatchQueryLimit( + store kv.Store, + logger *zap.Logger, + limits limits.QueryLimits, + defaultOpts limits.Options, +) { + value, err := store.Get(kvconfig.QueryLimits) + if err == nil { + dynamicLimits := &kvpb.QueryLimits{} + err = value.Unmarshal(dynamicLimits) + if err == nil { + updateQueryLimits(logger, limits, dynamicLimits, defaultOpts) + } + } else if !errors.Is(err, kv.ErrNotFound) { + logger.Warn("error resolving query limit", zap.Error(err)) + } + + watch, err := store.Watch(kvconfig.QueryLimits) + if err != nil { + logger.Error("could not watch query limit", zap.Error(err)) + return + } + + go func() { + dynamicLimits := &kvpb.QueryLimits{} + for range watch.C() { + if newValue := watch.Get(); newValue != nil { + if err := newValue.Unmarshal(dynamicLimits); err != nil { + logger.Warn("unable to parse new query limits", zap.Error(err)) + continue + } + updateQueryLimits(logger, limits, dynamicLimits, defaultOpts) + } + } + }() +} + +func updateQueryLimits(logger *zap.Logger, + queryLimits limits.QueryLimits, + dynamicOpts *kvpb.QueryLimits, + configOpts limits.Options, +) { + var ( + // Default to the config-based limits if unset in dynamic limits. + // Otherwise, use the dynamic limit. 
+ docsLimitOpts = configOpts.DocsLimitOpts() + diskSeriesReadLimitOpts = configOpts.DiskSeriesReadLimitOpts() + bytesReadLimitOpts = configOpts.BytesReadLimitOpts() + ) + if dynamicOpts != nil { + if dynamicOpts.MaxRecentlyQueriedSeriesBlocks != nil { + docsLimitOpts = dynamicLimitToLimitOpts(dynamicOpts.MaxRecentlyQueriedSeriesBlocks) + } + if dynamicOpts.MaxRecentlyQueriedSeriesDiskRead != nil { + diskSeriesReadLimitOpts = dynamicLimitToLimitOpts(dynamicOpts.MaxRecentlyQueriedSeriesDiskRead) + } + if dynamicOpts.MaxRecentlyQueriedSeriesDiskBytesRead != nil { + bytesReadLimitOpts = dynamicLimitToLimitOpts(dynamicOpts.MaxRecentlyQueriedSeriesDiskBytesRead) + } + } + + if err := updateQueryLimit(queryLimits.DocsLimit(), docsLimitOpts); err != nil { + logger.Error("error updating docs limit", zap.Error(err)) + } + + if err := updateQueryLimit(queryLimits.DiskSeriesReadLimit(), diskSeriesReadLimitOpts); err != nil { + logger.Error("error updating series read limit", zap.Error(err)) + } + + if err := updateQueryLimit(queryLimits.BytesReadLimit(), bytesReadLimitOpts); err != nil { + logger.Error("error updating bytes read limit", zap.Error(err)) + } +} + +func updateQueryLimit( + limit limits.LookbackLimit, + newOpts limits.LookbackLimitOptions, +) error { + old := limit.Options() + if old.Equals(newOpts) { + return nil + } + + return limit.Update(newOpts) +} + +func dynamicLimitToLimitOpts(dynamicLimit *kvpb.QueryLimit) limits.LookbackLimitOptions { + return limits.LookbackLimitOptions{ + Limit: dynamicLimit.Limit, + Lookback: time.Duration(dynamicLimit.LookbackSeconds) * time.Second, + ForceExceeded: dynamicLimit.ForceExceeded, + } +} + func kvWatchClientConsistencyLevels( store kv.Store, logger *zap.Logger, diff --git a/src/dbnode/storage/limits/noop_query_limits.go b/src/dbnode/storage/limits/noop_query_limits.go index cf0a9497a6..b1c1787254 100644 --- a/src/dbnode/storage/limits/noop_query_limits.go +++ b/src/dbnode/storage/limits/noop_query_limits.go @@ -58,6 
+58,14 @@ func (q *noOpQueryLimits) Stop() { func (q *noOpQueryLimits) Start() { } +func (q *noOpLookbackLimit) Options() LookbackLimitOptions { + return LookbackLimitOptions{} +} + +func (q *noOpLookbackLimit) Update(LookbackLimitOptions) error { + return nil +} + func (q *noOpLookbackLimit) Inc(int, []byte) error { return nil } diff --git a/src/dbnode/storage/limits/query_limits.go b/src/dbnode/storage/limits/query_limits.go index 3b4201729c..5de39875a5 100644 --- a/src/dbnode/storage/limits/query_limits.go +++ b/src/dbnode/storage/limits/query_limits.go @@ -1,4 +1,4 @@ -// Copyright (c) 2020 Uber Technologies, Inc. +// Copyright (c) 2021 Uber Technologies, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -22,17 +22,20 @@ package limits import ( "fmt" + "sync" "time" - xerrors "github.com/m3db/m3/src/x/errors" - "github.com/m3db/m3/src/x/instrument" - "github.com/uber-go/tally" "go.uber.org/atomic" + "go.uber.org/zap" + + xerrors "github.com/m3db/m3/src/x/errors" + "github.com/m3db/m3/src/x/instrument" ) const ( - defaultLookback = time.Second * 15 + disabledLimitValue = 0 + defaultLookback = time.Second * 15 ) type queryLimits struct { @@ -42,18 +45,23 @@ type queryLimits struct { } type lookbackLimit struct { - name string - options LookbackLimitOptions - metrics lookbackLimitMetrics - recent *atomic.Int64 - stopCh chan struct{} + name string + options LookbackLimitOptions + metrics lookbackLimitMetrics + logger *zap.Logger + recent *atomic.Int64 + stopCh chan struct{} + stoppedCh chan struct{} + lock sync.RWMutex } type lookbackLimitMetrics struct { - recentCount tally.Gauge - recentMax tally.Gauge - total tally.Counter - exceeded tally.Counter + optionsLimit tally.Gauge + optionsLookback tally.Gauge + recentCount tally.Gauge + recentMax tally.Gauge + total tally.Counter + exceeded tally.Counter sourceLogger SourceLogger } @@ -67,7 +75,7 
@@ var ( func DefaultLookbackLimitOptions() LookbackLimitOptions { return LookbackLimitOptions{ // Default to no limit. - Limit: 0, + Limit: disabledLimitValue, Lookback: defaultLookback, } } @@ -107,11 +115,13 @@ func newLookbackLimit( sourceLoggerBuilder SourceLoggerBuilder, ) *lookbackLimit { return &lookbackLimit{ - name: name, - options: opts, - metrics: newLookbackLimitMetrics(instrumentOpts, name, sourceLoggerBuilder), - recent: atomic.NewInt64(0), - stopCh: make(chan struct{}), + name: name, + options: opts, + metrics: newLookbackLimitMetrics(instrumentOpts, name, sourceLoggerBuilder), + logger: instrumentOpts.Logger(), + recent: atomic.NewInt64(0), + stopCh: make(chan struct{}), + stoppedCh: make(chan struct{}), } } @@ -128,10 +138,12 @@ func newLookbackLimitMetrics( instrumentOpts.SetMetricsScope(scope)) return lookbackLimitMetrics{ - recentCount: scope.Gauge(fmt.Sprintf("recent-count-%s", name)), - recentMax: scope.Gauge(fmt.Sprintf("recent-max-%s", name)), - total: scope.Counter(fmt.Sprintf("total-%s", name)), - exceeded: scope.Tagged(map[string]string{"limit": name}).Counter("exceeded"), + optionsLimit: scope.Gauge(fmt.Sprintf("current-limit%s", name)), + optionsLookback: scope.Gauge(fmt.Sprintf("current-lookback-%s", name)), + recentCount: scope.Gauge(fmt.Sprintf("recent-count-%s", name)), + recentMax: scope.Gauge(fmt.Sprintf("recent-max-%s", name)), + total: scope.Counter(fmt.Sprintf("total-%s", name)), + exceeded: scope.Tagged(map[string]string{"limit": name}).Counter("exceeded"), sourceLogger: sourceLogger, } @@ -150,15 +162,19 @@ func (q *queryLimits) DiskSeriesReadLimit() LookbackLimit { } func (q *queryLimits) Start() { - q.docsLimit.start() - q.seriesDiskReadLimit.start() - q.bytesReadLimit.start() + // Lock on explicit start to avoid any collision with asynchronous updating + // which will call stop/start if the lookback has changed. 
+ q.docsLimit.startWithLock() + q.seriesDiskReadLimit.startWithLock() + q.bytesReadLimit.startWithLock() } func (q *queryLimits) Stop() { - q.docsLimit.stop() - q.seriesDiskReadLimit.stop() - q.bytesReadLimit.stop() + // Lock on explicit stop to avoid any collision with asynchronous updating + // which will call stop/start if the lookback has changed. + q.docsLimit.stopWithLock() + q.seriesDiskReadLimit.stopWithLock() + q.bytesReadLimit.stopWithLock() } func (q *queryLimits) AnyExceeded() error { @@ -171,6 +187,39 @@ func (q *queryLimits) AnyExceeded() error { return q.bytesReadLimit.exceeded() } +func (q *lookbackLimit) Options() LookbackLimitOptions { + q.lock.RLock() + o := q.options + q.lock.RUnlock() + return o +} + +// Update updates the limit. +func (q *lookbackLimit) Update(opts LookbackLimitOptions) error { + if err := opts.validate(); err != nil { + return err + } + + q.lock.Lock() + defer q.lock.Unlock() + + old := q.options + q.options = opts + + // If the lookback changed, replace the background goroutine that manages the periodic resetting. + if q.options.Lookback != old.Lookback { + q.stop() + q.start() + } + + q.logger.Info("query limit options updated", + zap.String("name", q.name), + zap.Any("new", opts), + zap.Any("old", old)) + + return nil +} + // Inc increments the current value and returns an error if above the limit. 
func (q *lookbackLimit) Inc(val int, source []byte) error { if val < 0 { @@ -199,7 +248,21 @@ func (q *lookbackLimit) exceeded() error { } func (q *lookbackLimit) checkLimit(recent int64) error { - if q.options.Limit > 0 && recent > q.options.Limit { + q.lock.RLock() + currentOpts := q.options + q.lock.RUnlock() + + if currentOpts.ForceExceeded { + q.metrics.exceeded.Inc(1) + return xerrors.NewInvalidParamsError(NewQueryLimitExceededError(fmt.Sprintf( + "query aborted due to forced limit: name=%s", q.name))) + } + + if currentOpts.Limit == disabledLimitValue { + return nil + } + + if recent >= currentOpts.Limit { q.metrics.exceeded.Inc(1) return xerrors.NewInvalidParamsError(NewQueryLimitExceededError(fmt.Sprintf( "query aborted due to limit: name=%s, limit=%d, current=%d, within=%s", @@ -208,23 +271,45 @@ func (q *lookbackLimit) checkLimit(recent int64) error { return nil } +func (q *lookbackLimit) startWithLock() { + q.lock.Lock() + defer q.lock.Unlock() + q.start() +} + +func (q *lookbackLimit) stopWithLock() { + q.lock.Lock() + defer q.lock.Unlock() + q.stop() +} + func (q *lookbackLimit) start() { ticker := time.NewTicker(q.options.Lookback) go func() { + q.logger.Info("query limit interval started", zap.String("name", q.name)) for { select { case <-ticker.C: q.reset() case <-q.stopCh: ticker.Stop() + q.stoppedCh <- struct{}{} return } } }() + + q.metrics.optionsLimit.Update(float64(q.options.Limit)) + q.metrics.optionsLookback.Update(q.options.Lookback.Seconds()) } func (q *lookbackLimit) stop() { close(q.stopCh) + <-q.stoppedCh + q.stopCh = make(chan struct{}) + q.stoppedCh = make(chan struct{}) + + q.logger.Info("query limit interval stopped", zap.String("name", q.name)) } func (q *lookbackLimit) current() int64 { @@ -243,6 +328,13 @@ func (q *lookbackLimit) reset() { q.recent.Store(0) } +// Equals returns true if the other options match the current. 
+func (opts LookbackLimitOptions) Equals(other LookbackLimitOptions) bool { + return opts.Limit == other.Limit && + opts.Lookback == other.Lookback && + opts.ForceExceeded == other.ForceExceeded +} + func (opts LookbackLimitOptions) validate() error { if opts.Limit < 0 { return fmt.Errorf("query limit requires limit >= 0 (%d)", opts.Limit) diff --git a/src/dbnode/storage/limits/query_limits_test.go b/src/dbnode/storage/limits/query_limits_test.go index 23ee91c2ef..6ce32708a9 100644 --- a/src/dbnode/storage/limits/query_limits_test.go +++ b/src/dbnode/storage/limits/query_limits_test.go @@ -48,16 +48,17 @@ func testQueryLimitOptions( } func TestQueryLimits(t *testing.T) { + l := int64(1) docOpts := LookbackLimitOptions{ - Limit: 1, + Limit: l, Lookback: time.Second, } bytesOpts := LookbackLimitOptions{ - Limit: 1, + Limit: l, Lookback: time.Second, } seriesOpts := LookbackLimitOptions{ - Limit: 1, + Limit: l, Lookback: time.Second, } opts := testQueryLimitOptions(docOpts, bytesOpts, seriesOpts, instrument.NewOptions()) @@ -110,41 +111,50 @@ func TestQueryLimits(t *testing.T) { func TestLookbackLimit(t *testing.T) { for _, test := range []struct { - name string - limit int64 + name string + limit int64 + forceExceeded bool }{ {name: "no limit", limit: 0}, {name: "limit", limit: 5}, + {name: "force exceeded limit", limit: 5, forceExceeded: true}, } { t.Run(test.name, func(t *testing.T) { scope := tally.NewTestScope("", nil) iOpts := instrument.NewOptions().SetMetricsScope(scope) opts := LookbackLimitOptions{ - Limit: test.limit, - Lookback: time.Millisecond * 100, + Limit: test.limit, + Lookback: time.Millisecond * 100, + ForceExceeded: test.forceExceeded, } name := "test" limit := newLookbackLimit(iOpts, opts, name, &sourceLoggerBuilder{}) require.Equal(t, int64(0), limit.current()) + + var exceededCount int64 err := limit.exceeded() - require.NoError(t, err) + if test.limit >= 0 && !test.forceExceeded { + require.NoError(t, err) + } else { + require.Error(t, err) + 
exceededCount++ + } // Validate ascending while checking limits. - var exceededCount int64 - exceededCount += verifyLimit(t, limit, 3, test.limit) + exceededCount += verifyLimit(t, limit, 3, test.limit, test.forceExceeded) require.Equal(t, int64(3), limit.current()) verifyMetrics(t, scope, name, 3, 0, 3, exceededCount) - exceededCount += verifyLimit(t, limit, 2, test.limit) + exceededCount += verifyLimit(t, limit, 2, test.limit, test.forceExceeded) require.Equal(t, int64(5), limit.current()) verifyMetrics(t, scope, name, 5, 0, 5, exceededCount) - exceededCount += verifyLimit(t, limit, 1, test.limit) + exceededCount += verifyLimit(t, limit, 1, test.limit, test.forceExceeded) require.Equal(t, int64(6), limit.current()) verifyMetrics(t, scope, name, 6, 0, 6, exceededCount) - exceededCount += verifyLimit(t, limit, 4, test.limit) + exceededCount += verifyLimit(t, limit, 4, test.limit, test.forceExceeded) require.Equal(t, int64(10), limit.current()) verifyMetrics(t, scope, name, 10, 0, 10, exceededCount) @@ -154,11 +164,11 @@ func TestLookbackLimit(t *testing.T) { verifyMetrics(t, scope, name, 0, 10, 10, exceededCount) // Validate ascending again post-reset. 
- exceededCount += verifyLimit(t, limit, 2, test.limit) + exceededCount += verifyLimit(t, limit, 2, test.limit, test.forceExceeded) require.Equal(t, int64(2), limit.current()) verifyMetrics(t, scope, name, 2, 10, 12, exceededCount) - exceededCount += verifyLimit(t, limit, 5, test.limit) + exceededCount += verifyLimit(t, limit, 5, test.limit, test.forceExceeded) require.Equal(t, int64(7), limit.current()) verifyMetrics(t, scope, name, 7, 10, 17, exceededCount) @@ -173,14 +183,37 @@ func TestLookbackLimit(t *testing.T) { require.Equal(t, int64(0), limit.current()) verifyMetrics(t, scope, name, 0, 0, 17, exceededCount) + + limit.reset() + + opts.Limit = 0 + require.NoError(t, limit.Update(opts)) + + exceededCount += verifyLimit(t, limit, 0, opts.Limit, test.forceExceeded) + require.Equal(t, int64(0), limit.current()) + + opts.Limit = 2 + require.NoError(t, limit.Update(opts)) + + exceededCount += verifyLimit(t, limit, 1, opts.Limit, test.forceExceeded) + require.Equal(t, int64(1), limit.current()) + verifyMetrics(t, scope, name, 1, 0, 18, exceededCount) + + exceededCount += verifyLimit(t, limit, 1, opts.Limit, test.forceExceeded) + require.Equal(t, int64(2), limit.current()) + verifyMetrics(t, scope, name, 2, 0, 19, exceededCount) + + exceededCount += verifyLimit(t, limit, 1, opts.Limit, test.forceExceeded) + require.Equal(t, int64(3), limit.current()) + verifyMetrics(t, scope, name, 3, 0, 20, exceededCount) }) } } -func verifyLimit(t *testing.T, limit *lookbackLimit, inc int, expectedLimit int64) int64 { +func verifyLimit(t *testing.T, limit *lookbackLimit, inc int, expectedLimit int64, forceExceeded bool) int64 { var exceededCount int64 err := limit.Inc(inc, nil) - if limit.current() <= expectedLimit || expectedLimit == 0 { + if (expectedLimit == 0 || limit.current() < expectedLimit) && !forceExceeded { require.NoError(t, err) } else { require.Error(t, err) @@ -188,8 +221,9 @@ func verifyLimit(t *testing.T, limit *lookbackLimit, inc int, expectedLimit int6 
require.True(t, IsQueryLimitExceededError(err)) exceededCount++ } + err = limit.exceeded() - if limit.current() <= expectedLimit || expectedLimit == 0 { + if (expectedLimit == 0 || limit.current() < expectedLimit) && !forceExceeded { require.NoError(t, err) } else { require.Error(t, err) diff --git a/src/dbnode/storage/limits/types.go b/src/dbnode/storage/limits/types.go index aa87c493bf..22b46b4b57 100644 --- a/src/dbnode/storage/limits/types.go +++ b/src/dbnode/storage/limits/types.go @@ -52,16 +52,23 @@ type QueryLimits interface { // LookbackLimit provides an interface for a specific query limit. type LookbackLimit interface { + // Options returns the current limit options. + Options() LookbackLimitOptions // Inc increments the recent value for the limit. Inc(new int, source []byte) error + // Update changes the lookback limit settings. + Update(opts LookbackLimitOptions) error } // LookbackLimitOptions holds options for a lookback limit to be enforced. type LookbackLimitOptions struct { // Limit past which errors will be returned. + // Zero disables the limit. Limit int64 // Lookback is the period over which the limit is enforced. Lookback time.Duration + // ForceExceeded, if true, makes all calls to the limit behave as though the limit is exceeded. + ForceExceeded bool } // SourceLoggerBuilder builds a SourceLogger given instrument options. diff --git a/src/query/api/v1/handler/database/common.go b/src/query/api/v1/handler/database/common.go index 70854ea06b..5572141176 100644 --- a/src/query/api/v1/handler/database/common.go +++ b/src/query/api/v1/handler/database/common.go @@ -18,6 +18,7 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. +// Package database contains API endpoints for managing the database. package database import ( @@ -54,6 +55,8 @@ func RegisterRoutes( return err } + kvStoreHandler := NewKeyValueStoreHandler(client, instrumentOpts) + // Register the same handler under two different endpoints. 
This just makes explaining things in // our documentation easier so we can separate out concepts, but share the underlying code. if err := r.Register(queryhttp.RegisterOptions{ @@ -70,6 +73,13 @@ func RegisterRoutes( }); err != nil { return err } + if err := r.Register(queryhttp.RegisterOptions{ + Path: KeyValueStoreURL, + Handler: kvStoreHandler, + Methods: []string{KeyValueStoreHTTPMethod}, + }); err != nil { + return err + } return nil } diff --git a/src/query/api/v1/handler/database/kvstore.go b/src/query/api/v1/handler/database/kvstore.go new file mode 100644 index 0000000000..8b3c5a9b0e --- /dev/null +++ b/src/query/api/v1/handler/database/kvstore.go @@ -0,0 +1,204 @@ +// Copyright (c) 2021 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +package database + +import ( + "encoding/json" + "errors" + "fmt" + "io/ioutil" + "net/http" + + "github.com/gogo/protobuf/jsonpb" + "go.uber.org/zap" + "google.golang.org/protobuf/runtime/protoiface" + + clusterclient "github.com/m3db/m3/src/cluster/client" + "github.com/m3db/m3/src/cluster/generated/proto/commonpb" + "github.com/m3db/m3/src/cluster/generated/proto/kvpb" + "github.com/m3db/m3/src/cluster/kv" + nsproto "github.com/m3db/m3/src/dbnode/generated/proto/namespace" + "github.com/m3db/m3/src/dbnode/kvconfig" + "github.com/m3db/m3/src/query/api/v1/handler" + "github.com/m3db/m3/src/query/util/logging" + xerrors "github.com/m3db/m3/src/x/errors" + "github.com/m3db/m3/src/x/instrument" + xhttp "github.com/m3db/m3/src/x/net/http" +) + +const ( + // KeyValueStoreURL is the url to edit key/value configuration values. + KeyValueStoreURL = handler.RoutePrefixV1 + "/kvstore" + // KeyValueStoreHTTPMethod is the HTTP method used with this resource. + KeyValueStoreHTTPMethod = http.MethodPost +) + +// KeyValueUpdate defines an update to a key's value. +type KeyValueUpdate struct { + // Key to update. + Key string `json:"key"` + // Value to update the key to. + Value json.RawMessage `json:"value"` + // Commit, if false, will not persist the update. If true, the + // update will be persisted. Used to test format of inputs. + Commit bool `json:"commit"` +} + +// KeyValueUpdateResult defines the result of an update to a key's value. +type KeyValueUpdateResult struct { + // Key to update. + Key string `json:"key"` + // Old is the value before the update. + Old string `json:"old"` + // New is the value after the update. + New string `json:"new"` + // Version of the key. 
+ Version int `json:"version"` +} + +// KeyValueStoreHandler represents a handler for the key/value store endpoint +type KeyValueStoreHandler struct { + client clusterclient.Client + instrumentOpts instrument.Options +} + +// NewKeyValueStoreHandler returns a new instance of handler +func NewKeyValueStoreHandler( + client clusterclient.Client, + instrumentOpts instrument.Options, +) http.Handler { + return &KeyValueStoreHandler{ + client: client, + instrumentOpts: instrumentOpts, + } +} + +func (h *KeyValueStoreHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + logger := logging.WithContext(r.Context(), h.instrumentOpts) + + update, err := h.parseBody(r) + if err != nil { + logger.Error("unable to parse request", zap.Error(err)) + xhttp.WriteError(w, err) + return + } + + kvStore, err := h.client.KV() + if err != nil { + logger.Error("unable to get kv store", zap.Error(err)) + xhttp.WriteError(w, err) + return + } + + results, err := h.update(logger, kvStore, update) + if err != nil { + logger.Error("kv store error", + zap.Error(err), + zap.Any("update", update)) + xhttp.WriteError(w, err) + return + } + + xhttp.WriteJSONResponse(w, results, logger) +} + +func (h *KeyValueStoreHandler) parseBody(r *http.Request) (*KeyValueUpdate, error) { + body, err := ioutil.ReadAll(r.Body) + if err != nil { + return nil, xerrors.NewInvalidParamsError(err) + } + defer r.Body.Close() + + var parsed KeyValueUpdate + if err := json.Unmarshal(body, &parsed); err != nil { + return nil, xerrors.NewInvalidParamsError(err) + } + + return &parsed, nil +} + +func (h *KeyValueStoreHandler) update( + logger *zap.Logger, + kvStore kv.Store, + update *KeyValueUpdate, +) (*KeyValueUpdateResult, error) { + old, err := kvStore.Get(update.Key) + if err != nil && !errors.Is(err, kv.ErrNotFound) { + return nil, err + } + + oldProto, err := newKVProtoMessage(update.Key) + if err != nil { + return nil, err + } + + if old != nil { + if err := old.Unmarshal(oldProto); err != nil { + // Only 
log so we can overwrite corrupt existing entries. + logger.Error("cannot unmarshal old kv proto", zap.Error(err), zap.String("key", update.Key)) + } + } + + newProto, err := newKVProtoMessage(update.Key) + if err != nil { + return nil, err + } + + if err := jsonpb.UnmarshalString(string([]byte(update.Value)), newProto); err != nil { + return nil, err + } + + var version int + if update.Commit { + version, err = kvStore.Set(update.Key, newProto) + if err != nil { + return nil, err + } + } + + result := KeyValueUpdateResult{ + Key: update.Key, + Old: oldProto.String(), + New: newProto.String(), + Version: version, + } + + logger.Info("kv store", zap.Any("update", *update), zap.Any("result", result)) + + return &result, nil +} + +func newKVProtoMessage(key string) (protoiface.MessageV1, error) { + switch key { + case kvconfig.NamespacesKey: + return &nsproto.Registry{}, nil + case kvconfig.ClusterNewSeriesInsertLimitKey, + kvconfig.EncodersPerBlockLimitKey: + return &commonpb.Int64Proto{}, nil + case kvconfig.ClientBootstrapConsistencyLevel, + kvconfig.ClientReadConsistencyLevel, + kvconfig.ClientWriteConsistencyLevel: + return &commonpb.StringProto{}, nil + case kvconfig.QueryLimits: + return &kvpb.QueryLimits{}, nil + } + return nil, fmt.Errorf("unsupported kvstore key %s", key) +} diff --git a/src/query/api/v1/handler/database/kvstore_test.go b/src/query/api/v1/handler/database/kvstore_test.go new file mode 100644 index 0000000000..4a6f047bab --- /dev/null +++ b/src/query/api/v1/handler/database/kvstore_test.go @@ -0,0 +1,186 @@ +// Copyright (c) 2021 Uber Technologies, Inc.
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +package database + +import ( + "encoding/json" + "testing" + + "github.com/golang/mock/gomock" + "github.com/stretchr/testify/require" + "go.uber.org/zap" + + "github.com/m3db/m3/src/cluster/generated/proto/kvpb" + "github.com/m3db/m3/src/cluster/kv" + "github.com/m3db/m3/src/dbnode/kvconfig" +) + +func TestUpdateQueryLimits(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + tests := []struct { + name string + limits *kvpb.QueryLimits + commit bool + expectedJSON string + expectedError string + }{ + { + name: `nil`, + limits: nil, + commit: true, + expectedJSON: "", + }, + { + name: `empty`, + limits: &kvpb.QueryLimits{}, + commit: true, + expectedJSON: "", + }, + { + name: `only block - commit`, + limits: &kvpb.QueryLimits{ + MaxRecentlyQueriedSeriesBlocks: &kvpb.QueryLimit{ + Limit: 1, + LookbackSeconds: 15, + ForceExceeded: true, + }, + }, + commit: true, + expectedJSON: `maxRecentlyQueriedSeriesBlocks: `, + }, + { + name: `only block - no commit`, + limits: &kvpb.QueryLimits{ + MaxRecentlyQueriedSeriesBlocks: &kvpb.QueryLimit{ + Limit: 1, + LookbackSeconds: 15, + ForceExceeded: true, + }, + }, + commit: false, + expectedJSON: `maxRecentlyQueriedSeriesBlocks: `, + }, + { + name: `all - commit`, + limits: &kvpb.QueryLimits{ + MaxRecentlyQueriedSeriesBlocks: &kvpb.QueryLimit{ + Limit: 1, + LookbackSeconds: 15, + ForceExceeded: true, + }, + MaxRecentlyQueriedSeriesDiskBytesRead: &kvpb.QueryLimit{ + Limit: 1, + LookbackSeconds: 15, + ForceExceeded: true, + }, + MaxRecentlyQueriedSeriesDiskRead: &kvpb.QueryLimit{ + Limit: 1, + LookbackSeconds: 15, + ForceExceeded: true, + }, + }, + commit: true, + // nolint: lll + expectedJSON: `maxRecentlyQueriedSeriesBlocks: maxRecentlyQueriedSeriesDiskBytesRead: maxRecentlyQueriedSeriesDiskRead: `, + }, + { + name: `all - no commit`, + limits: &kvpb.QueryLimits{ + MaxRecentlyQueriedSeriesBlocks: &kvpb.QueryLimit{ + Limit: 1, + LookbackSeconds: 15, + ForceExceeded: true, + }, + 
MaxRecentlyQueriedSeriesDiskBytesRead: &kvpb.QueryLimit{ + Limit: 1, + LookbackSeconds: 15, + ForceExceeded: true, + }, + MaxRecentlyQueriedSeriesDiskRead: &kvpb.QueryLimit{ + Limit: 1, + LookbackSeconds: 15, + ForceExceeded: true, + }, + }, + commit: false, + // nolint: lll + expectedJSON: `maxRecentlyQueriedSeriesBlocks: maxRecentlyQueriedSeriesDiskBytesRead: maxRecentlyQueriedSeriesDiskRead: `, + }, + } + + for _, test := range tests { + limitJSON, err := json.Marshal(test.limits) + require.NoError(t, err) + + update := &KeyValueUpdate{ + Key: kvconfig.QueryLimits, + Value: json.RawMessage(limitJSON), + Commit: test.commit, + } + + storeMock := kv.NewMockStore(ctrl) + + // (A) test no old value. + storeMock.EXPECT().Get(kvconfig.QueryLimits).Return(nil, kv.ErrNotFound) + if test.commit { + storeMock.EXPECT().Set(kvconfig.QueryLimits, gomock.Any()).Return(0, nil) + } + + handler := &KeyValueStoreHandler{} + r, err := handler.update(zap.NewNop(), storeMock, update) + require.NoError(t, err) + require.Equal(t, kvconfig.QueryLimits, r.Key) + require.Equal(t, "", r.Old) + require.Equal(t, test.expectedJSON, r.New) + require.Equal(t, 0, r.Version) + + // (B) test old value. 
+ mockVal := kv.NewMockValue(ctrl) + storeMock.EXPECT().Get(kvconfig.QueryLimits).Return(mockVal, nil) + mockVal.EXPECT().Unmarshal(gomock.Any()).DoAndReturn(func(v *kvpb.QueryLimits) error { + v.MaxRecentlyQueriedSeriesBlocks = &kvpb.QueryLimit{ + Limit: 10, + LookbackSeconds: 30, + ForceExceeded: false, + } + v.MaxRecentlyQueriedSeriesDiskBytesRead = &kvpb.QueryLimit{ + Limit: 100, + LookbackSeconds: 300, + ForceExceeded: false, + } + return nil + }) + if test.commit { + storeMock.EXPECT().Set(kvconfig.QueryLimits, gomock.Any()).Return(0, nil) + } + + handler = &KeyValueStoreHandler{} + r, err = handler.update(zap.NewNop(), storeMock, update) + require.NoError(t, err) + require.Equal(t, kvconfig.QueryLimits, r.Key) + // nolint: lll + require.Equal(t, `maxRecentlyQueriedSeriesBlocks: maxRecentlyQueriedSeriesDiskBytesRead: `, r.Old) + require.Equal(t, test.expectedJSON, r.New) + require.Equal(t, 0, r.Version) + } +} From 7ae7b3ca9dbab3695e1db4e42baad93ab9f807f7 Mon Sep 17 00:00:00 2001 From: Chris Chinchilla Date: Thu, 21 Jan 2021 17:23:13 +0100 Subject: [PATCH 10/13] Add warning to changing blocksize (#3096) Signed-off-by: ChrisChinchilla --- site/content/operational_guide/namespace_configuration.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/site/content/operational_guide/namespace_configuration.md b/site/content/operational_guide/namespace_configuration.md index ba8a0c26e3..79ce3e6cc7 100644 --- a/site/content/operational_guide/namespace_configuration.md +++ b/site/content/operational_guide/namespace_configuration.md @@ -115,9 +115,14 @@ Deleting a namespace is a simple as using the `DELETE` `/api/v1/services/m3db/na `curl -X DELETE :/api/v1/services/m3db/namespace/` Note that deleting a namespace will not have any effect on the M3DB nodes until they are all restarted. + ### Modifying a Namespace -There is currently no atomic namespace modification endpoint. 
Instead, you will need to delete a namespace and then add it back again with the same name, but modified settings. Review the individual namespace settings above to determine whether or not a given setting is safe to modify. For example, it is never safe to modify the blockSize of a namespace. +There is currently no atomic namespace modification endpoint. Instead, you will need to delete a namespace and then add it back again with the same name, but modified settings. Review the individual namespace settings above to determine whether or not a given setting is safe to modify. + +{{% notice warning %}} +For example, it is never safe to modify the blockSize of a namespace. +{{% /notice %}} Also, be very careful not to restart the M3DB nodes after deleting the namespace, but before adding it back. If you do this, the M3DB nodes may detect the existing data files on disk and delete them since they are not configured to retain that namespace. From 9be8ef90884794f301589bfa4b37f681adc70304 Mon Sep 17 00:00:00 2001 From: arnikola Date: Thu, 21 Jan 2021 17:34:31 -0500 Subject: [PATCH 11/13] [query] Take bounds into account for list endpoints (#3110) --- src/query/api/v1/handler/prometheus/common.go | 40 ++++++++++--- .../api/v1/handler/prometheus/common_test.go | 60 ++++++++++++++++++- .../v1/handler/prometheus/native/list_tags.go | 19 +++--- .../prometheus/native/list_tags_test.go | 20 +++---- .../handler/prometheus/remote/tag_values.go | 21 ++++--- .../prometheus/remote/tag_values_test.go | 15 ++--- src/query/parser/promql/options.go | 35 +++++++++-- 7 files changed, 158 insertions(+), 52 deletions(-) diff --git a/src/query/api/v1/handler/prometheus/common.go b/src/query/api/v1/handler/prometheus/common.go index ded4732681..87859f9caf 100644 --- a/src/query/api/v1/handler/prometheus/common.go +++ b/src/query/api/v1/handler/prometheus/common.go @@ -176,6 +176,35 @@ func parseTagCompletionQueries(r *http.Request) ([]string, error) { return queries, nil } +// 
ParseStartAndEnd parses start and end params from the request. +func ParseStartAndEnd( + r *http.Request, + parseOpts xpromql.ParseOptions, +) (time.Time, time.Time, error) { + if err := r.ParseForm(); err != nil { + return time.Time{}, time.Time{}, xerrors.NewInvalidParamsError(err) + } + + start, err := util.ParseTimeStringWithDefault(r.FormValue("start"), + time.Unix(0, 0)) + if err != nil { + return time.Time{}, time.Time{}, xerrors.NewInvalidParamsError(err) + } + + end, err := util.ParseTimeStringWithDefault(r.FormValue("end"), + parseOpts.NowFn()()) + if err != nil { + return time.Time{}, time.Time{}, xerrors.NewInvalidParamsError(err) + } + + if start.After(end) { + err := fmt.Errorf("start %v must not be after end %v", start, end) + return time.Time{}, time.Time{}, xerrors.NewInvalidParamsError(err) + } + + return start, end, nil +} + // ParseSeriesMatchQuery parses all params from the GET request. func ParseSeriesMatchQuery( r *http.Request, @@ -188,16 +217,9 @@ func ParseSeriesMatchQuery( return nil, xerrors.NewInvalidParamsError(errors.ErrInvalidMatchers) } - start, err := util.ParseTimeStringWithDefault(r.FormValue("start"), - time.Unix(0, 0)) - if err != nil { - return nil, xerrors.NewInvalidParamsError(err) - } - - end, err := util.ParseTimeStringWithDefault(r.FormValue("end"), - time.Now()) + start, end, err := ParseStartAndEnd(r, parseOpts) if err != nil { - return nil, xerrors.NewInvalidParamsError(err) + return nil, err } queries := make([]*storage.FetchQuery, len(matcherValues)) diff --git a/src/query/api/v1/handler/prometheus/common_test.go b/src/query/api/v1/handler/prometheus/common_test.go index 7e7e0d091d..b8479ae4ab 100644 --- a/src/query/api/v1/handler/prometheus/common_test.go +++ b/src/query/api/v1/handler/prometheus/common_test.go @@ -22,16 +22,22 @@ package prometheus import ( "bytes" + "context" "fmt" + "net/http" "net/http/httptest" "strings" "testing" + "time" "github.com/m3db/m3/src/query/models" +
"github.com/m3db/m3/src/query/parser/promql" "github.com/m3db/m3/src/query/test" xerrors "github.com/m3db/m3/src/x/errors" + xhttp "github.com/m3db/m3/src/x/net/http" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestPromCompressedReadSuccess(t *testing.T) { @@ -102,7 +108,7 @@ func TestRenderSeriesMatchResultsNoTags(t *testing.T) { } seriesMatchResult := []models.Metrics{ - models.Metrics{ + { toTags("name", tag{name: "a", value: "b"}, tag{name: "role", value: "appears"}), toTags("name2", tag{name: "c", value: "d"}, tag{name: "e", value: "f"}), }, @@ -135,3 +141,55 @@ func TestRenderSeriesMatchResultsNoTags(t *testing.T) { assert.Equal(t, expected, w.value) } } + +func TestParseStartAndEnd(t *testing.T) { + endTime := time.Now().Truncate(time.Hour) + opts := promql.NewParseOptions().SetNowFn(func() time.Time { return endTime }) + + tests := []struct { + querystring string + exStart time.Time + exEnd time.Time + exErr bool + }{ + {querystring: "", exStart: time.Unix(0, 0), exEnd: endTime}, + {querystring: "start=100", exStart: time.Unix(100, 0), exEnd: endTime}, + {querystring: "start=100&end=200", exStart: time.Unix(100, 0), exEnd: time.Unix(200, 0)}, + {querystring: "start=200&end=100", exErr: true}, + {querystring: "start=foo&end=100", exErr: true}, + {querystring: "start=100&end=bar", exErr: true}, + } + + for _, tt := range tests { + t.Run(fmt.Sprintf("GET_%s", tt.querystring), func(t *testing.T) { + req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, + fmt.Sprintf("/?%s", tt.querystring), nil) + require.NoError(t, err) + + start, end, err := ParseStartAndEnd(req, opts) + if tt.exErr { + require.Error(t, err) + } else { + assert.Equal(t, tt.exStart, start) + assert.Equal(t, tt.exEnd, end) + } + }) + } + + for _, tt := range tests { + t.Run(fmt.Sprintf("POST_%s", tt.querystring), func(t *testing.T) { + b := bytes.NewBuffer([]byte(tt.querystring)) + req, err := 
http.NewRequestWithContext(context.Background(), http.MethodPost, "/", b) + require.NoError(t, err) + req.Header.Add(xhttp.HeaderContentType, xhttp.ContentTypeFormURLEncoded) + + start, end, err := ParseStartAndEnd(req, opts) + if tt.exErr { + require.Error(t, err) + } else { + assert.Equal(t, tt.exStart, start) + assert.Equal(t, tt.exEnd, end) + } + }) + } +} diff --git a/src/query/api/v1/handler/prometheus/native/list_tags.go b/src/query/api/v1/handler/prometheus/native/list_tags.go index 7841a0dca7..23aa9268c8 100644 --- a/src/query/api/v1/handler/prometheus/native/list_tags.go +++ b/src/query/api/v1/handler/prometheus/native/list_tags.go @@ -23,16 +23,15 @@ package native import ( "context" "net/http" - "time" "github.com/m3db/m3/src/query/api/v1/handler" "github.com/m3db/m3/src/query/api/v1/handler/prometheus" "github.com/m3db/m3/src/query/api/v1/handler/prometheus/handleroptions" "github.com/m3db/m3/src/query/api/v1/options" "github.com/m3db/m3/src/query/models" + "github.com/m3db/m3/src/query/parser/promql" "github.com/m3db/m3/src/query/storage" "github.com/m3db/m3/src/query/util/logging" - "github.com/m3db/m3/src/x/clock" "github.com/m3db/m3/src/x/instrument" xhttp "github.com/m3db/m3/src/x/net/http" @@ -53,7 +52,7 @@ var ( type ListTagsHandler struct { storage storage.Storage fetchOptionsBuilder handleroptions.FetchOptionsBuilder - nowFn clock.NowFn + parseOpts promql.ParseOptions instrumentOpts instrument.Options } @@ -62,7 +61,7 @@ func NewListTagsHandler(opts options.HandlerOptions) http.Handler { return &ListTagsHandler{ storage: opts.Storage(), fetchOptionsBuilder: opts.FetchOptionsBuilder(), - nowFn: opts.NowFn(), + parseOpts: promql.NewParseOptions().SetNowFn(opts.NowFn()), instrumentOpts: opts.InstrumentOpts(), } } @@ -72,13 +71,17 @@ func (h *ListTagsHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { logger := logging.WithContext(ctx, h.instrumentOpts) w.Header().Set(xhttp.HeaderContentType, xhttp.ContentTypeJSON) + start, end, err := 
prometheus.ParseStartAndEnd(r, h.parseOpts) + if err != nil { + xhttp.WriteError(w, err) + return + } + query := &storage.CompleteTagsQuery{ CompleteNameOnly: true, TagMatchers: models.Matchers{{Type: models.MatchAll}}, - - // NB: necessarily spans entire possible query range. - Start: time.Time{}, - End: h.nowFn(), + Start: start, + End: end, } opts, rErr := h.fetchOptionsBuilder.NewFetchOptions(r) diff --git a/src/query/api/v1/handler/prometheus/native/list_tags_test.go b/src/query/api/v1/handler/prometheus/native/list_tags_test.go index f33ef27b18..f0e2e24c4b 100644 --- a/src/query/api/v1/handler/prometheus/native/list_tags_test.go +++ b/src/query/api/v1/handler/prometheus/native/list_tags_test.go @@ -42,7 +42,7 @@ import ( ) type listTagsMatcher struct { - now time.Time + start, end time.Time } func (m *listTagsMatcher) String() string { return "list tags query" } @@ -52,12 +52,12 @@ func (m *listTagsMatcher) Matches(x interface{}) bool { return false } - if !q.Start.Equal(time.Time{}) { + if !q.Start.Equal(m.start) { return false } // NB: end time for the query should be roughly `Now` - if !q.End.Equal(m.now) { + if !q.End.Equal(m.end) { return false } @@ -119,7 +119,7 @@ func testListTags(t *testing.T, meta block.ResultMetadata, header string) { SetNowFn(nowFn) h := NewListTagsHandler(opts) for _, method := range []string{"GET", "POST"} { - matcher := &listTagsMatcher{now: now} + matcher := &listTagsMatcher{start: time.Unix(0, 0), end: now} store.EXPECT().CompleteTags(gomock.Any(), matcher, gomock.Any()). Return(storeResult, nil) @@ -150,25 +150,19 @@ func TestListErrorTags(t *testing.T) { // setup storage and handler store := storage.NewMockStorage(ctrl) - now := time.Now() - nowFn := func() time.Time { - return now - } - fb, err := handleroptions.NewFetchOptionsBuilder( handleroptions.FetchOptionsBuilderOptions{Timeout: 15 * time.Second}) require.NoError(t, err) opts := options.EmptyHandlerOptions(). SetStorage(store). - SetFetchOptionsBuilder(fb). 
- SetNowFn(nowFn) + SetFetchOptionsBuilder(fb) handler := NewListTagsHandler(opts) for _, method := range []string{"GET", "POST"} { - matcher := &listTagsMatcher{now: now} + matcher := &listTagsMatcher{start: time.Unix(100, 0), end: time.Unix(1000, 0)} store.EXPECT().CompleteTags(gomock.Any(), matcher, gomock.Any()). Return(nil, errors.New("err")) - req := httptest.NewRequest(method, "/labels", nil) + req := httptest.NewRequest(method, "/labels?start=100&end=1000", nil) w := httptest.NewRecorder() handler.ServeHTTP(w, req) diff --git a/src/query/api/v1/handler/prometheus/remote/tag_values.go b/src/query/api/v1/handler/prometheus/remote/tag_values.go index f146ffb649..61c5935130 100644 --- a/src/query/api/v1/handler/prometheus/remote/tag_values.go +++ b/src/query/api/v1/handler/prometheus/remote/tag_values.go @@ -23,7 +23,6 @@ package remote import ( "context" "net/http" - "time" "github.com/m3db/m3/src/query/api/v1/handler" "github.com/m3db/m3/src/query/api/v1/handler/prometheus" @@ -31,10 +30,10 @@ import ( "github.com/m3db/m3/src/query/api/v1/options" "github.com/m3db/m3/src/query/errors" "github.com/m3db/m3/src/query/models" + "github.com/m3db/m3/src/query/parser/promql" "github.com/m3db/m3/src/query/storage" "github.com/m3db/m3/src/query/storage/m3/consolidators" "github.com/m3db/m3/src/query/util/logging" - "github.com/m3db/m3/src/x/clock" "github.com/m3db/m3/src/x/instrument" xhttp "github.com/m3db/m3/src/x/net/http" @@ -58,7 +57,7 @@ const ( type TagValuesHandler struct { storage storage.Storage fetchOptionsBuilder handleroptions.FetchOptionsBuilder - nowFn clock.NowFn + parseOpts promql.ParseOptions instrumentOpts instrument.Options } @@ -72,7 +71,7 @@ func NewTagValuesHandler(options options.HandlerOptions) http.Handler { return &TagValuesHandler{ storage: options.Storage(), fetchOptionsBuilder: options.FetchOptionsBuilder(), - nowFn: options.NowFn(), + parseOpts: promql.NewParseOptions().SetNowFn(options.NowFn()), instrumentOpts: options.InstrumentOpts(), 
} } @@ -85,7 +84,7 @@ func (h *TagValuesHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { query, err := h.parseTagValuesToQuery(r) if err != nil { logger.Error("unable to parse tag values to query", zap.Error(err)) - xhttp.WriteError(w, xhttp.NewError(err, http.StatusBadRequest)) + xhttp.WriteError(w, err) return } @@ -117,14 +116,18 @@ func (h *TagValuesHandler) parseTagValuesToQuery( vars := mux.Vars(r) name, ok := vars[NameReplace] if !ok || len(name) == 0 { - return nil, errors.ErrNoName + return nil, xhttp.NewError(errors.ErrNoName, http.StatusBadRequest) + } + + start, end, err := prometheus.ParseStartAndEnd(r, h.parseOpts) + if err != nil { + return nil, err } nameBytes := []byte(name) return &storage.CompleteTagsQuery{ - // NB: necessarily spans the entire timerange for the index. - Start: time.Time{}, - End: h.nowFn(), + Start: start, + End: end, CompleteNameOnly: false, FilterNameTags: [][]byte{nameBytes}, TagMatchers: models.Matchers{ diff --git a/src/query/api/v1/handler/prometheus/remote/tag_values_test.go b/src/query/api/v1/handler/prometheus/remote/tag_values_test.go index 120d158fdf..cf617060ae 100644 --- a/src/query/api/v1/handler/prometheus/remote/tag_values_test.go +++ b/src/query/api/v1/handler/prometheus/remote/tag_values_test.go @@ -44,8 +44,8 @@ import ( ) type tagValuesMatcher struct { - now time.Time - filterTag string + start, end time.Time + filterTag string } func (m *tagValuesMatcher) String() string { return "tag values query" } @@ -55,11 +55,11 @@ func (m *tagValuesMatcher) Matches(x interface{}) bool { return false } - if !q.Start.Equal(time.Time{}) { + if !q.Start.Equal(m.start) { return false } - if !q.End.Equal(m.now) { + if !q.End.Equal(m.end) { return false } @@ -123,7 +123,7 @@ func TestTagValues(t *testing.T) { url := fmt.Sprintf("/label/{%s}/values", NameReplace) for _, tt := range names { - path := fmt.Sprintf("/label/%s/values", tt.name) + path := fmt.Sprintf("/label/%s/values?start=100", tt.name) req, err := 
http.NewRequest("GET", path, nil) if err != nil { t.Fatal(err) @@ -132,7 +132,8 @@ func TestTagValues(t *testing.T) { rr := httptest.NewRecorder() router := mux.NewRouter() matcher := &tagValuesMatcher{ - now: now, + start: time.Unix(100, 0), + end: now, filterTag: tt.name, } @@ -146,7 +147,7 @@ func TestTagValues(t *testing.T) { }, Metadata: block.ResultMetadata{ Exhaustive: false, - Warnings: []block.Warning{block.Warning{Name: "foo", Message: "bar"}}, + Warnings: []block.Warning{{Name: "foo", Message: "bar"}}, }, } diff --git a/src/query/parser/promql/options.go b/src/query/parser/promql/options.go index 036da45718..1feb5105ce 100644 --- a/src/query/parser/promql/options.go +++ b/src/query/parser/promql/options.go @@ -21,10 +21,14 @@ package promql import ( - "github.com/m3db/m3/src/query/models" - "github.com/m3db/m3/src/query/parser" + "time" + "github.com/prometheus/prometheus/pkg/labels" pql "github.com/prometheus/prometheus/promql/parser" + + "github.com/m3db/m3/src/query/models" + "github.com/m3db/m3/src/query/parser" + xclock "github.com/m3db/m3/src/x/clock" ) // ParseFunctionExpr parses arguments to a function expression, returning @@ -53,28 +57,38 @@ func defaultMetricSelectorFn(query string) ([]*labels.Matcher, error) { return pql.ParseMetricSelector(query) } +func defaultNowFn() time.Time { + return time.Now() +} + // ParseOptions are options for the Prometheus parser. type ParseOptions interface { // ParseFn gets the parse function. ParseFn() ParseFn // SetParseFn sets the parse function. - SetParseFn(f ParseFn) ParseOptions + SetParseFn(ParseFn) ParseOptions // MetricSelectorFn gets the metric selector function. MetricSelectorFn() MetricSelectorFn // SetMetricSelectorFn sets the metric selector function. - SetMetricSelectorFn(f MetricSelectorFn) ParseOptions + SetMetricSelectorFn(MetricSelectorFn) ParseOptions // FunctionParseExpr gets the parsing function. FunctionParseExpr() ParseFunctionExpr // SetFunctionParseExpr sets the parsing function. 
- SetFunctionParseExpr(f ParseFunctionExpr) ParseOptions + SetFunctionParseExpr(ParseFunctionExpr) ParseOptions + + // NowFn gets the now function. + NowFn() xclock.NowFn + // SetNowFn sets the now function. + SetNowFn(xclock.NowFn) ParseOptions } type parseOptions struct { parseFn ParseFn selectorFn MetricSelectorFn fnParseExpr ParseFunctionExpr + nowFn xclock.NowFn } // NewParseOptions creates a new parse options. @@ -83,6 +97,7 @@ func NewParseOptions() ParseOptions { parseFn: defaultParseFn, selectorFn: defaultMetricSelectorFn, fnParseExpr: NewFunctionExpr, + nowFn: defaultNowFn, } } @@ -115,3 +130,13 @@ func (o *parseOptions) SetFunctionParseExpr(f ParseFunctionExpr) ParseOptions { opts.fnParseExpr = f return &opts } + +func (o *parseOptions) NowFn() xclock.NowFn { + return o.nowFn +} + +func (o *parseOptions) SetNowFn(f xclock.NowFn) ParseOptions { + opts := *o + opts.nowFn = f + return &opts +} From 45e5e4aad0f172a72b6c18ef4aa8666ff6881324 Mon Sep 17 00:00:00 2001 From: Rob Skillington Date: Thu, 21 Jan 2021 18:42:38 -0500 Subject: [PATCH 12/13] [dbnode][coordinator] Ensure docs limit is propagated for search and aggregate RPCs (#3108) * [dbnode][coordinator] Ensure docs limit is propagated for search and aggregate RPCs * Fixup convert_test * Fixup compilation issues * Update mocks * Fixup service test Co-authored-by: Wesley Kim Co-authored-by: arnikola --- src/dbnode/client/aggregate_op.go | 6 +- src/dbnode/client/fetch_state.go | 6 +- src/dbnode/client/fetch_tagged_op.go | 6 +- .../fetch_tagged_results_accumulator.go | 1 + src/dbnode/generated/thrift/rpc.thrift | 8 +- src/dbnode/generated/thrift/rpc/rpc.go | 188 +++++++++++++----- src/dbnode/generated/thrift/rpc/rpc_mock.go | 2 +- .../server/tchannelthrift/convert/convert.go | 23 ++- .../tchannelthrift/convert/convert_test.go | 40 ++-- .../tchannelthrift/node/service_test.go | 152 ++++++++------ 10 files changed, 295 insertions(+), 137 deletions(-) diff --git a/src/dbnode/client/aggregate_op.go 
b/src/dbnode/client/aggregate_op.go index 2acc042fb6..6fb5a79fa2 100644 --- a/src/dbnode/client/aggregate_op.go +++ b/src/dbnode/client/aggregate_op.go @@ -45,11 +45,11 @@ func (f *aggregateOp) update(req rpc.AggregateQueryRawRequest, fn completionFn) f.completionFn = fn } -func (f *aggregateOp) requestLimit(defaultValue int) int { - if f.request.Limit == nil { +func (f *aggregateOp) requestSeriesLimit(defaultValue int) int { + if f.request.SeriesLimit == nil { return defaultValue } - return int(*f.request.Limit) + return int(*f.request.SeriesLimit) } func (f *aggregateOp) close() { diff --git a/src/dbnode/client/fetch_state.go b/src/dbnode/client/fetch_state.go index c991e19db5..baa99daa97 100644 --- a/src/dbnode/client/fetch_state.go +++ b/src/dbnode/client/fetch_state.go @@ -205,7 +205,7 @@ func (f *fetchState) asTaggedIDsIterator( return nil, FetchResponseMetadata{}, err } - limit := f.fetchTaggedOp.requestLimit(maxInt) + limit := f.fetchTaggedOp.requestSeriesLimit(maxInt) return f.tagResultAccumulator.AsTaggedIDsIterator(limit, pools) } @@ -231,7 +231,7 @@ func (f *fetchState) asEncodingSeriesIterators( return nil, FetchResponseMetadata{}, err } - limit := f.fetchTaggedOp.requestLimit(maxInt) + limit := f.fetchTaggedOp.requestSeriesLimit(maxInt) return f.tagResultAccumulator.AsEncodingSeriesIterators(limit, pools, descr, opts) } @@ -253,7 +253,7 @@ func (f *fetchState) asAggregatedTagsIterator(pools fetchTaggedPools) (Aggregate return nil, FetchResponseMetadata{}, err } - limit := f.aggregateOp.requestLimit(maxInt) + limit := f.aggregateOp.requestSeriesLimit(maxInt) return f.tagResultAccumulator.AsAggregatedTagsIterator(limit, pools) } diff --git a/src/dbnode/client/fetch_tagged_op.go b/src/dbnode/client/fetch_tagged_op.go index b9996da9d9..6de8b0f4e3 100644 --- a/src/dbnode/client/fetch_tagged_op.go +++ b/src/dbnode/client/fetch_tagged_op.go @@ -45,11 +45,11 @@ func (f *fetchTaggedOp) update(req rpc.FetchTaggedRequest, fn completionFn) { f.completionFn = fn } 
-func (f *fetchTaggedOp) requestLimit(defaultValue int) int { - if f.request.Limit == nil { +func (f *fetchTaggedOp) requestSeriesLimit(defaultValue int) int { + if f.request.SeriesLimit == nil { return defaultValue } - return int(*f.request.Limit) + return int(*f.request.SeriesLimit) } func (f *fetchTaggedOp) close() { diff --git a/src/dbnode/client/fetch_tagged_results_accumulator.go b/src/dbnode/client/fetch_tagged_results_accumulator.go index 35256b903e..977d37dd81 100644 --- a/src/dbnode/client/fetch_tagged_results_accumulator.go +++ b/src/dbnode/client/fetch_tagged_results_accumulator.go @@ -482,6 +482,7 @@ func (accum *fetchTaggedResultAccumulator) AsAggregatedTagsIterator( } moreElems = hasMore + // Would count ever be above limit? return count < limit }) diff --git a/src/dbnode/generated/thrift/rpc.thrift b/src/dbnode/generated/thrift/rpc.thrift index e83cc7335f..e6876db393 100644 --- a/src/dbnode/generated/thrift/rpc.thrift +++ b/src/dbnode/generated/thrift/rpc.thrift @@ -178,7 +178,7 @@ struct FetchTaggedRequest { 3: required i64 rangeStart 4: required i64 rangeEnd 5: required bool fetchData - 6: optional i64 limit + 6: optional i64 seriesLimit 7: optional TimeType rangeTimeType = TimeType.UNIX_SECONDS 8: optional bool requireExhaustive = true 9: optional i64 docsLimit @@ -398,11 +398,12 @@ struct AggregateQueryRawRequest { 2: required i64 rangeStart 3: required i64 rangeEnd 4: required binary nameSpace - 5: optional i64 limit + 5: optional i64 seriesLimit 6: optional list tagNameFilter 7: optional AggregateQueryType aggregateQueryType = AggregateQueryType.AGGREGATE_BY_TAG_NAME_VALUE 8: optional TimeType rangeType = TimeType.UNIX_SECONDS 9: optional binary source + 10: optional i64 docsLimit } struct AggregateQueryRawResult { @@ -425,11 +426,12 @@ struct AggregateQueryRequest { 2: required i64 rangeStart 3: required i64 rangeEnd 4: required string nameSpace - 5: optional i64 limit + 5: optional i64 seriesLimit 6: optional list tagNameFilter 7: optional 
AggregateQueryType aggregateQueryType = AggregateQueryType.AGGREGATE_BY_TAG_NAME_VALUE 8: optional TimeType rangeType = TimeType.UNIX_SECONDS 9: optional binary source + 10: optional i64 docsLimit } struct AggregateQueryResult { diff --git a/src/dbnode/generated/thrift/rpc/rpc.go b/src/dbnode/generated/thrift/rpc/rpc.go index ea8421cec3..c15f440908 100644 --- a/src/dbnode/generated/thrift/rpc/rpc.go +++ b/src/dbnode/generated/thrift/rpc/rpc.go @@ -3277,7 +3277,7 @@ func (p *Segment) String() string { // - RangeStart // - RangeEnd // - FetchData -// - Limit +// - SeriesLimit // - RangeTimeType // - RequireExhaustive // - DocsLimit @@ -3288,7 +3288,7 @@ type FetchTaggedRequest struct { RangeStart int64 `thrift:"rangeStart,3,required" db:"rangeStart" json:"rangeStart"` RangeEnd int64 `thrift:"rangeEnd,4,required" db:"rangeEnd" json:"rangeEnd"` FetchData bool `thrift:"fetchData,5,required" db:"fetchData" json:"fetchData"` - Limit *int64 `thrift:"limit,6" db:"limit" json:"limit,omitempty"` + SeriesLimit *int64 `thrift:"seriesLimit,6" db:"seriesLimit" json:"seriesLimit,omitempty"` RangeTimeType TimeType `thrift:"rangeTimeType,7" db:"rangeTimeType" json:"rangeTimeType,omitempty"` RequireExhaustive bool `thrift:"requireExhaustive,8" db:"requireExhaustive" json:"requireExhaustive,omitempty"` DocsLimit *int64 `thrift:"docsLimit,9" db:"docsLimit" json:"docsLimit,omitempty"` @@ -3323,13 +3323,13 @@ func (p *FetchTaggedRequest) GetFetchData() bool { return p.FetchData } -var FetchTaggedRequest_Limit_DEFAULT int64 +var FetchTaggedRequest_SeriesLimit_DEFAULT int64 -func (p *FetchTaggedRequest) GetLimit() int64 { - if !p.IsSetLimit() { - return FetchTaggedRequest_Limit_DEFAULT +func (p *FetchTaggedRequest) GetSeriesLimit() int64 { + if !p.IsSetSeriesLimit() { + return FetchTaggedRequest_SeriesLimit_DEFAULT } - return *p.Limit + return *p.SeriesLimit } var FetchTaggedRequest_RangeTimeType_DEFAULT TimeType = 0 @@ -3358,8 +3358,8 @@ var FetchTaggedRequest_Source_DEFAULT []byte func 
(p *FetchTaggedRequest) GetSource() []byte { return p.Source } -func (p *FetchTaggedRequest) IsSetLimit() bool { - return p.Limit != nil +func (p *FetchTaggedRequest) IsSetSeriesLimit() bool { + return p.SeriesLimit != nil } func (p *FetchTaggedRequest) IsSetRangeTimeType() bool { @@ -3522,7 +3522,7 @@ func (p *FetchTaggedRequest) ReadField6(iprot thrift.TProtocol) error { if v, err := iprot.ReadI64(); err != nil { return thrift.PrependError("error reading field 6: ", err) } else { - p.Limit = &v + p.SeriesLimit = &v } return nil } @@ -3675,15 +3675,15 @@ func (p *FetchTaggedRequest) writeField5(oprot thrift.TProtocol) (err error) { } func (p *FetchTaggedRequest) writeField6(oprot thrift.TProtocol) (err error) { - if p.IsSetLimit() { - if err := oprot.WriteFieldBegin("limit", thrift.I64, 6); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:limit: ", p), err) + if p.IsSetSeriesLimit() { + if err := oprot.WriteFieldBegin("seriesLimit", thrift.I64, 6); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:seriesLimit: ", p), err) } - if err := oprot.WriteI64(int64(*p.Limit)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.limit (6) field write error: ", p), err) + if err := oprot.WriteI64(int64(*p.SeriesLimit)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.seriesLimit (6) field write error: ", p), err) } if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 6:limit: ", p), err) + return thrift.PrependError(fmt.Sprintf("%T write field end error 6:seriesLimit: ", p), err) } } return err @@ -9610,21 +9610,23 @@ func (p *HealthResult_) String() string { // - RangeStart // - RangeEnd // - NameSpace -// - Limit +// - SeriesLimit // - TagNameFilter // - AggregateQueryType // - RangeType // - Source +// - DocsLimit type AggregateQueryRawRequest struct { Query []byte `thrift:"query,1,required" db:"query" json:"query"` RangeStart 
int64 `thrift:"rangeStart,2,required" db:"rangeStart" json:"rangeStart"` RangeEnd int64 `thrift:"rangeEnd,3,required" db:"rangeEnd" json:"rangeEnd"` NameSpace []byte `thrift:"nameSpace,4,required" db:"nameSpace" json:"nameSpace"` - Limit *int64 `thrift:"limit,5" db:"limit" json:"limit,omitempty"` + SeriesLimit *int64 `thrift:"seriesLimit,5" db:"seriesLimit" json:"seriesLimit,omitempty"` TagNameFilter [][]byte `thrift:"tagNameFilter,6" db:"tagNameFilter" json:"tagNameFilter,omitempty"` AggregateQueryType AggregateQueryType `thrift:"aggregateQueryType,7" db:"aggregateQueryType" json:"aggregateQueryType,omitempty"` RangeType TimeType `thrift:"rangeType,8" db:"rangeType" json:"rangeType,omitempty"` Source []byte `thrift:"source,9" db:"source" json:"source,omitempty"` + DocsLimit *int64 `thrift:"docsLimit,10" db:"docsLimit" json:"docsLimit,omitempty"` } func NewAggregateQueryRawRequest() *AggregateQueryRawRequest { @@ -9651,13 +9653,13 @@ func (p *AggregateQueryRawRequest) GetNameSpace() []byte { return p.NameSpace } -var AggregateQueryRawRequest_Limit_DEFAULT int64 +var AggregateQueryRawRequest_SeriesLimit_DEFAULT int64 -func (p *AggregateQueryRawRequest) GetLimit() int64 { - if !p.IsSetLimit() { - return AggregateQueryRawRequest_Limit_DEFAULT +func (p *AggregateQueryRawRequest) GetSeriesLimit() int64 { + if !p.IsSetSeriesLimit() { + return AggregateQueryRawRequest_SeriesLimit_DEFAULT } - return *p.Limit + return *p.SeriesLimit } var AggregateQueryRawRequest_TagNameFilter_DEFAULT [][]byte @@ -9683,8 +9685,17 @@ var AggregateQueryRawRequest_Source_DEFAULT []byte func (p *AggregateQueryRawRequest) GetSource() []byte { return p.Source } -func (p *AggregateQueryRawRequest) IsSetLimit() bool { - return p.Limit != nil + +var AggregateQueryRawRequest_DocsLimit_DEFAULT int64 + +func (p *AggregateQueryRawRequest) GetDocsLimit() int64 { + if !p.IsSetDocsLimit() { + return AggregateQueryRawRequest_DocsLimit_DEFAULT + } + return *p.DocsLimit +} +func (p *AggregateQueryRawRequest) 
IsSetSeriesLimit() bool { + return p.SeriesLimit != nil } func (p *AggregateQueryRawRequest) IsSetTagNameFilter() bool { @@ -9703,6 +9714,10 @@ func (p *AggregateQueryRawRequest) IsSetSource() bool { return p.Source != nil } +func (p *AggregateQueryRawRequest) IsSetDocsLimit() bool { + return p.DocsLimit != nil +} + func (p *AggregateQueryRawRequest) Read(iprot thrift.TProtocol) error { if _, err := iprot.ReadStructBegin(); err != nil { return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) @@ -9762,6 +9777,10 @@ func (p *AggregateQueryRawRequest) Read(iprot thrift.TProtocol) error { if err := p.ReadField9(iprot); err != nil { return err } + case 10: + if err := p.ReadField10(iprot); err != nil { + return err + } default: if err := iprot.Skip(fieldTypeId); err != nil { return err @@ -9829,7 +9848,7 @@ func (p *AggregateQueryRawRequest) ReadField5(iprot thrift.TProtocol) error { if v, err := iprot.ReadI64(); err != nil { return thrift.PrependError("error reading field 5: ", err) } else { - p.Limit = &v + p.SeriesLimit = &v } return nil } @@ -9885,6 +9904,15 @@ func (p *AggregateQueryRawRequest) ReadField9(iprot thrift.TProtocol) error { return nil } +func (p *AggregateQueryRawRequest) ReadField10(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(); err != nil { + return thrift.PrependError("error reading field 10: ", err) + } else { + p.DocsLimit = &v + } + return nil +} + func (p *AggregateQueryRawRequest) Write(oprot thrift.TProtocol) error { if err := oprot.WriteStructBegin("AggregateQueryRawRequest"); err != nil { return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) @@ -9917,6 +9945,9 @@ func (p *AggregateQueryRawRequest) Write(oprot thrift.TProtocol) error { if err := p.writeField9(oprot); err != nil { return err } + if err := p.writeField10(oprot); err != nil { + return err + } } if err := oprot.WriteFieldStop(); err != nil { return thrift.PrependError("write field stop error: ", err) @@ -9980,15 +10011,15 @@ 
func (p *AggregateQueryRawRequest) writeField4(oprot thrift.TProtocol) (err erro } func (p *AggregateQueryRawRequest) writeField5(oprot thrift.TProtocol) (err error) { - if p.IsSetLimit() { - if err := oprot.WriteFieldBegin("limit", thrift.I64, 5); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:limit: ", p), err) + if p.IsSetSeriesLimit() { + if err := oprot.WriteFieldBegin("seriesLimit", thrift.I64, 5); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:seriesLimit: ", p), err) } - if err := oprot.WriteI64(int64(*p.Limit)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.limit (5) field write error: ", p), err) + if err := oprot.WriteI64(int64(*p.SeriesLimit)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.seriesLimit (5) field write error: ", p), err) } if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 5:limit: ", p), err) + return thrift.PrependError(fmt.Sprintf("%T write field end error 5:seriesLimit: ", p), err) } } return err @@ -10062,6 +10093,21 @@ func (p *AggregateQueryRawRequest) writeField9(oprot thrift.TProtocol) (err erro return err } +func (p *AggregateQueryRawRequest) writeField10(oprot thrift.TProtocol) (err error) { + if p.IsSetDocsLimit() { + if err := oprot.WriteFieldBegin("docsLimit", thrift.I64, 10); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 10:docsLimit: ", p), err) + } + if err := oprot.WriteI64(int64(*p.DocsLimit)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.docsLimit (10) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 10:docsLimit: ", p), err) + } + } + return err +} + func (p *AggregateQueryRawRequest) String() string { if p == nil { return "" @@ -10492,21 +10538,23 @@ func (p *AggregateQueryRawResultTagValueElement) String() 
string { // - RangeStart // - RangeEnd // - NameSpace -// - Limit +// - SeriesLimit // - TagNameFilter // - AggregateQueryType // - RangeType // - Source +// - DocsLimit type AggregateQueryRequest struct { Query *Query `thrift:"query,1" db:"query" json:"query,omitempty"` RangeStart int64 `thrift:"rangeStart,2,required" db:"rangeStart" json:"rangeStart"` RangeEnd int64 `thrift:"rangeEnd,3,required" db:"rangeEnd" json:"rangeEnd"` NameSpace string `thrift:"nameSpace,4,required" db:"nameSpace" json:"nameSpace"` - Limit *int64 `thrift:"limit,5" db:"limit" json:"limit,omitempty"` + SeriesLimit *int64 `thrift:"seriesLimit,5" db:"seriesLimit" json:"seriesLimit,omitempty"` TagNameFilter []string `thrift:"tagNameFilter,6" db:"tagNameFilter" json:"tagNameFilter,omitempty"` AggregateQueryType AggregateQueryType `thrift:"aggregateQueryType,7" db:"aggregateQueryType" json:"aggregateQueryType,omitempty"` RangeType TimeType `thrift:"rangeType,8" db:"rangeType" json:"rangeType,omitempty"` Source []byte `thrift:"source,9" db:"source" json:"source,omitempty"` + DocsLimit *int64 `thrift:"docsLimit,10" db:"docsLimit" json:"docsLimit,omitempty"` } func NewAggregateQueryRequest() *AggregateQueryRequest { @@ -10538,13 +10586,13 @@ func (p *AggregateQueryRequest) GetNameSpace() string { return p.NameSpace } -var AggregateQueryRequest_Limit_DEFAULT int64 +var AggregateQueryRequest_SeriesLimit_DEFAULT int64 -func (p *AggregateQueryRequest) GetLimit() int64 { - if !p.IsSetLimit() { - return AggregateQueryRequest_Limit_DEFAULT +func (p *AggregateQueryRequest) GetSeriesLimit() int64 { + if !p.IsSetSeriesLimit() { + return AggregateQueryRequest_SeriesLimit_DEFAULT } - return *p.Limit + return *p.SeriesLimit } var AggregateQueryRequest_TagNameFilter_DEFAULT []string @@ -10570,12 +10618,21 @@ var AggregateQueryRequest_Source_DEFAULT []byte func (p *AggregateQueryRequest) GetSource() []byte { return p.Source } + +var AggregateQueryRequest_DocsLimit_DEFAULT int64 + +func (p *AggregateQueryRequest) 
GetDocsLimit() int64 { + if !p.IsSetDocsLimit() { + return AggregateQueryRequest_DocsLimit_DEFAULT + } + return *p.DocsLimit +} func (p *AggregateQueryRequest) IsSetQuery() bool { return p.Query != nil } -func (p *AggregateQueryRequest) IsSetLimit() bool { - return p.Limit != nil +func (p *AggregateQueryRequest) IsSetSeriesLimit() bool { + return p.SeriesLimit != nil } func (p *AggregateQueryRequest) IsSetTagNameFilter() bool { @@ -10594,6 +10651,10 @@ func (p *AggregateQueryRequest) IsSetSource() bool { return p.Source != nil } +func (p *AggregateQueryRequest) IsSetDocsLimit() bool { + return p.DocsLimit != nil +} + func (p *AggregateQueryRequest) Read(iprot thrift.TProtocol) error { if _, err := iprot.ReadStructBegin(); err != nil { return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) @@ -10651,6 +10712,10 @@ func (p *AggregateQueryRequest) Read(iprot thrift.TProtocol) error { if err := p.ReadField9(iprot); err != nil { return err } + case 10: + if err := p.ReadField10(iprot); err != nil { + return err + } default: if err := iprot.Skip(fieldTypeId); err != nil { return err @@ -10714,7 +10779,7 @@ func (p *AggregateQueryRequest) ReadField5(iprot thrift.TProtocol) error { if v, err := iprot.ReadI64(); err != nil { return thrift.PrependError("error reading field 5: ", err) } else { - p.Limit = &v + p.SeriesLimit = &v } return nil } @@ -10770,6 +10835,15 @@ func (p *AggregateQueryRequest) ReadField9(iprot thrift.TProtocol) error { return nil } +func (p *AggregateQueryRequest) ReadField10(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(); err != nil { + return thrift.PrependError("error reading field 10: ", err) + } else { + p.DocsLimit = &v + } + return nil +} + func (p *AggregateQueryRequest) Write(oprot thrift.TProtocol) error { if err := oprot.WriteStructBegin("AggregateQueryRequest"); err != nil { return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) @@ -10802,6 +10876,9 @@ func (p *AggregateQueryRequest) 
Write(oprot thrift.TProtocol) error { if err := p.writeField9(oprot); err != nil { return err } + if err := p.writeField10(oprot); err != nil { + return err + } } if err := oprot.WriteFieldStop(); err != nil { return thrift.PrependError("write field stop error: ", err) @@ -10867,15 +10944,15 @@ func (p *AggregateQueryRequest) writeField4(oprot thrift.TProtocol) (err error) } func (p *AggregateQueryRequest) writeField5(oprot thrift.TProtocol) (err error) { - if p.IsSetLimit() { - if err := oprot.WriteFieldBegin("limit", thrift.I64, 5); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:limit: ", p), err) + if p.IsSetSeriesLimit() { + if err := oprot.WriteFieldBegin("seriesLimit", thrift.I64, 5); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:seriesLimit: ", p), err) } - if err := oprot.WriteI64(int64(*p.Limit)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.limit (5) field write error: ", p), err) + if err := oprot.WriteI64(int64(*p.SeriesLimit)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.seriesLimit (5) field write error: ", p), err) } if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 5:limit: ", p), err) + return thrift.PrependError(fmt.Sprintf("%T write field end error 5:seriesLimit: ", p), err) } } return err @@ -10949,6 +11026,21 @@ func (p *AggregateQueryRequest) writeField9(oprot thrift.TProtocol) (err error) return err } +func (p *AggregateQueryRequest) writeField10(oprot thrift.TProtocol) (err error) { + if p.IsSetDocsLimit() { + if err := oprot.WriteFieldBegin("docsLimit", thrift.I64, 10); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 10:docsLimit: ", p), err) + } + if err := oprot.WriteI64(int64(*p.DocsLimit)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.docsLimit (10) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil 
{ + return thrift.PrependError(fmt.Sprintf("%T write field end error 10:docsLimit: ", p), err) + } + } + return err +} + func (p *AggregateQueryRequest) String() string { if p == nil { return "" diff --git a/src/dbnode/generated/thrift/rpc/rpc_mock.go b/src/dbnode/generated/thrift/rpc/rpc_mock.go index a7be5ca909..3d5f1f0ff3 100644 --- a/src/dbnode/generated/thrift/rpc/rpc_mock.go +++ b/src/dbnode/generated/thrift/rpc/rpc_mock.go @@ -1,7 +1,7 @@ // Code generated by MockGen. DO NOT EDIT. // Source: github.com/m3db/m3/src/dbnode/generated/thrift/rpc/tchan-go -// Copyright (c) 2020 Uber Technologies, Inc. +// Copyright (c) 2021 Uber Technologies, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/src/dbnode/network/server/tchannelthrift/convert/convert.go b/src/dbnode/network/server/tchannelthrift/convert/convert.go index ed7b9a7fd0..f62e60101b 100644 --- a/src/dbnode/network/server/tchannelthrift/convert/convert.go +++ b/src/dbnode/network/server/tchannelthrift/convert/convert.go @@ -227,7 +227,7 @@ func FromRPCFetchTaggedRequest( EndExclusive: end, RequireExhaustive: req.RequireExhaustive, } - if l := req.Limit; l != nil { + if l := req.SeriesLimit; l != nil { opts.SeriesLimit = int(*l) } if l := req.DocsLimit; l != nil { @@ -285,7 +285,7 @@ func ToRPCFetchTaggedRequest( if opts.SeriesLimit > 0 { l := int64(opts.SeriesLimit) - request.Limit = &l + request.SeriesLimit = &l } if opts.DocsLimit > 0 { @@ -320,9 +320,13 @@ func FromRPCAggregateQueryRequest( EndExclusive: end, }, } - if l := req.Limit; l != nil { + if l := req.SeriesLimit; l != nil { opts.SeriesLimit = int(*l) } + if l := req.DocsLimit; l != nil { + opts.DocsLimit = int(*l) + } + if len(req.Source) > 0 { opts.Source = req.Source } @@ -368,12 +372,17 @@ func FromRPCAggregateQueryRawRequest( EndExclusive: end, }, } - if l := req.Limit; l != nil { + if l := req.SeriesLimit; l != 
nil { opts.SeriesLimit = int(*l) } + if l := req.DocsLimit; l != nil { + opts.DocsLimit = int(*l) + } + if len(req.Source) > 0 { opts.Source = req.Source } + query, err := idx.Unmarshal(req.Query) if err != nil { return nil, index.Query{}, index.AggregationOptions{}, err @@ -420,7 +429,11 @@ func ToRPCAggregateQueryRawRequest( if opts.SeriesLimit > 0 { l := int64(opts.SeriesLimit) - request.Limit = &l + request.SeriesLimit = &l + } + if opts.DocsLimit > 0 { + l := int64(opts.DocsLimit) + request.DocsLimit = &l } if len(opts.Source) > 0 { diff --git a/src/dbnode/network/server/tchannelthrift/convert/convert_test.go b/src/dbnode/network/server/tchannelthrift/convert/convert_test.go index 2c33c2dd06..f150827389 100644 --- a/src/dbnode/network/server/tchannelthrift/convert/convert_test.go +++ b/src/dbnode/network/server/tchannelthrift/convert/convert_test.go @@ -1,4 +1,4 @@ -// Copyright (c) 2018 Uber Technologies, Inc. +// Copyright (c) 2021 Uber Technologies, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -101,20 +101,25 @@ func conjunctionQueryATestCase(t *testing.T) (idx.Query, []byte) { } func TestConvertFetchTaggedRequest(t *testing.T) { + var ( + seriesLimit int64 = 10 + docsLimit int64 = 10 + ) ns := ident.StringID("abc") opts := index.QueryOptions{ StartInclusive: time.Now().Add(-900 * time.Hour), EndExclusive: time.Now(), - SeriesLimit: 10, + SeriesLimit: int(seriesLimit), + DocsLimit: int(docsLimit), } fetchData := true - var limit int64 = 10 requestSkeleton := &rpc.FetchTaggedRequest{ - NameSpace: ns.Bytes(), - RangeStart: mustToRpcTime(t, opts.StartInclusive), - RangeEnd: mustToRpcTime(t, opts.EndExclusive), - FetchData: fetchData, - Limit: &limit, + NameSpace: ns.Bytes(), + RangeStart: mustToRpcTime(t, opts.StartInclusive), + RangeEnd: mustToRpcTime(t, opts.EndExclusive), + FetchData: fetchData, + SeriesLimit: &seriesLimit, + 
DocsLimit: &docsLimit, } requireEqual := func(a, b interface{}) { d := cmp.Diff(a, b) @@ -166,12 +171,17 @@ func TestConvertFetchTaggedRequest(t *testing.T) { } func TestConvertAggregateRawQueryRequest(t *testing.T) { - ns := ident.StringID("abc") + var ( + seriesLimit int64 = 10 + docsLimit int64 = 10 + ns = ident.StringID("abc") + ) opts := index.AggregationOptions{ QueryOptions: index.QueryOptions{ StartInclusive: time.Now().Add(-900 * time.Hour), EndExclusive: time.Now(), - SeriesLimit: 10, + SeriesLimit: int(seriesLimit), + DocsLimit: int(docsLimit), }, Type: index.AggregateTagNamesAndValues, FieldFilter: index.AggregateFieldFilter{ @@ -179,12 +189,12 @@ func TestConvertAggregateRawQueryRequest(t *testing.T) { []byte("string"), }, } - var limit int64 = 10 requestSkeleton := &rpc.AggregateQueryRawRequest{ - NameSpace: ns.Bytes(), - RangeStart: mustToRpcTime(t, opts.StartInclusive), - RangeEnd: mustToRpcTime(t, opts.EndExclusive), - Limit: &limit, + NameSpace: ns.Bytes(), + RangeStart: mustToRpcTime(t, opts.StartInclusive), + RangeEnd: mustToRpcTime(t, opts.EndExclusive), + SeriesLimit: &seriesLimit, + DocsLimit: &docsLimit, TagNameFilter: [][]byte{ []byte("some"), []byte("string"), diff --git a/src/dbnode/network/server/tchannelthrift/node/service_test.go b/src/dbnode/network/server/tchannelthrift/node/service_test.go index 1996795609..65567c26d8 100644 --- a/src/dbnode/network/server/tchannelthrift/node/service_test.go +++ b/src/dbnode/network/server/tchannelthrift/node/service_test.go @@ -1632,7 +1632,10 @@ func TestServiceFetchTagged(t *testing.T) { index.QueryResultsOptions{}, testIndexOptions) resMap.Map().Set(md1.ID, doc.NewDocumentFromMetadata(md1)) resMap.Map().Set(md2.ID, doc.NewDocumentFromMetadata(md2)) - + var ( + seriesLimit int64 = 10 + docsLimit int64 = 10 + ) mockDB.EXPECT().QueryIDs( gomock.Any(), ident.NewIDMatcher(nsID), @@ -1640,23 +1643,25 @@ func TestServiceFetchTagged(t *testing.T) { index.QueryOptions{ StartInclusive: start, 
EndExclusive: end, - SeriesLimit: 10, + SeriesLimit: int(seriesLimit), + DocsLimit: int(docsLimit), }).Return(index.QueryResult{Results: resMap, Exhaustive: true}, nil) startNanos, err := convert.ToValue(start, rpc.TimeType_UNIX_NANOSECONDS) require.NoError(t, err) endNanos, err := convert.ToValue(end, rpc.TimeType_UNIX_NANOSECONDS) require.NoError(t, err) - var limit int64 = 10 + data, err := idx.Marshal(req) require.NoError(t, err) r, err := service.FetchTagged(tctx, &rpc.FetchTaggedRequest{ - NameSpace: []byte(nsID), - Query: data, - RangeStart: startNanos, - RangeEnd: endNanos, - FetchData: true, - Limit: &limit, + NameSpace: []byte(nsID), + Query: data, + RangeStart: startNanos, + RangeEnd: endNanos, + FetchData: true, + SeriesLimit: &seriesLimit, + DocsLimit: &docsLimit, }) require.NoError(t, err) @@ -1764,16 +1769,20 @@ func TestServiceFetchTaggedIsOverloaded(t *testing.T) { require.NoError(t, err) endNanos, err := convert.ToValue(end, rpc.TimeType_UNIX_NANOSECONDS) require.NoError(t, err) - var limit int64 = 10 + var ( + seriesLimit int64 = 10 + docsLimit int64 = 10 + ) data, err := idx.Marshal(req) require.NoError(t, err) _, err = service.FetchTagged(tctx, &rpc.FetchTaggedRequest{ - NameSpace: []byte(nsID), - Query: data, - RangeStart: startNanos, - RangeEnd: endNanos, - FetchData: true, - Limit: &limit, + NameSpace: []byte(nsID), + Query: data, + RangeStart: startNanos, + RangeEnd: endNanos, + FetchData: true, + SeriesLimit: &seriesLimit, + DocsLimit: &docsLimit, }) require.Equal(t, tterrors.NewInternalError(errServerIsOverloaded), err) } @@ -1804,17 +1813,21 @@ func TestServiceFetchTaggedDatabaseNotSet(t *testing.T) { require.NoError(t, err) endNanos, err := convert.ToValue(end, rpc.TimeType_UNIX_NANOSECONDS) require.NoError(t, err) - var limit int64 = 10 + var ( + seriesLimit int64 = 10 + docsLimit int64 = 10 + ) data, err := idx.Marshal(req) require.NoError(t, err) _, err = service.FetchTagged(tctx, &rpc.FetchTaggedRequest{ - NameSpace: []byte(nsID), - 
Query: data, - RangeStart: startNanos, - RangeEnd: endNanos, - FetchData: true, - Limit: &limit, + NameSpace: []byte(nsID), + Query: data, + RangeStart: startNanos, + RangeEnd: endNanos, + FetchData: true, + SeriesLimit: &seriesLimit, + DocsLimit: &docsLimit, }) require.Equal(t, tterrors.NewInternalError(errDatabaseIsNotInitializedYet), err) } @@ -1856,7 +1869,10 @@ func TestServiceFetchTaggedNoData(t *testing.T) { index.QueryResultsOptions{}, testIndexOptions) resMap.Map().Set(md1.ID, doc.NewDocumentFromMetadata(md1)) resMap.Map().Set(md2.ID, doc.NewDocumentFromMetadata(md2)) - + var ( + seriesLimit int64 = 10 + docsLimit int64 = 10 + ) mockDB.EXPECT().QueryIDs( ctx, ident.NewIDMatcher(nsID), @@ -1864,23 +1880,25 @@ func TestServiceFetchTaggedNoData(t *testing.T) { index.QueryOptions{ StartInclusive: start, EndExclusive: end, - SeriesLimit: 10, + SeriesLimit: int(seriesLimit), + DocsLimit: int(docsLimit), }).Return(index.QueryResult{Results: resMap, Exhaustive: true}, nil) startNanos, err := convert.ToValue(start, rpc.TimeType_UNIX_NANOSECONDS) require.NoError(t, err) endNanos, err := convert.ToValue(end, rpc.TimeType_UNIX_NANOSECONDS) require.NoError(t, err) - var limit int64 = 10 + data, err := idx.Marshal(req) require.NoError(t, err) r, err := service.FetchTagged(tctx, &rpc.FetchTaggedRequest{ - NameSpace: []byte(nsID), - Query: data, - RangeStart: startNanos, - RangeEnd: endNanos, - FetchData: false, - Limit: &limit, + NameSpace: []byte(nsID), + Query: data, + RangeStart: startNanos, + RangeEnd: endNanos, + FetchData: false, + SeriesLimit: &seriesLimit, + DocsLimit: &docsLimit, }) require.NoError(t, err) @@ -1922,8 +1940,10 @@ func TestServiceFetchTaggedErrs(t *testing.T) { require.NoError(t, err) endNanos, err := convert.ToValue(end, rpc.TimeType_UNIX_NANOSECONDS) require.NoError(t, err) - var limit int64 = 10 - + var ( + seriesLimit int64 = 10 + docsLimit int64 = 10 + ) req, err := idx.NewRegexpQuery([]byte("foo"), []byte("b.*")) require.NoError(t, err) 
data, err := idx.Marshal(req) @@ -1937,15 +1957,17 @@ func TestServiceFetchTaggedErrs(t *testing.T) { index.QueryOptions{ StartInclusive: start, EndExclusive: end, - SeriesLimit: 10, + SeriesLimit: int(seriesLimit), + DocsLimit: int(docsLimit), }).Return(index.QueryResult{}, fmt.Errorf("random err")) _, err = service.FetchTagged(tctx, &rpc.FetchTaggedRequest{ - NameSpace: []byte(nsID), - Query: data, - RangeStart: startNanos, - RangeEnd: endNanos, - FetchData: false, - Limit: &limit, + NameSpace: []byte(nsID), + Query: data, + RangeStart: startNanos, + RangeEnd: endNanos, + FetchData: false, + SeriesLimit: &seriesLimit, + DocsLimit: &docsLimit, }) require.Error(t, err) } @@ -2022,7 +2044,10 @@ func TestServiceFetchTaggedReturnOnFirstErr(t *testing.T) { resMap := index.NewQueryResults(ident.StringID(nsID), index.QueryResultsOptions{}, testIndexOptions) resMap.Map().Set(md1.ID, doc.NewDocumentFromMetadata(md1)) - + var ( + seriesLimit int64 = 10 + docsLimit int64 = 10 + ) mockDB.EXPECT().QueryIDs( gomock.Any(), ident.NewIDMatcher(nsID), @@ -2030,23 +2055,25 @@ func TestServiceFetchTaggedReturnOnFirstErr(t *testing.T) { index.QueryOptions{ StartInclusive: start, EndExclusive: end, - SeriesLimit: 10, + SeriesLimit: int(seriesLimit), + DocsLimit: int(docsLimit), }).Return(index.QueryResult{Results: resMap, Exhaustive: true}, nil) startNanos, err := convert.ToValue(start, rpc.TimeType_UNIX_NANOSECONDS) require.NoError(t, err) endNanos, err := convert.ToValue(end, rpc.TimeType_UNIX_NANOSECONDS) require.NoError(t, err) - var limit int64 = 10 + data, err := idx.Marshal(req) require.NoError(t, err) _, err = service.FetchTagged(tctx, &rpc.FetchTaggedRequest{ - NameSpace: []byte(nsID), - Query: data, - RangeStart: startNanos, - RangeEnd: endNanos, - FetchData: true, - Limit: &limit, + NameSpace: []byte(nsID), + Query: data, + RangeStart: startNanos, + RangeEnd: endNanos, + FetchData: true, + SeriesLimit: &seriesLimit, + DocsLimit: &docsLimit, }) require.Error(t, err) } @@ 
-2080,6 +2107,11 @@ func TestServiceAggregate(t *testing.T) { resMap.Map().Set(ident.StringID("foo"), index.MustNewAggregateValues(testIndexOptions)) resMap.Map().Set(ident.StringID("bar"), index.MustNewAggregateValues(testIndexOptions, ident.StringID("baz"), ident.StringID("barf"))) + + var ( + seriesLimit int64 = 10 + docsLimit int64 = 10 + ) mockDB.EXPECT().AggregateQuery( ctx, ident.NewIDMatcher(nsID), @@ -2088,7 +2120,8 @@ func TestServiceAggregate(t *testing.T) { QueryOptions: index.QueryOptions{ StartInclusive: start, EndExclusive: end, - SeriesLimit: 10, + SeriesLimit: int(seriesLimit), + DocsLimit: int(docsLimit), }, FieldFilter: index.AggregateFieldFilter{ []byte("foo"), []byte("bar"), @@ -2101,7 +2134,7 @@ func TestServiceAggregate(t *testing.T) { require.NoError(t, err) endNanos, err := convert.ToValue(end, rpc.TimeType_UNIX_NANOSECONDS) require.NoError(t, err) - var limit int64 = 10 + data, err := idx.Marshal(req) require.NoError(t, err) r, err := service.AggregateRaw(tctx, &rpc.AggregateQueryRawRequest{ @@ -2109,7 +2142,8 @@ func TestServiceAggregate(t *testing.T) { Query: data, RangeStart: startNanos, RangeEnd: endNanos, - Limit: &limit, + SeriesLimit: &seriesLimit, + DocsLimit: &docsLimit, AggregateQueryType: rpc.AggregateQueryType_AGGREGATE_BY_TAG_NAME_VALUE, TagNameFilter: [][]byte{ []byte("foo"), []byte("bar"), @@ -2163,6 +2197,10 @@ func TestServiceAggregateNameOnly(t *testing.T) { index.AggregateResultsOptions{}, testIndexOptions) resMap.Map().Set(ident.StringID("foo"), index.AggregateValues{}) resMap.Map().Set(ident.StringID("bar"), index.AggregateValues{}) + var ( + seriesLimit int64 = 10 + docsLimit int64 = 10 + ) mockDB.EXPECT().AggregateQuery( ctx, ident.NewIDMatcher(nsID), @@ -2171,7 +2209,8 @@ func TestServiceAggregateNameOnly(t *testing.T) { QueryOptions: index.QueryOptions{ StartInclusive: start, EndExclusive: end, - SeriesLimit: 10, + SeriesLimit: int(seriesLimit), + DocsLimit: int(docsLimit), }, FieldFilter: 
index.AggregateFieldFilter{ []byte("foo"), []byte("bar"), @@ -2184,7 +2223,7 @@ func TestServiceAggregateNameOnly(t *testing.T) { require.NoError(t, err) endNanos, err := convert.ToValue(end, rpc.TimeType_UNIX_NANOSECONDS) require.NoError(t, err) - var limit int64 = 10 + data, err := idx.Marshal(req) require.NoError(t, err) r, err := service.AggregateRaw(tctx, &rpc.AggregateQueryRawRequest{ @@ -2192,7 +2231,8 @@ func TestServiceAggregateNameOnly(t *testing.T) { Query: data, RangeStart: startNanos, RangeEnd: endNanos, - Limit: &limit, + SeriesLimit: &seriesLimit, + DocsLimit: &docsLimit, AggregateQueryType: rpc.AggregateQueryType_AGGREGATE_BY_TAG_NAME, TagNameFilter: [][]byte{ []byte("foo"), []byte("bar"), From 1125eb4ded59102436d7a160cc40df4431a6376d Mon Sep 17 00:00:00 2001 From: Chris Chinchilla Date: Fri, 22 Jan 2021 12:07:03 +0100 Subject: [PATCH 13/13] [DOCS] Update to cluster docs (#3084) * Some cluster section overhaul Signed-off-by: ChrisChinchilla * Add minikube note Signed-off-by: ChrisChinchilla * Add query config Signed-off-by: ChrisChinchilla * inalise changes Signed-off-by: ChrisChinchilla * Fix broken links Signed-off-by: ChrisChinchilla --- site/content/cluster/binaries_cluster.md | 56 +--- site/content/cluster/kubernetes_cluster.md | 288 +++++++++++++++++- site/content/includes/cluster-architecture.md | 31 ++ site/content/includes/cluster-common-steps.md | 29 ++ .../includes/m3query/annotated_config.yaml | 246 +++++++++++++++ site/content/operator/api.md | 39 +-- 6 files changed, 606 insertions(+), 83 deletions(-) create mode 100644 site/content/includes/cluster-architecture.md create mode 100644 site/content/includes/m3query/annotated_config.yaml diff --git a/site/content/cluster/binaries_cluster.md b/site/content/cluster/binaries_cluster.md index 5c2ac410b1..961a793c1c 100644 --- a/site/content/cluster/binaries_cluster.md +++ b/site/content/cluster/binaries_cluster.md @@ -10,30 +10,7 @@ This guide shows you the steps involved in creating an M3 
cluster using M3 binar This guide assumes you have read the [quickstart](/docs/quickstart/binaries), and builds upon the concepts in that guide. {{% /notice %}} -## M3 Architecture - -Here's a typical M3 deployment: - - - -![Typical Deployment](/cluster_architecture.png) - -An M3 deployment typically has two main node types: - -- **Coordinator node**: `m3coordinator` nodes coordinate reads and writes across all nodes in the cluster. It's a lightweight process, and does not store any data. This role typically runs alongside a Prometheus instance, or is part of a collector agent such as statsD. -- **Storage node**: The `m3dbnode` processes are the workhorses of M3, they store data and serve reads and writes. - -A `m3coordinator` node exposes two ports: - -- `7201` to manage the cluster topology, you make most API calls to this endpoint -- `7203` for Prometheus to scrape the metrics produced by M3DB and M3Coordinator - -## Prerequisites - -M3 uses [etcd](https://etcd.io/) as a distributed key-value storage for the following functions: - -- Update cluster configuration in realtime -- Manage placements for distributed and sharded clusters +{{< fileinclude file="cluster-architecture.md" >}} ## Download and Install a Binary @@ -52,8 +29,6 @@ You can download the latest release as [pre-compiled binaries from the M3 GitHub ## Provision a Host -Enough background, let's create a real cluster! - M3 in production can run on local or cloud-based VMs, or bare-metal servers. M3 supports all popular Linux distributions (Ubuntu, RHEL, CentOS), and [let us know](https://github.com/m3db/m3/issues/new/choose) if you have any issues with your preferred distribution. ### Network @@ -236,35 +211,6 @@ curl -X POST {{% apiendpoint %}}database/create -d '{ If you need to setup multiple namespaces, you can run the command above multiple times with different namespace configurations. 
-### Ready a Namespace - -Once a namespace has finished bootstrapping, you must mark it as ready before receiving traffic by using the _{{% apiendpoint %}}namespace/ready_. - -{{< tabs name="ready_namespaces" >}} -{{% tab name="Command" %}} - -{{< codeinclude file="docs/includes/quickstart/ready-namespace.sh" language="shell" >}} - -{{% /tab %}} -{{% tab name="Output" %}} - -```json -{ - "ready": true -} -``` - -{{% /tab %}} -{{< /tabs >}} - -### Replication factor - -We recommend a replication factor of **3**, with each replica spread across failure domains such as a physical server rack, data center or availability zone. Read our [replication factor recommendations](/docs/operational_guide/replication_and_deployment_in_zones) for more details. - -### Shards - -Read the [placement configuration guide](/docs/operational_guide/placement_configuration) to determine the appropriate number of shards to specify. - {{< fileinclude file="cluster-common-steps.md" >}} + + +## Organizing Data with Placements and Namespaces + +Time series databases (TSDBs) typically consist of one node (or instance) to store metrics data. This setup is simple to use but has issues with scalability over time as the quantity of metrics data written and read increases. + +As a distributed TSDB, M3 helps solve this problem by spreading metrics data, and demand for that data, across multiple nodes in a cluster. M3 does this by splitting data into segments that match certain criteria (such as above a certain value) across nodes into shards. + + + +If you've worked with a distributed database before, then these concepts are probably familiar to you, but M3 uses different terminology to represent some concepts. + +- Every cluster has **one** placement that maps shards to nodes in the cluster. +- A cluster can have **0 or more** namespaces that are similar conceptually to tables in other databases, and each node serves every namespace for the shards it owns.
+ + + +For example, if the cluster placement states that node A owns shards 1, 2, and 3, then node A owns shards 1, 2, 3 for all configured namespaces in the cluster. Each namespace has its own configuration options, including a name and retention time for the data. + +## Create a Placement and Namespace + +This quickstart uses the _{{% apiendpoint %}}database/create_ endpoint that creates a namespace, and the placement if it doesn't already exist based on the `type` argument. + +You can create [placements](/docs/operational_guide/placement_configuration/) and [namespaces](/docs/operational_guide/namespace_configuration/#advanced-hard-way) separately if you need more control over their settings. + +In another terminal, use the following command. + +{{< tabs name="create_placement_namespace" >}} +{{< tab name="Command" >}} + +{{< codeinclude file="docs/includes/create-database.sh" language="shell" >}} + +{{< /tab >}} +{{% tab name="Output" %}} + +```json +{ + "namespace": { + "registry": { + "namespaces": { + "default": { + "bootstrapEnabled": true, + "flushEnabled": true, + "writesToCommitLog": true, + "cleanupEnabled": true, + "repairEnabled": false, + "retentionOptions": { + "retentionPeriodNanos": "43200000000000", + "blockSizeNanos": "1800000000000", + "bufferFutureNanos": "120000000000", + "bufferPastNanos": "600000000000", + "blockDataExpiry": true, + "blockDataExpiryAfterNotAccessPeriodNanos": "300000000000", + "futureRetentionPeriodNanos": "0" + }, + "snapshotEnabled": true, + "indexOptions": { + "enabled": true, + "blockSizeNanos": "1800000000000" + }, + "schemaOptions": null, + "coldWritesEnabled": false, + "runtimeOptions": null + } + } + } + }, + "placement": { + "placement": { + "instances": { + "m3db_local": { + "id": "m3db_local", + "isolationGroup": "local", + "zone": "embedded", + "weight": 1, + "endpoint": "127.0.0.1:9000", + "shards": [ + { + "id": 0, + "state": "INITIALIZING", + "sourceId": "", + "cutoverNanos": "0", + "cutoffNanos": "0" + }, + 
… + { + "id": 63, + "state": "INITIALIZING", + "sourceId": "", + "cutoverNanos": "0", + "cutoffNanos": "0" + } + ], + "shardSetId": 0, + "hostname": "localhost", + "port": 9000, + "metadata": { + "debugPort": 0 + } + } + }, + "replicaFactor": 1, + "numShards": 64, + "isSharded": true, + "cutoverTime": "0", + "isMirrored": false, + "maxShardSetId": 0 + }, + "version": 0 + } +} +``` + +{{% /tab %}} +{{< /tabs >}} + +Placement initialization can take a minute or two. Once all the shards have the `AVAILABLE` state, the node has finished bootstrapping, and you should see the following messages in the node console output. + + + +```shell +{"level":"info","ts":1598367624.0117292,"msg":"bootstrap marking all shards as bootstrapped","namespace":"default","namespace":"default","numShards":64} +{"level":"info","ts":1598367624.0301404,"msg":"bootstrap index with bootstrapped index segments","namespace":"default","numIndexBlocks":0} +{"level":"info","ts":1598367624.0301914,"msg":"bootstrap success","numShards":64,"bootstrapDuration":0.049208827} +{"level":"info","ts":1598367624.03023,"msg":"bootstrapped"} +``` + +You can check on the status by calling the _{{% apiendpoint %}}services/m3db/placement_ endpoint: + +{{< tabs name="check_placement" >}} +{{% tab name="Command" %}} + +```shell +curl {{% apiendpoint %}}services/m3db/placement | jq . 
+``` + +{{% /tab %}} +{{% tab name="Output" %}} + +```json +{ + "placement": { + "instances": { + "m3db_local": { + "id": "m3db_local", + "isolationGroup": "local", + "zone": "embedded", + "weight": 1, + "endpoint": "127.0.0.1:9000", + "shards": [ + { + "id": 0, + "state": "AVAILABLE", + "sourceId": "", + "cutoverNanos": "0", + "cutoffNanos": "0" + }, + … + { + "id": 63, + "state": "AVAILABLE", + "sourceId": "", + "cutoverNanos": "0", + "cutoffNanos": "0" + } + ], + "shardSetId": 0, + "hostname": "localhost", + "port": 9000, + "metadata": { + "debugPort": 0 + } + } + }, + "replicaFactor": 1, + "numShards": 64, + "isSharded": true, + "cutoverTime": "0", + "isMirrored": false, + "maxShardSetId": 0 + }, + "version": 2 +} +``` + +{{% /tab %}} +{{< /tabs >}} + +{{% notice tip %}} +[Read more about the bootstrapping process](/docs/operational_guide/bootstrapping_crash_recovery/). +{{% /notice %}} + +### Ready a Namespace + +Once a namespace has finished bootstrapping, you must mark it as ready before receiving traffic by using the _{{% apiendpoint %}}services/m3db/namespace/ready_. + +{{< tabs name="ready_namespaces" >}} +{{% tab name="Command" %}} + +{{% codeinclude file="docs/includes/quickstart/ready-namespace.sh" language="shell" %}} + +{{% /tab %}} +{{% tab name="Output" %}} + +```json +{ +"ready": true +} +``` + +{{% /tab %}} +{{< /tabs >}} + +### View Details of a Namespace + +You can also view the attributes of all namespaces by calling the _{{% apiendpoint %}}services/m3db/namespace_ endpoint + +{{< tabs name="check_namespaces" >}} +{{% tab name="Command" %}} + +```shell +curl {{% apiendpoint %}}services/m3db/namespace | jq . +``` + +{{% notice tip %}} +Add `?debug=1` to the request to convert nano units in the output into standard units. 
+{{% /notice %}} + +{{% /tab %}} +{{% tab name="Output" %}} + +```json +{ + "registry": { + "namespaces": { + "default": { + "bootstrapEnabled": true, + "flushEnabled": true, + "writesToCommitLog": true, + "cleanupEnabled": true, + "repairEnabled": false, + "retentionOptions": { + "retentionPeriodNanos": "43200000000000", + "blockSizeNanos": "1800000000000", + "bufferFutureNanos": "120000000000", + "bufferPastNanos": "600000000000", + "blockDataExpiry": true, + "blockDataExpiryAfterNotAccessPeriodNanos": "300000000000", + "futureRetentionPeriodNanos": "0" + }, + "snapshotEnabled": true, + "indexOptions": { + "enabled": true, + "blockSizeNanos": "1800000000000" + }, + "schemaOptions": null, + "coldWritesEnabled": false, + "runtimeOptions": null + } + } + } +} +``` + +{{% /tab %}} +{{< /tabs >}} + {{< fileinclude file="cluster-common-steps.md" >}} \ No newline at end of file diff --git a/site/content/includes/cluster-architecture.md b/site/content/includes/cluster-architecture.md new file mode 100644 index 0000000000..d715c2743d --- /dev/null +++ b/site/content/includes/cluster-architecture.md @@ -0,0 +1,31 @@ +## M3 Architecture + + + +![Typical Deployment](/cluster_architecture.png) + +### Node types + +An M3 deployment typically has two main node types: + +- **[Storage nodes](/docs/m3db)** (`m3dbnode`) are the workhorses of M3, they store data and serve reads and writes. +- **[Coordinator nodes](/docs/m3coordinator)** (`m3coordinator`) coordinate reads and writes across all nodes in the cluster. It's a lightweight process, and does not store any data. This role typically runs alongside a Prometheus instance, or is part of a collector agent such as statsD. 
+ +A `m3coordinator` node exposes two external ports: + +- `7201` to manage the cluster topology, you make most API calls to this endpoint +- `7203` for Prometheus to scrape the metrics produced by M3DB and M3Coordinator + +There are two other less-commonly used node types: + +- **[Query nodes](/docs/m3query)** (`m3query`) are an alternative query option to using M3's built-in PromQL support. +- **[Aggregator nodes](/docs/how_to/aggregator)** cluster and aggregate metrics before storing them in storage nodes. Coordinator nodes can also perform this role but are not cluster-aware. + + + +## Prerequisites + +M3 uses [etcd](https://etcd.io/) as a distributed key-value storage for the following functions: + +- Update cluster configuration in realtime +- Manage placements for distributed and sharded clusters \ No newline at end of file diff --git a/site/content/includes/cluster-common-steps.md b/site/content/includes/cluster-common-steps.md index 08e8c731d9..53b07dd82d 100644 --- a/site/content/includes/cluster-common-steps.md +++ b/site/content/includes/cluster-common-steps.md @@ -1,3 +1,32 @@ +### Ready a Namespace + +Once a namespace has finished bootstrapping, you must mark it as ready before receiving traffic by using the _{{% apiendpoint %}}namespace/ready_. + +{{< tabs name="ready_namespaces" >}} +{{% tab name="Command" %}} + +{{< codeinclude file="docs/includes/quickstart/ready-namespace.sh" language="shell" >}} + +{{% /tab %}} +{{% tab name="Output" %}} + +```json +{ + "ready": true +} +``` + +{{% /tab %}} +{{< /tabs >}} + +### Replication factor + +We recommend a replication factor of **3**, with each replica spread across failure domains such as a physical server rack, data center or availability zone. Read our [replication factor recommendations](/docs/operational_guide/replication_and_deployment_in_zones) for more details. 
+ +### Shards + +Read the [placement configuration guide](/docs/operational_guide/placement_configuration) to determine the appropriate number of shards to specify. + ## Writing and Querying Metrics ### Writing Metrics diff --git a/site/content/includes/m3query/annotated_config.yaml b/site/content/includes/m3query/annotated_config.yaml new file mode 100644 index 0000000000..01c9c027fc --- /dev/null +++ b/site/content/includes/m3query/annotated_config.yaml @@ -0,0 +1,246 @@ +# The server listen address +listenAddress: + +# Metrics configuration +# TODO: Which is what? +metrics: + # Scope of metrics root + # TODO: Again, which is? + scope: + # Prefix prepended to metrics collected + prefix: + # Reporting frequency of metrics collected + reportingInterval: + # Tags shared by metrics collected + tags: + # Configuration for a Prometheus reporter (if used) + prometheus: + # Metrics collection endpoint for application + # Default = "/metrics" + handlerPath: + # Listen address for metrics + # Default = "0.0.0.0:7203" + listenAddress: + # Metric sanitization type, valid options: [none, m3, prometheus] + # Default = "none" + sanitization: + # Sampling rate for metrics. min=0.0, max=1.0 + # TODO: What does this mean exactly?
+ samplingRate: + # Enable Go runtime metrics, valid options: [none, simple, moderate, detailed] + # See https://github.com/m3db/m3/blob/master/src/x/instrument/extended.go#L39:L64 for more details + extended: + +# Logging configuration +# TODO: More detail than this +# https://github.com/m3db/m3/blob/9f129cf9f16430cc5a399f60aa5684fb72b55bb5/src/cmd/services/m3query/config/config.go#L116 +logging: + level: info + +# Enables tracing; if nothing is configured, tracing is disabled +tracing: + # Name for tracing service + serviceName: + # Tracing backend to use, valid options: [jaeger, lightstep] + backend: + # If using Jaeger, options to send to tracing backend + jaeger: + # If using Lightstep, options to send to tracing backend + lightstep: + +clusters: + - namespaces: + - namespace: default + type: unaggregated + retention: 48h + client: + config: + service: + # TODO: ? + env: default_env + # Availability zone, valid options: [user-defined, embedded] + zone: + # TODO: ?? + service: m3db + # Directory to store cached etcd data + cacheDir: + # Identify the etcd hosts this node should connect to + etcdClusters: + # TODO: Confusing, if you use embedded, why do you still need endpoints? + # TODO: Embedded vs seed nodes embedded?? + # Availability zone, valid options: [user-defined, embedded] + - zone: + # Member nodes of the etcd cluster, in form url:port + endpoints: + - + seedNodes: + initialCluster: + - hostID: m3db_local + endpoint: http://127.0.0.1:2380 + # The consistency level for writing to a cluster, valid options: [none, one, majority, all] + writeConsistencyLevel: + # The consistency level for reading from a cluster, valid options: [none, one, unstrict_majority, majority, unstrict_all, all] + readConsistencyLevel: + # The timeout for writing data + # TODO: Defaults?
+ writeTimeout: + # The fetch timeout for any given query + # Range = 30s to 5m + fetchTimeout: + # The cluster connect timeout + connectTimeout: + # Configuration for retrying write operations + writeRetry: + initialBackoff: + # Factor for exponential backoff + backoffFactor: + # Maximum backoff time + maxBackoff: + # Maximum retry attempts + maxRetries: + # Add randomness to wait intervals + jitter: + # Configuration for retrying fetch operations + # TODO: Query? + fetchRetry: + initialBackoff: + # Factor for exponential backoff + backoffFactor: + # Maximum backoff time + maxBackoff: + # Maximum retry attempts + maxRetries: + # Add randomness to wait intervals + jitter: + # The number of times a background check fails before a connection is taken out of consideration + backgroundHealthCheckFailLimit: + # The factor of the host connect time when sleeping between a failed health check and the next check + backgroundHealthCheckFailThrottleFactor: + +# TODO: +local: + +# Configuration for the placement, namespaces and database management endpoints. +clusterManagement: + # etcd client configuration + etcd: + # TODO: ? + env: default_env + # Availability zone, valid options: [user-defined, embedded] + zone: + # TODO: ??
+ service: m3db + # Directory to store cached etcd data + cacheDir: + # Identify the etcd hosts this node should connect to + etcdClusters: + m3sd: + # The revision that watch requests start from + watchWithRevision: + newDirectoryNode: + retry: + # The timeout for etcd requests + requestTimeout: + # The timeout for a watchChan initialization + watchChanInitTimeout: + # Frequency to check if a watch chan is no longer subscribed and should be closed + watchChanCheckInterval: + # The delay before resetting the etcd watch chan + watchChanResetInterval: + +# TODO: +filter: + +# TODO: +rpc: + +# TODO: +backend: + +# The worker pool policy for read requests +readWorkerPoolPolicy: + # Worker pool automatically grows to capacity + grow: + # Static pool size, or initial size for dynamically growing pools + size: + +# The worker pool policy for write requests +writeWorkerPoolPolicy: + # Worker pool automatically grows to capacity + grow: + # Static pool size, or initial size for dynamically growing pools + size: + +# TODO: +writeForwarding: + +# TODO: +downsample: + +# TODO: +ingest: + +# Configuration for the carbon server +# TODO: Which is? 
+carbon: + ingester: + aggregateNamespacesAllData: + # A constant time to shift start by + shiftTimeStart: + # A constant time to shift end by + shiftTimeEnd: + # A constant set of steps to shift start by + shiftStepsStart: + # A constant set of steps to shift end by + shiftStepsEnd: + # A constant set of steps to shift start by, if and only if, the end is an exact match to the resolution boundary of a query, and the start is an exact match to the resolution boundary + shiftStepsStartWhenAtResolutionBoundary: + # A constant set of steps to shift end by, if and only if, the start is an exact match to the resolution boundary of a query, and the end is an exact match to the resolution boundary + shiftStepsEndWhenAtResolutionBoundary: + # A constant set of steps to shift end by, if and only if, the start is an exact match to the resolution boundary of a query, and the end is NOT an exact match to the resolution boundary + shiftStepsEndWhenStartAtResolutionBoundary: + # A constant set of steps to shift start by, if and only if, the end is an exact match to the resolution boundary of a query, and the start is NOT an exact match to the resolution boundary + shiftStepsStartWhenEndAtResolutionBoundary: + # Render partial datapoints when the start time is between a datapoint's resolution step size + renderPartialStart: + # Render partial datapoints when the end time is between a datapoint's resolution step size + renderPartialEnd: + # Render series that have only NaNs for entire output instead of returning an empty array of datapoints + renderSeriesAllNaNs: + # Escape all characters using a backslash in a quoted string instead of only escaping quotes + compileEscapeAllNotOnlyQuotes: + +# TODO: +query: + +# TODO: +limits: + +# Additional configuration for metrics tags +# Read https://m3db.io/docs/how_to/query/#id-generation for more details +tagOptions: + # TODO: To do… + idScheme: + +# Sets the lookback duration for queries +# TODO: Which means what?
+# Default = 5m +lookbackDuration: + +# The result options for a query +resultOptions: + # Keeps NaNs before returning query results. + # Default = false + keepNans: + +# TODO: +experimental: + +# TODO: +storeMetricsType: + +# TODO: +multiProcess: + +# TODO: +debug: \ No newline at end of file diff --git a/site/content/operator/api.md b/site/content/operator/api.md index fc6c4f7d2b..2e48d4c783 100644 --- a/site/content/operator/api.md +++ b/site/content/operator/api.md @@ -8,24 +8,25 @@ chapter: true This document enumerates the Custom Resource Definitions used by the M3DB Operator. It is auto-generated from code comments. ## Table of Contents -* [ClusterCondition](#clustercondition) -* [ClusterSpec](#clusterspec) -* [ExternalCoordinatorConfig](#externalcoordinatorconfig) -* [IsolationGroup](#isolationgroup) -* [M3DBCluster](#m3dbcluster) -* [M3DBClusterList](#m3dbclusterlist) -* [M3DBStatus](#m3dbstatus) -* [NodeAffinityTerm](#nodeaffinityterm) -* [AggregatedAttributes](#aggregatedattributes) -* [Aggregation](#aggregation) -* [AggregationOptions](#aggregationoptions) -* [DownsampleOptions](#downsampleoptions) -* [IndexOptions](#indexoptions) -* [Namespace](#namespace) -* [NamespaceOptions](#namespaceoptions) -* [RetentionOptions](#retentionoptions) -* [PodIdentity](#podidentity) -* [PodIdentityConfig](#podidentityconfig) +- [Table of Contents](#table-of-contents) +- [ClusterCondition](#clustercondition) +- [ClusterSpec](#clusterspec) +- [ExternalCoordinatorConfig](#externalcoordinatorconfig) +- [IsolationGroup](#isolationgroup) +- [M3DBCluster](#m3dbcluster) +- [M3DBClusterList](#m3dbclusterlist) +- [M3DBStatus](#m3dbstatus) +- [NodeAffinityTerm](#nodeaffinityterm) +- [AggregatedAttributes](#aggregatedattributes) +- [Aggregation](#aggregation) +- [AggregationOptions](#aggregationoptions) +- [DownsampleOptions](#downsampleoptions) +- [IndexOptions](#indexoptions) +- [Namespace](#namespace) +- [NamespaceOptions](#namespaceoptions) +- 
[RetentionOptions](#retentionoptions) +- [PodIdentity](#podidentity) +- [PodIdentityConfig](#podidentityconfig) ## ClusterCondition @@ -220,7 +221,7 @@ Namespace defines an M3DB namespace or points to a preset M3DB namespace. ## NamespaceOptions -NamespaceOptions defines parameters for an M3DB namespace. See https://m3db.io/docs/operational_guide/namespace_configuration/ for more details. +NamespaceOptions defines parameters for an M3DB namespace. Read [the namespace configuration guide](/docs/operational_guide/namespace_configuration) for more details. | Field | Description | Scheme | Required | | ----- | ----------- | ------ | -------- |