From 26f4ff7c8b02d1ac3702209bdc20e9ec0216d846 Mon Sep 17 00:00:00 2001 From: Prateek Rungta Date: Wed, 5 Sep 2018 17:16:57 -0400 Subject: [PATCH] Add m3ninx query proptests --- glide.lock | 2 +- glide.yaml | 2 +- .../segment/fst/encoding/docs/data_test.go | 4 +- .../index/segment/fst/writer_reader_test.go | 4 +- src/m3ninx/index/segment/mem/options.go | 2 +- src/m3ninx/index/segment/mem/segment.go | 2 +- .../index/segment/mem/segment_bench_test.go | 4 +- .../segment/mem/terms_dict_bench_test.go | 4 +- src/m3ninx/search/proptest/issue865_test.go | 129 +------------ src/m3ninx/search/proptest/prop_test.go | 147 ++++++++++++++ src/m3ninx/search/proptest/query_gen.go | 146 ++++++++++++++ src/m3ninx/search/proptest/segment_gen.go | 181 ++++++++++++++++++ src/m3ninx/search/proptest/util.go | 73 +++++++ src/m3ninx/{index => }/util/docs.go | 0 .../util/testdata/node_exporter.json | 0 src/m3ninx/{index => }/util/uuid.go | 0 src/m3ninx/{index => }/util/uuid_test.go | 0 17 files changed, 562 insertions(+), 138 deletions(-) create mode 100644 src/m3ninx/search/proptest/prop_test.go create mode 100644 src/m3ninx/search/proptest/query_gen.go create mode 100644 src/m3ninx/search/proptest/segment_gen.go create mode 100644 src/m3ninx/search/proptest/util.go rename src/m3ninx/{index => }/util/docs.go (100%) rename src/m3ninx/{index => }/util/testdata/node_exporter.json (100%) rename src/m3ninx/{index => }/util/uuid.go (100%) rename src/m3ninx/{index => }/util/uuid_test.go (100%) diff --git a/glide.lock b/glide.lock index 7af875dbb5..a19c9feea4 100644 --- a/glide.lock +++ b/glide.lock @@ -614,7 +614,7 @@ testImports: - name: github.com/go-playground/universal-translator version: 71201497bace774495daed26a3874fd339e0b538 - name: github.com/leanovate/gopter - version: f778776473e0ef7764e1434dd01a61cc1ec574b4 + version: f0356731348c8fffa27bab27c37ec8be5b0662c8 subpackages: - commands - gen diff --git a/glide.yaml b/glide.yaml index af593667fc..64ff748a1d 100644 --- a/glide.yaml +++ b/glide.yaml @@ -220,4 +220,4 @@ testImport: version: b433bbd6d743c1854040b39062a3916ed5f78fe8 - package: github.com/leanovate/gopter - version: f778776473e0ef7764e1434dd01a61cc1ec574b4 + version: f0356731348c8fffa27bab27c37ec8be5b0662c8 diff --git a/src/m3ninx/index/segment/fst/encoding/docs/data_test.go b/src/m3ninx/index/segment/fst/encoding/docs/data_test.go index 30e9361c37..4e65dd7c76 100644 --- a/src/m3ninx/index/segment/fst/encoding/docs/data_test.go +++ b/src/m3ninx/index/segment/fst/encoding/docs/data_test.go @@ -25,7 +25,7 @@ import ( "testing" "github.com/m3db/m3/src/m3ninx/doc" - "github.com/m3db/m3/src/m3ninx/index/util" + "github.com/m3db/m3/src/m3ninx/util" "github.com/stretchr/testify/require" ) @@ -76,7 +76,7 @@ func TestStoredFieldsData(t *testing.T) { }, { name: "node exporter metrics", - docs: util.MustReadDocs("../../../../util/testdata/node_exporter.json", 2000), + docs: util.MustReadDocs("../../../../../util/testdata/node_exporter.json", 2000), }, } diff --git a/src/m3ninx/index/segment/fst/writer_reader_test.go b/src/m3ninx/index/segment/fst/writer_reader_test.go index 8983f4f4c4..576af767e8 100644 --- a/src/m3ninx/index/segment/fst/writer_reader_test.go +++ b/src/m3ninx/index/segment/fst/writer_reader_test.go @@ -31,8 +31,8 @@ import ( "github.com/m3db/m3/src/m3ninx/index" sgmt "github.com/m3db/m3/src/m3ninx/index/segment" "github.com/m3db/m3/src/m3ninx/index/segment/mem" - "github.com/m3db/m3/src/m3ninx/index/util" "github.com/m3db/m3/src/m3ninx/postings" + "github.com/m3db/m3/src/m3ninx/util" "github.com/stretchr/testify/require" ) @@ -79,7 +79,7 @@ var ( }, }, } - lotsTestDocuments = util.MustReadDocs("../../util/testdata/node_exporter.json", 2000) + lotsTestDocuments = util.MustReadDocs("../../../util/testdata/node_exporter.json", 2000) testDocuments = []struct { name string diff --git a/src/m3ninx/index/segment/mem/options.go b/src/m3ninx/index/segment/mem/options.go index 4a811e6187..2cff8ae153 100644 --- a/src/m3ninx/index/segment/mem/options.go +++ b/src/m3ninx/index/segment/mem/options.go @@ -21,9 +21,9 @@ package mem import ( - "github.com/m3db/m3/src/m3ninx/index/util" "github.com/m3db/m3/src/m3ninx/postings" "github.com/m3db/m3/src/m3ninx/postings/roaring" + "github.com/m3db/m3/src/m3ninx/util" "github.com/m3db/m3/src/m3ninx/x/bytes" "github.com/m3db/m3x/instrument" diff --git a/src/m3ninx/index/segment/mem/segment.go b/src/m3ninx/index/segment/mem/segment.go index f069f8f5e2..a01687be8c 100644 --- a/src/m3ninx/index/segment/mem/segment.go +++ b/src/m3ninx/index/segment/mem/segment.go @@ -28,8 +28,8 @@ import ( "github.com/m3db/m3/src/m3ninx/doc" "github.com/m3db/m3/src/m3ninx/index" sgmt "github.com/m3db/m3/src/m3ninx/index/segment" - "github.com/m3db/m3/src/m3ninx/index/util" "github.com/m3db/m3/src/m3ninx/postings" + "github.com/m3db/m3/src/m3ninx/util" ) var ( diff --git a/src/m3ninx/index/segment/mem/segment_bench_test.go b/src/m3ninx/index/segment/mem/segment_bench_test.go index 14fb35506a..98811f18a7 100644 --- a/src/m3ninx/index/segment/mem/segment_bench_test.go +++ b/src/m3ninx/index/segment/mem/segment_bench_test.go @@ -25,7 +25,7 @@ import ( "testing" "github.com/m3db/m3/src/m3ninx/doc" - "github.com/m3db/m3/src/m3ninx/index/util" + "github.com/m3db/m3/src/m3ninx/util" ) var ( @@ -53,7 +53,7 @@ func BenchmarkSegment(b *testing.B) { }, } - docs, err := util.ReadDocs("../../util/testdata/node_exporter.json", 2000) + docs, err := util.ReadDocs("../../../util/testdata/node_exporter.json", 2000) if err != nil { b.Fatalf("unable to read documents for benchmarks: %v", err) } diff --git a/src/m3ninx/index/segment/mem/terms_dict_bench_test.go b/src/m3ninx/index/segment/mem/terms_dict_bench_test.go index 6e023be352..010e49ed38 100644 --- a/src/m3ninx/index/segment/mem/terms_dict_bench_test.go +++ b/src/m3ninx/index/segment/mem/terms_dict_bench_test.go @@ -25,8 +25,8 @@ import ( "testing" "github.com/m3db/m3/src/m3ninx/doc" - "github.com/m3db/m3/src/m3ninx/index/util" "github.com/m3db/m3/src/m3ninx/postings" + "github.com/m3db/m3/src/m3ninx/util" ) var ( @@ -54,7 +54,7 @@ func BenchmarkTermsDict(b *testing.B) { }, } - docs, err := util.ReadDocs("../../util/testdata/node_exporter.json", 2000) + docs, err := util.ReadDocs("../../../util/testdata/node_exporter.json", 2000) if err != nil { b.Fatalf("unable to read documents for benchmarks: %v", err) } diff --git a/src/m3ninx/search/proptest/issue865_test.go b/src/m3ninx/search/proptest/issue865_test.go index 0c18c8f793..e63e795dd3 100644 --- a/src/m3ninx/search/proptest/issue865_test.go +++ b/src/m3ninx/search/proptest/issue865_test.go @@ -23,7 +23,6 @@ package proptest import ( - "fmt" "math/rand" "os" "testing" @@ -31,28 +30,18 @@ import ( "github.com/m3db/m3/src/m3ninx/doc" "github.com/m3db/m3/src/m3ninx/index" - "github.com/m3db/m3/src/m3ninx/index/segment" - "github.com/m3db/m3/src/m3ninx/index/segment/fst" - "github.com/m3db/m3/src/m3ninx/index/segment/mem" - "github.com/m3db/m3/src/m3ninx/postings" "github.com/m3db/m3/src/m3ninx/search" "github.com/m3db/m3/src/m3ninx/search/executor" "github.com/m3db/m3/src/m3ninx/search/query" "github.com/leanovate/gopter" - "github.com/leanovate/gopter/gen" "github.com/leanovate/gopter/prop" "github.com/stretchr/testify/require" ) // NB(prateek): this test simulates the issues described in issue: https://github.com/m3db/m3/issues/865 -// tl;dr - the searcher code assumes the input readers had disjoint doc ID ranges; it caused issues when that -// was not true. var ( - memOptions = mem.NewOptions() - fstOptions = fst.NewOptions() - doc1 = doc.Document{ ID: []byte("__name__=node_cpu_seconds_total,cpu=1,instance=m3db-node01:9100,job=node-exporter,mode=system,"), Fields: []doc.Field{ @@ -98,6 +87,8 @@ func TestAnyDistributionOfDocsDoesNotAffectQuery(t *testing.T) { parameters.Rng = rand.New(rand.NewSource(seed)) properties := gopter.NewProperties(parameters) + docMatcher, err := newDocumentIteratorMatcher(doc2) + require.NoError(t, err) properties.Property("Any distribution of simple documents does not affect query results", prop.ForAll( func(i propTestInput) (bool, error) { segments := i.generate(t, simpleTestDocs) @@ -121,21 +112,7 @@ func TestAnyDistributionOfDocsDoesNotAffectQuery(t *testing.T) { return false, err } - if !d.Next() { - return false, fmt.Errorf("unable to find any documents") - } - - curr := d.Current() - if !curr.Equal(doc2) { - return false, fmt.Errorf("returned document [%+v] did not match exepcted document [%+v]", - curr, doc2) - } - - if d.Next() { - return false, fmt.Errorf("found too many documents") - } - - if err := d.Err(); err != nil { + if err := docMatcher.Matches(d); err != nil { return false, err } @@ -149,103 +126,3 @@ func TestAnyDistributionOfDocsDoesNotAffectQuery(t *testing.T) { t.Errorf("failed with initial seed: %d", seed) } } - -func (i propTestInput) generate(t *testing.T, docs []doc.Document) []segment.Segment { - var result []segment.Segment - for j := 0; j < len(i.segments); j++ { - initialOffset := postings.ID(i.segments[j].initialDocIDOffset) - s, err := mem.NewSegment(initialOffset, memOptions) - require.NoError(t, err) - for k := 0; k < len(i.docIds[j]); k++ { - idx := i.docIds[j][k] - _, err = s.Insert(docs[idx]) - require.NoError(t, err) - } - - if i.segments[j].simpleSegment { - result = append(result, s) - continue - } - - result = append(result, fst.ToTestSegment(t, s, fstOptions)) - } - return result -} - -type propTestInput struct { - segments []generatedSegment - docIds [][]int -} - -func genPropTestInput(numDocs int) gopter.Gen { - return func(genParams *gopter.GenParameters) *gopter.GenResult { - numSegmentsRes, ok := gen.IntRange(1, numDocs)(genParams).Retrieve() - if !ok { - panic("unable to generate segments") - } - numSegments := numSegmentsRes.(int) - - docIds := make([]int, 0, numDocs) - for i := 0; i < numDocs; i++ { - docIds = append(docIds, i) - } - - randomIds := randomDocIds(docIds) - randomIds.shuffle(genParams.Rng) - - genSegments := make([]generatedSegment, 0, numSegments) - partitionedDocs := make([][]int, 0, numSegments) - for i := 0; i < numSegments; i++ { - partitionedDocs = append(partitionedDocs, []int{}) - segRes, ok := genSegment()(genParams).Retrieve() - if !ok { - panic("unable to generate segments") - } - genSegments = append(genSegments, segRes.(generatedSegment)) - } - - for i := 0; i < numDocs; i++ { - idx := i % numSegments - partitionedDocs[idx] = append(partitionedDocs[idx], randomIds[i]) - } - - result := propTestInput{ - segments: genSegments, - docIds: partitionedDocs, - } - if len(genSegments) != len(partitionedDocs) { - panic(fmt.Errorf("unequal lengths of segments and docs: %+v", result)) - } - - return gopter.NewGenResult(result, gopter.NoShrinker) - } -} - -func genSegment() gopter.Gen { - return gopter.CombineGens( - gen.Bool(), // simple segment - gen.IntRange(1, 5), // initial doc id offset - ).Map(func(val interface{}) generatedSegment { - inputs := val.([]interface{}) - return generatedSegment{ - simpleSegment: inputs[0].(bool), - initialDocIDOffset: inputs[1].(int), - } - }) -} - -type generatedSegment struct { - simpleSegment bool - initialDocIDOffset int -} - -type randomDocIds []int - -func (d randomDocIds) shuffle(rng *rand.Rand) { - // Start from the last element and swap one by one. - // NB: We don't need to run for the first element that's why i > 0 - for i := len(d) - 1; i > 0; i-- { - j := rng.Intn(i) - d[i], d[j] = d[j], d[i] - } -} diff --git a/src/m3ninx/search/proptest/prop_test.go b/src/m3ninx/search/proptest/prop_test.go new file mode 100644 index 0000000000..ef6dbf95e6 --- /dev/null +++ b/src/m3ninx/search/proptest/prop_test.go @@ -0,0 +1,147 @@ +// +build big + +// Copyright (c) 2018 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package proptest + +import ( + "math/rand" + "os" + "testing" + "time" + + "github.com/m3db/m3/src/m3ninx/index" + "github.com/m3db/m3/src/m3ninx/index/segment/fst" + "github.com/m3db/m3/src/m3ninx/search" + "github.com/m3db/m3/src/m3ninx/search/executor" + "github.com/m3db/m3/src/m3ninx/util" + + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" + "github.com/stretchr/testify/require" +) + +var ( + lotsTestDocuments = util.MustReadDocs("../../util/testdata/node_exporter.json", 2000) +) + +func TestSegmentDistributionDoesNotAffectQuery(t *testing.T) { + parameters := gopter.DefaultTestParameters() + seed := time.Now().UnixNano() + parameters.MinSuccessfulTests = 100 + parameters.MaxSize = 20 + parameters.Rng = rand.New(rand.NewSource(seed)) + properties := gopter.NewProperties(parameters) + + simpleSeg := newTestMemSegment(t, lotsTestDocuments) + properties.Property("Any distribution of test documents in segments does not affect query results", prop.ForAll( + func(i propTestInput, q search.Query) (bool, error) { + r, err := simpleSeg.Reader() + require.NoError(t, err) + eOrg := executor.NewExecutor([]index.Reader{r}) + dOrg, err := eOrg.Execute(q) + if err != nil { + return false, err + } + matchedDocs, err := collectDocs(dOrg) + require.NoError(t, err) + docMatcher, err := newDocumentIteratorMatcher(matchedDocs...) + require.NoError(t, err) + + segments := i.generate(t, lotsTestDocuments) + readers := make([]index.Reader, 0, len(segments)) + for _, s := range segments { + r, err := s.Reader() + if err != nil { + return false, err + } + readers = append(readers, r) + } + + e := executor.NewExecutor(readers) + d, err := e.Execute(q) + if err != nil { + return false, err + } + + if err := docMatcher.Matches(d); err != nil { + return false, err + } + + return true, nil + }, + genPropTestInput(len(lotsTestDocuments)), + genQuery(lotsTestDocuments), + )) + + reporter := gopter.NewFormatedReporter(true, 160, os.Stdout) + if !properties.Run(reporter) { + t.Errorf("failed with initial seed: %d", seed) + } +} + +func TestFSTSimpleSegmentsQueryTheSame(t *testing.T) { + parameters := gopter.DefaultTestParameters() + seed := time.Now().UnixNano() + parameters.MinSuccessfulTests = 100 + parameters.MaxSize = 20 + parameters.Rng = rand.New(rand.NewSource(seed)) + properties := gopter.NewProperties(parameters) + + simpleSeg := newTestMemSegment(t, lotsTestDocuments) + fstSeg := fst.ToTestSegment(t, simpleSeg, fstOptions) + + properties.Property("Simple & FST Segments Query the same results", prop.ForAll( + func(q search.Query) (bool, error) { + r, err := simpleSeg.Reader() + require.NoError(t, err) + eOrg := executor.NewExecutor([]index.Reader{r}) + dOrg, err := eOrg.Execute(q) + if err != nil { + return false, err + } + matchedDocs, err := collectDocs(dOrg) + require.NoError(t, err) + docMatcher, err := newDocumentIteratorMatcher(matchedDocs...) + require.NoError(t, err) + + rFst, err := fstSeg.Reader() + require.NoError(t, err) + e := executor.NewExecutor([]index.Reader{rFst}) + d, err := e.Execute(q) + if err != nil { + return false, err + } + + if err := docMatcher.Matches(d); err != nil { + return false, err + } + + return true, nil + }, + genQuery(lotsTestDocuments), + )) + + reporter := gopter.NewFormatedReporter(true, 160, os.Stdout) + if !properties.Run(reporter) { + t.Errorf("failed with initial seed: %d", seed) + } +} diff --git a/src/m3ninx/search/proptest/query_gen.go b/src/m3ninx/search/proptest/query_gen.go new file mode 100644 index 0000000000..121ec51b9f --- /dev/null +++ b/src/m3ninx/search/proptest/query_gen.go @@ -0,0 +1,146 @@ +// Copyright (c) 2018 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package proptest + +import ( + "bytes" + "reflect" + + "github.com/m3db/m3/src/m3ninx/doc" + "github.com/m3db/m3/src/m3ninx/search" + "github.com/m3db/m3/src/m3ninx/search/query" + + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/gen" +) + +func genTermQuery(docs []doc.Document) gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + docIDRes, ok := gen.IntRange(0, len(docs)-1)(genParams).Retrieve() + if !ok { + panic("unable to generate term query") // should never happen + } + docID := docIDRes.(int) + + doc := docs[docID] + fieldRes, ok := gen.IntRange(0, len(doc.Fields)-1)(genParams).Retrieve() + if !ok { + panic("unable to generate term query fields") // should never happen + } + + fieldID := fieldRes.(int) + field := doc.Fields[fieldID] + + q := query.NewTermQuery(field.Name, field.Value) + return gopter.NewGenResult(q, gopter.NoShrinker) + } +} + +func genRegexpQuery(docs []doc.Document) gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + docIDRes, ok := gen.IntRange(0, len(docs)-1)(genParams).Retrieve() + if !ok { + panic("unable to generate regexp query") // should never happen + } + docID := docIDRes.(int) + + doc := docs[docID] + fieldRes, ok := gen.IntRange(0, len(doc.Fields)-1)(genParams).Retrieve() + if !ok { + panic("unable to generate regexp query fields") // should never happen + } + + fieldID := fieldRes.(int) + field := doc.Fields[fieldID] + + var re []byte + + reType := genParams.NextUint64() % 3 + switch reType { + case 0: // prefix + idx := genParams.NextUint64() % uint64(len(field.Value)) + re = append([]byte(nil), field.Value[:idx]...) + re = append(re, []byte(".*")...) + case 1: // suffix + idx := genParams.NextUint64() % uint64(len(field.Value)) + re = append([]byte(".*"), field.Value[idx:]...) + case 2: // middle + start := genParams.NextUint64() % uint64(len(field.Value)) + remain := uint64(len(field.Value)) - start + end := start + genParams.NextUint64()%remain + re = append(append([]byte(".*"), field.Value[start:end]...), []byte(".*")...) + } + + // escape any '(' or ')' we see to avoid regular expression parsing failure + escapeFront := bytes.Replace(re, []byte("("), []byte("\\("), -1) + escapeBack := bytes.Replace(escapeFront, []byte("("), []byte("\\)"), -1) + + q, err := query.NewRegexpQuery(field.Name, escapeBack) + if err != nil { + panic(err) + } + + return gopter.NewGenResult(q, gopter.NoShrinker) + } +} + +func genNegationQuery(docs []doc.Document) gopter.Gen { + return gen.OneGenOf( + genTermQuery(docs), + genRegexpQuery(docs), + ). + Map(func(q search.Query) search.Query { + return query.NewNegationQuery(q) + }) +} + +func genConjuctionQuery(docs []doc.Document) gopter.Gen { + return gen.SliceOf( + gen.OneGenOf( + genTermQuery(docs), + genRegexpQuery(docs), + genNegationQuery(docs)), + reflect.TypeOf((*search.Query)(nil)).Elem()). + Map(func(qs []search.Query) search.Query { + return query.NewConjunctionQuery(qs) + }) +} + +func genDisjunctionQuery(docs []doc.Document) gopter.Gen { + return gen.SliceOf( + gen.OneGenOf( + genTermQuery(docs), + genRegexpQuery(docs), + genNegationQuery(docs)), + reflect.TypeOf((*search.Query)(nil)).Elem()). + Map(func(qs []search.Query) search.Query { + return query.NewDisjunctionQuery(qs) + }) +} + +func genQuery(docs []doc.Document) gopter.Gen { + return gen.OneGenOf( + genTermQuery(docs), + genRegexpQuery(docs), + genNegationQuery(docs), + genConjuctionQuery(docs), + genDisjunctionQuery(docs)) +} diff --git a/src/m3ninx/search/proptest/segment_gen.go b/src/m3ninx/search/proptest/segment_gen.go new file mode 100644 index 0000000000..855adebbdf --- /dev/null +++ b/src/m3ninx/search/proptest/segment_gen.go @@ -0,0 +1,181 @@ +// Copyright (c) 2018 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package proptest + +import ( + "fmt" + "math/rand" + "testing" + + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/gen" + "github.com/m3db/m3/src/m3ninx/doc" + "github.com/m3db/m3/src/m3ninx/index/segment" + "github.com/m3db/m3/src/m3ninx/index/segment/fst" + "github.com/m3db/m3/src/m3ninx/index/segment/mem" + "github.com/m3db/m3/src/m3ninx/postings" + "github.com/stretchr/testify/require" +) + +var ( + memOptions = mem.NewOptions() + fstOptions = fst.NewOptions() +) + +func collectDocs(iter doc.Iterator) ([]doc.Document, error) { + var docs []doc.Document + for iter.Next() { + docs = append(docs, iter.Current()) + } + + if err := iter.Err(); err != nil { + return nil, err + } + + return docs, nil +} + +func newTestMemSegment(t *testing.T, docs []doc.Document) segment.MutableSegment { + opts := mem.NewOptions() + s, err := mem.NewSegment(postings.ID(0), opts) + require.NoError(t, err) + for _, d := range docs { + _, err := s.Insert(d) + require.NoError(t, err) + } + return s +} + +func (i propTestInput) generate(t *testing.T, docs []doc.Document) []segment.Segment { + var result []segment.Segment + for j := 0; j < len(i.segments); j++ { + initialOffset := postings.ID(i.segments[j].initialDocIDOffset) + s, err := mem.NewSegment(initialOffset, memOptions) + require.NoError(t, err) + for k := 0; k < len(i.docIds[j]); k++ { + idx := i.docIds[j][k] + _, err = s.Insert(docs[idx]) + require.NoError(t, err) + } + + if i.segments[j].simpleSegment { + result = append(result, s) + continue + } + + result = append(result, fst.ToTestSegment(t, s, fstOptions)) + } + return result +} + +type propTestInput struct { + numDocs int + segments []generatedSegment + docIds [][]int +} + +func genPropTestInput(numDocs int) gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + maxNumSegments := numDocs + if maxNumSegments > 10 { + maxNumSegments = 10 + } + + numSegmentsRes, ok := gen.IntRange(1, maxNumSegments)(genParams).Retrieve() + if !ok { + panic("unable to generate segments") + } + numSegments := numSegmentsRes.(int) + + docIds := make([]int, 0, numDocs) + for i := 0; i < numDocs; i++ { + docIds = append(docIds, i) + } + + randomIds := randomDocIds(docIds) + randomIds.shuffle(genParams.Rng) + + genSegments := make([]generatedSegment, 0, numSegments) + partitionedDocs := make([][]int, 0, numSegments) + for i := 0; i < numSegments; i++ { + partitionedDocs = append(partitionedDocs, []int{}) + segRes, ok := genSegment()(genParams).Retrieve() + if !ok { + panic("unable to generate segments") + } + genSegments = append(genSegments, segRes.(generatedSegment)) + } + + for i := 0; i < numDocs; i++ { + idx := i % numSegments + partitionedDocs[idx] = append(partitionedDocs[idx], randomIds[i]) + } + + result := propTestInput{ + numDocs: numDocs, + segments: genSegments, + docIds: partitionedDocs, + } + if len(genSegments) != len(partitionedDocs) { + panic(fmt.Errorf("unequal lengths of segments and docs: %+v", result)) + } + + return gopter.NewGenResult(result, gopter.NoShrinker) + } +} + +func genSegment() gopter.Gen { + return gopter.CombineGens( + gen.Bool(), // simple segment + gen.IntRange(1, 5), // initial doc id offset + ).Map(func(val interface{}) generatedSegment { + var inputs []interface{} + if x, ok := val.(*gopter.GenResult); ok { + res, rOk := x.Retrieve() + if !rOk { + panic("should never happen") + } + inputs = res.([]interface{}) + } else { + inputs = val.([]interface{}) + } + return generatedSegment{ + simpleSegment: inputs[0].(bool), + initialDocIDOffset: inputs[1].(int), + } + }) +} + +type generatedSegment struct { + simpleSegment bool + initialDocIDOffset int +} + +type randomDocIds []int + +func (d randomDocIds) shuffle(rng *rand.Rand) { + // Start from the last element and swap one by one. + // NB: We don't need to run for the first element that's why i > 0 + for i := len(d) - 1; i > 0; i-- { + j := rng.Intn(i) + d[i], d[j] = d[j], d[i] + } +} diff --git a/src/m3ninx/search/proptest/util.go b/src/m3ninx/search/proptest/util.go new file mode 100644 index 0000000000..b4c35706ab --- /dev/null +++ b/src/m3ninx/search/proptest/util.go @@ -0,0 +1,73 @@ +// Copyright (c) 2018 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package proptest + +import ( + "fmt" + + "github.com/m3db/m3/src/m3ninx/doc" +) + +type documentIteratorMatcher struct { + expectedDocs map[string]doc.Document +} + +func newDocumentIteratorMatcher(docs ...doc.Document) (*documentIteratorMatcher, error) { + docMap := make(map[string]doc.Document, len(docs)) + for _, d := range docs { + id := string(d.ID) + if _, ok := docMap[id]; ok { + return nil, fmt.Errorf("received document with duplicate id: %v", d) + } + docMap[id] = d + } + return &documentIteratorMatcher{docMap}, nil +} + +// Matches returns whether the provided iterator matches the collection of provided docs. +func (m *documentIteratorMatcher) Matches(i doc.Iterator) error { + pendingDocIDs := make(map[string]doc.Document, len(m.expectedDocs)) + for id := range m.expectedDocs { + pendingDocIDs[id] = m.expectedDocs[id] + } + for i.Next() { + d := i.Current() + id := string(d.ID) + expectedDoc, ok := m.expectedDocs[id] + if !ok { + return fmt.Errorf("received un-expected document: %+v", d) + } + if !expectedDoc.Equal(d) { + return fmt.Errorf("received document: %+v did not match expected doc %+v", d, expectedDoc) + } + delete(pendingDocIDs, id) + } + if err := i.Err(); err != nil { + return fmt.Errorf("unexpected iterator error: %v", err) + } + if err := i.Close(); err != nil { + return fmt.Errorf("unexpected iterator close error: %v", err) + } + if len(pendingDocIDs) > 0 { + return fmt.Errorf("did not receive docs: %+v", pendingDocIDs) + } + return nil +} diff --git a/src/m3ninx/index/util/docs.go b/src/m3ninx/util/docs.go similarity index 100% rename from src/m3ninx/index/util/docs.go rename to src/m3ninx/util/docs.go diff --git a/src/m3ninx/index/util/testdata/node_exporter.json b/src/m3ninx/util/testdata/node_exporter.json similarity index 100% rename from src/m3ninx/index/util/testdata/node_exporter.json rename to src/m3ninx/util/testdata/node_exporter.json diff --git a/src/m3ninx/index/util/uuid.go b/src/m3ninx/util/uuid.go similarity index 100% rename from src/m3ninx/index/util/uuid.go rename to src/m3ninx/util/uuid.go diff --git a/src/m3ninx/index/util/uuid_test.go b/src/m3ninx/util/uuid_test.go similarity index 100% rename from src/m3ninx/index/util/uuid_test.go rename to src/m3ninx/util/uuid_test.go