Skip to content
This repository has been archived by the owner on Aug 23, 2023. It is now read-only.

Commit

Permalink
Merge pull request #1920 from grafana/improve_performance_of_metatag_…
Browse files Browse the repository at this point in the history
…doesnt_exist_expr

Improve performance of metatag doesnt exist expr
  • Loading branch information
Dieterbe authored Oct 29, 2020
2 parents ad962a3 + 23abfa4 commit 9beff46
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 16 deletions.
40 changes: 40 additions & 0 deletions idx/memory/meta_tags_query_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -698,6 +698,46 @@ func BenchmarkFilter100kByMetaTagWithIndexSize1mAnd50kMetaRecordsWithMultipleExp
benchmarkFindByMetaTag(b, metricCnt, metaRecordCnt, expectedResCount, expectedTagsPerDef, expectedMetaTagsPerDef, queryGen, tagGen, metaRecordGen)
}

// BenchmarkFilterByAbsenceOfMetaTag benchmarks a query which combines a
// "cluster=" expression with the expression "filterBy=", where filterBy is
// a meta tag that is assigned by half of all meta records.
func BenchmarkFilterByAbsenceOfMetaTag(b *testing.B) {
	metricCnt := 1000000
	metaRecordCnt := 50000
	expectedResCount := 50000
	expectedTagsPerDef := 5
	expectedMetaTagsPerDef := 1

	// alias for the anonymous struct type which the meta record generator
	// has to return, being an alias it is identical to the unnamed type
	type metaRecord = struct {
		expressions []string
		metaTags    []string
	}

	// every meta record has one expression that matches exactly one host value
	metaRecordGen := func(metaRecordId int) metaRecord {
		// the first half of the meta records assign stage=prod, the second
		// half assign the meta tag filterBy=thisTag. in the query we will
		// filter for the absence of filterBy
		metaTag := "stage=prod"
		if metaRecordId >= metaRecordCnt/2 {
			metaTag = "filterBy=thisTag"
		}

		return metaRecord{
			expressions: []string{fmt.Sprintf("host=hostname%d", metaRecordId)},
			metaTags:    []string{metaTag},
		}
	}

	// every metric gets a host, an other and a cluster tag
	tagGen := func(id int) []string {
		// with 1M metrics and 50k meta records each host value
		// will be assigned to 1M/50k = 20 metrics
		metaRecordId := id % metaRecordCnt
		host := fmt.Sprintf("host=hostname%d", metaRecordId)
		other := fmt.Sprintf("other=property%d", metaRecordId)
		cluster := fmt.Sprintf("cluster=cluster%d", id%10)
		return []string{host, other, cluster}
	}

	// the query selects one of the 10 clusters and then
	// filters by the absence of the meta tag filterBy
	queryGen := func(id uint32) []string {
		cluster := fmt.Sprintf("cluster=cluster%d", id%10)
		return []string{cluster, "filterBy="}
	}

	benchmarkFindByMetaTag(b, metricCnt, metaRecordCnt, expectedResCount, expectedTagsPerDef, expectedMetaTagsPerDef, queryGen, tagGen, metaRecordGen)
}

func getMetaRecordsForMetaTagQueryBenchmark(b *testing.B, metaRecordCount int, metaRecordGen func(metaRecordId int) struct {
expressions []string
metaTags []string
Expand Down
41 changes: 25 additions & 16 deletions idx/memory/tag_query_id_filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,14 +92,13 @@ func newIdFilter(expressions tagquery.Expressions, ctx *TagQueryContext) *idFilt
records = append(records, record)
}

// if we don't use an inverted set of meta records, then we check if
// all meta records involved in a meta tag filter use the "=" operator.
// if this is the case then it is cheaper to build a set of acceptable tags
// if a query only involves meta tags of which all underlying expressions
// use the "=" operator, then it is cheaper to build a set of acceptable tags
// based on the meta record expressions and just check whether they are present
// in a metric that gets filtered, compared to doing a full tag index lookup
// to check whether a metric has one of the necessary meta tags associated
// with it.
onlyEqualOperators, singleExprPerRecord := viableOptimizations(invertSetOfMetaRecords, records)
onlyEqualOperators, singleExprPerRecord := viableOptimizations(records)

if onlyEqualOperators {
// there are two different ways how we optimize for the case where all expressions
Expand All @@ -108,9 +107,9 @@ func newIdFilter(expressions tagquery.Expressions, ctx *TagQueryContext) *idFilt
// otherwise we use the second way which is a bit more expensive but it also works
// if some of the involved meta records have multiple expressions.
if singleExprPerRecord {
res.filters[i].testByMetaTags = metaRecordFilterBySetOfValidValues(records)
res.filters[i].testByMetaTags = metaRecordFilterBySetOfValidValues(records, invertSetOfMetaRecords)
} else {
res.filters[i].testByMetaTags = metaRecordFilterBySetOfValidValueSets(records)
res.filters[i].testByMetaTags = metaRecordFilterBySetOfValidValueSets(records, invertSetOfMetaRecords)
}
} else {
metaRecordFilters := make([]tagquery.MetricDefinitionFilter, 0, len(records))
Expand All @@ -132,16 +131,12 @@ func newIdFilter(expressions tagquery.Expressions, ctx *TagQueryContext) *idFilt
// viableOptimizations looks at a set of meta tag records and decides whether two possible
// optimizations can be applied when filtering by these records. it returns two bools to
// indicate which optimizations are or are not viable.
// if invertSetOfMetaRecords is true then none of these optimizations can be used.
//
// * the first bool indicates whether all expressions of all records are using the
// equal operator, there may be more than one expression per record.
// * the second bool indicates whether, in addition to that, each record has only one
// expression, in which case the cheaper of the two optimizations can be used.
func viableOptimizations(invertSetOfMetaRecords bool, records []tagquery.MetaTagRecord) (bool, bool) {
if invertSetOfMetaRecords {
return false, false
}
func viableOptimizations(records []tagquery.MetaTagRecord) (bool, bool) {
singleExprPerRecord := true
for recordIdx := range records {
for exprIdx := range records[recordIdx].Expressions {
Expand All @@ -161,7 +156,9 @@ func viableOptimizations(invertSetOfMetaRecords bool, records []tagquery.MetaTag
// which only involves meta records of which each only has exactly one expression and that
// expression is using the "=" operator. this is quite a narrow scenario, but since it is
// a very common use case it makes sense to optimize for it.
func metaRecordFilterBySetOfValidValues(records []tagquery.MetaTagRecord) tagquery.MetricDefinitionFilter {
// The invertFilter bool flips the filter logic: if it is true then a metric which
// matches one of the meta records gets the decision Fail instead of Pass.
func metaRecordFilterBySetOfValidValues(records []tagquery.MetaTagRecord, invertFilter bool) tagquery.MetricDefinitionFilter {
// we first build a set of valid tags and names.
// since we know that each of the involved meta records uses exactly one expression
// which is using the "=" operator we know that if a given metric's name matches a
Expand All @@ -183,14 +180,19 @@ func metaRecordFilterBySetOfValidValues(records []tagquery.MetaTagRecord) tagque
}
}

resultOnHit := tagquery.Pass
if invertFilter {
resultOnHit = tagquery.Fail
}

return func(_ schema.MKey, name string, tags []string) tagquery.FilterDecision {
for i := range tags {
if _, ok := validValues[tags[i]]; ok {
return tagquery.Pass
return resultOnHit
}
}
if _, ok := validNames[name]; ok {
return tagquery.Pass
return resultOnHit
}
return tagquery.None
}
Expand All @@ -199,7 +201,9 @@ func metaRecordFilterBySetOfValidValues(records []tagquery.MetaTagRecord) tagque
// metaRecordFilterBySetOfValidValueSets creates a filter function to filter by a meta tag
// which only involves meta records of which all expressions are only using the "=" operator,
// it is ok if one meta record uses multiple such expressions.
func metaRecordFilterBySetOfValidValueSets(records []tagquery.MetaTagRecord) tagquery.MetricDefinitionFilter {
// The invertFilter bool flips the filter logic: if it is true then a metric which
// matches one of the meta records gets the decision Fail instead of Pass.
func metaRecordFilterBySetOfValidValueSets(records []tagquery.MetaTagRecord, invertFilter bool) tagquery.MetricDefinitionFilter {
// we first build a set of tag and name value combinations of which each is sufficient
// to pass the generated filter when a metric contains all values of one of these
// combinations
Expand All @@ -221,6 +225,11 @@ func metaRecordFilterBySetOfValidValueSets(records []tagquery.MetaTagRecord) tag
sort.Strings(validValueSets[i].tags)
}

resultOnHit := tagquery.Pass
if invertFilter {
resultOnHit = tagquery.Fail
}

return func(_ schema.MKey, name string, tags []string) tagquery.FilterDecision {
// iterate over the acceptable value combinations and check if one matches this metric
for _, validValueSet := range validValueSets {
Expand All @@ -231,7 +240,7 @@ func metaRecordFilterBySetOfValidValueSets(records []tagquery.MetaTagRecord) tag
}

if sliceContainsElements(validValueSet.tags, tags) {
return tagquery.Pass
return resultOnHit
}
}

Expand Down

0 comments on commit 9beff46

Please sign in to comment.