Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Significantly improve performance of FetchResultToPromResult and helper functions #1003

Merged
merged 7 commits into from
Oct 2, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 51 additions & 19 deletions src/query/storage/converter.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,51 +135,83 @@ func TimestampToTime(timestampMS int64) time.Time {

// TimeToTimestamp converts a time.Time to a prometheus timestamp, i.e. the
// number of milliseconds elapsed since the Unix epoch, truncated toward zero.
func TimeToTimestamp(timestamp time.Time) int64 {
	const (
		millisPerSecond = int64(time.Second / time.Millisecond)
		nanosPerMilli   = int64(time.Millisecond)
	)

	// Combine whole seconds with the sub-second remainder rather than
	// dividing UnixNano(): UnixNano() is undefined for instants that do not
	// fit in an int64 of nanoseconds (before 1678 or after 2262, which
	// includes the zero time.Time). Still significantly faster than
	// time.Truncate().
	nanos := int64(timestamp.Nanosecond())
	millis := timestamp.Unix()*millisPerSecond + nanos/nanosPerMilli

	// Nanosecond() is never negative, so the sum above rounds down. Bump
	// negative results that carry a sub-millisecond remainder so they
	// truncate toward zero, matching what integer division of UnixNano()
	// yields for in-range times.
	if millis < 0 && nanos%nanosPerMilli != 0 {
		millis++
	}
	return millis
}

// FetchResultToPromResult converts fetch results from M3 to Prometheus result
// FetchResultToPromResult converts fetch results from M3 to Prometheus result.
// TODO(rartoul): We should pool all of these intermediary data structures, or
// at least the []*prompb.Sample (as that's the most heavily allocated object)
// since we have full control over the lifecycle.
func FetchResultToPromResult(result *FetchResult) *prompb.QueryResult {
timeseries := make([]*prompb.TimeSeries, 0)

// Perform bulk allocation upfront then convert to pointers afterwards
// to reduce total number of allocations. See BenchmarkFetchResultToPromResult
// if modifying.
timeseries := make([]prompb.TimeSeries, 0, len(result.SeriesList))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice pattern, we can probably make use of this approach elsewhere too

for _, series := range result.SeriesList {
promTs := SeriesToPromTS(series)
timeseries = append(timeseries, promTs)
}

timeSeriesPointers := make([]*prompb.TimeSeries, 0, len(result.SeriesList))
for i := range timeseries {
timeSeriesPointers = append(timeSeriesPointers, &timeseries[i])
}

return &prompb.QueryResult{
Timeseries: timeseries,
Timeseries: timeSeriesPointers,
}
}

// SeriesToPromTS converts a series to prometheus timeseries
func SeriesToPromTS(series *ts.Series) *prompb.TimeSeries {
// SeriesToPromTS converts a series to prometheus timeseries.
func SeriesToPromTS(series *ts.Series) prompb.TimeSeries {
labels := TagsToPromLabels(series.Tags)
samples := SeriesToPromSamples(series)
return &prompb.TimeSeries{Labels: labels, Samples: samples}
return prompb.TimeSeries{Labels: labels, Samples: samples}
}

// TagsToPromLabels converts tags to prometheus labels
// TagsToPromLabels converts tags to prometheus labels.
func TagsToPromLabels(tags models.Tags) []*prompb.Label {
labels := make([]*prompb.Label, 0, len(tags))
// Perform bulk allocation upfront then convert to pointers afterwards
// to reduce total number of allocations. See BenchmarkFetchResultToPromResult
// if modifying.
labels := make([]prompb.Label, 0, len(tags))
for _, t := range tags {
labels = append(labels, &prompb.Label{Name: t.Name, Value: t.Value})
labels = append(labels, prompb.Label{Name: t.Name, Value: t.Value})
}

labelsPointers := make([]*prompb.Label, 0, len(tags))
for i := range labels {
labelsPointers = append(labelsPointers, &labels[i])
}

return labels
return labelsPointers
}

// SeriesToPromSamples series datapoints to prometheus samples
// SeriesToPromSamples converts series datapoints to prometheus samples.
func SeriesToPromSamples(series *ts.Series) []*prompb.Sample {
samples := make([]*prompb.Sample, series.Len())
for i := 0; i < series.Len(); i++ {
samples[i] = &prompb.Sample{
Timestamp: series.Values().DatapointAt(i).Timestamp.UnixNano() / int64(time.Millisecond),
Value: series.Values().ValueAt(i),
}
var (
seriesLen = series.Len()
values = series.Values()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: we only use these in one place so probably prefer to drop the var for it and just use the accessor?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nah, because then it gets called on every iteration of the loop. Moving these out into vars actually made a significant impact (a 20%-ish improvement or so).

datapoints = values.Datapoints()
// Perform bulk allocation upfront then convert to pointers afterwards
// to reduce total number of allocations. See BenchmarkFetchResultToPromResult
// if modifying.
samples = make([]prompb.Sample, 0, seriesLen)
)
for _, dp := range datapoints {
samples = append(samples, prompb.Sample{
Timestamp: TimeToTimestamp(dp.Timestamp),
Value: dp.Value,
})
}

samplesPointers := make([]*prompb.Sample, 0, len(samples))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: alternatively, can use seriesLen here too

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just seemed safer to use the length of the thing I'm actually iterating through

for i := range samples {
samplesPointers = append(samplesPointers, &samples[i])
}

return samples
return samplesPointers
}

func iteratorToTsSeries(
Expand Down
44 changes: 44 additions & 0 deletions src/query/storage/converter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"github.com/m3db/m3/src/query/generated/proto/prompb"
"github.com/m3db/m3/src/query/models"
"github.com/m3db/m3/src/query/test/seriesiter"
"github.com/m3db/m3/src/query/ts"
"github.com/m3db/m3x/ident"
"github.com/m3db/m3x/pool"
xsync "github.com/m3db/m3x/sync"
Expand Down Expand Up @@ -229,3 +230,46 @@ func TestPromReadQueryToM3(t *testing.T) {
})
}
}

var (
benchResult *prompb.QueryResult
)

// BenchmarkFetchResultToPromResult-8 100 10563444 ns/op 25368543 B/op 4443 allocs/op
func BenchmarkFetchResultToPromResult(b *testing.B) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might be useful to paste the benchmark results in a comment here just for easier comparisons in the future?

var (
numSeries = 1000
numDatapointsPerSeries = 1000
numTagsPerSeries = 10
fr = &FetchResult{
SeriesList: make(ts.SeriesList, 0, numSeries),
}
)

for i := 0; i < numSeries; i++ {
values := make(ts.Datapoints, 0, numDatapointsPerSeries)
for i := 0; i < numDatapointsPerSeries; i++ {
values = append(values, ts.Datapoint{
Timestamp: time.Time{},
Value: float64(i),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: maybe use a random value and time.Now() instead?

})
}

tags := make(models.Tags, 0, numTagsPerSeries)
for i := 0; i < numTagsPerSeries; i++ {
tags = append(tags, models.Tag{
Name: fmt.Sprintf("name-%d", i),
Value: fmt.Sprintf("value-%d", i),
})
}

series := ts.NewSeries(
fmt.Sprintf("series-%d", i), values, tags)

fr.SeriesList = append(fr.SeriesList, series)
}

for i := 0; i < b.N; i++ {
benchResult = FetchResultToPromResult(fr)
}
}
13 changes: 13 additions & 0 deletions src/query/ts/values.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ type Values interface {
// DatapointAt returns the datapoint at the nth element
DatapointAt(n int) Datapoint

// Datapoints returns all the datapoints
Datapoints() []Datapoint

// AlignToBounds returns values aligned to the start time and duration
AlignToBounds(bounds models.Bounds) []Datapoints
}
Expand All @@ -68,6 +71,9 @@ func (d Datapoints) ValueAt(n int) float64 { return d[n].Value }
// DatapointAt returns the datapoint at the nth element. It indexes the
// receiver directly, so an out-of-range n panics.
func (d Datapoints) DatapointAt(n int) Datapoint { return d[n] }

// Datapoints returns all the datapoints. The receiver itself is returned
// rather than a copy, so the result shares its elements with d.
func (d Datapoints) Datapoints() []Datapoint { return d }

// Values returns the values representation.
func (d Datapoints) Values() []float64 {
values := make([]float64, len(d))
Expand Down Expand Up @@ -137,6 +143,13 @@ func (b *fixedResolutionValues) DatapointAt(point int) Datapoint {
Value: b.ValueAt(point),
}
}
// Datapoints returns all the datapoints, building a new slice with one
// entry per element of b.values by calling DatapointAt for each index.
func (b *fixedResolutionValues) Datapoints() []Datapoint {
	out := make([]Datapoint, len(b.values))
	for idx := range b.values {
		out[idx] = b.DatapointAt(idx)
	}
	return out
}

// AlignToBounds returns values aligned to given bounds.
// TODO: Consider bounds as well
Expand Down