Skip to content

Commit

Permalink
Optimize LabelValues in metric proto
Browse files Browse the repository at this point in the history
LabelValues is now a separate message.

Benchmarking results are below (Baseline is current `master`, Proposed is after this commit).
This change reduces CPU usage by about 18% for one-data-point timeseries encoding and reduces
memory consumption by about 7%.

```
===== Encoded sizes
Encoding                       Uncompressed  Improved        Compressed  Improved
Baseline/MetricOne              19150 bytes  [1.000], gziped 1600 bytes  [1.000]
Proposed/MetricOne              18550 bytes  [1.032], gziped 1601 bytes  [0.999]

Encoding                       Uncompressed  Improved        Compressed  Improved
Baseline/MetricSeries           43115 bytes  [1.000], gziped 6316 bytes  [1.000]
Proposed/MetricSeries           42515 bytes  [1.014], gziped 6285 bytes  [1.005]

goos: darwin
goarch: amd64
pkg: github.com/tigrannajaryan/exp-otelproto/encodings
BenchmarkEncode/Baseline/MetricOne-8           	      42	 125724579 ns/op
BenchmarkEncode/Proposed/MetricOne-8           	      62	 102677556 ns/op

BenchmarkEncode/Baseline/MetricSeries-8        	      16	 323098665 ns/op
BenchmarkEncode/Proposed/MetricSeries-8        	      19	 311684082 ns/op

BenchmarkDecode/Baseline/MetricOne-8           	      21	 274958910 ns/op	156696037 B/op	 4274000 allocs/op
BenchmarkDecode/Proposed/MetricOne-8           	      24	 268134546 ns/op	144696034 B/op	 3974000 allocs/op

BenchmarkDecode/Baseline/MetricSeries-8        	       8	 648421823 ns/op	339096032 B/op	 8374000 allocs/op
BenchmarkDecode/Proposed/MetricSeries-8        	       9	 637746497 ns/op	327096035 B/op	 8074000 allocs/op
```
  • Loading branch information
Tigran Najaryan committed Oct 31, 2019
1 parent 6d70feb commit beea350
Showing 1 changed file with 34 additions and 12 deletions.
46 changes: 34 additions & 12 deletions opentelemetry/proto/metrics/v1/metrics.proto
Original file line number Diff line number Diff line change
Expand Up @@ -104,13 +104,43 @@ message MetricDescriptor {
repeated string label_keys = 5;
}

// LabelValues is a list of label string values.
message LabelValues {
// Values of labels. Each element in this array is the value of the key defined at the
// corresponding index of MetricDescriptor.label_keys array. The number of elements
// in these 2 arrays must be the same. Note that the values array must contain an
// element for every key: a label whose value is an empty string is stored as an
// empty string, and a label whose value was originally unspecified is also stored
// as an empty string (see unspecified_value_indexes for how the two are told apart).
repeated string values = 1;

// Some metric sources allow setting the value of a label to be equal not just to
// empty string but to an unspecified value, or in other words the value of a particular
// label defined in label_keys array may be missing in a particular timeseries.
// In order to support this concept and because ProtoBuf cannot distinguish empty string
// from unspecified string we store the indexes (into the "values" array) of
// unspecified labels in this field. This field contains only indexes of unspecified labels.
// This field should not contain duplicate numbers, numbers less than zero or
// numbers greater than or equal to the length of the values array.
repeated int32 unspecified_value_indexes = 2;

// For example:
// label_keys = {"a", "b", "c", "d", "e"}
// values = {"value1", "another value", "", "", "yet another string"}
// unspecified_value_indexes = {2}
// Because the unspecified_value_indexes field contains the value 2, the empty
// string at index 2 of the values array must be interpreted as an unspecified value
// and not as an empty string, which means label "c" has no value. In contrast, the
// empty value at index 3 is treated as a normal empty string, so label "d" has
// the value of an empty string.
}

// Int64TimeSeries is a list of data points that describes the time-varying values
// of an int64 metric.
message Int64TimeSeries {
// The set of label values that uniquely identify this timeseries. Applies to
// all points. The order of label values must match that of label keys in the
// metric descriptor.
repeated LabelValue label_values = 1;
LabelValues label_values = 1;

// The data points of this timeseries.
repeated Int64Value points = 2;
Expand All @@ -122,7 +152,7 @@ message DoubleTimeSeries {
// The set of label values that uniquely identify this timeseries. Applies to
// all points. The order of label values must match that of label keys in the
// metric descriptor.
repeated LabelValue label_values = 1;
LabelValues label_values = 1;

// The data points of this timeseries.
repeated DoubleValue points = 2;
Expand All @@ -134,7 +164,7 @@ message HistogramTimeSeries {
// The set of label values that uniquely identify this timeseries. Applies to
// all points. The order of label values must match that of label keys in the
// metric descriptor.
repeated LabelValue label_values = 1;
LabelValues label_values = 1;

// The data points of this timeseries.
repeated HistogramValue points = 2;
Expand All @@ -146,20 +176,12 @@ message SummaryTimeSeries {
// The set of label values that uniquely identify this timeseries. Applies to
// all points. The order of label values must match that of label keys in the
// metric descriptor.
repeated LabelValue label_values = 1;
LabelValues label_values = 1;

// The data points of this timeseries.
repeated SummaryValue points = 2;
}

message LabelValue {
// The value for the label.
string value = 1;
// If false the value field is ignored and considered not set.
// This is used to differentiate a missing label from an empty string.
bool has_value = 2;
}

// Int64Value is a timestamped measurement of int64 value.
message Int64Value {
// start_time_unixnano is the time when the cumulative value was reset to zero.
Expand Down

0 comments on commit beea350

Please sign in to comment.