From 217b415da36c83865edbbe80111704f6e52f3359 Mon Sep 17 00:00:00 2001 From: Yuke Zhuge Date: Wed, 21 Oct 2020 18:39:53 -0700 Subject: [PATCH 1/4] update histogram protocol --- .gitignore | 3 + proto/.gitignore | 2 + proto/Makefile | 4 +- proto/openmetrics_data_model.proto | 125 ++++++++++++++++++++++------- 4 files changed, 102 insertions(+), 32 deletions(-) create mode 100644 proto/.gitignore diff --git a/.gitignore b/.gitignore index af48d02..5a4c353 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,6 @@ node_modules/ # Hugo-generated assets public/ + +# IntelliJ stuff +/.idea/ diff --git a/proto/.gitignore b/proto/.gitignore new file mode 100644 index 0000000..d391e59 --- /dev/null +++ b/proto/.gitignore @@ -0,0 +1,2 @@ +*.pb.go +/build/ diff --git a/proto/Makefile b/proto/Makefile index bce6ddf..95cfae2 100644 --- a/proto/Makefile +++ b/proto/Makefile @@ -1,7 +1,7 @@ -# NB(rob): Move this to top level Makefile once merged to master, +# NB(rob): Move this to top level Makefile once merged to master, # then can add a rule to CircleCI to build the protobuf to make sure # it is valid. %.pb.go: %.proto - protoc --go_out=. $< + protoc --go_out=./build --java_out=./build $< all: $(patsubst %.proto,%.pb.go,$(wildcard *.proto)) diff --git a/proto/openmetrics_data_model.proto b/proto/openmetrics_data_model.proto index fd24736..03e5c36 100644 --- a/proto/openmetrics_data_model.proto +++ b/proto/openmetrics_data_model.proto @@ -1,6 +1,7 @@ syntax = "proto3"; package openmetrics; +option go_package = ".;openmetrics"; import "google/protobuf/timestamp.proto"; @@ -82,55 +83,119 @@ message CounterValue { Exemplar exemplar = 4; } +// Bucket boundary is inclusive at lower bound, exclusive at upper bound. +// Bucket count supports both integer and double. +// Use integer count whenever possible as it is efficiently encoded as varint. message HistogramValue { + // Sum of values represented by this histogram oneof sum { double double_value = 1; int64 int_value = 2; } - uint64 count = 2; + // Number of values represented by this histogram + oneof count { + double double_count = 3; + int64 int_count = 4; + } - google.protobuf.Timestamp created = 3; + google.protobuf.Timestamp created = 5; + + oneof buckets { + Linear linear_buckets = 6; + Exponential exponential_buckets = 7; + Explicit explicit_buckets = 8; + } - BucketOptions bucket_options = 4; + HistogramProducer histogram_producer = 9; - message BucketOptions { - oneof options { - Linear linear_buckets = 1; + // A consumer may use HistogramProducer as a hint to deserialize a histogram + // into the producer's format. + enum HistogramProducer { + UNKNOWN = 0; + HDRHISTOGRAM = 1; + DDSKETCH = 2; + CIRCLLHIST = 3; + DYNAHIST = 4; + } - Exponential exponential_buckets = 2; + // Linear buckets. Bucket n's lower bound is "offset + width * n" + message Linear { + double offset = 1; + double width = 2; + uint32 num_of_buckets = 3; + BucketCounts bucket_counts = 4; + } - Explicit explicit_buckets = 3; + // Exponential buckets. + // Bucket n's lower bound is "reference * (base ^ n)" + // "reference" should be set to a number that can be exactly represented by a double type. + // Negative bucket index is allowed so that reference can be set to a convenient value such as 1. + // If negative index were not allowed, reference has to be set at or below the smallest tracked number. + // When reference is close to zero, any inaccuracy in its "double" representation would + // result in a large error in higher indexes. + message Exponential { + double reference = 1; + double base = 2; + + // Exponential scale cannot represent zero. So we need a special counter for zero. + oneof counterForZero { + double double_count = 3; + uint64 int_count = 4; } - message Linear { - uint32 num_explicit_buckets = 1; + // Negative numbers are tracked via their absolute values. + BucketCountsWithIndexOffset bucket_counts_for_positive_numbers = 5; + BucketCountsWithIndexOffset bucket_counts_for_negative_numbers = 6; + + // Certain histograms (such as HdrHistogram) further divide an exponential bucket + // into multiple linear subbuckets. + // num_of_linear_subbuckets at 1 or 0 (default) means no linear subbuckets. + // When subbuckets are present, bucket_counts must start on the first subbucket in an + // exponential bucket. Bucket_counts need not end on the last subbucket in an + // exponential bucket. + // Example of base 2 exponential buckets, with 4 linear subbuckets: + // Bucket boundary: 1 1.25 1.5 1.75 2 2.5 3 3.5 4 5 6 7 8 + // bucket index: 0 1 2 3 4 5 6 7 8 9 10 11 12 + uint32 num_of_linear_subbuckets = 7; + + // A bucket's logical index is physical index in bucket_counts plus index_offset. + message BucketCountsWithIndexOffset { + uint32 num_of_buckets = 1; + sint32 index_offset = 2; + BucketCounts bucket_counts = 3; + } + } - double width = 2; + // Explicitly enumerating all bucket bounds. + message Explicit { + uint32 num_of_buckets = 1; + // bucket_bounds and bucket_counts must have num_of_buckets entries. + repeated double bucket_bounds = 2; + BucketCounts bucket_counts = 3; + } - double offset = 3; + message BucketCounts { + // IntegerBucketCounts uses efficient encoding of the counts using varint. + // Use DoubleBucketCounts only when necessary, because each count costs 8 bytes. + oneof bucket_counts { + IntegerBucketCounts integer_bucket_counts = 1; + DoubleBucketCounts double_bucket_counts = 2; } - message Exponential { - uint32 num_exponential_buckets = 1; - - double growth_factor = 2; + // When present, exemplar must have num_of_buckets entries. + repeated Exemplar exemplar = 3; - double scale = 3; + // In proto3, repeated fields of scalar numeric types use packed encoding by default. + // Thus small counters will cost fewer bytes when encoded. + message IntegerBucketCounts { + repeated uint64 count = 1; } - message Explicit { - repeated double bounds = 1; + message DoubleBucketCounts { + repeated double count = 1; } } - - repeated BucketCount bucket_counts = 5; - - message BucketCount { - uint64 count = 1; - - Exemplar exemplar = 2; - } } message Exemplar { @@ -161,11 +226,11 @@ message SummaryValue { int64 int_value = 2; } - uint64 count = 2; + uint64 count = 3; - google.protobuf.Timestamp created = 3; + google.protobuf.Timestamp created = 4; - repeated Quantile quantile = 4; + repeated Quantile quantile = 5; message Quantile { double quantile = 1; From c33e783f9d278215c85681147bdcfbd335cf311f Mon Sep 17 00:00:00 2001 From: Yuke Zhuge Date: Wed, 21 Oct 2020 18:57:13 -0700 Subject: [PATCH 2/4] minor touch up --- proto/openmetrics_data_model.proto | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/proto/openmetrics_data_model.proto b/proto/openmetrics_data_model.proto index 03e5c36..ae9111a 100644 --- a/proto/openmetrics_data_model.proto +++ b/proto/openmetrics_data_model.proto @@ -109,10 +109,10 @@ message HistogramValue { HistogramProducer histogram_producer = 9; - // A consumer may use HistogramProducer as a hint to deserialize a histogram - // into the producer's format. + // A consumer may use HistogramProducer as a hint to generate a histogram + // in the producer's orignal format. enum HistogramProducer { - UNKNOWN = 0; + UNLISTED = 0; HDRHISTOGRAM = 1; DDSKETCH = 2; CIRCLLHIST = 3; @@ -161,8 +161,8 @@ message HistogramValue { // A bucket's logical index is physical index in bucket_counts plus index_offset. message BucketCountsWithIndexOffset { - uint32 num_of_buckets = 1; - sint32 index_offset = 2; + sint32 index_offset = 1; + uint32 num_of_buckets = 2; BucketCounts bucket_counts = 3; } } @@ -170,7 +170,7 @@ message HistogramValue { // Explicitly enumerating all bucket bounds. message Explicit { uint32 num_of_buckets = 1; - // bucket_bounds and bucket_counts must have num_of_buckets entries. + // bucket_bounds and bucket_counts must both have num_of_buckets entries. repeated double bucket_bounds = 2; BucketCounts bucket_counts = 3; } @@ -183,8 +183,8 @@ message HistogramValue { DoubleBucketCounts double_bucket_counts = 2; } - // When present, exemplar must have num_of_buckets entries. - repeated Exemplar exemplar = 3; + // When present, exemplars must have the same number entries as the count list. + repeated Exemplar exemplars = 3; // In proto3, repeated fields of scalar numeric types use packed encoding by default. // Thus small counters will cost fewer bytes when encoded. From 5414ba57520baff320a65b8f644c901e6e542d47 Mon Sep 17 00:00:00 2001 From: Yuke Zhuge Date: Thu, 22 Oct 2020 10:57:03 -0700 Subject: [PATCH 3/4] update comments --- proto/openmetrics_data_model.proto | 53 ++++++++++++++++++------------ 1 file changed, 32 insertions(+), 21 deletions(-) diff --git a/proto/openmetrics_data_model.proto b/proto/openmetrics_data_model.proto index ae9111a..e157071 100644 --- a/proto/openmetrics_data_model.proto +++ b/proto/openmetrics_data_model.proto @@ -1,7 +1,6 @@ syntax = "proto3"; package openmetrics; -option go_package = ".;openmetrics"; import "google/protobuf/timestamp.proto"; @@ -89,14 +88,15 @@ message CounterValue { message HistogramValue { // Sum of values represented by this histogram oneof sum { - double double_value = 1; - int64 int_value = 2; + double double_sum = 1; + int64 int_sum = 2; } - // Number of values represented by this histogram + // Number of values represented by this histogram. + // Should match sum of all bucket counts. oneof count { double double_count = 3; - int64 int_count = 4; + uint64 int_count = 4; } google.protobuf.Timestamp created = 5; @@ -109,8 +109,20 @@ message HistogramValue { HistogramProducer histogram_producer = 9; + // Min of values represented by this histogram + oneof min { + double double_min = 10; + int64 int_min = 11; + } + + // Max of values represented by this histogram + oneof max { + double double_max = 12; + int64 int_max = 13; + } + // A consumer may use HistogramProducer as a hint to generate a histogram - // in the producer's orignal format. + // in the producer's original format. enum HistogramProducer { UNLISTED = 0; HDRHISTOGRAM = 1; @@ -129,11 +141,9 @@ message HistogramValue { // Exponential buckets. // Bucket n's lower bound is "reference * (base ^ n)" - // "reference" should be set to a number that can be exactly represented by a double type. - // Negative bucket index is allowed so that reference can be set to a convenient value such as 1. - // If negative index were not allowed, reference has to be set at or below the smallest tracked number. - // When reference is close to zero, any inaccuracy in its "double" representation would - // result in a large error in higher indexes. + // Negative bucket index is allowed. Example of reference 1, base 2 buckets: + // bucket lower bound: .125 .25 .5 1 2 4 8 16 + // bucket index: -3 -2 -1 0 1 2 3 4 message Exponential { double reference = 1; double base = 2; @@ -151,12 +161,9 @@ message HistogramValue { // Certain histograms (such as HdrHistogram) further divide an exponential bucket // into multiple linear subbuckets. // num_of_linear_subbuckets at 1 or 0 (default) means no linear subbuckets. - // When subbuckets are present, bucket_counts must start on the first subbucket in an - // exponential bucket. Bucket_counts need not end on the last subbucket in an - // exponential bucket. - // Example of base 2 exponential buckets, with 4 linear subbuckets: - // Bucket boundary: 1 1.25 1.5 1.75 2 2.5 3 3.5 4 5 6 7 8 - // bucket index: 0 1 2 3 4 5 6 7 8 9 10 11 12 + // Example of reference 1, base 2 exponential buckets, with 4 linear subbuckets: + // Bucket lower bound: .5 .625 .75 .875 1 1.25 1.5 1.75 2 2.5 3 3.5 4 + // bucket index: -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 uint32 num_of_linear_subbuckets = 7; // A bucket's logical index is physical index in bucket_counts plus index_offset. @@ -167,11 +174,15 @@ message HistogramValue { } } - // Explicitly enumerating all bucket bounds. + // Explicitly enumerating all bucket lower bounds. + // A bucket's upper bound comes from the next bucket's lower bound. + // The last bucket's upper bound is assumed to be +infinity. + // An extra bucket with zero count at the end is recommended to define + // the upper bound of the bucket series. message Explicit { uint32 num_of_buckets = 1; - // bucket_bounds and bucket_counts must both have num_of_buckets entries. - repeated double bucket_bounds = 2; + // bucket_lower_bounds and bucket_counts must both have num_of_buckets entries. + repeated double bucket_lower_bounds = 2; BucketCounts bucket_counts = 3; } @@ -183,7 +194,7 @@ message HistogramValue { DoubleBucketCounts double_bucket_counts = 2; } - // When present, exemplars must have the same number entries as the count list. + // When present, exemplars must have the same number of entries as the count list. repeated Exemplar exemplars = 3; // In proto3, repeated fields of scalar numeric types use packed encoding by default. From 9b5366a5cdec09b1d93d8198749554513cd40c66 Mon Sep 17 00:00:00 2001 From: Yuke Zhuge Date: Thu, 22 Oct 2020 11:46:31 -0700 Subject: [PATCH 4/4] update comments --- proto/openmetrics_data_model.proto | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/proto/openmetrics_data_model.proto b/proto/openmetrics_data_model.proto index e157071..72f3b84 100644 --- a/proto/openmetrics_data_model.proto +++ b/proto/openmetrics_data_model.proto @@ -83,7 +83,7 @@ message CounterValue { } // Bucket boundary is inclusive at lower bound, exclusive at upper bound. -// Bucket count supports both integer and double. +// Bucket count supports both integer and double. Zero count buckets are allowed. // Use integer count whenever possible as it is efficiently encoded as varint. message HistogramValue { // Sum of values represented by this histogram @@ -182,6 +182,7 @@ message HistogramValue { message Explicit { uint32 num_of_buckets = 1; // bucket_lower_bounds and bucket_counts must both have num_of_buckets entries. + // The values in bucket_lower_bounds must be strictly increasing. repeated double bucket_lower_bounds = 2; BucketCounts bucket_counts = 3; } @@ -195,7 +196,7 @@ message HistogramValue { } // When present, exemplars must have the same number of entries as the count list. - repeated Exemplar exemplars = 3; + repeated Exemplar exemplars = 3; // In proto3, repeated fields of scalar numeric types use packed encoding by default. // Thus small counters will cost fewer bytes when encoded.