From 217b415da36c83865edbbe80111704f6e52f3359 Mon Sep 17 00:00:00 2001
From: Yuke Zhuge <yzhuge@newrelic.com>
Date: Wed, 21 Oct 2020 18:39:53 -0700
Subject: [PATCH 1/4] update histogram protocol

---
 .gitignore                         |   3 +
 proto/.gitignore                   |   2 +
 proto/Makefile                     |   4 +-
 proto/openmetrics_data_model.proto | 125 ++++++++++++++++++++++-------
 4 files changed, 102 insertions(+), 32 deletions(-)
 create mode 100644 proto/.gitignore

diff --git a/.gitignore b/.gitignore
index af48d02..5a4c353 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,6 @@ node_modules/
 
 # Hugo-generated assets
 public/
+
+# IntelliJ stuff
+/.idea/
diff --git a/proto/.gitignore b/proto/.gitignore
new file mode 100644
index 0000000..d391e59
--- /dev/null
+++ b/proto/.gitignore
@@ -0,0 +1,2 @@
+*.pb.go
+/build/
diff --git a/proto/Makefile b/proto/Makefile
index bce6ddf..95cfae2 100644
--- a/proto/Makefile
+++ b/proto/Makefile
@@ -1,7 +1,7 @@
-# NB(rob): Move this to top level Makefile once merged to master, 
+# NB(rob): Move this to top level Makefile once merged to master,
 # then can add a rule to CircleCI to build the protobuf to make sure
 # it is valid.
 %.pb.go: %.proto
-	protoc --go_out=. $<
+	mkdir -p ./build && protoc --go_out=./build --java_out=./build $<
 
 all: $(patsubst %.proto,%.pb.go,$(wildcard *.proto))
diff --git a/proto/openmetrics_data_model.proto b/proto/openmetrics_data_model.proto
index fd24736..03e5c36 100644
--- a/proto/openmetrics_data_model.proto
+++ b/proto/openmetrics_data_model.proto
@@ -1,6 +1,7 @@
 syntax = "proto3";
 
 package openmetrics;
+option go_package = ".;openmetrics";
 
 import "google/protobuf/timestamp.proto";
 
@@ -82,55 +83,119 @@ message CounterValue {
   Exemplar exemplar = 4;
 }
 
+// Bucket boundary is inclusive at lower bound, exclusive at upper bound.
+// Bucket count supports both integer and double.
+// Use integer count whenever possible as it is efficiently encoded as varint.
 message HistogramValue {
+  // Sum of values represented by this histogram
   oneof sum {
     double double_value = 1;
     int64 int_value = 2;
   }
 
-  uint64 count = 2;
+  // Number of values represented by this histogram
+  oneof count {
+    double double_count = 3;
+    int64 int_count = 4;
+  }
 
-  google.protobuf.Timestamp created = 3;
+  google.protobuf.Timestamp created = 5;
+
+  oneof buckets {
+    Linear linear_buckets = 6;
+    Exponential exponential_buckets = 7;
+    Explicit explicit_buckets = 8;
+  }
 
-  BucketOptions bucket_options = 4;
+  HistogramProducer histogram_producer = 9;
 
-  message BucketOptions {
-    oneof options {
-      Linear linear_buckets = 1;
+  // A consumer may use HistogramProducer as a hint to deserialize a histogram
+  // into the producer's format.
+  enum HistogramProducer {
+    UNKNOWN = 0;
+    HDRHISTOGRAM = 1;
+    DDSKETCH = 2;
+    CIRCLLHIST = 3;
+    DYNAHIST = 4;
+  }
 
-      Exponential exponential_buckets = 2;
+  // Linear buckets. Bucket n's lower bound is "offset + width * n"
+  message Linear {
+    double offset = 1;
+    double width = 2;
+    uint32 num_of_buckets = 3;
+    BucketCounts bucket_counts = 4;
+  }
 
-      Explicit explicit_buckets = 3;
+  // Exponential buckets.
+  // Bucket n's lower bound is "reference * (base ^ n)"
+  // "reference" should be set to a number that can be exactly represented by a double type.
+  // Negative bucket index is allowed so that reference can be set to a convenient value such as 1.
+  // If negative index were not allowed, reference has to be set at or below the smallest tracked number.
+  // When reference is close to zero, any inaccuracy in its "double" representation would
+  // result in a large error in higher indexes.
+  message Exponential {
+    double reference = 1;
+    double base = 2;
+
+    // Exponential scale cannot represent zero. So we need a special counter for zero.
+    oneof counter_for_zero {
+      double double_count = 3;
+      uint64 int_count = 4;
     }
 
-    message Linear {
-      uint32 num_explicit_buckets = 1;
+    // Negative numbers are tracked via their absolute values.
+    BucketCountsWithIndexOffset bucket_counts_for_positive_numbers = 5;
+    BucketCountsWithIndexOffset bucket_counts_for_negative_numbers = 6;
+
+    // Certain histograms (such as HdrHistogram) further divide an exponential bucket
+    // into multiple linear subbuckets.
+    // num_of_linear_subbuckets at 1 or 0 (default) means no linear subbuckets.
+    // When subbuckets are present, bucket_counts must start on the first subbucket in an
+    // exponential bucket. Bucket_counts need not end on the last subbucket in an
+    // exponential bucket.
+    // Example of base 2 exponential buckets, with 4 linear subbuckets:
+    // Bucket boundary:  1 1.25 1.5 1.75 2 2.5 3 3.5 4 5  6  7  8
+    // bucket index:     0 1    2   3    4 5   6 7   8 9 10 11 12
+    uint32 num_of_linear_subbuckets = 7;
+
+    // A bucket's logical index is physical index in bucket_counts plus index_offset.
+    message BucketCountsWithIndexOffset {
+      uint32 num_of_buckets = 1;
+      sint32 index_offset = 2;
+      BucketCounts bucket_counts = 3;
+    }
+  }
 
-      double width = 2;
+  // Explicitly enumerating all bucket bounds.
+  message Explicit {
+    uint32 num_of_buckets = 1;
+    // bucket_bounds and bucket_counts must have num_of_buckets entries.
+    repeated double bucket_bounds = 2;
+    BucketCounts bucket_counts = 3;
+  }
 
-      double offset = 3;
+  message BucketCounts {
+    // IntegerBucketCounts uses efficient encoding of the counts using varint.
+    // Use DoubleBucketCounts only when necessary, because each count costs 8 bytes.
+    oneof bucket_counts {
+      IntegerBucketCounts integer_bucket_counts = 1;
+      DoubleBucketCounts double_bucket_counts = 2;
     }
 
-    message Exponential {
-      uint32 num_exponential_buckets = 1;
-
-      double growth_factor = 2;
+    // When present, exemplar must have num_of_buckets entries.
+    repeated  Exemplar exemplar = 3;
 
-      double scale = 3;
+    // In proto3, repeated fields of scalar numeric types use packed encoding by default.
+    // Thus small counters will cost fewer bytes when encoded.
+    message IntegerBucketCounts {
+      repeated uint64 count = 1;
     }
 
-    message Explicit {
-      repeated double bounds = 1;
+    message DoubleBucketCounts {
+      repeated double count = 1;
     }
   }
-
-  repeated BucketCount bucket_counts = 5;
-
-  message BucketCount {
-    uint64 count = 1;
-
-    Exemplar exemplar = 2;
-  }
 }
 
 message Exemplar {
@@ -161,11 +226,11 @@ message SummaryValue {
     int64 int_value = 2;
   }
 
-  uint64 count = 2;
+  uint64 count = 3;
 
-  google.protobuf.Timestamp created = 3;
+  google.protobuf.Timestamp created = 4;
 
-  repeated Quantile quantile = 4;
+  repeated Quantile quantile = 5;
 
   message Quantile {
     double quantile = 1;

From c33e783f9d278215c85681147bdcfbd335cf311f Mon Sep 17 00:00:00 2001
From: Yuke Zhuge <yzhuge@newrelic.com>
Date: Wed, 21 Oct 2020 18:57:13 -0700
Subject: [PATCH 2/4] minor touch up

---
 proto/openmetrics_data_model.proto | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/proto/openmetrics_data_model.proto b/proto/openmetrics_data_model.proto
index 03e5c36..ae9111a 100644
--- a/proto/openmetrics_data_model.proto
+++ b/proto/openmetrics_data_model.proto
@@ -109,10 +109,10 @@ message HistogramValue {
 
   HistogramProducer histogram_producer = 9;
 
-  // A consumer may use HistogramProducer as a hint to deserialize a histogram
-  // into the producer's format.
+  // A consumer may use HistogramProducer as a hint to generate a histogram
+  // in the producer's orignal format.
   enum HistogramProducer {
-    UNKNOWN = 0;
+    UNLISTED = 0;
     HDRHISTOGRAM = 1;
     DDSKETCH = 2;
     CIRCLLHIST = 3;
@@ -161,8 +161,8 @@ message HistogramValue {
 
     // A bucket's logical index is physical index in bucket_counts plus index_offset.
     message BucketCountsWithIndexOffset {
-      uint32 num_of_buckets = 1;
-      sint32 index_offset = 2;
+      sint32 index_offset = 1;
+      uint32 num_of_buckets = 2;
       BucketCounts bucket_counts = 3;
     }
   }
@@ -170,7 +170,7 @@ message HistogramValue {
   // Explicitly enumerating all bucket bounds.
   message Explicit {
     uint32 num_of_buckets = 1;
-    // bucket_bounds and bucket_counts must have num_of_buckets entries.
+    // bucket_bounds and bucket_counts must both have num_of_buckets entries.
     repeated double bucket_bounds = 2;
     BucketCounts bucket_counts = 3;
   }
@@ -183,8 +183,8 @@ message HistogramValue {
       DoubleBucketCounts double_bucket_counts = 2;
     }
 
-    // When present, exemplar must have num_of_buckets entries.
-    repeated  Exemplar exemplar = 3;
+    // When present, exemplars must have the same number entries as the count list.
+    repeated  Exemplar exemplars = 3;
 
     // In proto3, repeated fields of scalar numeric types use packed encoding by default.
     // Thus small counters will cost fewer bytes when encoded.

From 5414ba57520baff320a65b8f644c901e6e542d47 Mon Sep 17 00:00:00 2001
From: Yuke Zhuge <yzhuge@newrelic.com>
Date: Thu, 22 Oct 2020 10:57:03 -0700
Subject: [PATCH 3/4] rename sum and bucket-bound fields, add min/max, update comments

---
 proto/openmetrics_data_model.proto | 53 ++++++++++++++++++------------
 1 file changed, 32 insertions(+), 21 deletions(-)

diff --git a/proto/openmetrics_data_model.proto b/proto/openmetrics_data_model.proto
index ae9111a..e157071 100644
--- a/proto/openmetrics_data_model.proto
+++ b/proto/openmetrics_data_model.proto
@@ -1,7 +1,6 @@
 syntax = "proto3";
 
 package openmetrics;
-option go_package = ".;openmetrics";
 
 import "google/protobuf/timestamp.proto";
 
@@ -89,14 +88,15 @@ message CounterValue {
 message HistogramValue {
   // Sum of values represented by this histogram
   oneof sum {
-    double double_value = 1;
-    int64 int_value = 2;
+    double double_sum = 1;
+    int64 int_sum = 2;
   }
 
-  // Number of values represented by this histogram
+  // Number of values represented by this histogram.
+  // Should match sum of all bucket counts.
   oneof count {
     double double_count = 3;
-    int64 int_count = 4;
+    uint64 int_count = 4;
   }
 
   google.protobuf.Timestamp created = 5;
@@ -109,8 +109,20 @@ message HistogramValue {
 
   HistogramProducer histogram_producer = 9;
 
+  // Min of values represented by this histogram
+  oneof min {
+    double double_min = 10;
+    int64 int_min = 11;
+  }
+
+  // Max of values represented by this histogram
+  oneof max {
+    double double_max = 12;
+    int64 int_max = 13;
+  }
+
   // A consumer may use HistogramProducer as a hint to generate a histogram
-  // in the producer's orignal format.
+  // in the producer's original format.
   enum HistogramProducer {
     UNLISTED = 0;
     HDRHISTOGRAM = 1;
@@ -129,11 +141,9 @@ message HistogramValue {
 
   // Exponential buckets.
   // Bucket n's lower bound is "reference * (base ^ n)"
-  // "reference" should be set to a number that can be exactly represented by a double type.
-  // Negative bucket index is allowed so that reference can be set to a convenient value such as 1.
-  // If negative index were not allowed, reference has to be set at or below the smallest tracked number.
-  // When reference is close to zero, any inaccuracy in its "double" representation would
-  // result in a large error in higher indexes.
+  // Negative bucket index is allowed. Example of reference 1, base 2 buckets:
+  // bucket lower bound: .125 .25 .5 1 2 4 8 16
+  // bucket index:       -3   -2  -1 0 1 2 3 4
   message Exponential {
     double reference = 1;
     double base = 2;
@@ -151,12 +161,9 @@ message HistogramValue {
     // Certain histograms (such as HdrHistogram) further divide an exponential bucket
     // into multiple linear subbuckets.
     // num_of_linear_subbuckets at 1 or 0 (default) means no linear subbuckets.
-    // When subbuckets are present, bucket_counts must start on the first subbucket in an
-    // exponential bucket. Bucket_counts need not end on the last subbucket in an
-    // exponential bucket.
-    // Example of base 2 exponential buckets, with 4 linear subbuckets:
-    // Bucket boundary:  1 1.25 1.5 1.75 2 2.5 3 3.5 4 5  6  7  8
-    // bucket index:     0 1    2   3    4 5   6 7   8 9 10 11 12
+    // Example of reference 1, base 2 exponential buckets, with 4 linear subbuckets:
+    // Bucket lower bound: .5 .625 .75 .875 1 1.25 1.5 1.75 2 2.5 3 3.5 4
+    // bucket index:       -4 -3   -2  -1   0 1    2   3    4 5   6 7   8
     uint32 num_of_linear_subbuckets = 7;
 
     // A bucket's logical index is physical index in bucket_counts plus index_offset.
@@ -167,11 +174,15 @@ message HistogramValue {
     }
   }
 
-  // Explicitly enumerating all bucket bounds.
+  // Explicitly enumerating all bucket lower bounds.
+  // A bucket's upper bound comes from the next bucket's lower bound.
+  // The last bucket's upper bound is assumed to be +infinity.
+  // An extra bucket with zero count at the end is recommended to define
+  // the upper bound of the bucket series.
   message Explicit {
     uint32 num_of_buckets = 1;
-    // bucket_bounds and bucket_counts must both have num_of_buckets entries.
-    repeated double bucket_bounds = 2;
+    // bucket_lower_bounds and bucket_counts must both have num_of_buckets entries.
+    repeated double bucket_lower_bounds = 2;
     BucketCounts bucket_counts = 3;
   }
 
@@ -183,7 +194,7 @@ message HistogramValue {
       DoubleBucketCounts double_bucket_counts = 2;
     }
 
-    // When present, exemplars must have the same number entries as the count list.
+    // When present, exemplars must have the same number of entries as the count list.
     repeated  Exemplar exemplars = 3;
 
     // In proto3, repeated fields of scalar numeric types use packed encoding by default.

From 9b5366a5cdec09b1d93d8198749554513cd40c66 Mon Sep 17 00:00:00 2001
From: Yuke Zhuge <yzhuge@newrelic.com>
Date: Thu, 22 Oct 2020 11:46:31 -0700
Subject: [PATCH 4/4] update comments

---
 proto/openmetrics_data_model.proto | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/proto/openmetrics_data_model.proto b/proto/openmetrics_data_model.proto
index e157071..72f3b84 100644
--- a/proto/openmetrics_data_model.proto
+++ b/proto/openmetrics_data_model.proto
@@ -83,7 +83,7 @@ message CounterValue {
 }
 
 // Bucket boundary is inclusive at lower bound, exclusive at upper bound.
-// Bucket count supports both integer and double.
+// Bucket count supports both integer and double. Zero count buckets are allowed.
 // Use integer count whenever possible as it is efficiently encoded as varint.
 message HistogramValue {
   // Sum of values represented by this histogram
@@ -182,6 +182,7 @@ message HistogramValue {
   message Explicit {
     uint32 num_of_buckets = 1;
     // bucket_lower_bounds and bucket_counts must both have num_of_buckets entries.
+    // The values in bucket_lower_bounds must be strictly increasing.
     repeated double bucket_lower_bounds = 2;
     BucketCounts bucket_counts = 3;
   }
@@ -195,7 +196,7 @@ message HistogramValue {
     }
 
     // When present, exemplars must have the same number of entries as the count list.
-    repeated  Exemplar exemplars = 3;
+    repeated Exemplar exemplars = 3;
 
     // In proto3, repeated fields of scalar numeric types use packed encoding by default.
     // Thus small counters will cost fewer bytes when encoded.