Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DistributionAggregation fixes #375

Merged
2 changes: 1 addition & 1 deletion opencensus/stats/aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def __init__(
self._boundaries = bucket_boundaries.BucketBoundaries(boundaries)
self._distribution = distribution or {}
self.aggregation_data = aggregation_data.DistributionAggregationData(
0, 0, 0, 0, 0, None, boundaries)
0, 0, float('inf'), float('-inf'), 0, None, boundaries)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed to match the java client's behavior, this previously failed on e.g. negative value points.


@property
def boundaries(self):
Expand Down
51 changes: 25 additions & 26 deletions opencensus/stats/aggregation_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from opencensus.stats import bucket_boundaries


Expand All @@ -21,6 +22,7 @@ class BaseAggregationData(object):
:param aggregation_data: represents the aggregated value from a collection

"""

def __init__(self, aggregation_data):
self._aggregation_data = aggregation_data

Expand All @@ -37,6 +39,7 @@ class SumAggregationDataFloat(BaseAggregationData):
:param sum_data: represents the aggregated sum

"""

def __init__(self, sum_data):
super(SumAggregationDataFloat, self).__init__(sum_data)
self._sum_data = sum_data
Expand All @@ -60,6 +63,7 @@ class CountAggregationData(BaseAggregationData):
:param count_data: represents the aggregated count

"""

def __init__(self, count_data):
super(CountAggregationData, self).__init__(count_data)
self._count_data = count_data
Expand Down Expand Up @@ -104,6 +108,7 @@ class DistributionAggregationData(BaseAggregationData):
:param bounds: the histogram distribution of the values

"""

def __init__(self,
mean_data,
count_data,
Expand All @@ -123,13 +128,14 @@ def __init__(self,
bounds = []

if counts_per_bucket is None:
counts_per_bucket = []
bucket_size = len(bounds) + 1
for i in range(bucket_size):
counts_per_bucket.append(0)
counts_per_bucket = [0 for ii in range(len(bounds) + 1)]
elif len(counts_per_bucket) != len(bounds) + 1:
raise ValueError("counts_per_bucket length does not match bounds "
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can change to "counts_per_bucket has X fields, which is more than the Y buckets allowed." where, X:len(counts_per_bucket) and Y: len(bounds) + 1. What do you think?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One benefit of fixed error messages is that you can grep for the exact text, but I'm happy with either here.

"length")

self._counts_per_bucket = counts_per_bucket
self._bounds = bucket_boundaries.BucketBoundaries(
boundaries=bounds).boundaries
boundaries=bounds).boundaries
bucket = 0
for _ in self.bounds:
bucket = bucket + 1
Expand Down Expand Up @@ -207,30 +213,25 @@ def add_sample(self, value, timestamp, attachments):

old_mean = self._mean_data
self._mean_data = self._mean_data + (
(value - self._mean_data) / self._count_data)
(value - self._mean_data) / self._count_data)
self._sum_of_sqd_deviations = self._sum_of_sqd_deviations + (
(value - old_mean) *
(value - self._mean_data))
(value - old_mean) * (value - self._mean_data))


def increment_bucket_count(self, value):
"""Increment the bucket count based on a given value from the user"""
i = 0
incremented = False
for b in self._bounds:
if value < b and not incremented:
self._counts_per_bucket[i] += 1
incremented = True
i += 1

if incremented:
return i

if len(self._bounds) == 0:
self._counts_per_bucket[0] += 1
return i
return 0

self._counts_per_bucket[(len(self._bounds))-1] += 1
return i
for ii, bb in enumerate(self._bounds):
if value < bb:
self._counts_per_bucket[ii] += 1
return ii
else:
last_bucket_index = len(self._bounds)
self._counts_per_bucket[last_bucket_index] += 1
return last_bucket_index
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One fix here and the other up in __init__, the other changes in this file are lint and formatting.



class LastValueAggregationData(BaseAggregationData):
Expand All @@ -241,6 +242,7 @@ class LastValueAggregationData(BaseAggregationData):
:param value: represents the current value

"""

def __init__(self, value):
super(LastValueAggregationData, self).__init__(value)
self._value = value
Expand Down Expand Up @@ -271,10 +273,7 @@ class Exemplar(object):
:param attachments: the contextual information about the example value.
"""

def __init__(self,
value,
timestamp,
attachments):
def __init__(self, value, timestamp, attachments):
self._value = value

self._timestamp = timestamp
Expand Down
59 changes: 40 additions & 19 deletions tests/unit/stats/test_aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,92 +12,113 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from datetime import datetime
import unittest
import mock

from opencensus.stats import aggregation as aggregation_module


class TestBaseAggregation(unittest.TestCase):

def test_constructor_defaults(self):
base_aggregation = aggregation_module.BaseAggregation()

self.assertEqual(aggregation_module.Type.NONE, base_aggregation.aggregation_type)
self.assertEqual(aggregation_module.Type.NONE,
base_aggregation.aggregation_type)
self.assertEqual([], base_aggregation.buckets)

def test_constructor_explicit(self):

buckets = ["test"]
base_aggregation = aggregation_module.BaseAggregation(buckets=buckets)

self.assertEqual(aggregation_module.Type.NONE, base_aggregation.aggregation_type)
self.assertEqual(aggregation_module.Type.NONE,
base_aggregation.aggregation_type)
self.assertEqual(["test"], base_aggregation.buckets)


class TestSumAggregation(unittest.TestCase):

def test_constructor_defaults(self):
sum_aggregation = aggregation_module.SumAggregation()

self.assertEqual(0, sum_aggregation.sum.sum_data)
self.assertEqual(aggregation_module.Type.SUM, sum_aggregation.aggregation_type)
self.assertEqual(aggregation_module.Type.SUM,
sum_aggregation.aggregation_type)

def test_constructor_explicit(self):
sum = 1

sum_aggregation = aggregation_module.SumAggregation(sum=sum)

self.assertEqual(1, sum_aggregation.sum.sum_data)
self.assertEqual(aggregation_module.Type.SUM, sum_aggregation.aggregation_type)
self.assertEqual(aggregation_module.Type.SUM,
sum_aggregation.aggregation_type)


class TestCountAggregation(unittest.TestCase):

def test_constructor_defaults(self):
count_aggregation = aggregation_module.CountAggregation()

self.assertEqual(0, count_aggregation.count.count_data)
self.assertEqual(aggregation_module.Type.COUNT, count_aggregation.aggregation_type)
self.assertEqual(aggregation_module.Type.COUNT,
count_aggregation.aggregation_type)

def test_constructor_explicit(self):
count = 4

count_aggregation = aggregation_module.CountAggregation(count=count)

self.assertEqual(4, count_aggregation.count.count_data)
self.assertEqual(aggregation_module.Type.COUNT, count_aggregation.aggregation_type)
self.assertEqual(aggregation_module.Type.COUNT,
count_aggregation.aggregation_type)


class TestLastValueAggregation(unittest.TestCase):

def test_constructor_defaults(self):
last_value_aggregation = aggregation_module.LastValueAggregation()

self.assertEqual(0, last_value_aggregation.value)
self.assertEqual(aggregation_module.Type.LASTVALUE, last_value_aggregation.aggregation_type)
self.assertEqual(aggregation_module.Type.LASTVALUE,
last_value_aggregation.aggregation_type)

def test_constructor_explicit(self):
val = 16
last_value_aggregation = aggregation_module.LastValueAggregation(value=val)
last_value_aggregation = aggregation_module.LastValueAggregation(
value=val)

self.assertEqual(16, last_value_aggregation.value)
self.assertEqual(aggregation_module.Type.LASTVALUE, last_value_aggregation.aggregation_type)
self.assertEqual(aggregation_module.Type.LASTVALUE,
last_value_aggregation.aggregation_type)


class TestDistributionAggregation(unittest.TestCase):

def test_constructor_defaults(self):
distribution_aggregation = aggregation_module.DistributionAggregation()

self.assertEqual([], distribution_aggregation.boundaries.boundaries)
self.assertEqual({}, distribution_aggregation.distribution)
self.assertEqual(aggregation_module.Type.DISTRIBUTION, distribution_aggregation.aggregation_type)
self.assertEqual(aggregation_module.Type.DISTRIBUTION,
distribution_aggregation.aggregation_type)

def test_constructor_explicit(self):
boundaries = ["test"]
distribution = {1: "test"}
distribution_aggregation = aggregation_module.DistributionAggregation(boundaries=boundaries, distribution=distribution)
distribution_aggregation = aggregation_module.DistributionAggregation(
boundaries=boundaries, distribution=distribution)

self.assertEqual(["test"], distribution_aggregation.boundaries.boundaries)
self.assertEqual(["test"],
distribution_aggregation.boundaries.boundaries)
self.assertEqual({1: "test"}, distribution_aggregation.distribution)
self.assertEqual(aggregation_module.Type.DISTRIBUTION, distribution_aggregation.aggregation_type)
self.assertEqual(aggregation_module.Type.DISTRIBUTION,
distribution_aggregation.aggregation_type)

def test_min_max(self):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again, all lint and formatting except this one.

da = aggregation_module.DistributionAggregation([])

self.assertEqual(da.aggregation_data.min, float('inf'))
self.assertEqual(da.aggregation_data.max, float('-inf'))

for dp in range(-10, 11):
da.aggregation_data.add_sample(dp, datetime(1999, 12, 31), {})

self.assertEqual(da.aggregation_data.min, -10)
self.assertEqual(da.aggregation_data.max, 10)
Loading