From f5ceb313f4808451e0902037a8424164eb572880 Mon Sep 17 00:00:00 2001 From: Keith Date: Fri, 22 Dec 2017 11:18:54 +0000 Subject: [PATCH 1/3] Dealing with the division by zero issue which happens if all the values in the column are null --- agate/aggregations/mean.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agate/aggregations/mean.py b/agate/aggregations/mean.py index 6a83c1a3f..7171376ea 100644 --- a/agate/aggregations/mean.py +++ b/agate/aggregations/mean.py @@ -38,4 +38,4 @@ def run(self, table): sum_total = self._sum.run(table) - return sum_total / len(column.values_without_nulls()) + return sum_total / (len(column.values_without_nulls()) or 1) From 696dc0c6f367bfbb87716c479ed893f5e010b22c Mon Sep 17 00:00:00 2001 From: Keith Date: Fri, 29 Dec 2017 20:23:41 +0000 Subject: [PATCH 2/3] Adding in test case to show divide by zero, and the modification which addresses this. --- tests/test_aggregations.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/tests/test_aggregations.py b/tests/test_aggregations.py index d244130b4..4909ba2a2 100644 --- a/tests/test_aggregations.py +++ b/tests/test_aggregations.py @@ -211,17 +211,17 @@ def test_max(self): class TestNumberAggregation(unittest.TestCase): def setUp(self): self.rows = ( - (Decimal('1.1'), Decimal('2.19'), 'a'), - (Decimal('2.7'), Decimal('3.42'), 'b'), - (None, Decimal('4.1'), 'c'), - (Decimal('2.7'), Decimal('3.42'), 'c') + (Decimal('1.1'), Decimal('2.19'), 'a', None), + (Decimal('2.7'), Decimal('3.42'), 'b', None), + (None, Decimal('4.1'), 'c', None), + (Decimal('2.7'), Decimal('3.42'), 'c', None) ) self.number_type = Number() self.text_type = Text() - self.column_names = ['one', 'two', 'three'] - self.column_types = [self.number_type, self.number_type, self.text_type] + self.column_names = ['one', 'two', 'three', 'four'] + self.column_types = [self.number_type, self.number_type, self.text_type, self.number_type] self.table = Table(self.rows, self.column_names, self.column_types) @@ -272,6 +272,15 @@ def test_mean(self): self.assertEqual(Mean('two').run(self.table), Decimal('3.2825')) + def test_mean_all_nulls(self): + """ + Small test to confirm that if mean calculation is ran on a column of + nulls then zero will be returned. + + :return: + """ + self.assertEqual(Mean('four').run(self.table), Decimal('0')) + def test_mean_with_nulls(self): warnings.simplefilter('ignore') From 02682a0864b3155b7be458edefb860ed53555f79 Mon Sep 17 00:00:00 2001 From: Keith Date: Fri, 29 Dec 2017 20:33:54 +0000 Subject: [PATCH 3/3] This is a better solution for the mean on a null column. After thinking about this some more it seems better to return a None value when there are no values to calculate a Mean of. --- agate/aggregations/mean.py | 7 +++++-- tests/test_aggregations.py | 8 +++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/agate/aggregations/mean.py b/agate/aggregations/mean.py index 7171376ea..d2de20c9f 100644 --- a/agate/aggregations/mean.py +++ b/agate/aggregations/mean.py @@ -35,7 +35,10 @@ def validate(self, table): def run(self, table): column = table.columns[self._column_name] + num_of_values = len(column.values_without_nulls()) + # If there are no non-null columns then return null. + if num_of_values == 0: + return None sum_total = self._sum.run(table) - - return sum_total / (len(column.values_without_nulls()) or 1) + return sum_total / num_of_values diff --git a/tests/test_aggregations.py b/tests/test_aggregations.py index 4909ba2a2..56e942f4a 100644 --- a/tests/test_aggregations.py +++ b/tests/test_aggregations.py @@ -274,12 +274,14 @@ def test_mean(self): def test_mean_all_nulls(self): """ - Small test to confirm that if mean calculation is ran on a column of - nulls then zero will be returned. + Test to confirm mean of only nulls doesn't cause a critical error. + The assumption here is that if you attempt to perform a mean + calculation, on a column which contains only null values, then a null + value should be returned to the caller. :return: """ - self.assertEqual(Mean('four').run(self.table), Decimal('0')) + self.assertIsNone(Mean('four').run(self.table)) def test_mean_with_nulls(self): warnings.simplefilter('ignore')