Change DB utils behavior when a truncated row is found to only drop t…

…he row (#7983) * Change behavior when a truncated row is found to only drop the row * Fix truncation tests
DataDog · Nov 10, 2020 · 4d14ac0 · 4d14ac0
1 parent cf291af
commit 4d14ac0
Show file tree

Hide file tree

Showing 2 changed files with 10 additions and 17 deletions.
diff --git a/datadog_checks_base/datadog_checks/base/utils/db/statement_metrics.py b/datadog_checks_base/datadog_checks/base/utils/db/statement_metrics.py
@@ -44,7 +44,6 @@ def compute_derivative_rows(self, rows, metrics, key):
         """
         result = []
         new_cache = {}
-        negative_result_found = False
         metrics = set(metrics)
 
         if len(rows) > 0:
@@ -68,11 +67,6 @@ def compute_derivative_rows(self, rows, metrics, key):
             # whether a metric is submitted for the row during this run or not.
             new_cache[row_key] = row
 
-            # If a stats reset has happened, all other logic can be skipped since the previous check run's values are
-            # invalidated.
-            if negative_result_found:
-                continue
-
             prev = self._previous_statements.get(row_key)
             if prev is None:
                 continue
@@ -83,8 +77,8 @@ def compute_derivative_rows(self, rows, metrics, key):
             # There are a couple of edge cases to be aware of:
             #
             # 1. Table truncation or stats reset: Because the table values are always increasing, a negative value
-            #    suggests truncation or a stats reset. In this case, all rows from the previous run must be discarded.
-            #    Tracking should start over with the current run's values.
+            #    suggests truncation or a stats reset. In this case, the row difference is discarded and the row should.
+            #    be tracked from this run forward.
             #
             # 2. No changes since the previous run: There is no need to store metrics of 0, since that is implied by
             #    the absence of metrics. On any given check run, most rows will have no difference so this optimization
@@ -94,7 +88,9 @@ def compute_derivative_rows(self, rows, metrics, key):
 
             # Check for negative values, but only in the columns used for metrics
             if any(diffed_row[k] < 0 for k in metric_columns):
-                negative_result_found = True
+                # A "break" might be expected here instead of "continue," but there are cases where a subset of rows
+                # are removed. To avoid situations where all results are discarded every check run, we err on the side
+                # of potentially including truncated rows that exceed previous run counts.
                 continue
 
             # No changes to the query; no metric needed
@@ -105,9 +101,6 @@ def compute_derivative_rows(self, rows, metrics, key):
 
         self._previous_statements = new_cache
 
-        if negative_result_found:
-            return []
-
         return result
 
 

diff --git a/datadog_checks_base/tests/test_db_statements.py b/datadog_checks_base/tests/test_db_statements.py
@@ -97,17 +97,17 @@ def key(row):
         ]
         # Simulate a stats reset by decreasing one of the metrics rather than increasing
         rows3 = [
-            add_to_dict(rows2[1], {'count': 1, 'time': 15, 'errors': 0}),
             add_to_dict(rows2[0], {'count': -1, 'time': 0, 'errors': 15}),
+            add_to_dict(rows2[1], {'count': 1, 'time': 15, 'errors': 0}),
         ]
         rows4 = [
-            add_to_dict(rows3[1], {'count': 1, 'time': 1, 'errors': 0}),
             add_to_dict(rows3[0], {'count': 1, 'time': 1, 'errors': 1}),
+            add_to_dict(rows3[1], {'count': 1, 'time': 1, 'errors': 0}),
         ]
         assert [] == sm.compute_derivative_rows(rows1, metrics, key=key)
-        assert 2 == len(sm.compute_derivative_rows(rows2, metrics, key=key))
-        assert [] == sm.compute_derivative_rows(rows3, metrics, key=key)
-        assert 2 == len(sm.compute_derivative_rows(rows4, metrics, key=key))
+        assert 2 == len(sm.compute_derivative_rows(rows2, metrics, key=key))  # both rows computed
+        assert 1 == len(sm.compute_derivative_rows(rows3, metrics, key=key))  # only 1 row computed
+        assert 2 == len(sm.compute_derivative_rows(rows4, metrics, key=key))  # both rows computed
 
     def test_apply_row_limits(self):
         def assert_any_order(a, b):