From 4d14ac00daf4f9a9909a7ec7f5c5b6b83806771a Mon Sep 17 00:00:00 2001
From: Justin Iso <justin.james.iso@gmail.com>
Date: Tue, 10 Nov 2020 10:07:31 -0500
Subject: [PATCH] Change DB utils behavior when a truncated row is found to
 only drop the row (#7983)

* Change behavior when a truncated row is found to only drop the row

* Fix truncation tests
---
 .../base/utils/db/statement_metrics.py          | 17 +++++------------
 datadog_checks_base/tests/test_db_statements.py | 10 +++++-----
 2 files changed, 10 insertions(+), 17 deletions(-)

diff --git a/datadog_checks_base/datadog_checks/base/utils/db/statement_metrics.py b/datadog_checks_base/datadog_checks/base/utils/db/statement_metrics.py
index 8eb3f15c5a248..acd4f6529ec21 100644
--- a/datadog_checks_base/datadog_checks/base/utils/db/statement_metrics.py
+++ b/datadog_checks_base/datadog_checks/base/utils/db/statement_metrics.py
@@ -44,7 +44,6 @@ def compute_derivative_rows(self, rows, metrics, key):
         """
         result = []
         new_cache = {}
-        negative_result_found = False
         metrics = set(metrics)
 
         if len(rows) > 0:
@@ -68,11 +67,6 @@ def compute_derivative_rows(self, rows, metrics, key):
             # whether a metric is submitted for the row during this run or not.
             new_cache[row_key] = row
 
-            # If a stats reset has happened, all other logic can be skipped since the previous check run's values are
-            # invalidated.
-            if negative_result_found:
-                continue
-
             prev = self._previous_statements.get(row_key)
             if prev is None:
                 continue
@@ -83,8 +77,8 @@ def compute_derivative_rows(self, rows, metrics, key):
             # There are a couple of edge cases to be aware of:
             #
             # 1. Table truncation or stats reset: Because the table values are always increasing, a negative value
-            #    suggests truncation or a stats reset. In this case, all rows from the previous run must be discarded.
-            #    Tracking should start over with the current run's values.
+            #    suggests truncation or a stats reset. In this case, the row difference is discarded and the row should.
+            #    be tracked from this run forward.
             #
             # 2. No changes since the previous run: There is no need to store metrics of 0, since that is implied by
             #    the absence of metrics. On any given check run, most rows will have no difference so this optimization
@@ -94,7 +88,9 @@ def compute_derivative_rows(self, rows, metrics, key):
 
             # Check for negative values, but only in the columns used for metrics
             if any(diffed_row[k] < 0 for k in metric_columns):
-                negative_result_found = True
+                # A "break" might be expected here instead of "continue," but there are cases where a subset of rows
+                # are removed. To avoid situations where all results are discarded every check run, we err on the side
+                # of potentially including truncated rows that exceed previous run counts.
                 continue
 
             # No changes to the query; no metric needed
@@ -105,9 +101,6 @@ def compute_derivative_rows(self, rows, metrics, key):
 
         self._previous_statements = new_cache
 
-        if negative_result_found:
-            return []
-
         return result
 
 
diff --git a/datadog_checks_base/tests/test_db_statements.py b/datadog_checks_base/tests/test_db_statements.py
index c2c8f620457df..56b47bbe2a0fd 100644
--- a/datadog_checks_base/tests/test_db_statements.py
+++ b/datadog_checks_base/tests/test_db_statements.py
@@ -97,17 +97,17 @@ def key(row):
         ]
         # Simulate a stats reset by decreasing one of the metrics rather than increasing
         rows3 = [
-            add_to_dict(rows2[1], {'count': 1, 'time': 15, 'errors': 0}),
             add_to_dict(rows2[0], {'count': -1, 'time': 0, 'errors': 15}),
+            add_to_dict(rows2[1], {'count': 1, 'time': 15, 'errors': 0}),
         ]
         rows4 = [
-            add_to_dict(rows3[1], {'count': 1, 'time': 1, 'errors': 0}),
             add_to_dict(rows3[0], {'count': 1, 'time': 1, 'errors': 1}),
+            add_to_dict(rows3[1], {'count': 1, 'time': 1, 'errors': 0}),
         ]
         assert [] == sm.compute_derivative_rows(rows1, metrics, key=key)
-        assert 2 == len(sm.compute_derivative_rows(rows2, metrics, key=key))
-        assert [] == sm.compute_derivative_rows(rows3, metrics, key=key)
-        assert 2 == len(sm.compute_derivative_rows(rows4, metrics, key=key))
+        assert 2 == len(sm.compute_derivative_rows(rows2, metrics, key=key))  # both rows computed
+        assert 1 == len(sm.compute_derivative_rows(rows3, metrics, key=key))  # only 1 row computed
+        assert 2 == len(sm.compute_derivative_rows(rows4, metrics, key=key))  # both rows computed
 
     def test_apply_row_limits(self):
         def assert_any_order(a, b):