datachecks · subhankarb · Sep 5, 2024 · Sep 4, 2024
diff --git a/dcs_core/core/common/models/validation.py b/dcs_core/core/common/models/validation.py
@@ -82,6 +82,8 @@ class ValidationFunction(str, Enum):
     PERCENTILE_90 = "percentile_90"
     COUNT_ZERO = "count_zero"
     PERCENT_ZERO = "percent_zero"
+    COUNT_NEGATIVE = "count_negative"
+    PERCENT_NEGATIVE = "percent_negative"
 
     # Reliability validations 3
     COUNT_ROWS = "count_rows"

diff --git a/dcs_core/core/datasource/sql_datasource.py b/dcs_core/core/datasource/sql_datasource.py
@@ -649,3 +649,28 @@ def query_zero_metric(
         else:
             result = self.fetchone(zero_query)[0]
             return result
+
+    def query_negative_metric(
+        self, table: str, field: str, operation: str, filters: str = None
+    ) -> Union[int, float]:
+        qualified_table_name = self.qualified_table_name(table)
+
+        negative_query = (
+            f"SELECT COUNT(*) FROM {qualified_table_name} WHERE {field} < 0"
+        )
+
+        if filters:
+            negative_query += f" AND {filters}"
+
+        total_count_query = f"SELECT COUNT(*) FROM {qualified_table_name}"
+
+        if filters:
+            total_count_query += f" WHERE {filters}"
+
+        if operation == "percent":
+            query = f"SELECT (CAST(({negative_query}) AS float) / CAST(({total_count_query}) AS float)) * 100"
+        else:
+            query = negative_query
+
+        result = self.fetchone(query)[0]
+        return round(result, 2) if operation == "percent" else result
diff --git a/dcs_core/core/validation/manager.py b/dcs_core/core/validation/manager.py
@@ -32,6 +32,7 @@
 )
 from dcs_core.core.validation.numeric_validation import (  # noqa F401 this is used in globals
     AvgValidation,
+    CountNegativeValidation,
     CountZeroValidation,
     MaxValidation,
     MinValidation,
@@ -40,6 +41,7 @@
     Percentile60Validation,
     Percentile80Validation,
     Percentile90Validation,
+    PercentNegativeValidation,
     PercentZeroValidation,
     StdDevValidation,
     SumValidation,
@@ -161,6 +163,8 @@ class ValidationManager:
         ValidationFunction.PERCENTILE_90.value: "Percentile90Validation",
         ValidationFunction.COUNT_ZERO.value: "CountZeroValidation",
         ValidationFunction.PERCENT_ZERO.value: "PercentZeroValidation",
+        ValidationFunction.COUNT_NEGATIVE.value: "CountNegativeValidation",
+        ValidationFunction.PERCENT_NEGATIVE.value: "PercentNegativeValidation",
     }
 
     def __init__(

diff --git a/dcs_core/core/validation/numeric_validation.py b/dcs_core/core/validation/numeric_validation.py
@@ -216,3 +216,31 @@ def _generate_metric_value(self, **kwargs) -> float:
             )
         else:
             raise ValueError("Unsupported data source type for PercentZeroValidation")
+
+
+class CountNegativeValidation(Validation):
+    def _generate_metric_value(self, **kwargs) -> int:
+        if isinstance(self.data_source, SQLDataSource):
+            return self.data_source.query_negative_metric(
+                table=self.dataset_name,
+                field=self.field_name,
+                operation="count",
+                filters=self.where_filter if self.where_filter is not None else None,
+            )
+        else:
+            raise ValueError("Unsupported data source type for CountNegativeValidation")
+
+
+class PercentNegativeValidation(Validation):
+    def _generate_metric_value(self, **kwargs) -> float:
+        if isinstance(self.data_source, SQLDataSource):
+            return self.data_source.query_negative_metric(
+                table=self.dataset_name,
+                field=self.field_name,
+                operation="percent",
+                filters=self.where_filter if self.where_filter is not None else None,
+            )
+        else:
+            raise ValueError(
+                "Unsupported data source type for PercentNegativeValidation"
+            )
diff --git a/docs/validations/validity.md b/docs/validations/validity.md
@@ -475,4 +475,34 @@ validations for product_db.products:
   - price zero percent:
       on: percent_zero(price)
       threshold: "< 10"
+```
+
+# **Numeric Negative Value Validations**
+
+The Numeric Negative Value Validations detect negative values in numeric fields within a dataset and check that their count or percentage stays within a specified threshold.
+
+## **COUNT_NEGATIVE**
+
+This validation counts the number of negative values present in a given numeric field.
+
+**Example**
+
+```yaml
+validations for product_db.products:
+  - negative value count should be less than 2:
+      on: count_negative(price)
+      threshold: "< 2"
+```
+
+## **PERCENT_NEGATIVE**
+
+This validation calculates the percentage of negative values in a numeric field, relative to the total number of records.
+
+**Example**
+
+```yaml
+validations for product_db.products:
+  - negative value percentage should be less than 40%:
+      on: percent_negative(price)
+      threshold: "< 40"
 ```
diff --git a/examples/configurations/postgres/example_postgres_config.yaml b/examples/configurations/postgres/example_postgres_config.yaml
@@ -48,6 +48,15 @@ validations for iris_pgsql.dcs_iris:
       threshold: "< 10"
 
 
+  # Negative Value Metrics
+  - price negative value count:
+      on: count_negative(price)
+      threshold: "< 2"
+
+  - price negative value percentage:
+      on: percent_negative(price)
+      threshold: "< 40"
+
   # Uniqueness Metrics
   - species duplicate count:
       on: count_duplicate(species)

diff --git a/tests/core/configuration/test_configuration_v1.py b/tests/core/configuration/test_configuration_v1.py
@@ -993,3 +993,35 @@ def test_should_parse_percent_zero_validation():
         .threshold.lt
         == 10
     )
+
+
+def test_should_parse_count_negative_validation():
+    yaml_string = """
+    validations for product_db.products:
+      - count_negative for price should be less than 2:
+          on: count_negative(price)
+          threshold: "< 2"
+    """
+    configuration = load_configuration_from_yaml_str(yaml_string)
+    assert (
+        configuration.validations["product_db.products"]
+        .validations["count_negative for price should be less than 2"]
+        .get_validation_function
+        == ValidationFunction.COUNT_NEGATIVE
+    )
+
+
+def test_should_parse_percent_negative_validation():
+    yaml_string = """
+    validations for product_db.products:
+      - percent_negative for price should be less than 40%:
+          on: percent_negative(price)
+          threshold: "< 40"
+    """
+    configuration = load_configuration_from_yaml_str(yaml_string)
+    assert (
+        configuration.validations["product_db.products"]
+        .validations["percent_negative for price should be less than 40%"]
+        .get_validation_function
+        == ValidationFunction.PERCENT_NEGATIVE
+    )
diff --git a/tests/integration/datasource/test_sql_datasource.py b/tests/integration/datasource/test_sql_datasource.py
@@ -131,7 +131,8 @@ def setup_tables(
                             figi VARCHAR(12),
                             isin VARCHAR(12),
                             perm_id VARCHAR(50),
-                            salary INTEGER
+                            salary INTEGER,
+                            price FLOAT
                         )
                     """
                 )
@@ -143,27 +144,27 @@ def setup_tables(
                 ('thor', '{(utc_now - datetime.timedelta(days=10)).strftime("%Y-%m-%d")}',
                     1500, NULL, 'thor hammer', 'e7194aaa-5516-4362-a5ff-6ff971976bec',
                     '123-456-7890', 'jane.doe@domain', 'C2', 'ABCDE', 40.0678, -7555555554.0060,'856-45-6789','0067340',
-                    'JRIK0092LOAUCXTR6042','03783310','BBG000B9XRY4','US0378331005', '1234--5678-9012--3456-789', 0), -- invalid email -- invalid usa_state_code  -- invalid usa_zip_code -- invalid cusip -- invalid perm_id
+                    'JRIK0092LOAUCXTR6042','03783310','BBG000B9XRY4','US0378331005', '1234--5678-9012--3456-789', 0, 100.0), -- invalid email -- invalid usa_state_code  -- invalid usa_zip_code -- invalid cusip -- invalid perm_id
                 ('captain america', '{(utc_now - datetime.timedelta(days=3)).strftime("%Y-%m-%d")}',
                     90, 80, 'shield', 'e7194aaa-5516-4362-a5ff-6ff971976b', '(123) 456-7890',
                     '[email protected] ', 'NY', '12-345', 34.0522, -118.2437,'000-12-3456', 'B01HL06',
-                    'CDR300OS7OJENVEDDW89','037833100','BBG000BL2H25','US5949181045', '1234567890123456789', 1000), -- invalid weapon_id --invalid email -- invalid usa_zip_code -- invalid ssn
+                    'CDR300OS7OJENVEDDW89','037833100','BBG000BL2H25','US5949181045', '1234567890123456789', 1000, -50.0), -- invalid weapon_id --invalid email -- invalid usa_zip_code -- invalid ssn
                 ('iron man', '{(utc_now - datetime.timedelta(days=4)).strftime("%Y-%m-%d")}',
                     50, 70, 'suit', '1739c676-6108-4dd2-8984-2459df744936', '123 456 7890',
                     '[email protected]', 'XY', '85001', 37.7749, -122.4194,'859-99-9999','4155586',
-                    'VXQ400F1OBWAVPBJP86','594918104','BBG000B3YB97','US38259P5088', '123456789012345678', 0), -- invalid email -- invalid usa_state_code -- invalid lei -- invalid perm_id
+                    'VXQ400F1OBWAVPBJP86','594918104','BBG000B3YB97','US38259P5088', '123456789012345678', 0, -150.0), -- invalid email -- invalid usa_state_code -- invalid lei -- invalid perm_id
                 ('hawk eye', '{(utc_now - datetime.timedelta(days=5)).strftime("%Y-%m-%d")}',
                     40, 60, 'bow', '1739c676-6108-4dd2-8984-2459df746', '+1 123-456-7890',
                     'user@@example.com', 'TX', '30301', 51.1657, 10.4515,'123-45-67890','12345',
-                    'FKRD00GCEYWDCNYLNF60','38259P508','BBG000B57Y12','US83165F1026', '5647382910564738291', 90), -- invalid weapon_id --invalid email -- invalid ssn -- invalid sedol
+                    'FKRD00GCEYWDCNYLNF60','38259P508','BBG000B57Y12','US83165F1026', '5647382910564738291', 90, 50.0), -- invalid weapon_id --invalid email -- invalid ssn -- invalid sedol
                 ('clark kent', '{(utc_now - datetime.timedelta(days=6)).strftime("%Y-%m-%d")}',
                     35, 50, '', '7be61b2c-45dc-4889-97e3-9202e8', '09123.456.7890',
                     '[email protected]', 'ZZ', '123456', 51.5074, -0.1278,'666-45-6789','34A56B7',
-                    '6R5J00FMIANQQH6JMN56','83165F102','BBG000B9XRY','US0231351067', '1234-5678-9012-3456-78X', 0), -- invalid weapon_id -- invalid phone -- invalid usa_state_code -- invalid usa_zip_code -- invalid ssn -- invalid sedol -- invalid figi -- invalid perm_id
+                    '6R5J00FMIANQQH6JMN56','83165F102','BBG000B9XRY','US0231351067', '1234-5678-9012-3456-78X', 0, -25.0), -- invalid weapon_id -- invalid phone -- invalid usa_state_code -- invalid usa_zip_code -- invalid ssn -- invalid sedol -- invalid figi -- invalid perm_id
                 ('black widow', '{(utc_now - datetime.timedelta(days=6)).strftime("%Y-%m-%d")}',
                     35, 50, '', '7be61b2c-45dc-4889-97e3-9202e8032c73', '+1 (123) 456-7890',
                     '[email protected]', 'FL', '90210', 483.8566, 2.3522,'001-01-0001','456VGHY',
-                    '0FPB00BBRHUYOE7DSK19','023135106','BBG000B6R530','US037833100', '2345-6789-0123-4567-890', 70) -- invalid isin -- invalid sedol
+                    '0FPB00BBRHUYOE7DSK19','023135106','BBG000B6R530','US037833100', '2345-6789-0123-4567-890', 70, 30.0) -- invalid isin -- invalid sedol
             """
 
             postgresql_connection.execute(text(insert_query))
@@ -611,7 +612,7 @@ def test_should_return_90th_percentile_age(
         percentile_90 = postgres_datasource.query_get_percentile(
             table=self.TABLE_NAME, field="age", percentile=0.9
         )
-        assert percentile_90 == 1500
+        assert percentile_90 == 1500  # Expected 90th percentile value.
 
     def test_should_return_count_zero(self, postgres_datasource: PostgresDataSource):
         count_zero = postgres_datasource.query_zero_metric(
@@ -624,3 +625,19 @@ def test_should_return_percent_zero(self, postgres_datasource: PostgresDataSourc
             table=self.TABLE_NAME, field="salary", operation="percent"
         )
         assert round(percent_zero, 2) == 50.0
+
+    def test_should_return_count_negative(
+        self, postgres_datasource: PostgresDataSource
+    ):
+        count_negative = postgres_datasource.query_negative_metric(
+            table=self.TABLE_NAME, field="price", operation="count"
+        )
+        assert count_negative == 3
+
+    def test_should_return_percent_negative(
+        self, postgres_datasource: PostgresDataSource
+    ):
+        percent_negative = postgres_datasource.query_negative_metric(
+            table=self.TABLE_NAME, field="price", operation="percent"
+        )
+        assert round(percent_negative, 2) == 50.0