diff --git a/src/evidently/metrics/data_quality/column_value_list_metric.py b/src/evidently/metrics/data_quality/column_value_list_metric.py index 12770f6715..2022cf500c 100644 --- a/src/evidently/metrics/data_quality/column_value_list_metric.py +++ b/src/evidently/metrics/data_quality/column_value_list_metric.py @@ -1,7 +1,7 @@ from typing import Any -from typing import Dict from typing import List from typing import Optional +from typing import Tuple import pandas as pd @@ -26,8 +26,8 @@ class ValueListStat(MetricResult): number_not_in_list: int share_in_list: float share_not_in_list: float - values_in_list: Dict[Any, int] - values_not_in_list: Dict[Any, int] + values_in_list: List[Tuple[Any, int]] + values_not_in_list: List[Tuple[Any, int]] rows_count: int @@ -85,8 +85,8 @@ def _calculate_stats(values: list, column: pd.Series) -> ValueListStat: number_not_in_list=number_not_in_list, share_in_list=share_in_list, share_not_in_list=share_not_in_list, - values_in_list=values_in_list, - values_not_in_list=values_not_in_list, + values_in_list=[(k, v) for k, v in values_in_list.items()], + values_not_in_list=[(k, v) for k, v in values_not_in_list.items()], rows_count=rows_count, ) @@ -137,7 +137,7 @@ def _get_table_stat(dataset_name: str, stats: ValueListStat) -> BaseWidgetInfo: widget=table_data( title="", column_names=matched_stat_headers, - data=[(k, v) for k, v in stats.values_in_list.items() if v > 0][:10], + data=[(k, v) for k, v in stats.values_in_list if v > 0][:10], ), ), TabData( @@ -145,7 +145,7 @@ def _get_table_stat(dataset_name: str, stats: ValueListStat) -> BaseWidgetInfo: widget=table_data( title="", column_names=matched_stat_headers, - data=[(k, v) for k, v in stats.values_in_list.items() if v <= 0][:10], + data=[(k, v) for k, v in stats.values_in_list if v <= 0][:10], ), ), TabData( @@ -153,7 +153,7 @@ def _get_table_stat(dataset_name: str, stats: ValueListStat) -> BaseWidgetInfo: widget=table_data( title="", column_names=matched_stat_headers, - 
data=list(stats.values_not_in_list.items())[:10], + data=list(stats.values_not_in_list)[:10], ), ), ] diff --git a/src/evidently/tests/data_quality_tests.py b/src/evidently/tests/data_quality_tests.py index 2e6eaa3b7f..67563471cc 100644 --- a/src/evidently/tests/data_quality_tests.py +++ b/src/evidently/tests/data_quality_tests.py @@ -1482,16 +1482,16 @@ def render_html(self, obj: Union[BaseDataQualityValueListMetricsTest, TestValueL values = metric_result.values curr_df = pd.concat( [ - pd.DataFrame(metric_result.current.values_in_list.items(), columns=["x", "count"]), - pd.DataFrame(metric_result.current.values_not_in_list.items(), columns=["x", "count"]), + pd.DataFrame(metric_result.current.values_in_list, columns=["x", "count"]), + pd.DataFrame(metric_result.current.values_not_in_list, columns=["x", "count"]), ] ) if metric_result.reference is not None: ref_df = pd.concat( [ - pd.DataFrame(metric_result.reference.values_in_list.items(), columns=["x", "count"]), - pd.DataFrame(metric_result.reference.values_in_list.items(), columns=["x", "count"]), + pd.DataFrame(metric_result.reference.values_in_list, columns=["x", "count"]), + pd.DataFrame(metric_result.reference.values_not_in_list, columns=["x", "count"]), ] ) diff --git a/tests/metrics/data_quality/test_column_value_list_metric.py b/tests/metrics/data_quality/test_column_value_list_metric.py index ad499bcc6a..e037b742ee 100644 --- a/tests/metrics/data_quality/test_column_value_list_metric.py +++ b/tests/metrics/data_quality/test_column_value_list_metric.py @@ -27,8 +27,8 @@ number_not_in_list=0, share_in_list=0, share_not_in_list=0, - values_in_list={}, - values_not_in_list={}, + values_in_list=[], + values_not_in_list=[], rows_count=0, ), reference=None, @@ -46,8 +46,8 @@ number_not_in_list=3, share_in_list=0, share_not_in_list=1, - values_in_list={"test": 0}, - values_not_in_list={}, + values_in_list=[("test", 0)], + values_not_in_list=[], rows_count=3, ), reference=None, @@ -65,8 +65,8 @@ 
number_not_in_list=1, share_in_list=0.75, share_not_in_list=0.25, - values_in_list={2: 1, 1: 2, 3: 0}, - values_not_in_list={}, + values_in_list=[(1, 2), (2, 1), (3, 0)], + values_not_in_list=[], rows_count=4, ), reference=None, @@ -84,8 +84,8 @@ number_not_in_list=3, share_in_list=0.25, share_not_in_list=0.75, - values_in_list={"d": 1}, - values_not_in_list={"n": 2, "p": 1}, + values_in_list=[("d", 1)], + values_not_in_list=[("n", 2), ("p", 1)], rows_count=4, ), reference=None, @@ -103,8 +103,8 @@ number_not_in_list=2, share_in_list=0.5, share_not_in_list=0.5, - values_in_list={2: 2}, - values_not_in_list={432: 1, 0: 1}, + values_in_list=[(2, 2)], + values_not_in_list=[(0, 1), (432, 1)], rows_count=4, ), reference=None, @@ -122,8 +122,8 @@ number_not_in_list=0, share_in_list=1.0, share_not_in_list=0.0, - values_in_list={"n": 2, "p": 1, "d": 1}, - values_not_in_list={}, + values_in_list=[("n", 2), ("d", 1), ("p", 1)], + values_not_in_list=[], rows_count=4, ), reference=ValueListStat( @@ -131,8 +131,8 @@ number_not_in_list=0, share_in_list=1.0, share_not_in_list=0.0, - values_in_list={"n": 2, "p": 1, "d": 1}, - values_not_in_list={}, + values_in_list=[("n", 2), ("d", 1), ("p", 1)], + values_not_in_list=[], rows_count=4, ), ), @@ -216,8 +216,8 @@ def test_data_quality_value_list_metric_value_errors( "rows_count": 3, "share_in_list": 0.3333333333333333, "share_not_in_list": 0.6666666666666666, - "values_in_list": {"1": 1}, - "values_not_in_list": {"2": 1, "3": 1}, + "values_in_list": [[1, 1]], + "values_not_in_list": [[2, 1], [3, 1]], }, "reference": None, "values": [1], @@ -240,8 +240,8 @@ def test_data_quality_value_list_metric_value_errors( "rows_count": 3, "share_in_list": 0.0, "share_not_in_list": 1.0, - "values_in_list": {"10.0": 0, "20.0": 0, "3.5": 0}, - "values_not_in_list": {"1": 1, "2": 1, "3": 1}, + "values_in_list": [[10.0, 0], [20.0, 0], [3.5, 0]], + "values_not_in_list": [[1, 1], [2, 1], [3, 1]], }, "reference": { "number_in_list": 3, @@ -249,8 +249,8 
@@ def test_data_quality_value_list_metric_value_errors( "rows_count": 3, "share_in_list": 1.0, "share_not_in_list": 0.0, - "values_in_list": {"10.0": 1, "20.0": 1, "3.5": 1}, - "values_not_in_list": {}, + "values_in_list": [[10.0, 1], [20.0, 1], [3.5, 1]], + "values_not_in_list": [], }, "values": [10.0, 20.0, 3.5], },