Skip to content

Commit

Permalink
feat(dashboard): html report framework (#115)
Browse files Browse the repository at this point in the history
* docs: added tracking for docs (#99)

* bump up version to 0.2.2 (#101)

Co-authored-by: Subhankar <[email protected]>

* Update README.md (#102)

* build(deps): bump cryptography from 41.0.3 to 41.0.4 (#103)

Bumps [cryptography](https://github.com/pyca/cryptography) from 41.0.3 to 41.0.4.
- [Changelog](https://github.com/pyca/cryptography/blob/main/CHANGELOG.rst)
- [Commits](pyca/cryptography@41.0.3...41.0.4)

---
updated-dependencies:
- dependency-name: cryptography
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <[email protected]>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

* feat(metrics): implemented sum metric + tests (#109)

* feat(metrics): implemented standard deviation + tests (#108)

* feat(validation): metric validation framework (#107)

* build(deps): bump urllib3 from 1.26.16 to 1.26.17 (#114)

Bumps [urllib3](https://github.com/urllib3/urllib3) from 1.26.16 to 1.26.17.
- [Release notes](https://github.com/urllib3/urllib3/releases)
- [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst)
- [Commits](urllib3/urllib3@1.26.16...1.26.17)

---
updated-dependencies:
- dependency-name: urllib3
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <[email protected]>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

* feat(dashboard): html report framework

---------

Signed-off-by: dependabot[bot] <[email protected]>
Co-authored-by: Subhankar Biswas <[email protected]>
Co-authored-by: Subhankar <[email protected]>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Niyas Hameed <[email protected]>
  • Loading branch information
5 people authored Oct 11, 2023
1 parent a908163 commit 71b404a
Show file tree
Hide file tree
Showing 65 changed files with 9,960 additions and 748 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -125,4 +125,5 @@ processor/runnotebook.ipynb

# Poetry
#poetry.lock
_internal_*
_internal_*
datachecks_report.html
53 changes: 38 additions & 15 deletions datachecks/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import click
from loguru import logger
from rich import print
from rich.table import Table
from rich.table import Table, Text

from datachecks.__version__ import __version__
from datachecks.core import Configuration, Inspect, load_configuration
Expand Down Expand Up @@ -106,47 +106,70 @@ def inspect(

def _build_metric_cli_table(*, inspect_output: InspectOutput):
table = Table(
title="List of Generated Metrics", show_header=True, header_style="bold blue"
title="List of Generated Metrics",
show_header=True,
header_style="bold blue",
)
table.add_column(
"Metric Name",
style="cyan",
no_wrap=True,
)
table.add_column("Metric Name", style="cyan", no_wrap=True)
table.add_column("Data Source", style="magenta")
table.add_column("Metric Type", style="magenta")
table.add_column("Value", justify="right", style="green")
table.add_column("Valid", justify="right")
table.add_column("Reason", justify="right")

for data_source_name, ds_metrics in inspect_output.metrics.items():
row = None
if isinstance(ds_metrics, DataSourceMetrics):
for tabel_name, table_metrics in ds_metrics.table_metrics.items():
for metric_identifier, metric in table_metrics.metrics.items():
table.add_row(
f"{metric.tags.get('metric_name')}",
f"{data_source_name}",
f"{metric.metric_type}",
f"{metric.value}",
*_build_row(metric),
)
for index_name, index_metrics in ds_metrics.index_metrics.items():
for metric_identifier, metric in index_metrics.metrics.items():
table.add_row(
f"{metric.tags.get('metric_name')}",
f"{data_source_name}",
f"{metric.metric_type}",
f"{metric.value}",
*_build_row(metric),
)
else:
for metric_identifier, metric in ds_metrics.metrics.items():
table.add_row(
f"{metric.tags.get('metric_name')}",
f"",
f"{metric.metric_type}",
f"{metric.value}",
*_build_row(metric),
)

return table


def _build_html_report(*, inspect_output: InspectOutput, report_path: str):
    """Render the inspection output through the HTML template and save it.

    Args:
        inspect_output: Result of the inspection run; it is logged and handed
            to ``DashboardInfoBuilder`` to produce the dashboard payload.
        report_path: Destination file path. The file is written as UTF-8 and
            any existing content is replaced.
    """
    logger.info(inspect_output)
    template_params = TemplateParams(
        # uuid4().hex is the 32-character hex form, i.e. str(uuid4()) without dashes.
        dashboard_id=f"dcs_dashboard_{uuid.uuid4().hex}",
        dashboard_info=DashboardInfoBuilder(inspect_output).build(),
    )

    # Render before opening the file: mode "w" truncates on open, so a failure
    # inside the template must not leave an empty or clobbered report behind.
    report_html = html_template(template_params)

    with open(report_path, "w", encoding="utf-8") as out_file:
        out_file.write(report_html)


def _build_row(metric):
    """Build the six cells of one CLI table row for a metric value.

    The cells are, in order: the ``metric_name`` tag, the data source, the
    metric type, the value rendered as a string, a styled validity label and
    the failure reason (``"-"`` when no reason is set).
    """
    validity = metric.is_valid
    if validity is None:
        # Validity was never set: neutral placeholder without colour.
        status_label, status_style = "-", ""
    elif validity:
        status_label, status_style = "Passed", "green"
    else:
        status_label, status_style = "Failed", "red"

    return (
        metric.tags.get("metric_name"),
        metric.data_source,
        metric.metric_type,
        str(metric.value),
        Text(status_label, style=status_style),
        metric.reason if metric.reason is not None else "-",
    )
2 changes: 2 additions & 0 deletions datachecks/core/common/models/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

from datachecks.core.common.models.data_source_resource import Field, Index, Table
from datachecks.core.common.models.metric import MetricsType
from datachecks.core.common.models.validation import Validation


class DataSourceType(str, Enum):
Expand Down Expand Up @@ -85,6 +86,7 @@ class MetricConfiguration:
metric_type: MetricsType
expression: Optional[str] = None
resource: Optional[Union[Table, Index, Field]] = None
validation: Optional[Validation] = None
filters: Optional[MetricsFilterConfiguration] = None

def __post_init__(self):
Expand Down
2 changes: 2 additions & 0 deletions datachecks/core/common/models/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ class MetricValue:
table_name: Optional[str] = None
index_name: Optional[str] = None
field_name: Optional[str] = None
is_valid: Optional[bool] = None
reason: Optional[str] = None
tags: Dict[str, str] = None


Expand Down
39 changes: 39 additions & 0 deletions datachecks/core/common/models/validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from dataclasses import dataclass
from enum import Enum
from typing import Optional


class ConditionType(str, Enum):
    """Comparison kinds that a threshold validation can express.

    Members are also plain strings; each value is the name of the matching
    attribute on ``Threshold``.
    """

    GTE = "gte"  # greater than or equal to
    LTE = "lte"  # less than or equal to
    GT = "gt"  # strictly greater than
    LT = "lt"  # strictly less than
    EQ = "eq"  # equal to


@dataclass
class Threshold:
    """Numeric bounds for a metric value, one optional bound per comparison kind.

    Attribute names match the values of ``ConditionType``. Every bound
    defaults to ``None``.
    """

    # Bound for the "greater than or equal to" comparison.
    gte: Optional[float] = None
    # Bound for the "less than or equal to" comparison.
    lte: Optional[float] = None
    # Bound for the strict "greater than" comparison.
    gt: Optional[float] = None
    # Bound for the strict "less than" comparison.
    lt: Optional[float] = None
    # Value for the "equal to" comparison.
    eq: Optional[float] = None


@dataclass
class Validation:
    """Validation settings for a metric; currently holds only a threshold."""

    # Bounds the metric value is compared against.
    threshold: Threshold
62 changes: 59 additions & 3 deletions datachecks/core/configuration/configuration_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@

from typing import Dict, List, Union

from pyparsing import Forward, Group, Suppress, Word, alphas, delimitedList, nums
from pyparsing import Combine, Group, Literal
from pyparsing import Optional as OptionalParsing
from pyparsing import Word, delimitedList, nums, oneOf

from datachecks.core.common.errors import DataChecksConfigurationError
from datachecks.core.common.models.configuration import (
Expand All @@ -27,8 +29,21 @@
)
from datachecks.core.common.models.data_source_resource import Field, Index, Table
from datachecks.core.common.models.metric import MetricsType
from datachecks.core.common.models.validation import (
ConditionType,
Threshold,
Validation,
)
from datachecks.core.configuration.config_loader import parse_config

# Maps each comparison symbol accepted in a threshold expression to the
# ConditionType member it stands for.
CONDITION_TYPE_MAPPING = {
    ">=": ConditionType.GTE,
    "<=": ConditionType.LTE,
    "=": ConditionType.EQ,
    "<": ConditionType.LT,
    ">": ConditionType.GT,
}


def parse_data_source_yaml_configurations(
data_source_yaml_configurations: List[dict],
Expand Down Expand Up @@ -74,6 +89,41 @@ def _parse_resource_index(resource_str: str) -> Index:
return Index(data_source=splits[0], name=splits[1])


def _parse_threshold_str(threshold: str) -> Threshold:
    """Parse a threshold expression into a ``Threshold``.

    The expression is one or more ``<operator> <number>`` conditions joined
    by ``&``, for example ``">= 10 & < 100"``. Operators are ``>=``, ``<=``,
    ``=``, ``<`` and ``>``; numbers are optionally negative integers or
    decimals.

    Args:
        threshold: The threshold expression from the metric configuration.

    Returns:
        A ``Threshold`` with one bound set per parsed condition.

    Raises:
        DataChecksConfigurationError: If the expression is not fully
            parseable, including when valid conditions are followed by
            unrecognised text.
    """
    try:
        comparison = oneOf(">= <= = < >").setParseAction(
            lambda tokens: CONDITION_TYPE_MAPPING[tokens[0]]
        )
        number = Combine(
            OptionalParsing(Literal("-"))
            + Word(nums)
            + OptionalParsing(Literal(".") + Word(nums))
        ).setParseAction(lambda tokens: float(tokens[0]))

        condition = Group(comparison + number)
        conditions = delimitedList(condition, delim="&")

        # parseAll=True rejects trailing text instead of silently dropping it;
        # otherwise ">= 10 and <= 20" would be accepted as just ">= 10".
        parsed = conditions.parseString(threshold, parseAll=True)

        # Use the plain string value of each enum member as the keyword name.
        bounds = {
            condition_type.value: bound for condition_type, bound in parsed
        }
        return Threshold(**bounds)

    except Exception as e:
        raise DataChecksConfigurationError(
            f"Invalid threshold configuration {threshold}: {str(e)}"
        ) from e


def _parse_validation_configuration(validation_config: dict) -> Validation:
    """Build a ``Validation`` from the ``validation`` section of a metric.

    Raises:
        DataChecksConfigurationError: If the section has no ``threshold`` key.
    """
    # Guard first: a threshold is the only supported validation setting.
    if "threshold" not in validation_config:
        raise DataChecksConfigurationError(
            f"Invalid validation configuration {validation_config}"
        )

    parsed_threshold = _parse_threshold_str(threshold=validation_config["threshold"])
    return Validation(threshold=parsed_threshold)


def _parse_resource_field(resource_str: str, belongs_to: str) -> Field:
splits = resource_str.split(".")
if len(splits) != 3:
Expand Down Expand Up @@ -121,7 +171,6 @@ def parse_metric_configurations(
),
expression=expression_str,
)
metric_configurations[metric_configuration.name] = metric_configuration
else:
resource_str = metric_yaml_configuration["resource"]
data_source_name = resource_str.split(".")[0]
Expand All @@ -145,7 +194,14 @@ def parse_metric_configurations(
metric_configuration.filter = MetricsFilterConfiguration(
where=metric_yaml_configuration["filters"]["where"]
)
metric_configurations[metric_configuration.name] = metric_configuration
if (
"validation" in metric_yaml_configuration
and metric_yaml_configuration["validation"] is not None
):
metric_configuration.validation = _parse_validation_configuration(
metric_yaml_configuration["validation"]
)
metric_configurations[metric_configuration.name] = metric_configuration

return metric_configurations

Expand Down
71 changes: 61 additions & 10 deletions datachecks/core/metric/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@
import datetime
import json
from abc import ABC
from typing import Dict, Optional, Union
from typing import Optional, Tuple, Union

from loguru import logger

from datachecks.core.common.errors import DataChecksMetricGenerationError
from datachecks.core.common.models.metric import MetricsType, MetricValue
from datachecks.core.common.models.validation import ConditionType
from datachecks.core.datasource.base import DataSource
from datachecks.core.datasource.search_datasource import SearchIndexDataSource
from datachecks.core.datasource.sql_datasource import SQLDataSource
Expand Down Expand Up @@ -104,6 +104,9 @@ def __init__(
self.filter_query = json.loads(filters.where)
elif isinstance(data_source, SQLDataSource):
self.filter_query = filters.where
self.validation = None
if "validation" in kwargs and kwargs["validation"] is not None:
self.validation = kwargs["validation"]

def get_metric_identity(self):
MetricIdentity.generate_identity(
Expand All @@ -122,15 +125,28 @@ def get_metric_value(self, **kwargs) -> Union[MetricValue, None]:
tags = {
"metric_name": self.name,
}
if self.metric_type.value == MetricsType.COMBINED.value:
value = MetricValue(
identity=self.get_metric_identity(),
metric_type=self.metric_type.value,
value=metric_value,
expression=self.expression,
timestamp=datetime.datetime.utcnow().isoformat(),
tags=tags,
)
else:
value = MetricValue(
identity=self.get_metric_identity(),
metric_type=self.metric_type.value,
value=metric_value,
timestamp=datetime.datetime.utcnow().isoformat(),
data_source=self.data_source.data_source_name,
expression=self.expression,
tags=tags,
)
if self.validation is not None and self.validation.threshold is not None:
value.is_valid, value.reason = self.validate_metric(metric_value)

value = MetricValue(
identity=self.get_metric_identity(),
metric_type=self.metric_type.value,
value=metric_value,
timestamp=datetime.datetime.utcnow().isoformat(),
data_source=self.data_source.data_source_name,
tags=tags,
)
if (
"index_name" in self.__dict__
and self.__dict__["index_name"] is not None
Expand All @@ -153,6 +169,41 @@ def get_metric_value(self, **kwargs) -> Union[MetricValue, None]:
logger.error(f"Failed to generate metric {self.name}: {str(e)}")
return None

def validate_metric(self, metric_value) -> Tuple[bool, Optional[str]]:
    """Check a metric value against every bound set on the threshold.

    Bounds are read from the threshold's attributes in attribute order;
    bounds that are ``None`` are skipped.

    Returns:
        ``(False, reason)`` for the first violated bound, otherwise
        ``(True, None)``.
    """
    configured_bounds = self.validation.threshold.__dict__
    for bound_name, bound in configured_bounds.items():
        if bound is None:
            continue

        # At most one of these name checks matches a given attribute.
        if ConditionType.GTE == bound_name and metric_value < bound:
            return False, f"Less than threshold of {bound}"
        if ConditionType.LTE == bound_name and metric_value > bound:
            return False, f"Greater than threshold of {bound}"
        if ConditionType.GT == bound_name and metric_value <= bound:
            return False, f"Less than or equal to threshold of {bound}"
        if ConditionType.LT == bound_name and metric_value >= bound:
            return False, f"Greater than or equal to threshold of {bound}"
        if ConditionType.EQ == bound_name and metric_value != bound:
            return False, f"Not equal to {bound}"

    return True, None


class FieldMetrics(Metric, ABC):
def __init__(
Expand Down
24 changes: 1 addition & 23 deletions datachecks/core/metric/combined_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,26 +97,4 @@ def _generate_metric_value(self, metric_values: List[MetricValue]):
Generate the metric value for this metric
"""
expression_data = self._metric_expression_parser(self.expression, metric_values)
return self._perform_operation(expression_data)

def get_metric_value(
self, metric_values: List[MetricValue]
) -> Union[MetricValue, None]:
try:
metric_value = self._generate_metric_value(metric_values)
tags = {
"metric_name": self.name,
}

value = MetricValue(
identity=self.get_metric_identity(),
metric_type=self.metric_type.value,
value=metric_value,
expression=self.expression,
timestamp=datetime.datetime.utcnow().isoformat(),
tags=tags,
)
return value
except Exception as e:
logger.error(f"Failed to generate metric {self.name}: {str(e)}")
return None
return round(self._perform_operation(expression_data), 2)
Loading

0 comments on commit 71b404a

Please sign in to comment.