Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Add support for newer Evidently versions #1780

Merged
merged 8 commits into from
Sep 7, 2023
4 changes: 2 additions & 2 deletions src/zenml/cli/integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def list_integrations() -> None:
warning(
"\n" + "To install the dependencies of a specific integration, type: "
)
warning("zenml integration install EXAMPLE_NAME")
warning("zenml integration install INTEGRATION_NAME")


@integration.command(
Expand Down Expand Up @@ -89,7 +89,7 @@ def get_requirements(integration_name: Optional[str] = None) -> None:
"\n" + "To install the dependencies of a "
"specific integration, type: "
)
warning("zenml integration install EXAMPLE_NAME")
warning("zenml integration install INTEGRATION_NAME")


@integration.command(
Expand Down
27 changes: 20 additions & 7 deletions src/zenml/integrations/evidently/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,26 +22,39 @@
file.
"""

import logging
import os
import warnings
from typing import List, Type

from zenml.enums import StackComponentType
from zenml.integrations.constants import EVIDENTLY
from zenml.integrations.integration import Integration
from zenml.stack import Flavor


# Fix numba errors in Docker and suppress logs and deprecation warning spam
try:
from numba.core.errors import ( # type: ignore[import]
NumbaDeprecationWarning,
NumbaPendingDeprecationWarning,
)

os.environ["NUMBA_CACHE_DIR"] = "/tmp" # nosec
numba_logger = logging.getLogger("numba")
numba_logger.setLevel(logging.WARNING)
warnings.simplefilter("ignore", category=NumbaDeprecationWarning)
warnings.simplefilter("ignore", category=NumbaPendingDeprecationWarning)
except ImportError:
pass

EVIDENTLY_DATA_VALIDATOR_FLAVOR = "evidently"


class EvidentlyIntegration(Integration):
"""[Evidently](https://github.com/evidentlyai/evidently) integration for ZenML."""

NAME = EVIDENTLY
REQUIREMENTS = ["evidently>0.2.6,<=0.2.8"] # supports old API and pyyaml 6

@staticmethod
def activate() -> None:
"""Activate the Evidently integration."""
from zenml.integrations.evidently import materializers # noqa
REQUIREMENTS = ["evidently>0.2.6"] # supports pyyaml 6

@classmethod
def flavors(cls) -> List[Type[Flavor]]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,38 +18,13 @@
Any,
ClassVar,
Dict,
List,
Optional,
Sequence,
Tuple,
Type,
)

import pandas as pd
from evidently.dashboard import Dashboard # type: ignore
from evidently.dashboard.tabs import ( # type: ignore
CatTargetDriftTab,
ClassificationPerformanceTab,
DataDriftTab,
DataQualityTab,
NumTargetDriftTab,
ProbClassificationPerformanceTab,
RegressionPerformanceTab,
)
from evidently.dashboard.tabs.base_tab import Tab # type: ignore
from evidently.model_profile import Profile # type: ignore
from evidently.model_profile.sections import ( # type: ignore
CatTargetDriftProfileSection,
ClassificationPerformanceProfileSection,
DataDriftProfileSection,
DataQualityProfileSection,
NumTargetDriftProfileSection,
ProbClassificationPerformanceProfileSection,
RegressionPerformanceProfileSection,
)
from evidently.model_profile.sections.base_profile_section import ( # type: ignore
ProfileSection,
)
from evidently.pipeline.column_mapping import ColumnMapping # type: ignore
from evidently.report import Report # type: ignore
from evidently.test_suite import TestSuite # type: ignore
Expand All @@ -66,62 +41,6 @@
logger = get_logger(__name__)


# Maps each supported profile identifier to its Evidently profile section
# class. The key order is also the order used when no explicit profile list
# is supplied and the order in which valid options are listed in errors.
profile_mapper = dict(
    datadrift=DataDriftProfileSection,
    categoricaltargetdrift=CatTargetDriftProfileSection,
    numericaltargetdrift=NumTargetDriftProfileSection,
    dataquality=DataQualityProfileSection,
    classificationmodelperformance=ClassificationPerformanceProfileSection,
    regressionmodelperformance=RegressionPerformanceProfileSection,
    probabilisticmodelperformance=ProbClassificationPerformanceProfileSection,
)

# Maps the same profile identifiers to the matching Evidently dashboard tab
# class.
dashboard_mapper = dict(
    dataquality=DataQualityTab,
    datadrift=DataDriftTab,
    categoricaltargetdrift=CatTargetDriftTab,
    numericaltargetdrift=NumTargetDriftTab,
    classificationmodelperformance=ClassificationPerformanceTab,
    regressionmodelperformance=RegressionPerformanceTab,
    probabilisticmodelperformance=ProbClassificationPerformanceTab,
)


def get_profile_sections_and_tabs(
    profile_list: Optional[Sequence[str]],
    verbose_level: int = 1,
) -> Tuple[List[ProfileSection], List[Tab]]:
    """Get the profile sections and dashboard tabs for a profile list.

    Args:
        profile_list: List of identifiers for Evidently profiles. When
            `None` or empty, every identifier in `profile_mapper` is used.
        verbose_level: Verbosity level for the rendered dashboard. Use
            0 for a brief dashboard, 1 for a detailed dashboard.

    Returns:
        A tuple of two lists of profile sections and tabs, each in the same
        order as the profile list.

    Raises:
        ValueError: if any identifier in the profile list is not supported.
    """
    profile_list = profile_list or list(profile_mapper)

    # Guard only the identifier lookups. The Evidently constructors run
    # outside the `try`, so a KeyError raised while building a section or tab
    # is not misreported as an unsupported profile identifier.
    try:
        section_classes = [profile_mapper[profile] for profile in profile_list]
        tab_classes = [dashboard_mapper[profile] for profile in profile_list]
    except KeyError as e:
        supported = "\n- ".join(profile_mapper)
        raise ValueError(
            f"Invalid profile sections: {profile_list} \n\n"
            f"Valid and supported options are: \n- {supported}"
        ) from e

    return (
        [section_class() for section_class in section_classes],
        [tab_class(verbose_level=verbose_level) for tab_class in tab_classes],
    )


class EvidentlyDataValidator(BaseDataValidator):
"""Evidently data validator stack component."""

Expand Down Expand Up @@ -339,71 +258,3 @@ def data_validation(
)

return test_suite

def legacy_data_profiling(
    self,
    dataset: pd.DataFrame,
    comparison_dataset: Optional[pd.DataFrame] = None,
    profile_list: Optional[Sequence[str]] = None,
    column_mapping: Optional[ColumnMapping] = None,
    verbose_level: int = 1,
    profile_options: Optional[Sequence[Tuple[str, Dict[str, Any]]]] = None,
    dashboard_options: Optional[Sequence[Tuple[str, Dict[str, Any]]]] = None,
    **kwargs: Any,
) -> Tuple[Profile, Dashboard]:
    """Analyze a dataset and generate a data profile with Evidently.

    The method takes in an optional list of Evidently options to be passed
    to the profile constructor (`profile_options`) and the dashboard
    constructor (`dashboard_options`). Each element in the list must be
    composed of two items: the first is a full class path of an Evidently
    option `dataclass`, the second is a dictionary of kwargs with the actual
    option parameters.

    Args:
        dataset: Target dataset to be profiled.
        comparison_dataset: Optional dataset to be used for data profiles
            that require a baseline for comparison (e.g data drift profiles).
        profile_list: Optional list identifying the categories of Evidently
            data profiles to be generated.
        column_mapping: Properties of the DataFrame columns used
        verbose_level: Level of verbosity for the Evidently dashboards. Use
            0 for a brief dashboard, 1 for a detailed dashboard.
        profile_options: Optional list of options to pass to the
            profile constructor. `None` is treated as an empty list.
        dashboard_options: Optional list of options to pass to the
            dashboard constructor. `None` is treated as an empty list.
        **kwargs: Extra keyword arguments (unused).

    Returns:
        The Evidently Profile and Dashboard objects corresponding to the set
        of generated profiles.
    """
    logger.warning(
        "The ZenML Evidently data profile step and data validator "
        "methods that are still using Evidently Profile and Dashboard "
        "objects are deprecated and will be removed in a future release. "
        "Please use the new data report step and data validator methods "
        "that make use of the Evidently Report and Test Suite objects "
        "instead."
    )

    # Defaults are `None` rather than `[]` so that no list object is shared
    # between calls; a fresh empty list is created here when none was given.
    if profile_options is None:
        profile_options = []
    if dashboard_options is None:
        dashboard_options = []

    sections, tabs = get_profile_sections_and_tabs(
        profile_list, verbose_level
    )
    unpacked_profile_options = self._unpack_options(profile_options)
    unpacked_dashboard_options = self._unpack_options(dashboard_options)

    # `dataset` is passed as the reference data and `comparison_dataset` as
    # the current data for both the dashboard and the profile.
    dashboard = Dashboard(tabs=tabs, options=unpacked_dashboard_options)
    dashboard.calculate(
        reference_data=dataset,
        current_data=comparison_dataset,
        column_mapping=column_mapping,
    )
    profile = Profile(sections=sections, options=unpacked_profile_options)
    profile.calculate(
        reference_data=dataset,
        current_data=comparison_dataset,
        column_mapping=column_mapping,
    )
    return profile, dashboard
18 changes: 0 additions & 18 deletions src/zenml/integrations/evidently/materializers/__init__.py

This file was deleted.

This file was deleted.

3 changes: 0 additions & 3 deletions src/zenml/integrations/evidently/steps/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,3 @@
from zenml.integrations.evidently.steps.evidently_test import (
evidently_test_step,
)
from zenml.integrations.evidently.steps.evidently_profile import (
evidently_profile_step,
)
Loading
Loading