Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refreshing examples and to_html() method #1169

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 14 additions & 14 deletions plugins/flytekit-whylogs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,17 @@ pip install flytekitplugins-whylogs
To generate profiles, you can add a task like the following:

```python
import whylogs as why
from whylogs.core import DatasetProfileView
import whylogs as ylog

import pandas as pd

from flytekit import task

@task
def profile(df: pd.DataFrame) -> DatasetProfileView:
result = ylog.log(df) # Various overloads for different common data types exist
profile = result.view()
result = why.log(df) # Various overloads for different common data types exist
profile_view = result.view()
return profile_view
```

Expand All @@ -37,21 +39,19 @@ if the data in the workflow doesn't conform to some configured constraints, like
min/max values on features, data types on features, etc.

```python
from whylogs.core.constraints.factories import greater_than_number, mean_between_range

@task
def validate_data(profile: DatasetProfileView):
column = profile.get_column("my_column")
print(column.to_summary_dict()) # To see available things you can validate against
builder = ConstraintsBuilder(profile)
numConstraint = MetricConstraint(
name='numbers between 0 and 4 only',
condition=lambda x: x.min > 0 and x.max < 4,
metric_selector=MetricsSelector(metric_name='distribution', column_name='my_column'))
builder.add_constraint(numConstraint)
def validate_data(profile_view: DatasetProfileView):
builder = ConstraintsBuilder(dataset_profile_view=profile_view)
builder.add_constraint(greater_than_number(column_name="my_column", number=0.14))
builder.add_constraint(mean_between_range(column_name="my_other_column", lower=2, upper=3))
constraint = builder.build()
valid = constraint.validate()

if(not valid):
if valid is False:
print(constraint.report())
raise Exception("Invalid data found")
```

Check out our [constraints notebook](https://github.com/whylabs/whylogs/blob/1.0.x/python/examples/basic/MetricConstraints.ipynb) for more examples.
If you want to learn more about whylogs, check out our [example notebooks](https://github.com/whylabs/whylogs/tree/mainline/python/examples).
6 changes: 3 additions & 3 deletions plugins/flytekit-whylogs/flytekitplugins/whylogs/schema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import Type

from whylogs.core import DatasetProfileView
from whylogs.viz.extensions.reports.profile_summary import ProfileSummaryReport

from flytekit import BlobType, FlyteContext
from flytekit.extend import T, TypeEngine, TypeTransformer
Expand Down Expand Up @@ -42,9 +43,8 @@ def to_python_value(self, ctx: FlyteContext, lv: Literal, expected_python_type:
def to_html(
self, ctx: FlyteContext, python_val: DatasetProfileView, expected_python_type: Type[DatasetProfileView]
) -> str:
pandas_profile = str(python_val.to_pandas().to_html())
header = str("<h1>Profile View</h1> \n")
return header + pandas_profile
report = ProfileSummaryReport(target_view=python_val)
return report.report().data


TypeEngine.register(WhylogsDatasetProfileTransformer())
58 changes: 49 additions & 9 deletions plugins/flytekit-whylogs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,61 @@
#
-e file:.#egg=flytekitplugins-whylogs
# via -r requirements.in
flake8==4.0.1
appnope==0.1.3
# via ipython
asttokens==2.0.8
# via stack-data
backcall==0.2.0
# via ipython
decorator==5.1.1
# via ipython
executing==1.0.0
# via stack-data
ipython==8.5.0
# via whylogs
mccabe==0.6.1
# via flake8
jedi==0.18.1
# via ipython
matplotlib-inline==0.1.6
# via ipython
numpy==1.23.3
# via scipy
parso==0.8.3
# via jedi
pexpect==4.8.0
# via ipython
pickleshare==0.7.5
# via ipython
prompt-toolkit==3.0.31
# via ipython
protobuf==3.20.1
# via
# flytekitplugins-whylogs
# whylogs
pycodestyle==2.8.0
# via flake8
pyflakes==2.4.0
# via flake8
ptyprocess==0.7.0
# via pexpect
pure-eval==0.2.2
# via stack-data
pybars3==0.9.7
# via whylogs
pygments==2.13.0
# via ipython
pymeta3==0.5.1
# via pybars3
scipy==1.9.1
# via whylogs
six==1.16.0
# via asttokens
stack-data==0.5.0
# via ipython
traitlets==5.4.0
# via
# ipython
# matplotlib-inline
typing-extensions==4.3.0
# via whylogs
whylogs==1.0.6
wcwidth==0.2.5
# via prompt-toolkit
whylogs[viz]==1.1.0
# via flytekitplugins-whylogs
whylogs-sketching==3.4.1.dev2
whylogs-sketching==3.4.1.dev3
# via whylogs
2 changes: 1 addition & 1 deletion plugins/flytekit-whylogs/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

microlib_name = f"flytekitplugins-{PLUGIN_NAME}"

plugin_requires = ["protobuf>=3.15,<4.0.0", "whylogs", "whylogs[viz]"]
plugin_requires = ["protobuf>=3.15,<4.0.0", "whylogs[viz]>=1.0.8"]

__version__ = "0.0.0+develop"

Expand Down
35 changes: 21 additions & 14 deletions plugins/flytekit-whylogs/tests/test_schema.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,19 @@
from datetime import datetime
from typing import Type

import pandas as pd
import pytest
import whylogs as why
from flytekitplugins.whylogs.schema import WhylogsDatasetProfileTransformer
from whylogs.core import DatasetProfileView

from flytekit import task, workflow


@pytest.fixture
def input_data():
return pd.DataFrame({"a": [1, 2, 3, 4]})
from flytekit.core.context_manager import FlyteContextManager


@task
def whylogs_profiling(data: pd.DataFrame) -> DatasetProfileView:
result = why.log(pandas=data)
def whylogs_profiling() -> DatasetProfileView:
df = pd.DataFrame({"a": [1, 2, 3, 4]})
result = why.log(pandas=df)
return result.view()


Expand All @@ -25,18 +23,27 @@ def fetch_whylogs_datetime(profile_view: DatasetProfileView) -> datetime:


@workflow
def whylogs_wf(data: pd.DataFrame) -> datetime:
profile_view = whylogs_profiling(data=data)
def whylogs_wf() -> datetime:
profile_view = whylogs_profiling()
return fetch_whylogs_datetime(profile_view=profile_view)


def test_task_returns_whylogs_profile_view(input_data):
actual_profile = whylogs_profiling(data=input_data)
def test_task_returns_whylogs_profile_view() -> None:
actual_profile = whylogs_profiling()
assert actual_profile is not None
assert isinstance(actual_profile, DatasetProfileView)


def test_profile_view_gets_passed_on_tasks(input_data):
result = whylogs_wf(data=input_data)
def test_profile_view_gets_passed_on_tasks() -> None:
result = whylogs_wf()
assert result is not None
assert isinstance(result, datetime)


def test_to_html_method() -> None:
    """Check that the transformer renders a profile view as an HTML report.

    Builds a profile view with ``whylogs_profiling``, hands it to
    ``WhylogsDatasetProfileTransformer.to_html`` and asserts that the result
    is a string containing the "Profile Visualizer" marker.
    """
    tf = WhylogsDatasetProfileTransformer()
    profile_view = whylogs_profiling()
    # ``expected_python_type`` is annotated ``Type[DatasetProfileView]``, so the
    # argument is the class object itself, not the ``typing.Type[...]`` alias.
    report = tf.to_html(FlyteContextManager.current_context(), profile_view, DatasetProfileView)

    assert isinstance(report, str)
    assert "Profile Visualizer" in report