Skip to content

Commit

Permalink
refreshing examples and to_html() method
Browse files Browse the repository at this point in the history
details:
- the examples in our README were not using the latest whylogs.core.constraints.factories in the same way as the flytesnacks examples do.
- WhylogsDatasetProfileTransformer.to_html() now returns a ProfileSummary HTML string

Signed-off-by: murilommen <[email protected]>
  • Loading branch information
murilommen committed Sep 19, 2022
1 parent cfcccb8 commit 0dda2fe
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 41 deletions.
28 changes: 14 additions & 14 deletions plugins/flytekit-whylogs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,17 @@ pip install flytekitplugins-whylogs
To generate profiles, you can add a task like the following:

```python
import whylogs as why
from whylogs.core import DatasetProfileView
import whylogs as ylog

import pandas as pd

from flytekit import task

@task
def profile(df: pd.DataFrame) -> DatasetProfileView:
result = ylog.log(df) # Various overloads for different common data types exist
profile = result.view()
result = why.log(df) # Various overloads for different common data types exist
profile_view = result.view()
return profile_view
```

Expand All @@ -37,21 +39,19 @@ if the data in the workflow doesn't conform to some configured constraints, like
min/max values on features, data types on features, etc.

```python
from whylogs.core.constraints.factories import greater_than_number, mean_between_range

@task
def validate_data(profile: DatasetProfileView):
column = profile.get_column("my_column")
print(column.to_summary_dict()) # To see available things you can validate against
builder = ConstraintsBuilder(profile)
numConstraint = MetricConstraint(
name='numbers between 0 and 4 only',
condition=lambda x: x.min > 0 and x.max < 4,
metric_selector=MetricsSelector(metric_name='distribution', column_name='my_column'))
builder.add_constraint(numConstraint)
def validate_data(profile_view: DatasetProfileView):
builder = ConstraintsBuilder(dataset_profile_view=profile_view)
builder.add_constraint(greater_than_number(column_name="my_column", number=0.14))
builder.add_constraint(mean_between_range(column_name="my_other_column", lower=2, upper=3))
constraint = builder.build()
valid = constraint.validate()

if(not valid):
if valid is False:
print(constraint.report())
raise Exception("Invalid data found")
```

Check out our [constraints notebook](https://github.com/whylabs/whylogs/blob/1.0.x/python/examples/basic/MetricConstraints.ipynb) for more examples.
If you want to learn more about whylogs, check out our [example notebooks](https://github.com/whylabs/whylogs/tree/mainline/python/examples).
6 changes: 3 additions & 3 deletions plugins/flytekit-whylogs/flytekitplugins/whylogs/schema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import Type

from whylogs.core import DatasetProfileView
from whylogs.viz.extensions.reports.profile_summary import ProfileSummaryReport

from flytekit import BlobType, FlyteContext
from flytekit.extend import T, TypeEngine, TypeTransformer
Expand Down Expand Up @@ -42,9 +43,8 @@ def to_python_value(self, ctx: FlyteContext, lv: Literal, expected_python_type:
def to_html(
self, ctx: FlyteContext, python_val: DatasetProfileView, expected_python_type: Type[DatasetProfileView]
) -> str:
pandas_profile = str(python_val.to_pandas().to_html())
header = str("<h1>Profile View</h1> \n")
return header + pandas_profile
report = ProfileSummaryReport(target_view=python_val)
return report.report().data


TypeEngine.register(WhylogsDatasetProfileTransformer())
58 changes: 49 additions & 9 deletions plugins/flytekit-whylogs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,61 @@
#
-e file:.#egg=flytekitplugins-whylogs
# via -r requirements.in
flake8==4.0.1
appnope==0.1.3
# via ipython
asttokens==2.0.8
# via stack-data
backcall==0.2.0
# via ipython
decorator==5.1.1
# via ipython
executing==1.0.0
# via stack-data
ipython==8.5.0
# via whylogs
mccabe==0.6.1
# via flake8
jedi==0.18.1
# via ipython
matplotlib-inline==0.1.6
# via ipython
numpy==1.23.3
# via scipy
parso==0.8.3
# via jedi
pexpect==4.8.0
# via ipython
pickleshare==0.7.5
# via ipython
prompt-toolkit==3.0.31
# via ipython
protobuf==3.20.1
# via
# flytekitplugins-whylogs
# whylogs
pycodestyle==2.8.0
# via flake8
pyflakes==2.4.0
# via flake8
ptyprocess==0.7.0
# via pexpect
pure-eval==0.2.2
# via stack-data
pybars3==0.9.7
# via whylogs
pygments==2.13.0
# via ipython
pymeta3==0.5.1
# via pybars3
scipy==1.9.1
# via whylogs
six==1.16.0
# via asttokens
stack-data==0.5.0
# via ipython
traitlets==5.4.0
# via
# ipython
# matplotlib-inline
typing-extensions==4.3.0
# via whylogs
whylogs==1.0.6
wcwidth==0.2.5
# via prompt-toolkit
whylogs[viz]==1.1.0
# via flytekitplugins-whylogs
whylogs-sketching==3.4.1.dev2
whylogs-sketching==3.4.1.dev3
# via whylogs
2 changes: 1 addition & 1 deletion plugins/flytekit-whylogs/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

microlib_name = f"flytekitplugins-{PLUGIN_NAME}"

plugin_requires = ["protobuf>=3.15,<4.0.0", "whylogs", "whylogs[viz]"]
plugin_requires = ["protobuf>=3.15,<4.0.0", "whylogs[viz]>=1.0.8"]

__version__ = "0.0.0+develop"

Expand Down
35 changes: 21 additions & 14 deletions plugins/flytekit-whylogs/tests/test_schema.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,19 @@
from datetime import datetime
from typing import Type

import pandas as pd
import pytest
import whylogs as why
from whylogs.core import DatasetProfileView

from flytekitplugins.whylogs.schema import WhylogsDatasetProfileTransformer
from flytekit.core.context_manager import FlyteContextManager
from flytekit import task, workflow


@pytest.fixture
def input_data():
return pd.DataFrame({"a": [1, 2, 3, 4]})


@task
def whylogs_profiling(data: pd.DataFrame) -> DatasetProfileView:
result = why.log(pandas=data)
def whylogs_profiling() -> DatasetProfileView:
df = pd.DataFrame({"a": [1, 2, 3, 4]})
result = why.log(pandas=df)
return result.view()


Expand All @@ -25,18 +23,27 @@ def fetch_whylogs_datetime(profile_view: DatasetProfileView) -> datetime:


@workflow
def whylogs_wf(data: pd.DataFrame) -> datetime:
profile_view = whylogs_profiling(data=data)
def whylogs_wf() -> datetime:
profile_view = whylogs_profiling()
return fetch_whylogs_datetime(profile_view=profile_view)


def test_task_returns_whylogs_profile_view(input_data):
actual_profile = whylogs_profiling(data=input_data)
def test_task_returns_whylogs_profile_view() -> None:
actual_profile = whylogs_profiling()
assert actual_profile is not None
assert isinstance(actual_profile, DatasetProfileView)


def test_profile_view_gets_passed_on_tasks(input_data):
result = whylogs_wf(data=input_data)
def test_profile_view_gets_passed_on_tasks() -> None:
result = whylogs_wf()
assert result is not None
assert isinstance(result, datetime)


def test_to_html_method() -> None:
tf = WhylogsDatasetProfileTransformer()
profile_view = whylogs_profiling()
report = tf.to_html(FlyteContextManager.current_context(), profile_view, Type[DatasetProfileView])

assert isinstance(report, str)
assert "Profile Visualizer" in report

0 comments on commit 0dda2fe

Please sign in to comment.