Skip to content

Commit

Permalink
refreshing examples and to_html() method (#1169)
Browse files Browse the repository at this point in the history
* refreshing examples and to_html() method

details:
- The examples in our README were not using the latest whylogs.core.constraints.factories helpers, unlike the examples on flytesnacks.
- WhylogsDatasetProfileTransformer.to_html() now returns the HTML string of a ProfileSummaryReport

Signed-off-by: murilommen <[email protected]>

* fixing lint on test file

Signed-off-by: murilommen <[email protected]>

Signed-off-by: murilommen <[email protected]>
  • Loading branch information
murilommen authored Sep 22, 2022
1 parent cfcccb8 commit 2ecd653
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 41 deletions.
28 changes: 14 additions & 14 deletions plugins/flytekit-whylogs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,17 @@ pip install flytekitplugins-whylogs
To generate profiles, you can add a task like the following:

```python
import whylogs as why
from whylogs.core import DatasetProfileView
import whylogs as ylog

import pandas as pd

from flytekit import task

@task
def profile(df: pd.DataFrame) -> DatasetProfileView:
result = ylog.log(df) # Various overloads for different common data types exist
profile = result.view()
result = why.log(df) # Various overloads for different common data types exist
profile_view = result.view()
return profile_view
```

Expand All @@ -37,21 +39,19 @@ if the data in the workflow doesn't conform to some configured constraints, like
min/max values on features, data types on features, etc.

```python
from whylogs.core.constraints.factories import greater_than_number, mean_between_range

@task
def validate_data(profile: DatasetProfileView):
column = profile.get_column("my_column")
print(column.to_summary_dict()) # To see available things you can validate against
builder = ConstraintsBuilder(profile)
numConstraint = MetricConstraint(
name='numbers between 0 and 4 only',
condition=lambda x: x.min > 0 and x.max < 4,
metric_selector=MetricsSelector(metric_name='distribution', column_name='my_column'))
builder.add_constraint(numConstraint)
def validate_data(profile_view: DatasetProfileView):
builder = ConstraintsBuilder(dataset_profile_view=profile_view)
builder.add_constraint(greater_than_number(column_name="my_column", number=0.14))
builder.add_constraint(mean_between_range(column_name="my_other_column", lower=2, upper=3))
constraint = builder.build()
valid = constraint.validate()

if(not valid):
if valid is False:
print(constraint.report())
raise Exception("Invalid data found")
```

Check out our [constraints notebook](https://github.com/whylabs/whylogs/blob/1.0.x/python/examples/basic/MetricConstraints.ipynb) for more examples.
If you want to learn more about whylogs, check out our [example notebooks](https://github.com/whylabs/whylogs/tree/mainline/python/examples).
6 changes: 3 additions & 3 deletions plugins/flytekit-whylogs/flytekitplugins/whylogs/schema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import Type

from whylogs.core import DatasetProfileView
from whylogs.viz.extensions.reports.profile_summary import ProfileSummaryReport

from flytekit import BlobType, FlyteContext
from flytekit.extend import T, TypeEngine, TypeTransformer
Expand Down Expand Up @@ -42,9 +43,8 @@ def to_python_value(self, ctx: FlyteContext, lv: Literal, expected_python_type:
def to_html(
self, ctx: FlyteContext, python_val: DatasetProfileView, expected_python_type: Type[DatasetProfileView]
) -> str:
pandas_profile = str(python_val.to_pandas().to_html())
header = str("<h1>Profile View</h1> \n")
return header + pandas_profile
report = ProfileSummaryReport(target_view=python_val)
return report.report().data


TypeEngine.register(WhylogsDatasetProfileTransformer())
58 changes: 49 additions & 9 deletions plugins/flytekit-whylogs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,61 @@
#
-e file:.#egg=flytekitplugins-whylogs
# via -r requirements.in
flake8==4.0.1
appnope==0.1.3
# via ipython
asttokens==2.0.8
# via stack-data
backcall==0.2.0
# via ipython
decorator==5.1.1
# via ipython
executing==1.0.0
# via stack-data
ipython==8.5.0
# via whylogs
mccabe==0.6.1
# via flake8
jedi==0.18.1
# via ipython
matplotlib-inline==0.1.6
# via ipython
numpy==1.23.3
# via scipy
parso==0.8.3
# via jedi
pexpect==4.8.0
# via ipython
pickleshare==0.7.5
# via ipython
prompt-toolkit==3.0.31
# via ipython
protobuf==3.20.1
# via
# flytekitplugins-whylogs
# whylogs
pycodestyle==2.8.0
# via flake8
pyflakes==2.4.0
# via flake8
ptyprocess==0.7.0
# via pexpect
pure-eval==0.2.2
# via stack-data
pybars3==0.9.7
# via whylogs
pygments==2.13.0
# via ipython
pymeta3==0.5.1
# via pybars3
scipy==1.9.1
# via whylogs
six==1.16.0
# via asttokens
stack-data==0.5.0
# via ipython
traitlets==5.4.0
# via
# ipython
# matplotlib-inline
typing-extensions==4.3.0
# via whylogs
whylogs==1.0.6
wcwidth==0.2.5
# via prompt-toolkit
whylogs[viz]==1.1.0
# via flytekitplugins-whylogs
whylogs-sketching==3.4.1.dev2
whylogs-sketching==3.4.1.dev3
# via whylogs
2 changes: 1 addition & 1 deletion plugins/flytekit-whylogs/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

microlib_name = f"flytekitplugins-{PLUGIN_NAME}"

plugin_requires = ["protobuf>=3.15,<4.0.0", "whylogs", "whylogs[viz]"]
plugin_requires = ["protobuf>=3.15,<4.0.0", "whylogs[viz]>=1.0.8"]

__version__ = "0.0.0+develop"

Expand Down
35 changes: 21 additions & 14 deletions plugins/flytekit-whylogs/tests/test_schema.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,19 @@
from datetime import datetime
from typing import Type

import pandas as pd
import pytest
import whylogs as why
from flytekitplugins.whylogs.schema import WhylogsDatasetProfileTransformer
from whylogs.core import DatasetProfileView

from flytekit import task, workflow


@pytest.fixture
def input_data():
return pd.DataFrame({"a": [1, 2, 3, 4]})
from flytekit.core.context_manager import FlyteContextManager


@task
def whylogs_profiling(data: pd.DataFrame) -> DatasetProfileView:
result = why.log(pandas=data)
def whylogs_profiling() -> DatasetProfileView:
df = pd.DataFrame({"a": [1, 2, 3, 4]})
result = why.log(pandas=df)
return result.view()


Expand All @@ -25,18 +23,27 @@ def fetch_whylogs_datetime(profile_view: DatasetProfileView) -> datetime:


@workflow
def whylogs_wf(data: pd.DataFrame) -> datetime:
profile_view = whylogs_profiling(data=data)
def whylogs_wf() -> datetime:
profile_view = whylogs_profiling()
return fetch_whylogs_datetime(profile_view=profile_view)


def test_task_returns_whylogs_profile_view(input_data):
actual_profile = whylogs_profiling(data=input_data)
def test_task_returns_whylogs_profile_view() -> None:
actual_profile = whylogs_profiling()
assert actual_profile is not None
assert isinstance(actual_profile, DatasetProfileView)


def test_profile_view_gets_passed_on_tasks(input_data):
result = whylogs_wf(data=input_data)
def test_profile_view_gets_passed_on_tasks() -> None:
result = whylogs_wf()
assert result is not None
assert isinstance(result, datetime)


def test_to_html_method() -> None:
tf = WhylogsDatasetProfileTransformer()
profile_view = whylogs_profiling()
report = tf.to_html(FlyteContextManager.current_context(), profile_view, Type[DatasetProfileView])

assert isinstance(report, str)
assert "Profile Visualizer" in report

0 comments on commit 2ecd653

Please sign in to comment.