Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

1.1.3 updates #19

Merged
merged 29 commits into from
Aug 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
f76eba6
version bump; add helper for generating random dataframe
shouples Aug 4, 2022
729933f
rename "default" display mode to "plain"
shouples Aug 4, 2022
a578b56
add More Info user help section after truncating dataframe
shouples Aug 4, 2022
1e505ec
ensure temporary dx.display calls revert properly
shouples Aug 4, 2022
6a90ee0
flake8/black configs
shouples Aug 4, 2022
0d3623e
handle custom indexes
shouples Aug 4, 2022
dd0d0ce
update changelog
shouples Aug 4, 2022
81ebd77
add is_default_index check
shouples Aug 4, 2022
0db445d
update changelog
shouples Aug 4, 2022
f54a525
rename and test default indexing
shouples Aug 5, 2022
91a1d44
update fixture to more accurately test large dataframe truncating
shouples Aug 5, 2022
c6c2189
minor tweaks until we can replace this
shouples Aug 5, 2022
11fefca
don't require display_id for format_*
shouples Aug 5, 2022
5ffc881
use generic DisplayFormatter instead of None
shouples Aug 5, 2022
213d9e1
add str methods
shouples Aug 5, 2022
ed70ace
newline
shouples Aug 5, 2022
516ca87
updates for testing
shouples Aug 5, 2022
8b7fb95
minor warning update
shouples Aug 5, 2022
a54ccf1
test custom/multiindex formatting
shouples Aug 5, 2022
10d427d
add display mode tests
shouples Aug 5, 2022
33bf6d8
updates for testing
shouples Aug 5, 2022
aeae89d
this isn't necessary
shouples Aug 5, 2022
338dd51
fix default index check; add geopandas jsonifying
shouples Aug 5, 2022
5d678f3
add geopandas soft dependency
shouples Aug 5, 2022
d0f7364
changelog update
shouples Aug 5, 2022
041d6c1
clean up index/column cleaning for easier reading/testing
shouples Aug 5, 2022
19ba1fc
booo empty strings
shouples Aug 5, 2022
9a02553
more commentary in this column helper
shouples Aug 5, 2022
407b013
changelog
shouples Aug 5, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,25 @@
All notable changes will be documented here.

---
## `1.1.3`
_2022-08-05_
### Added
- Helper text for adjusting settings after a dataset has been truncated
- Automatic `.reset_index()` when a custom index or MultiIndex is used, preserving `0-n` row numbers at the `index` level
- Support for geopandas GeoDataFrames and GeoSeries objects
- `dx.random_dataframe(num_rows, num_columns)` convenience function
- More tests!

### Changed
- `default` display mode (vanilla pandas) renamed to `plain`

### Fixed
- Properly pass `metadata` through to the `IPython.display` calls that render dataresource/dx formatted data
- `plain` display format tests use a basic IPython `DisplayFormatter` instead of `None`

<img width="568" alt="image" src="https://user-images.githubusercontent.com/7707189/182971951-52b440ae-f894-4eb0-8941-3cadd78aef0a.png">


## `1.1.1`-`1.1.2`
_2022-07-22_
### Added
Expand Down
3 changes: 2 additions & 1 deletion dx/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from .config import *
from .dx import *
from .formatters import *
from .helpers import *
from .settings import *

__version__ = "1.1.2"
__version__ = "1.1.3"

set_display_mode("simple")
12 changes: 11 additions & 1 deletion dx/config.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,22 @@
import os

from IPython import get_ipython
from IPython.core.formatters import DisplayFormatter

IN_IPYTHON_ENV = get_ipython() is not None
DEFAULT_IPYTHON_DISPLAY_FORMATTER = None

DEFAULT_IPYTHON_DISPLAY_FORMATTER = DisplayFormatter()
if IN_IPYTHON_ENV:
DEFAULT_IPYTHON_DISPLAY_FORMATTER = get_ipython().display_formatter

# we don't want to require geopandas as a hard dependency
try:
import geopandas as gpd

GEOPANDAS_INSTALLED = True
except ImportError:
GEOPANDAS_INSTALLED = False


def in_noteable_env() -> bool:
"""
Expand Down
10 changes: 6 additions & 4 deletions dx/dx.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import pathlib
from typing import List, Union
from typing import List, Optional, Union

import pandas as pd
from IPython.core.interactiveshell import InteractiveShell
from IPython.display import display as ipydisplay

from dx.settings import set_display_mode, settings
Expand All @@ -11,6 +12,7 @@
def display(
data: Union[List[dict], pd.DataFrame, Union[pathlib.Path, str]],
mode: DXDisplayMode = DXDisplayMode.simple,
ipython_shell: Optional[InteractiveShell] = None,
) -> None:
"""
Display a single object with the DX display format.
Expand All @@ -28,10 +30,10 @@ def display(

df = pd.DataFrame(data)

orig_mode = settings.DISPLAY_MODE
set_display_mode(mode)
orig_mode = settings.DISPLAY_MODE.value
set_display_mode(mode, ipython_shell=ipython_shell)
ipydisplay(df)
set_display_mode(orig_mode)
set_display_mode(orig_mode, ipython_shell=ipython_shell)
return


Expand Down
50 changes: 39 additions & 11 deletions dx/formatters/callouts.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import enum
import uuid
from typing import Optional

from IPython.display import HTML, display
from pydantic import BaseModel
Expand All @@ -10,27 +12,53 @@ class CalloutLevel(enum.Enum):
info = "info"
warning = "warning"
danger = "danger"
success = "success"


class CalloutIcon(enum.Enum):
info = "info"
warning = "warning"
success = "success"


class Callout(BaseModel):
message: str
icon: Optional[CalloutIcon] = None
level: CalloutLevel = CalloutLevel.info
message: str
use_header: bool = True

@property
def html(self):
heading_html = f"<h6 class='bp3-heading'>{self.level.value.title()}</h6>"
callout_classes = " ".join(
[
"bp3-callout",
f"bp3-intent-{self.level.value}",
]
)
return f"<div class='{callout_classes}'>{heading_html}{self.message}</div>"
callout_classes = [
"bp3-callout",
f"bp3-intent-{self.level.value}",
]
if self.icon is not None:
callout_classes.append(f"bp3-icon-{self.icon.value}-sign")
callout_class_str = " ".join(callout_classes)

if self.use_header:
heading_html = f"<h6 class='bp3-heading'>{self.level.value.title()}</h6>"
return f"""<div class="{callout_class_str}" style="margin-bottom: 0.5rem">{heading_html}{self.message}</div>"""

return f"""<div class="{callout_class_str}" style="margin-bottom: 0.5rem">{self.message}</div>"""


def display_callout(
message: str,
level: CalloutLevel = CalloutLevel.info,
header: bool = True,
icon: Optional[CalloutIcon] = None,
display_id: str = None,
) -> None:
callout = Callout(message=message, level=level)
display(HTML(callout.html))
callout = Callout(
message=message,
level=level,
use_header=header,
icon=icon,
)
display_id = display_id or str(uuid.uuid4())

# TODO: coordinate with frontend to replace this with a standalone media type
# instead of rendering HTML with custom classes/styles
display(HTML(callout.html), display_id=display_id)
57 changes: 17 additions & 40 deletions dx/formatters/dataresource.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,7 @@
from pydantic import BaseSettings, Field

from dx.config import DEFAULT_IPYTHON_DISPLAY_FORMATTER, IN_IPYTHON_ENV
from dx.formatters.utils import (
stringify_columns,
stringify_indices,
truncate_and_describe,
)
from dx.formatters.utils import normalize_index_and_columns, truncate_and_describe
from dx.settings import settings


Expand All @@ -25,9 +21,7 @@ class DataResourceSettings(BaseSettings):
DATARESOURCE_DISPLAY_MAX_ROWS: int = 100_000
DATARESOURCE_DISPLAY_MAX_COLUMNS: int = 50
DATARESOURCE_HTML_TABLE_SCHEMA: bool = Field(True, allow_mutation=False)
DATARESOURCE_MEDIA_TYPE: str = Field(
"application/vnd.dataresource+json", allow_mutation=False
)
DATARESOURCE_MEDIA_TYPE: str = Field("application/vnd.dataresource+json", allow_mutation=False)
DATARESOURCE_RENDERABLE_OBJECTS: List[type] = [pd.DataFrame, np.ndarray]

class Config:
Expand All @@ -48,35 +42,22 @@ def format(self, obj, **kwargs):
if isinstance(obj, tuple(settings.RENDERABLE_OBJECTS)):
display_id = str(uuid.uuid4())
df_obj = pd.DataFrame(obj)
payload, metadata = _render_dataresource(df_obj, display_id)
payload, metadata = format_dataresource(df_obj, display_id)
# TODO: determine if/how we can pass payload/metadata with
# display_id for the frontend to pick up properly
return ({}, {})

return DEFAULT_IPYTHON_DISPLAY_FORMATTER.format(obj, **kwargs)


def format_dataresource(df: pd.DataFrame, display_id: str) -> tuple:
def generate_dataresource_body(df: pd.DataFrame, display_id: Optional[str] = None) -> tuple:
"""
Transforms the dataframe to a payload dictionary containing the
table schema and column values as arrays.
"""
# temporary workaround for numeric column rendering errors with GRID
# https://noteables.slack.com/archives/C03CB8A4Z2L/p1658497348488939
display_df = df.copy()
display_df = stringify_columns(display_df)

# temporary workaround for numeric MultiIndices
# because of pandas build_table_schema() errors
if isinstance(display_df.index, pd.MultiIndex):
display_df = stringify_indices(display_df)

# build_table_schema() also doesn't like pd.NAs
display_df.fillna(np.nan, inplace=True)

payload_body = {
"schema": build_table_schema(display_df),
"data": display_df.reset_index().to_dict("records"),
"schema": build_table_schema(df),
"data": df.reset_index().to_dict("records"),
"datalink": {},
}
payload = {dataresource_settings.DATARESOURCE_MEDIA_TYPE: payload_body}
Expand All @@ -89,16 +70,18 @@ def format_dataresource(df: pd.DataFrame, display_id: str) -> tuple:
}
metadata = {dataresource_settings.DATARESOURCE_MEDIA_TYPE: metadata_body}

if display_id is not None:
payload_body["datalink"]["display_id"] = display_id
metadata_body["datalink"]["display_id"] = display_id
display_id = display_id or str(uuid.uuid4())
payload_body["datalink"]["display_id"] = display_id
metadata_body["datalink"]["display_id"] = display_id

return (payload, metadata)


def _render_dataresource(df, display_id) -> tuple:
def format_dataresource(df, display_id) -> tuple:
# enable 0-n row counts for frontend
df = normalize_index_and_columns(df)
df, dataframe_info = truncate_and_describe(df)
payload, metadata = format_dataresource(df, display_id)
payload, metadata = generate_dataresource_body(df, display_id)
metadata[dataresource_settings.DATARESOURCE_MEDIA_TYPE]["datalink"][
"dataframe_info"
] = dataframe_info
Expand All @@ -107,7 +90,7 @@ def _render_dataresource(df, display_id) -> tuple:
with pd.option_context(
"html.table_schema", dataresource_settings.DATARESOURCE_HTML_TABLE_SCHEMA
):
ipydisplay(payload, raw=True, display_id=display_id)
ipydisplay(payload, raw=True, metadata=metadata, display_id=display_id)

return (payload, metadata)

Expand All @@ -124,19 +107,13 @@ def deregister(ipython_shell: Optional[InteractiveShell] = None) -> None:
global settings
settings.DISPLAY_MODE = "simple"

settings.DISPLAY_MAX_COLUMNS = (
dataresource_settings.DATARESOURCE_DISPLAY_MAX_COLUMNS
)
settings.DISPLAY_MAX_COLUMNS = dataresource_settings.DATARESOURCE_DISPLAY_MAX_COLUMNS
settings.DISPLAY_MAX_ROWS = dataresource_settings.DATARESOURCE_DISPLAY_MAX_ROWS
settings.MEDIA_TYPE = dataresource_settings.DATARESOURCE_MEDIA_TYPE
settings.RENDERABLE_OBJECTS = dataresource_settings.DATARESOURCE_RENDERABLE_OBJECTS

pd.set_option(
"display.max_columns", dataresource_settings.DATARESOURCE_DISPLAY_MAX_COLUMNS
)
pd.set_option(
"display.max_rows", dataresource_settings.DATARESOURCE_DISPLAY_MAX_ROWS
)
pd.set_option("display.max_columns", dataresource_settings.DATARESOURCE_DISPLAY_MAX_COLUMNS)
pd.set_option("display.max_rows", dataresource_settings.DATARESOURCE_DISPLAY_MAX_ROWS)

ipython = ipython_shell or get_ipython()
ipython.display_formatter = DXDataResourceDisplayFormatter()
40 changes: 12 additions & 28 deletions dx/formatters/dx.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,7 @@
from pydantic import BaseSettings, Field

from dx.config import DEFAULT_IPYTHON_DISPLAY_FORMATTER, IN_IPYTHON_ENV
from dx.formatters.utils import (
stringify_columns,
stringify_indices,
truncate_and_describe,
)
from dx.formatters.utils import normalize_index_and_columns, truncate_and_describe
from dx.settings import settings


Expand Down Expand Up @@ -46,36 +42,23 @@ def format(self, obj, **kwargs):
if isinstance(obj, tuple(settings.RENDERABLE_OBJECTS)):
display_id = str(uuid.uuid4())
df_obj = pd.DataFrame(obj)
payload, metadata = _render_dx(df_obj, display_id)
payload, metadata = format_dx(df_obj, display_id)
# TODO: determine if/how we can pass payload/metadata with
# display_id for the frontend to pick up properly
return ({}, {})

return DEFAULT_IPYTHON_DISPLAY_FORMATTER.format(obj, **kwargs)


def format_dx(df: pd.DataFrame, display_id: str) -> tuple:
def generate_dx_body(df: pd.DataFrame, display_id: Optional[str] = None) -> tuple:
"""
Transforms the dataframe to a payload dictionary containing the
table schema and column values as arrays.
"""
# temporary workaround for numeric column rendering errors
# https://noteables.slack.com/archives/C03CB8A4Z2L/p1658497348488939
display_df = df.copy()
display_df = stringify_columns(display_df)

# temporary workaround for numeric MultiIndices
# because of pandas build_table_schema() errors
if isinstance(display_df.index, pd.MultiIndex):
display_df = stringify_indices(display_df)

# build_table_schema() also doesn't like pd.NAs
display_df.fillna(np.nan, inplace=True)

# this will include the `df.index` by default (e.g. slicing/sampling)
payload_body = {
"schema": build_table_schema(display_df),
"data": display_df.reset_index().transpose().values.tolist(),
"schema": build_table_schema(df),
"data": df.reset_index().transpose().values.tolist(),
"datalink": {},
}
payload = {dx_settings.DX_MEDIA_TYPE: payload_body}
Expand All @@ -88,21 +71,22 @@ def format_dx(df: pd.DataFrame, display_id: str) -> tuple:
}
metadata = {dx_settings.DX_MEDIA_TYPE: metadata_body}

if display_id is not None:
payload_body["datalink"]["display_id"] = display_id
metadata_body["datalink"]["display_id"] = display_id
display_id = display_id or str(uuid.uuid4())
payload_body["datalink"]["display_id"] = display_id
metadata_body["datalink"]["display_id"] = display_id

return (payload, metadata)


def _render_dx(df, display_id) -> tuple:
def format_dx(df, display_id) -> tuple:
df = normalize_index_and_columns(df)
df, dataframe_info = truncate_and_describe(df)
payload, metadata = format_dx(df, display_id)
payload, metadata = generate_dx_body(df, display_id)
metadata[dx_settings.DX_MEDIA_TYPE]["datalink"]["dataframe_info"] = dataframe_info

# don't pass a dataframe in here, otherwise you'll get recursion errors
with pd.option_context("html.table_schema", dx_settings.DX_HTML_TABLE_SCHEMA):
ipydisplay(payload, raw=True, display_id=display_id)
ipydisplay(payload, raw=True, metadata=metadata, display_id=display_id)

return (payload, metadata)

Expand Down
Loading