From 978dca19ea7e5a4b352196730de4a8a1f0370aa4 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Fri, 12 Jan 2024 23:35:01 +0000 Subject: [PATCH 1/4] Start working on version 1.7.0-dev --- docs/changelog.md | 7 +++++++ itables/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/docs/changelog.md b/docs/changelog.md index 96145cfc..89873b06 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,6 +1,13 @@ ITables ChangeLog ================= +1.7.0-dev (2024-01-??) +---------------------- + +**Added** +- Added support for Ibis tables (#215) + + 1.6.3 (2023-12-10) ------------------ diff --git a/itables/version.py b/itables/version.py index 6557b260..9d027231 100644 --- a/itables/version.py +++ b/itables/version.py @@ -1,3 +1,3 @@ """ITables' version number""" -__version__ = "1.6.3" +__version__ = "1.7.0-dev" From 7f5da9951869fd8ed00e27273c4df8f0ee9ce7ab Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Fri, 12 Jan 2024 23:48:10 +0000 Subject: [PATCH 2/4] Prepare some tests for Ibis --- .github/workflows/continuous-integration.yml | 6 ++++++ itables/sample_dfs.py | 15 ++++++++++++++- setup.py | 5 ++++- tests/test_ibis.py | 16 ++++++++++++++++ 4 files changed, 40 insertions(+), 2 deletions(-) create mode 100644 tests/test_ibis.py diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml index 3d7ae113..32250f07 100644 --- a/.github/workflows/continuous-integration.yml +++ b/.github/workflows/continuous-integration.yml @@ -50,6 +50,8 @@ jobs: - python-version: "3.11" pandas-version: pre polars: true + - python-version: "3.11" + ibis: true - python-version: "3.11" uninstall_jinja2: true runs-on: ubuntu-20.04 @@ -85,6 +87,10 @@ jobs: if: matrix.polars run: pip install -e .[polars] + - name: Install Ibis + if: matrix.ibis + run: pip install -e .[ibis] + - name: Uninstall jinja2 if: matrix.uninstall_jinja2 run: pip uninstall jinja2 -y diff --git a/itables/sample_dfs.py b/itables/sample_dfs.py index 2cd0325d..45adffdb 100644 --- a/itables/sample_dfs.py +++ b/itables/sample_dfs.py @@ -105,7 +105,7 @@ def get_df_complex_index(): return df -def get_dict_of_test_dfs(N=100, M=100, polars=False): +def get_dict_of_test_dfs(N=100, M=100, polars=False, ibis=False): NM_values = np.reshape(np.linspace(start=0.0, stop=1.0, num=N * M), (N, M)) test_dfs = { @@ -279,6 +279,19 @@ def get_dict_of_test_dfs(N=100, M=100, polars=False): pass return polars_dfs + if ibis: + import ibis as ib + + con = ib.pandas.connect(test_dfs) + ibis_dfs = {} + for key in test_dfs: + try: + ibis_dfs[key] = con.table(key) + except (TypeError, AttributeError): + pass + + return ibis_dfs + return test_dfs diff --git a/setup.py b/setup.py index 7d54d90e..36000d7c 100644 --- a/setup.py +++ b/setup.py @@ -52,7 +52,10 @@ }, tests_require=["pytest", "pytz"], install_requires=["IPython", "pandas", "numpy"], - extras_require={"polars": ["polars", "pyarrow"]}, + extras_require={ + "polars": ["polars", "pyarrow"], + "ibis": ["ibis-framework[pandas]"], + }, license="MIT", classifiers=[ "Development Status :: 5 - Production/Stable", diff --git a/tests/test_ibis.py b/tests/test_ibis.py new file mode 100644 index 00000000..487cbb59 --- /dev/null +++ b/tests/test_ibis.py @@ -0,0 +1,16 @@ +import pytest + +from itables import to_html_datatable +from itables.sample_dfs import get_dict_of_test_dfs + +try: + import ibis # noqa +except ImportError as e: + pytest.skip(str(e), allow_module_level=True) + + +@pytest.mark.parametrize( + "name,df", [(name, df) for name, df in get_dict_of_test_dfs(ibis=True).items()] +) +def test_show_ibis_df(name, df, use_to_html): + to_html_datatable(df, use_to_html) From a5d12fa0ea15257b78cb1cb15af711138a9d6408 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Sat, 13 Jan 2024 21:27:13 +0000 Subject: [PATCH 3/4] Cleanup environment.yml --- environment.yml | 10 +++------- environment2.yml | 17 ----------------- 2 files changed, 3 insertions(+), 24 deletions(-) delete mode 100644 environment2.yml diff --git a/environment.yml b/environment.yml index 6a1061da..7427717f 100644 --- a/environment.yml +++ b/environment.yml @@ -6,21 +6,17 @@ dependencies: - python - jupyter - jupyterlab - - jupytext>=1.13.8 - - markdown-it-py>=2.0 + - jupytext - nbconvert - ipykernel - pandas - polars - pyarrow + - ibis-duckdb - pytest - pytest-xdist - pytest-cov - pre-commit - - pylint - - flake8 - - black - - isort - pip - setuptools - twine @@ -29,4 +25,4 @@ dependencies: - shiny - pip: - world_bank_data - - jupyter_book>=0.12 # jupyter-book-0.12.2-pyhd8ed1ab_0 requires jupytext >=1.11.2,<1.12 + - jupyter_book diff --git a/environment2.yml b/environment2.yml deleted file mode 100644 index 53cb13b2..00000000 --- a/environment2.yml +++ /dev/null @@ -1,17 +0,0 @@ -name: itables2 -channels: - - conda-forge - - defaults -dependencies: - - python<3 - - IPython - - pandas==0.22 - - pytest - - pytest-xdist - - pytest-cov - - pathlib - - functools32 - - pip - - pip: - - world_bank_data - - jupytext From 9d4cc6c539081e56aeea6a58febdbde36cd779b8 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Sat, 13 Jan 2024 23:11:23 +0000 Subject: [PATCH 4/4] Quick hack to make some Ibis tables work in show --- docs/changelog.md | 2 +- docs/polars_dataframes.md | 2 +- itables/downsample.py | 45 +++++++++++++++++++++++++++------ itables/javascript.py | 20 ++++++++++----- itables/sample_dfs.py | 53 ++++++++++++++++++++++++++------------- tests/test_ibis.py | 16 ++++++++++-- tests/test_polars.py | 4 +-- 7 files changed, 106 insertions(+), 36 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 89873b06..5a067e8d 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -5,7 +5,7 @@ ITables ChangeLog ---------------------- **Added** -- Added support for Ibis tables (#215) +- Added support for Ibis tables ([#215](https://github.com/mwouts/itables/issues/215)) 1.6.3 (2023-12-10) diff --git a/docs/polars_dataframes.md b/docs/polars_dataframes.md index 753d2081..72a2f2f9 100644 --- a/docs/polars_dataframes.md +++ b/docs/polars_dataframes.md @@ -21,7 +21,7 @@ dataframes are displayed nicely with the default `itables` settings. from itables import init_notebook_mode, show from itables.sample_dfs import get_dict_of_test_dfs -dict_of_test_dfs = get_dict_of_test_dfs(polars=True) +dict_of_test_dfs = get_dict_of_test_dfs(type="polars") init_notebook_mode(all_interactive=True) ``` diff --git a/itables/downsample.py b/itables/downsample.py index 1761a90b..231b4033 100644 --- a/itables/downsample.py +++ b/itables/downsample.py @@ -3,6 +3,14 @@ import pandas as pd +try: + from ibis.common.exceptions import ExpressionError +except ImportError: + + class ExpressionError(Exception): + pass + + logging.basicConfig() logger = logging.getLogger(__name__) @@ -11,18 +19,37 @@ def nbytes(df): try: return sum(x.values.nbytes for _, x in df.items()) except AttributeError: - # Polars DataFrame - return df.estimated_size() + try: + # Polars DataFrame + return df.estimated_size() + except AttributeError: + # Ibis Table + # TODO: find a more direct way to estimate the size of the table + nrows = df.count().execute() + if not nrows: + return 0 + return nrows * (nbytes(df.head(5).to_pandas()) / min(nrows, 5)) + + +def nrows(df): + try: + return len(df) + except TypeError: + # Pandas Styler + return len(df.index) + except ExpressionError: + # ibis table + return df.count().execute() def downsample(df, max_rows=0, max_columns=0, max_bytes=0): """Return a subset of the dataframe that fits the limits""" - org_rows, org_columns, org_bytes = len(df), len(df.columns), nbytes(df) + org_rows, org_columns, org_bytes = nrows(df), len(df.columns), nbytes(df) df = _downsample( df, max_rows=max_rows, max_columns=max_columns, max_bytes=max_bytes ) - if len(df) < org_rows or len(df.columns) < org_columns: + if nrows(df) < org_rows or len(df.columns) < org_columns: link = 'downsampled' reasons = [] if org_rows > max_rows > 0: @@ -76,7 +103,7 @@ def shrink_towards_target_aspect_ratio( def _downsample(df, max_rows=0, max_columns=0, max_bytes=0, target_aspect_ratio=None): """Implementation of downsample - may be called recursively""" - if len(df) > max_rows > 0: + if nrows(df) > max_rows > 0: second_half = max_rows // 2 first_half = max_rows - second_half if second_half: @@ -134,6 +161,10 @@ def _downsample(df, max_rows=0, max_columns=0, max_bytes=0, target_aspect_ratio= import polars as pl # noqa df = pl.DataFrame({df.columns[0]: ["..."]}) - return df - return df + try: + len(df) + return df + except ExpressionError: + # Ibis + return df.to_pandas() diff --git a/itables/javascript.py b/itables/javascript.py index 761783d1..dd9a213f 100644 --- a/itables/javascript.py +++ b/itables/javascript.py @@ -11,6 +11,8 @@ import numpy as np import pandas as pd +from .downsample import nrows + try: import pandas.io.formats.style as pd_style except ImportError: @@ -22,6 +24,12 @@ # Define pl.Series as pd.Series import pandas as pl +try: + import ibis.expr.types.relations as ibis_relations +except ImportError: + ibis_relations = None + + from IPython.display import HTML, Javascript, display import itables.options as opt @@ -102,6 +110,8 @@ def init_notebook_mode( pd_style.Styler._repr_html_ = _datatables_repr_ pl.DataFrame._repr_html_ = _datatables_repr_ pl.Series._repr_html_ = _datatables_repr_ + if ibis_relations is not None: + ibis_relations.Table._repr_html_ = _datatables_repr_ else: pd.DataFrame._repr_html_ = _ORIGINAL_DATAFRAME_REPR_HTML if pd_style is not None: @@ -111,6 +121,9 @@ def init_notebook_mode( del pd.Series._repr_html_ if hasattr(pl.Series, "_repr_html_"): del pl.Series._repr_html_ + if ibis_relations is not None: + if hasattr(ibis_relations.Table, "_repr_html_"): + del ibis_relations.Table._repr_html_ if not connected: display(Javascript(read_package_file("external/jquery.min.js"))) @@ -601,12 +614,7 @@ def _min_rows(kwargs): def _df_fits_in_one_page(df, kwargs): """Display just the table (not the search box, etc...) if the rows fit on one 'page'""" - try: - # Pandas DF or Style - return len(df.index) <= _min_rows(kwargs) - except AttributeError: - # Polars - return len(df) <= _min_rows(kwargs) + return nrows(df) <= _min_rows(kwargs) def safe_reset_index(df): diff --git a/itables/sample_dfs.py b/itables/sample_dfs.py index 45adffdb..643df364 100644 --- a/itables/sample_dfs.py +++ b/itables/sample_dfs.py @@ -1,12 +1,7 @@ import math import string from datetime import datetime, timedelta - -try: - from functools import lru_cache -except ImportError: - from functools32 import lru_cache - +from functools import lru_cache from itertools import cycle import numpy as np @@ -105,7 +100,7 @@ def get_df_complex_index(): return df -def get_dict_of_test_dfs(N=100, M=100, polars=False, ibis=False): +def get_dict_of_test_dfs(N=100, M=100, type="pandas"): NM_values = np.reshape(np.linspace(start=0.0, stop=1.0, num=N * M), (N, M)) test_dfs = { @@ -266,8 +261,10 @@ def get_dict_of_test_dfs(N=100, M=100, polars=False, ibis=False): } ), } + if type == "pandas": + return test_dfs - if polars: + if type == "polars": import polars as pl import pyarrow as pa @@ -279,23 +276,42 @@ def get_dict_of_test_dfs(N=100, M=100, polars=False, ibis=False): pass return polars_dfs - if ibis: - import ibis as ib + if type == "ibis_memtable": + import ibis - con = ib.pandas.connect(test_dfs) ibis_dfs = {} - for key in test_dfs: + for key, df in test_dfs.items(): + # Ibis does not support tables with no columns + if not len(df.columns): + continue + try: + ibis_dfs[key] = ibis.memtable(df, name=key) + except (TypeError, ibis.common.exceptions.IbisInputError): + pass + + return ibis_dfs + + if type == "ibis_connect": + import ibis + + con = ibis.pandas.connect(test_dfs) + ibis_dfs = {} + for key, df in test_dfs.items(): + # Ibis does not support tables with no columns + if not len(df.columns): + continue + try: - ibis_dfs[key] = con.table(key) + ibis_dfs[f"{key}_connect"] = con.table(key) except (TypeError, AttributeError): pass return ibis_dfs - return test_dfs + raise NotImplementedError(type) -def get_dict_of_test_series(polars=False): +def get_dict_of_test_series(type="pandas"): series = {} for df_name, df in get_dict_of_test_dfs().items(): if len(df.columns) > 6: @@ -306,7 +322,10 @@ def get_dict_of_test_series(polars=False): continue series["{}.{}".format(df_name, col)] = df[col] - if polars: + if type == "pandas": + return series + + if type == "polars": import polars as pl import pyarrow as pa @@ -325,7 +344,7 @@ def get_dict_of_test_series(polars=False): return polars_series - return series + raise NotImplementedError(type) @lru_cache() diff --git a/tests/test_ibis.py b/tests/test_ibis.py index 487cbb59..e4344d9f 100644 --- a/tests/test_ibis.py +++ b/tests/test_ibis.py @@ -8,9 +8,21 @@ except ImportError as e: pytest.skip(str(e), allow_module_level=True) +# TODO Remove this (and find out how to evaluate count) +ibis.options.interactive = True + + +@pytest.mark.parametrize( + "name,df", + [(name, df) for name, df in get_dict_of_test_dfs(type="ibis_memtable").items()], +) +def test_show_ibis_memtable(name, df, use_to_html): + to_html_datatable(df, use_to_html) + @pytest.mark.parametrize( - "name,df", [(name, df) for name, df in get_dict_of_test_dfs(ibis=True).items()] + "name,df", + [(name, df) for name, df in get_dict_of_test_dfs(type="ibis_connect").items()], ) -def test_show_ibis_df(name, df, use_to_html): +def test_show_ibis_connect(name, df, use_to_html): to_html_datatable(df, use_to_html) diff --git a/tests/test_polars.py b/tests/test_polars.py index 19d40e68..fffc02c8 100644 --- a/tests/test_polars.py +++ b/tests/test_polars.py @@ -10,14 +10,14 @@ @pytest.mark.parametrize( - "name,x", [(name, x) for name, x in get_dict_of_test_series(polars=True).items()] + "name,x", [(name, x) for name, x in get_dict_of_test_series(type="polars").items()] ) def test_show_polars_series(name, x, use_to_html): to_html_datatable(x, use_to_html) @pytest.mark.parametrize( - "name,df", [(name, df) for name, df in get_dict_of_test_dfs(polars=True).items()] + "name,df", [(name, df) for name, df in get_dict_of_test_dfs(type="polars").items()] ) def test_show_polars_df(name, df, use_to_html): to_html_datatable(df, use_to_html)