diff --git a/docs/backends/app/backend_info_app.py b/docs/backends/app/backend_info_app.py deleted file mode 100644 index f4edd7ec6cdbc..0000000000000 --- a/docs/backends/app/backend_info_app.py +++ /dev/null @@ -1,209 +0,0 @@ -from __future__ import annotations - -import datetime -import tempfile -from pathlib import Path -from typing import Optional - -import pandas as pd -import requests -import sqlglot -import streamlit as st - -import ibis -from ibis import _ - -ONE_HOUR_IN_SECONDS = datetime.timedelta(hours=1).total_seconds() - -st.set_page_config(layout="wide") - -# Track all queries. We display them at the bottom of the page. -ibis.options.verbose = True -sql_queries = [] -ibis.options.verbose_log = lambda sql: sql_queries.append(sql) - - -@st.cache_data(ttl=ONE_HOUR_IN_SECONDS) -def support_matrix_df(): - resp = requests.get("https://ibis-project.org/backends/raw_support_matrix.csv") - resp.raise_for_status() - - with tempfile.NamedTemporaryFile() as f: - f.write(resp.content) - return ( - ibis.read_csv(f.name) - .relabel({"FullOperation": "full_operation"}) - .mutate( - short_operation=_.full_operation.split(".")[-1], - operation_category=_.full_operation.split(".")[-2], - ) - .to_pandas() - ) - - -@st.cache_data(ttl=ONE_HOUR_IN_SECONDS) -def backends_info_df(): - return pd.DataFrame( - { - "bigquery": ["string", "sql"], - "clickhouse": ["string", "sql"], - "dask": ["dataframe"], - "datafusion": ["sql"], - "druid": ["sqlalchemy", "sql"], - "duckdb": ["sqlalchemy", "sql"], - "exasol": ["sqlalchemy", "sql"], - "flink": ["string", "sql"], - "impala": ["string", "sql"], - "mssql": ["sqlalchemy", "sql"], - "mysql": ["sqlalchemy", "sql"], - "oracle": ["sqlalchemy", "sql"], - "pandas": ["dataframe"], - "polars": ["dataframe"], - "postgres": ["sqlalchemy", "sql"], - "pyspark": ["dataframe"], - "snowflake": ["sqlalchemy", "sql"], - "sqlite": ["sqlalchemy", "sql"], - "trino": ["sqlalchemy", "sql"], - }.items(), - columns=["backend_name", "categories"], - ) - - -backend_info_table = ibis.memtable(backends_info_df()) -support_matrix_table = ibis.memtable(support_matrix_df()) - - -@st.cache_data(ttl=ONE_HOUR_IN_SECONDS) -def get_all_backend_categories(): - return ( - backend_info_table.select(category=_.categories.unnest()) - .distinct() - .order_by("category")["category"] - .to_pandas() - .tolist() - ) - - -@st.cache_data(ttl=ONE_HOUR_IN_SECONDS) -def get_all_operation_categories(): - return ( - support_matrix_table.select(_.operation_category) - .distinct()["operation_category"] - .to_pandas() - .tolist() - ) - - -@st.cache_data(ttl=ONE_HOUR_IN_SECONDS) -def get_backend_names(categories: Optional[list[str]] = None): - backend_expr = backend_info_table.mutate(category=_.categories.unnest()) - if categories: - backend_expr = backend_expr.filter(_.category.isin(categories)) - return ( - backend_expr.select(_.backend_name).distinct().backend_name.to_pandas().tolist() - ) - - -def get_selected_backend_name(): - backend_categories = get_all_backend_categories() - selected_categories_names = st.sidebar.multiselect( - "Backend category", - options=backend_categories, - default=backend_categories, - ) - return get_backend_names(selected_categories_names) - - -def get_backend_subset(subset): - return st.sidebar.multiselect("Backend name", options=subset, default=subset) - - -def get_selected_operation_categories(): - all_ops_categories = get_all_operation_categories() - - selected_ops_categories = st.sidebar.multiselect( - "Operation category", - options=sorted(all_ops_categories), - default=None, - ) - if not selected_ops_categories: - selected_ops_categories = all_ops_categories - show_geospatial = st.sidebar.checkbox("Include Geospatial ops", value=True) - if not show_geospatial and "geospatial" in selected_ops_categories: - selected_ops_categories.remove("geospatial") - return selected_ops_categories - - -current_backend_names = get_backend_subset(get_selected_backend_name()) -sort_by_coverage = st.sidebar.checkbox("Sort by API Coverage", value=False) -current_ops_categories = get_selected_operation_categories() - -hide_supported_by_all_backends = st.sidebar.selectbox( - "Operation compatibility", - ["Show all", "Show supported by all backends", "Hide supported by all backends"], - 0, -) -show_full_ops_name = st.sidebar.checkbox("Show full operation name", False) - -# Start ibis expression -table_expr = support_matrix_table - -# Add index to result -if show_full_ops_name: - table_expr = table_expr.mutate(index=_.full_operation) -else: - table_expr = table_expr.mutate(index=_.short_operation) -table_expr = table_expr.order_by(_.index) - -# Filter operations by selected categories -table_expr = table_expr.filter(_.operation_category.isin(current_ops_categories)) - -# Filter operation by compatibility -supported_backend_count = sum( - getattr(table_expr, backend_name).ifelse(1, 0) - for backend_name in current_backend_names -) -if hide_supported_by_all_backends == "Show supported by all backends": - table_expr = table_expr.filter( - supported_backend_count == len(current_backend_names) - ) -elif hide_supported_by_all_backends == "Hide supported by all backends": - table_expr = table_expr.filter( - supported_backend_count != len(current_backend_names) - ) - -# Show only selected backend -table_expr = table_expr[current_backend_names + ["index"]] - -# Execute query -df = table_expr.to_pandas() -df = df.set_index("index") - -# Display result -all_visible_ops_count = len(df.index) -if all_visible_ops_count: - # Compute coverage - coverage = ( - df.sum() - .sort_values(ascending=False) - .map(lambda n: f"{n} ({round(100 * n / all_visible_ops_count)}%)") - .to_frame(name="API Coverage") - .T - ) - - table = pd.concat([coverage, df.replace({True: "✔", False: "🚫"})]).loc[ - :, slice(None) if sort_by_coverage else sorted(df.columns) - ] - st.dataframe(table) -else: - st.write("No data") - -with st.expander("SQL queries"): - for sql_query in sql_queries: - pretty_sql_query = sqlglot.transpile( - sql_query, read="duckdb", write="duckdb", pretty=True - )[0] - st.code(pretty_sql_query, language="sql") - -with st.expander("Source code"): - st.code(Path(__file__).read_text()) diff --git a/docs/backends/app/requirements.txt b/docs/backends/app/requirements.txt deleted file mode 100644 index fea8a289cc9c0..0000000000000 --- a/docs/backends/app/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -ibis-framework[duckdb]>=4.0 -pandas -requests -streamlit -sqlglot diff --git a/docs/support_matrix.qmd b/docs/support_matrix.qmd index ad0444c7c6fdd..d805ad99274db 100644 --- a/docs/support_matrix.qmd +++ b/docs/support_matrix.qmd @@ -1,18 +1,46 @@ --- +title: "Operation support matrix" +format: dashboard hide: - toc --- -# Operation support matrix +```{python} +#| echo: false +!python ../gen_matrix.py +``` -We provide Ibis's operation support matrix as -a [Streamlit](https://streamlit.io/) app that shows supported operations for -each backend. Ibis defines a common API for analytics and data transformation -code that is transpiled to native code for each backend. Due to differences in -SQL dialects and upstream support for different operations in different -backends, support for the full breadth of the Ibis API varies. +```{python} +#| echo: false +import pandas as pd + +support_matrix = pd.read_csv("./backends/raw_support_matrix.csv") +support_matrix = support_matrix.assign( + Category=support_matrix.Operation.map(lambda op: op.rsplit(".", 1)[0].rsplit(".", 1)[-1]), + Operation=support_matrix.Operation.map(lambda op: op.rsplit(".", 1)[-1]), +).set_index(["Category", "Operation"]) +all_visible_ops_count = len(support_matrix) +coverage = pd.Index( + support_matrix.sum() + .map(lambda n: f"{n} ({round(100 * n / all_visible_ops_count)}%)") + .T +) +support_matrix.columns = pd.MultiIndex.from_tuples( + list(zip(support_matrix.columns, coverage)), names=("Backend", "API coverage") +) +support_matrix = support_matrix.replace({True: "✔", False: "🚫"}) +``` + +## {height=25%} + +::: {.card title="Welcome to the operation support matrix!"} -You can use this page to see which operations are supported on each backend. +This is a [Quarto dashboard](https://quarto.org/docs/dashboards/) that shows +the operations each backend supports. + +Due to differences in SQL dialects and upstream support for different +operations in different backends, support for the full breadth of the Ibis API +varies. ::: {.callout-tip} Backends with low coverage are good places to start contributing! @@ -20,38 +48,44 @@ Backends with low coverage are good places to start contributing! Each backend implements operations differently, but this is usually very similar to other backends. If you want to start contributing to ibis, it's a good idea to start by adding missing operations to backends that have low operation coverage. ::: -
- -
- -::: {.callout-note} -This app is built using [`streamlit`](https://streamlit.io/). - -You can develop the app locally by editing `docs/backends/app/backend_info_app.py` and opening a PR with your changes. +::: -Test your changes locally by running +### {width=25%} -```sh -$ streamlit run docs/backends/app/backend_info_app.py +```{python} +#| content: valuebox +#| title: "Number of backends" +import ibis + +dict( + value=len(ibis.util.backend_entry_points()), + color="info", + icon="signpost-split-fill", +) ``` -The changes will show up in the dev docs when your PR is merged! -::: - -## Raw Data +### {width=25%} ```{python} -#| echo: false -!python ../gen_matrix.py +#| content: valuebox +#| title: "Number of SQL backends" +import importlib +from ibis.backends.base.sql import BaseSQLBackend + +sql_backends = sum( + issubclass( + importlib.import_module(f"ibis.backends.{entry_point.name}").Backend, + BaseSQLBackend + ) + for entry_point in ibis.util.backend_entry_points() +) +dict(value=sql_backends, color="green", icon="database") ``` -You can also download data from the above tables in [CSV format](./backends/raw_support_matrix.csv). - -The code used to generate the linked CSV file is below. +## {height=70%} ```{python} -#| echo: false -#| output: asis -with open("../gen_matrix.py", mode="r") as f: - print(f"```python\n{f.read()}\n```") +from itables import show + +show(support_matrix, ordering=False, paging=False, buttons=["copy", "excel", "csv"]) ``` diff --git a/docs/theme-dark.scss b/docs/theme-dark.scss index c773fb85b37cd..fa1b97d7158c7 100644 --- a/docs/theme-dark.scss +++ b/docs/theme-dark.scss @@ -1,2 +1,3 @@ /*-- scss:defaults --*/ $code-color: #c2d94c; +$code-bg: #2b2b2b; diff --git a/gen_matrix.py b/gen_matrix.py index b850b8ca037f3..e6c21381dd7e8 100644 --- a/gen_matrix.py +++ b/gen_matrix.py @@ -44,7 +44,7 @@ def main(): with Path(ibis.__file__).parents[1].joinpath( "docs", "backends", "raw_support_matrix.csv" ).open(mode="w") as f: - df.to_csv(f, index_label="FullOperation") + df.to_csv(f, index_label="Operation") if __name__ == "__main__": diff --git a/pyproject.toml b/pyproject.toml index d5efac432570a..7ab24a22c5f77 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -134,6 +134,7 @@ sqlalchemy = ">=1.4,<3" altair = { version = ">=5.0.1,<6", python = ">=3.10,<3.13" } distributed = { version = ">=2022.9.1", python = ">=3.10,<3.13" } ipykernel = { version = ">=6.25.1,<7", python = ">=3.10,<3.13" } +itables = { version = ">=1.6.3,<2", python = ">=3.10,<3.13" } nbclient = { version = ">=0.8.0,<1", python = ">=3.10,<3.13" } plotly = { version = ">=5.16.1,<6", python = ">=3.10,<3.13" } plotnine = { version = ">=0.12.2,<1", python = ">=3.10,<3.13" } diff --git a/requirements-dev.txt b/requirements-dev.txt index 7855a13915d5f..ab9141b826d6b 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -100,6 +100,7 @@ ipyleaflet==0.18.1 ; python_version >= "3.10" and python_version < "3.13" ipython==8.18.1 ; python_version >= "3.9" and python_version < "4.0" ipytree==0.2.2 ; python_version >= "3.10" and python_version < "3.13" ipywidgets==8.1.1 ; python_version >= "3.10" and python_version < "3.13" +itables==1.6.3 ; python_version >= "3.10" and python_version < "3.13" jaraco-classes==3.3.0 ; python_version >= "3.9" and python_version < "4.0" jedi==0.19.1 ; python_version >= "3.9" and python_version < "4.0" jeepney==0.8.0 ; python_version >= "3.9" and python_version < "4.0" and sys_platform == "linux"