diff --git a/docs/backends/app/backend_info_app.py b/docs/backends/app/backend_info_app.py deleted file mode 100644 index f4edd7ec6cdbc..0000000000000 --- a/docs/backends/app/backend_info_app.py +++ /dev/null @@ -1,209 +0,0 @@ -from __future__ import annotations - -import datetime -import tempfile -from pathlib import Path -from typing import Optional - -import pandas as pd -import requests -import sqlglot -import streamlit as st - -import ibis -from ibis import _ - -ONE_HOUR_IN_SECONDS = datetime.timedelta(hours=1).total_seconds() - -st.set_page_config(layout="wide") - -# Track all queries. We display them at the bottom of the page. -ibis.options.verbose = True -sql_queries = [] -ibis.options.verbose_log = lambda sql: sql_queries.append(sql) - - -@st.cache_data(ttl=ONE_HOUR_IN_SECONDS) -def support_matrix_df(): - resp = requests.get("https://ibis-project.org/backends/raw_support_matrix.csv") - resp.raise_for_status() - - with tempfile.NamedTemporaryFile() as f: - f.write(resp.content) - return ( - ibis.read_csv(f.name) - .relabel({"FullOperation": "full_operation"}) - .mutate( - short_operation=_.full_operation.split(".")[-1], - operation_category=_.full_operation.split(".")[-2], - ) - .to_pandas() - ) - - -@st.cache_data(ttl=ONE_HOUR_IN_SECONDS) -def backends_info_df(): - return pd.DataFrame( - { - "bigquery": ["string", "sql"], - "clickhouse": ["string", "sql"], - "dask": ["dataframe"], - "datafusion": ["sql"], - "druid": ["sqlalchemy", "sql"], - "duckdb": ["sqlalchemy", "sql"], - "exasol": ["sqlalchemy", "sql"], - "flink": ["string", "sql"], - "impala": ["string", "sql"], - "mssql": ["sqlalchemy", "sql"], - "mysql": ["sqlalchemy", "sql"], - "oracle": ["sqlalchemy", "sql"], - "pandas": ["dataframe"], - "polars": ["dataframe"], - "postgres": ["sqlalchemy", "sql"], - "pyspark": ["dataframe"], - "snowflake": ["sqlalchemy", "sql"], - "sqlite": ["sqlalchemy", "sql"], - "trino": ["sqlalchemy", "sql"], - }.items(), - columns=["backend_name", "categories"], - ) - - -backend_info_table = ibis.memtable(backends_info_df()) -support_matrix_table = ibis.memtable(support_matrix_df()) - - -@st.cache_data(ttl=ONE_HOUR_IN_SECONDS) -def get_all_backend_categories(): - return ( - backend_info_table.select(category=_.categories.unnest()) - .distinct() - .order_by("category")["category"] - .to_pandas() - .tolist() - ) - - -@st.cache_data(ttl=ONE_HOUR_IN_SECONDS) -def get_all_operation_categories(): - return ( - support_matrix_table.select(_.operation_category) - .distinct()["operation_category"] - .to_pandas() - .tolist() - ) - - -@st.cache_data(ttl=ONE_HOUR_IN_SECONDS) -def get_backend_names(categories: Optional[list[str]] = None): - backend_expr = backend_info_table.mutate(category=_.categories.unnest()) - if categories: - backend_expr = backend_expr.filter(_.category.isin(categories)) - return ( - backend_expr.select(_.backend_name).distinct().backend_name.to_pandas().tolist() - ) - - -def get_selected_backend_name(): - backend_categories = get_all_backend_categories() - selected_categories_names = st.sidebar.multiselect( - "Backend category", - options=backend_categories, - default=backend_categories, - ) - return get_backend_names(selected_categories_names) - - -def get_backend_subset(subset): - return st.sidebar.multiselect("Backend name", options=subset, default=subset) - - -def get_selected_operation_categories(): - all_ops_categories = get_all_operation_categories() - - selected_ops_categories = st.sidebar.multiselect( - "Operation category", - options=sorted(all_ops_categories), - default=None, - ) - if not selected_ops_categories: - selected_ops_categories = all_ops_categories - show_geospatial = st.sidebar.checkbox("Include Geospatial ops", value=True) - if not show_geospatial and "geospatial" in selected_ops_categories: - selected_ops_categories.remove("geospatial") - return selected_ops_categories - - -current_backend_names = get_backend_subset(get_selected_backend_name()) -sort_by_coverage = st.sidebar.checkbox("Sort by API Coverage", value=False) -current_ops_categories = get_selected_operation_categories() - -hide_supported_by_all_backends = st.sidebar.selectbox( - "Operation compatibility", - ["Show all", "Show supported by all backends", "Hide supported by all backends"], - 0, -) -show_full_ops_name = st.sidebar.checkbox("Show full operation name", False) - -# Start ibis expression -table_expr = support_matrix_table - -# Add index to result -if show_full_ops_name: - table_expr = table_expr.mutate(index=_.full_operation) -else: - table_expr = table_expr.mutate(index=_.short_operation) -table_expr = table_expr.order_by(_.index) - -# Filter operations by selected categories -table_expr = table_expr.filter(_.operation_category.isin(current_ops_categories)) - -# Filter operation by compatibility -supported_backend_count = sum( - getattr(table_expr, backend_name).ifelse(1, 0) - for backend_name in current_backend_names -) -if hide_supported_by_all_backends == "Show supported by all backends": - table_expr = table_expr.filter( - supported_backend_count == len(current_backend_names) - ) -elif hide_supported_by_all_backends == "Hide supported by all backends": - table_expr = table_expr.filter( - supported_backend_count != len(current_backend_names) - ) - -# Show only selected backend -table_expr = table_expr[current_backend_names + ["index"]] - -# Execute query -df = table_expr.to_pandas() -df = df.set_index("index") - -# Display result -all_visible_ops_count = len(df.index) -if all_visible_ops_count: - # Compute coverage - coverage = ( - df.sum() - .sort_values(ascending=False) - .map(lambda n: f"{n} ({round(100 * n / all_visible_ops_count)}%)") - .to_frame(name="API Coverage") - .T - ) - - table = pd.concat([coverage, df.replace({True: "✔", False: "🚫"})]).loc[ - :, slice(None) if sort_by_coverage else sorted(df.columns) - ] - st.dataframe(table) -else: - st.write("No data") - -with st.expander("SQL queries"): - for sql_query in sql_queries: - pretty_sql_query = sqlglot.transpile( - sql_query, read="duckdb", write="duckdb", pretty=True - )[0] - st.code(pretty_sql_query, language="sql") - -with st.expander("Source code"): - st.code(Path(__file__).read_text()) diff --git a/docs/backends/app/requirements.txt b/docs/backends/app/requirements.txt deleted file mode 100644 index fea8a289cc9c0..0000000000000 --- a/docs/backends/app/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -ibis-framework[duckdb]>=4.0 -pandas -requests -streamlit -sqlglot diff --git a/docs/support_matrix.qmd b/docs/support_matrix.qmd index ad0444c7c6fdd..d805ad99274db 100644 --- a/docs/support_matrix.qmd +++ b/docs/support_matrix.qmd @@ -1,18 +1,46 @@ --- +title: "Operation support matrix" +format: dashboard hide: - toc --- -# Operation support matrix +```{python} +#| echo: false +!python ../gen_matrix.py +``` -We provide Ibis's operation support matrix as -a [Streamlit](https://streamlit.io/) app that shows supported operations for -each backend. Ibis defines a common API for analytics and data transformation -code that is transpiled to native code for each backend. Due to differences in -SQL dialects and upstream support for different operations in different -backends, support for the full breadth of the Ibis API varies. +```{python} +#| echo: false +import pandas as pd + +support_matrix = pd.read_csv("./backends/raw_support_matrix.csv") +support_matrix = support_matrix.assign( + Category=support_matrix.Operation.map(lambda op: op.rsplit(".", 1)[0].rsplit(".", 1)[-1]), + Operation=support_matrix.Operation.map(lambda op: op.rsplit(".", 1)[-1]), +).set_index(["Category", "Operation"]) +all_visible_ops_count = len(support_matrix) +coverage = pd.Index( + support_matrix.sum() + .map(lambda n: f"{n} ({round(100 * n / all_visible_ops_count)}%)") + .T +) +support_matrix.columns = pd.MultiIndex.from_tuples( + list(zip(support_matrix.columns, coverage)), names=("Backend", "API coverage") +) +support_matrix = support_matrix.replace({True: "✔", False: "🚫"}) +``` + +## {height=25%} + +::: {.card title="Welcome to the operation support matrix!"} -You can use this page to see which operations are supported on each backend. +This is a [Quarto dashboard](https://quarto.org/docs/dashboards/) that shows +the operations each backend supports. + +Due to differences in SQL dialects and upstream support for different +operations in different backends, support for the full breadth of the Ibis API +varies. ::: {.callout-tip} Backends with low coverage are good places to start contributing! @@ -20,38 +48,44 @@ Backends with low coverage are good places to start contributing! Each backend implements operations differently, but this is usually very similar to other backends. If you want to start contributing to ibis, it's a good idea to start by adding missing operations to backends that have low operation coverage. ::: -