diff --git a/ibis/expr/format.py b/ibis/expr/format.py index 34be2a1d7ea3..973a0fd1d847 100644 --- a/ibis/expr/format.py +++ b/ibis/expr/format.py @@ -169,7 +169,7 @@ def __repr__(self): @public -def pretty(expr: ops.Node | ir.Expr, scope: Optional[dict[str, ir.Expr]] = None): +def pretty(expr: ops.Node | ir.Expr, scope: Optional[dict[str, ir.Expr]] = None) -> str: """Pretty print an expression. Parameters @@ -178,8 +178,11 @@ def pretty(expr: ops.Node | ir.Expr, scope: Optional[dict[str, ir.Expr]] = None) The expression to pretty print. scope A dictionary of expression to name mappings used to intermediate - assignments. If not provided, the names of the expressions will be - generated. + assignments. + If not provided the names of the expressions will either be + - the variable name in the defining scope if + `ibis.options.repr.show_variables` is enabled + - generated names like `r0`, `r1`, etc. otherwise Returns ------- @@ -193,6 +196,9 @@ def pretty(expr: ops.Node | ir.Expr, scope: Optional[dict[str, ir.Expr]] = None) else: raise TypeError(f"Expected an expression or a node, got {type(expr)}") + if scope is None and ibis.options.repr.show_variables: + scope = get_defining_scope(expr) + refs = {} refcnt = itertools.count() variables = {v.op(): k for k, v in (scope or {}).items()} diff --git a/ibis/expr/types/core.py b/ibis/expr/types/core.py index 976cda6faaf8..51beeb2a66ee 100644 --- a/ibis/expr/types/core.py +++ b/ibis/expr/types/core.py @@ -6,8 +6,11 @@ from typing import TYPE_CHECKING, Any, NoReturn from public import public +from rich.console import Console from rich.jupyter import JupyterMixin +from rich.text import Text +import ibis import ibis.expr.operations as ops from ibis.common.annotations import ValidationError from ibis.common.exceptions import IbisError, TranslationError @@ -15,6 +18,7 @@ from ibis.common.patterns import Coercible, CoercionError from ibis.config import _default_backend from ibis.config import options as opts +from ibis.expr.types.pretty import to_rich from ibis.util import experimental if TYPE_CHECKING: @@ -48,15 +52,68 @@ class Expr(Immutable, Coercible): __slots__ = ("_arg",) _arg: ops.Node - def __rich_console__(self, console, options): - if not opts.interactive: - from rich.text import Text + def _noninteractive_repr(self) -> str: + from ibis.expr.format import pretty + + return pretty(self) + + def _interactive_repr(self) -> str: + console = Console(force_terminal=False) + with console.capture() as capture: + try: + console.print(self) + except TranslationError as e: + lines = [ + "Translation to backend failed", + f"Error message: {e!r}", + "Expression repr follows:", + self._noninteractive_repr(), + ] + return "\n".join(lines) + return capture.get().rstrip() - return console.render(Text(repr(self)), options=options) - return self.__interactive_rich_console__(console, options) + def __repr__(self) -> str: + if ibis.options.interactive: + return self._interactive_repr() + else: + return self._noninteractive_repr() + + def __rich_console__(self, console: Console, options): + if console.is_jupyter: + # Rich infers a console width in jupyter notebooks, but since + # notebooks can use horizontal scroll bars we don't want to apply a + # limit here. Since rich requires an integer for max_width, we + # choose an arbitrarily large integer bound. Note that we need to + # handle this here rather than in `to_rich`, as this setting + # also needs to be forwarded to `console.render`. + options = options.update(max_width=1_000_000) + console_width = None + else: + console_width = options.max_width - def __interactive_rich_console__(self, console, options): - raise NotImplementedError() + try: + if opts.interactive: + rich_object = to_rich(self, console_width=console_width) + else: + rich_object = Text(self._noninteractive_repr()) + except Exception as e: + # In IPython exceptions inside of _repr_mimebundle_ are swallowed to + # allow calling several display functions and choosing to display + # the "best" result based on some priority. + # This behavior, though, means that exceptions that bubble up inside of the interactive repr + # are silently caught. + # + # We can't stop the exception from being swallowed, but we can force + # the display of that exception as we do here. + # + # A _very_ annoying caveat is that this exception is _not_ being + # ` raise`d, it is only being printed to the console. This means + # that you cannot "catch" it. + # + # This restriction is only present in IPython, not in other REPLs. + console.print_exception() + raise e + return console.render(rich_object, options=options) def __init__(self, arg: ops.Node) -> None: object.__setattr__(self, "_arg", arg) @@ -73,32 +130,6 @@ def __coerce__(cls, value): else: raise CoercionError("Unable to coerce value to an expression") - def __repr__(self) -> str: - from ibis.expr.format import get_defining_scope, pretty - - if opts.repr.show_variables: - scope = get_defining_scope(self) - else: - scope = None - - if opts.interactive: - from ibis.expr.types.pretty import simple_console - - with simple_console.capture() as capture: - try: - simple_console.print(self) - except TranslationError as e: - lines = [ - "Translation to backend failed", - f"Error message: {e!r}", - "Expression repr follows:", - pretty(self, scope=scope), - ] - return "\n".join(lines) - return capture.get().rstrip() - else: - return pretty(self, scope=scope) - def __reduce__(self): return (self.__class__, (self._arg,)) diff --git a/ibis/expr/types/generic.py b/ibis/expr/types/generic.py index 6f359fa2c9a1..571c43a92426 100644 --- a/ibis/expr/types/generic.py +++ b/ibis/expr/types/generic.py @@ -14,12 +14,14 @@ from ibis.common.grounds import Singleton from ibis.expr.rewrites import rewrite_window_input from ibis.expr.types.core import Expr, _binop, _FixedTextJupyterMixin +from ibis.expr.types.pretty import to_rich from ibis.util import deprecated, warn_deprecated if TYPE_CHECKING: import pandas as pd import polars as pl import pyarrow as pa + import rich.table import ibis.expr.types as ir from ibis.formats.pyarrow import PyArrowData @@ -1178,20 +1180,6 @@ def to_pandas(self, **kwargs) -> pd.Series: @public class Scalar(Value): - def __interactive_rich_console__(self, console, options): - import rich.pretty - - interactive = ibis.options.repr.interactive - return console.render( - rich.pretty.Pretty( - self.execute(), - max_length=interactive.max_length, - max_string=interactive.max_string, - max_depth=interactive.max_depth, - ), - options=options, - ) - def __pyarrow_result__( self, table: pa.Table, data_mapper: type[PyArrowData] | None = None ) -> pa.Scalar: @@ -1307,10 +1295,60 @@ def __getitem__(self, _): def __array__(self, dtype=None): return self.execute().__array__(dtype) - def __interactive_rich_console__(self, console, options): - named = self.name(self.op().name) - projection = named.as_table() - return console.render(projection, options=options) + def preview( + self, + *, + max_rows: int | None = None, + max_length: int | None = None, + max_string: int | None = None, + max_depth: int | None = None, + console_width: int | float | None = None, + ) -> rich.table.Table: + """Print a subset as a single-column Rich Table. + + This is an explicit version of what you get when you inspect + this object in interactive mode, except with this version you + can pass formatting options. The options are the same as those exposed + in `ibis.options.interactive`. + + Parameters + ---------- + max_rows + Maximum number of rows to display + max_length + Maximum length for pretty-printed arrays and maps. + max_string + Maximum length for pretty-printed strings. + max_depth + Maximum depth for nested data types. + console_width + Width of the console in characters. If not specified, the width + will be inferred from the console. + + Examples + -------- + >>> import ibis + >>> t = ibis.examples.penguins.fetch() + >>> t.island.preview(max_rows=3, max_string=5) # doctest: +SKIP + ┏━━━━━━━━┓ + ┃ island ┃ + ┡━━━━━━━━┩ + │ stri… │ + ├────────┤ + │ Torg… │ + │ Torg… │ + │ Torg… │ + │ … │ + └────────┘ + """ + return to_rich( + self, + max_rows=max_rows, + max_length=max_length, + max_string=max_string, + max_depth=max_depth, + console_width=console_width, + ) def __pyarrow_result__( self, table: pa.Table, data_mapper: type[PyArrowData] | None = None diff --git a/ibis/expr/types/pretty.py b/ibis/expr/types/pretty.py index 31354895a0be..a4b8af54a21b 100644 --- a/ibis/expr/types/pretty.py +++ b/ibis/expr/types/pretty.py @@ -4,52 +4,63 @@ import json from functools import singledispatch from math import isfinite +from typing import TYPE_CHECKING from urllib.parse import urlparse import rich +import rich.table from rich.align import Align -from rich.console import Console +from rich.pretty import Pretty from rich.text import Text import ibis import ibis.expr.datatypes as dt -# A console with all color/markup disabled, used for `__repr__` -simple_console = Console(force_terminal=False) +if TYPE_CHECKING: + from ibis.expr.types import Column, Expr, Scalar, Table -def _format_nested(values): - interactive = ibis.options.repr.interactive +def _format_nested( + values, + *, + max_length: int | None = None, + max_string: int | None = None, + max_depth: int | None = None, +): return [ - rich.pretty.Pretty( + Pretty( v, - max_length=interactive.max_length, - max_string=interactive.max_string, - max_depth=interactive.max_depth, + max_length=max_length or ibis.options.repr.interactive.max_length, + max_string=max_string or ibis.options.repr.interactive.max_string, + max_depth=max_depth or ibis.options.repr.interactive.max_depth, ) for v in values ] @singledispatch -def format_values(dtype, values): - return _format_nested(values) +def format_values(dtype, values, **fmt_kwargs): + return _format_nested(values, **fmt_kwargs) @format_values.register(dt.Map) -def _(dtype, values): - return _format_nested([None if v is None else dict(v) for v in values]) +def _(dtype, values, **fmt_kwargs): + return _format_nested( + [None if v is None else dict(v) for v in values], **fmt_kwargs + ) @format_values.register(dt.GeoSpatial) -def _(dtype, values): +def _(dtype, values, **fmt_kwargs): import shapely - return _format_nested([None if v is None else shapely.from_wkb(v) for v in values]) + return _format_nested( + [None if v is None else shapely.from_wkb(v) for v in values], **fmt_kwargs + ) @format_values.register(dt.JSON) -def _(dtype, values): +def _(dtype, values, **fmt_kwargs): def try_json(v): if v is None: return None @@ -58,17 +69,17 @@ def try_json(v): except Exception: # noqa: BLE001 return v - return _format_nested([try_json(v) for v in values]) + return _format_nested([try_json(v) for v in values], **fmt_kwargs) @format_values.register(dt.Boolean) @format_values.register(dt.UUID) -def _(dtype, values): +def _(dtype, values, **fmt_kwargs): return [Text(str(v)) for v in values] @format_values.register(dt.Decimal) -def _(dtype, values): +def _(dtype, values, **fmt_kwargs): if dtype.scale is not None: fmt = f"{{:.{dtype.scale}f}}" return [Text.styled(fmt.format(v), "bold cyan") for v in values] @@ -78,12 +89,12 @@ def _(dtype, values): @format_values.register(dt.Integer) -def _(dtype, values): +def _(dtype, values, **fmt_kwargs): return [Text.styled(str(int(v)), "bold cyan") for v in values] @format_values.register(dt.Floating) -def _(dtype, values): +def _(dtype, values, **fmt_kwargs): floats = [float(v) for v in values] # Extract and format all finite floats finites = [f for f in floats if isfinite(f)] @@ -102,7 +113,7 @@ def _(dtype, values): @format_values.register(dt.Timestamp) -def _(dtype, values): +def _(dtype, values, **fmt_kwargs): if all(v.microsecond == 0 for v in values): timespec = "seconds" elif all(v.microsecond % 1000 == 0 for v in values): @@ -115,13 +126,13 @@ def _(dtype, values): @format_values.register(dt.Date) -def _(dtype, values): +def _(dtype, values, **fmt_kwargs): dates = [v.date() if isinstance(v, datetime.datetime) else v for v in values] return [Text.styled(d.isoformat(), "magenta") for d in dates] @format_values.register(dt.Time) -def _(dtype, values): +def _(dtype, values, **fmt_kwargs): times = [v.time() if isinstance(v, datetime.datetime) else v for v in values] if all(t.microsecond == 0 for t in times): timespec = "seconds" @@ -133,7 +144,7 @@ def _(dtype, values): @format_values.register(dt.Interval) -def _(dtype, values): +def _(dtype, values, **fmt_kwargs): return [Text.styled(str(v), "magenta") for v in values] @@ -149,8 +160,8 @@ def _(dtype, values): @format_values.register(dt.String) -def _(dtype, values): - max_string = ibis.options.repr.interactive.max_string +def _(dtype, values, *, max_string: int | None = None, **fmt_kwargs): + max_string = max_string or ibis.options.repr.interactive.max_string out = [] for v in values: v = str(v) @@ -181,7 +192,14 @@ def _(dtype, values): return out -def format_column(dtype, values): +def format_column( + dtype, + values, + *, + max_length: int | None = None, + max_string: int | None = None, + max_depth: int | None = None, +): import pandas as pd null_str = Text.styled("NULL", style="dim") @@ -199,7 +217,13 @@ def isnull(x): nonnull = [v for v in values if not isnull(v)] if nonnull: - formatted = format_values(dtype, nonnull) + formatted = format_values( + dtype, + nonnull, + max_length=max_length, + max_string=max_string, + max_depth=max_depth, + ) next_f = iter(formatted).__next__ out = [null_str if isnull(v) else next_f() for v in values] else: @@ -219,21 +243,76 @@ def isnull(x): return out, min_width, max_width -def format_dtype(dtype): - max_string = ibis.options.repr.interactive.max_string +def format_dtype(dtype, max_string: int) -> Text: strtyp = str(dtype) if len(strtyp) > max_string: strtyp = strtyp[: max_string - 1] + "…" return Text.styled(strtyp, "dim") -def to_rich_table(table, console_width=None): - if console_width is None: - console_width = float("inf") +def to_rich( + expr: Expr, + *, + max_rows: int | None = None, + max_columns: int | None = None, + max_length: int | None = None, + max_string: int | None = None, + max_depth: int | None = None, + console_width: int | float | None = None, +) -> Pretty: + """Truncate, evaluate, and render an Ibis expression as a rich object.""" + from ibis.expr.types import Scalar + + if isinstance(expr, Scalar): + return _to_rich_scalar( + expr, max_length=max_length, max_string=max_string, max_depth=max_depth + ) + else: + return _to_rich_table( + expr, + max_rows=max_rows, + max_columns=max_columns, + max_length=max_length, + max_string=max_string, + max_depth=max_depth, + console_width=console_width, + ) + +def _to_rich_scalar( + expr: Scalar, + *, + max_length: int | None = None, + max_string: int | None = None, + max_depth: int | None = None, +) -> Pretty: + return Pretty( + expr.execute(), + max_length=max_length or ibis.options.repr.interactive.max_length, + max_string=max_string or ibis.options.repr.interactive.max_string, + max_depth=max_depth or ibis.options.repr.interactive.max_depth, + ) + + +def _to_rich_table( + tablish: Table | Column, + *, + max_rows: int | None = None, + max_columns: int | None = None, + max_length: int | None = None, + max_string: int | None = None, + max_depth: int | None = None, + console_width: int | float | None = None, +) -> rich.table.Table: + max_rows = max_rows or ibis.options.repr.interactive.max_rows + max_columns = max_columns or ibis.options.repr.interactive.max_columns + console_width = console_width or float("inf") + max_string = max_string or ibis.options.repr.interactive.max_string + show_types = ibis.options.repr.interactive.show_types + + table = tablish.as_table() orig_ncols = len(table.columns) - max_columns = ibis.options.repr.interactive.max_columns if console_width == float("inf"): # When there's infinite display space, only subset columns # if an explicit limit has been set. @@ -260,10 +339,7 @@ def to_rich_table(table, console_width=None): if orig_ncols > len(computed_cols): table = table.select(*computed_cols) - # Compute the data and return a pandas dataframe - nrows = ibis.options.repr.interactive.max_rows - result = table.limit(nrows + 1).to_pyarrow() - + result = table.limit(max_rows + 1).to_pyarrow() # Now format the columns in order, stopping if the console width would # be exceeded. col_info = [] @@ -272,12 +348,14 @@ def to_rich_table(table, console_width=None): remaining = console_width - 1 # 1 char for left boundary for name, dtype in table.schema().items(): formatted, min_width, max_width = format_column( - dtype, result[name].to_pylist()[:nrows] + dtype, + result[name].to_pylist()[:max_rows], + max_length=max_length, + max_string=max_string, + max_depth=max_depth, ) - dtype_str = format_dtype(dtype) - if ibis.options.repr.interactive.show_types and not isinstance( - dtype, (dt.Struct, dt.Map, dt.Array) - ): + dtype_str = format_dtype(dtype, max_string) + if show_types and not isinstance(dtype, (dt.Struct, dt.Map, dt.Array)): # Don't truncate non-nested dtypes min_width = max(min_width, len(dtype_str)) @@ -375,7 +453,7 @@ def add_row(*args, **kwargs): else: add_row = rich_table.add_row - if ibis.options.repr.interactive.show_types: + if show_types: add_row( *(Align(s, align="left") for s in formatted_dtypes), end_section=True, @@ -385,7 +463,7 @@ def add_row(*args, **kwargs): add_row(*row) # If the rows are truncated, add a trailing ellipsis row - if len(result) > nrows: + if len(result) > max_rows: rich_table.add_row( *(Align("[dim]…[/]", align=c.justify) for c in rich_table.columns) ) diff --git a/ibis/expr/types/relations.py b/ibis/expr/types/relations.py index 3bd2090eb833..a2a96f53bb6e 100644 --- a/ibis/expr/types/relations.py +++ b/ibis/expr/types/relations.py @@ -24,6 +24,7 @@ from ibis.common.deferred import Deferred, Resolver from ibis.expr.types.core import Expr, _FixedTextJupyterMixin from ibis.expr.types.generic import ValueExpr, literal +from ibis.expr.types.pretty import to_rich from ibis.selectors import Selector from ibis.util import deprecated @@ -31,6 +32,7 @@ import pandas as pd import polars as pl import pyarrow as pa + from rich.table import Table as RichTable import ibis.expr.types as ir import ibis.selectors as s @@ -497,41 +499,73 @@ def _cast(self, schema: SchemaLike, cast_method: str = "cast") -> Table: cols.append(new_col) return self.select(*cols) - def __interactive_rich_console__(self, console, options): - from ibis.expr.types.pretty import to_rich_table - - if console.is_jupyter: - # Rich infers a console width in jupyter notebooks, but since - # notebooks can use horizontal scroll bars we don't want to apply a - # limit here. Since rich requires an integer for max_width, we - # choose an arbitrarily large integer bound. Note that we need to - # handle this here rather than in `to_rich_table`, as this setting - # also needs to be forwarded to `console.render`. - options = options.update(max_width=1_000_000) - width = None - else: - width = options.max_width + def preview( + self, + *, + max_rows: int | None = None, + max_columns: int | None = None, + max_length: int | None = None, + max_string: int | None = None, + max_depth: int | None = None, + console_width: int | float | None = None, + ) -> RichTable: + """Return a subset as a Rich Table. + + This is an explicit version of what you get when you inspect + this object in interactive mode, except with this version you + can pass formatting options. The options are the same as those exposed + in `ibis.options.interactive`. - try: - table = to_rich_table(self, width) - except Exception as e: - # In IPython exceptions inside of _repr_mimebundle_ are swallowed to - # allow calling several display functions and choosing to display - # the "best" result based on some priority. - # This behavior, though, means that exceptions that bubble up inside of the interactive repr - # are silently caught. - # - # We can't stop the exception from being swallowed, but we can force - # the display of that exception as we do here. - # - # A _very_ annoying caveat is that this exception is _not_ being - # ` raise`d, it is only being printed to the console. This means - # that you cannot "catch" it. - # - # This restriction is only present in IPython, not in other REPLs. - console.print_exception() - raise e - return console.render(table, options=options) + Parameters + ---------- + max_rows + Maximum number of rows to display + max_columns + Maximum number of columns to display + max_length + Maximum length for pretty-printed arrays and maps + max_string + Maximum length for pretty-printed strings + max_depth + Maximum depth for nested data types + console_width + Width of the console in characters. If not specified, the width + will be inferred from the console. + + Examples + -------- + >>> import ibis + >>> t = ibis.examples.penguins.fetch() + + Because the console_width is too small, only 2 columns are shown even though + we specified up to 3. + + >>> t.preview( + ... max_rows=3, + ... max_columns=3, + ... max_string=8, + ... console_width=30, + ... ) # doctest: +SKIP + ┏━━━━━━━━━┳━━━━━━━━━━┳━━━┓ + ┃ species ┃ island ┃ … ┃ + ┡━━━━━━━━━╇━━━━━━━━━━╇━━━┩ + │ string │ string │ … │ + ├─────────┼──────────┼───┤ + │ Adelie │ Torgers… │ … │ + │ Adelie │ Torgers… │ … │ + │ Adelie │ Torgers… │ … │ + │ … │ … │ … │ + └─────────┴──────────┴───┘ + """ + return to_rich( + self, + max_columns=max_columns, + max_rows=max_rows, + max_length=max_length, + max_string=max_string, + max_depth=max_depth, + console_width=console_width, + ) # TODO(kszucs): expose this method in the public API def _get_column(self, name: str | int) -> ir.Column: