Patch summary (leading text before the first "diff --git" header is
ignored by patch tools):

* ibis/selectors.py, function c(): the check that every requested name
  exists in the schema moves out of the per-column predicate into a nested
  helper, check_delta(schema), decorated with functools.cache. The predicate
  func() now calls check_delta(op.rel.schema) and then tests op.name against
  the requested names, so a successful check is memoized per schema argument
  instead of recomputing the set difference on every call. A failing check
  raises IbisInputError and is not memoized.
  NOTE(review): no hunk here adds "import functools" to selectors.py;
  presumably it is already imported there -- confirm.
  NOTE(review): schema._name_locs is underscore-prefixed and looks like a
  private attribute of the schema type -- confirm that relying on it is
  intended.

* ibis/tests/benchmarks/test_benchmarks.py: imports ibis.selectors as s,
  drops pytest.mark.timeout(30) from the module-level pytestmark, and adds
  the test_selectors benchmark. It is parametrized over 1_000 and 10_000
  columns, selects the first cols - cols // 10 of them with s.c(...), casts
  them to "str" via s.across, and benchmarks sel.expand(t).

diff --git a/ibis/selectors.py b/ibis/selectors.py
index 2b89e1ffb35b..7978f996a4d6 100644
--- a/ibis/selectors.py
+++ b/ibis/selectors.py
@@ -362,13 +362,17 @@ def c(*names: str | ir.Column) -> Predicate:
     """Select specific column names."""
     names = frozenset(col if isinstance(col, str) else col.get_name() for col in names)
 
-    def func(col: ir.Value) -> bool:
-        schema = col.op().rel.schema
-        if extra_cols := (names - schema.keys()):
+    @functools.cache
+    def check_delta(schema):
+        if extra_cols := names - schema._name_locs.keys():
             raise exc.IbisInputError(
                 f"Columns {extra_cols} are not present in {schema.names}"
             )
-        return col.get_name() in names
+
+    def func(col: ir.Value) -> bool:
+        op = col.op()
+        check_delta(op.rel.schema)
+        return op.name in names
 
     return where(func)
 
diff --git a/ibis/tests/benchmarks/test_benchmarks.py b/ibis/tests/benchmarks/test_benchmarks.py
index f434ca13f435..4faf306dd0e8 100644
--- a/ibis/tests/benchmarks/test_benchmarks.py
+++ b/ibis/tests/benchmarks/test_benchmarks.py
@@ -22,9 +22,10 @@
 import ibis.expr.datatypes as dt
 import ibis.expr.operations as ops
 import ibis.expr.types as ir
+import ibis.selectors as s
 from ibis.backends import _get_backend_names
 
-pytestmark = [pytest.mark.benchmark, pytest.mark.timeout(30)]
+pytestmark = [pytest.mark.benchmark]
 
 
 def make_t():
@@ -967,3 +968,11 @@ def test_duckdb_timestamp_conversion(benchmark):
     con = ibis.duckdb.connect()
     series = benchmark(con.execute, expr)
     assert series.size == (stop - start).total_seconds()
+
+
+@pytest.mark.parametrize("cols", [1_000, 10_000])
+def test_selectors(benchmark, cols):
+    t = ibis.table(name="t", schema={f"col{i}": "int" for i in range(cols)})
+    n = cols - cols // 10
+    sel = s.across(s.c(*[f"col{i}" for i in range(n)]), lambda c: c.cast("str"))
+    benchmark(sel.expand, t)