From a8ce8352a9020b0ba654bd620bc28b0e9fa535b6 Mon Sep 17 00:00:00 2001
From: Ritchie Vink
Date: Mon, 19 Aug 2024 11:24:17 +0200
Subject: [PATCH] fix: Fix invalid state due to cached IR (#18262)

---
 crates/polars-plan/src/plans/conversion/dsl_to_ir.rs | 7 +++++--
 crates/polars-plan/src/plans/conversion/stack_opt.rs | 7 +++++++
 py-polars/tests/unit/test_schema.py                  | 6 ++++++
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/crates/polars-plan/src/plans/conversion/dsl_to_ir.rs b/crates/polars-plan/src/plans/conversion/dsl_to_ir.rs
index 8a92d90ffa1c..8dba0f49f3c4 100644
--- a/crates/polars-plan/src/plans/conversion/dsl_to_ir.rs
+++ b/crates/polars-plan/src/plans/conversion/dsl_to_ir.rs
@@ -794,8 +794,11 @@ pub fn to_alp_impl(
             IR::Sink { input, payload }
         },
         DslPlan::IR { node, dsl, version } => {
-            return if let (true, Some(node)) = (version == lp_arena.version(), node) {
-                Ok(node)
+            return if node.is_some()
+                && version == lp_arena.version()
+                && convert.used_arenas.insert(version)
+            {
+                Ok(node.unwrap())
             } else {
                 to_alp_impl(owned(dsl), expr_arena, lp_arena, convert)
             }
diff --git a/crates/polars-plan/src/plans/conversion/stack_opt.rs b/crates/polars-plan/src/plans/conversion/stack_opt.rs
index 6e05a872a8cf..8db4e82659d5 100644
--- a/crates/polars-plan/src/plans/conversion/stack_opt.rs
+++ b/crates/polars-plan/src/plans/conversion/stack_opt.rs
@@ -7,6 +7,12 @@ pub(super) struct ConversionOptimizer {
     scratch: Vec,
     simplify: Option,
     coerce: Option,
+    // IR's can be cached in the DSL.
+    // But if they are used multiple times in DSL (e.g. concat/join)
+    // then it can occur that we take a slot multiple times.
+    // So we keep track of the arena versions used and allow only
+    // one unique IR cache to be reused.
+    pub(super) used_arenas: PlHashSet,
 }
 
 impl ConversionOptimizer {
@@ -27,6 +33,7 @@ impl ConversionOptimizer {
             scratch: Vec::with_capacity(8),
             simplify,
             coerce,
+            used_arenas: Default::default(),
         }
     }
 
diff --git a/py-polars/tests/unit/test_schema.py b/py-polars/tests/unit/test_schema.py
index 8ccb4497ac0f..bb05752057b1 100644
--- a/py-polars/tests/unit/test_schema.py
+++ b/py-polars/tests/unit/test_schema.py
@@ -65,3 +65,9 @@ def test_schema_in_map_elements_returns_scalar() -> None:
 
     assert (q.collect_schema()) == schema
     assert q.collect().schema == schema
+
+
+def test_ir_cache_unique_18198() -> None:
+    lf = pl.LazyFrame({"a": [1]})
+    lf.collect_schema()
+    assert pl.concat([lf, lf]).collect().to_dict(as_series=False) == {"a": [1, 1]}