From 9aa25141071bde3428d8c6e52db11ad53590b499 Mon Sep 17 00:00:00 2001 From: ritchie Date: Sat, 6 Jul 2024 17:02:10 +0200 Subject: [PATCH] perf: Keep more parallelism when CSE plan cache hits --- .../src/plans/optimizer/cache_states.rs | 34 +++++++++++++++---- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/crates/polars-plan/src/plans/optimizer/cache_states.rs b/crates/polars-plan/src/plans/optimizer/cache_states.rs index 53c546613661..9dd51f067935 100644 --- a/crates/polars-plan/src/plans/optimizer/cache_states.rs +++ b/crates/polars-plan/src/plans/optimizer/cache_states.rs @@ -170,17 +170,37 @@ pub(super) fn set_cache_states( match lp { // don't allow parallelism as caches need each others work // also self-referencing plans can deadlock on the files they lock - Join { options, .. } if options.allow_parallel => { - if let Join { options, .. } = lp_arena.get_mut(frame.current) { - let options = Arc::make_mut(options); - options.allow_parallel = false; + Join { + options, + input_left, + input_right, + .. + } if options.allow_parallel => { + let has_cache_in_children = [*input_left, *input_right].iter().any(|node| { + (&*lp_arena) + .iter(*node) + .any(|(_, ir)| matches!(ir, IR::Cache { .. })) + }); + if has_cache_in_children { + if let Join { options, .. } = lp_arena.get_mut(frame.current) { + let options = Arc::make_mut(options); + options.allow_parallel = false; + } } }, // don't allow parallelism as caches need each others work // also self-referencing plans can deadlock on the files they lock - Union { options, .. } if options.parallel => { - if let Union { options, .. } = lp_arena.get_mut(frame.current) { - options.parallel = false; + Union { options, inputs } if options.parallel => { + // Only toggle if children have a cache, otherwise we loose potential parallelism for nothing. + let has_cache_in_children = inputs.iter().any(|node| { + (&*lp_arena) + .iter(*node) + .any(|(_, ir)| matches!(ir, IR::Cache { .. })) + }); + if has_cache_in_children { + if let Union { options, .. } = lp_arena.get_mut(frame.current) { + options.parallel = false; + } } }, Cache { input, id, .. } => {