diff --git a/.github/workflows/dev_pr/labeler.yml b/.github/workflows/dev_pr/labeler.yml index 308abd1688a6..4e44e47f5968 100644 --- a/.github/workflows/dev_pr/labeler.yml +++ b/.github/workflows/dev_pr/labeler.yml @@ -33,16 +33,37 @@ logical-expr: physical-expr: - changed-files: - - any-glob-to-any-file: ['datafusion/physical-expr/**/*'] + - any-glob-to-any-file: ['datafusion/physical-expr/**/*', 'datafusion/physical-expr-common/**/*', 'datafusion/physical-expr-aggregate/**/*', 'datafusion/physical-plan/**/*'] + +catalog: + - changed-files: + - any-glob-to-any-file: ['datafusion/catalog/**/*'] + +common: + - changed-files: + - any-glob-to-any-file: ['datafusion/common/**/*', 'datafusion/common-runtime/**/*'] + +execution: + - changed-files: + - any-glob-to-any-file: ['datafusion/execution/**/*'] + +functions: + - changed-files: + - any-glob-to-any-file: ['datafusion/functions/**/*', 'datafusion/functions-aggregate/**/*', 'datafusion/functions-aggregate-common', 'datafusion/functions-nested'] + optimizer: - changed-files: - - any-glob-to-any-file: ['datafusion/optimizer/**/*'] + - any-glob-to-any-file: ['datafusion/optimizer/**/*', 'datafusion/physical-optimizer/**/*'] core: - changed-files: - any-glob-to-any-file: ['datafusion/core/**/*'] +proto: + - changed-files: + - any-glob-to-any-file: ['datafusion/proto/**/*', 'datafusion/proto-common/**/*'] + substrait: - changed-files: - any-glob-to-any-file: ['datafusion/substrait/**/*'] diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 134cde8976d6..90995c1d116a 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -347,13 +347,14 @@ dependencies = [ [[package]] name = "assert_cmd" -version = "2.0.15" +version = "2.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc65048dd435533bb1baf2ed9956b9a278fbfdcf90301b39ee117f06c0199d37" +checksum = "dc1835b7f27878de8525dc71410b5a31cdcc5f230aed5ba5df968e09c201b23d" dependencies = [ "anstyle", "bstr", "doc-comment", + "libc", "predicates", "predicates-core", "predicates-tree", @@ -386,7 +387,7 @@ checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -874,9 +875,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.1.8" +version = "1.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "504bdec147f2cc13c8b57ed9401fd8a147cc66b67ad5cb241394244f2c947549" +checksum = "e9e8aabfac534be767c909e0690571677d49f41bd8465ae876fe043d52ba5292" dependencies = [ "jobserver", "libc", @@ -1022,9 +1023,9 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.8.6" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "core2" @@ -1037,9 +1038,9 @@ dependencies = [ [[package]] name = "cpufeatures" -version = "0.2.12" +version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" +checksum = "51e852e6dc9a5bed1fae92dd2375037bf2b768725bf3be87811edee3249d09ad" dependencies = [ "libc", ] @@ -1103,7 +1104,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "edb49164822f3ee45b17acd4a208cfc1251410cf0cad9a833234c9890774dd9f" dependencies = [ "quote", - "syn 
2.0.72", + "syn 2.0.74", ] [[package]] @@ -1240,6 +1241,7 @@ dependencies = [ "num_cpus", "object_store", "parquet", + "paste", "sqlparser", ] @@ -1762,7 +1764,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -2441,9 +2443,9 @@ dependencies = [ [[package]] name = "mio" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4569e456d394deccd22ce1c1913e6ea0e54519f577285001215d33557431afe4" +checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" dependencies = [ "hermit-abi 0.3.9", "libc", @@ -2785,7 +2787,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -3386,29 +3388,29 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.205" +version = "1.0.207" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e33aedb1a7135da52b7c21791455563facbbcc43d0f0f66165b42c21b3dfb150" +checksum = "5665e14a49a4ea1b91029ba7d3bca9f299e1f7cfa194388ccc20f14743e784f2" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.205" +version = "1.0.207" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "692d6f5ac90220161d6774db30c662202721e64aed9058d2c394f451261420c1" +checksum = "6aea2634c86b0e8ef2cfdc0c340baede54ec27b1e46febd7f80dffb2aa44a00e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] name = "serde_json" -version = "1.0.122" +version = "1.0.124" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784b6203951c57ff748476b126ccb5e8e2959a5c19e5c617ab1956be3dbc68da" +checksum = "66ad62847a56b3dba58cc891acd13884b9c61138d330c0d7b6181713d4fce38d" dependencies = [ "itoa", "memchr", @@ -3537,7 +3539,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -3583,7 +3585,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -3596,7 +3598,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -3618,9 +3620,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.72" +version = "2.0.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af" +checksum = "1fceb41e3d546d0bd83421d3409b1460cc7444cd389341a4c880fe7a042cb3d7" dependencies = [ "proc-macro2", "quote", @@ -3684,7 +3686,7 @@ checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -3778,7 +3780,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -3875,7 +3877,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -3920,7 +3922,7 @@ checksum = "f03ca4cb38206e2bef0700092660bb74d696f808514dae47fa1467cbfe26e96e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 
2.0.74", ] [[package]] @@ -4074,7 +4076,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", "wasm-bindgen-shared", ] @@ -4108,7 +4110,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -4383,7 +4385,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] diff --git a/datafusion/catalog/Cargo.toml b/datafusion/catalog/Cargo.toml index 2ebca511c5c8..ff28d8e0c64a 100644 --- a/datafusion/catalog/Cargo.toml +++ b/datafusion/catalog/Cargo.toml @@ -17,6 +17,7 @@ [package] name = "datafusion-catalog" +description = "datafusion-catalog" authors.workspace = true edition.workspace = true homepage.workspace = true diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index 85dfb2e8f73a..8435d0632576 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -60,6 +60,7 @@ libc = "0.2.140" num_cpus = { workspace = true } object_store = { workspace = true, optional = true } parquet = { workspace = true, optional = true, default-features = true } +paste = "1.0.15" pyo3 = { version = "0.21.0", optional = true } sqlparser = { workspace = true } diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs index f62acaf0493b..27a25d0c9dd5 100644 --- a/datafusion/common/src/error.rs +++ b/datafusion/common/src/error.rs @@ -481,13 +481,6 @@ macro_rules! unwrap_or_internal_err { }; } -macro_rules! with_dollar_sign { - ($($body:tt)*) => { - macro_rules! __with_dollar_sign { $($body)* } - __with_dollar_sign!($); - } -} - /// Add a macros for concise DataFusionError::* errors declaration /// supports placeholders the same way as `format!` /// Examples: @@ -501,37 +494,41 @@ macro_rules! with_dollar_sign { /// `NAME_DF_ERR` - macro name for wrapping DataFusionError::*. Needed to keep backtrace opportunity /// in construction where DataFusionError::* used directly, like `map_err`, `ok_or_else`, etc macro_rules! make_error { - ($NAME_ERR:ident, $NAME_DF_ERR: ident, $ERR:ident) => { - with_dollar_sign! { - ($d:tt) => { - /// Macro wraps `$ERR` to add backtrace feature - #[macro_export] - macro_rules! $NAME_DF_ERR { - ($d($d args:expr),*) => { - $crate::DataFusionError::$ERR( - format!( - "{}{}", - format!($d($d args),*), - $crate::DataFusionError::get_back_trace(), - ).into() - ) - } + ($NAME_ERR:ident, $NAME_DF_ERR: ident, $ERR:ident) => { make_error!(@inner ($), $NAME_ERR, $NAME_DF_ERR, $ERR); }; + (@inner ($d:tt), $NAME_ERR:ident, $NAME_DF_ERR:ident, $ERR:ident) => { + ::paste::paste!{ + /// Macro wraps `$ERR` to add backtrace feature + #[macro_export] + macro_rules! $NAME_DF_ERR { + ($d($d args:expr),*) => { + $crate::DataFusionError::$ERR( + ::std::format!( + "{}{}", + ::std::format!($d($d args),*), + $crate::DataFusionError::get_back_trace(), + ).into() + ) } + } - /// Macro wraps Err(`$ERR`) to add backtrace feature - #[macro_export] - macro_rules! $NAME_ERR { - ($d($d args:expr),*) => { - Err($crate::DataFusionError::$ERR( - format!( - "{}{}", - format!($d($d args),*), - $crate::DataFusionError::get_back_trace(), - ).into() - )) - } + /// Macro wraps Err(`$ERR`) to add backtrace feature + #[macro_export] + macro_rules! 
$NAME_ERR { + ($d($d args:expr),*) => { + Err($crate::[<_ $NAME_DF_ERR>]!($d($d args),*)) } } + + + // Note: Certain macros are used in this crate, but not all. + // This macro generates a `use` for all of them in case they are needed + // so we allow unused code to avoid warnings when they are not used + #[doc(hidden)] + #[allow(unused)] + pub use $NAME_ERR as [<_ $NAME_ERR>]; + #[doc(hidden)] + #[allow(unused)] + pub use $NAME_DF_ERR as [<_ $NAME_DF_ERR>]; } }; } @@ -613,12 +610,6 @@ macro_rules! schema_err { // To avoid compiler error when using macro in the same crate: // macros from the current crate cannot be referred to by absolute paths -pub use config_err as _config_err; -pub use internal_datafusion_err as _internal_datafusion_err; -pub use internal_err as _internal_err; -pub use not_impl_err as _not_impl_err; -pub use plan_datafusion_err as _plan_datafusion_err; -pub use plan_err as _plan_err; pub use schema_err as _schema_err; /// Create a "field not found" DataFusion::SchemaError diff --git a/datafusion/common/src/lib.rs b/datafusion/common/src/lib.rs index 8cd64e7d16a2..19af889e426a 100644 --- a/datafusion/common/src/lib.rs +++ b/datafusion/common/src/lib.rs @@ -73,6 +73,18 @@ pub use table_reference::{ResolvedTableReference, TableReference}; pub use unnest::UnnestOptions; pub use utils::project_schema; +// These are hidden from docs purely to avoid polluting the public view of what this crate exports. +// These are just re-exports of macros by the same name, which gets around the 'cannot refer to +// macro-expanded macro_export macros by their full path' error. +// The design to get around this comes from this comment: +// https://github.com/rust-lang/rust/pull/52234#issuecomment-976702997 +#[doc(hidden)] +pub use error::{ + _config_datafusion_err, _exec_datafusion_err, _internal_datafusion_err, + _not_impl_datafusion_err, _plan_datafusion_err, _resources_datafusion_err, + _substrait_datafusion_err, +}; + /// Downcast an Arrow Array to a concrete type, return a `DataFusionError::Internal` if the cast is /// not possible. In normal usage of DataFusion the downcast should always succeed. /// diff --git a/datafusion/common/src/stats.rs b/datafusion/common/src/stats.rs index d6b5310581d7..50ae4e3ca71f 100644 --- a/datafusion/common/src/stats.rs +++ b/datafusion/common/src/stats.rs @@ -25,7 +25,7 @@ use arrow_schema::{Schema, SchemaRef}; /// Represents a value with a degree of certainty. `Precision` is used to /// propagate information about the precision of statistical values. 
-#[derive(Clone, PartialEq, Eq, Default)] +#[derive(Clone, PartialEq, Eq, Default, Copy)] pub enum Precision<T: Debug + Clone + PartialEq + Eq + PartialOrd> { /// The exact value is known Exact(T), @@ -503,9 +503,9 @@ mod tests { let inexact_precision = Precision::Inexact(42); let absent_precision = Precision::<i64>::Absent; - assert_eq!(exact_precision.clone().to_inexact(), inexact_precision); - assert_eq!(inexact_precision.clone().to_inexact(), inexact_precision); - assert_eq!(absent_precision.clone().to_inexact(), absent_precision); + assert_eq!(exact_precision.to_inexact(), inexact_precision); + assert_eq!(inexact_precision.to_inexact(), inexact_precision); + assert_eq!(absent_precision.to_inexact(), absent_precision); } #[test] @@ -545,4 +545,19 @@ assert_eq!(precision2.multiply(&precision3), Precision::Inexact(15)); assert_eq!(precision1.multiply(&absent_precision), Precision::Absent); } + + #[test] + fn test_precision_cloning() { + // Precision<usize> is copy + let precision: Precision<usize> = Precision::Exact(42); + let p2 = precision; + assert_eq!(precision, p2); + + // Precision<ScalarValue> is not copy (requires .clone()) + let precision: Precision<ScalarValue> = + Precision::Exact(ScalarValue::Int64(Some(42))); + // Clippy would complain about this if it were Copy + let p2 = precision.clone(); + assert_eq!(precision, p2); + } } diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index 58dc8f40b577..bf506c0551eb 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -29,15 +29,17 @@ use arrow::compute; use arrow::compute::{partition, SortColumn, SortOptions}; use arrow::datatypes::{Field, SchemaRef, UInt32Type}; use arrow::record_batch::RecordBatch; +use arrow_array::cast::AsArray; use arrow_array::{ - Array, FixedSizeListArray, LargeListArray, ListArray, RecordBatchOptions, + Array, FixedSizeListArray, LargeListArray, ListArray, OffsetSizeTrait, + RecordBatchOptions, }; use arrow_schema::DataType; use sqlparser::ast::Ident; use sqlparser::dialect::GenericDialect; use sqlparser::parser::Parser; use std::borrow::{Borrow, Cow}; -use std::cmp::Ordering; +use std::cmp::{min, Ordering}; use std::collections::HashSet; use std::ops::Range; use std::sync::Arc; @@ -440,6 +442,11 @@ pub fn arrays_into_list_array( )) } +/// Helper function to convert a ListArray into a vector of ArrayRefs. +pub fn list_to_arrays<O: OffsetSizeTrait>(a: ArrayRef) -> Vec<ArrayRef> { + a.as_list::<O>().iter().flatten().collect::<Vec<_>>() +} + /// Get the base type of a data type. /// /// Example @@ -683,6 +690,69 @@ pub fn transpose<T>(original: Vec<Vec<T>>) -> Vec<Vec<T>> { } } +/// Computes the `skip` and `fetch` parameters of a single limit that would be +/// equivalent to two consecutive limits with the given `skip`/`fetch` parameters. +/// +/// There are multiple cases to consider: +/// +/// # Case 0: Parent and child are disjoint (`child_fetch <= skip`). +/// +/// Before merging: +/// ```text +/// |........skip........|---fetch-->| Parent limit +/// |...child_skip...|---child_fetch-->| Child limit +/// ``` +/// +/// After merging: +/// ```text +/// |.........(child_skip + skip).........| +/// ``` +/// +/// # Case 1: Parent is beyond child's range (`skip < child_fetch <= skip + fetch`). +/// +/// Before merging: +/// ```text +/// |...skip...|------------fetch------------>| Parent limit +/// |...child_skip...|-------------child_fetch------------>| Child limit +/// ``` +/// +/// After merging: +/// ```text +/// |....(child_skip + skip)....|---(child_fetch - skip)-->| +/// ``` +/// +/// # Case 2: Parent is within child's range (`skip + fetch < child_fetch`). 
+/// +/// Before merging: +/// ```text +/// |...skip...|---fetch-->| Parent limit +/// |...child_skip...|-------------child_fetch------------>| Child limit +/// ``` +/// +/// After merging: +/// ```text +/// |....(child_skip + skip)....|---fetch-->| +/// ``` +pub fn combine_limit( + parent_skip: usize, + parent_fetch: Option, + child_skip: usize, + child_fetch: Option, +) -> (usize, Option) { + let combined_skip = child_skip.saturating_add(parent_skip); + + let combined_fetch = match (parent_fetch, child_fetch) { + (Some(parent_fetch), Some(child_fetch)) => { + Some(min(parent_fetch, child_fetch.saturating_sub(parent_skip))) + } + (Some(parent_fetch), None) => Some(parent_fetch), + (None, Some(child_fetch)) => Some(child_fetch.saturating_sub(parent_skip)), + (None, None) => None, + }; + + (combined_skip, combined_fetch) +} + #[cfg(test)] mod tests { use crate::ScalarValue::Null; diff --git a/datafusion/core/src/datasource/physical_plan/file_scan_config.rs b/datafusion/core/src/datasource/physical_plan/file_scan_config.rs index 17850ea7585a..34fb6226c1a2 100644 --- a/datafusion/core/src/datasource/physical_plan/file_scan_config.rs +++ b/datafusion/core/src/datasource/physical_plan/file_scan_config.rs @@ -244,7 +244,7 @@ impl FileScanConfig { } let table_stats = Statistics { - num_rows: self.statistics.num_rows.clone(), + num_rows: self.statistics.num_rows, // TODO correct byte size? total_byte_size: Precision::Absent, column_statistics: table_cols_stats, diff --git a/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs b/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs index f9cce5f783ff..9de132169389 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs @@ -341,14 +341,9 @@ pub fn build_row_filter( let mut candidates: Vec = predicates .into_iter() .flat_map(|expr| { - if let Ok(candidate) = - FilterCandidateBuilder::new(expr.clone(), file_schema, table_schema) - .build(metadata) - { - candidate - } else { - None - } + FilterCandidateBuilder::new(expr.clone(), file_schema, table_schema) + .build(metadata) + .unwrap_or_default() }) .collect(); diff --git a/datafusion/core/src/datasource/statistics.rs b/datafusion/core/src/datasource/statistics.rs index 669755877680..6f89657defd3 100644 --- a/datafusion/core/src/datasource/statistics.rs +++ b/datafusion/core/src/datasource/statistics.rs @@ -18,17 +18,18 @@ use std::mem; use std::sync::Arc; -use super::listing::PartitionedFile; -use crate::arrow::datatypes::{Schema, SchemaRef}; -use crate::error::Result; -use crate::functions_aggregate::min_max::{MaxAccumulator, MinAccumulator}; -use crate::physical_plan::{Accumulator, ColumnStatistics, Statistics}; use arrow_schema::DataType; +use futures::{Stream, StreamExt}; use datafusion_common::stats::Precision; use datafusion_common::ScalarValue; -use futures::{Stream, StreamExt}; +use crate::arrow::datatypes::{Schema, SchemaRef}; +use crate::error::Result; +use crate::functions_aggregate::min_max::{MaxAccumulator, MinAccumulator}; +use crate::physical_plan::{Accumulator, ColumnStatistics, Statistics}; + +use super::listing::PartitionedFile; /// Get all files as well as the file level summary statistics (no statistic for partition columns). /// If the optional `limit` is provided, includes only sufficient files. Needed to read up to @@ -62,8 +63,8 @@ pub async fn get_statistics_with_limit( result_files.push(file); // First file, we set them directly from the file statistics. 
- num_rows = file_stats.num_rows.clone(); - total_byte_size = file_stats.total_byte_size.clone(); + num_rows = file_stats.num_rows; + total_byte_size = file_stats.total_byte_size; for (index, file_column) in file_stats.column_statistics.clone().into_iter().enumerate() { @@ -93,10 +94,10 @@ pub async fn get_statistics_with_limit( // counts across all the files in question. If any file does not // provide any information or provides an inexact value, we demote // the statistic precision to inexact. - num_rows = add_row_stats(file_stats.num_rows.clone(), num_rows); + num_rows = add_row_stats(file_stats.num_rows, num_rows); total_byte_size = - add_row_stats(file_stats.total_byte_size.clone(), total_byte_size); + add_row_stats(file_stats.total_byte_size, total_byte_size); for (file_col_stats, col_stats) in file_stats .column_statistics @@ -110,8 +111,7 @@ pub async fn get_statistics_with_limit( distinct_count: _, } = file_col_stats; - col_stats.null_count = - add_row_stats(file_nc.clone(), col_stats.null_count.clone()); + col_stats.null_count = add_row_stats(*file_nc, col_stats.null_count); set_max_if_greater(file_max, &mut col_stats.max_value); set_min_if_lesser(file_min, &mut col_stats.min_value) } @@ -192,7 +192,7 @@ pub(crate) fn get_col_stats( None => None, }; ColumnStatistics { - null_count: null_counts[i].clone(), + null_count: null_counts[i], max_value: max_value.map(Precision::Exact).unwrap_or(Precision::Absent), min_value: min_value.map(Precision::Exact).unwrap_or(Precision::Absent), distinct_count: Precision::Absent, diff --git a/datafusion/core/src/datasource/view.rs b/datafusion/core/src/datasource/view.rs index 98d118c027b7..a81942bf769e 100644 --- a/datafusion/core/src/datasource/view.rs +++ b/datafusion/core/src/datasource/view.rs @@ -19,17 +19,19 @@ use std::{any::Any, sync::Arc}; -use arrow::datatypes::SchemaRef; -use async_trait::async_trait; -use datafusion_catalog::Session; -use datafusion_common::Column; -use datafusion_expr::{LogicalPlanBuilder, TableProviderFilterPushDown}; - use crate::{ error::Result, logical_expr::{Expr, LogicalPlan}, physical_plan::ExecutionPlan, }; +use arrow::datatypes::SchemaRef; +use async_trait::async_trait; +use datafusion_catalog::Session; +use datafusion_common::config::ConfigOptions; +use datafusion_common::Column; +use datafusion_expr::{LogicalPlanBuilder, TableProviderFilterPushDown}; +use datafusion_optimizer::analyzer::expand_wildcard_rule::ExpandWildcardRule; +use datafusion_optimizer::Analyzer; use crate::datasource::{TableProvider, TableType}; @@ -50,6 +52,7 @@ impl ViewTable { logical_plan: LogicalPlan, definition: Option, ) -> Result { + let logical_plan = Self::apply_required_rule(logical_plan)?; let table_schema = logical_plan.schema().as_ref().to_owned().into(); let view = Self { @@ -61,6 +64,15 @@ impl ViewTable { Ok(view) } + fn apply_required_rule(logical_plan: LogicalPlan) -> Result { + let options = ConfigOptions::default(); + Analyzer::with_rules(vec![Arc::new(ExpandWildcardRule::new())]).execute_and_check( + logical_plan, + &options, + |_, _| {}, + ) + } + /// Get definition ref pub fn definition(&self) -> Option<&String> { self.definition.as_ref() @@ -232,6 +244,26 @@ mod tests { assert_batches_eq!(expected, &results); + let view_sql = + "CREATE VIEW replace_xyz AS SELECT * REPLACE (column1*2 as column1) FROM xyz"; + session_ctx.sql(view_sql).await?.collect().await?; + + let results = session_ctx + .sql("SELECT * FROM replace_xyz") + .await? 
+ .collect() + .await?; + + let expected = [ + "+---------+---------+---------+", + "| column1 | column2 | column3 |", + "+---------+---------+---------+", + "| 2 | 2 | 3 |", + "| 8 | 5 | 6 |", + "+---------+---------+---------+", + ]; + + assert_batches_eq!(expected, &results); Ok(()) } diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index c63ffddd81b3..972a6f643733 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -718,7 +718,6 @@ impl SessionContext { } (_, Err(_)) => { let table = Arc::new(ViewTable::try_new((*input).clone(), definition)?); - self.register_table(name, table)?; self.return_empty_dataframe() } diff --git a/datafusion/core/src/physical_optimizer/limit_pushdown.rs b/datafusion/core/src/physical_optimizer/limit_pushdown.rs deleted file mode 100644 index d02737ff0959..000000000000 --- a/datafusion/core/src/physical_optimizer/limit_pushdown.rs +++ /dev/null @@ -1,661 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! [`LimitPushdown`] pushes `LIMIT` down through `ExecutionPlan`s to reduce -//! data transfer as much as possible. - -use std::fmt::Debug; -use std::sync::Arc; - -use crate::error::Result; -use crate::physical_optimizer::PhysicalOptimizerRule; -use crate::physical_plan::ExecutionPlan; - -use datafusion_common::config::ConfigOptions; -use datafusion_common::plan_datafusion_err; -use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; -use datafusion_optimizer::push_down_limit::combine_limit; -use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; -use datafusion_physical_plan::limit::{GlobalLimitExec, LocalLimitExec}; -use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; - -/// This rule inspects [`ExecutionPlan`]'s and pushes down the fetch limit from -/// the parent to the child if applicable. -#[derive(Default)] -pub struct LimitPushdown {} - -impl LimitPushdown { - #[allow(missing_docs)] - pub fn new() -> Self { - Self {} - } -} - -impl PhysicalOptimizerRule for LimitPushdown { - fn optimize( - &self, - plan: Arc, - _config: &ConfigOptions, - ) -> Result> { - plan.transform_down(push_down_limits).data() - } - - fn name(&self) -> &str { - "LimitPushdown" - } - - fn schema_check(&self) -> bool { - true - } -} - -/// This enumeration makes `skip` and `fetch` calculations easier by providing -/// a single API for both local and global limit operators. 
-#[derive(Debug)] -enum LimitExec { - Global(GlobalLimitExec), - Local(LocalLimitExec), -} - -impl LimitExec { - fn input(&self) -> &Arc { - match self { - Self::Global(global) => global.input(), - Self::Local(local) => local.input(), - } - } - - fn fetch(&self) -> Option { - match self { - Self::Global(global) => global.fetch(), - Self::Local(local) => Some(local.fetch()), - } - } - - fn skip(&self) -> usize { - match self { - Self::Global(global) => global.skip(), - Self::Local(_) => 0, - } - } - - fn with_child(&self, child: Arc) -> Self { - match self { - Self::Global(global) => { - Self::Global(GlobalLimitExec::new(child, global.skip(), global.fetch())) - } - Self::Local(local) => Self::Local(LocalLimitExec::new(child, local.fetch())), - } - } -} - -impl From for Arc { - fn from(limit_exec: LimitExec) -> Self { - match limit_exec { - LimitExec::Global(global) => Arc::new(global), - LimitExec::Local(local) => Arc::new(local), - } - } -} - -/// Pushes down the limit through the plan. -pub fn push_down_limits( - plan: Arc, -) -> Result>> { - let maybe_modified = if let Some(limit_exec) = extract_limit(&plan) { - let child = limit_exec.input(); - if let Some(child_limit) = extract_limit(child) { - let merged = merge_limits(&limit_exec, &child_limit); - // Revisit current node in case of consecutive pushdowns - Some(push_down_limits(merged)?.data) - } else if child.supports_limit_pushdown() { - try_push_down_limit(&limit_exec, child.clone())? - } else { - add_fetch_to_child(&limit_exec, child.clone()) - } - } else { - None - }; - - Ok(maybe_modified.map_or(Transformed::no(plan), Transformed::yes)) -} - -/// Transforms the [`ExecutionPlan`] into a [`LimitExec`] if it is a -/// [`GlobalLimitExec`] or a [`LocalLimitExec`]. -fn extract_limit(plan: &Arc) -> Option { - if let Some(global_limit) = plan.as_any().downcast_ref::() { - Some(LimitExec::Global(GlobalLimitExec::new( - global_limit.input().clone(), - global_limit.skip(), - global_limit.fetch(), - ))) - } else { - plan.as_any() - .downcast_ref::() - .map(|local_limit| { - LimitExec::Local(LocalLimitExec::new( - local_limit.input().clone(), - local_limit.fetch(), - )) - }) - } -} - -/// Merge the limits of the parent and the child. If at least one of them is a -/// [`GlobalLimitExec`], the result is also a [`GlobalLimitExec`]. Otherwise, -/// the result is a [`LocalLimitExec`]. -fn merge_limits( - parent_limit_exec: &LimitExec, - child_limit_exec: &LimitExec, -) -> Arc { - // We can use the logic in `combine_limit` from the logical optimizer: - let (skip, fetch) = combine_limit( - parent_limit_exec.skip(), - parent_limit_exec.fetch(), - child_limit_exec.skip(), - child_limit_exec.fetch(), - ); - match (parent_limit_exec, child_limit_exec) { - (LimitExec::Local(_), LimitExec::Local(_)) => { - // The fetch is present in this case, can unwrap. - Arc::new(LocalLimitExec::new( - child_limit_exec.input().clone(), - fetch.unwrap(), - )) - } - _ => Arc::new(GlobalLimitExec::new( - child_limit_exec.input().clone(), - skip, - fetch, - )), - } -} - -/// Pushes down the limit through the child. If the child has a single input -/// partition, simply swaps the parent and the child. Otherwise, adds a -/// [`LocalLimitExec`] after in between in addition to swapping, because of -/// multiple input partitions. 
-fn try_push_down_limit( - limit_exec: &LimitExec, - child: Arc, -) -> Result>> { - let grandchildren = child.children(); - if let Some(&grandchild) = grandchildren.first() { - // GlobalLimitExec and LocalLimitExec must have an input after pushdown - if combines_input_partitions(&child) { - // We still need a LocalLimitExec after the child - if let Some(fetch) = limit_exec.fetch() { - let new_local_limit = Arc::new(LocalLimitExec::new( - grandchild.clone(), - fetch + limit_exec.skip(), - )); - let new_child = child.clone().with_new_children(vec![new_local_limit])?; - Ok(Some(limit_exec.with_child(new_child).into())) - } else { - Ok(None) - } - } else { - // Swap current with child - let new_limit = limit_exec.with_child(grandchild.clone()); - let new_child = child.clone().with_new_children(vec![new_limit.into()])?; - Ok(Some(new_child)) - } - } else { - // Operators supporting limit push down must have a child. - Err(plan_datafusion_err!( - "{:#?} must have a child to push down limit", - child - )) - } -} - -fn combines_input_partitions(exec: &Arc) -> bool { - let exec = exec.as_any(); - exec.is::() || exec.is::() -} - -/// Transforms child to the fetching version if supported. Removes the parent if -/// skip is zero. Otherwise, keeps the parent. -fn add_fetch_to_child( - limit_exec: &LimitExec, - child: Arc, -) -> Option> { - let fetch = limit_exec.fetch(); - let skip = limit_exec.skip(); - - let child_fetch = fetch.map(|f| f + skip); - - if let Some(child_with_fetch) = child.with_fetch(child_fetch) { - if skip > 0 { - Some(limit_exec.with_child(child_with_fetch).into()) - } else { - Some(child_with_fetch) - } - } else { - None - } -} - -#[cfg(test)] -mod tests { - use super::*; - - use arrow_schema::{DataType, Field, Schema, SchemaRef}; - use datafusion_execution::{SendableRecordBatchStream, TaskContext}; - use datafusion_expr::Operator; - use datafusion_physical_expr::expressions::BinaryExpr; - use datafusion_physical_expr::expressions::{col, lit}; - use datafusion_physical_expr::Partitioning; - use datafusion_physical_plan::coalesce_batches::CoalesceBatchesExec; - use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; - use datafusion_physical_plan::empty::EmptyExec; - use datafusion_physical_plan::filter::FilterExec; - use datafusion_physical_plan::get_plan_string; - use datafusion_physical_plan::limit::{GlobalLimitExec, LocalLimitExec}; - use datafusion_physical_plan::projection::ProjectionExec; - use datafusion_physical_plan::repartition::RepartitionExec; - use datafusion_physical_plan::streaming::{PartitionStream, StreamingTableExec}; - - struct DummyStreamPartition { - schema: SchemaRef, - } - impl PartitionStream for DummyStreamPartition { - fn schema(&self) -> &SchemaRef { - &self.schema - } - fn execute(&self, _ctx: Arc) -> SendableRecordBatchStream { - unreachable!() - } - } - - #[test] - fn transforms_streaming_table_exec_into_fetching_version_when_skip_is_zero( - ) -> Result<()> { - let schema = create_schema(); - let streaming_table = streaming_table_exec(schema.clone())?; - let global_limit = global_limit_exec(streaming_table, 0, Some(5)); - - let initial = get_plan_string(&global_limit); - let expected_initial = [ - "GlobalLimitExec: skip=0, fetch=5", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; - - let expected = [ - "StreamingTableExec: partition_sizes=1, 
projection=[c1, c2, c3], infinite_source=true, fetch=5" - ]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - #[test] - fn transforms_streaming_table_exec_into_fetching_version_and_keeps_the_global_limit_when_skip_is_nonzero( - ) -> Result<()> { - let schema = create_schema(); - let streaming_table = streaming_table_exec(schema.clone())?; - let global_limit = global_limit_exec(streaming_table, 2, Some(5)); - - let initial = get_plan_string(&global_limit); - let expected_initial = [ - "GlobalLimitExec: skip=2, fetch=5", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; - - let expected = [ - "GlobalLimitExec: skip=2, fetch=5", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true, fetch=7" - ]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - #[test] - fn transforms_coalesce_batches_exec_into_fetching_version_and_removes_local_limit( - ) -> Result<()> { - let schema = create_schema(); - let streaming_table = streaming_table_exec(schema.clone())?; - let repartition = repartition_exec(streaming_table)?; - let filter = filter_exec(schema.clone(), repartition)?; - let coalesce_batches = coalesce_batches_exec(filter); - let local_limit = local_limit_exec(coalesce_batches, 5); - let coalesce_partitions = coalesce_partitions_exec(local_limit); - let global_limit = global_limit_exec(coalesce_partitions, 0, Some(5)); - - let initial = get_plan_string(&global_limit); - let expected_initial = [ - "GlobalLimitExec: skip=0, fetch=5", - " CoalescePartitionsExec", - " LocalLimitExec: fetch=5", - " CoalesceBatchesExec: target_batch_size=8192", - " FilterExec: c3@2 > 0", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; - - let expected = [ - "GlobalLimitExec: skip=0, fetch=5", - " CoalescePartitionsExec", - " CoalesceBatchesExec: target_batch_size=8192, fetch=5", - " FilterExec: c3@2 > 0", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - #[test] - fn pushes_global_limit_exec_through_projection_exec() -> Result<()> { - let schema = create_schema(); - let streaming_table = streaming_table_exec(schema.clone())?; - let filter = filter_exec(schema.clone(), streaming_table)?; - let projection = projection_exec(schema.clone(), filter)?; - let global_limit = global_limit_exec(projection, 0, Some(5)); - - let initial = get_plan_string(&global_limit); - let expected_initial = [ - "GlobalLimitExec: skip=0, fetch=5", - " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", - " FilterExec: c3@2 > 0", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; - - let expected = [ - "ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", - " GlobalLimitExec: skip=0, fetch=5", - " FilterExec: c3@2 > 0", - " StreamingTableExec: 
partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - #[test] - fn pushes_global_limit_exec_through_projection_exec_and_transforms_coalesce_batches_exec_into_fetching_version( - ) -> Result<()> { - let schema = create_schema(); - let streaming_table = streaming_table_exec(schema.clone()).unwrap(); - let coalesce_batches = coalesce_batches_exec(streaming_table); - let projection = projection_exec(schema.clone(), coalesce_batches)?; - let global_limit = global_limit_exec(projection, 0, Some(5)); - - let initial = get_plan_string(&global_limit); - let expected_initial = [ - "GlobalLimitExec: skip=0, fetch=5", - " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", - " CoalesceBatchesExec: target_batch_size=8192", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; - - let expected = [ - "ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", - " CoalesceBatchesExec: target_batch_size=8192, fetch=5", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - #[test] - fn keeps_pushed_local_limit_exec_when_there_are_multiple_input_partitions( - ) -> Result<()> { - let schema = create_schema(); - let streaming_table = streaming_table_exec(schema.clone())?; - let repartition = repartition_exec(streaming_table)?; - let filter = filter_exec(schema.clone(), repartition)?; - let coalesce_partitions = coalesce_partitions_exec(filter); - let global_limit = global_limit_exec(coalesce_partitions, 0, Some(5)); - - let initial = get_plan_string(&global_limit); - let expected_initial = [ - "GlobalLimitExec: skip=0, fetch=5", - " CoalescePartitionsExec", - " FilterExec: c3@2 > 0", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; - - let expected = [ - "GlobalLimitExec: skip=0, fetch=5", - " CoalescePartitionsExec", - " LocalLimitExec: fetch=5", - " FilterExec: c3@2 > 0", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - #[test] - fn merges_local_limit_with_local_limit() -> Result<()> { - let schema = create_schema(); - let empty_exec = empty_exec(schema); - let child_local_limit = local_limit_exec(empty_exec, 10); - let parent_local_limit = local_limit_exec(child_local_limit, 20); - - let initial = get_plan_string(&parent_local_limit); - let expected_initial = [ - "LocalLimitExec: fetch=20", - " LocalLimitExec: fetch=10", - " EmptyExec", - ]; - - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(parent_local_limit, &ConfigOptions::new())?; - - let expected = ["LocalLimitExec: fetch=10", " EmptyExec"]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - #[test] - fn merges_global_limit_with_global_limit() -> Result<()> { - let schema = create_schema(); - let empty_exec = empty_exec(schema); - let 
child_global_limit = global_limit_exec(empty_exec, 10, Some(30)); - let parent_global_limit = global_limit_exec(child_global_limit, 10, Some(20)); - - let initial = get_plan_string(&parent_global_limit); - let expected_initial = [ - "GlobalLimitExec: skip=10, fetch=20", - " GlobalLimitExec: skip=10, fetch=30", - " EmptyExec", - ]; - - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(parent_global_limit, &ConfigOptions::new())?; - - let expected = ["GlobalLimitExec: skip=20, fetch=20", " EmptyExec"]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - #[test] - fn merges_global_limit_with_local_limit() -> Result<()> { - let schema = create_schema(); - let empty_exec = empty_exec(schema); - let local_limit = local_limit_exec(empty_exec, 40); - let global_limit = global_limit_exec(local_limit, 20, Some(30)); - - let initial = get_plan_string(&global_limit); - let expected_initial = [ - "GlobalLimitExec: skip=20, fetch=30", - " LocalLimitExec: fetch=40", - " EmptyExec", - ]; - - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; - - let expected = ["GlobalLimitExec: skip=20, fetch=20", " EmptyExec"]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - #[test] - fn merges_local_limit_with_global_limit() -> Result<()> { - let schema = create_schema(); - let empty_exec = empty_exec(schema); - let global_limit = global_limit_exec(empty_exec, 20, Some(30)); - let local_limit = local_limit_exec(global_limit, 20); - - let initial = get_plan_string(&local_limit); - let expected_initial = [ - "LocalLimitExec: fetch=20", - " GlobalLimitExec: skip=20, fetch=30", - " EmptyExec", - ]; - - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(local_limit, &ConfigOptions::new())?; - - let expected = ["GlobalLimitExec: skip=20, fetch=20", " EmptyExec"]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - fn create_schema() -> SchemaRef { - Arc::new(Schema::new(vec![ - Field::new("c1", DataType::Int32, true), - Field::new("c2", DataType::Int32, true), - Field::new("c3", DataType::Int32, true), - ])) - } - - fn streaming_table_exec(schema: SchemaRef) -> Result> { - Ok(Arc::new(StreamingTableExec::try_new( - schema.clone(), - vec![Arc::new(DummyStreamPartition { - schema: schema.clone(), - }) as _], - None, - None, - true, - None, - )?)) - } - - fn global_limit_exec( - input: Arc, - skip: usize, - fetch: Option, - ) -> Arc { - Arc::new(GlobalLimitExec::new(input, skip, fetch)) - } - - fn local_limit_exec( - input: Arc, - fetch: usize, - ) -> Arc { - Arc::new(LocalLimitExec::new(input, fetch)) - } - - fn projection_exec( - schema: SchemaRef, - input: Arc, - ) -> Result> { - Ok(Arc::new(ProjectionExec::try_new( - vec![ - (col("c1", schema.as_ref()).unwrap(), "c1".to_string()), - (col("c2", schema.as_ref()).unwrap(), "c2".to_string()), - (col("c3", schema.as_ref()).unwrap(), "c3".to_string()), - ], - input, - )?)) - } - - fn filter_exec( - schema: SchemaRef, - input: Arc, - ) -> Result> { - Ok(Arc::new(FilterExec::try_new( - Arc::new(BinaryExpr::new( - col("c3", schema.as_ref()).unwrap(), - Operator::Gt, - lit(0), - )), - input, - )?)) - } - - fn coalesce_batches_exec(input: Arc) -> Arc { - Arc::new(CoalesceBatchesExec::new(input, 8192)) - } - - fn coalesce_partitions_exec( - local_limit: Arc, - ) -> Arc { - Arc::new(CoalescePartitionsExec::new(local_limit)) - } - - fn 
repartition_exec( - streaming_table: Arc, - ) -> Result> { - Ok(Arc::new(RepartitionExec::try_new( - streaming_table, - Partitioning::RoundRobinBatch(8), - )?)) - } - - fn empty_exec(schema: SchemaRef) -> Arc { - Arc::new(EmptyExec::new(schema)) - } -} diff --git a/datafusion/core/src/physical_optimizer/mod.rs b/datafusion/core/src/physical_optimizer/mod.rs index 9291d0b84865..0e68a05d855c 100644 --- a/datafusion/core/src/physical_optimizer/mod.rs +++ b/datafusion/core/src/physical_optimizer/mod.rs @@ -26,7 +26,6 @@ pub mod combine_partial_final_agg; pub mod enforce_distribution; pub mod enforce_sorting; pub mod join_selection; -pub mod limit_pushdown; pub mod limited_distinct_aggregation; pub mod optimizer; pub mod projection_pushdown; diff --git a/datafusion/core/src/physical_optimizer/test_utils.rs b/datafusion/core/src/physical_optimizer/test_utils.rs index 55a0fa814552..90853c347672 100644 --- a/datafusion/core/src/physical_optimizer/test_utils.rs +++ b/datafusion/core/src/physical_optimizer/test_utils.rs @@ -251,7 +251,6 @@ pub fn bounded_window_exec( "count".to_owned(), &[col(col_name, &schema).unwrap()], &[], - &[], &sort_exprs, Arc::new(WindowFrame::new(Some(false))), schema.as_ref(), diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 7eb468f56eeb..9cc2f253f8da 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -1510,7 +1510,6 @@ pub fn create_window_expr_with_name( fun, name, &physical_args, - args, &partition_by, &order_by, window_frame, diff --git a/datafusion/core/tests/fuzz_cases/window_fuzz.rs b/datafusion/core/tests/fuzz_cases/window_fuzz.rs index 813862c4cc2f..d75d8e43370d 100644 --- a/datafusion/core/tests/fuzz_cases/window_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/window_fuzz.rs @@ -253,7 +253,6 @@ async fn bounded_window_causal_non_causal() -> Result<()> { let partitionby_exprs = vec![]; let orderby_exprs = vec![]; - let logical_exprs = vec![]; // Window frame starts with "UNBOUNDED PRECEDING": let start_bound = WindowFrameBound::Preceding(ScalarValue::UInt64(None)); @@ -285,7 +284,6 @@ async fn bounded_window_causal_non_causal() -> Result<()> { &window_fn, fn_name.to_string(), &args, - &logical_exprs, &partitionby_exprs, &orderby_exprs, Arc::new(window_frame), @@ -674,7 +672,6 @@ async fn run_window_test( &window_fn, fn_name.clone(), &args, - &[], &partitionby_exprs, &orderby_exprs, Arc::new(window_frame.clone()), @@ -693,7 +690,6 @@ async fn run_window_test( &window_fn, fn_name, &args, - &[], &partitionby_exprs, &orderby_exprs, Arc::new(window_frame.clone()), diff --git a/datafusion/core/tests/physical_optimizer/limit_pushdown.rs b/datafusion/core/tests/physical_optimizer/limit_pushdown.rs new file mode 100644 index 000000000000..8f3a47c95e9d --- /dev/null +++ b/datafusion/core/tests/physical_optimizer/limit_pushdown.rs @@ -0,0 +1,427 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow_schema::{DataType, Field, Schema, SchemaRef}; +use datafusion::physical_optimizer::limit_pushdown::LimitPushdown; +use datafusion_common::config::ConfigOptions; +use datafusion_execution::{SendableRecordBatchStream, TaskContext}; +use datafusion_expr::Operator; +use datafusion_physical_expr::expressions::BinaryExpr; +use datafusion_physical_expr::expressions::{col, lit}; +use datafusion_physical_expr::Partitioning; +use datafusion_physical_optimizer::PhysicalOptimizerRule; +use datafusion_physical_plan::coalesce_batches::CoalesceBatchesExec; +use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; +use datafusion_physical_plan::empty::EmptyExec; +use datafusion_physical_plan::filter::FilterExec; +use datafusion_physical_plan::limit::{GlobalLimitExec, LocalLimitExec}; +use datafusion_physical_plan::projection::ProjectionExec; +use datafusion_physical_plan::repartition::RepartitionExec; +use datafusion_physical_plan::streaming::{PartitionStream, StreamingTableExec}; +use datafusion_physical_plan::{get_plan_string, ExecutionPlan}; +use std::sync::Arc; + +struct DummyStreamPartition { + schema: SchemaRef, +} +impl PartitionStream for DummyStreamPartition { + fn schema(&self) -> &SchemaRef { + &self.schema + } + fn execute(&self, _ctx: Arc) -> SendableRecordBatchStream { + unreachable!() + } +} + +#[test] +fn transforms_streaming_table_exec_into_fetching_version_when_skip_is_zero( +) -> datafusion_common::Result<()> { + let schema = create_schema(); + let streaming_table = streaming_table_exec(schema.clone())?; + let global_limit = global_limit_exec(streaming_table, 0, Some(5)); + + let initial = get_plan_string(&global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=0, fetch=5", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let expected = [ + "StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true, fetch=5" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +#[test] +fn transforms_streaming_table_exec_into_fetching_version_and_keeps_the_global_limit_when_skip_is_nonzero( +) -> datafusion_common::Result<()> { + let schema = create_schema(); + let streaming_table = streaming_table_exec(schema.clone())?; + let global_limit = global_limit_exec(streaming_table, 2, Some(5)); + + let initial = get_plan_string(&global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=2, fetch=5", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let expected = [ + "GlobalLimitExec: skip=2, fetch=5", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true, fetch=7" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +#[test] +fn 
transforms_coalesce_batches_exec_into_fetching_version_and_removes_local_limit( +) -> datafusion_common::Result<()> { + let schema = create_schema(); + let streaming_table = streaming_table_exec(schema.clone())?; + let repartition = repartition_exec(streaming_table)?; + let filter = filter_exec(schema.clone(), repartition)?; + let coalesce_batches = coalesce_batches_exec(filter); + let local_limit = local_limit_exec(coalesce_batches, 5); + let coalesce_partitions = coalesce_partitions_exec(local_limit); + let global_limit = global_limit_exec(coalesce_partitions, 0, Some(5)); + + let initial = get_plan_string(&global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=0, fetch=5", + " CoalescePartitionsExec", + " LocalLimitExec: fetch=5", + " CoalesceBatchesExec: target_batch_size=8192", + " FilterExec: c3@2 > 0", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let expected = [ + "GlobalLimitExec: skip=0, fetch=5", + " CoalescePartitionsExec", + " CoalesceBatchesExec: target_batch_size=8192, fetch=5", + " FilterExec: c3@2 > 0", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +#[test] +fn pushes_global_limit_exec_through_projection_exec() -> datafusion_common::Result<()> { + let schema = create_schema(); + let streaming_table = streaming_table_exec(schema.clone())?; + let filter = filter_exec(schema.clone(), streaming_table)?; + let projection = projection_exec(schema.clone(), filter)?; + let global_limit = global_limit_exec(projection, 0, Some(5)); + + let initial = get_plan_string(&global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=0, fetch=5", + " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " FilterExec: c3@2 > 0", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let expected = [ + "ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " GlobalLimitExec: skip=0, fetch=5", + " FilterExec: c3@2 > 0", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +#[test] +fn pushes_global_limit_exec_through_projection_exec_and_transforms_coalesce_batches_exec_into_fetching_version( +) -> datafusion_common::Result<()> { + let schema = create_schema(); + let streaming_table = streaming_table_exec(schema.clone()).unwrap(); + let coalesce_batches = coalesce_batches_exec(streaming_table); + let projection = projection_exec(schema.clone(), coalesce_batches)?; + let global_limit = global_limit_exec(projection, 0, Some(5)); + + let initial = get_plan_string(&global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=0, fetch=5", + " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " CoalesceBatchesExec: target_batch_size=8192", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + + assert_eq!(initial, expected_initial); + + let after_optimize = + 
LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let expected = [ + "ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " CoalesceBatchesExec: target_batch_size=8192, fetch=5", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +#[test] +fn keeps_pushed_local_limit_exec_when_there_are_multiple_input_partitions( +) -> datafusion_common::Result<()> { + let schema = create_schema(); + let streaming_table = streaming_table_exec(schema.clone())?; + let repartition = repartition_exec(streaming_table)?; + let filter = filter_exec(schema.clone(), repartition)?; + let coalesce_partitions = coalesce_partitions_exec(filter); + let global_limit = global_limit_exec(coalesce_partitions, 0, Some(5)); + + let initial = get_plan_string(&global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=0, fetch=5", + " CoalescePartitionsExec", + " FilterExec: c3@2 > 0", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let expected = [ + "GlobalLimitExec: skip=0, fetch=5", + " CoalescePartitionsExec", + " LocalLimitExec: fetch=5", + " FilterExec: c3@2 > 0", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +#[test] +fn merges_local_limit_with_local_limit() -> datafusion_common::Result<()> { + let schema = create_schema(); + let empty_exec = empty_exec(schema); + let child_local_limit = local_limit_exec(empty_exec, 10); + let parent_local_limit = local_limit_exec(child_local_limit, 20); + + let initial = get_plan_string(&parent_local_limit); + let expected_initial = [ + "LocalLimitExec: fetch=20", + " LocalLimitExec: fetch=10", + " EmptyExec", + ]; + + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(parent_local_limit, &ConfigOptions::new())?; + + let expected = ["LocalLimitExec: fetch=10", " EmptyExec"]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +#[test] +fn merges_global_limit_with_global_limit() -> datafusion_common::Result<()> { + let schema = create_schema(); + let empty_exec = empty_exec(schema); + let child_global_limit = global_limit_exec(empty_exec, 10, Some(30)); + let parent_global_limit = global_limit_exec(child_global_limit, 10, Some(20)); + + let initial = get_plan_string(&parent_global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=10, fetch=20", + " GlobalLimitExec: skip=10, fetch=30", + " EmptyExec", + ]; + + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(parent_global_limit, &ConfigOptions::new())?; + + let expected = ["GlobalLimitExec: skip=20, fetch=20", " EmptyExec"]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +#[test] +fn merges_global_limit_with_local_limit() -> datafusion_common::Result<()> { + let schema = create_schema(); + let empty_exec = empty_exec(schema); + let local_limit = local_limit_exec(empty_exec, 40); + let global_limit = global_limit_exec(local_limit, 20, Some(30)); + + let initial = 
get_plan_string(&global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=20, fetch=30", + " LocalLimitExec: fetch=40", + " EmptyExec", + ]; + + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let expected = ["GlobalLimitExec: skip=20, fetch=20", " EmptyExec"]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +#[test] +fn merges_local_limit_with_global_limit() -> datafusion_common::Result<()> { + let schema = create_schema(); + let empty_exec = empty_exec(schema); + let global_limit = global_limit_exec(empty_exec, 20, Some(30)); + let local_limit = local_limit_exec(global_limit, 20); + + let initial = get_plan_string(&local_limit); + let expected_initial = [ + "LocalLimitExec: fetch=20", + " GlobalLimitExec: skip=20, fetch=30", + " EmptyExec", + ]; + + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(local_limit, &ConfigOptions::new())?; + + let expected = ["GlobalLimitExec: skip=20, fetch=20", " EmptyExec"]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +fn create_schema() -> SchemaRef { + Arc::new(Schema::new(vec![ + Field::new("c1", DataType::Int32, true), + Field::new("c2", DataType::Int32, true), + Field::new("c3", DataType::Int32, true), + ])) +} + +fn streaming_table_exec( + schema: SchemaRef, +) -> datafusion_common::Result<Arc<dyn ExecutionPlan>> { + Ok(Arc::new(StreamingTableExec::try_new( + schema.clone(), + vec![Arc::new(DummyStreamPartition { + schema: schema.clone(), + }) as _], + None, + None, + true, + None, + )?)) +} + +fn global_limit_exec( + input: Arc<dyn ExecutionPlan>, + skip: usize, + fetch: Option<usize>, +) -> Arc<dyn ExecutionPlan> { + Arc::new(GlobalLimitExec::new(input, skip, fetch)) +} + +fn local_limit_exec( + input: Arc<dyn ExecutionPlan>, + fetch: usize, +) -> Arc<dyn ExecutionPlan> { + Arc::new(LocalLimitExec::new(input, fetch)) +} + +fn projection_exec( + schema: SchemaRef, + input: Arc<dyn ExecutionPlan>, +) -> datafusion_common::Result<Arc<dyn ExecutionPlan>> { + Ok(Arc::new(ProjectionExec::try_new( + vec![ + (col("c1", schema.as_ref()).unwrap(), "c1".to_string()), + (col("c2", schema.as_ref()).unwrap(), "c2".to_string()), + (col("c3", schema.as_ref()).unwrap(), "c3".to_string()), + ], + input, + )?)) +} + +fn filter_exec( + schema: SchemaRef, + input: Arc<dyn ExecutionPlan>, +) -> datafusion_common::Result<Arc<dyn ExecutionPlan>> { + Ok(Arc::new(FilterExec::try_new( + Arc::new(BinaryExpr::new( + col("c3", schema.as_ref()).unwrap(), + Operator::Gt, + lit(0), + )), + input, + )?)) +} + +fn coalesce_batches_exec(input: Arc<dyn ExecutionPlan>) -> Arc<dyn ExecutionPlan> { + Arc::new(CoalesceBatchesExec::new(input, 8192)) +} + +fn coalesce_partitions_exec( + local_limit: Arc<dyn ExecutionPlan>, +) -> Arc<dyn ExecutionPlan> { + Arc::new(CoalescePartitionsExec::new(local_limit)) +} + +fn repartition_exec( + streaming_table: Arc<dyn ExecutionPlan>, +) -> datafusion_common::Result<Arc<dyn ExecutionPlan>> { + Ok(Arc::new(RepartitionExec::try_new( + streaming_table, + Partitioning::RoundRobinBatch(8), + )?)) +} + +fn empty_exec(schema: SchemaRef) -> Arc<dyn ExecutionPlan> { + Arc::new(EmptyExec::new(schema)) +} diff --git a/datafusion/core/tests/physical_optimizer/mod.rs b/datafusion/core/tests/physical_optimizer/mod.rs index 0ee89a3d213c..904a8b9fbb38 100644 --- a/datafusion/core/tests/physical_optimizer/mod.rs +++ b/datafusion/core/tests/physical_optimizer/mod.rs @@ -16,3 +16,4 @@ // under the License.
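The helpers above wrap a `DummyStreamPartition` whose definition is not shown in this hunk. A minimal stand-in, assuming the `PartitionStream` trait from `datafusion_physical_plan::streaming`, could look like the sketch below; the empty stream and the exact field layout are illustrative rather than the PR's code.

```rust
use std::sync::Arc;

use arrow_schema::SchemaRef;
use datafusion_execution::{SendableRecordBatchStream, TaskContext};
use datafusion_physical_plan::stream::RecordBatchStreamAdapter;
use datafusion_physical_plan::streaming::PartitionStream;

/// Stand-in partition: it only has to report a schema and hand back a stream.
#[derive(Debug)]
struct DummyStreamPartition {
    schema: SchemaRef,
}

impl PartitionStream for DummyStreamPartition {
    fn schema(&self) -> &SchemaRef {
        &self.schema
    }

    fn execute(&self, _ctx: Arc<TaskContext>) -> SendableRecordBatchStream {
        // An empty stream is enough for plan-shape tests; only the schema matters here.
        Box::pin(RecordBatchStreamAdapter::new(
            Arc::clone(&self.schema),
            futures::stream::empty::<datafusion_common::Result<arrow_array::RecordBatch>>(),
        ))
    }
}
```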
mod aggregate_statistics; +mod limit_pushdown; diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index 05e365a0b988..251ac6cb8c0e 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -25,7 +25,7 @@ use crate::operator::Operator; use arrow::array::{new_empty_array, Array}; use arrow::compute::can_cast_types; use arrow::datatypes::{ - DataType, Field, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, + DataType, Field, FieldRef, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE, }; use datafusion_common::{exec_datafusion_err, plan_datafusion_err, plan_err, Result}; @@ -498,6 +498,7 @@ pub fn comparison_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> { +fn struct_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> { + use arrow::datatypes::DataType::*; + match (lhs_type, rhs_type) { + (Struct(lhs_fields), Struct(rhs_fields)) => { + if lhs_fields.len() != rhs_fields.len() { + return None; + } + + let types = std::iter::zip(lhs_fields.iter(), rhs_fields.iter()) + .map(|(lhs, rhs)| comparison_coercion(lhs.data_type(), rhs.data_type())) + .collect::<Option<Vec<_>>>()?; + + let fields = types + .into_iter() + .enumerate() + .map(|(i, datatype)| { + Arc::new(Field::new(format!("c{i}"), datatype, true)) + }) + .collect::<Vec<FieldRef>>(); + Some(Struct(fields.into())) + } + _ => None, + } +} + /// Returns the output type of applying mathematics operations such as /// `+` to arguments of `lhs_type` and `rhs_type`. fn mathematics_numerical_coercion( diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 5030a95d3c8a..b4d489cc7c1e 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -41,7 +41,10 @@ use datafusion_common::{ internal_err, not_impl_err, plan_err, Column, DFSchema, Result, ScalarValue, TableReference, }; -use sqlparser::ast::NullTreatment; +use sqlparser::ast::{ + display_comma_separated, ExceptSelectItem, ExcludeSelectItem, IlikeSelectItem, + NullTreatment, RenameSelectItem, ReplaceSelectElement, +}; /// Represents logical expressions such as `A + 1`, or `CAST(c1 AS int)`. /// @@ -315,7 +318,10 @@ pub enum Expr { /// /// This expr has to be resolved to a list of columns before translating logical /// plan into physical plan. - Wildcard { qualifier: Option<TableReference> }, + Wildcard { + qualifier: Option<TableReference>, + options: WildcardOptions, + }, /// List of grouping set expressions. Only valid in the context of an aggregate /// GROUP BY expression list GroupingSet(GroupingSet), @@ -970,6 +976,89 @@ impl GroupingSet { } } +/// Additional options for wildcards, e.g. Snowflake `EXCLUDE`/`RENAME` and Bigquery `EXCEPT`. +#[derive(Clone, PartialEq, Eq, Hash, Debug, Default)] +pub struct WildcardOptions { + /// `[ILIKE...]`. + /// Snowflake syntax: + pub ilike: Option<IlikeSelectItem>, + /// `[EXCLUDE...]`. + /// Snowflake syntax: + pub exclude: Option<ExcludeSelectItem>, + /// `[EXCEPT...]`. + /// BigQuery syntax: + /// Clickhouse syntax: + pub except: Option<ExceptSelectItem>, + /// `[REPLACE]` + /// BigQuery syntax: + /// Clickhouse syntax: + /// Snowflake syntax: + pub replace: Option<PlannedReplaceSelectItem>, + /// `[RENAME ...]`.
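Before the rest of the `expr.rs` changes, a quick illustration of the struct coercion rule added to `binary.rs` above: structs of equal arity are coerced field by field, and the common type uses generated `c0`, `c1`, ... field names. The sketch below goes through the public `comparison_coercion` entry point and assumes the new rule is wired into its fallback chain, as the hunk indicates; the crate path is taken from the file layout above.

```rust
use arrow::datatypes::{DataType, Field, Fields};
use datafusion_expr_common::type_coercion::binary::comparison_coercion;

fn main() {
    // Same arity, different field names and integer widths.
    let lhs = DataType::Struct(Fields::from(vec![
        Field::new("a", DataType::Int32, true),
        Field::new("b", DataType::Utf8, true),
    ]));
    let rhs = DataType::Struct(Fields::from(vec![
        Field::new("x", DataType::Int64, true),
        Field::new("y", DataType::Utf8, true),
    ]));

    // Field types are coerced pairwise (Int32 vs Int64 -> Int64) and the result
    // is a struct with generated, nullable fields c0, c1, ...
    let coerced = comparison_coercion(&lhs, &rhs);
    assert_eq!(
        coerced,
        Some(DataType::Struct(Fields::from(vec![
            Field::new("c0", DataType::Int64, true),
            Field::new("c1", DataType::Utf8, true),
        ])))
    );
}
```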
+ /// Snowflake syntax: + pub rename: Option<RenameSelectItem>, +} + +impl WildcardOptions { + pub fn with_replace(self, replace: PlannedReplaceSelectItem) -> Self { + WildcardOptions { + ilike: self.ilike, + exclude: self.exclude, + except: self.except, + replace: Some(replace), + rename: self.rename, + } + } +} + +impl Display for WildcardOptions { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + if let Some(ilike) = &self.ilike { + write!(f, " {ilike}")?; + } + if let Some(exclude) = &self.exclude { + write!(f, " {exclude}")?; + } + if let Some(except) = &self.except { + write!(f, " {except}")?; + } + if let Some(replace) = &self.replace { + write!(f, " {replace}")?; + } + if let Some(rename) = &self.rename { + write!(f, " {rename}")?; + } + Ok(()) + } +} + +/// The planned expressions for `REPLACE` +#[derive(Clone, PartialEq, Eq, Hash, Debug, Default)] +pub struct PlannedReplaceSelectItem { + /// The original ast nodes + pub items: Vec<ReplaceSelectElement>, + /// The expression planned from the ast nodes. They will be used when expanding the wildcard. + pub planned_expressions: Vec<Expr>, +} + +impl Display for PlannedReplaceSelectItem { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "REPLACE")?; + write!(f, " ({})", display_comma_separated(&self.items))?; + Ok(()) + } +} + +impl PlannedReplaceSelectItem { + pub fn items(&self) -> &[ReplaceSelectElement] { + &self.items + } + + pub fn expressions(&self) -> &[Expr] { + &self.planned_expressions + } +} + /// Fixed seed for the hashing so that Ords are consistent across runs const SEED: ahash::RandomState = ahash::RandomState::with_seeds(0, 0, 0, 0); @@ -1720,8 +1809,9 @@ impl Expr { Expr::ScalarSubquery(subquery) => { subquery.hash(hasher); } - Expr::Wildcard { qualifier } => { + Expr::Wildcard { qualifier, options } => { qualifier.hash(hasher); + options.hash(hasher); } Expr::GroupingSet(grouping_set) => { mem::discriminant(grouping_set).hash(hasher); @@ -2242,9 +2332,9 @@ impl fmt::Display for Expr { write!(f, "{expr} IN ([{}])", expr_vec_fmt!(list)) } } - Expr::Wildcard { qualifier } => match qualifier { - Some(qualifier) => write!(f, "{qualifier}.*"), - None => write!(f, "*"), + Expr::Wildcard { qualifier, options } => match qualifier { + Some(qualifier) => write!(f, "{qualifier}.*{options}"), + None => write!(f, "*{options}"), }, Expr::GroupingSet(grouping_sets) => match grouping_sets { GroupingSet::Rollup(exprs) => { @@ -2543,9 +2633,10 @@ fn create_physical_name(e: &Expr, is_first_expr: bool) -> Result<String> { Expr::Sort { .. } => { internal_err!("Create physical name does not support sort expression") } - Expr::Wildcard { ..
} => { - internal_err!("Create physical name does not support wildcard") - } + Expr::Wildcard { qualifier, options } => match qualifier { + Some(qualifier) => Ok(format!("{}.*{}", qualifier, options)), + None => Ok(format!("*{}", options)), + }, Expr::Placeholder(_) => { internal_err!("Create physical name does not support placeholder") } @@ -2558,7 +2649,12 @@ fn create_physical_name(e: &Expr, is_first_expr: bool) -> Result { #[cfg(test)] mod test { use crate::expr_fn::col; - use crate::{case, lit, ColumnarValue, ScalarUDF, ScalarUDFImpl, Volatility}; + use crate::{ + case, lit, qualified_wildcard, wildcard, wildcard_with_options, ColumnarValue, + ScalarUDF, ScalarUDFImpl, Volatility, + }; + use sqlparser::ast; + use sqlparser::ast::{Ident, IdentWithAlias}; use std::any::Any; #[test] @@ -2859,4 +2955,109 @@ mod test { ); assert_eq!(find_df_window_func("not_exist"), None) } + + #[test] + fn test_display_wildcard() { + assert_eq!(format!("{}", wildcard()), "*"); + assert_eq!(format!("{}", qualified_wildcard("t1")), "t1.*"); + assert_eq!( + format!( + "{}", + wildcard_with_options(wildcard_options( + Some(IlikeSelectItem { + pattern: "c1".to_string() + }), + None, + None, + None, + None + )) + ), + "* ILIKE 'c1'" + ); + assert_eq!( + format!( + "{}", + wildcard_with_options(wildcard_options( + None, + Some(ExcludeSelectItem::Multiple(vec![ + Ident::from("c1"), + Ident::from("c2") + ])), + None, + None, + None + )) + ), + "* EXCLUDE (c1, c2)" + ); + assert_eq!( + format!( + "{}", + wildcard_with_options(wildcard_options( + None, + None, + Some(ExceptSelectItem { + first_element: Ident::from("c1"), + additional_elements: vec![Ident::from("c2")] + }), + None, + None + )) + ), + "* EXCEPT (c1, c2)" + ); + assert_eq!( + format!( + "{}", + wildcard_with_options(wildcard_options( + None, + None, + None, + Some(PlannedReplaceSelectItem { + items: vec![ReplaceSelectElement { + expr: ast::Expr::Identifier(Ident::from("c1")), + column_name: Ident::from("a1"), + as_keyword: false + }], + planned_expressions: vec![] + }), + None + )) + ), + "* REPLACE (c1 a1)" + ); + assert_eq!( + format!( + "{}", + wildcard_with_options(wildcard_options( + None, + None, + None, + None, + Some(RenameSelectItem::Multiple(vec![IdentWithAlias { + ident: Ident::from("c1"), + alias: Ident::from("a1") + }])) + )) + ), + "* RENAME (c1 AS a1)" + ) + } + + fn wildcard_options( + opt_ilike: Option, + opt_exclude: Option, + opt_except: Option, + opt_replace: Option, + opt_rename: Option, + ) -> WildcardOptions { + WildcardOptions { + ilike: opt_ilike, + exclude: opt_exclude, + except: opt_except, + replace: opt_replace, + rename: opt_rename, + } + } } diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index e9c5485656c8..4e6022399653 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -19,7 +19,7 @@ use crate::expr::{ AggregateFunction, BinaryExpr, Cast, Exists, GroupingSet, InList, InSubquery, - Placeholder, TryCast, Unnest, WindowFunction, + Placeholder, TryCast, Unnest, WildcardOptions, WindowFunction, }; use crate::function::{ AccumulatorArgs, AccumulatorFactoryFunction, PartitionEvaluatorFactory, @@ -37,7 +37,7 @@ use arrow::compute::kernels::cast_utils::{ parse_interval_day_time, parse_interval_month_day_nano, parse_interval_year_month, }; use arrow::datatypes::{DataType, Field}; -use datafusion_common::{plan_err, Column, Result, ScalarValue}; +use datafusion_common::{plan_err, Column, Result, ScalarValue, TableReference}; use sqlparser::ast::NullTreatment; use 
std::any::Any; use std::fmt::Debug; @@ -119,7 +119,46 @@ pub fn placeholder(id: impl Into) -> Expr { /// assert_eq!(p.to_string(), "*") /// ``` pub fn wildcard() -> Expr { - Expr::Wildcard { qualifier: None } + Expr::Wildcard { + qualifier: None, + options: WildcardOptions::default(), + } +} + +/// Create an '*' [`Expr::Wildcard`] expression with the wildcard options +pub fn wildcard_with_options(options: WildcardOptions) -> Expr { + Expr::Wildcard { + qualifier: None, + options, + } +} + +/// Create an 't.*' [`Expr::Wildcard`] expression that matches all columns from a specific table +/// +/// # Example +/// +/// ```rust +/// # use datafusion_common::TableReference; +/// # use datafusion_expr::{qualified_wildcard}; +/// let p = qualified_wildcard(TableReference::bare("t")); +/// assert_eq!(p.to_string(), "t.*") +/// ``` +pub fn qualified_wildcard(qualifier: impl Into) -> Expr { + Expr::Wildcard { + qualifier: Some(qualifier.into()), + options: WildcardOptions::default(), + } +} + +/// Create an 't.*' [`Expr::Wildcard`] expression with the wildcard options +pub fn qualified_wildcard_with_options( + qualifier: impl Into, + options: WildcardOptions, +) -> Expr { + Expr::Wildcard { + qualifier: Some(qualifier.into()), + options, + } } /// Return a new expression `left right` diff --git a/datafusion/expr/src/expr_rewriter/mod.rs b/datafusion/expr/src/expr_rewriter/mod.rs index 0dc41d4a9ac1..32e621350ee2 100644 --- a/datafusion/expr/src/expr_rewriter/mod.rs +++ b/datafusion/expr/src/expr_rewriter/mod.rs @@ -248,6 +248,7 @@ fn coerce_exprs_for_schema( Expr::Alias(Alias { expr, name, .. }) => { Ok(expr.cast_to(new_type, src_schema)?.alias(name)) } + Expr::Wildcard { .. } => Ok(expr), _ => expr.cast_to(new_type, src_schema), } } else { diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 73123819ba99..af35b9a9910d 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -28,8 +28,8 @@ use crate::{utils, LogicalPlan, Projection, Subquery, WindowFunctionDefinition}; use arrow::compute::can_cast_types; use arrow::datatypes::{DataType, Field}; use datafusion_common::{ - internal_err, not_impl_err, plan_datafusion_err, plan_err, Column, ExprSchema, - Result, TableReference, + not_impl_err, plan_datafusion_err, plan_err, Column, ExprSchema, Result, + TableReference, }; use std::collections::HashMap; use std::sync::Arc; @@ -244,13 +244,7 @@ impl ExprSchemable for Expr { ) }) } - Expr::Wildcard { qualifier } => { - // Wildcard do not really have a type and do not appear in projections - match qualifier { - Some(_) => internal_err!("QualifiedWildcard expressions are not valid in a logical query plan"), - None => Ok(DataType::Null) - } - } + Expr::Wildcard { .. } => Ok(DataType::Null), Expr::GroupingSet(_) => { // grouping sets do not really have a type and do not appear in projections Ok(DataType::Null) @@ -362,12 +356,7 @@ impl ExprSchemable for Expr { | Expr::SimilarTo(Like { expr, pattern, .. }) => { Ok(expr.nullable(input_schema)? || pattern.nullable(input_schema)?) } - Expr::Wildcard { qualifier } => match qualifier { - Some(_) => internal_err!( - "QualifiedWildcard expressions are not valid in a logical query plan" - ), - None => Ok(false), - }, + Expr::Wildcard { .. 
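Taken together, the new builder helpers and `WildcardOptions` let a wildcard carry its SQL modifiers on the expression itself. A small sketch of building `t1.* EXCLUDE c1` and checking its display form; import paths are assumed from the files shown above.

```rust
use datafusion_expr::expr::WildcardOptions;
use datafusion_expr::expr_fn::qualified_wildcard_with_options;
use sqlparser::ast::{ExcludeSelectItem, Ident};

fn main() {
    // Carry an `EXCLUDE c1` clause on a qualified wildcard
    let options = WildcardOptions {
        exclude: Some(ExcludeSelectItem::Single(Ident::new("c1"))),
        ..Default::default()
    };
    let expr = qualified_wildcard_with_options("t1", options);

    // The display form mirrors the SQL the wildcard was planned from
    assert_eq!(expr.to_string(), "t1.* EXCLUDE c1");
}
```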
} => Ok(false), Expr::GroupingSet(_) => { // grouping sets do not really have the concept of nullable and do not appear // in projections @@ -548,7 +537,7 @@ mod tests { use super::*; use crate::{col, lit}; - use datafusion_common::{DFSchema, ScalarValue}; + use datafusion_common::{internal_err, DFSchema, ScalarValue}; macro_rules! test_is_expr_nullable { ($EXPR_TYPE:ident) => {{ diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index 4ef346656ff4..2e53a682854c 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -20,7 +20,6 @@ use std::any::Any; use std::cmp::Ordering; use std::collections::{HashMap, HashSet}; -use std::iter::zip; use std::sync::Arc; use crate::dml::CopyTo; @@ -36,11 +35,10 @@ use crate::logical_plan::{ Projection, Repartition, Sort, SubqueryAlias, TableScan, Union, Unnest, Values, Window, }; -use crate::type_coercion::binary::{comparison_coercion, values_coercion}; +use crate::type_coercion::binary::values_coercion; use crate::utils::{ - can_hash, columnize_expr, compare_sort_expr, expand_qualified_wildcard, - expand_wildcard, expr_to_columns, find_valid_equijoin_key_pair, - group_window_expr_by_sort_keys, + can_hash, columnize_expr, compare_sort_expr, expr_to_columns, + find_valid_equijoin_key_pair, group_window_expr_by_sort_keys, }; use crate::{ and, binary_expr, logical_plan::tree_node::unwrap_arc, DmlStatement, Expr, @@ -1316,7 +1314,7 @@ fn add_group_by_exprs_from_dependencies( Ok(group_expr) } /// Errors if one or more expressions have equal names. -pub(crate) fn validate_unique_names<'a>( +pub fn validate_unique_names<'a>( node_name: &str, expressions: impl IntoIterator, ) -> Result<()> { @@ -1339,95 +1337,14 @@ pub(crate) fn validate_unique_names<'a>( }) } -pub fn project_with_column_index( - expr: Vec, - input: Arc, - schema: DFSchemaRef, -) -> Result { - let alias_expr = expr - .into_iter() - .enumerate() - .map(|(i, e)| match e { - Expr::Alias(Alias { ref name, .. }) if name != schema.field(i).name() => { - e.unalias().alias(schema.field(i).name()) - } - Expr::Column(Column { - relation: _, - ref name, - }) if name != schema.field(i).name() => e.alias(schema.field(i).name()), - Expr::Alias { .. } | Expr::Column { .. } => e, - _ => e.alias(schema.field(i).name()), - }) - .collect::>(); - - Projection::try_new_with_schema(alias_expr, input, schema) - .map(LogicalPlan::Projection) -} - /// Union two logical plans. pub fn union(left_plan: LogicalPlan, right_plan: LogicalPlan) -> Result { - let left_col_num = left_plan.schema().fields().len(); - - // check union plan length same. 
- let right_col_num = right_plan.schema().fields().len(); - if right_col_num != left_col_num { - return plan_err!( - "Union queries must have the same number of columns, (left is {left_col_num}, right is {right_col_num})"); - } - - // create union schema - let union_qualified_fields = - zip(left_plan.schema().iter(), right_plan.schema().iter()) - .map( - |((left_qualifier, left_field), (_right_qualifier, right_field))| { - let nullable = left_field.is_nullable() || right_field.is_nullable(); - let data_type = comparison_coercion( - left_field.data_type(), - right_field.data_type(), - ) - .ok_or_else(|| { - plan_datafusion_err!( - "UNION Column {} (type: {}) is not compatible with column {} (type: {})", - right_field.name(), - right_field.data_type(), - left_field.name(), - left_field.data_type() - ) - })?; - Ok(( - left_qualifier.cloned(), - Arc::new(Field::new(left_field.name(), data_type, nullable)), - )) - }, - ) - .collect::>>()?; - let union_schema = - DFSchema::new_with_metadata(union_qualified_fields, HashMap::new())?; - - let inputs = vec![left_plan, right_plan] - .into_iter() - .map(|p| { - let plan = coerce_plan_expr_for_schema(&p, &union_schema)?; - match plan { - LogicalPlan::Projection(Projection { expr, input, .. }) => { - Ok(Arc::new(project_with_column_index( - expr, - input, - Arc::new(union_schema.clone()), - )?)) - } - other_plan => Ok(Arc::new(other_plan)), - } - }) - .collect::>>()?; - - if inputs.is_empty() { - return plan_err!("Empty UNION"); - } - + // Temporarily use the schema from the left input and later rely on the analyzer to + // coerce the two schemas into a common one. + let schema = Arc::clone(left_plan.schema()); Ok(LogicalPlan::Union(Union { - inputs, - schema: Arc::new(union_schema), + inputs: vec![Arc::new(left_plan), Arc::new(right_plan)], + schema, })) } @@ -1440,22 +1357,11 @@ pub fn project( plan: LogicalPlan, expr: impl IntoIterator>, ) -> Result { - // TODO: move it into analyzer - let input_schema = plan.schema(); let mut projected_expr = vec![]; for e in expr { let e = e.into(); match e { - Expr::Wildcard { qualifier: None } => { - projected_expr.extend(expand_wildcard(input_schema, &plan, None)?) - } - Expr::Wildcard { - qualifier: Some(qualifier), - } => projected_expr.extend(expand_qualified_wildcard( - &qualifier, - input_schema, - None, - )?), + Expr::Wildcard { .. } => projected_expr.push(e), _ => projected_expr.push(columnize_expr(normalize_col(e, &plan)?, &plan)?), } } @@ -1807,26 +1713,6 @@ mod tests { Ok(()) } - #[test] - fn plan_using_join_wildcard_projection() -> Result<()> { - let t2 = table_scan(Some("t2"), &employee_schema(), None)?.build()?; - - let plan = table_scan(Some("t1"), &employee_schema(), None)? - .join_using(t2, JoinType::Inner, vec!["id"])? - .project(vec![Expr::Wildcard { qualifier: None }])? 
- .build()?; - - // id column should only show up once in projection - let expected = "Projection: t1.id, t1.first_name, t1.last_name, t1.state, t1.salary, t2.first_name, t2.last_name, t2.state, t2.salary\ - \n Inner Join: Using t1.id = t2.id\ - \n TableScan: t1\ - \n TableScan: t2"; - - assert_eq!(expected, format!("{plan}")); - - Ok(()) - } - #[test] fn plan_builder_union() -> Result<()> { let plan = @@ -1881,23 +1767,6 @@ mod tests { Ok(()) } - #[test] - fn plan_builder_union_different_num_columns_error() -> Result<()> { - let plan1 = - table_scan(TableReference::none(), &employee_schema(), Some(vec![3]))?; - let plan2 = - table_scan(TableReference::none(), &employee_schema(), Some(vec![3, 4]))?; - - let expected = "Error during planning: Union queries must have the same number of columns, (left is 1, right is 2)"; - let err_msg1 = plan1.clone().union(plan2.clone().build()?).unwrap_err(); - let err_msg2 = plan1.union_distinct(plan2.build()?).unwrap_err(); - - assert_eq!(err_msg1.strip_backtrace(), expected); - assert_eq!(err_msg2.strip_backtrace(), expected); - - Ok(()) - } - #[test] fn plan_builder_simple_distinct() -> Result<()> { let plan = diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index c5538d8880a7..2bab6d516a73 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -31,8 +31,9 @@ use crate::logical_plan::display::{GraphvizVisitor, IndentVisitor}; use crate::logical_plan::extension::UserDefinedLogicalNode; use crate::logical_plan::{DmlStatement, Statement}; use crate::utils::{ - enumerate_grouping_sets, exprlist_to_fields, find_out_reference_exprs, - grouping_set_expr_count, grouping_set_to_exprlist, split_conjunction, + enumerate_grouping_sets, exprlist_len, exprlist_to_fields, find_base_plan, + find_out_reference_exprs, grouping_set_expr_count, grouping_set_to_exprlist, + split_conjunction, }; use crate::{ build_join_schema, expr_vec_fmt, BinaryExpr, BuiltInWindowFunction, @@ -1977,7 +1978,9 @@ impl Projection { input: Arc, schema: DFSchemaRef, ) -> Result { - if expr.len() != schema.fields().len() { + if !expr.iter().any(|e| matches!(e, Expr::Wildcard { .. })) + && expr.len() != schema.fields().len() + { return plan_err!("Projection has mismatch between number of expressions ({}) and number of fields in schema ({})", expr.len(), schema.fields().len()); } Ok(Self { @@ -2763,20 +2766,48 @@ fn calc_func_dependencies_for_project( // Calculate expression indices (if present) in the input schema. 
let proj_indices = exprs .iter() - .filter_map(|expr| { - let expr_name = match expr { - Expr::Alias(alias) => { - format!("{}", alias.expr) - } - _ => format!("{}", expr), - }; - input_fields.iter().position(|item| *item == expr_name) + .map(|expr| match expr { + Expr::Wildcard { qualifier, options } => { + let wildcard_fields = exprlist_to_fields( + vec![&Expr::Wildcard { + qualifier: qualifier.clone(), + options: options.clone(), + }], + input, + )?; + Ok::<_, DataFusionError>( + wildcard_fields + .into_iter() + .filter_map(|(qualifier, f)| { + let flat_name = qualifier + .map(|t| format!("{}.{}", t, f.name())) + .unwrap_or(f.name().clone()); + input_fields.iter().position(|item| *item == flat_name) + }) + .collect::>(), + ) + } + Expr::Alias(alias) => Ok(input_fields + .iter() + .position(|item| *item == format!("{}", alias.expr)) + .map(|i| vec![i]) + .unwrap_or(vec![])), + _ => Ok(input_fields + .iter() + .position(|item| *item == format!("{}", expr)) + .map(|i| vec![i]) + .unwrap_or(vec![])), }) + .collect::>>()? + .into_iter() + .flatten() .collect::>(); + + let len = exprlist_len(exprs, input.schema(), Some(find_base_plan(input).schema()))?; Ok(input .schema() .functional_dependencies() - .project_functional_dependencies(&proj_indices, exprs.len())) + .project_functional_dependencies(&proj_indices, len)) } /// Sorts its input according to a list of sort expressions. diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index 7b650d1ab448..4db5061e8fe7 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -21,7 +21,7 @@ use std::cmp::Ordering; use std::collections::{HashMap, HashSet}; use std::sync::Arc; -use crate::expr::{Alias, Sort, WindowFunction}; +use crate::expr::{Alias, Sort, WildcardOptions, WindowFunction}; use crate::expr_rewriter::strip_outer_reference; use crate::{ and, BinaryExpr, Expr, ExprSchemable, Filter, GroupingSet, LogicalPlan, Operator, @@ -34,11 +34,11 @@ use datafusion_common::tree_node::{ }; use datafusion_common::utils::get_at_indices; use datafusion_common::{ - internal_err, plan_datafusion_err, plan_err, Column, DFSchema, DFSchemaRef, Result, - TableReference, + internal_err, plan_datafusion_err, plan_err, Column, DFSchema, DFSchemaRef, + DataFusionError, Result, TableReference, }; -use sqlparser::ast::{ExceptSelectItem, ExcludeSelectItem, WildcardAdditionalOptions}; +use sqlparser::ast::{ExceptSelectItem, ExcludeSelectItem}; pub use datafusion_functions_aggregate_common::order::AggregateOrderSensitivity; @@ -377,7 +377,7 @@ fn get_exprs_except_skipped( pub fn expand_wildcard( schema: &DFSchema, plan: &LogicalPlan, - wildcard_options: Option<&WildcardAdditionalOptions>, + wildcard_options: Option<&WildcardOptions>, ) -> Result> { let using_columns = plan.using_columns()?; let mut columns_to_skip = using_columns @@ -401,9 +401,9 @@ pub fn expand_wildcard( .collect::>() }) .collect::>(); - let excluded_columns = if let Some(WildcardAdditionalOptions { - opt_exclude, - opt_except, + let excluded_columns = if let Some(WildcardOptions { + exclude: opt_exclude, + except: opt_except, .. 
}) = wildcard_options { @@ -420,7 +420,7 @@ pub fn expand_wildcard( pub fn expand_qualified_wildcard( qualifier: &TableReference, schema: &DFSchema, - wildcard_options: Option<&WildcardAdditionalOptions>, + wildcard_options: Option<&WildcardOptions>, ) -> Result> { let qualified_indices = schema.fields_indices_with_qualified(qualifier); let projected_func_dependencies = schema @@ -435,9 +435,9 @@ pub fn expand_qualified_wildcard( let qualified_dfschema = DFSchema::try_from_qualified_schema(qualifier.clone(), &qualified_schema)? .with_functional_dependencies(projected_func_dependencies)?; - let excluded_columns = if let Some(WildcardAdditionalOptions { - opt_exclude, - opt_except, + let excluded_columns = if let Some(WildcardOptions { + exclude: opt_exclude, + except: opt_except, .. }) = wildcard_options { @@ -731,11 +731,129 @@ pub fn exprlist_to_fields<'a>( plan: &LogicalPlan, ) -> Result, Arc)>> { // look for exact match in plan's output schema - let input_schema = &plan.schema(); - exprs + let wildcard_schema = find_base_plan(plan).schema(); + let input_schema = plan.schema(); + let result = exprs .into_iter() - .map(|e| e.to_field(input_schema)) - .collect() + .map(|e| match e { + Expr::Wildcard { qualifier, options } => match qualifier { + None => { + let excluded: Vec = get_excluded_columns( + options.exclude.as_ref(), + options.except.as_ref(), + wildcard_schema, + None, + )? + .into_iter() + .map(|c| c.flat_name()) + .collect(); + Ok::<_, DataFusionError>( + wildcard_schema + .field_names() + .iter() + .enumerate() + .filter(|(_, s)| !excluded.contains(s)) + .map(|(i, _)| wildcard_schema.qualified_field(i)) + .map(|(qualifier, f)| { + (qualifier.cloned(), Arc::new(f.to_owned())) + }) + .collect::>(), + ) + } + Some(qualifier) => { + let excluded: Vec = get_excluded_columns( + options.exclude.as_ref(), + options.except.as_ref(), + wildcard_schema, + Some(qualifier), + )? + .into_iter() + .map(|c| c.flat_name()) + .collect(); + Ok(wildcard_schema + .fields_with_qualified(qualifier) + .into_iter() + .filter_map(|field| { + let flat_name = format!("{}.{}", qualifier, field.name()); + if excluded.contains(&flat_name) { + None + } else { + Some(( + Some(qualifier.clone()), + Arc::new(field.to_owned()), + )) + } + }) + .collect::>()) + } + }, + _ => Ok(vec![e.to_field(input_schema)?]), + }) + .collect::>>()? + .into_iter() + .flatten() + .collect(); + Ok(result) +} + +/// Find the suitable base plan to expand the wildcard expression recursively. +/// When planning [LogicalPlan::Window] and [LogicalPlan::Aggregate], we will generate +/// an intermediate plan based on the relation plan (e.g. [LogicalPlan::TableScan], [LogicalPlan::Subquery], ...). +/// If we expand a wildcard expression basing the intermediate plan, we could get some duplicate fields. +pub fn find_base_plan(input: &LogicalPlan) -> &LogicalPlan { + match input { + LogicalPlan::Window(window) => find_base_plan(&window.input), + LogicalPlan::Aggregate(agg) => find_base_plan(&agg.input), + _ => input, + } +} + +/// Count the number of real fields. We should expand the wildcard expression to get the actual number. +pub fn exprlist_len( + exprs: &[Expr], + schema: &DFSchemaRef, + wildcard_schema: Option<&DFSchemaRef>, +) -> Result { + exprs + .iter() + .map(|e| match e { + Expr::Wildcard { + qualifier: None, + options, + } => { + let excluded = get_excluded_columns( + options.exclude.as_ref(), + options.except.as_ref(), + wildcard_schema.unwrap_or(schema), + None, + )? 
+ .into_iter() + .collect::>(); + Ok( + get_exprs_except_skipped(wildcard_schema.unwrap_or(schema), excluded) + .len(), + ) + } + Expr::Wildcard { + qualifier: Some(qualifier), + options, + } => { + let excluded = get_excluded_columns( + options.exclude.as_ref(), + options.except.as_ref(), + wildcard_schema.unwrap_or(schema), + Some(qualifier), + )? + .into_iter() + .collect::>(); + Ok( + get_exprs_except_skipped(wildcard_schema.unwrap_or(schema), excluded) + .len(), + ) + } + _ => Ok(1), + }) + .sum() } /// Convert an expression into Column expression if it's already provided as input plan. diff --git a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator.rs b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator.rs index 644221edd04d..3984b02c5fbb 100644 --- a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator.rs +++ b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator.rs @@ -20,6 +20,7 @@ pub mod accumulate; pub mod bool_op; +pub mod nulls; pub mod prim_op; use arrow::{ diff --git a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/nulls.rs b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/nulls.rs new file mode 100644 index 000000000000..25212f7f0f5f --- /dev/null +++ b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/nulls.rs @@ -0,0 +1,93 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! [`set_nulls`], and [`filtered_null_mask`], utilities for working with nulls + +use arrow::array::{Array, ArrowNumericType, BooleanArray, PrimitiveArray}; +use arrow::buffer::NullBuffer; + +/// Sets the validity mask for a `PrimitiveArray` to `nulls` +/// replacing any existing null mask +pub fn set_nulls( + array: PrimitiveArray, + nulls: Option, +) -> PrimitiveArray { + let (dt, values, _old_nulls) = array.into_parts(); + PrimitiveArray::::new(values, nulls).with_data_type(dt) +} + +/// Converts a `BooleanBuffer` representing a filter to a `NullBuffer. +/// +/// The `NullBuffer` is +/// * `true` (representing valid) for values that were `true` in filter +/// * `false` (representing null) for values that were `false` or `null` in filter +fn filter_to_nulls(filter: &BooleanArray) -> Option { + let (filter_bools, filter_nulls) = filter.clone().into_parts(); + let filter_bools = NullBuffer::from(filter_bools); + NullBuffer::union(Some(&filter_bools), filter_nulls.as_ref()) +} + +/// Compute an output validity mask for an array that has been filtered +/// +/// This can be used to compute nulls for the output of +/// [`GroupsAccumulator::convert_to_state`], which quickly applies an optional +/// filter to the input rows by setting any filtered rows to NULL in the output. 
+/// Subsequent applications of aggregate functions that ignore NULLs (most of +/// them) will thus ignore the filtered rows as well. +/// +/// # Output element is `true` (and thus output is non-null) +/// +/// A `true` in the output represents non null output for all values that were *both*: +/// +/// * `true` in any `opt_filter` (aka values that passed the filter) +/// +/// * `non null` in `input` +/// +/// # Output element is `false` (and thus output is null) +/// +/// A `false` in the output represents an input that was *either*: +/// +/// * `null` +/// +/// * filtered (aka the value was `false` or `null` in the filter) +/// +/// # Example +/// +/// ```text +/// ┌─────┐ ┌─────┐ ┌─────┐ +/// │true │ │NULL │ │false│ +/// │true │ │ │true │ │true │ +/// │true │ ───┼─── │false│ ────────▶ │false│ filtered_nulls +/// │false│ │ │NULL │ │false│ +/// │false│ │true │ │false│ +/// └─────┘ └─────┘ └─────┘ +/// array opt_filter output +/// .nulls() +/// +/// false = NULL true = pass false = NULL Meanings +/// true = valid false = filter true = valid +/// NULL = filter +/// ``` +/// +/// [`GroupsAccumulator::convert_to_state`]: datafusion_expr_common::groups_accumulator::GroupsAccumulator +pub fn filtered_null_mask( + opt_filter: Option<&BooleanArray>, + input: &dyn Array, +) -> Option { + let opt_filter = opt_filter.and_then(filter_to_nulls); + NullBuffer::union(opt_filter.as_ref(), input.nulls()) +} diff --git a/datafusion/functions-aggregate/src/average.rs b/datafusion/functions-aggregate/src/average.rs index 1be3cd6b0714..ddad76a8734b 100644 --- a/datafusion/functions-aggregate/src/average.rs +++ b/datafusion/functions-aggregate/src/average.rs @@ -19,8 +19,9 @@ use arrow::array::{ self, Array, ArrayRef, ArrowNativeTypeOp, ArrowNumericType, ArrowPrimitiveType, - AsArray, PrimitiveArray, PrimitiveBuilder, UInt64Array, + AsArray, BooleanArray, PrimitiveArray, PrimitiveBuilder, UInt64Array, }; + use arrow::compute::sum; use arrow::datatypes::{ i256, ArrowNativeType, DataType, Decimal128Type, Decimal256Type, DecimalType, Field, @@ -34,7 +35,12 @@ use datafusion_expr::Volatility::Immutable; use datafusion_expr::{ Accumulator, AggregateUDFImpl, EmitTo, GroupsAccumulator, ReversedUDAF, Signature, }; + use datafusion_functions_aggregate_common::aggregate::groups_accumulator::accumulate::NullState; +use datafusion_functions_aggregate_common::aggregate::groups_accumulator::nulls::{ + filtered_null_mask, set_nulls, +}; + use datafusion_functions_aggregate_common::utils::DecimalAverager; use log::debug; use std::any::Any; @@ -551,6 +557,30 @@ where Ok(()) } + fn convert_to_state( + &self, + values: &[ArrayRef], + opt_filter: Option<&BooleanArray>, + ) -> Result> { + let sums = values[0] + .as_primitive::() + .clone() + .with_data_type(self.sum_data_type.clone()); + let counts = UInt64Array::from_value(1, sums.len()); + + let nulls = filtered_null_mask(opt_filter, &sums); + + // set nulls on the arrays + let counts = set_nulls(counts, nulls.clone()); + let sums = set_nulls(sums, nulls); + + Ok(vec![Arc::new(counts) as ArrayRef, Arc::new(sums)]) + } + + fn supports_convert_to_state(&self) -> bool { + true + } + fn size(&self) -> usize { self.counts.capacity() * std::mem::size_of::() + self.sums.capacity() * std::mem::size_of::() diff --git a/datafusion/functions-nested/src/map.rs b/datafusion/functions-nested/src/map.rs index e218b501dcf1..b6068fdff0d5 100644 --- a/datafusion/functions-nested/src/map.rs +++ b/datafusion/functions-nested/src/map.rs @@ -15,17 +15,20 @@ // specific language governing permissions 
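As a usage sketch for the two helpers in the new `nulls.rs` module above (module path taken from the new file; the re-exports are assumed): the optional filter and the input's own validity are folded into a single mask, which is then stamped onto freshly built state columns, the same pattern the `avg` `convert_to_state` above follows.

```rust
use arrow::array::{Array, BooleanArray, Int64Array, UInt64Array};
use datafusion_functions_aggregate_common::aggregate::groups_accumulator::nulls::{
    filtered_null_mask, set_nulls,
};

fn main() {
    // Input values: [1, NULL, 3]
    let input = Int64Array::from(vec![Some(1), None, Some(3)]);
    // Filter: rows 0 and 1 pass, row 2 is filtered out
    let filter = BooleanArray::from(vec![Some(true), Some(true), Some(false)]);

    // Rows that are filtered out, or already NULL in the input, become NULL in the mask
    let nulls = filtered_null_mask(Some(&filter), &input);

    // Stamp the same mask onto a per-row state column, e.g. counts of 1
    let counts = set_nulls(UInt64Array::from_value(1, input.len()), nulls);
    assert_eq!(counts.null_count(), 2); // row 1 (NULL input) and row 2 (filtered)
}
```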
and limitations // under the License. -use crate::make_array::make_array; +use std::any::Any; +use std::collections::VecDeque; +use std::sync::Arc; + use arrow::array::ArrayData; -use arrow_array::{Array, ArrayRef, MapArray, StructArray}; +use arrow_array::{Array, ArrayRef, MapArray, OffsetSizeTrait, StructArray}; use arrow_buffer::{Buffer, ToByteSlice}; use arrow_schema::{DataType, Field, SchemaBuilder}; + use datafusion_common::{exec_err, ScalarValue}; use datafusion_expr::expr::ScalarFunction; use datafusion_expr::{ColumnarValue, Expr, ScalarUDFImpl, Signature, Volatility}; -use std::any::Any; -use std::collections::VecDeque; -use std::sync::Arc; + +use crate::make_array::make_array; /// Returns a map created from a key list and a value list pub fn map(keys: Vec, values: Vec) -> Expr { @@ -56,11 +59,11 @@ fn make_map_batch(args: &[ColumnarValue]) -> datafusion_common::Result Ok(array.value(0)), _ => exec_err!("Expected array, got {:?}", value), }, - ColumnarValue::Array(array) => exec_err!("Expected scalar, got {:?}", array), + ColumnarValue::Array(array) => Ok(array.to_owned()), } } @@ -81,6 +84,7 @@ fn make_map_batch_internal( keys: ArrayRef, values: ArrayRef, can_evaluate_to_const: bool, + data_type: DataType, ) -> datafusion_common::Result { if keys.null_count() > 0 { return exec_err!("map key cannot be null"); @@ -90,6 +94,14 @@ fn make_map_batch_internal( return exec_err!("map requires key and value lists to have the same length"); } + if !can_evaluate_to_const { + return if let DataType::LargeList(..) = data_type { + make_map_array_internal::(keys, values) + } else { + make_map_array_internal::(keys, values) + }; + } + let key_field = Arc::new(Field::new("key", keys.data_type().clone(), false)); let value_field = Arc::new(Field::new("value", values.data_type().clone(), true)); let mut entry_struct_buffer: VecDeque<(Arc, ArrayRef)> = VecDeque::new(); @@ -190,7 +202,6 @@ impl ScalarUDFImpl for MapFunc { make_map_batch(args) } } - fn get_element_type(data_type: &DataType) -> datafusion_common::Result<&DataType> { match data_type { DataType::List(element) => Ok(element.data_type()), @@ -202,3 +213,115 @@ fn get_element_type(data_type: &DataType) -> datafusion_common::Result<&DataType ), } } + +/// Helper function to create MapArray from array of values to support arrays for Map scalar function +/// +/// ``` text +/// Format of input KEYS and VALUES column +/// keys values +/// +---------------------+ +---------------------+ +/// | +-----------------+ | | +-----------------+ | +/// | | [k11, k12, k13] | | | | [v11, v12, v13] | | +/// | +-----------------+ | | +-----------------+ | +/// | | | | +/// | +-----------------+ | | +-----------------+ | +/// | | [k21, k22, k23] | | | | [v21, v22, v23] | | +/// | +-----------------+ | | +-----------------+ | +/// | | | | +/// | +-----------------+ | | +-----------------+ | +/// | |[k31, k32, k33] | | | |[v31, v32, v33] | | +/// | +-----------------+ | | +-----------------+ | +/// +---------------------+ +---------------------+ +/// ``` +/// Flattened keys and values array to user create `StructArray`, +/// which serves as inner child for `MapArray` +/// +/// ``` text +/// Flattened Flattened +/// Keys Values +/// +-----------+ +-----------+ +/// | +-------+ | | +-------+ | +/// | | k11 | | | | v11 | | +/// | +-------+ | | +-------+ | +/// | +-------+ | | +-------+ | +/// | | k12 | | | | v12 | | +/// | +-------+ | | +-------+ | +/// | +-------+ | | +-------+ | +/// | | k13 | | | | v13 | | +/// | +-------+ | | +-------+ | +/// | +-------+ | | 
+-------+ | +/// | | k21 | | | | v21 | | +/// | +-------+ | | +-------+ | +/// | +-------+ | | +-------+ | +/// | | k22 | | | | v22 | | +/// | +-------+ | | +-------+ | +/// | +-------+ | | +-------+ | +/// | | k23 | | | | v23 | | +/// | +-------+ | | +-------+ | +/// | +-------+ | | +-------+ | +/// | | k31 | | | | v31 | | +/// | +-------+ | | +-------+ | +/// | +-------+ | | +-------+ | +/// | | k32 | | | | v32 | | +/// | +-------+ | | +-------+ | +/// | +-------+ | | +-------+ | +/// | | k33 | | | | v33 | | +/// | +-------+ | | +-------+ | +/// +-----------+ +-----------+ +/// ```text + +fn make_map_array_internal( + keys: ArrayRef, + values: ArrayRef, +) -> datafusion_common::Result { + let mut offset_buffer = vec![O::zero()]; + let mut running_offset = O::zero(); + + let keys = datafusion_common::utils::list_to_arrays::(keys); + let values = datafusion_common::utils::list_to_arrays::(values); + + let mut key_array_vec = vec![]; + let mut value_array_vec = vec![]; + for (k, v) in keys.iter().zip(values.iter()) { + running_offset = running_offset.add(O::usize_as(k.len())); + offset_buffer.push(running_offset); + key_array_vec.push(k.as_ref()); + value_array_vec.push(v.as_ref()); + } + + // concatenate all the arrays + let flattened_keys = arrow::compute::concat(key_array_vec.as_ref())?; + if flattened_keys.null_count() > 0 { + return exec_err!("keys cannot be null"); + } + let flattened_values = arrow::compute::concat(value_array_vec.as_ref())?; + + let fields = vec![ + Arc::new(Field::new("key", flattened_keys.data_type().clone(), false)), + Arc::new(Field::new( + "value", + flattened_values.data_type().clone(), + true, + )), + ]; + + let struct_data = ArrayData::builder(DataType::Struct(fields.into())) + .len(flattened_keys.len()) + .add_child_data(flattened_keys.to_data()) + .add_child_data(flattened_values.to_data()) + .build()?; + + let map_data = ArrayData::builder(DataType::Map( + Arc::new(Field::new( + "entries", + struct_data.data_type().clone(), + false, + )), + false, + )) + .len(keys.len()) + .add_child_data(struct_data) + .add_buffer(Buffer::from_slice_ref(offset_buffer.as_slice())) + .build()?; + Ok(ColumnarValue::Array(Arc::new(MapArray::from(map_data)))) +} diff --git a/datafusion/functions/src/core/mod.rs b/datafusion/functions/src/core/mod.rs index 8c5121397284..062a4a104d54 100644 --- a/datafusion/functions/src/core/mod.rs +++ b/datafusion/functions/src/core/mod.rs @@ -94,6 +94,7 @@ pub fn functions() -> Vec> { nvl2(), arrow_typeof(), named_struct(), + get_field(), coalesce(), ] } diff --git a/datafusion/functions/src/datetime/common.rs b/datafusion/functions/src/datetime/common.rs index 4f48ab188403..6048eeeaa554 100644 --- a/datafusion/functions/src/datetime/common.rs +++ b/datafusion/functions/src/datetime/common.rs @@ -28,7 +28,9 @@ use chrono::{DateTime, TimeZone, Utc}; use itertools::Either; use datafusion_common::cast::as_generic_string_array; -use datafusion_common::{exec_err, DataFusionError, Result, ScalarType, ScalarValue}; +use datafusion_common::{ + exec_err, unwrap_or_internal_err, DataFusionError, Result, ScalarType, ScalarValue, +}; use datafusion_expr::ColumnarValue; /// Error message if nanosecond conversion request beyond supported interval @@ -227,46 +229,34 @@ where // if the first argument is a scalar utf8 all arguments are expected to be scalar utf8 ColumnarValue::Scalar(scalar) => match scalar { ScalarValue::Utf8(a) | ScalarValue::LargeUtf8(a) => { - let mut val: Option> = None; - let mut err: Option = None; + let a = a.as_ref(); + // ASK: Why 
do we trust `a` to be non-null at this point? + let a = unwrap_or_internal_err!(a); - match a { - Some(a) => { - // enumerate all the values finding the first one that returns an Ok result - for (pos, v) in args.iter().enumerate().skip(1) { - if let ColumnarValue::Scalar(s) = v { - if let ScalarValue::Utf8(x) | ScalarValue::LargeUtf8(x) = - s - { - if let Some(s) = x { - match op(a.as_str(), s.as_str()) { - Ok(r) => { - val = Some(Ok(ColumnarValue::Scalar( - S::scalar(Some(op2(r))), - ))); - break; - } - Err(e) => { - err = Some(e); - } - } - } - } else { - return exec_err!("Unsupported data type {s:?} for function {name}, arg # {pos}"); - } - } else { - return exec_err!("Unsupported data type {v:?} for function {name}, arg # {pos}"); + let mut ret = None; + + for (pos, v) in args.iter().enumerate().skip(1) { + let ColumnarValue::Scalar( + ScalarValue::Utf8(x) | ScalarValue::LargeUtf8(x), + ) = v + else { + return exec_err!("Unsupported data type {v:?} for function {name}, arg # {pos}"); + }; + + if let Some(s) = x { + match op(a.as_str(), s.as_str()) { + Ok(r) => { + ret = Some(Ok(ColumnarValue::Scalar(S::scalar(Some( + op2(r), + ))))); + break; } + Err(e) => ret = Some(Err(e)), } } - None => (), } - if let Some(v) = val { - v - } else { - Err(err.unwrap()) - } + unwrap_or_internal_err!(ret) } other => { exec_err!("Unsupported data type {other:?} for function {name}") diff --git a/datafusion/functions/src/datetime/to_date.rs b/datafusion/functions/src/datetime/to_date.rs index e491c0b55508..cc5ffa73c8f1 100644 --- a/datafusion/functions/src/datetime/to_date.rs +++ b/datafusion/functions/src/datetime/to_date.rs @@ -58,7 +58,7 @@ impl ToDateFunc { }, "to_date", ), - n if n >= 2 => handle_multiple::( + 2.. => handle_multiple::( args, |s, format| { string_to_timestamp_nanos_formatted(s, format) @@ -72,7 +72,7 @@ impl ToDateFunc { |n| n, "to_date", ), - _ => exec_err!("Unsupported 0 argument count for function to_date"), + 0 => exec_err!("Unsupported 0 argument count for function to_date"), } } } diff --git a/datafusion/functions/src/string/btrim.rs b/datafusion/functions/src/string/btrim.rs index 349928d09664..371a11c82c54 100644 --- a/datafusion/functions/src/string/btrim.rs +++ b/datafusion/functions/src/string/btrim.rs @@ -16,9 +16,8 @@ // under the License. use arrow::array::{ArrayRef, OffsetSizeTrait}; -use std::any::Any; - use arrow::datatypes::DataType; +use std::any::Any; use datafusion_common::{exec_err, Result}; use datafusion_expr::function::Hint; @@ -32,7 +31,8 @@ use crate::utils::{make_scalar_function, utf8_to_str_type}; /// Returns the longest string with leading and trailing characters removed. If the characters are not specified, whitespace is removed. /// btrim('xyxtrimyyx', 'xyz') = 'trim' fn btrim(args: &[ArrayRef]) -> Result { - general_trim::(args, TrimType::Both) + let use_string_view = args[0].data_type() == &DataType::Utf8View; + general_trim::(args, TrimType::Both, use_string_view) } #[derive(Debug)] @@ -52,7 +52,15 @@ impl BTrimFunc { use DataType::*; Self { signature: Signature::one_of( - vec![Exact(vec![Utf8]), Exact(vec![Utf8, Utf8])], + vec![ + // Planner attempts coercion to the target type starting with the most preferred candidate. + // For example, given input `(Utf8View, Utf8)`, it first tries coercing to `(Utf8View, Utf8View)`. + // If that fails, it proceeds to `(Utf8, Utf8)`. 
+ Exact(vec![Utf8View, Utf8View]), + Exact(vec![Utf8, Utf8]), + Exact(vec![Utf8View]), + Exact(vec![Utf8]), + ], Volatility::Immutable, ), aliases: vec![String::from("trim")], @@ -79,7 +87,7 @@ impl ScalarUDFImpl for BTrimFunc { fn invoke(&self, args: &[ColumnarValue]) -> Result { match args[0].data_type() { - DataType::Utf8 => make_scalar_function( + DataType::Utf8 | DataType::Utf8View => make_scalar_function( btrim::, vec![Hint::Pad, Hint::AcceptsSingular], )(args), @@ -87,7 +95,10 @@ impl ScalarUDFImpl for BTrimFunc { btrim::, vec![Hint::Pad, Hint::AcceptsSingular], )(args), - other => exec_err!("Unsupported data type {other:?} for function btrim"), + other => exec_err!( + "Unsupported data type {other:?} for function btrim,\ + expected Utf8, LargeUtf8 or Utf8View." + ), } } diff --git a/datafusion/functions/src/string/common.rs b/datafusion/functions/src/string/common.rs index d36bd5cecc47..7037c1d1c3c3 100644 --- a/datafusion/functions/src/string/common.rs +++ b/datafusion/functions/src/string/common.rs @@ -25,7 +25,7 @@ use arrow::array::{ use arrow::buffer::{Buffer, MutableBuffer, NullBuffer}; use arrow::datatypes::DataType; -use datafusion_common::cast::as_generic_string_array; +use datafusion_common::cast::{as_generic_string_array, as_string_view_array}; use datafusion_common::Result; use datafusion_common::{exec_err, ScalarValue}; use datafusion_expr::ColumnarValue; @@ -49,6 +49,7 @@ impl Display for TrimType { pub(crate) fn general_trim( args: &[ArrayRef], trim_type: TrimType, + use_string_view: bool, ) -> Result { let func = match trim_type { TrimType::Left => |input, pattern: &str| { @@ -68,6 +69,74 @@ pub(crate) fn general_trim( }, }; + if use_string_view { + string_view_trim::(trim_type, func, args) + } else { + string_trim::(trim_type, func, args) + } +} + +// removing 'a will cause compiler complaining lifetime of `func` +fn string_view_trim<'a, T: OffsetSizeTrait>( + trim_type: TrimType, + func: fn(&'a str, &'a str) -> &'a str, + args: &'a [ArrayRef], +) -> Result { + let string_array = as_string_view_array(&args[0])?; + + match args.len() { + 1 => { + let result = string_array + .iter() + .map(|string| string.map(|string: &str| func(string, " "))) + .collect::>(); + + Ok(Arc::new(result) as ArrayRef) + } + 2 => { + let characters_array = as_string_view_array(&args[1])?; + + if characters_array.len() == 1 { + if characters_array.is_null(0) { + return Ok(new_null_array( + // The schema is expecting utf8 as null + &DataType::Utf8, + string_array.len(), + )); + } + + let characters = characters_array.value(0); + let result = string_array + .iter() + .map(|item| item.map(|string| func(string, characters))) + .collect::>(); + return Ok(Arc::new(result) as ArrayRef); + } + + let result = string_array + .iter() + .zip(characters_array.iter()) + .map(|(string, characters)| match (string, characters) { + (Some(string), Some(characters)) => Some(func(string, characters)), + _ => None, + }) + .collect::>(); + + Ok(Arc::new(result) as ArrayRef) + } + other => { + exec_err!( + "{trim_type} was called with {other} arguments. It requires at least 1 and at most 2." 
+ ) + } + } +} + +fn string_trim<'a, T: OffsetSizeTrait>( + trim_type: TrimType, + func: fn(&'a str, &'a str) -> &'a str, + args: &'a [ArrayRef], +) -> Result { let string_array = as_generic_string_array::(&args[0])?; match args.len() { @@ -84,7 +153,10 @@ pub(crate) fn general_trim( if characters_array.len() == 1 { if characters_array.is_null(0) { - return Ok(new_null_array(args[0].data_type(), args[0].len())); + return Ok(new_null_array( + string_array.data_type(), + string_array.len(), + )); } let characters = characters_array.value(0); @@ -109,7 +181,7 @@ pub(crate) fn general_trim( other => { exec_err!( "{trim_type} was called with {other} arguments. It requires at least 1 and at most 2." - ) + ) } } } diff --git a/datafusion/functions/src/string/ends_with.rs b/datafusion/functions/src/string/ends_with.rs index b72cf0f66fa6..03a1795954d0 100644 --- a/datafusion/functions/src/string/ends_with.rs +++ b/datafusion/functions/src/string/ends_with.rs @@ -18,12 +18,10 @@ use std::any::Any; use std::sync::Arc; -use arrow::array::{ArrayRef, OffsetSizeTrait}; +use arrow::array::ArrayRef; use arrow::datatypes::DataType; -use arrow::datatypes::DataType::Boolean; -use datafusion_common::cast::as_generic_string_array; -use datafusion_common::{exec_err, Result}; +use datafusion_common::{internal_err, Result}; use datafusion_expr::TypeSignature::*; use datafusion_expr::{ColumnarValue, Volatility}; use datafusion_expr::{ScalarUDFImpl, Signature}; @@ -43,14 +41,15 @@ impl Default for EndsWithFunc { impl EndsWithFunc { pub fn new() -> Self { - use DataType::*; Self { signature: Signature::one_of( vec![ - Exact(vec![Utf8, Utf8]), - Exact(vec![Utf8, LargeUtf8]), - Exact(vec![LargeUtf8, Utf8]), - Exact(vec![LargeUtf8, LargeUtf8]), + // Planner attempts coercion to the target type starting with the most preferred candidate. + // For example, given input `(Utf8View, Utf8)`, it first tries coercing to `(Utf8View, Utf8View)`. + // If that fails, it proceeds to `(Utf8, Utf8)`. + Exact(vec![DataType::Utf8View, DataType::Utf8View]), + Exact(vec![DataType::Utf8, DataType::Utf8]), + Exact(vec![DataType::LargeUtf8, DataType::LargeUtf8]), ], Volatility::Immutable, ), @@ -72,15 +71,16 @@ impl ScalarUDFImpl for EndsWithFunc { } fn return_type(&self, _arg_types: &[DataType]) -> Result { - Ok(Boolean) + Ok(DataType::Boolean) } fn invoke(&self, args: &[ColumnarValue]) -> Result { match args[0].data_type() { - DataType::Utf8 => make_scalar_function(ends_with::, vec![])(args), - DataType::LargeUtf8 => make_scalar_function(ends_with::, vec![])(args), + DataType::Utf8View | DataType::Utf8 | DataType::LargeUtf8 => { + make_scalar_function(ends_with, vec![])(args) + } other => { - exec_err!("Unsupported data type {other:?} for function ends_with") + internal_err!("Unsupported data type {other:?} for function ends_with. Expected Utf8, LargeUtf8 or Utf8View")? } } } @@ -88,11 +88,8 @@ impl ScalarUDFImpl for EndsWithFunc { /// Returns true if string ends with suffix. 
/// ends_with('alphabet', 'abet') = 't' -pub fn ends_with(args: &[ArrayRef]) -> Result { - let left = as_generic_string_array::(&args[0])?; - let right = as_generic_string_array::(&args[1])?; - - let result = arrow::compute::kernels::comparison::ends_with(left, right)?; +pub fn ends_with(args: &[ArrayRef]) -> Result { + let result = arrow::compute::kernels::comparison::ends_with(&args[0], &args[1])?; Ok(Arc::new(result) as ArrayRef) } diff --git a/datafusion/functions/src/string/initcap.rs b/datafusion/functions/src/string/initcap.rs index 864179d130fd..4e1eb213ef57 100644 --- a/datafusion/functions/src/string/initcap.rs +++ b/datafusion/functions/src/string/initcap.rs @@ -18,10 +18,10 @@ use std::any::Any; use std::sync::Arc; -use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait}; +use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait, StringArray}; use arrow::datatypes::DataType; -use datafusion_common::cast::as_generic_string_array; +use datafusion_common::cast::{as_generic_string_array, as_string_view_array}; use datafusion_common::{exec_err, Result}; use datafusion_expr::{ColumnarValue, Volatility}; use datafusion_expr::{ScalarUDFImpl, Signature}; @@ -45,7 +45,7 @@ impl InitcapFunc { Self { signature: Signature::uniform( 1, - vec![Utf8, LargeUtf8], + vec![Utf8, LargeUtf8, Utf8View], Volatility::Immutable, ), } @@ -73,6 +73,7 @@ impl ScalarUDFImpl for InitcapFunc { match args[0].data_type() { DataType::Utf8 => make_scalar_function(initcap::, vec![])(args), DataType::LargeUtf8 => make_scalar_function(initcap::, vec![])(args), + DataType::Utf8View => make_scalar_function(initcap_utf8view, vec![])(args), other => { exec_err!("Unsupported data type {other:?} for function initcap") } @@ -88,28 +89,41 @@ fn initcap(args: &[ArrayRef]) -> Result { // first map is the iterator, second is for the `Option<_>` let result = string_array .iter() - .map(|string| { - string.map(|string: &str| { - let mut char_vector = Vec::::new(); - let mut previous_character_letter_or_number = false; - for c in string.chars() { - if previous_character_letter_or_number { - char_vector.push(c.to_ascii_lowercase()); - } else { - char_vector.push(c.to_ascii_uppercase()); - } - previous_character_letter_or_number = c.is_ascii_uppercase() - || c.is_ascii_lowercase() - || c.is_ascii_digit(); - } - char_vector.iter().collect::() - }) - }) + .map(initcap_string) .collect::>(); Ok(Arc::new(result) as ArrayRef) } +fn initcap_utf8view(args: &[ArrayRef]) -> Result { + let string_view_array = as_string_view_array(&args[0])?; + + let result = string_view_array + .iter() + .map(initcap_string) + .collect::(); + + Ok(Arc::new(result) as ArrayRef) +} + +fn initcap_string(string: Option<&str>) -> Option { + let mut char_vector = Vec::::new(); + string.map(|string: &str| { + char_vector.clear(); + let mut previous_character_letter_or_number = false; + for c in string.chars() { + if previous_character_letter_or_number { + char_vector.push(c.to_ascii_lowercase()); + } else { + char_vector.push(c.to_ascii_uppercase()); + } + previous_character_letter_or_number = + c.is_ascii_uppercase() || c.is_ascii_lowercase() || c.is_ascii_digit(); + } + char_vector.iter().collect::() + }) +} + #[cfg(test)] mod tests { use crate::string::initcap::InitcapFunc; @@ -153,6 +167,44 @@ mod tests { Utf8, StringArray ); + test_function!( + InitcapFunc::new(), + &[ColumnarValue::Scalar(ScalarValue::Utf8View(Some( + "hi THOMAS".to_string() + )))], + Ok(Some("Hi Thomas")), + &str, + Utf8, + StringArray + ); + test_function!( + 
InitcapFunc::new(), + &[ColumnarValue::Scalar(ScalarValue::Utf8View(Some( + "hi THOMAS wIth M0re ThAN 12 ChaRs".to_string() + )))], + Ok(Some("Hi Thomas With M0re Than 12 Chars")), + &str, + Utf8, + StringArray + ); + test_function!( + InitcapFunc::new(), + &[ColumnarValue::Scalar(ScalarValue::Utf8View(Some( + "".to_string() + )))], + Ok(Some("")), + &str, + Utf8, + StringArray + ); + test_function!( + InitcapFunc::new(), + &[ColumnarValue::Scalar(ScalarValue::Utf8View(None))], + Ok(None), + &str, + Utf8, + StringArray + ); Ok(()) } diff --git a/datafusion/functions/src/string/levenshtein.rs b/datafusion/functions/src/string/levenshtein.rs index 3edf6de8c863..430c402a50c5 100644 --- a/datafusion/functions/src/string/levenshtein.rs +++ b/datafusion/functions/src/string/levenshtein.rs @@ -22,7 +22,7 @@ use arrow::array::{ArrayRef, Int32Array, Int64Array, OffsetSizeTrait}; use arrow::datatypes::DataType; use crate::utils::{make_scalar_function, utf8_to_int_type}; -use datafusion_common::cast::as_generic_string_array; +use datafusion_common::cast::{as_generic_string_array, as_string_view_array}; use datafusion_common::utils::datafusion_strsim; use datafusion_common::{exec_err, Result}; use datafusion_expr::ColumnarValue; @@ -42,10 +42,13 @@ impl Default for LevenshteinFunc { impl LevenshteinFunc { pub fn new() -> Self { - use DataType::*; Self { signature: Signature::one_of( - vec![Exact(vec![Utf8, Utf8]), Exact(vec![LargeUtf8, LargeUtf8])], + vec![ + Exact(vec![DataType::Utf8View, DataType::Utf8View]), + Exact(vec![DataType::Utf8, DataType::Utf8]), + Exact(vec![DataType::LargeUtf8, DataType::LargeUtf8]), + ], Volatility::Immutable, ), } @@ -71,7 +74,9 @@ impl ScalarUDFImpl for LevenshteinFunc { fn invoke(&self, args: &[ColumnarValue]) -> Result { match args[0].data_type() { - DataType::Utf8 => make_scalar_function(levenshtein::, vec![])(args), + DataType::Utf8View | DataType::Utf8 => { + make_scalar_function(levenshtein::, vec![])(args) + } DataType::LargeUtf8 => make_scalar_function(levenshtein::, vec![])(args), other => { exec_err!("Unsupported data type {other:?} for function levenshtein") @@ -89,10 +94,26 @@ pub fn levenshtein(args: &[ArrayRef]) -> Result { args.len() ); } - let str1_array = as_generic_string_array::(&args[0])?; - let str2_array = as_generic_string_array::(&args[1])?; + match args[0].data_type() { + DataType::Utf8View => { + let str1_array = as_string_view_array(&args[0])?; + let str2_array = as_string_view_array(&args[1])?; + let result = str1_array + .iter() + .zip(str2_array.iter()) + .map(|(string1, string2)| match (string1, string2) { + (Some(string1), Some(string2)) => { + Some(datafusion_strsim::levenshtein(string1, string2) as i32) + } + _ => None, + }) + .collect::(); + Ok(Arc::new(result) as ArrayRef) + } DataType::Utf8 => { + let str1_array = as_generic_string_array::(&args[0])?; + let str2_array = as_generic_string_array::(&args[1])?; let result = str1_array .iter() .zip(str2_array.iter()) @@ -106,6 +127,8 @@ pub fn levenshtein(args: &[ArrayRef]) -> Result { Ok(Arc::new(result) as ArrayRef) } DataType::LargeUtf8 => { + let str1_array = as_generic_string_array::(&args[0])?; + let str2_array = as_generic_string_array::(&args[1])?; let result = str1_array .iter() .zip(str2_array.iter()) @@ -120,7 +143,7 @@ pub fn levenshtein(args: &[ArrayRef]) -> Result { } other => { exec_err!( - "levenshtein was called with {other} datatype arguments. It requires Utf8 or LargeUtf8." + "levenshtein was called with {other} datatype arguments. 
It requires Utf8View, Utf8 or LargeUtf8." ) } } diff --git a/datafusion/functions/src/string/ltrim.rs b/datafusion/functions/src/string/ltrim.rs index de14bbaa2bcf..b7b27afcee1f 100644 --- a/datafusion/functions/src/string/ltrim.rs +++ b/datafusion/functions/src/string/ltrim.rs @@ -32,7 +32,8 @@ use crate::utils::{make_scalar_function, utf8_to_str_type}; /// Returns the longest string with leading characters removed. If the characters are not specified, whitespace is removed. /// ltrim('zzzytest', 'xyz') = 'test' fn ltrim(args: &[ArrayRef]) -> Result { - general_trim::(args, TrimType::Left) + let use_string_view = args[0].data_type() == &DataType::Utf8View; + general_trim::(args, TrimType::Left, use_string_view) } #[derive(Debug)] @@ -51,7 +52,15 @@ impl LtrimFunc { use DataType::*; Self { signature: Signature::one_of( - vec![Exact(vec![Utf8]), Exact(vec![Utf8, Utf8])], + vec![ + // Planner attempts coercion to the target type starting with the most preferred candidate. + // For example, given input `(Utf8View, Utf8)`, it first tries coercing to `(Utf8View, Utf8View)`. + // If that fails, it proceeds to `(Utf8, Utf8)`. + Exact(vec![Utf8View, Utf8View]), + Exact(vec![Utf8, Utf8]), + Exact(vec![Utf8View]), + Exact(vec![Utf8]), + ], Volatility::Immutable, ), } @@ -77,7 +86,7 @@ impl ScalarUDFImpl for LtrimFunc { fn invoke(&self, args: &[ColumnarValue]) -> Result { match args[0].data_type() { - DataType::Utf8 => make_scalar_function( + DataType::Utf8 | DataType::Utf8View => make_scalar_function( ltrim::, vec![Hint::Pad, Hint::AcceptsSingular], )(args), @@ -85,7 +94,10 @@ impl ScalarUDFImpl for LtrimFunc { ltrim::, vec![Hint::Pad, Hint::AcceptsSingular], )(args), - other => exec_err!("Unsupported data type {other:?} for function ltrim"), + other => exec_err!( + "Unsupported data type {other:?} for function ltrim,\ + expected Utf8, LargeUtf8 or Utf8View." 
+ ), } } } diff --git a/datafusion/functions/src/string/octet_length.rs b/datafusion/functions/src/string/octet_length.rs index 12980fab1f11..f792914d862e 100644 --- a/datafusion/functions/src/string/octet_length.rs +++ b/datafusion/functions/src/string/octet_length.rs @@ -43,7 +43,7 @@ impl OctetLengthFunc { Self { signature: Signature::uniform( 1, - vec![Utf8, LargeUtf8], + vec![Utf8, LargeUtf8, Utf8View], Volatility::Immutable, ), } @@ -84,6 +84,9 @@ impl ScalarUDFImpl for OctetLengthFunc { ScalarValue::LargeUtf8(v) => Ok(ColumnarValue::Scalar( ScalarValue::Int64(v.as_ref().map(|x| x.len() as i64)), )), + ScalarValue::Utf8View(v) => Ok(ColumnarValue::Scalar( + ScalarValue::Int32(v.as_ref().map(|x| x.len() as i32)), + )), _ => unreachable!(), }, } @@ -176,6 +179,36 @@ mod tests { Int32, Int32Array ); + test_function!( + OctetLengthFunc::new(), + &[ColumnarValue::Scalar(ScalarValue::Utf8View(Some( + String::from("joséjoséjoséjosé") + )))], + Ok(Some(20)), + i32, + Int32, + Int32Array + ); + test_function!( + OctetLengthFunc::new(), + &[ColumnarValue::Scalar(ScalarValue::Utf8View(Some( + String::from("josé") + )))], + Ok(Some(5)), + i32, + Int32, + Int32Array + ); + test_function!( + OctetLengthFunc::new(), + &[ColumnarValue::Scalar(ScalarValue::Utf8View(Some( + String::from("") + )))], + Ok(Some(0)), + i32, + Int32, + Int32Array + ); Ok(()) } diff --git a/datafusion/functions/src/string/rtrim.rs b/datafusion/functions/src/string/rtrim.rs index 2d29b50cb173..ec53f3ed7430 100644 --- a/datafusion/functions/src/string/rtrim.rs +++ b/datafusion/functions/src/string/rtrim.rs @@ -32,7 +32,8 @@ use crate::utils::{make_scalar_function, utf8_to_str_type}; /// Returns the longest string with trailing characters removed. If the characters are not specified, whitespace is removed. /// rtrim('testxxzx', 'xyz') = 'test' fn rtrim(args: &[ArrayRef]) -> Result { - general_trim::(args, TrimType::Right) + let use_string_view = args[0].data_type() == &DataType::Utf8View; + general_trim::(args, TrimType::Right, use_string_view) } #[derive(Debug)] @@ -51,7 +52,15 @@ impl RtrimFunc { use DataType::*; Self { signature: Signature::one_of( - vec![Exact(vec![Utf8]), Exact(vec![Utf8, Utf8])], + vec![ + // Planner attempts coercion to the target type starting with the most preferred candidate. + // For example, given input `(Utf8View, Utf8)`, it first tries coercing to `(Utf8View, Utf8View)`. + // If that fails, it proceeds to `(Utf8, Utf8)`. + Exact(vec![Utf8View, Utf8View]), + Exact(vec![Utf8, Utf8]), + Exact(vec![Utf8View]), + Exact(vec![Utf8]), + ], Volatility::Immutable, ), } @@ -77,7 +86,7 @@ impl ScalarUDFImpl for RtrimFunc { fn invoke(&self, args: &[ColumnarValue]) -> Result { match args[0].data_type() { - DataType::Utf8 => make_scalar_function( + DataType::Utf8 | DataType::Utf8View => make_scalar_function( rtrim::, vec![Hint::Pad, Hint::AcceptsSingular], )(args), @@ -85,7 +94,10 @@ impl ScalarUDFImpl for RtrimFunc { rtrim::, vec![Hint::Pad, Hint::AcceptsSingular], )(args), - other => exec_err!("Unsupported data type {other:?} for function rtrim"), + other => exec_err!( + "Unsupported data type {other:?} for function rtrim,\ + expected Utf8, LargeUtf8 or Utf8View." 
+ ), } } } diff --git a/datafusion/functions/src/unicode/find_in_set.rs b/datafusion/functions/src/unicode/find_in_set.rs index 7c864bc191d7..41a2b9d9e72d 100644 --- a/datafusion/functions/src/unicode/find_in_set.rs +++ b/datafusion/functions/src/unicode/find_in_set.rs @@ -19,11 +19,11 @@ use std::any::Any; use std::sync::Arc; use arrow::array::{ - ArrayRef, ArrowPrimitiveType, GenericStringArray, OffsetSizeTrait, PrimitiveArray, + ArrayAccessor, ArrayIter, ArrayRef, ArrowPrimitiveType, AsArray, OffsetSizeTrait, + PrimitiveArray, }; use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type}; -use datafusion_common::cast::as_generic_string_array; use datafusion_common::{exec_err, Result}; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; @@ -46,7 +46,11 @@ impl FindInSetFunc { use DataType::*; Self { signature: Signature::one_of( - vec![Exact(vec![Utf8, Utf8]), Exact(vec![LargeUtf8, LargeUtf8])], + vec![ + Exact(vec![Utf8View, Utf8View]), + Exact(vec![Utf8, Utf8]), + Exact(vec![LargeUtf8, LargeUtf8]), + ], Volatility::Immutable, ), } @@ -71,41 +75,52 @@ impl ScalarUDFImpl for FindInSetFunc { } fn invoke(&self, args: &[ColumnarValue]) -> Result { - match args[0].data_type() { - DataType::Utf8 => { - make_scalar_function(find_in_set::, vec![])(args) - } - DataType::LargeUtf8 => { - make_scalar_function(find_in_set::, vec![])(args) - } - other => { - exec_err!("Unsupported data type {other:?} for function find_in_set") - } - } + make_scalar_function(find_in_set, vec![])(args) } } ///Returns a value in the range of 1 to N if the string str is in the string list strlist consisting of N substrings ///A string list is a string composed of substrings separated by , characters. -pub fn find_in_set(args: &[ArrayRef]) -> Result -where - T::Native: OffsetSizeTrait, -{ +fn find_in_set(args: &[ArrayRef]) -> Result { if args.len() != 2 { return exec_err!( "find_in_set was called with {} arguments. 
It requires 2.", args.len() ); } + match args[0].data_type() { + DataType::Utf8 => { + let string_array = args[0].as_string::(); + let str_list_array = args[1].as_string::(); + find_in_set_general::(string_array, str_list_array) + } + DataType::LargeUtf8 => { + let string_array = args[0].as_string::(); + let str_list_array = args[1].as_string::(); + find_in_set_general::(string_array, str_list_array) + } + DataType::Utf8View => { + let string_array = args[0].as_string_view(); + let str_list_array = args[1].as_string_view(); + find_in_set_general::(string_array, str_list_array) + } + other => { + exec_err!("Unsupported data type {other:?} for function find_in_set") + } + } +} - let str_array: &GenericStringArray = - as_generic_string_array::(&args[0])?; - let str_list_array: &GenericStringArray = - as_generic_string_array::(&args[1])?; - - let result = str_array - .iter() - .zip(str_list_array.iter()) +pub fn find_in_set_general<'a, T: ArrowPrimitiveType, V: ArrayAccessor>( + string_array: V, + str_list_array: V, +) -> Result +where + T::Native: OffsetSizeTrait, +{ + let string_iter = ArrayIter::new(string_array); + let str_list_iter = ArrayIter::new(str_list_array); + let result = string_iter + .zip(str_list_iter) .map(|(string, str_list)| match (string, str_list) { (Some(string), Some(str_list)) => { let mut res = 0; diff --git a/datafusion/functions/src/unicode/right.rs b/datafusion/functions/src/unicode/right.rs index 20cbbe020ff1..9d542bb2c006 100644 --- a/datafusion/functions/src/unicode/right.rs +++ b/datafusion/functions/src/unicode/right.rs @@ -19,17 +19,21 @@ use std::any::Any; use std::cmp::{max, Ordering}; use std::sync::Arc; -use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait}; +use arrow::array::{ + Array, ArrayAccessor, ArrayIter, ArrayRef, GenericStringArray, Int64Array, + OffsetSizeTrait, +}; use arrow::datatypes::DataType; -use datafusion_common::cast::{as_generic_string_array, as_int64_array}; +use crate::utils::{make_scalar_function, utf8_to_str_type}; +use datafusion_common::cast::{ + as_generic_string_array, as_int64_array, as_string_view_array, +}; use datafusion_common::exec_err; use datafusion_common::Result; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; -use crate::utils::{make_scalar_function, utf8_to_str_type}; - #[derive(Debug)] pub struct RightFunc { signature: Signature, @@ -46,7 +50,11 @@ impl RightFunc { use DataType::*; Self { signature: Signature::one_of( - vec![Exact(vec![Utf8, Int64]), Exact(vec![LargeUtf8, Int64])], + vec![ + Exact(vec![Utf8View, Int64]), + Exact(vec![Utf8, Int64]), + Exact(vec![LargeUtf8, Int64]), + ], Volatility::Immutable, ), } @@ -72,9 +80,14 @@ impl ScalarUDFImpl for RightFunc { fn invoke(&self, args: &[ColumnarValue]) -> Result { match args[0].data_type() { - DataType::Utf8 => make_scalar_function(right::, vec![])(args), + DataType::Utf8 | DataType::Utf8View => { + make_scalar_function(right::, vec![])(args) + } DataType::LargeUtf8 => make_scalar_function(right::, vec![])(args), - other => exec_err!("Unsupported data type {other:?} for function right"), + other => exec_err!( + "Unsupported data type {other:?} for function right,\ + expected Utf8View, Utf8 or LargeUtf8." 
+ ), } } } @@ -83,11 +96,26 @@ impl ScalarUDFImpl for RightFunc { /// right('abcde', 2) = 'de' /// The implementation uses UTF-8 code points as characters pub fn right(args: &[ArrayRef]) -> Result { - let string_array = as_generic_string_array::(&args[0])?; let n_array = as_int64_array(&args[1])?; + if args[0].data_type() == &DataType::Utf8View { + // string_view_right(args) + let string_array = as_string_view_array(&args[0])?; + right_impl::(&mut string_array.iter(), n_array) + } else { + // string_right::(args) + let string_array = &as_generic_string_array::(&args[0])?; + right_impl::(&mut string_array.iter(), n_array) + } +} - let result = string_array - .iter() +// Currently the return type can only be Utf8 or LargeUtf8, to reach fully support, we need +// to edit the `get_optimal_return_type` in utils.rs to make the udfs be able to return Utf8View +// See https://github.com/apache/datafusion/issues/11790#issuecomment-2283777166 +fn right_impl<'a, T: OffsetSizeTrait, V: ArrayAccessor>( + string_array_iter: &mut ArrayIter, + n_array: &Int64Array, +) -> Result { + let result = string_array_iter .zip(n_array.iter()) .map(|(string, n)| match (string, n) { (Some(string), Some(n)) => match n.cmp(&0) { diff --git a/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs b/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs index 8ff00917dcb1..593dab2bc9a2 100644 --- a/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs +++ b/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs @@ -48,7 +48,13 @@ impl AnalyzerRule for CountWildcardRule { } fn is_wildcard(expr: &Expr) -> bool { - matches!(expr, Expr::Wildcard { qualifier: None }) + matches!( + expr, + Expr::Wildcard { + qualifier: None, + .. + } + ) } fn is_count_star_aggregate(aggregate_function: &AggregateFunction) -> bool { diff --git a/datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs b/datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs new file mode 100644 index 000000000000..53ba3042f522 --- /dev/null +++ b/datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs @@ -0,0 +1,304 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::sync::Arc; + +use crate::AnalyzerRule; +use datafusion_common::config::ConfigOptions; +use datafusion_common::tree_node::{Transformed, TransformedResult}; +use datafusion_common::{Column, Result}; +use datafusion_expr::builder::validate_unique_names; +use datafusion_expr::expr::PlannedReplaceSelectItem; +use datafusion_expr::utils::{ + expand_qualified_wildcard, expand_wildcard, find_base_plan, +}; +use datafusion_expr::{Expr, LogicalPlan, Projection, SubqueryAlias}; + +#[derive(Default)] +pub struct ExpandWildcardRule {} + +impl ExpandWildcardRule { + pub fn new() -> Self { + Self {} + } +} + +impl AnalyzerRule for ExpandWildcardRule { + fn analyze(&self, plan: LogicalPlan, _: &ConfigOptions) -> Result { + // Because the wildcard expansion is based on the schema of the input plan, + // we use `transform_up_with_subqueries` here. + plan.transform_up_with_subqueries(expand_internal).data() + } + + fn name(&self) -> &str { + "expand_wildcard_rule" + } +} + +fn expand_internal(plan: LogicalPlan) -> Result> { + match plan { + LogicalPlan::Projection(Projection { expr, input, .. }) => { + let projected_expr = expand_exprlist(&input, expr)?; + validate_unique_names("Projections", projected_expr.iter())?; + Ok(Transformed::yes( + Projection::try_new(projected_expr, Arc::clone(&input)) + .map(LogicalPlan::Projection)?, + )) + } + // The schema of the plan should also be updated if the child plan is transformed. + LogicalPlan::SubqueryAlias(SubqueryAlias { input, alias, .. }) => { + Ok(Transformed::yes( + SubqueryAlias::try_new(input, alias).map(LogicalPlan::SubqueryAlias)?, + )) + } + _ => Ok(Transformed::no(plan)), + } +} + +fn expand_exprlist(input: &LogicalPlan, expr: Vec) -> Result> { + let mut projected_expr = vec![]; + let input = find_base_plan(input); + for e in expr { + match e { + Expr::Wildcard { qualifier, options } => { + if let Some(qualifier) = qualifier { + let expanded = expand_qualified_wildcard( + &qualifier, + input.schema(), + Some(&options), + )?; + // If there is a REPLACE statement, replace that column with the given + // replace expression. Column name remains the same. + let replaced = if let Some(replace) = options.replace { + replace_columns(expanded, replace)? + } else { + expanded + }; + projected_expr.extend(replaced); + } else { + let expanded = + expand_wildcard(input.schema(), input, Some(&options))?; + // If there is a REPLACE statement, replace that column with the given + // replace expression. Column name remains the same. + let replaced = if let Some(replace) = options.replace { + replace_columns(expanded, replace)? + } else { + expanded + }; + projected_expr.extend(replaced); + } + } + // A workaround to handle the case when the column name is "*". + // We transform the expression to an Expr::Column through [Column::from_name] in many places. + // It would also convert the wildcard expression to a column expression with name "*".
+ Expr::Column(Column { + ref relation, + ref name, + }) => { + if name.eq("*") { + if let Some(qualifier) = relation { + projected_expr.extend(expand_qualified_wildcard( + qualifier, + input.schema(), + None, + )?); + } else { + projected_expr.extend(expand_wildcard( + input.schema(), + input, + None, + )?); + } + } else { + projected_expr.push(e.clone()); + } + } + _ => projected_expr.push(e), + } + } + Ok(projected_expr) +} + +/// If there is a REPLACE statement in the projected expression in the form of +/// "REPLACE (some_column_within_an_expr AS some_column)", this function replaces +/// that column with the given replace expression. Column name remains the same. +/// Multiple REPLACEs are also possible with comma separations. +fn replace_columns( + mut exprs: Vec, + replace: PlannedReplaceSelectItem, +) -> Result> { + for expr in exprs.iter_mut() { + if let Expr::Column(Column { name, .. }) = expr { + if let Some((_, new_expr)) = replace + .items() + .iter() + .zip(replace.expressions().iter()) + .find(|(item, _)| item.column_name.value == *name) + { + *expr = new_expr.clone().alias(name.clone()) + } + } + } + Ok(exprs) +} + +#[cfg(test)] +mod tests { + use arrow::datatypes::{DataType, Field, Schema}; + + use datafusion_common::{JoinType, TableReference}; + use datafusion_expr::{ + col, in_subquery, qualified_wildcard, table_scan, wildcard, LogicalPlanBuilder, + }; + + use crate::test::{assert_analyzed_plan_eq_display_indent, test_table_scan}; + use crate::Analyzer; + + use super::*; + + fn assert_plan_eq(plan: LogicalPlan, expected: &str) -> Result<()> { + assert_analyzed_plan_eq_display_indent( + Arc::new(ExpandWildcardRule::new()), + plan, + expected, + ) + } + + #[test] + fn test_expand_wildcard() -> Result<()> { + let table_scan = test_table_scan()?; + let plan = LogicalPlanBuilder::from(table_scan) + .project(vec![wildcard()])? + .build()?; + let expected = + "Projection: test.a, test.b, test.c [a:UInt32, b:UInt32, c:UInt32]\ + \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]"; + assert_plan_eq(plan, expected) + } + + #[test] + fn test_expand_qualified_wildcard() -> Result<()> { + let table_scan = test_table_scan()?; + let plan = LogicalPlanBuilder::from(table_scan) + .project(vec![qualified_wildcard(TableReference::bare("test"))])? + .build()?; + let expected = + "Projection: test.a, test.b, test.c [a:UInt32, b:UInt32, c:UInt32]\ + \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]"; + assert_plan_eq(plan, expected) + } + + #[test] + fn test_expand_qualified_wildcard_in_subquery() -> Result<()> { + let table_scan = test_table_scan()?; + let plan = LogicalPlanBuilder::from(table_scan) + .project(vec![qualified_wildcard(TableReference::bare("test"))])? + .build()?; + let plan = LogicalPlanBuilder::from(plan) + .project(vec![wildcard()])? + .build()?; + let expected = + "Projection: test.a, test.b, test.c [a:UInt32, b:UInt32, c:UInt32]\ + \n Projection: test.a, test.b, test.c [a:UInt32, b:UInt32, c:UInt32]\ + \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]"; + assert_plan_eq(plan, expected) + } + + #[test] + fn test_expand_wildcard_in_subquery() -> Result<()> { + let projection_a = LogicalPlanBuilder::from(test_table_scan()?) + .project(vec![col("a")])? + .build()?; + let subquery = LogicalPlanBuilder::from(projection_a) + .project(vec![wildcard()])? + .build()?; + let plan = LogicalPlanBuilder::from(test_table_scan()?) + .filter(in_subquery(col("a"), Arc::new(subquery)))? + .project(vec![wildcard()])? 
+ .build()?; + let expected = "\ + Projection: test.a, test.b, test.c [a:UInt32, b:UInt32, c:UInt32]\ + \n Filter: test.a IN () [a:UInt32, b:UInt32, c:UInt32]\ + \n Subquery: [a:UInt32]\ + \n Projection: test.a [a:UInt32]\ + \n Projection: test.a [a:UInt32]\ + \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]\ + \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]"; + assert_plan_eq(plan, expected) + } + + #[test] + fn test_subquery_schema() -> Result<()> { + let analyzer = Analyzer::with_rules(vec![Arc::new(ExpandWildcardRule::new())]); + let options = ConfigOptions::default(); + let subquery = LogicalPlanBuilder::from(test_table_scan()?) + .project(vec![wildcard()])? + .build()?; + let plan = LogicalPlanBuilder::from(subquery) + .alias("sub")? + .project(vec![wildcard()])? + .build()?; + let analyzed_plan = analyzer.execute_and_check(plan, &options, |_, _| {})?; + for x in analyzed_plan.inputs() { + for field in x.schema().fields() { + assert_ne!(field.name(), "*"); + } + } + Ok(()) + } + + fn employee_schema() -> Schema { + Schema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("first_name", DataType::Utf8, false), + Field::new("last_name", DataType::Utf8, false), + Field::new("state", DataType::Utf8, false), + Field::new("salary", DataType::Int32, false), + ]) + } + + #[test] + fn plan_using_join_wildcard_projection() -> Result<()> { + let t2 = table_scan(Some("t2"), &employee_schema(), None)?.build()?; + + let plan = table_scan(Some("t1"), &employee_schema(), None)? + .join_using(t2, JoinType::Inner, vec!["id"])? + .project(vec![wildcard()])? + .build()?; + + let expected = "Projection: *\ + \n Inner Join: Using t1.id = t2.id\ + \n TableScan: t1\ + \n TableScan: t2"; + + assert_eq!(expected, format!("{plan}")); + + let analyzer = Analyzer::with_rules(vec![Arc::new(ExpandWildcardRule::new())]); + let options = ConfigOptions::default(); + + let analyzed_plan = analyzer.execute_and_check(plan, &options, |_, _| {})?; + + // id column should only show up once in projection + let expected = "Projection: t1.id, t1.first_name, t1.last_name, t1.state, t1.salary, t2.first_name, t2.last_name, t2.state, t2.salary\ + \n Inner Join: Using t1.id = t2.id\ + \n TableScan: t1\ + \n TableScan: t2"; + assert_eq!(expected, format!("{analyzed_plan}")); + + Ok(()) + } +} diff --git a/datafusion/optimizer/src/analyzer/inline_table_scan.rs b/datafusion/optimizer/src/analyzer/inline_table_scan.rs index 73ab37cb11d8..b69b8410da49 100644 --- a/datafusion/optimizer/src/analyzer/inline_table_scan.rs +++ b/datafusion/optimizer/src/analyzer/inline_table_scan.rs @@ -23,6 +23,7 @@ use crate::analyzer::AnalyzerRule; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::{Column, Result}; +use datafusion_expr::expr::WildcardOptions; use datafusion_expr::{logical_plan::LogicalPlan, Expr, LogicalPlanBuilder, TableScan}; /// Analyzed rule that inlines TableScan that provide a [`LogicalPlan`] @@ -93,7 +94,10 @@ fn generate_projection_expr( ))); } } else { - exprs.push(Expr::Wildcard { qualifier: None }); + exprs.push(Expr::Wildcard { + qualifier: None, + options: WildcardOptions::default(), + }); } Ok(exprs) } @@ -178,7 +182,7 @@ mod tests { let plan = scan.filter(col("x.a").eq(lit(1)))?.build()?; let expected = "Filter: x.a = Int32(1)\ \n SubqueryAlias: x\ - \n Projection: y.a, y.b\ + \n Projection: *\ \n TableScan: y"; assert_analyzed_plan_eq(Arc::new(InlineTableScan::new()), plan, expected) diff --git 
a/datafusion/optimizer/src/analyzer/mod.rs b/datafusion/optimizer/src/analyzer/mod.rs index 91ee8a9e1033..6e2afeca88c9 100644 --- a/datafusion/optimizer/src/analyzer/mod.rs +++ b/datafusion/optimizer/src/analyzer/mod.rs @@ -30,6 +30,7 @@ use datafusion_expr::expr_rewriter::FunctionRewrite; use datafusion_expr::{Expr, LogicalPlan}; use crate::analyzer::count_wildcard_rule::CountWildcardRule; +use crate::analyzer::expand_wildcard_rule::ExpandWildcardRule; use crate::analyzer::inline_table_scan::InlineTableScan; use crate::analyzer::subquery::check_subquery_expr; use crate::analyzer::type_coercion::TypeCoercion; @@ -38,6 +39,7 @@ use crate::utils::log_plan; use self::function_rewrite::ApplyFunctionRewrites; pub mod count_wildcard_rule; +pub mod expand_wildcard_rule; pub mod function_rewrite; pub mod inline_table_scan; pub mod subquery; @@ -89,6 +91,9 @@ impl Analyzer { pub fn new() -> Self { let rules: Vec> = vec![ Arc::new(InlineTableScan::new()), + // Every rule that will generate [Expr::Wildcard] should be placed in front of [ExpandWildcardRule]. + Arc::new(ExpandWildcardRule::new()), + // [Expr::Wildcard] should be expanded before [TypeCoercion] Arc::new(TypeCoercion::new()), Arc::new(CountWildcardRule::new()), ]; diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 2bb859d84ad7..40efbba6de7a 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -17,20 +17,26 @@ //! Optimizer rule for type validation and coercion +use std::collections::HashMap; use std::sync::Arc; -use arrow::datatypes::{DataType, IntervalUnit}; +use itertools::izip; +use arrow::datatypes::{DataType, Field, IntervalUnit}; + +use crate::analyzer::AnalyzerRule; +use crate::utils::NamePreserver; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRewriter}; use datafusion_common::{ - exec_err, internal_err, not_impl_err, plan_datafusion_err, plan_err, DFSchema, - DataFusionError, Result, ScalarValue, + exec_err, internal_err, not_impl_err, plan_datafusion_err, plan_err, Column, + DFSchema, DFSchemaRef, DataFusionError, Result, ScalarValue, }; use datafusion_expr::expr::{ - self, Between, BinaryExpr, Case, Exists, InList, InSubquery, Like, ScalarFunction, - WindowFunction, + self, Alias, Between, BinaryExpr, Case, Exists, InList, InSubquery, Like, + ScalarFunction, WindowFunction, }; +use datafusion_expr::expr_rewriter::coerce_plan_expr_for_schema; use datafusion_expr::expr_schema::cast_subquery; use datafusion_expr::logical_plan::tree_node::unwrap_arc; use datafusion_expr::logical_plan::Subquery; @@ -47,13 +53,10 @@ use datafusion_expr::type_coercion::{is_datetime, is_utf8_or_large_utf8}; use datafusion_expr::utils::merge_schema; use datafusion_expr::{ is_false, is_not_false, is_not_true, is_not_unknown, is_true, is_unknown, not, - AggregateUDF, Expr, ExprFunctionExt, ExprSchemable, LogicalPlan, Operator, ScalarUDF, - WindowFrame, WindowFrameBound, WindowFrameUnits, + AggregateUDF, Expr, ExprFunctionExt, ExprSchemable, Join, LogicalPlan, Operator, + Projection, ScalarUDF, Union, WindowFrame, WindowFrameBound, WindowFrameUnits, }; -use crate::analyzer::AnalyzerRule; -use crate::utils::NamePreserver; - #[derive(Default)] pub struct TypeCoercion {} @@ -120,8 +123,8 @@ fn analyze_internal( expr.rewrite(&mut expr_rewrite)? .map_data(|expr| original_name.restore(expr)) })? 
- // coerce join expressions specially - .map_data(|plan| expr_rewrite.coerce_joins(plan))? + // some plans need extra coercion after their expressions are coerced + .map_data(|plan| expr_rewrite.coerce_plan(plan))? // recompute the schema after the expressions have been rewritten as the types may have changed .map_data(|plan| plan.recompute_schema()) } @@ -135,6 +138,14 @@ impl<'a> TypeCoercionRewriter<'a> { Self { schema } } + fn coerce_plan(&mut self, plan: LogicalPlan) -> Result { + match plan { + LogicalPlan::Join(join) => self.coerce_join(join), + LogicalPlan::Union(union) => Self::coerce_union(union), + _ => Ok(plan), + } + } + /// Coerce join equality expressions and join filter /// /// Joins must be treated specially as their equality expressions are stored @@ -143,11 +154,7 @@ impl<'a> TypeCoercionRewriter<'a> { /// /// For example, on_exprs like `t1.a = t2.b AND t1.x = t2.y` will be stored /// as a list of `(t1.a, t2.b), (t1.x, t2.y)` - fn coerce_joins(&mut self, plan: LogicalPlan) -> Result { - let LogicalPlan::Join(mut join) = plan else { - return Ok(plan); - }; - + fn coerce_join(&mut self, mut join: Join) -> Result { join.on = join .on .into_iter() @@ -168,6 +175,33 @@ impl<'a> TypeCoercionRewriter<'a> { Ok(LogicalPlan::Join(join)) } + /// Coerce the union’s inputs to a common schema compatible with all inputs. + /// This occurs after wildcard expansion and the coercion of the input expressions. + fn coerce_union(union_plan: Union) -> Result { + let union_schema = Arc::new(coerce_union_schema(&union_plan.inputs)?); + let new_inputs = union_plan + .inputs + .iter() + .map(|p| { + let plan = coerce_plan_expr_for_schema(p, &union_schema)?; + match plan { + LogicalPlan::Projection(Projection { expr, input, .. }) => { + Ok(Arc::new(project_with_column_index( + expr, + input, + Arc::clone(&union_schema), + )?)) + } + other_plan => Ok(Arc::new(other_plan)), + } + }) + .collect::>>()?; + Ok(LogicalPlan::Union(Union { + inputs: new_inputs, + schema: union_schema, + })) + } + fn coerce_join_filter(&self, expr: Expr) -> Result { let expr_type = expr.get_type(self.schema)?; match expr_type { @@ -774,6 +808,92 @@ fn coerce_case_expression(case: Case, schema: &DFSchema) -> Result { Ok(Case::new(case_expr, when_then, else_expr)) } +/// Get a common schema that is compatible with all inputs of UNION. 
+fn coerce_union_schema(inputs: &[Arc]) -> Result { + let base_schema = inputs[0].schema(); + let mut union_datatypes = base_schema + .fields() + .iter() + .map(|f| f.data_type().clone()) + .collect::>(); + let mut union_nullabilities = base_schema + .fields() + .iter() + .map(|f| f.is_nullable()) + .collect::>(); + + for (i, plan) in inputs.iter().enumerate().skip(1) { + let plan_schema = plan.schema(); + if plan_schema.fields().len() != base_schema.fields().len() { + return plan_err!( + "Union schemas have different number of fields: \ + query 1 has {} fields whereas query {} has {} fields", + base_schema.fields().len(), + i + 1, + plan_schema.fields().len() + ); + } + // coerce data type and nullablity for each field + for (union_datatype, union_nullable, plan_field) in izip!( + union_datatypes.iter_mut(), + union_nullabilities.iter_mut(), + plan_schema.fields() + ) { + let coerced_type = + comparison_coercion(union_datatype, plan_field.data_type()).ok_or_else( + || { + plan_datafusion_err!( + "Incompatible inputs for Union: Previous inputs were \ + of type {}, but got incompatible type {} on column '{}'", + union_datatype, + plan_field.data_type(), + plan_field.name() + ) + }, + )?; + *union_datatype = coerced_type; + *union_nullable = *union_nullable || plan_field.is_nullable(); + } + } + let union_qualified_fields = izip!( + base_schema.iter(), + union_datatypes.into_iter(), + union_nullabilities + ) + .map(|((qualifier, field), datatype, nullable)| { + let field = Arc::new(Field::new(field.name().clone(), datatype, nullable)); + (qualifier.cloned(), field) + }) + .collect::>(); + DFSchema::new_with_metadata(union_qualified_fields, HashMap::new()) +} + +/// See `` +fn project_with_column_index( + expr: Vec, + input: Arc, + schema: DFSchemaRef, +) -> Result { + let alias_expr = expr + .into_iter() + .enumerate() + .map(|(i, e)| match e { + Expr::Alias(Alias { ref name, .. }) if name != schema.field(i).name() => { + e.unalias().alias(schema.field(i).name()) + } + Expr::Column(Column { + relation: _, + ref name, + }) if name != schema.field(i).name() => e.alias(schema.field(i).name()), + Expr::Alias { .. } | Expr::Column { .. 
} => e, + _ => e.alias(schema.field(i).name()), + }) + .collect::>(); + + Projection::try_new_with_schema(alias_expr, input, schema) + .map(LogicalPlan::Projection) +} + #[cfg(test)] mod test { use std::any::Any; @@ -1286,7 +1406,6 @@ mod test { .eq(cast(lit("1998-03-18"), DataType::Date32)); let empty = empty(); let plan = LogicalPlan::Projection(Projection::try_new(vec![expr], empty)?); - dbg!(&plan); let expected = "Projection: CAST(Utf8(\"1998-03-18\") AS Timestamp(Nanosecond, None)) = CAST(CAST(Utf8(\"1998-03-18\") AS Date32) AS Timestamp(Nanosecond, None))\n EmptyRelation"; assert_analyzed_plan_eq(Arc::new(TypeCoercion::new()), plan, expected)?; @@ -1473,7 +1592,6 @@ mod test { )); let empty = empty(); let plan = LogicalPlan::Projection(Projection::try_new(vec![expr], empty)?); - dbg!(&plan); let expected = "Projection: CAST(Utf8(\"1998-03-18\") AS Timestamp(Nanosecond, None)) - CAST(Utf8(\"1998-03-18\") AS Timestamp(Nanosecond, None))\n EmptyRelation"; assert_analyzed_plan_eq(Arc::new(TypeCoercion::new()), plan, expected)?; diff --git a/datafusion/optimizer/src/eliminate_nested_union.rs b/datafusion/optimizer/src/eliminate_nested_union.rs index cc8cf1f56c18..5f41e4f137b1 100644 --- a/datafusion/optimizer/src/eliminate_nested_union.rs +++ b/datafusion/optimizer/src/eliminate_nested_union.rs @@ -114,8 +114,11 @@ fn extract_plan_from_distinct(plan: Arc) -> Arc { #[cfg(test)] mod tests { use super::*; + use crate::analyzer::type_coercion::TypeCoercion; + use crate::analyzer::Analyzer; use crate::test::*; use arrow::datatypes::{DataType, Field, Schema}; + use datafusion_common::config::ConfigOptions; use datafusion_expr::{col, logical_plan::table_scan}; fn schema() -> Schema { @@ -127,7 +130,14 @@ mod tests { } fn assert_optimized_plan_equal(plan: LogicalPlan, expected: &str) -> Result<()> { - assert_optimized_plan_eq(Arc::new(EliminateNestedUnion::new()), plan, expected) + let options = ConfigOptions::default(); + let analyzed_plan = Analyzer::with_rules(vec![Arc::new(TypeCoercion::new())]) + .execute_and_check(plan, &options, |_, _| {})?; + assert_optimized_plan_eq( + Arc::new(EliminateNestedUnion::new()), + analyzed_plan, + expected, + ) } #[test] diff --git a/datafusion/optimizer/src/push_down_limit.rs b/datafusion/optimizer/src/push_down_limit.rs index 612aac1d152d..4d8f1dbdb955 100644 --- a/datafusion/optimizer/src/push_down_limit.rs +++ b/datafusion/optimizer/src/push_down_limit.rs @@ -24,6 +24,7 @@ use crate::optimizer::ApplyOrder; use crate::{OptimizerConfig, OptimizerRule}; use datafusion_common::tree_node::Transformed; +use datafusion_common::utils::combine_limit; use datafusion_common::Result; use datafusion_expr::logical_plan::tree_node::unwrap_arc; use datafusion_expr::logical_plan::{Join, JoinType, Limit, LogicalPlan}; @@ -217,69 +218,6 @@ fn transformed_limit( }))) } -/// Computes the `skip` and `fetch` parameters of a single limit that would be -/// equivalent to two consecutive limits with the given `skip`/`fetch` parameters. -/// -/// There are multiple cases to consider: -/// -/// # Case 0: Parent and child are disjoint (`child_fetch <= skip`). -/// -/// ```text -/// Before merging: -/// |........skip........|---fetch-->| Parent limit -/// |...child_skip...|---child_fetch-->| Child limit -/// ``` -/// -/// After merging: -/// ```text -/// |.........(child_skip + skip).........| -/// ``` -/// -/// # Case 1: Parent is beyond child's range (`skip < child_fetch <= skip + fetch`). 
-/// -/// Before merging: -/// ```text -/// |...skip...|------------fetch------------>| Parent limit -/// |...child_skip...|-------------child_fetch------------>| Child limit -/// ``` -/// -/// After merging: -/// ```text -/// |....(child_skip + skip)....|---(child_fetch - skip)-->| -/// ``` -/// -/// # Case 2: Parent is within child's range (`skip + fetch < child_fetch`). -/// -/// Before merging: -/// ```text -/// |...skip...|---fetch-->| Parent limit -/// |...child_skip...|-------------child_fetch------------>| Child limit -/// ``` -/// -/// After merging: -/// ```text -/// |....(child_skip + skip)....|---fetch-->| -/// ``` -pub fn combine_limit( - parent_skip: usize, - parent_fetch: Option, - child_skip: usize, - child_fetch: Option, -) -> (usize, Option) { - let combined_skip = child_skip.saturating_add(parent_skip); - - let combined_fetch = match (parent_fetch, child_fetch) { - (Some(parent_fetch), Some(child_fetch)) => { - Some(min(parent_fetch, child_fetch.saturating_sub(parent_skip))) - } - (Some(parent_fetch), None) => Some(parent_fetch), - (None, Some(child_fetch)) => Some(child_fetch.saturating_sub(parent_skip)), - (None, None) => None, - }; - - (combined_skip, combined_fetch) -} - /// Adds a limit to the inputs of a join, if possible fn push_down_join(mut join: Join, limit: usize) -> Transformed { use JoinType::*; diff --git a/datafusion/optimizer/tests/optimizer_integration.rs b/datafusion/optimizer/tests/optimizer_integration.rs index aaa5eec3955c..93dd49b17492 100644 --- a/datafusion/optimizer/tests/optimizer_integration.rs +++ b/datafusion/optimizer/tests/optimizer_integration.rs @@ -335,6 +335,27 @@ fn test_propagate_empty_relation_inner_join_and_unions() { assert_eq!(expected, format!("{plan}")); } +#[test] +fn select_wildcard_with_repeated_column() { + let sql = "SELECT *, col_int32 FROM test"; + let err = test_sql(sql).expect_err("query should have failed"); + assert_eq!( + "expand_wildcard_rule\ncaused by\nError during planning: Projections require unique expression names but the expression \"test.col_int32\" at position 0 and \"test.col_int32\" at position 7 have the same name. Consider aliasing (\"AS\") one of them.", + err.strip_backtrace() + ); +} + +#[test] +fn select_wildcard_with_repeated_column_but_is_aliased() { + let sql = "SELECT *, col_int32 as col_32 FROM test"; + + let plan = test_sql(sql).unwrap(); + let expected = "Projection: test.col_int32, test.col_uint32, test.col_utf8, test.col_date32, test.col_date64, test.col_ts_nano_none, test.col_ts_nano_utc, test.col_int32 AS col_32\ + \n TableScan: test projection=[col_int32, col_uint32, col_utf8, col_date32, col_date64, col_ts_nano_none, col_ts_nano_utc]"; + + assert_eq!(expected, format!("{plan}")); +} + fn test_sql(sql: &str) -> Result { // parse the SQL let dialect = GenericDialect {}; // or AnsiDialect, or your own dialect ... 
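The push_down_limit.rs hunk above drops the local `combine_limit` helper in favor of the shared one in `datafusion_common::utils`, which the new physical `LimitPushdown` rule further below also reuses. As a rough, standalone sketch of the skip/fetch arithmetic described by the removed doc comment (the function name and the `main` driver here are illustrative only, not the shared helper itself):

fn combine_limit_sketch(
    parent_skip: usize,
    parent_fetch: Option<usize>,
    child_skip: usize,
    child_fetch: Option<usize>,
) -> (usize, Option<usize>) {
    // Skips always add up (saturating to avoid overflow).
    let combined_skip = child_skip.saturating_add(parent_skip);
    // The combined fetch is bounded by the parent's fetch and by whatever is
    // left of the child's fetch once the parent's skip has been consumed.
    let combined_fetch = match (parent_fetch, child_fetch) {
        (Some(parent_fetch), Some(child_fetch)) => {
            Some(parent_fetch.min(child_fetch.saturating_sub(parent_skip)))
        }
        (Some(parent_fetch), None) => Some(parent_fetch),
        (None, Some(child_fetch)) => Some(child_fetch.saturating_sub(parent_skip)),
        (None, None) => None,
    };
    (combined_skip, combined_fetch)
}

fn main() {
    // Case 0 (disjoint): the parent skips past everything the child fetches.
    assert_eq!(combine_limit_sketch(10, Some(5), 0, Some(8)), (10, Some(0)));
    // Case 2 (parent window inside the child's range): the parent fetch survives.
    assert_eq!(combine_limit_sketch(2, Some(3), 1, Some(100)), (3, Some(3)));
}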
diff --git a/datafusion/physical-expr/benches/case_when.rs b/datafusion/physical-expr/benches/case_when.rs index 8a34f34a82db..9eda1277c263 100644 --- a/datafusion/physical-expr/benches/case_when.rs +++ b/datafusion/physical-expr/benches/case_when.rs @@ -44,12 +44,12 @@ fn criterion_benchmark(c: &mut Criterion) { if i % 7 == 0 { c2.append_null(); } else { - c2.append_value(&format!("string {i}")); + c2.append_value(format!("string {i}")); } if i % 9 == 0 { c3.append_null(); } else { - c3.append_value(&format!("other string {i}")); + c3.append_value(format!("other string {i}")); } } let c1 = Arc::new(c1.finish()); diff --git a/datafusion/physical-expr/src/analysis.rs b/datafusion/physical-expr/src/analysis.rs index bcf1c8e510b1..3eac62a4df08 100644 --- a/datafusion/physical-expr/src/analysis.rs +++ b/datafusion/physical-expr/src/analysis.rs @@ -119,7 +119,7 @@ impl ExprBoundaries { Ok(ExprBoundaries { column, interval, - distinct_count: col_stats.distinct_count.clone(), + distinct_count: col_stats.distinct_count, }) } diff --git a/datafusion/physical-expr/src/expressions/case.rs b/datafusion/physical-expr/src/expressions/case.rs index 583a4ef32542..c6afb5c05985 100644 --- a/datafusion/physical-expr/src/expressions/case.rs +++ b/datafusion/physical-expr/src/expressions/case.rs @@ -1146,7 +1146,7 @@ mod tests { if i % 7 == 0 { c2.append_null(); } else { - c2.append_value(&format!("string {i}")); + c2.append_value(format!("string {i}")); } } let c1 = Arc::new(c1.finish()); diff --git a/datafusion/physical-expr/src/expressions/in_list.rs b/datafusion/physical-expr/src/expressions/in_list.rs index 8a3885030b9d..dfc70551ccf6 100644 --- a/datafusion/physical-expr/src/expressions/in_list.rs +++ b/datafusion/physical-expr/src/expressions/in_list.rs @@ -28,7 +28,6 @@ use crate::PhysicalExpr; use arrow::array::*; use arrow::buffer::BooleanBuffer; use arrow::compute::kernels::boolean::{not, or_kleene}; -use arrow::compute::kernels::cmp::eq; use arrow::compute::take; use arrow::datatypes::*; use arrow::util::bit_iterator::BitIndexIterator; @@ -41,7 +40,8 @@ use datafusion_common::hash_utils::HashValue; use datafusion_common::{ exec_err, internal_err, not_impl_err, DFSchema, Result, ScalarValue, }; -use datafusion_expr::ColumnarValue; +use datafusion_expr::{ColumnarValue, Operator}; +use datafusion_physical_expr_common::datum::compare_op_for_nested; use ahash::RandomState; use hashbrown::hash_map::RawEntryMut; @@ -361,7 +361,11 @@ impl PhysicalExpr for InListExpr { |result, expr| -> Result { Ok(or_kleene( &result, - &eq(&value, &expr?.into_array(num_rows)?)?, + &compare_op_for_nested( + Operator::Eq, + &value, + &expr?.into_array(num_rows)?, + )?, )?) }, )?; diff --git a/datafusion/physical-optimizer/src/lib.rs b/datafusion/physical-optimizer/src/lib.rs index 8108493a0d3b..d54e6dbcab8f 100644 --- a/datafusion/physical-optimizer/src/lib.rs +++ b/datafusion/physical-optimizer/src/lib.rs @@ -18,6 +18,7 @@ #![deny(clippy::clone_on_ref_ptr)] pub mod aggregate_statistics; +pub mod limit_pushdown; mod optimizer; pub mod output_requirements; diff --git a/datafusion/physical-optimizer/src/limit_pushdown.rs b/datafusion/physical-optimizer/src/limit_pushdown.rs new file mode 100644 index 000000000000..2b787980585a --- /dev/null +++ b/datafusion/physical-optimizer/src/limit_pushdown.rs @@ -0,0 +1,253 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! [`LimitPushdown`] pushes `LIMIT` down through `ExecutionPlan`s to reduce +//! data transfer as much as possible. + +use std::fmt::Debug; +use std::sync::Arc; + +use crate::PhysicalOptimizerRule; +use datafusion_common::config::ConfigOptions; +use datafusion_common::plan_datafusion_err; +use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; +use datafusion_common::utils::combine_limit; +use datafusion_common::Result; +use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; +use datafusion_physical_plan::limit::{GlobalLimitExec, LocalLimitExec}; +use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; +use datafusion_physical_plan::ExecutionPlan; + +/// This rule inspects [`ExecutionPlan`]'s and pushes down the fetch limit from +/// the parent to the child if applicable. +#[derive(Default)] +pub struct LimitPushdown {} + +impl LimitPushdown { + #[allow(missing_docs)] + pub fn new() -> Self { + Self {} + } +} + +impl PhysicalOptimizerRule for LimitPushdown { + fn optimize( + &self, + plan: Arc, + _config: &ConfigOptions, + ) -> Result> { + plan.transform_down(push_down_limits).data() + } + + fn name(&self) -> &str { + "LimitPushdown" + } + + fn schema_check(&self) -> bool { + true + } +} + +/// This enumeration makes `skip` and `fetch` calculations easier by providing +/// a single API for both local and global limit operators. +#[derive(Debug)] +enum LimitExec { + Global(GlobalLimitExec), + Local(LocalLimitExec), +} + +impl LimitExec { + fn input(&self) -> &Arc { + match self { + Self::Global(global) => global.input(), + Self::Local(local) => local.input(), + } + } + + fn fetch(&self) -> Option { + match self { + Self::Global(global) => global.fetch(), + Self::Local(local) => Some(local.fetch()), + } + } + + fn skip(&self) -> usize { + match self { + Self::Global(global) => global.skip(), + Self::Local(_) => 0, + } + } + + fn with_child(&self, child: Arc) -> Self { + match self { + Self::Global(global) => { + Self::Global(GlobalLimitExec::new(child, global.skip(), global.fetch())) + } + Self::Local(local) => Self::Local(LocalLimitExec::new(child, local.fetch())), + } + } +} + +impl From for Arc { + fn from(limit_exec: LimitExec) -> Self { + match limit_exec { + LimitExec::Global(global) => Arc::new(global), + LimitExec::Local(local) => Arc::new(local), + } + } +} + +/// Pushes down the limit through the plan. 
+pub fn push_down_limits( + plan: Arc, +) -> Result>> { + let maybe_modified = if let Some(limit_exec) = extract_limit(&plan) { + let child = limit_exec.input(); + if let Some(child_limit) = extract_limit(child) { + let merged = merge_limits(&limit_exec, &child_limit); + // Revisit current node in case of consecutive pushdowns + Some(push_down_limits(merged)?.data) + } else if child.supports_limit_pushdown() { + try_push_down_limit(&limit_exec, Arc::clone(child))? + } else { + add_fetch_to_child(&limit_exec, Arc::clone(child)) + } + } else { + None + }; + + Ok(maybe_modified.map_or(Transformed::no(plan), Transformed::yes)) +} + +/// Transforms the [`ExecutionPlan`] into a [`LimitExec`] if it is a +/// [`GlobalLimitExec`] or a [`LocalLimitExec`]. +fn extract_limit(plan: &Arc) -> Option { + if let Some(global_limit) = plan.as_any().downcast_ref::() { + Some(LimitExec::Global(GlobalLimitExec::new( + Arc::clone(global_limit.input()), + global_limit.skip(), + global_limit.fetch(), + ))) + } else { + plan.as_any() + .downcast_ref::() + .map(|local_limit| { + LimitExec::Local(LocalLimitExec::new( + Arc::clone(local_limit.input()), + local_limit.fetch(), + )) + }) + } +} + +/// Merge the limits of the parent and the child. If at least one of them is a +/// [`GlobalLimitExec`], the result is also a [`GlobalLimitExec`]. Otherwise, +/// the result is a [`LocalLimitExec`]. +fn merge_limits( + parent_limit_exec: &LimitExec, + child_limit_exec: &LimitExec, +) -> Arc { + // We can use the logic in `combine_limit` from the logical optimizer: + let (skip, fetch) = combine_limit( + parent_limit_exec.skip(), + parent_limit_exec.fetch(), + child_limit_exec.skip(), + child_limit_exec.fetch(), + ); + match (parent_limit_exec, child_limit_exec) { + (LimitExec::Local(_), LimitExec::Local(_)) => { + // The fetch is present in this case, can unwrap. + Arc::new(LocalLimitExec::new( + Arc::clone(child_limit_exec.input()), + fetch.unwrap(), + )) + } + _ => Arc::new(GlobalLimitExec::new( + Arc::clone(child_limit_exec.input()), + skip, + fetch, + )), + } +} + +/// Pushes down the limit through the child. If the child has a single input +/// partition, simply swaps the parent and the child. Otherwise, adds a +/// [`LocalLimitExec`] after in between in addition to swapping, because of +/// multiple input partitions. +fn try_push_down_limit( + limit_exec: &LimitExec, + child: Arc, +) -> Result>> { + let grandchildren = child.children(); + if let Some(&grandchild) = grandchildren.first() { + // GlobalLimitExec and LocalLimitExec must have an input after pushdown + if combines_input_partitions(&child) { + // We still need a LocalLimitExec after the child + if let Some(fetch) = limit_exec.fetch() { + let new_local_limit = Arc::new(LocalLimitExec::new( + Arc::clone(grandchild), + fetch + limit_exec.skip(), + )); + let new_child = + Arc::clone(&child).with_new_children(vec![new_local_limit])?; + Ok(Some(limit_exec.with_child(new_child).into())) + } else { + Ok(None) + } + } else { + // Swap current with child + let new_limit = limit_exec.with_child(Arc::clone(grandchild)); + let new_child = child.with_new_children(vec![new_limit.into()])?; + Ok(Some(new_child)) + } + } else { + // Operators supporting limit push down must have a child. + Err(plan_datafusion_err!( + "{:#?} must have a child to push down limit", + child + )) + } +} + +fn combines_input_partitions(exec: &Arc) -> bool { + let exec = exec.as_any(); + exec.is::() || exec.is::() +} + +/// Transforms child to the fetching version if supported. 
Removes the parent if +/// skip is zero. Otherwise, keeps the parent. +fn add_fetch_to_child( + limit_exec: &LimitExec, + child: Arc, +) -> Option> { + let fetch = limit_exec.fetch(); + let skip = limit_exec.skip(); + + let child_fetch = fetch.map(|f| f + skip); + + if let Some(child_with_fetch) = child.with_fetch(child_fetch) { + if skip > 0 { + Some(limit_exec.with_child(child_with_fetch).into()) + } else { + Some(child_with_fetch) + } + } else { + None + } +} + +// See tests in datafusion/core/tests/physical_optimizer diff --git a/datafusion/physical-optimizer/src/output_requirements.rs b/datafusion/physical-optimizer/src/output_requirements.rs index f971d8f1f0aa..fdfdd349e36e 100644 --- a/datafusion/physical-optimizer/src/output_requirements.rs +++ b/datafusion/physical-optimizer/src/output_requirements.rs @@ -286,3 +286,5 @@ fn require_top_ordering_helper( Ok((plan, false)) } } + +// See tests in datafusion/core/tests/physical_optimizer diff --git a/datafusion/physical-plan/src/aggregates/row_hash.rs b/datafusion/physical-plan/src/aggregates/row_hash.rs index ed3d6d49f9f3..b3221752d034 100644 --- a/datafusion/physical-plan/src/aggregates/row_hash.rs +++ b/datafusion/physical-plan/src/aggregates/row_hash.rs @@ -100,22 +100,24 @@ struct SpillState { /// /// See "partial aggregation" discussion on [`GroupedHashAggregateStream`] struct SkipAggregationProbe { - /// Number of processed input rows + /// Number of processed input rows (updated during probing) input_rows: usize, - /// Number of total group values for `input_rows` + /// Number of total group values for `input_rows` (updated during probing) num_groups: usize, - /// Aggregation ratio check should be performed only when the - /// number of input rows exceeds this threshold + /// Aggregation ratio check performed when the number of input rows exceeds + /// this threshold (from `SessionConfig`) probe_rows_threshold: usize, - /// Maximum allowed value of `input_rows` / `num_groups` to - /// continue aggregation + /// Maximum ratio of `num_groups` to `input_rows` for continuing aggregation + /// (from `SessionConfig`). If the ratio exceeds this value, aggregation + /// is skipped and input rows are directly converted to output probe_ratio_threshold: f64, - /// Flag indicating that further data aggregation mey be skipped + /// Flag indicating further data aggregation may be skipped (decision made + /// when probing complete) should_skip: bool, - /// Flag indicating that further updates of `SkipAggregationProbe` - /// state won't make any effect + /// Flag indicating further updates of `SkipAggregationProbe` state won't + /// make any effect (set either while probing or on probing completion) is_locked: bool, /// Number of rows where state was output without aggregation. 
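The reworded `SkipAggregationProbe` comments above describe a probing phase: row and group counts accumulate until `probe_rows_threshold` rows have been seen, then the groups-to-rows ratio is compared against `probe_ratio_threshold` and the decision is locked. A minimal, self-contained sketch of that decision, assuming hypothetical names rather than the actual `row_hash.rs` code:

struct ProbeSketch {
    input_rows: usize,
    num_groups: usize,
    probe_rows_threshold: usize,
    probe_ratio_threshold: f64,
    should_skip: bool,
    is_locked: bool,
}

impl ProbeSketch {
    // `total_groups` is the number of distinct group values seen so far.
    fn update(&mut self, batch_rows: usize, total_groups: usize) {
        if self.is_locked {
            return; // further updates have no effect once the decision is made
        }
        self.input_rows += batch_rows;
        self.num_groups = total_groups;
        if self.input_rows >= self.probe_rows_threshold {
            // Nearly one group per input row means partial aggregation is not
            // reducing the data, so it should be skipped.
            self.should_skip = self.num_groups as f64 / self.input_rows as f64
                >= self.probe_ratio_threshold;
            self.is_locked = true;
        }
    }
}

fn main() {
    let mut probe = ProbeSketch {
        input_rows: 0,
        num_groups: 0,
        probe_rows_threshold: 100_000,
        probe_ratio_threshold: 0.8,
        should_skip: false,
        is_locked: false,
    };
    probe.update(200_000, 190_000); // ~0.95 groups per row: high cardinality
    assert!(probe.should_skip);
}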
diff --git a/datafusion/physical-plan/src/coalesce_batches.rs b/datafusion/physical-plan/src/coalesce_batches.rs index 13c10c535c08..5589027694fe 100644 --- a/datafusion/physical-plan/src/coalesce_batches.rs +++ b/datafusion/physical-plan/src/coalesce_batches.rs @@ -20,24 +20,24 @@ use std::any::Any; use std::pin::Pin; use std::sync::Arc; -use std::task::{ready, Context, Poll}; +use std::task::{Context, Poll}; + +use super::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; +use super::{DisplayAs, ExecutionPlanProperties, PlanProperties, Statistics}; +use crate::{ + DisplayFormatType, ExecutionPlan, RecordBatchStream, SendableRecordBatchStream, +}; use arrow::array::{AsArray, StringViewBuilder}; use arrow::compute::concat_batches; use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; use arrow_array::{Array, ArrayRef}; -use futures::stream::{Stream, StreamExt}; - use datafusion_common::Result; use datafusion_execution::TaskContext; -use crate::{ - DisplayFormatType, ExecutionPlan, RecordBatchStream, SendableRecordBatchStream, -}; - -use super::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; -use super::{DisplayAs, ExecutionPlanProperties, PlanProperties, Statistics}; +use futures::ready; +use futures::stream::{Stream, StreamExt}; /// `CoalesceBatchesExec` combines small batches into larger batches for more /// efficient use of vectorized processing by later operators. @@ -202,8 +202,9 @@ impl ExecutionPlan for CoalesceBatchesExec { self.target_batch_size, self.fetch, ), - is_closed: false, baseline_metrics: BaselineMetrics::new(&self.metrics, partition), + // Start by pulling data + inner_state: CoalesceBatchesStreamState::Pull, })) } @@ -236,10 +237,11 @@ struct CoalesceBatchesStream { input: SendableRecordBatchStream, /// Buffer for combining batches coalescer: BatchCoalescer, - /// Whether the stream has finished returning all of its data or not - is_closed: bool, /// Execution metrics baseline_metrics: BaselineMetrics, + /// The current inner state of the stream. This state dictates the current + /// action or operation to be performed in the streaming process. + inner_state: CoalesceBatchesStreamState, } impl Stream for CoalesceBatchesStream { @@ -259,45 +261,98 @@ impl Stream for CoalesceBatchesStream { } } +/// Enumeration of possible states for `CoalesceBatchesStream`. +/// It represents different stages in the lifecycle of a stream of record batches. +/// +/// An example of state transition: +/// Notation: +/// `[3000]`: A batch with size 3000 +/// `{[2000], [3000]}`: `CoalesceBatchesStream`'s internal buffer with 2 batches buffered +/// Input of `CoalesceBatchesStream` will generate three batches `[2000], [3000], [4000]` +/// The coalescing procedure will go through the following steps with a 4096-row coalescing threshold: +/// 1. Read the first batch and get it buffered. +/// - initial state: `Pull` +/// - initial buffer: `{}` +/// - updated buffer: `{[2000]}` +/// - next state: `Pull` +/// 2. Read the second batch; the coalescing target is reached since 2000 + 3000 > 4096. +/// - initial state: `Pull` +/// - initial buffer: `{[2000]}` +/// - updated buffer: `{[2000], [3000]}` +/// - next state: `ReturnBuffer` +/// 3. The two batches in the buffer get merged and consumed by the upstream operator. +/// - initial state: `ReturnBuffer` +/// - initial buffer: `{[2000], [3000]}` +/// - updated buffer: `{}` +/// - next state: `Pull` +/// 4. Read the third input batch.
+/// - initial state: `Pull` +/// - initial buffer: `{}` +/// - updated buffer: `{[4000]}` +/// - next state: `Pull` +/// 5. The input is ended now. Jump to exhaustion state preparing the finalized data. +/// - initial state: `Pull` +/// - initial buffer: `{[4000]}` +/// - updated buffer: `{[4000]}` +/// - next state: `Exhausted` +#[derive(Debug, Clone, Eq, PartialEq)] +enum CoalesceBatchesStreamState { + /// State to pull a new batch from the input stream. + Pull, + /// State to return a buffered batch. + ReturnBuffer, + /// State indicating that the stream is exhausted. + Exhausted, +} + impl CoalesceBatchesStream { fn poll_next_inner( self: &mut Pin<&mut Self>, cx: &mut Context<'_>, ) -> Poll>> { - // Get a clone (uses same underlying atomic) as self gets borrowed below let cloned_time = self.baseline_metrics.elapsed_compute().clone(); - - if self.is_closed { - return Poll::Ready(None); - } loop { - let input_batch = self.input.poll_next_unpin(cx); - // records time on drop - let _timer = cloned_time.timer(); - match ready!(input_batch) { - Some(result) => { - let Ok(input_batch) = result else { - return Poll::Ready(Some(result)); // pass back error - }; - // Buffer the batch and either get more input if not enough - // rows yet or output - match self.coalescer.push_batch(input_batch) { - Ok(None) => continue, - res => { - if self.coalescer.limit_reached() { - self.is_closed = true; + match &self.inner_state { + CoalesceBatchesStreamState::Pull => { + // Attempt to pull the next batch from the input stream. + let input_batch = ready!(self.input.poll_next_unpin(cx)); + // Start timing the operation. The timer records time upon being dropped. + let _timer = cloned_time.timer(); + + match input_batch { + Some(Ok(batch)) => match self.coalescer.push_batch(batch) { + CoalescerState::Continue => {} + CoalescerState::LimitReached => { + self.inner_state = CoalesceBatchesStreamState::Exhausted; } - return Poll::Ready(res.transpose()); + CoalescerState::TargetReached => { + self.inner_state = + CoalesceBatchesStreamState::ReturnBuffer; + } + }, + None => { + // End of input stream, but buffered batches might still be present. + self.inner_state = CoalesceBatchesStreamState::Exhausted; } + other => return Poll::Ready(other), } } - None => { - self.is_closed = true; - // we have reached the end of the input stream but there could still - // be buffered batches - return match self.coalescer.finish() { - Ok(None) => Poll::Ready(None), - res => Poll::Ready(res.transpose()), + CoalesceBatchesStreamState::ReturnBuffer => { + // Combine buffered batches into one batch and return it. + let batch = self.coalescer.finish_batch()?; + // Set to pull state for the next iteration. + self.inner_state = CoalesceBatchesStreamState::Pull; + return Poll::Ready(Some(Ok(batch))); + } + CoalesceBatchesStreamState::Exhausted => { + // Handle the end of the input stream. + return if self.coalescer.buffer.is_empty() { + // If buffer is empty, return None indicating the stream is fully consumed. + Poll::Ready(None) + } else { + // If the buffer still contains batches, prepare to return them. 
+ let batch = self.coalescer.finish_batch()?; + Poll::Ready(Some(Ok(batch))) }; } } @@ -364,90 +419,72 @@ impl BatchCoalescer { Arc::clone(&self.schema) } - /// Add a batch, returning a batch if the target batch size or limit is reached - fn push_batch(&mut self, batch: RecordBatch) -> Result> { - // discard empty batches - if batch.num_rows() == 0 { - return Ok(None); - } - - // past limit - if self.limit_reached() { - return Ok(None); - } - + /// Given a batch, it updates the buffer of [`BatchCoalescer`]. It returns + /// a variant of [`CoalescerState`] indicating the final state of the buffer. + fn push_batch(&mut self, batch: RecordBatch) -> CoalescerState { let batch = gc_string_view_batch(&batch); + if self.limit_reached(&batch) { + CoalescerState::LimitReached + } else if self.target_reached(batch) { + CoalescerState::TargetReached + } else { + CoalescerState::Continue + } + } - // Handle fetch limit: - if let Some(fetch) = self.fetch { - if self.total_rows + batch.num_rows() >= fetch { - // We have reached the fetch limit. + /// The function checks if the buffer can reach the specified limit after getting `batch`. + /// If it does, it slices the received batch as needed, updates the buffer with it, and + /// finally returns `true`. Otherwise; the function does nothing and returns `false`. + fn limit_reached(&mut self, batch: &RecordBatch) -> bool { + match self.fetch { + Some(fetch) if self.total_rows + batch.num_rows() >= fetch => { + // Limit is reached let remaining_rows = fetch - self.total_rows; debug_assert!(remaining_rows > 0); - self.total_rows = fetch; - // Trim the batch and add to buffered batches: + let batch = batch.slice(0, remaining_rows); self.buffered_rows += batch.num_rows(); + self.total_rows = fetch; self.buffer.push(batch); - // Combine buffered batches: - let batch = concat_batches(&self.schema, &self.buffer)?; - // Reset the buffer state and return final batch: - self.buffer.clear(); - self.buffered_rows = 0; - return Ok(Some(batch)); + true } + _ => false, } - self.total_rows += batch.num_rows(); - - // batch itself is already big enough and we have no buffered rows so - // return it directly - if batch.num_rows() >= self.target_batch_size && self.buffer.is_empty() { - return Ok(Some(batch)); - } - // add to the buffered batches - self.buffered_rows += batch.num_rows(); - self.buffer.push(batch); - // check to see if we have enough batches yet - let batch = if self.buffered_rows >= self.target_batch_size { - // combine the batches and return - let batch = concat_batches(&self.schema, &self.buffer)?; - // reset buffer state - self.buffer.clear(); - self.buffered_rows = 0; - // return batch - Some(batch) - } else { - None - }; - Ok(batch) } - /// Finish the coalescing process, returning all buffered data as a final, - /// single batch, if any - fn finish(&mut self) -> Result> { - if self.buffer.is_empty() { - Ok(None) + /// Updates the buffer with the given batch. If the target batch size is reached, + /// the function returns `true`. Otherwise, it returns `false`. 
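A rough, dependency-free model of the push/finish protocol introduced in this hunk may help: batches are reduced to their row counts so the control flow (`Continue` / `TargetReached` / `LimitReached`, followed by `finish_batch`) can be shown without Arrow. It mirrors the new `BatchCoalescer`/`CoalescerState` shape only loosely; the names and the exact slicing behaviour at the fetch limit are assumptions for illustration.

```rust
enum PushState {
    Continue,
    TargetReached,
    LimitReached,
}

struct Coalescer {
    target_batch_size: usize,
    fetch: Option<usize>,
    total_rows: usize,
    buffered_rows: usize,
    buffer: Vec<usize>, // row counts of buffered batches
}

impl Coalescer {
    fn push_batch(&mut self, rows: usize) -> PushState {
        if let Some(fetch) = self.fetch {
            if self.total_rows + rows >= fetch {
                // Keep only the rows needed to satisfy the limit.
                let remaining = fetch - self.total_rows;
                self.buffer.push(remaining);
                self.buffered_rows += remaining;
                self.total_rows = fetch;
                return PushState::LimitReached;
            }
        }
        if rows == 0 {
            return PushState::Continue;
        }
        self.total_rows += rows;
        self.buffered_rows += rows;
        self.buffer.push(rows);
        if self.buffered_rows >= self.target_batch_size {
            PushState::TargetReached
        } else {
            PushState::Continue
        }
    }

    /// Concatenate (here: sum) the buffered batches and clear the buffer.
    fn finish_batch(&mut self) -> usize {
        let rows: usize = self.buffer.drain(..).sum();
        self.buffered_rows = 0;
        rows
    }
}

fn main() {
    let mut c = Coalescer {
        target_batch_size: 4096,
        fetch: None,
        total_rows: 0,
        buffered_rows: 0,
        buffer: Vec::new(),
    };
    assert!(matches!(c.push_batch(2000), PushState::Continue));
    // 2000 + 3000 >= 4096, so the buffered batches are ready to be returned.
    assert!(matches!(c.push_batch(3000), PushState::TargetReached));
    assert_eq!(c.finish_batch(), 5000);
}
```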
+ fn target_reached(&mut self, batch: RecordBatch) -> bool { + if batch.num_rows() == 0 { + false } else { - // combine the batches and return - let batch = concat_batches(&self.schema, &self.buffer)?; - // reset buffer state - self.buffer.clear(); - self.buffered_rows = 0; - // return batch - Ok(Some(batch)) + self.total_rows += batch.num_rows(); + self.buffered_rows += batch.num_rows(); + self.buffer.push(batch); + self.buffered_rows >= self.target_batch_size } } - /// returns true if there is a limit and it has been reached - pub fn limit_reached(&self) -> bool { - if let Some(fetch) = self.fetch { - self.total_rows >= fetch - } else { - false - } + /// Concatenates and returns all buffered batches, and clears the buffer. + fn finish_batch(&mut self) -> Result { + let batch = concat_batches(&self.schema, &self.buffer)?; + self.buffer.clear(); + self.buffered_rows = 0; + Ok(batch) } } +/// This enumeration acts as a status indicator for the [`BatchCoalescer`] after a +/// [`BatchCoalescer::push_batch()`] operation. +enum CoalescerState { + /// Neither the limit nor the target batch size is reached. + Continue, + /// The sufficient row count to produce a complete query result is reached. + LimitReached, + /// The specified minimum number of rows a batch should have is reached. + TargetReached, +} + /// Heuristically compact `StringViewArray`s to reduce memory usage, if needed /// /// This function decides when to consolidate the StringView into a new buffer @@ -521,11 +558,13 @@ fn gc_string_view_batch(batch: &RecordBatch) -> RecordBatch { #[cfg(test)] mod tests { + use std::ops::Range; + use super::*; + use arrow::datatypes::{DataType, Field, Schema}; use arrow_array::builder::ArrayBuilder; use arrow_array::{StringViewArray, UInt32Array}; - use std::ops::Range; #[test] fn test_coalesce() { @@ -670,16 +709,25 @@ mod tests { // create a single large input batch for output comparison let single_input_batch = concat_batches(&schema, &input_batches).unwrap(); - let mut coalescer = BatchCoalescer::new(schema, target_batch_size, fetch); + let mut coalescer = + BatchCoalescer::new(Arc::clone(&schema), target_batch_size, fetch); let mut output_batches = vec![]; for batch in input_batches { - if let Some(batch) = coalescer.push_batch(batch).unwrap() { - output_batches.push(batch); + match coalescer.push_batch(batch) { + CoalescerState::Continue => {} + CoalescerState::LimitReached => { + output_batches.push(coalescer.finish_batch().unwrap()); + break; + } + CoalescerState::TargetReached => { + coalescer.buffered_rows = 0; + output_batches.push(coalescer.finish_batch().unwrap()); + } } } - if let Some(batch) = coalescer.finish().unwrap() { - output_batches.push(batch); + if coalescer.buffered_rows != 0 { + output_batches.extend(coalescer.buffer); } // make sure we got the expected number of output batches and content diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs index fa9108057cfe..568987b14798 100644 --- a/datafusion/physical-plan/src/filter.rs +++ b/datafusion/physical-plan/src/filter.rs @@ -324,7 +324,7 @@ fn collect_new_statistics( (Precision::Inexact(lower), Precision::Inexact(upper)) }; ColumnStatistics { - null_count: input_column_stats[idx].null_count.clone().to_inexact(), + null_count: input_column_stats[idx].null_count.to_inexact(), max_value, min_value, distinct_count: distinct_count.to_inexact(), diff --git a/datafusion/physical-plan/src/joins/utils.rs b/datafusion/physical-plan/src/joins/utils.rs index b8a58e4d0d30..80d8815bdebc 100644 --- 
a/datafusion/physical-plan/src/joins/utils.rs +++ b/datafusion/physical-plan/src/joins/utils.rs @@ -827,12 +827,12 @@ fn estimate_join_cardinality( JoinType::Inner | JoinType::Left | JoinType::Right | JoinType::Full => { let ij_cardinality = estimate_inner_join_cardinality( Statistics { - num_rows: left_stats.num_rows.clone(), + num_rows: left_stats.num_rows, total_byte_size: Precision::Absent, column_statistics: left_col_stats, }, Statistics { - num_rows: right_stats.num_rows.clone(), + num_rows: right_stats.num_rows, total_byte_size: Precision::Absent, column_statistics: right_col_stats, }, @@ -1024,7 +1024,7 @@ fn max_distinct_count( stats: &ColumnStatistics, ) -> Precision { match &stats.distinct_count { - dc @ (Precision::Exact(_) | Precision::Inexact(_)) => dc.clone(), + &dc @ (Precision::Exact(_) | Precision::Inexact(_)) => dc, _ => { // The number can never be greater than the number of rows we have // minus the nulls (since they don't count as distinct values). @@ -2054,9 +2054,7 @@ mod tests { ); assert_eq!( partial_join_stats.map(|s| s.column_statistics), - expected_cardinality - .clone() - .map(|_| [left_col_stats, right_col_stats].concat()) + expected_cardinality.map(|_| [left_col_stats, right_col_stats].concat()) ); } Ok(()) diff --git a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs index 6311107f7b58..29ead35895fe 100644 --- a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs @@ -1196,7 +1196,7 @@ mod tests { RecordBatchStream, SendableRecordBatchStream, TaskContext, }; use datafusion_expr::{ - Expr, WindowFrame, WindowFrameBound, WindowFrameUnits, WindowFunctionDefinition, + WindowFrame, WindowFrameBound, WindowFrameUnits, WindowFunctionDefinition, }; use datafusion_functions_aggregate::count::count_udaf; use datafusion_physical_expr::expressions::{col, Column, NthValue}; @@ -1303,10 +1303,7 @@ mod tests { let window_fn = WindowFunctionDefinition::AggregateUDF(count_udaf()); let col_expr = Arc::new(Column::new(schema.fields[0].name(), 0)) as Arc; - let log_expr = - Expr::Column(datafusion_common::Column::from(schema.fields[0].name())); let args = vec![col_expr]; - let log_args = vec![log_expr]; let partitionby_exprs = vec![col(hash, &schema)?]; let orderby_exprs = vec![PhysicalSortExpr { expr: col(order_by, &schema)?, @@ -1327,7 +1324,6 @@ mod tests { &window_fn, fn_name, &args, - &log_args, &partitionby_exprs, &orderby_exprs, Arc::new(window_frame.clone()), diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index 2e6ad4e1a14f..1fd0ca36b1eb 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -32,8 +32,8 @@ use arrow::datatypes::Schema; use arrow_schema::{DataType, Field, SchemaRef}; use datafusion_common::{exec_err, DataFusionError, Result, ScalarValue}; use datafusion_expr::{ - BuiltInWindowFunction, Expr, PartitionEvaluator, WindowFrame, - WindowFunctionDefinition, WindowUDF, + BuiltInWindowFunction, PartitionEvaluator, WindowFrame, WindowFunctionDefinition, + WindowUDF, }; use datafusion_physical_expr::equivalence::collapse_lex_req; use datafusion_physical_expr::{ @@ -94,7 +94,6 @@ pub fn create_window_expr( fun: &WindowFunctionDefinition, name: String, args: &[Arc], - _logical_args: &[Expr], partition_by: &[Arc], order_by: &[PhysicalSortExpr], window_frame: Arc, @@ -746,7 +745,6 @@ mod tests { 
&[col("a", &schema)?], &[], &[], - &[], Arc::new(WindowFrame::new(None)), schema.as_ref(), false, diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index 6c4c07428bd3..6cbea5f0cfcc 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -22,8 +22,8 @@ use datafusion_common::{ exec_datafusion_err, internal_err, plan_datafusion_err, Result, ScalarValue, TableReference, UnnestOptions, }; -use datafusion_expr::expr::Unnest; use datafusion_expr::expr::{Alias, Placeholder}; +use datafusion_expr::expr::{Unnest, WildcardOptions}; use datafusion_expr::ExprFunctionExt; use datafusion_expr::{ expr::{self, InList, Sort, WindowFunction}, @@ -556,7 +556,10 @@ pub fn parse_expr( ))), ExprType::Wildcard(protobuf::Wildcard { qualifier }) => { let qualifier = qualifier.to_owned().map(|x| x.try_into()).transpose()?; - Ok(Expr::Wildcard { qualifier }) + Ok(Expr::Wildcard { + qualifier, + options: WildcardOptions::default(), + }) } ExprType::ScalarUdfExpr(protobuf::ScalarUdfExprNode { fun_name, diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index ab81ce8af9cb..c7361c89c328 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -582,7 +582,7 @@ pub fn serialize_expr( expr_type: Some(ExprType::InList(expr)), } } - Expr::Wildcard { qualifier } => protobuf::LogicalExprNode { + Expr::Wildcard { qualifier, .. } => protobuf::LogicalExprNode { expr_type: Some(ExprType::Wildcard(protobuf::Wildcard { qualifier: qualifier.to_owned().map(|x| x.into()), })), diff --git a/datafusion/proto/src/physical_plan/from_proto.rs b/datafusion/proto/src/physical_plan/from_proto.rs index bc0a19336bae..b2f92f4b2ee4 100644 --- a/datafusion/proto/src/physical_plan/from_proto.rs +++ b/datafusion/proto/src/physical_plan/from_proto.rs @@ -169,13 +169,10 @@ pub fn parse_physical_window_expr( // TODO: Remove extended_schema if functions are all UDAF let extended_schema = schema_add_window_field(&window_node_expr, input_schema, &fun, &name)?; - // approx_percentile_cont and approx_percentile_cont_weight are not supported for UDAF from protobuf yet. - let logical_exprs = &[]; create_window_expr( &fun, name, &window_node_expr, - logical_exprs, &partition_by, &order_by, Arc::new(window_frame), diff --git a/datafusion/proto/src/physical_plan/mod.rs b/datafusion/proto/src/physical_plan/mod.rs index b5d28f40a68f..0f6722dd375b 100644 --- a/datafusion/proto/src/physical_plan/mod.rs +++ b/datafusion/proto/src/physical_plan/mod.rs @@ -477,7 +477,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { ExprType::AggregateExpr(agg_node) => { let input_phy_expr: Vec> = agg_node.expr.iter() .map(|e| parse_physical_expr(e, registry, &physical_schema, extension_codec)).collect::>>()?; - let _ordering_req: Vec = agg_node.ordering_req.iter() + let ordering_req: Vec = agg_node.ordering_req.iter() .map(|e| parse_physical_sort_expr(e, registry, &physical_schema, extension_codec)).collect::>>()?; agg_node.aggregate_function.as_ref().map(|func| { match func { @@ -487,14 +487,12 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { None => registry.udaf(udaf_name)? }; - // TODO: approx_percentile_cont and approx_percentile_cont_weight are not supported for UDAF from protobuf yet. 
- // TODO: `order by` is not supported for UDAF yet - // https://github.com/apache/datafusion/issues/11804 AggregateExprBuilder::new(agg_udf, input_phy_expr) .schema(Arc::clone(&physical_schema)) .alias(name) .with_ignore_nulls(agg_node.ignore_nulls) .with_distinct(agg_node.distinct) + .order_by(ordering_req) .build() } } diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index a18fa03b2d15..eb7cc5c4b9c5 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -58,7 +58,7 @@ use datafusion_common::{ use datafusion_expr::dml::CopyTo; use datafusion_expr::expr::{ self, Between, BinaryExpr, Case, Cast, GroupingSet, InList, Like, ScalarFunction, - Sort, Unnest, + Sort, Unnest, WildcardOptions, }; use datafusion_expr::logical_plan::{Extension, UserDefinedLogicalNodeCore}; use datafusion_expr::{ @@ -1977,7 +1977,10 @@ fn roundtrip_unnest() { #[test] fn roundtrip_wildcard() { - let test_expr = Expr::Wildcard { qualifier: None }; + let test_expr = Expr::Wildcard { + qualifier: None, + options: WildcardOptions::default(), + }; let ctx = SessionContext::new(); roundtrip_expr_test(test_expr, ctx); @@ -1987,6 +1990,7 @@ fn roundtrip_wildcard() { fn roundtrip_qualified_wildcard() { let test_expr = Expr::Wildcard { qualifier: Some("foo".into()), + options: WildcardOptions::default(), }; let ctx = SessionContext::new(); diff --git a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs index 1a9c6d40ebe6..6766468ef443 100644 --- a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs @@ -25,6 +25,8 @@ use std::vec; use arrow::array::RecordBatch; use arrow::csv::WriterBuilder; use datafusion::physical_expr_functions_aggregate::aggregate::AggregateExprBuilder; +use datafusion_functions_aggregate::approx_percentile_cont::approx_percentile_cont_udaf; +use datafusion_functions_aggregate::array_agg::array_agg_udaf; use datafusion_functions_aggregate::min_max::max_udaf; use prost::Message; @@ -412,6 +414,70 @@ fn rountrip_aggregate_with_limit() -> Result<()> { roundtrip_test(Arc::new(agg)) } +#[test] +fn rountrip_aggregate_with_approx_pencentile_cont() -> Result<()> { + let field_a = Field::new("a", DataType::Int64, false); + let field_b = Field::new("b", DataType::Int64, false); + let schema = Arc::new(Schema::new(vec![field_a, field_b])); + + let groups: Vec<(Arc, String)> = + vec![(col("a", &schema)?, "unused".to_string())]; + + let aggregates: Vec> = vec![AggregateExprBuilder::new( + approx_percentile_cont_udaf(), + vec![col("b", &schema)?, lit(0.5)], + ) + .schema(Arc::clone(&schema)) + .alias("APPROX_PERCENTILE_CONT(b, 0.5)") + .build()?]; + + let agg = AggregateExec::try_new( + AggregateMode::Final, + PhysicalGroupBy::new_single(groups.clone()), + aggregates.clone(), + vec![None], + Arc::new(EmptyExec::new(schema.clone())), + schema, + )?; + roundtrip_test(Arc::new(agg)) +} + +#[test] +fn rountrip_aggregate_with_sort() -> Result<()> { + let field_a = Field::new("a", DataType::Int64, false); + let field_b = Field::new("b", DataType::Int64, false); + let schema = Arc::new(Schema::new(vec![field_a, field_b])); + + let groups: Vec<(Arc, String)> = + vec![(col("a", &schema)?, "unused".to_string())]; + let sort_exprs = vec![PhysicalSortExpr { + expr: col("b", &schema)?, + options: SortOptions { + descending: false, + nulls_first: true, + }, + 
}]; + + let aggregates: Vec> = + vec![ + AggregateExprBuilder::new(array_agg_udaf(), vec![col("b", &schema)?]) + .schema(Arc::clone(&schema)) + .alias("ARRAY_AGG(b)") + .order_by(sort_exprs) + .build()?, + ]; + + let agg = AggregateExec::try_new( + AggregateMode::Final, + PhysicalGroupBy::new_single(groups.clone()), + aggregates.clone(), + vec![None], + Arc::new(EmptyExec::new(schema.clone())), + schema, + )?; + roundtrip_test(Arc::new(agg)) +} + #[test] fn roundtrip_aggregate_udaf() -> Result<()> { let field_a = Field::new("a", DataType::Int64, false); diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index d16d08b041ae..b95414a8cafd 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -22,6 +22,7 @@ use datafusion_common::{ internal_datafusion_err, not_impl_err, plan_datafusion_err, plan_err, DFSchema, Dependency, Result, }; +use datafusion_expr::expr::WildcardOptions; use datafusion_expr::planner::PlannerResult; use datafusion_expr::{ expr, Expr, ExprFunctionExt, ExprSchemable, WindowFrame, WindowFunctionDefinition, @@ -420,13 +421,17 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { name: _, arg: FunctionArgExpr::Wildcard, operator: _, - } => Ok(Expr::Wildcard { qualifier: None }), + } => Ok(Expr::Wildcard { + qualifier: None, + options: WildcardOptions::default(), + }), FunctionArg::Unnamed(FunctionArgExpr::Expr(arg)) => { self.sql_expr_to_logical_expr(arg, schema, planner_context) } - FunctionArg::Unnamed(FunctionArgExpr::Wildcard) => { - Ok(Expr::Wildcard { qualifier: None }) - } + FunctionArg::Unnamed(FunctionArgExpr::Wildcard) => Ok(Expr::Wildcard { + qualifier: None, + options: WildcardOptions::default(), + }), _ => not_impl_err!("Unsupported qualified wildcard argument: {sql:?}"), } } diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index edb0002842a8..7c94e5ead5c3 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -27,10 +27,10 @@ use sqlparser::ast::{ use datafusion_common::{ internal_datafusion_err, internal_err, not_impl_err, plan_err, DFSchema, Result, - ScalarValue, + ScalarValue, TableReference, }; -use datafusion_expr::expr::InList; use datafusion_expr::expr::ScalarFunction; +use datafusion_expr::expr::{InList, WildcardOptions}; use datafusion_expr::{ lit, Between, BinaryExpr, Cast, Expr, ExprSchemable, GetFieldAccess, Like, Literal, Operator, TryCast, @@ -661,6 +661,15 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } not_impl_err!("AnyOp not supported by ExprPlanner: {binary_expr:?}") } + SQLExpr::Wildcard => Ok(Expr::Wildcard { + qualifier: None, + options: WildcardOptions::default(), + }), + SQLExpr::QualifiedWildcard(object_name) => Ok(Expr::Wildcard { + qualifier: Some(TableReference::from(object_name.to_string())), + options: WildcardOptions::default(), + }), + SQLExpr::Tuple(values) => self.parse_tuple(schema, planner_context, values), _ => not_impl_err!("Unsupported ast node in sqltorel: {sql:?}"), } } @@ -670,7 +679,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { &self, schema: &DFSchema, planner_context: &mut PlannerContext, - values: Vec, + values: Vec, fields: Vec, ) -> Result { if !fields.is_empty() { @@ -695,6 +704,23 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { not_impl_err!("Struct not supported by ExprPlanner: {create_struct_args:?}") } + fn parse_tuple( + &self, + schema: &DFSchema, + planner_context: &mut PlannerContext, + values: Vec, + ) -> Result { + match values.first() { + 
Some(SQLExpr::Identifier(_)) | Some(SQLExpr::Value(_)) => { + self.parse_struct(schema, planner_context, values, vec![]) + } + None => not_impl_err!("Empty tuple not supported yet"), + _ => { + not_impl_err!("Only identifiers and literals are supported in tuples") + } + } + } + fn sql_position_to_expr( &self, substr_expr: SQLExpr, diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index 40dd368f9e80..2df8d89c59bc 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -519,14 +519,7 @@ impl<'a> DFParser<'a> { Token::SingleQuotedString(s) => Ok(Value::SingleQuotedString(s)), Token::DoubleQuotedString(s) => Ok(Value::DoubleQuotedString(s)), Token::EscapedStringLiteral(s) => Ok(Value::EscapedStringLiteral(s)), - Token::Number(ref n, l) => match n.parse() { - Ok(n) => Ok(Value::Number(n, l)), - // The tokenizer should have ensured `n` is an integer - // so this should not be possible - Err(e) => parser_err!(format!( - "Unexpected error: could not parse '{n}' as number: {e}" - )), - }, + Token::Number(n, l) => Ok(Value::Number(n, l)), _ => self.parser.expected("string or numeric value", next_token), } } diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index 95a44dace31a..339234d9965c 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -27,23 +27,23 @@ use crate::utils::{ }; use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion}; +use datafusion_common::UnnestOptions; use datafusion_common::{not_impl_err, plan_err, DataFusionError, Result}; -use datafusion_common::{Column, UnnestOptions}; -use datafusion_expr::expr::Alias; +use datafusion_expr::expr::{Alias, PlannedReplaceSelectItem, WildcardOptions}; use datafusion_expr::expr_rewriter::{ normalize_col, normalize_col_with_schemas_and_ambiguity_check, normalize_cols, }; use datafusion_expr::logical_plan::tree_node::unwrap_arc; use datafusion_expr::utils::{ - expand_qualified_wildcard, expand_wildcard, expr_as_column_expr, expr_to_columns, - find_aggregate_exprs, find_window_exprs, + expr_as_column_expr, expr_to_columns, find_aggregate_exprs, find_window_exprs, }; use datafusion_expr::{ - Aggregate, Expr, Filter, GroupingSet, LogicalPlan, LogicalPlanBuilder, Partitioning, + qualified_wildcard_with_options, wildcard_with_options, Aggregate, Expr, Filter, + GroupingSet, LogicalPlan, LogicalPlanBuilder, Partitioning, }; use sqlparser::ast::{ Distinct, Expr as SQLExpr, GroupByExpr, NamedWindowExpr, OrderByExpr, - ReplaceSelectItem, WildcardAdditionalOptions, WindowType, + WildcardAdditionalOptions, WindowType, }; use sqlparser::ast::{NamedWindowDefinition, Select, SelectItem, TableWithJoins}; @@ -82,7 +82,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // handle named windows before processing the projection expression check_conflicting_windows(&select.named_window)?; match_window_definitions(&mut select.projection, &select.named_window)?; - // process the SELECT expressions, with wildcards expanded. + // process the SELECT expressions let select_exprs = self.prepare_select_exprs( &base_plan, select.projection, @@ -515,8 +515,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } /// Returns the `Expr`'s corresponding to a SQL query's SELECT expressions. - /// - /// Wildcards are expanded into the concrete list of columns. 
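Around this point the planner changes stop expanding `*` eagerly during SQL planning and instead emit a single wildcard expression that carries its modifiers, leaving expansion to a later analysis step. The toy types below sketch that idea under simplified assumptions (only an EXCLUDE list is modelled; the real `WildcardOptions` also covers ILIKE/EXCEPT/REPLACE/RENAME), and they are not the DataFusion definitions.

```rust
#[derive(Debug, Clone, Default)]
struct WildcardOptions {
    exclude: Vec<String>,
}

#[derive(Debug, Clone)]
enum Expr {
    Column(String),
    Wildcard {
        qualifier: Option<String>,
        options: WildcardOptions,
    },
}

/// Late expansion against a known schema, honoring the EXCLUDE list.
fn expand(expr: &Expr, schema: &[&str]) -> Vec<Expr> {
    match expr {
        Expr::Column(_) => vec![expr.clone()],
        Expr::Wildcard { qualifier, options } => schema
            .iter()
            .copied()
            .filter(|col| !options.exclude.iter().any(|e| e.as_str() == *col))
            .map(|col| {
                let name = match qualifier {
                    Some(q) => format!("{q}.{col}"),
                    None => col.to_string(),
                };
                Expr::Column(name)
            })
            .collect(),
    }
}

fn main() {
    // `person.*` with `salary` excluded stays a single expression until
    // expansion time, when the schema is known.
    let star = Expr::Wildcard {
        qualifier: Some("person".into()),
        options: WildcardOptions {
            exclude: vec!["salary".into()],
        },
    };
    let cols = expand(&star, &["id", "first_name", "salary"]);
    println!("{cols:?}"); // [Column("person.id"), Column("person.first_name")]
}
```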
fn prepare_select_exprs( &self, plan: &LogicalPlan, @@ -570,49 +568,30 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } SelectItem::Wildcard(options) => { Self::check_wildcard_options(&options)?; - if empty_from { return plan_err!("SELECT * with no tables specified is not valid"); } - // do not expand from outer schema - let expanded_exprs = - expand_wildcard(plan.schema().as_ref(), plan, Some(&options))?; - // If there is a REPLACE statement, replace that column with the given - // replace expression. Column name remains the same. - if let Some(replace) = options.opt_replace { - self.replace_columns( - plan, - empty_from, - planner_context, - expanded_exprs, - replace, - ) - } else { - Ok(expanded_exprs) - } + let planned_options = self.plan_wildcard_options( + plan, + empty_from, + planner_context, + options, + )?; + Ok(vec![wildcard_with_options(planned_options)]) } SelectItem::QualifiedWildcard(object_name, options) => { Self::check_wildcard_options(&options)?; let qualifier = idents_to_table_reference(object_name.0, false)?; - // do not expand from outer schema - let expanded_exprs = expand_qualified_wildcard( - &qualifier, - plan.schema().as_ref(), - Some(&options), + let planned_options = self.plan_wildcard_options( + plan, + empty_from, + planner_context, + options, )?; - // If there is a REPLACE statement, replace that column with the given - // replace expression. Column name remains the same. - if let Some(replace) = options.opt_replace { - self.replace_columns( - plan, - empty_from, - planner_context, - expanded_exprs, - replace, - ) - } else { - Ok(expanded_exprs) - } + Ok(vec![qualified_wildcard_with_options( + qualifier, + planned_options, + )]) } } } @@ -637,40 +616,44 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } /// If there is a REPLACE statement in the projected expression in the form of - /// "REPLACE (some_column_within_an_expr AS some_column)", this function replaces - /// that column with the given replace expression. Column name remains the same. - /// Multiple REPLACEs are also possible with comma separations. - fn replace_columns( + /// "REPLACE (some_column_within_an_expr AS some_column)", we should plan the + /// replace expressions first. + fn plan_wildcard_options( &self, plan: &LogicalPlan, empty_from: bool, planner_context: &mut PlannerContext, - mut exprs: Vec, - replace: ReplaceSelectItem, - ) -> Result> { - for expr in exprs.iter_mut() { - if let Expr::Column(Column { name, .. 
}) = expr { - if let Some(item) = replace - .items - .iter() - .find(|item| item.column_name.value == *name) - { - let new_expr = self.sql_select_to_rex( + options: WildcardAdditionalOptions, + ) -> Result { + let planned_option = WildcardOptions { + ilike: options.opt_ilike, + exclude: options.opt_exclude, + except: options.opt_except, + replace: None, + rename: options.opt_rename, + }; + if let Some(replace) = options.opt_replace { + let replace_expr = replace + .items + .iter() + .map(|item| { + Ok(self.sql_select_to_rex( SelectItem::UnnamedExpr(item.expr.clone()), plan, empty_from, planner_context, )?[0] - .clone(); - *expr = Expr::Alias(Alias { - expr: Box::new(new_expr), - relation: None, - name: name.clone(), - }); - } - } + .clone()) + }) + .collect::>>()?; + let planned_replace = PlannedReplaceSelectItem { + items: replace.items.into_iter().map(|i| *i).collect(), + planned_expressions: replace_expr, + }; + Ok(planned_option.with_replace(planned_replace)) + } else { + Ok(planned_option) } - Ok(exprs) } /// Wrap a plan in a projection @@ -715,7 +698,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let plan = LogicalPlanBuilder::from(input.clone()) .aggregate(group_by_exprs.to_vec(), aggr_exprs.to_vec())? .build()?; - let group_by_exprs = if let LogicalPlan::Aggregate(agg) = &plan { &agg.group_expr } else { diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index de130754ab1a..39511ea4d03a 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -21,11 +21,13 @@ use datafusion_expr::ScalarUDF; use sqlparser::ast::Value::SingleQuotedString; use sqlparser::ast::{ self, BinaryOperator, Expr as AstExpr, Function, FunctionArg, Ident, Interval, - TimezoneInfo, UnaryOperator, + ObjectName, TimezoneInfo, UnaryOperator, }; use std::sync::Arc; use std::{fmt::Display, vec}; +use super::dialect::{DateFieldExtractStyle, IntervalStyle}; +use super::Unparser; use arrow::datatypes::{Decimal128Type, Decimal256Type, DecimalType}; use arrow::util::display::array_value_to_string; use arrow_array::types::{ @@ -44,9 +46,6 @@ use datafusion_expr::{ Between, BinaryExpr, Case, Cast, Expr, GroupingSet, Like, Operator, TryCast, }; -use super::dialect::{DateFieldExtractStyle, IntervalStyle}; -use super::Unparser; - /// DataFusion's Exprs can represent either an `Expr` or an `OrderByExpr` pub enum Unparsed { // SQL Expression @@ -159,7 +158,13 @@ impl Unparser<'_> { let args = args .iter() .map(|e| { - if matches!(e, Expr::Wildcard { qualifier: None }) { + if matches!( + e, + Expr::Wildcard { + qualifier: None, + .. + } + ) { Ok(FunctionArg::Unnamed(ast::FunctionArgExpr::Wildcard)) } else { self.expr_to_sql_inner(e).map(|e| { @@ -477,8 +482,15 @@ impl Unparser<'_> { format: None, }) } - Expr::Wildcard { qualifier: _ } => { - not_impl_err!("Unsupported Expr conversion: {expr:?}") + // TODO: unparsing wildcard addition options + Expr::Wildcard { qualifier, .. } => { + if let Some(qualifier) = qualifier { + let idents: Vec = + qualifier.to_vec().into_iter().map(Ident::new).collect(); + Ok(ast::Expr::QualifiedWildcard(ObjectName(idents))) + } else { + Ok(ast::Expr::Wildcard) + } } Expr::GroupingSet(grouping_set) => match grouping_set { GroupingSet::GroupingSets(grouping_sets) => { @@ -643,7 +655,13 @@ impl Unparser<'_> { fn function_args_to_sql(&self, args: &[Expr]) -> Result> { args.iter() .map(|e| { - if matches!(e, Expr::Wildcard { qualifier: None }) { + if matches!( + e, + Expr::Wildcard { + qualifier: None, + .. 
+ } + ) { Ok(ast::FunctionArg::Unnamed(ast::FunctionArgExpr::Wildcard)) } else { self.expr_to_sql(e) @@ -1503,6 +1521,7 @@ mod tests { use arrow_schema::DataType::Int8; use ast::ObjectName; use datafusion_common::TableReference; + use datafusion_expr::expr::WildcardOptions; use datafusion_expr::{ case, col, cube, exists, grouping_set, interval_datetime_lit, interval_year_month_lit, lit, not, not_exists, out_ref_col, placeholder, rollup, @@ -1558,7 +1577,10 @@ mod tests { fn expr_to_sql_ok() -> Result<()> { let dummy_schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); let dummy_logical_plan = table_scan(Some("t"), &dummy_schema, None)? - .project(vec![Expr::Wildcard { qualifier: None }])? + .project(vec![Expr::Wildcard { + qualifier: None, + options: WildcardOptions::default(), + }])? .filter(col("a").eq(lit(1)))? .build()?; @@ -1749,7 +1771,10 @@ mod tests { (sum(col("a")), r#"sum(a)"#), ( count_udaf() - .call(vec![Expr::Wildcard { qualifier: None }]) + .call(vec![Expr::Wildcard { + qualifier: None, + options: WildcardOptions::default(), + }]) .distinct() .build() .unwrap(), @@ -1757,7 +1782,10 @@ mod tests { ), ( count_udaf() - .call(vec![Expr::Wildcard { qualifier: None }]) + .call(vec![Expr::Wildcard { + qualifier: None, + options: WildcardOptions::default(), + }]) .filter(lit(true)) .build() .unwrap(), @@ -1833,11 +1861,11 @@ mod tests { (Expr::Negative(Box::new(col("a"))), r#"-a"#), ( exists(Arc::new(dummy_logical_plan.clone())), - r#"EXISTS (SELECT t.a FROM t WHERE (t.a = 1))"#, + r#"EXISTS (SELECT * FROM t WHERE (t.a = 1))"#, ), ( not_exists(Arc::new(dummy_logical_plan.clone())), - r#"NOT EXISTS (SELECT t.a FROM t WHERE (t.a = 1))"#, + r#"NOT EXISTS (SELECT * FROM t WHERE (t.a = 1))"#, ), ( try_cast(col("a"), DataType::Date64), diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index 277efd5fe700..024f33fb2c7d 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -359,18 +359,14 @@ impl Unparser<'_> { .iter() .map(|e| self.select_item_to_sql(e)) .collect::>>()?; - match &on.sort_expr { - Some(sort_expr) => { - if let Some(query_ref) = query { - query_ref - .order_by(self.sort_to_sql(sort_expr.clone())?); - } else { - return internal_err!( - "Sort operator only valid in a statement context." - ); - } + if let Some(sort_expr) = &on.sort_expr { + if let Some(query_ref) = query { + query_ref.order_by(self.sort_to_sql(sort_expr.clone())?); + } else { + return internal_err!( + "Sort operator only valid in a statement context." 
+ ); } - None => {} } select.projection(items); (ast::Distinct::On(exprs), on.input.as_ref()) diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index 179fc108e6d2..ed23fada0cfb 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -33,7 +33,7 @@ use datafusion_functions::core::planner::CoreFunctionPlanner; use sqlparser::dialect::{Dialect, GenericDialect, MySqlDialect}; use sqlparser::parser::Parser; -use crate::common::MockContextProvider; +use crate::common::{MockContextProvider, MockSessionState}; #[test] fn roundtrip_expr() { @@ -59,8 +59,8 @@ fn roundtrip_expr() { let roundtrip = |table, sql: &str| -> Result { let dialect = GenericDialect {}; let sql_expr = Parser::new(&dialect).try_with_sql(sql)?.parse_expr()?; - - let context = MockContextProvider::default().with_udaf(sum_udaf()); + let state = MockSessionState::default().with_aggregate_function(sum_udaf()); + let context = MockContextProvider { state }; let schema = context.get_table_source(table)?.schema(); let df_schema = DFSchema::try_from(schema.as_ref().clone())?; let sql_to_rel = SqlToRel::new(&context); @@ -156,11 +156,11 @@ fn roundtrip_statement() -> Result<()> { let statement = Parser::new(&dialect) .try_with_sql(query)? .parse_statement()?; - - let context = MockContextProvider::default() - .with_udaf(sum_udaf()) - .with_udaf(count_udaf()) + let state = MockSessionState::default() + .with_aggregate_function(sum_udaf()) + .with_aggregate_function(count_udaf()) .with_expr_planner(Arc::new(CoreFunctionPlanner::default())); + let context = MockContextProvider { state }; let sql_to_rel = SqlToRel::new(&context); let plan = sql_to_rel.sql_statement_to_plan(statement).unwrap(); @@ -189,8 +189,10 @@ fn roundtrip_crossjoin() -> Result<()> { .try_with_sql(query)? .parse_statement()?; - let context = MockContextProvider::default() + let state = MockSessionState::default() .with_expr_planner(Arc::new(CoreFunctionPlanner::default())); + + let context = MockContextProvider { state }; let sql_to_rel = SqlToRel::new(&context); let plan = sql_to_rel.sql_statement_to_plan(statement).unwrap(); @@ -412,10 +414,12 @@ fn roundtrip_statement_with_dialect() -> Result<()> { .try_with_sql(query.sql)? .parse_statement()?; - let context = MockContextProvider::default() - .with_expr_planner(Arc::new(CoreFunctionPlanner::default())) - .with_udaf(max_udaf()) - .with_udaf(min_udaf()); + let state = MockSessionState::default() + .with_aggregate_function(max_udaf()) + .with_aggregate_function(min_udaf()) + .with_expr_planner(Arc::new(CoreFunctionPlanner::default())); + + let context = MockContextProvider { state }; let sql_to_rel = SqlToRel::new(&context); let plan = sql_to_rel .sql_statement_to_plan(statement) @@ -443,7 +447,9 @@ fn test_unnest_logical_plan() -> Result<()> { .try_with_sql(query)? 
.parse_statement()?; - let context = MockContextProvider::default(); + let context = MockContextProvider { + state: MockSessionState::default(), + }; let sql_to_rel = SqlToRel::new(&context); let plan = sql_to_rel.sql_statement_to_plan(statement).unwrap(); @@ -516,7 +522,9 @@ fn test_pretty_roundtrip() -> Result<()> { let df_schema = DFSchema::try_from(schema)?; - let context = MockContextProvider::default(); + let context = MockContextProvider { + state: MockSessionState::default(), + }; let sql_to_rel = SqlToRel::new(&context); let unparser = Unparser::default().with_pretty(true); @@ -589,7 +597,9 @@ fn sql_round_trip(query: &str, expect: &str) { .parse_statement() .unwrap(); - let context = MockContextProvider::default(); + let context = MockContextProvider { + state: MockSessionState::default(), + }; let sql_to_rel = SqlToRel::new(&context); let plan = sql_to_rel.sql_statement_to_plan(statement).unwrap(); diff --git a/datafusion/sql/tests/common/mod.rs b/datafusion/sql/tests/common/mod.rs index 374aa9db6714..fe0e5f7283a4 100644 --- a/datafusion/sql/tests/common/mod.rs +++ b/datafusion/sql/tests/common/mod.rs @@ -50,36 +50,40 @@ impl Display for MockCsvType { } #[derive(Default)] -pub(crate) struct MockContextProvider { - options: ConfigOptions, - udfs: HashMap>, - udafs: HashMap>, +pub(crate) struct MockSessionState { + scalar_functions: HashMap>, + aggregate_functions: HashMap>, expr_planners: Vec>, + pub config_options: ConfigOptions, } -impl MockContextProvider { - // Suppressing dead code warning, as this is used in integration test crates - #[allow(dead_code)] - pub(crate) fn options_mut(&mut self) -> &mut ConfigOptions { - &mut self.options +impl MockSessionState { + pub fn with_expr_planner(mut self, expr_planner: Arc) -> Self { + self.expr_planners.push(expr_planner); + self } - #[allow(dead_code)] - pub(crate) fn with_udf(mut self, udf: ScalarUDF) -> Self { - self.udfs.insert(udf.name().to_string(), Arc::new(udf)); + pub fn with_scalar_function(mut self, scalar_function: Arc) -> Self { + self.scalar_functions + .insert(scalar_function.name().to_string(), scalar_function); self } - pub(crate) fn with_udaf(mut self, udaf: Arc) -> Self { + pub fn with_aggregate_function( + mut self, + aggregate_function: Arc, + ) -> Self { // TODO: change to to_string() if all the function name is converted to lowercase - self.udafs.insert(udaf.name().to_lowercase(), udaf); + self.aggregate_functions.insert( + aggregate_function.name().to_string().to_lowercase(), + aggregate_function, + ); self } +} - pub(crate) fn with_expr_planner(mut self, planner: Arc) -> Self { - self.expr_planners.push(planner); - self - } +pub(crate) struct MockContextProvider { + pub(crate) state: MockSessionState, } impl ContextProvider for MockContextProvider { @@ -202,11 +206,11 @@ impl ContextProvider for MockContextProvider { } fn get_function_meta(&self, name: &str) -> Option> { - self.udfs.get(name).cloned() + self.state.scalar_functions.get(name).cloned() } fn get_aggregate_meta(&self, name: &str) -> Option> { - self.udafs.get(name).cloned() + self.state.aggregate_functions.get(name).cloned() } fn get_variable_type(&self, _: &[String]) -> Option { @@ -218,7 +222,7 @@ impl ContextProvider for MockContextProvider { } fn options(&self) -> &ConfigOptions { - &self.options + &self.state.config_options } fn get_file_type( @@ -237,11 +241,11 @@ impl ContextProvider for MockContextProvider { } fn udf_names(&self) -> Vec { - self.udfs.keys().cloned().collect() + self.state.scalar_functions.keys().cloned().collect() } fn 
udaf_names(&self) -> Vec { - self.udafs.keys().cloned().collect() + self.state.aggregate_functions.keys().cloned().collect() } fn udwf_names(&self) -> Vec { @@ -249,7 +253,7 @@ impl ContextProvider for MockContextProvider { } fn get_expr_planners(&self) -> &[Arc] { - &self.expr_planners + &self.state.expr_planners } } diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 4d7e60805657..7ce3565fa29f 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -41,6 +41,7 @@ use datafusion_sql::{ planner::{ParserOptions, SqlToRel}, }; +use crate::common::MockSessionState; use datafusion_functions::core::planner::CoreFunctionPlanner; use datafusion_functions_aggregate::{ approx_median::approx_median_udaf, count::count_udaf, min_max::max_udaf, @@ -57,7 +58,7 @@ mod common; fn test_schema_support() { quick_test( "SELECT * FROM s1.test", - "Projection: s1.test.t_date32, s1.test.t_date64\ + "Projection: *\ \n TableScan: s1.test", ); } @@ -516,7 +517,7 @@ fn plan_copy_to_query() { let plan = r#" CopyTo: format=csv output_url=output.csv options: () Limit: skip=0, fetch=10 - Projection: test_decimal.id, test_decimal.price + Projection: * TableScan: test_decimal "# .trim(); @@ -636,23 +637,13 @@ fn select_repeated_column() { ); } -#[test] -fn select_wildcard_with_repeated_column() { - let sql = "SELECT *, age FROM person"; - let err = logical_plan(sql).expect_err("query should have failed"); - assert_eq!( - "Error during planning: Projections require unique expression names but the expression \"person.age\" at position 3 and \"person.age\" at position 8 have the same name. Consider aliasing (\"AS\") one of them.", - err.strip_backtrace() - ); -} - #[test] fn select_wildcard_with_repeated_column_but_is_aliased() { quick_test( - "SELECT *, first_name AS fn from person", - "Projection: person.id, person.first_name, person.last_name, person.age, person.state, person.salary, person.birth_date, person.😀, person.first_name AS fn\ + "SELECT *, first_name AS fn from person", + "Projection: *, person.first_name AS fn\ \n TableScan: person", - ); + ); } #[test] @@ -869,7 +860,7 @@ fn where_selection_with_ambiguous_column() { #[test] fn natural_join() { let sql = "SELECT * FROM lineitem a NATURAL JOIN lineitem b"; - let expected = "Projection: a.l_item_id, a.l_description, a.price\ + let expected = "Projection: *\ \n Inner Join: Using a.l_item_id = b.l_item_id, a.l_description = b.l_description, a.price = b.price\ \n SubqueryAlias: a\ \n TableScan: lineitem\ @@ -905,7 +896,7 @@ fn natural_right_join() { #[test] fn natural_join_no_common_becomes_cross_join() { let sql = "SELECT * FROM person a NATURAL JOIN lineitem b"; - let expected = "Projection: a.id, a.first_name, a.last_name, a.age, a.state, a.salary, a.birth_date, a.😀, b.l_item_id, b.l_description, b.price\ + let expected = "Projection: *\ \n CrossJoin:\ \n SubqueryAlias: a\ \n TableScan: person\ @@ -917,8 +908,7 @@ fn natural_join_no_common_becomes_cross_join() { #[test] fn using_join_multiple_keys() { let sql = "SELECT * FROM person a join person b using (id, age)"; - let expected = "Projection: a.id, a.first_name, a.last_name, a.age, a.state, a.salary, a.birth_date, a.😀, \ - b.first_name, b.last_name, b.state, b.salary, b.birth_date, b.😀\ + let expected = "Projection: *\ \n Inner Join: Using a.id = b.id, a.age = b.age\ \n SubqueryAlias: a\ \n TableScan: person\ @@ -932,8 +922,7 @@ fn using_join_multiple_keys_subquery() { let sql = "SELECT age FROM (SELECT * FROM person 
a join person b using (id, age, state))"; let expected = "Projection: a.age\ - \n Projection: a.id, a.first_name, a.last_name, a.age, a.state, a.salary, a.birth_date, a.😀, \ - b.first_name, b.last_name, b.salary, b.birth_date, b.😀\ + \n Projection: *\ \n Inner Join: Using a.id = b.id, a.age = b.age, a.state = b.state\ \n SubqueryAlias: a\ \n TableScan: person\ @@ -945,8 +934,7 @@ fn using_join_multiple_keys_subquery() { #[test] fn using_join_multiple_keys_qualified_wildcard_select() { let sql = "SELECT a.* FROM person a join person b using (id, age)"; - let expected = - "Projection: a.id, a.first_name, a.last_name, a.age, a.state, a.salary, a.birth_date, a.😀\ + let expected = "Projection: a.*\ \n Inner Join: Using a.id = b.id, a.age = b.age\ \n SubqueryAlias: a\ \n TableScan: person\ @@ -958,8 +946,7 @@ fn using_join_multiple_keys_qualified_wildcard_select() { #[test] fn using_join_multiple_keys_select_all_columns() { let sql = "SELECT a.*, b.* FROM person a join person b using (id, age)"; - let expected = "Projection: a.id, a.first_name, a.last_name, a.age, a.state, a.salary, a.birth_date, a.😀, \ - b.id, b.first_name, b.last_name, b.age, b.state, b.salary, b.birth_date, b.😀\ + let expected = "Projection: a.*, b.*\ \n Inner Join: Using a.id = b.id, a.age = b.age\ \n SubqueryAlias: a\ \n TableScan: person\ @@ -971,9 +958,7 @@ fn using_join_multiple_keys_select_all_columns() { #[test] fn using_join_multiple_keys_multiple_joins() { let sql = "SELECT * FROM person a join person b using (id, age, state) join person c using (id, age, state)"; - let expected = "Projection: a.id, a.first_name, a.last_name, a.age, a.state, a.salary, a.birth_date, a.😀, \ - b.first_name, b.last_name, b.salary, b.birth_date, b.😀, \ - c.first_name, c.last_name, c.salary, c.birth_date, c.😀\ + let expected = "Projection: *\ \n Inner Join: Using a.id = c.id, a.age = c.age, a.state = c.state\ \n Inner Join: Using a.id = b.id, a.age = b.age, a.state = b.state\ \n SubqueryAlias: a\ @@ -1304,13 +1289,13 @@ fn select_binary_expr_nested() { fn select_wildcard_with_groupby() { quick_test( r#"SELECT * FROM person GROUP BY id, first_name, last_name, age, state, salary, birth_date, "😀""#, - "Projection: person.id, person.first_name, person.last_name, person.age, person.state, person.salary, person.birth_date, person.😀\ + "Projection: *\ \n Aggregate: groupBy=[[person.id, person.first_name, person.last_name, person.age, person.state, person.salary, person.birth_date, person.😀]], aggr=[[]]\ \n TableScan: person", ); quick_test( "SELECT * FROM (SELECT first_name, last_name FROM person) AS a GROUP BY first_name, last_name", - "Projection: a.first_name, a.last_name\ + "Projection: *\ \n Aggregate: groupBy=[[a.first_name, a.last_name]], aggr=[[]]\ \n SubqueryAlias: a\ \n Projection: person.first_name, person.last_name\ @@ -1473,7 +1458,7 @@ fn recursive_ctes() { select * from numbers;"; quick_test( sql, - "Projection: numbers.n\ + "Projection: *\ \n SubqueryAlias: numbers\ \n RecursiveQuery: is_distinct=false\ \n Projection: Int64(1) AS n\ @@ -1495,8 +1480,9 @@ fn recursive_ctes_disabled() { select * from numbers;"; // manually setting up test here so that we can disable recursive ctes - let mut context = MockContextProvider::default(); - context.options_mut().execution.enable_recursive_ctes = false; + let mut state = MockSessionState::default(); + state.config_options.execution.enable_recursive_ctes = false; + let context = MockContextProvider { state }; let planner = SqlToRel::new_with_options(&context, ParserOptions::default()); let 
result = DFParser::parse_sql_with_dialect(sql, &GenericDialect {}); @@ -1685,10 +1671,10 @@ fn select_aggregate_with_non_column_inner_expression_with_groupby() { #[test] fn test_wildcard() { quick_test( - "SELECT * from person", - "Projection: person.id, person.first_name, person.last_name, person.age, person.state, person.salary, person.birth_date, person.😀\ + "SELECT * from person", + "Projection: *\ \n TableScan: person", - ); + ); } #[test] @@ -2116,7 +2102,7 @@ fn project_wildcard_on_join_with_using() { FROM lineitem \ JOIN lineitem as lineitem2 \ USING (l_item_id)"; - let expected = "Projection: lineitem.l_item_id, lineitem.l_description, lineitem.price, lineitem2.l_description, lineitem2.price\ + let expected = "Projection: *\ \n Inner Join: Using lineitem.l_item_id = lineitem2.l_item_id\ \n TableScan: lineitem\ \n SubqueryAlias: lineitem2\ @@ -2174,148 +2160,6 @@ fn union_all() { quick_test(sql, expected); } -#[test] -fn union_with_different_column_names() { - let sql = "SELECT order_id from orders UNION ALL SELECT customer_id FROM orders"; - let expected = "Union\ - \n Projection: orders.order_id\ - \n TableScan: orders\ - \n Projection: orders.customer_id AS order_id\ - \n TableScan: orders"; - quick_test(sql, expected); -} - -#[test] -fn union_values_with_no_alias() { - let sql = "SELECT 1, 2 UNION ALL SELECT 3, 4"; - let expected = "Union\ - \n Projection: Int64(1) AS Int64(1), Int64(2) AS Int64(2)\ - \n EmptyRelation\ - \n Projection: Int64(3) AS Int64(1), Int64(4) AS Int64(2)\ - \n EmptyRelation"; - quick_test(sql, expected); -} - -#[test] -fn union_with_incompatible_data_type() { - let sql = "SELECT interval '1 year 1 day' UNION ALL SELECT 1"; - let err = logical_plan(sql) - .expect_err("query should have failed") - .strip_backtrace(); - assert_eq!( - "Error during planning: UNION Column Int64(1) (type: Int64) is not compatible with column IntervalMonthDayNano(\"IntervalMonthDayNano { months: 12, days: 1, nanoseconds: 0 }\") (type: Interval(MonthDayNano))", - err - ); -} - -#[test] -fn union_with_different_decimal_data_types() { - let sql = "SELECT 1 a UNION ALL SELECT 1.1 a"; - let expected = "Union\ - \n Projection: CAST(Int64(1) AS Float64) AS a\ - \n EmptyRelation\ - \n Projection: Float64(1.1) AS a\ - \n EmptyRelation"; - quick_test(sql, expected); -} - -#[test] -fn union_with_null() { - let sql = "SELECT NULL a UNION ALL SELECT 1.1 a"; - let expected = "Union\ - \n Projection: CAST(NULL AS Float64) AS a\ - \n EmptyRelation\ - \n Projection: Float64(1.1) AS a\ - \n EmptyRelation"; - quick_test(sql, expected); -} - -#[test] -fn union_with_float_and_string() { - let sql = "SELECT 'a' a UNION ALL SELECT 1.1 a"; - let expected = "Union\ - \n Projection: Utf8(\"a\") AS a\ - \n EmptyRelation\ - \n Projection: CAST(Float64(1.1) AS Utf8) AS a\ - \n EmptyRelation"; - quick_test(sql, expected); -} - -#[test] -fn union_with_multiply_cols() { - let sql = "SELECT 'a' a, 1 b UNION ALL SELECT 1.1 a, 1.1 b"; - let expected = "Union\ - \n Projection: Utf8(\"a\") AS a, CAST(Int64(1) AS Float64) AS b\ - \n EmptyRelation\ - \n Projection: CAST(Float64(1.1) AS Utf8) AS a, Float64(1.1) AS b\ - \n EmptyRelation"; - quick_test(sql, expected); -} - -#[test] -fn sorted_union_with_different_types_and_group_by() { - let sql = "SELECT a FROM (select 1 a) x GROUP BY 1 UNION ALL (SELECT a FROM (select 1.1 a) x GROUP BY 1) ORDER BY 1"; - let expected = "Sort: x.a ASC NULLS LAST\ - \n Union\ - \n Projection: CAST(x.a AS Float64) AS a\ - \n Aggregate: groupBy=[[x.a]], aggr=[[]]\ - \n SubqueryAlias: x\ - 
\n Projection: Int64(1) AS a\ - \n EmptyRelation\ - \n Projection: x.a\ - \n Aggregate: groupBy=[[x.a]], aggr=[[]]\ - \n SubqueryAlias: x\ - \n Projection: Float64(1.1) AS a\ - \n EmptyRelation"; - quick_test(sql, expected); -} - -#[test] -fn union_with_binary_expr_and_cast() { - let sql = "SELECT cast(0.0 + a as integer) FROM (select 1 a) x GROUP BY 1 UNION ALL (SELECT 2.1 + a FROM (select 1 a) x GROUP BY 1)"; - let expected = "Union\ - \n Projection: CAST(Float64(0) + x.a AS Float64) AS Float64(0) + x.a\ - \n Aggregate: groupBy=[[CAST(Float64(0) + x.a AS Int32)]], aggr=[[]]\ - \n SubqueryAlias: x\ - \n Projection: Int64(1) AS a\ - \n EmptyRelation\ - \n Projection: Float64(2.1) + x.a AS Float64(0) + x.a\ - \n Aggregate: groupBy=[[Float64(2.1) + x.a]], aggr=[[]]\ - \n SubqueryAlias: x\ - \n Projection: Int64(1) AS a\ - \n EmptyRelation"; - quick_test(sql, expected); -} - -#[test] -fn union_with_aliases() { - let sql = "SELECT a as a1 FROM (select 1 a) x GROUP BY 1 UNION ALL (SELECT a as a1 FROM (select 1.1 a) x GROUP BY 1)"; - let expected = "Union\ - \n Projection: CAST(x.a AS Float64) AS a1\ - \n Aggregate: groupBy=[[x.a]], aggr=[[]]\ - \n SubqueryAlias: x\ - \n Projection: Int64(1) AS a\ - \n EmptyRelation\ - \n Projection: x.a AS a1\ - \n Aggregate: groupBy=[[x.a]], aggr=[[]]\ - \n SubqueryAlias: x\ - \n Projection: Float64(1.1) AS a\ - \n EmptyRelation"; - quick_test(sql, expected); -} - -#[test] -fn union_with_incompatible_data_types() { - let sql = "SELECT 'a' a UNION ALL SELECT true a"; - let err = logical_plan(sql) - .expect_err("query should have failed") - .strip_backtrace(); - assert_eq!( - "Error during planning: UNION Column a (type: Boolean) is not compatible with column a (type: Utf8)", - err - ); -} - #[test] fn empty_over() { let sql = "SELECT order_id, MAX(order_id) OVER () from orders"; @@ -2727,7 +2571,8 @@ fn logical_plan_with_options(sql: &str, options: ParserOptions) -> Result Result { - let context = MockContextProvider::default().with_udaf(sum_udaf()); + let state = MockSessionState::default().with_aggregate_function(sum_udaf()); + let context = MockContextProvider { state }; let planner = SqlToRel::new(&context); let result = DFParser::parse_sql_with_dialect(sql, dialect); let mut ast = result?; @@ -2739,39 +2584,44 @@ fn logical_plan_with_dialect_and_options( dialect: &dyn Dialect, options: ParserOptions, ) -> Result { - let context = MockContextProvider::default() - .with_udf(unicode::character_length().as_ref().clone()) - .with_udf(string::concat().as_ref().clone()) - .with_udf(make_udf( + let state = MockSessionState::default() + .with_scalar_function(Arc::new(unicode::character_length().as_ref().clone())) + .with_scalar_function(Arc::new(string::concat().as_ref().clone())) + .with_scalar_function(Arc::new(make_udf( "nullif", vec![DataType::Int32, DataType::Int32], DataType::Int32, - )) - .with_udf(make_udf( + ))) + .with_scalar_function(Arc::new(make_udf( "round", vec![DataType::Float64, DataType::Int64], DataType::Float32, - )) - .with_udf(make_udf( + ))) + .with_scalar_function(Arc::new(make_udf( "arrow_cast", vec![DataType::Int64, DataType::Utf8], DataType::Float64, - )) - .with_udf(make_udf( + ))) + .with_scalar_function(Arc::new(make_udf( "date_trunc", vec![DataType::Utf8, DataType::Timestamp(Nanosecond, None)], DataType::Int32, - )) - .with_udf(make_udf("sqrt", vec![DataType::Int64], DataType::Int64)) - .with_udaf(sum_udaf()) - .with_udaf(approx_median_udaf()) - .with_udaf(count_udaf()) - .with_udaf(avg_udaf()) - .with_udaf(min_udaf()) - 
.with_udaf(max_udaf()) - .with_udaf(grouping_udaf()) + ))) + .with_scalar_function(Arc::new(make_udf( + "sqrt", + vec![DataType::Int64], + DataType::Int64, + ))) + .with_aggregate_function(sum_udaf()) + .with_aggregate_function(approx_median_udaf()) + .with_aggregate_function(count_udaf()) + .with_aggregate_function(avg_udaf()) + .with_aggregate_function(min_udaf()) + .with_aggregate_function(max_udaf()) + .with_aggregate_function(grouping_udaf()) .with_expr_planner(Arc::new(CoreFunctionPlanner::default())); + let context = MockContextProvider { state }; let planner = SqlToRel::new_with_options(&context, options); let result = DFParser::parse_sql_with_dialect(sql, dialect); let mut ast = result?; @@ -2997,7 +2847,7 @@ fn exists_subquery_wildcard() { let expected = "Projection: p.id\ \n Filter: EXISTS ()\ \n Subquery:\ - \n Projection: person.id, person.first_name, person.last_name, person.age, person.state, person.salary, person.birth_date, person.😀\ + \n Projection: *\ \n Filter: person.last_name = outer_ref(p.last_name) AND person.state = outer_ref(p.state)\ \n TableScan: person\ \n SubqueryAlias: p\ @@ -3084,13 +2934,13 @@ fn subquery_references_cte() { cte AS (SELECT * FROM person) \ SELECT * FROM person WHERE EXISTS (SELECT * FROM cte WHERE id = person.id)"; - let expected = "Projection: person.id, person.first_name, person.last_name, person.age, person.state, person.salary, person.birth_date, person.😀\ + let expected = "Projection: *\ \n Filter: EXISTS ()\ \n Subquery:\ - \n Projection: cte.id, cte.first_name, cte.last_name, cte.age, cte.state, cte.salary, cte.birth_date, cte.😀\ + \n Projection: *\ \n Filter: cte.id = outer_ref(person.id)\ \n SubqueryAlias: cte\ - \n Projection: person.id, person.first_name, person.last_name, person.age, person.state, person.salary, person.birth_date, person.😀\ + \n Projection: *\ \n TableScan: person\ \n TableScan: person"; @@ -3105,7 +2955,7 @@ fn cte_with_no_column_names() { ) \ SELECT * FROM numbers;"; - let expected = "Projection: numbers.a, numbers.b, numbers.c\ + let expected = "Projection: *\ \n SubqueryAlias: numbers\ \n Projection: Int64(1) AS a, Int64(2) AS b, Int64(3) AS c\ \n EmptyRelation"; @@ -3121,7 +2971,7 @@ fn cte_with_column_names() { ) \ SELECT * FROM numbers;"; - let expected = "Projection: numbers.a, numbers.b, numbers.c\ + let expected = "Projection: *\ \n SubqueryAlias: numbers\ \n Projection: Int64(1) AS a, Int64(2) AS b, Int64(3) AS c\ \n Projection: Int64(1), Int64(2), Int64(3)\ @@ -3139,7 +2989,7 @@ fn cte_with_column_aliases_precedence() { ) \ SELECT * FROM numbers;"; - let expected = "Projection: numbers.a, numbers.b, numbers.c\ + let expected = "Projection: *\ \n SubqueryAlias: numbers\ \n Projection: x AS a, y AS b, z AS c\ \n Projection: Int64(1) AS x, Int64(2) AS y, Int64(3) AS z\ @@ -3520,7 +3370,7 @@ fn test_select_all_inner_join() { INNER JOIN orders \ ON orders.customer_id * 2 = person.id + 10"; - let expected = "Projection: person.id, person.first_name, person.last_name, person.age, person.state, person.salary, person.birth_date, person.😀, orders.order_id, orders.customer_id, orders.o_item_id, orders.qty, orders.price, orders.delivered\ + let expected = "Projection: *\ \n Inner Join: Filter: orders.customer_id * Int64(2) = person.id + Int64(10)\ \n TableScan: person\ \n TableScan: orders"; @@ -4237,7 +4087,7 @@ fn test_prepare_statement_to_plan_value_list() { let sql = "PREPARE my_plan(STRING, STRING) AS SELECT * FROM (VALUES(1, $1), (2, $2)) AS t (num, letter);"; let expected_plan = "Prepare: \"my_plan\" 
[Utf8, Utf8] \ - \n Projection: t.num, t.letter\ + \n Projection: *\ \n SubqueryAlias: t\ \n Projection: column1 AS num, column2 AS letter\ \n Values: (Int64(1), $1), (Int64(2), $2)"; @@ -4252,7 +4102,7 @@ fn test_prepare_statement_to_plan_value_list() { ScalarValue::from("a".to_string()), ScalarValue::from("b".to_string()), ]; - let expected_plan = "Projection: t.num, t.letter\ + let expected_plan = "Projection: *\ \n SubqueryAlias: t\ \n Projection: column1 AS num, column2 AS letter\ \n Values: (Int64(1), Utf8(\"a\")), (Int64(2), Utf8(\"b\"))"; @@ -4302,7 +4152,7 @@ fn test_table_alias() { (select age from person) t2 \ ) as f"; - let expected = "Projection: f.id, f.age\ + let expected = "Projection: *\ \n SubqueryAlias: f\ \n CrossJoin:\ \n SubqueryAlias: t1\ @@ -4319,7 +4169,7 @@ fn test_table_alias() { (select age from person) t2 \ ) as f (c1, c2)"; - let expected = "Projection: f.c1, f.c2\ + let expected = "Projection: *\ \n SubqueryAlias: f\ \n Projection: t1.id AS c1, t2.age AS c2\ \n CrossJoin:\ diff --git a/datafusion/sqllogictest/src/engines/datafusion_engine/runner.rs b/datafusion/sqllogictest/src/engines/datafusion_engine/runner.rs index afd0a241ca5e..5c24b49cfe86 100644 --- a/datafusion/sqllogictest/src/engines/datafusion_engine/runner.rs +++ b/datafusion/sqllogictest/src/engines/datafusion_engine/runner.rs @@ -15,10 +15,13 @@ // specific language governing permissions and limitations // under the License. +use std::sync::Arc; use std::{path::PathBuf, time::Duration}; use arrow::record_batch::RecordBatch; use async_trait::async_trait; +use datafusion::physical_plan::common::collect; +use datafusion::physical_plan::execute_stream; use datafusion::prelude::SessionContext; use log::info; use sqllogictest::DBOutput; @@ -69,9 +72,12 @@ impl sqllogictest::AsyncDB for DataFusion { async fn run_query(ctx: &SessionContext, sql: impl Into) -> Result { let df = ctx.sql(sql.into().as_str()).await?; + let task_ctx = Arc::new(df.task_ctx()); + let plan = df.create_physical_plan().await?; - let types = normalize::convert_schema_to_types(df.schema().fields()); - let results: Vec = df.collect().await?; + let stream = execute_stream(plan, task_ctx)?; + let types = normalize::convert_schema_to_types(stream.schema().fields()); + let results: Vec = collect(stream).await?; let rows = normalize::convert_batches(results)?; if rows.is_empty() && types.is_empty() { diff --git a/datafusion/sqllogictest/test_files/aggregate_skip_partial.slt b/datafusion/sqllogictest/test_files/aggregate_skip_partial.slt index 6c0cf5f800d8..ba378f4230f8 100644 --- a/datafusion/sqllogictest/test_files/aggregate_skip_partial.slt +++ b/datafusion/sqllogictest/test_files/aggregate_skip_partial.slt @@ -209,6 +209,21 @@ SELECT c2, sum(c3), sum(c11) FROM aggregate_test_100 GROUP BY c2 ORDER BY c2; 4 29 9.531112968922 5 -194 7.074412226677 +# Test avg for tinyint / float +query TRR +SELECT + c1, + avg(c2), + avg(c11) +FROM aggregate_test_100 GROUP BY c1 ORDER BY c1; +---- +a 2.857142857143 0.438223421574 +b 3.263157894737 0.496481208425 +c 2.666666666667 0.425241138254 +d 2.444444444444 0.541519476308 +e 3 0.505440263521 + + # Enabling PG dialect for filtered aggregates tests statement ok set datafusion.sql_parser.dialect = 'Postgres'; @@ -267,6 +282,20 @@ FROM aggregate_test_100_null GROUP BY c2 ORDER BY c2; 4 11 14 5 8 7 +# Test avg for tinyint / float +query TRR +SELECT + c1, + avg(c2) FILTER (WHERE c2 != 5), + avg(c11) FILTER (WHERE c2 != 5) +FROM aggregate_test_100 GROUP BY c1 ORDER BY c1; +---- +a 2.5 0.449071887467 +b 
2.642857142857 0.445486298629 +c 2.421052631579 0.422882117723 +d 2.125 0.518706191331 +e 2.789473684211 0.536785323369 + # Test count with nullable fields and nullable filter query III SELECT c2, diff --git a/datafusion/sqllogictest/test_files/coalesce.slt b/datafusion/sqllogictest/test_files/coalesce.slt index d16b79734c62..0e977666ccfd 100644 --- a/datafusion/sqllogictest/test_files/coalesce.slt +++ b/datafusion/sqllogictest/test_files/coalesce.slt @@ -23,7 +23,7 @@ select coalesce(1, 2, 3); 1 # test with first null -query ?T +query IT select coalesce(null, 3, 2, 1), arrow_typeof(coalesce(null, 3, 2, 1)); ---- 3 Int64 @@ -35,7 +35,7 @@ select coalesce(null, null); NULL # cast to float -query IT +query RT select coalesce(1, 2.0), arrow_typeof(coalesce(1, 2.0)) @@ -51,7 +51,7 @@ select ---- 2 Float64 -query IT +query RT select coalesce(1, arrow_cast(2.0, 'Float32')), arrow_typeof(coalesce(1, arrow_cast(2.0, 'Float32'))) @@ -177,7 +177,7 @@ select 2 Decimal256(22, 2) # coalesce string -query T? +query TT select coalesce('', 'test'), coalesce(null, 'test'); @@ -246,7 +246,7 @@ drop table test1 statement ok create table t(c varchar) as values ('a'), (null); -query TT +query ?T select coalesce(c, arrow_cast('b', 'Dictionary(Int32, Utf8)')), arrow_typeof(coalesce(c, arrow_cast('b', 'Dictionary(Int32, Utf8)'))) @@ -295,7 +295,7 @@ statement ok drop table t; # test dict(int32, int8) -query I +query ? select coalesce(34, arrow_cast(123, 'Dictionary(Int32, Int8)')); ---- 34 diff --git a/datafusion/sqllogictest/test_files/copy.slt b/datafusion/sqllogictest/test_files/copy.slt index ff7040926caa..ebb3ca2173b8 100644 --- a/datafusion/sqllogictest/test_files/copy.slt +++ b/datafusion/sqllogictest/test_files/copy.slt @@ -20,13 +20,13 @@ statement ok create table source_table(col1 integer, col2 varchar) as values (1, 'Foo'), (2, 'Bar'); # Copy to directory as multiple files -query IT +query I COPY source_table TO 'test_files/scratch/copy/table/' STORED AS parquet OPTIONS ('format.compression' 'zstd(10)'); ---- 2 # Copy to directory as partitioned files -query IT +query I COPY source_table TO 'test_files/scratch/copy/partitioned_table1/' STORED AS parquet PARTITIONED BY (col2) OPTIONS ('format.compression' 'zstd(10)'); ---- 2 @@ -53,7 +53,7 @@ select * from validate_partitioned_parquet_bar order by col1; 2 # Copy to directory as partitioned files -query ITT +query I COPY (values (1, 'a', 'x'), (2, 'b', 'y'), (3, 'c', 'z')) TO 'test_files/scratch/copy/partitioned_table2/' STORED AS parquet PARTITIONED BY (column2, column3) OPTIONS ('format.compression' 'zstd(10)'); ---- @@ -81,7 +81,7 @@ select * from validate_partitioned_parquet_a_x order by column1; 1 # Copy to directory as partitioned files -query TTT +query I COPY (values ('1', 'a', 'x'), ('2', 'b', 'y'), ('3', 'c', 'z')) TO 'test_files/scratch/copy/partitioned_table3/' STORED AS parquet PARTITIONED BY (column1, column3) OPTIONS ('format.compression' 'zstd(10)'); ---- @@ -167,7 +167,7 @@ physical_plan 02)--MemoryExec: partitions=1, partition_sizes=[1] # Copy to directory as partitioned files with keep_partition_by_columns enabled -query TT +query I COPY (values ('1', 'a'), ('2', 'b'), ('3', 'c')) TO 'test_files/scratch/copy/partitioned_table4/' STORED AS parquet PARTITIONED BY (column1) OPTIONS (execution.keep_partition_by_columns true); ---- @@ -184,7 +184,7 @@ select column1, column2 from validate_partitioned_parquet4 order by column1,colu 1 a # Copy more files to directory via query -query IT +query I COPY (select * from source_table UNION ALL 
select * from source_table) to 'test_files/scratch/copy/table/' STORED AS PARQUET; ---- 4 @@ -203,7 +203,7 @@ select * from validate_parquet; 1 Foo 2 Bar -query ? +query I copy (values (struct(timestamp '2021-01-01 01:00:01', 1)), (struct(timestamp '2022-01-01 01:00:01', 2)), (struct(timestamp '2023-01-03 01:00:01', 3)), (struct(timestamp '2024-01-01 01:00:01', 4))) to 'test_files/scratch/copy/table_nested2/' STORED AS PARQUET; @@ -221,7 +221,7 @@ select * from validate_parquet_nested2; {c0: 2023-01-03T01:00:01, c1: 3} {c0: 2024-01-01T01:00:01, c1: 4} -query ?? +query I COPY (values (struct ('foo', (struct ('foo', make_array(struct('a',1), struct('b',2))))), make_array(timestamp '2023-01-01 01:00:01',timestamp '2023-01-01 01:00:01')), (struct('bar', (struct ('foo', make_array(struct('aa',10), struct('bb',20))))), make_array(timestamp '2024-01-01 01:00:01', timestamp '2024-01-01 01:00:01'))) @@ -239,7 +239,7 @@ select * from validate_parquet_nested; {c0: foo, c1: {c0: foo, c1: [{c0: a, c1: 1}, {c0: b, c1: 2}]}} [2023-01-01T01:00:01, 2023-01-01T01:00:01] {c0: bar, c1: {c0: foo, c1: [{c0: aa, c1: 10}, {c0: bb, c1: 20}]}} [2024-01-01T01:00:01, 2024-01-01T01:00:01] -query ? +query I copy (values ([struct('foo', 1), struct('bar', 2)])) to 'test_files/scratch/copy/array_of_struct/' STORED AS PARQUET; @@ -255,7 +255,7 @@ select * from validate_array_of_struct; ---- [{c0: foo, c1: 1}, {c0: bar, c1: 2}] -query ? +query I copy (values (struct('foo', [1,2,3], struct('bar', [2,3,4])))) to 'test_files/scratch/copy/struct_with_array/' STORED AS PARQUET; ---- @@ -272,7 +272,7 @@ select * from validate_struct_with_array; # Copy parquet with all supported statement overrides -query IT +query I COPY source_table TO 'test_files/scratch/copy/table_with_options/' STORED AS PARQUET @@ -378,7 +378,7 @@ select * from validate_parquet_with_options; 2 Bar # Copy from table to single file -query IT +query I COPY source_table to 'test_files/scratch/copy/table.parquet'; ---- 2 @@ -394,7 +394,7 @@ select * from validate_parquet_single; 2 Bar # copy from table to folder of compressed json files -query IT +query I COPY source_table to 'test_files/scratch/copy/table_json_gz' STORED AS JSON OPTIONS ('format.compression' gzip); ---- 2 @@ -410,7 +410,7 @@ select * from validate_json_gz; 2 Bar # copy from table to folder of compressed csv files -query IT +query I COPY source_table to 'test_files/scratch/copy/table_csv' STORED AS CSV OPTIONS ('format.has_header' false, 'format.compression' gzip); ---- 2 @@ -426,7 +426,7 @@ select * from validate_csv; 2 Bar # Copy from table to single csv -query IT +query I COPY source_table to 'test_files/scratch/copy/table.csv'; ---- 2 @@ -442,7 +442,7 @@ select * from validate_single_csv; 2 Bar # Copy from table to folder of json -query IT +query I COPY source_table to 'test_files/scratch/copy/table_json' STORED AS JSON; ---- 2 @@ -458,7 +458,7 @@ select * from validate_json; 2 Bar # Copy from table to single json file -query IT +query I COPY source_table to 'test_files/scratch/copy/table.json' STORED AS JSON ; ---- 2 @@ -474,7 +474,7 @@ select * from validate_single_json; 2 Bar # COPY csv files with all options set -query IT +query I COPY source_table to 'test_files/scratch/copy/table_csv_with_options' STORED AS CSV OPTIONS ( @@ -499,7 +499,7 @@ select * from validate_csv_with_options; 2;Bar # Copy from table to single arrow file -query IT +query I COPY source_table to 'test_files/scratch/copy/table.arrow' STORED AS ARROW; ---- 2 @@ -517,7 +517,7 @@ select * from validate_arrow_file; 2 Bar # 
Copy from dict encoded values to single arrow file -query T? +query I COPY (values ('c', arrow_cast('foo', 'Dictionary(Int32, Utf8)')), ('d', arrow_cast('bar', 'Dictionary(Int32, Utf8)'))) to 'test_files/scratch/copy/table_dict.arrow' STORED AS ARROW; @@ -538,7 +538,7 @@ d bar # Copy from table to folder of json -query IT +query I COPY source_table to 'test_files/scratch/copy/table_arrow' STORED AS ARROW; ---- 2 @@ -556,7 +556,7 @@ select * from validate_arrow; # Format Options Support without the 'format.' prefix # Copy with format options for Parquet without the 'format.' prefix -query IT +query I COPY source_table TO 'test_files/scratch/copy/format_table.parquet' OPTIONS ( compression snappy, @@ -566,14 +566,14 @@ OPTIONS ( 2 # Copy with format options for JSON without the 'format.' prefix -query IT +query I COPY source_table to 'test_files/scratch/copy/format_table' STORED AS JSON OPTIONS (compression gzip); ---- 2 # Copy with format options for CSV without the 'format.' prefix -query IT +query I COPY source_table to 'test_files/scratch/copy/format_table.csv' OPTIONS ( has_header false, diff --git a/datafusion/sqllogictest/test_files/csv_files.slt b/datafusion/sqllogictest/test_files/csv_files.slt index f7f5aa54dd0d..3fb9a6f20c24 100644 --- a/datafusion/sqllogictest/test_files/csv_files.slt +++ b/datafusion/sqllogictest/test_files/csv_files.slt @@ -115,14 +115,14 @@ CREATE TABLE src_table_2 ( (7, 'ggg', 700, 2), (8, 'hhh', 800, 2); -query ITII +query I COPY src_table_1 TO 'test_files/scratch/csv_files/csv_partitions/1.csv' STORED AS CSV; ---- 4 -query ITII +query I COPY src_table_2 TO 'test_files/scratch/csv_files/csv_partitions/2.csv' STORED AS CSV; ---- @@ -175,7 +175,7 @@ CREATE TABLE table_with_necessary_quoting ( (4, 'h|h|h'); # quote is required because `|` is delimiter and part of the data -query IT +query I COPY table_with_necessary_quoting TO 'test_files/scratch/csv_files/table_with_necessary_quoting.csv' STORED AS csv OPTIONS ('format.quote' '~', @@ -247,7 +247,7 @@ id2 "value2" id3 "value3" # ensure that double quote option is used when writing to csv -query TT +query I COPY csv_with_double_quote TO 'test_files/scratch/csv_files/table_with_double_quotes.csv' STORED AS csv OPTIONS ('format.double_quote' 'true'); @@ -271,7 +271,7 @@ id2 "value2" id3 "value3" # ensure when double quote option is disabled that quotes are escaped instead -query TT +query I COPY csv_with_double_quote TO 'test_files/scratch/csv_files/table_with_escaped_quotes.csv' STORED AS csv OPTIONS ('format.double_quote' 'false', 'format.escape' '#'); diff --git a/datafusion/sqllogictest/test_files/explain.slt b/datafusion/sqllogictest/test_files/explain.slt index eae4f428b4b4..1e8850efadff 100644 --- a/datafusion/sqllogictest/test_files/explain.slt +++ b/datafusion/sqllogictest/test_files/explain.slt @@ -177,6 +177,7 @@ initial_logical_plan 01)Projection: simple_explain_test.a, simple_explain_test.b, simple_explain_test.c 02)--TableScan: simple_explain_test logical_plan after inline_table_scan SAME TEXT AS ABOVE +logical_plan after expand_wildcard_rule SAME TEXT AS ABOVE logical_plan after type_coercion SAME TEXT AS ABOVE logical_plan after count_wildcard_rule SAME TEXT AS ABOVE analyzed_logical_plan SAME TEXT AS ABOVE diff --git a/datafusion/sqllogictest/test_files/expr.slt b/datafusion/sqllogictest/test_files/expr.slt index 1d5f9ba23d58..3c3b0631e3ff 100644 --- a/datafusion/sqllogictest/test_files/expr.slt +++ b/datafusion/sqllogictest/test_files/expr.slt @@ -22,7 +22,7 @@ SELECT true, false, false = false, 
true = false true false true false # test_mathematical_expressions_with_null -query RRRRRRRRRRRRRRRRRR?RRRRRRRIRRRRRRBB +query RRRRRRRRRRRRRRRRRR?RRRRRIIIRRRRRRBB SELECT sqrt(NULL), cbrt(NULL), @@ -365,7 +365,7 @@ SELECT bit_length('josé') ---- 40 -query ? +query I SELECT bit_length(NULL) ---- NULL @@ -395,7 +395,7 @@ SELECT btrim('\nxyxtrimyyx\n', 'xyz\n') ---- trim -query ? +query T SELECT btrim(NULL, 'xyz') ---- NULL @@ -476,7 +476,7 @@ SELECT initcap('hi THOMAS') ---- Hi Thomas -query ? +query T SELECT initcap(NULL) ---- NULL @@ -491,7 +491,7 @@ SELECT lower('TOM') ---- tom -query ? +query T SELECT lower(NULL) ---- NULL @@ -511,7 +511,7 @@ SELECT ltrim('zzzytest', 'xyz') ---- test -query ? +query T SELECT ltrim(NULL, 'xyz') ---- NULL @@ -531,7 +531,7 @@ SELECT octet_length('josé') ---- 5 -query ? +query I SELECT octet_length(NULL) ---- NULL @@ -551,7 +551,7 @@ SELECT repeat('Pg', CAST(NULL AS INT)) ---- NULL -query ? +query T SELECT repeat(NULL, 4) ---- NULL @@ -576,7 +576,7 @@ SELECT replace('abcdefabcdef', NULL, 'XX') ---- NULL -query ? +query T SELECT replace(NULL, 'cd', 'XX') ---- NULL @@ -596,7 +596,7 @@ SELECT rtrim('testxxzx', 'xyz') ---- test -query ? +query T SELECT rtrim(NULL, 'xyz') ---- NULL @@ -611,7 +611,7 @@ SELECT split_part('abc~@~def~@~ghi', '~@~', 20) ---- (empty) -query ? +query T SELECT split_part(NULL, '~@~', 20) ---- NULL @@ -788,7 +788,7 @@ SELECT upper('tom') ---- TOM -query ? +query T SELECT upper(NULL) ---- NULL @@ -1774,7 +1774,7 @@ SELECT arrow_cast(decode(arrow_cast('746f6d', 'LargeBinary'),'hex'), 'Utf8'); ---- tom -query ? +query T SELECT encode(NULL,'base64'); ---- NULL @@ -1784,7 +1784,7 @@ SELECT decode(NULL,'base64'); ---- NULL -query ? +query T SELECT encode(NULL,'hex'); ---- NULL @@ -1829,7 +1829,7 @@ SELECT md5(''); ---- d41d8cd98f00b204e9800998ecf8427e -query ? +query T SELECT md5(NULL); ---- NULL diff --git a/datafusion/sqllogictest/test_files/functions.slt b/datafusion/sqllogictest/test_files/functions.slt index 8a4855ea2c05..f728942b38c3 100644 --- a/datafusion/sqllogictest/test_files/functions.slt +++ b/datafusion/sqllogictest/test_files/functions.slt @@ -33,7 +33,7 @@ SELECT char_length('josé') ---- 4 -query ? +query I SELECT char_length(NULL) ---- NULL @@ -53,7 +53,7 @@ SELECT character_length('josé') ---- 4 -query ? +query I SELECT character_length(NULL) ---- NULL @@ -93,12 +93,12 @@ SELECT left('abcde', CAST(NULL AS INT)) ---- NULL -query ? +query T SELECT left(NULL, 2) ---- NULL -query ? +query T SELECT left(NULL, CAST(NULL AS INT)) ---- NULL @@ -128,7 +128,7 @@ SELECT length(arrow_cast('josé', 'Dictionary(Int32, Utf8)')) ---- 4 -query ? +query I SELECT length(NULL) ---- NULL @@ -193,12 +193,12 @@ SELECT lpad('xyxhi', 3) ---- xyx -query ? +query T SELECT lpad(NULL, 0) ---- NULL -query ? +query T SELECT lpad(NULL, 5, 'xy') ---- NULL @@ -244,7 +244,7 @@ SELECT reverse('loẅks') ---- sk̈wol -query ? +query T SELECT reverse(NULL) ---- NULL @@ -284,12 +284,12 @@ SELECT right('abcde', CAST(NULL AS INT)) ---- NULL -query ? +query T SELECT right(NULL, 2) ---- NULL -query ? +query T SELECT right(NULL, CAST(NULL AS INT)) ---- NULL @@ -374,7 +374,7 @@ SELECT strpos('joséésoj', 'abc') ---- 0 -query ? +query I SELECT strpos(NULL, 'abc') ---- NULL @@ -455,7 +455,7 @@ SELECT translate(arrow_cast('12345', 'Dictionary(Int32, Utf8)'), '143', 'ax') ---- a2x5 -query ? +query T SELECT translate(NULL, '143', 'ax') ---- NULL @@ -949,12 +949,12 @@ SELECT levenshtein('kitten', NULL) ---- NULL -query ? 
+query I SELECT levenshtein(NULL, 'sitting') ---- NULL -query ? +query I SELECT levenshtein(NULL, NULL) ---- NULL @@ -1041,7 +1041,7 @@ arrow 1 arrow arrow 2 arrow # Test substring_index with NULL values -query ?TT? +query TTTT SELECT substring_index(NULL, '.', 1), substring_index('arrow.apache.org', NULL, 1), @@ -1092,7 +1092,7 @@ docs.apache.com docs com community.influxdata.com community com arrow.apache.org arrow org - +# find_in_set tests query I SELECT find_in_set('b', 'a,b,c,d') ---- @@ -1120,7 +1120,7 @@ SELECT find_in_set('', '') ---- 1 -query ? +query I SELECT find_in_set(NULL, 'a,b,c,d') ---- NULL @@ -1131,11 +1131,28 @@ SELECT find_in_set('a', NULL) NULL -query ? +query I SELECT find_in_set(NULL, NULL) ---- NULL +# find_in_set tests with utf8view +query I +SELECT find_in_set(arrow_cast('b', 'Utf8View'), 'a,b,c,d') +---- +2 + + +query I +SELECT find_in_set('a', arrow_cast('a,b,c,d,a', 'Utf8View')) +---- +1 + +query I +SELECT find_in_set(arrow_cast('', 'Utf8View'), arrow_cast('a,b,c,d,a', 'Utf8View')) +---- +0 + # Verify that multiple calls to volatile functions like `random()` are not combined / optimized away query B SELECT r FROM (SELECT r1 == r2 r, r1, r2 FROM (SELECT random()+1 r1, random()+1 r2) WHERE r1 > 0 AND r2 > 0) diff --git a/datafusion/sqllogictest/test_files/group_by.slt b/datafusion/sqllogictest/test_files/group_by.slt index a4a886c75a77..5571315e2acc 100644 --- a/datafusion/sqllogictest/test_files/group_by.slt +++ b/datafusion/sqllogictest/test_files/group_by.slt @@ -4502,28 +4502,28 @@ CREATE TABLE src_table ( ('2020-12-19T00:00:00.00Z', 9); # Use src_table to create a partitioned file -query PI +query I COPY (SELECT * FROM src_table) TO 'test_files/scratch/group_by/timestamp_table/0.csv' STORED AS CSV; ---- 10 -query PI +query I COPY (SELECT * FROM src_table) TO 'test_files/scratch/group_by/timestamp_table/1.csv' STORED AS CSV; ---- 10 -query PI +query I COPY (SELECT * FROM src_table) TO 'test_files/scratch/group_by/timestamp_table/2.csv' STORED AS CSV; ---- 10 -query PI +query I COPY (SELECT * FROM src_table) TO 'test_files/scratch/group_by/timestamp_table/3.csv' STORED AS CSV; diff --git a/datafusion/sqllogictest/test_files/limit.slt b/datafusion/sqllogictest/test_files/limit.slt index 4cdd40ac8c34..439df7fede51 100644 --- a/datafusion/sqllogictest/test_files/limit.slt +++ b/datafusion/sqllogictest/test_files/limit.slt @@ -518,7 +518,7 @@ drop table aggregate_test_100; ## Test limit pushdown in StreamingTableExec ## Create sorted table with 5 rows -query IT +query I COPY (select * from (values (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e') )) TO 'test_files/scratch/limit/data.csv' STORED AS CSV; diff --git a/datafusion/sqllogictest/test_files/map.slt b/datafusion/sqllogictest/test_files/map.slt index eb350c22bb5d..0dc37c68bca4 100644 --- a/datafusion/sqllogictest/test_files/map.slt +++ b/datafusion/sqllogictest/test_files/map.slt @@ -199,25 +199,50 @@ SELECT MAP(arrow_cast(make_array('POST', 'HEAD', 'PATCH'), 'LargeList(Utf8)'), a statement ok create table t as values -('a', 1, 'k1', 10, ['k1', 'k2'], [1, 2]), -('b', 2, 'k3', 30, ['k3'], [3]), -('d', 4, 'k5', 50, ['k5'], [5]); +('a', 1, 'k1', 10, ['k1', 'k2'], [1, 2], 'POST', [[1,2,3]], ['a']), +('b', 2, 'k3', 30, ['k3'], [3], 'PUT', [[4]], ['b']), +('d', 4, 'k5', 50, ['k5'], [5], null, [[1,2]], ['c']); -query error +query ? 
SELECT make_map(column1, column2, column3, column4) FROM t; -# TODO: support array value -# ---- -# {a: 1, k1: 10} -# {b: 2, k3: 30} -# {d: 4, k5: 50} +---- +{a: 1, k1: 10} +{b: 2, k3: 30} +{d: 4, k5: 50} -query error +query ? SELECT map(column5, column6) FROM t; -# TODO: support array value -# ---- -# {k1:1, k2:2} -# {k3: 3} -# {k5: 5} +---- +{k1: 1, k2: 2} +{k3: 3} +{k5: 5} + +query ? +SELECT map(column8, column9) FROM t; +---- +{[1, 2, 3]: a} +{[4]: b} +{[1, 2]: c} + +query error +SELECT map(column6, column7) FROM t; + +query ? +select Map {column6: column7} from t; +---- +{[1, 2]: POST} +{[3]: PUT} +{[5]: } + +query ? +select Map {column8: column7} from t; +---- +{[[1, 2, 3]]: POST} +{[[4]]: PUT} +{[[1, 2]]: } + +query error +select Map {column7: column8} from t; query ? SELECT MAKE_MAP('POST', 41, 'HEAD', 33, 'PATCH', 30, 'OPTION', 29, 'GET', 27, 'PUT', 25, 'DELETE', 24) AS method_count from t; diff --git a/datafusion/sqllogictest/test_files/nvl.slt b/datafusion/sqllogictest/test_files/nvl.slt index c77214cc302a..81e79e1eb5b0 100644 --- a/datafusion/sqllogictest/test_files/nvl.slt +++ b/datafusion/sqllogictest/test_files/nvl.slt @@ -114,7 +114,7 @@ SELECT NVL(1, 3); ---- 1 -query ? +query I SELECT NVL(NULL, NULL); ---- NULL diff --git a/datafusion/sqllogictest/test_files/parquet.slt b/datafusion/sqllogictest/test_files/parquet.slt index 3342f85c8141..34d4ed6ff284 100644 --- a/datafusion/sqllogictest/test_files/parquet.slt +++ b/datafusion/sqllogictest/test_files/parquet.slt @@ -42,7 +42,7 @@ CREATE TABLE src_table ( # Setup 2 files, i.e., as many as there are partitions: # File 1: -query ITID +query I COPY (SELECT * FROM src_table LIMIT 3) TO 'test_files/scratch/parquet/test_table/0.parquet' STORED AS PARQUET; @@ -50,7 +50,7 @@ STORED AS PARQUET; 3 # File 2: -query ITID +query I COPY (SELECT * FROM src_table WHERE int_col > 3 LIMIT 3) TO 'test_files/scratch/parquet/test_table/1.parquet' STORED AS PARQUET; @@ -123,7 +123,7 @@ physical_plan 02)--ParquetExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_table/0.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_table/1.parquet]]}, projection=[int_col, string_col], output_ordering=[string_col@1 ASC NULLS LAST, int_col@0 ASC NULLS LAST] # Add another file to the directory underlying test_table -query ITID +query I COPY (SELECT * FROM src_table WHERE int_col > 6 LIMIT 3) TO 'test_files/scratch/parquet/test_table/2.parquet' STORED AS PARQUET; @@ -251,31 +251,29 @@ SELECT COUNT(*) FROM timestamp_with_tz; ---- 131072 -# FIXME(#TODO) fails with feature `force_hash_collisions` -# https://github.com/apache/datafusion/issues/11660 # Perform the query: -# query IPT -# SELECT -# count, -# LAG(timestamp, 1) OVER (ORDER BY timestamp), -# arrow_typeof(LAG(timestamp, 1) OVER (ORDER BY timestamp)) -# FROM timestamp_with_tz -# LIMIT 10; -# ---- -# 0 NULL Timestamp(Millisecond, Some("UTC")) -# 0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) -# 0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) -# 4 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) -# 0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) -# 0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) -# 0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) -# 14 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) -# 0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) -# 0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) +query IPT +SELECT + count, + 
LAG(timestamp, 1) OVER (ORDER BY timestamp), + arrow_typeof(LAG(timestamp, 1) OVER (ORDER BY timestamp)) +FROM timestamp_with_tz +LIMIT 10; +---- +0 NULL Timestamp(Millisecond, Some("UTC")) +0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) +0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) +4 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) +0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) +0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) +0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) +14 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) +0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) +0 2014-08-27T14:00:00Z Timestamp(Millisecond, Some("UTC")) # Test config listing_table_ignore_subdirectory: -query ITID +query I COPY (SELECT * FROM src_table WHERE int_col > 6 LIMIT 3) TO 'test_files/scratch/parquet/test_table/subdir/3.parquet' STORED AS PARQUET; diff --git a/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt b/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt index f7a81f08456f..b68d4f52d21c 100644 --- a/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt +++ b/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt @@ -56,7 +56,7 @@ CREATE TABLE src_table ( # Setup 3 files, in particular more files than there are partitions # File 1: -query IITIDII +query I COPY (SELECT * FROM src_table ORDER BY int_col LIMIT 3) TO 'test_files/scratch/parquet_sorted_statistics/test_table/partition_col=A/0.parquet' STORED AS PARQUET; @@ -64,7 +64,7 @@ STORED AS PARQUET; 3 # File 2: -query IITIDII +query I COPY (SELECT * FROM src_table WHERE int_col > 3 ORDER BY int_col LIMIT 3) TO 'test_files/scratch/parquet_sorted_statistics/test_table/partition_col=B/1.parquet' STORED AS PARQUET; @@ -72,7 +72,7 @@ STORED AS PARQUET; 3 # Add another file to the directory underlying test_table -query IITIDII +query I COPY (SELECT * FROM src_table WHERE int_col > 6 ORDER BY int_col LIMIT 3) TO 'test_files/scratch/parquet_sorted_statistics/test_table/partition_col=C/2.parquet' STORED AS PARQUET; diff --git a/datafusion/sqllogictest/test_files/regexp.slt b/datafusion/sqllogictest/test_files/regexp.slt index aa99a54c26ee..149ad7f6fdcd 100644 --- a/datafusion/sqllogictest/test_files/regexp.slt +++ b/datafusion/sqllogictest/test_files/regexp.slt @@ -94,7 +94,7 @@ SELECT regexp_like('aa', '.*-(\d)'); ---- false -query ? +query B SELECT regexp_like(NULL, '.*-(\d)'); ---- NULL @@ -104,7 +104,7 @@ SELECT regexp_like('aaa-0', NULL); ---- NULL -query ? +query B SELECT regexp_like(null, '.*-(\d)'); ---- NULL @@ -294,7 +294,7 @@ SELECT regexp_replace('Thomas', '.[mN]a.', 'M'); ---- ThM -query ? 
+query T SELECT regexp_replace(NULL, 'b(..)', 'X\\1Y', 'g'); ---- NULL diff --git a/datafusion/sqllogictest/test_files/scalar.slt b/datafusion/sqllogictest/test_files/scalar.slt index 188a2c5863e6..6eed72e914bd 100644 --- a/datafusion/sqllogictest/test_files/scalar.slt +++ b/datafusion/sqllogictest/test_files/scalar.slt @@ -706,7 +706,7 @@ select power(2, 0), power(2, 1), power(2, 2); 1 2 4 # power scalar nulls -query R rowsort +query I rowsort select power(null, 64); ---- NULL @@ -718,7 +718,7 @@ select power(2, null); NULL # power scalar nulls #2 -query R rowsort +query I rowsort select power(null, null); ---- NULL @@ -1720,7 +1720,7 @@ CREATE TABLE test( (-14, -14, -14.5, -14.5), (NULL, NULL, NULL, NULL); -query RRRRIR rowsort +query IRRRIR rowsort SELECT power(i32, exp_i) as power_i32, power(i64, exp_f) as power_i64, pow(f32, exp_i) as power_f32, @@ -1895,7 +1895,7 @@ select 100000 where position('legend' in 'league of legend') = 11; 100000 # test null -query ? +query I select position(null in null) ---- NULL diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt index f217cbab074f..49a18ca09de4 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt @@ -1195,12 +1195,12 @@ LIMIT 5 200 2000 # Trying to exclude non-existing column should give error -statement error DataFusion error: Schema error: No field named e. Valid fields are table1.a, table1.b, table1.c, table1.d. +statement error SELECT * EXCLUDE e FROM table1 # similarly, except should raise error if excluded column is not in the table -statement error DataFusion error: Schema error: No field named e. Valid fields are table1.a, table1.b, table1.c, table1.d. +statement error SELECT * EXCEPT(e) FROM table1 @@ -1214,7 +1214,7 @@ FROM table1 2 20 20 200 2000 # EXCEPT, or EXCLUDE shouldn't contain duplicate column names -statement error DataFusion error: Error during planning: EXCLUDE or EXCEPT contains duplicate column names +statement error SELECT * EXCLUDE(a, a) FROM table1 diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index dcc6784bf44a..264f85ff84b9 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -425,6 +425,50 @@ logical_plan 01)Projection: starts_with(test.column1_utf8view, Utf8View("äöüß")) AS c1, starts_with(test.column1_utf8view, Utf8View("")) AS c2, starts_with(test.column1_utf8view, Utf8View(NULL)) AS c3, starts_with(Utf8View(NULL), test.column1_utf8view) AS c4 02)--TableScan: test projection=[column1_utf8view] +### Initcap + +query TT +EXPLAIN SELECT + INITCAP(column1_utf8view) as c +FROM test; +---- +logical_plan +01)Projection: initcap(test.column1_utf8view) AS c +02)--TableScan: test projection=[column1_utf8view] + +# Create a table with lowercase strings +statement ok +CREATE TABLE test_lowercase AS SELECT + lower(column1_utf8) as column1_utf8_lower, + lower(column1_large_utf8) as column1_large_utf8_lower, + lower(column1_utf8view) as column1_utf8view_lower +FROM test; + +# Test INITCAP with utf8view, utf8, and largeutf8 +# Should not cast anything +query TT +EXPLAIN SELECT + INITCAP(column1_utf8view_lower) as c1, + INITCAP(column1_utf8_lower) as c2, + INITCAP(column1_large_utf8_lower) as c3 +FROM test_lowercase; +---- +logical_plan +01)Projection: initcap(test_lowercase.column1_utf8view_lower) AS c1, initcap(test_lowercase.column1_utf8_lower) AS c2, 
initcap(test_lowercase.column1_large_utf8_lower) AS c3 +02)--TableScan: test_lowercase projection=[column1_utf8_lower, column1_large_utf8_lower, column1_utf8view_lower] + +query TTT +SELECT + INITCAP(column1_utf8view_lower) as c1, + INITCAP(column1_utf8_lower) as c2, + INITCAP(column1_large_utf8_lower) as c3 +FROM test_lowercase; +---- +Andrew Andrew Andrew +Xiangpeng Xiangpeng Xiangpeng +Raphael Raphael Raphael +NULL NULL NULL + # Ensure string functions use native StringView implementation # and do not fall back to Utf8 or LargeUtf8 # Should see no casts to Utf8 in the plans below @@ -519,15 +563,143 @@ SELECT 228 0 NULL ## Ensure no casts for BTRIM +# Test BTRIM with Utf8View input +query TT +EXPLAIN SELECT + BTRIM(column1_utf8view) AS l +FROM test; +---- +logical_plan +01)Projection: btrim(test.column1_utf8view) AS l +02)--TableScan: test projection=[column1_utf8view] + +# Test BTRIM with Utf8View input and Utf8View pattern query TT EXPLAIN SELECT BTRIM(column1_utf8view, 'foo') AS l FROM test; ---- logical_plan -01)Projection: btrim(CAST(test.column1_utf8view AS Utf8), Utf8("foo")) AS l +01)Projection: btrim(test.column1_utf8view, Utf8View("foo")) AS l +02)--TableScan: test projection=[column1_utf8view] + +# Test BTRIM with Utf8View bytes longer than 12 +query TT +EXPLAIN SELECT + BTRIM(column1_utf8view, 'this is longer than 12') AS l +FROM test; +---- +logical_plan +01)Projection: btrim(test.column1_utf8view, Utf8View("this is longer than 12")) AS l +02)--TableScan: test projection=[column1_utf8view] + +# Test BTRIM outputs +query TTTT +SELECT + BTRIM(column1_utf8view, 'foo') AS l1, + BTRIM(column1_utf8view, 'A') AS l2, + BTRIM(column1_utf8view) AS l3, + BTRIM(column1_utf8view, NULL) AS l4 +FROM test; +---- +Andrew ndrew Andrew NULL +Xiangpeng Xiangpeng Xiangpeng NULL +Raphael Raphael Raphael NULL +NULL NULL NULL NULL + +## Ensure no casts for LTRIM +# Test LTRIM with Utf8View input +query TT +EXPLAIN SELECT + LTRIM(column1_utf8view) AS l +FROM test; +---- +logical_plan +01)Projection: ltrim(test.column1_utf8view) AS l +02)--TableScan: test projection=[column1_utf8view] + +# Test LTRIM with Utf8View input and Utf8View pattern +query TT +EXPLAIN SELECT + LTRIM(column1_utf8view, 'foo') AS l +FROM test; +---- +logical_plan +01)Projection: ltrim(test.column1_utf8view, Utf8View("foo")) AS l +02)--TableScan: test projection=[column1_utf8view] + +# Test LTRIM with Utf8View bytes longer than 12 +query TT +EXPLAIN SELECT + LTRIM(column1_utf8view, 'this is longer than 12') AS l +FROM test; +---- +logical_plan +01)Projection: ltrim(test.column1_utf8view, Utf8View("this is longer than 12")) AS l +02)--TableScan: test projection=[column1_utf8view] + +# Test LTRIM outputs +query TTTTT +SELECT + LTRIM(column1_utf8view, 'foo') AS l1, + LTRIM(column1_utf8view, column2_utf8view) AS l2, + LTRIM(column1_utf8view) AS l3, + LTRIM(column1_utf8view, NULL) AS l4, + LTRIM(column1_utf8view, 'Xiang') AS l5 +FROM test; +---- +Andrew Andrew Andrew NULL Andrew +Xiangpeng (empty) Xiangpeng NULL peng +Raphael aphael Raphael NULL Raphael +NULL NULL NULL NULL NULL + +## ensure no casts for RTRIM +# Test RTRIM with Utf8View input +query TT +EXPLAIN SELECT + RTRIM(column1_utf8view) AS l +FROM test; +---- +logical_plan +01)Projection: rtrim(test.column1_utf8view) AS l 02)--TableScan: test projection=[column1_utf8view] +# Test RTRIM with Utf8View input and Utf8View pattern +query TT +EXPLAIN SELECT + RTRIM(column1_utf8view, 'foo') AS l +FROM test; +---- +logical_plan +01)Projection: rtrim(test.column1_utf8view, 
Utf8View("foo")) AS l +02)--TableScan: test projection=[column1_utf8view] + +# Test RTRIM with Utf8View bytes longer than 12 +query TT +EXPLAIN SELECT + RTRIM(column1_utf8view, 'this is longer than 12') AS l +FROM test; +---- +logical_plan +01)Projection: rtrim(test.column1_utf8view, Utf8View("this is longer than 12")) AS l +02)--TableScan: test projection=[column1_utf8view] + +# Test RTRIM outputs +query TTTTT +SELECT + RTRIM(column1_utf8view, 'foo') AS l1, + RTRIM(column1_utf8view, column2_utf8view) AS l2, + RTRIM(column1_utf8view) AS l3, + RTRIM(column1_utf8view, NULL) AS l4, + RTRIM(column1_utf8view, 'peng') As l5 +FROM test; +---- +Andrew Andrew Andrew NULL Andrew +Xiangpeng (empty) Xiangpeng NULL Xia +Raphael Raphael Raphael NULL Raphael +NULL NULL NULL NULL NULL + + ## Ensure no casts for CHARACTER_LENGTH query TT EXPLAIN SELECT @@ -574,7 +746,6 @@ logical_plan 03)----TableScan: test projection=[column1_utf8view, column2_utf8view] ## Ensure no casts for ENDS_WITH -## TODO https://github.com/apache/datafusion/issues/11852 query TT EXPLAIN SELECT ENDS_WITH(column1_utf8view, 'foo') as c1, @@ -582,24 +753,10 @@ EXPLAIN SELECT FROM test; ---- logical_plan -01)Projection: ends_with(CAST(test.column1_utf8view AS Utf8), Utf8("foo")) AS c1, ends_with(__common_expr_1, __common_expr_1) AS c2 -02)--Projection: CAST(test.column2_utf8view AS Utf8) AS __common_expr_1, test.column1_utf8view -03)----TableScan: test projection=[column1_utf8view, column2_utf8view] - - -## Ensure no casts for INITCAP -## TODO https://github.com/apache/datafusion/issues/11853 -query TT -EXPLAIN SELECT - INITCAP(column1_utf8view) as c -FROM test; ----- -logical_plan -01)Projection: initcap(CAST(test.column1_utf8view AS Utf8)) AS c -02)--TableScan: test projection=[column1_utf8view] +01)Projection: ends_with(test.column1_utf8view, Utf8View("foo")) AS c1, ends_with(test.column2_utf8view, test.column2_utf8view) AS c2 +02)--TableScan: test projection=[column1_utf8view, column2_utf8view] ## Ensure no casts for LEVENSHTEIN -## TODO https://github.com/apache/datafusion/issues/11854 query TT EXPLAIN SELECT levenshtein(column1_utf8view, 'foo') as c1, @@ -607,9 +764,8 @@ EXPLAIN SELECT FROM test; ---- logical_plan -01)Projection: levenshtein(__common_expr_1, Utf8("foo")) AS c1, levenshtein(__common_expr_1, CAST(test.column2_utf8view AS Utf8)) AS c2 -02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1, test.column2_utf8view -03)----TableScan: test projection=[column1_utf8view, column2_utf8view] +01)Projection: levenshtein(test.column1_utf8view, Utf8View("foo")) AS c1, levenshtein(test.column1_utf8view, test.column2_utf8view) AS c2 +02)--TableScan: test projection=[column1_utf8view, column2_utf8view] ## Ensure no casts for LOWER ## TODO https://github.com/apache/datafusion/issues/11855 @@ -622,16 +778,6 @@ logical_plan 01)Projection: lower(CAST(test.column1_utf8view AS Utf8)) AS c1 02)--TableScan: test projection=[column1_utf8view] -## Ensure no casts for LTRIM -## TODO https://github.com/apache/datafusion/issues/11856 -query TT -EXPLAIN SELECT - LTRIM(column1_utf8view) as c1 -FROM test; ----- -logical_plan -01)Projection: ltrim(CAST(test.column1_utf8view AS Utf8)) AS c1 -02)--TableScan: test projection=[column1_utf8view] ## Ensure no casts for LPAD query TT @@ -662,14 +808,13 @@ logical_plan 02)--TableScan: test projection=[column1_utf8view, column2_utf8view] ## Ensure no casts for OCTET_LENGTH -## TODO https://github.com/apache/datafusion/issues/11858 query TT EXPLAIN SELECT OCTET_LENGTH(column1_utf8view) as c1 
FROM test; ---- logical_plan -01)Projection: octet_length(CAST(test.column1_utf8view AS Utf8)) AS c1 +01)Projection: octet_length(test.column1_utf8view) AS c1 02)--TableScan: test projection=[column1_utf8view] ## Ensure no casts for OVERLAY @@ -749,30 +894,30 @@ logical_plan 01)Projection: reverse(CAST(test.column1_utf8view AS Utf8)) AS c1 02)--TableScan: test projection=[column1_utf8view] -## Ensure no casts for RTRIM -## TODO file ticket -query TT -EXPLAIN SELECT - RTRIM(column1_utf8view) as c1, - RTRIM(column1_utf8view, 'foo') as c2 -FROM test; ----- -logical_plan -01)Projection: rtrim(__common_expr_1) AS c1, rtrim(__common_expr_1, Utf8("foo")) AS c2 -02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1 -03)----TableScan: test projection=[column1_utf8view] ## Ensure no casts for RIGHT -## TODO file ticket query TT EXPLAIN SELECT RIGHT(column1_utf8view, 3) as c2 FROM test; ---- logical_plan -01)Projection: right(CAST(test.column1_utf8view AS Utf8), Int64(3)) AS c2 +01)Projection: right(test.column1_utf8view, Int64(3)) AS c2 02)--TableScan: test projection=[column1_utf8view] +# Test outputs of RIGHT +query TTT +SELECT + RIGHT(column1_utf8view, 3) as c1, + RIGHT(column1_utf8view, 0) as c2, + RIGHT(column1_utf8view, -3) as c3 +FROM test; +---- +rew (empty) rew +eng (empty) ngpeng +ael (empty) hael +NULL NULL NULL + ## Ensure no casts for RPAD ## TODO file ticket query TT @@ -787,19 +932,6 @@ logical_plan 03)----TableScan: test projection=[column1_utf8view, column2_utf8view] -## Ensure no casts for RTRIM -## TODO file ticket -query TT -EXPLAIN SELECT - RTRIM(column1_utf8view) as c, - RTRIM(column1_utf8view, column2_utf8view) as c1 -FROM test; ----- -logical_plan -01)Projection: rtrim(__common_expr_1) AS c, rtrim(__common_expr_1, CAST(test.column2_utf8view AS Utf8)) AS c1 -02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1, test.column2_utf8view -03)----TableScan: test projection=[column1_utf8view, column2_utf8view] - ## Ensure no casts for SPLIT_PART ## TODO file ticket query TT @@ -860,18 +992,24 @@ logical_plan 02)--TableScan: test projection=[column1_utf8view] ## Ensure no casts for FIND_IN_SET -## TODO file ticket query TT EXPLAIN SELECT FIND_IN_SET(column1_utf8view, 'a,b,c,d') as c FROM test; ---- logical_plan -01)Projection: find_in_set(CAST(test.column1_utf8view AS Utf8), Utf8("a,b,c,d")) AS c +01)Projection: find_in_set(test.column1_utf8view, Utf8View("a,b,c,d")) AS c 02)--TableScan: test projection=[column1_utf8view] - - +query I +SELECT + FIND_IN_SET(column1_utf8view, 'a,b,c,d') as c +FROM test; +---- +0 +0 +0 +NULL statement ok drop table test; diff --git a/datafusion/sqllogictest/test_files/struct.slt b/datafusion/sqllogictest/test_files/struct.slt index caa612f556fe..f3ac6549ad06 100644 --- a/datafusion/sqllogictest/test_files/struct.slt +++ b/datafusion/sqllogictest/test_files/struct.slt @@ -72,6 +72,14 @@ select struct(a, b, c)['c1'] from values; 2.2 3.3 +# explicit invocation of get_field +query R +select get_field(struct(a, b, c), 'c1') from values; +---- +1.1 +2.2 +3.3 + # struct scalar function #1 query ? select struct(1, 3.14, 'e'); @@ -218,9 +226,6 @@ select named_struct('field_a', 1, 'field_b', 2); ---- {field_a: 1, field_b: 2} -statement ok -drop table values; - query T select arrow_typeof(named_struct('first', 1, 'second', 2, 'third', 3)); ---- @@ -236,3 +241,44 @@ query ? 
select {'animal': {'cat': 1, 'dog': 2, 'bird': {'parrot': 3, 'canary': 1}}, 'genre': {'fiction': ['mystery', 'sci-fi', 'fantasy'], 'non-fiction': {'biography': 5, 'history': 7, 'science': {'physics': 2, 'biology': 3}}}, 'vehicle': {'car': {'sedan': 4, 'suv': 2}, 'bicycle': 3, 'boat': ['sailboat', 'motorboat']}, 'weather': {'sunny': True, 'temperature': 25.5, 'wind': {'speed': 10, 'direction': 'NW'}}}; ---- {animal: {cat: 1, dog: 2, bird: {parrot: 3, canary: 1}}, genre: {fiction: [mystery, sci-fi, fantasy], non-fiction: {biography: 5, history: 7, science: {physics: 2, biology: 3}}}, vehicle: {car: {sedan: 4, suv: 2}, bicycle: 3, boat: [sailboat, motorboat]}, weather: {sunny: true, temperature: 25.5, wind: {speed: 10, direction: NW}}} + +# test tuple as struct +query B +select ('x', 'y') = ('x', 'y'); +---- +true + +query B +select ('x', 'y') = ('y', 'x'); +---- +false + +query error DataFusion error: Error during planning: Cannot infer common argument type for comparison operation Struct.* +select ('x', 'y') = ('x', 'y', 'z'); + +query B +select ('x', 'y') IN (('x', 'y')); +---- +true + +query B +select ('x', 'y') IN (('x', 'y'), ('y', 'x')); +---- +true + +query I +select a from values where (a, c) = (1, 'a'); +---- +1 + +query I +select a from values where (a, c) IN ((1, 'a'), (2, 'b')); +---- +1 +2 + +statement ok +drop table values; + +statement ok +drop table struct_values; diff --git a/datafusion/sqllogictest/test_files/type_coercion.slt b/datafusion/sqllogictest/test_files/type_coercion.slt index aa1e6826eca5..e420c0cc7155 100644 --- a/datafusion/sqllogictest/test_files/type_coercion.slt +++ b/datafusion/sqllogictest/test_files/type_coercion.slt @@ -49,3 +49,179 @@ select interval '1 month' - '2023-05-01'::date; # interval - timestamp query error DataFusion error: Error during planning: Cannot coerce arithmetic expression Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) to valid types SELECT interval '1 month' - '2023-05-01 12:30:00'::timestamp; + + +#################################### +## Test type coercion with UNIONs ## +#################################### + +# Disable optimizer to test only the analyzer with type coercion +statement ok +set datafusion.optimizer.max_passes = 0; + +statement ok +set datafusion.explain.logical_plan_only = true; + +# Create test table +statement ok +CREATE TABLE orders( + order_id INT UNSIGNED NOT NULL, + customer_id INT UNSIGNED NOT NULL, + o_item_id VARCHAR NOT NULL, + qty INT NOT NULL, + price DOUBLE NOT NULL, + delivered BOOLEAN NOT NULL +); + +# union_different_num_columns_error() / UNION +query error Error during planning: Union schemas have different number of fields: query 1 has 1 fields whereas query 2 has 2 fields +SELECT order_id FROM orders UNION SELECT customer_id, o_item_id FROM orders + +# union_different_num_columns_error() / UNION ALL +query error Error during planning: Union schemas have different number of fields: query 1 has 1 fields whereas query 2 has 2 fields +SELECT order_id FROM orders UNION ALL SELECT customer_id, o_item_id FROM orders + +# union_with_different_column_names() +query TT +EXPLAIN SELECT order_id from orders UNION ALL SELECT customer_id FROM orders +---- +logical_plan +01)Union +02)--Projection: orders.order_id +03)----TableScan: orders +04)--Projection: orders.customer_id AS order_id +05)----TableScan: orders + +# union_values_with_no_alias() +query TT +EXPLAIN SELECT 1, 2 UNION ALL SELECT 3, 4 +---- +logical_plan +01)Union +02)--Projection: Int64(1) AS Int64(1), Int64(2) AS Int64(2) 
+03)----EmptyRelation +04)--Projection: Int64(3) AS Int64(1), Int64(4) AS Int64(2) +05)----EmptyRelation + +# union_with_incompatible_data_type() +query error Incompatible inputs for Union: Previous inputs were of type Interval\(MonthDayNano\), but got incompatible type Int64 on column 'Int64\(1\)' +SELECT interval '1 year 1 day' UNION ALL SELECT 1 + +# union_with_different_decimal_data_types() +query TT +EXPLAIN SELECT 1 a UNION ALL SELECT 1.1 a +---- +logical_plan +01)Union +02)--Projection: CAST(Int64(1) AS Float64) AS a +03)----EmptyRelation +04)--Projection: Float64(1.1) AS a +05)----EmptyRelation + +# union_with_null() +query TT +EXPLAIN SELECT NULL a UNION ALL SELECT 1.1 a +---- +logical_plan +01)Union +02)--Projection: CAST(NULL AS Float64) AS a +03)----EmptyRelation +04)--Projection: Float64(1.1) AS a +05)----EmptyRelation + +# union_with_float_and_string() +query TT +EXPLAIN SELECT 'a' a UNION ALL SELECT 1.1 a +---- +logical_plan +01)Union +02)--Projection: Utf8("a") AS a +03)----EmptyRelation +04)--Projection: CAST(Float64(1.1) AS Utf8) AS a +05)----EmptyRelation + +# union_with_multiply_cols() +query TT +EXPLAIN SELECT 'a' a, 1 b UNION ALL SELECT 1.1 a, 1.1 b +---- +logical_plan +01)Union +02)--Projection: Utf8("a") AS a, CAST(Int64(1) AS Float64) AS b +03)----EmptyRelation +04)--Projection: CAST(Float64(1.1) AS Utf8) AS a, Float64(1.1) AS b +05)----EmptyRelation + +# sorted_union_with_different_types_and_group_by() +query TT +EXPLAIN SELECT a FROM (select 1 a) x GROUP BY 1 + UNION ALL +(SELECT a FROM (select 1.1 a) x GROUP BY 1) ORDER BY 1 +---- +logical_plan +01)Sort: x.a ASC NULLS LAST +02)--Union +03)----Projection: CAST(x.a AS Float64) AS a +04)------Aggregate: groupBy=[[x.a]], aggr=[[]] +05)--------SubqueryAlias: x +06)----------Projection: Int64(1) AS a +07)------------EmptyRelation +08)----Projection: x.a +09)------Aggregate: groupBy=[[x.a]], aggr=[[]] +10)--------SubqueryAlias: x +11)----------Projection: Float64(1.1) AS a +12)------------EmptyRelation + +# union_with_binary_expr_and_cast() +query TT +EXPLAIN SELECT cast(0.0 + a as integer) FROM (select 1 a) x GROUP BY 1 + UNION ALL +(SELECT 2.1 + a FROM (select 1 a) x GROUP BY 1) +---- +logical_plan +01)Union +02)--Projection: CAST(Float64(0) + x.a AS Float64) AS Float64(0) + x.a +03)----Aggregate: groupBy=[[CAST(Float64(0) + CAST(x.a AS Float64) AS Int32)]], aggr=[[]] +04)------SubqueryAlias: x +05)--------Projection: Int64(1) AS a +06)----------EmptyRelation +07)--Projection: Float64(2.1) + x.a AS Float64(0) + x.a +08)----Aggregate: groupBy=[[Float64(2.1) + CAST(x.a AS Float64)]], aggr=[[]] +09)------SubqueryAlias: x +10)--------Projection: Int64(1) AS a +11)----------EmptyRelation + +# union_with_aliases() +query TT +EXPLAIN SELECT a as a1 FROM (select 1 a) x GROUP BY 1 + UNION ALL +(SELECT a as a1 FROM (select 1.1 a) x GROUP BY 1) +---- +logical_plan +01)Union +02)--Projection: CAST(x.a AS Float64) AS a1 +03)----Aggregate: groupBy=[[x.a]], aggr=[[]] +04)------SubqueryAlias: x +05)--------Projection: Int64(1) AS a +06)----------EmptyRelation +07)--Projection: x.a AS a1 +08)----Aggregate: groupBy=[[x.a]], aggr=[[]] +09)------SubqueryAlias: x +10)--------Projection: Float64(1.1) AS a +11)----------EmptyRelation + +# union_with_incompatible_data_types() +query error Incompatible inputs for Union: Previous inputs were of type Utf8, but got incompatible type Boolean on column 'a' +SELECT 'a' a UNION ALL SELECT true a + +statement ok +SET datafusion.optimizer.max_passes = 3; + +statement ok +SET 
datafusion.explain.logical_plan_only = false; + +statement ok +DROP TABLE orders; + +######################################## +## Test type coercion with UNIONs end ## +######################################## diff --git a/datafusion/sqllogictest/test_files/union.slt b/datafusion/sqllogictest/test_files/union.slt index 476ebe7ebebe..288f99d82c10 100644 --- a/datafusion/sqllogictest/test_files/union.slt +++ b/datafusion/sqllogictest/test_files/union.slt @@ -637,8 +637,54 @@ SELECT t1.v2, t1.v0 FROM t2 NATURAL JOIN t1 SELECT t1.v2, t1.v0 FROM t2 NATURAL JOIN t1 WHERE (t1.v2 IS NULL); ---- +statement ok +CREATE TABLE t3 ( + id INT +) as VALUES + (1), + (2), + (3) +; + +statement ok +CREATE TABLE t4 ( + id TEXT +) as VALUES + ('4'), + ('5'), + ('6') +; + +# test type coersion for wildcard expansion +query T rowsort +(SELECT * FROM t3 ) UNION ALL (SELECT * FROM t4) +---- +1 +2 +3 +4 +5 +6 + statement ok DROP TABLE t1; statement ok DROP TABLE t2; + +statement ok +DROP TABLE t3; + +statement ok +DROP TABLE t4; + +# Test issue: https://github.com/apache/datafusion/issues/11742 +query R rowsort +WITH + tt(v1) AS (VALUES (1::INT),(NULL::INT)) +SELECT NVL(v1, 0.5) FROM tt + UNION ALL +SELECT NULL WHERE FALSE; +---- +0.5 +1 diff --git a/datafusion/sqllogictest/test_files/unnest.slt b/datafusion/sqllogictest/test_files/unnest.slt index 4957011b8ba2..afa576d12746 100644 --- a/datafusion/sqllogictest/test_files/unnest.slt +++ b/datafusion/sqllogictest/test_files/unnest.slt @@ -235,7 +235,7 @@ NULL 10 NULL NULL NULL 17 NULL NULL 18 -query IIII +query IIIT select unnest(column1), unnest(column2) + 2, column3 * 10, unnest(array_remove(column1, '4')) diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index dfc882667617..ddf6a7aabffc 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -3929,7 +3929,8 @@ b 1 3 a 1 4 b 5 5 -statement error DataFusion error: Error during planning: Projection references non-aggregate values: Expression aggregate_test_100.c1 could not be resolved from available columns: rn +# Schema error: No field named aggregate_test_100.c1. Valid fields are rn. +statement error SELECT * FROM (SELECT c1, c2, ROW_NUMBER() OVER(PARTITION BY c1) as rn FROM aggregate_test_100 diff --git a/dev/release/README.md b/dev/release/README.md index 1817b3002578..397369a41aa3 100644 --- a/dev/release/README.md +++ b/dev/release/README.md @@ -256,20 +256,7 @@ to all of the DataFusion crates. Download and unpack the official release tarball Verify that the Cargo.toml in the tarball contains the correct version -(e.g. `version = "38.0.0"`) and then publish the crates by running the script `release-crates.sh` -in a directory extracted from the source tarball that was voted on. Note that this script doesn't -work if run in a Git repo. - -Alternatively the crates can be published one at a time with the following commands. Crates need to be -published in the correct order as shown in this diagram. - -![](crate-deps.svg) - -_To update this diagram, manually edit the dependencies in [crate-deps.dot](crate-deps.dot) and then run:_ - -```shell -dot -Tsvg dev/release/crate-deps.dot > dev/release/crate-deps.svg -``` +(e.g. 
`version = "38.0.0"`) and then publish the crates by running the following commands ```shell (cd datafusion/common && cargo publish) @@ -283,7 +270,9 @@ dot -Tsvg dev/release/crate-deps.dot > dev/release/crate-deps.svg (cd datafusion/sql && cargo publish) (cd datafusion/optimizer && cargo publish) (cd datafusion/common-runtime && cargo publish) +(cd datafusion/catalog && cargo publish) (cd datafusion/physical-plan && cargo publish) +(cd datafusion/physical-optimizer && cargo publish) (cd datafusion/core && cargo publish) (cd datafusion/proto-common && cargo publish) (cd datafusion/proto && cargo publish) diff --git a/dev/release/crate-deps.dot b/dev/release/crate-deps.dot deleted file mode 100644 index 1d903a56021d..000000000000 --- a/dev/release/crate-deps.dot +++ /dev/null @@ -1,91 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -digraph G { - datafusion_examples - datafusion_examples -> datafusion - datafusion_examples -> datafusion_common - datafusion_examples -> datafusion_expr - datafusion_examples -> datafusion_optimizer - datafusion_examples -> datafusion_physical_expr - datafusion_examples -> datafusion_sql - datafusion_expr - datafusion_expr -> datafusion_common - datafusion_functions - datafusion_functions -> datafusion_common - datafusion_functions -> datafusion_execution - datafusion_functions -> datafusion_expr - datafusion_wasmtest - datafusion_wasmtest -> datafusion - datafusion_wasmtest -> datafusion_common - datafusion_wasmtest -> datafusion_execution - datafusion_wasmtest -> datafusion_expr - datafusion_wasmtest -> datafusion_optimizer - datafusion_wasmtest -> datafusion_physical_expr - datafusion_wasmtest -> datafusion_physical_plan - datafusion_wasmtest -> datafusion_sql - datafusion_common - datafusion_sql - datafusion_sql -> datafusion_common - datafusion_sql -> datafusion_expr - datafusion_physical_plan - datafusion_physical_plan -> datafusion_common - datafusion_physical_plan -> datafusion_execution - datafusion_physical_plan -> datafusion_expr - datafusion_physical_plan -> datafusion_physical_expr - datafusion_benchmarks - datafusion_benchmarks -> datafusion - datafusion_benchmarks -> datafusion_common - datafusion_benchmarks -> datafusion_proto - datafusion_docs_tests - datafusion_docs_tests -> datafusion - datafusion_optimizer - datafusion_optimizer -> datafusion_common - datafusion_optimizer -> datafusion_expr - datafusion_optimizer -> datafusion_physical_expr - datafusion_optimizer -> datafusion_sql - datafusion_proto - datafusion_proto -> datafusion - datafusion_proto -> datafusion_common - datafusion_proto -> datafusion_expr - datafusion_physical_expr - datafusion_physical_expr -> datafusion_common - datafusion_physical_expr -> datafusion_execution - datafusion_physical_expr -> datafusion_expr - 
datafusion_sqllogictest - datafusion_sqllogictest -> datafusion - datafusion_sqllogictest -> datafusion_common - datafusion - datafusion -> datafusion_common - datafusion -> datafusion_execution - datafusion -> datafusion_expr - datafusion -> datafusion_functions - datafusion -> datafusion_functions_nested - datafusion -> datafusion_optimizer - datafusion -> datafusion_physical_expr - datafusion -> datafusion_physical_plan - datafusion -> datafusion_sql - datafusion_functions_nested - datafusion_functions_nested -> datafusion_common - datafusion_functions_nested -> datafusion_execution - datafusion_functions_nested -> datafusion_expr - datafusion_execution - datafusion_execution -> datafusion_common - datafusion_execution -> datafusion_expr - datafusion_substrait - datafusion_substrait -> datafusion -} \ No newline at end of file diff --git a/dev/release/crate-deps.svg b/dev/release/crate-deps.svg deleted file mode 100644 index c76fe3abb4ac..000000000000 --- a/dev/release/crate-deps.svg +++ /dev/null @@ -1,445 +0,0 @@
[445 deleted lines of dev/release/crate-deps.svg omitted: the Graphviz-rendered SVG of the crate dependency graph defined by crate-deps.dot above; only its node and edge labels survived extraction, so the SVG markup is summarized here rather than reproduced.]
diff --git a/dev/release/release-crates.sh b/dev/release/release-crates.sh deleted file mode 100644 index b9bda68b780b..000000000000 --- a/dev/release/release-crates.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -# This script publishes datafusion crates to crates.io. -# -# This script should only be run after the release has been approved -# by the Apache DataFusion PMC committee. -# -# See release/README.md for full release instructions - -set -eu - -# Do not run inside a git repo -if !
-if ! [ git rev-parse --is-inside-work-tree ]; then
- cd datafusion/common && cargo publish
- cd datafusion/expr && cargo publish
- cd datafusion/sql && cargo publish
- cd datafusion/physical-expr && cargo publish
- cd datafusion/optimizer && cargo publish
- cd datafusion/core && cargo publish
- cd datafusion/proto && cargo publish
- cd datafusion/execution && cargo publish
- cd datafusion/substrait && cargo publish
- cd datafusion-cli && cargo publish --no-verify
-else
- echo "Crates must be released from the source tarball that was voted on, not from the repo"
- exit 1
-fi
diff --git a/docs/source/contributor-guide/howtos.md b/docs/source/contributor-guide/howtos.md
index 254b1de6521e..4e52a2fbcaa6 100644
--- a/docs/source/contributor-guide/howtos.md
+++ b/docs/source/contributor-guide/howtos.md
@@ -24,7 +24,7 @@ Below is a checklist of what you need to do to add a new scalar function to
DataFusion:
- Add the actual implementation of the function to a new module file within:
- - [here](https://github.com/apache/datafusion/tree/main/datafusion/functions-array) for array functions
+ - [here](https://github.com/apache/datafusion/tree/main/datafusion/functions-nested) for array, map and struct functions
- [here](https://github.com/apache/datafusion/tree/main/datafusion/functions/src/crypto) for crypto functions
- [here](https://github.com/apache/datafusion/tree/main/datafusion/functions/src/datetime) for datetime functions
- [here](https://github.com/apache/datafusion/tree/main/datafusion/functions/src/encoding) for encoding functions