diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index ab6a615ab60b..44ca5aaf4eda 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -61,4 +61,4 @@ jobs: git add --all git commit -m 'Publish built docs triggered by ${{ github.sha }}' git push || git push --force - fi + fi \ No newline at end of file diff --git a/benchmarks/src/parquet_filter.rs b/benchmarks/src/parquet_filter.rs index eb9e09a7cb7c..5c98a2f8be3d 100644 --- a/benchmarks/src/parquet_filter.rs +++ b/benchmarks/src/parquet_filter.rs @@ -15,8 +15,10 @@ // specific language governing permissions and limitations // under the License. -use crate::AccessLogOpt; -use crate::{BenchmarkRun, CommonOpt}; +use std::path::PathBuf; + +use crate::{AccessLogOpt, BenchmarkRun, CommonOpt}; + use arrow::util::pretty; use datafusion::common::Result; use datafusion::logical_expr::utils::disjunction; @@ -25,7 +27,7 @@ use datafusion::physical_plan::collect; use datafusion::prelude::{col, SessionContext}; use datafusion::test_util::parquet::{ParquetScanOptions, TestParquetFile}; use datafusion_common::instant::Instant; -use std::path::PathBuf; + use structopt::StructOpt; /// Test performance of parquet filter pushdown @@ -179,7 +181,7 @@ async fn exec_scan( debug: bool, ) -> Result<(usize, std::time::Duration)> { let start = Instant::now(); - let exec = test_file.create_scan(Some(filter)).await?; + let exec = test_file.create_scan(ctx, Some(filter)).await?; let task_ctx = ctx.task_ctx(); let result = collect(exec, task_ctx).await?; diff --git a/benchmarks/src/sort.rs b/benchmarks/src/sort.rs index bda0f4ae3f43..19eec2949ef6 100644 --- a/benchmarks/src/sort.rs +++ b/benchmarks/src/sort.rs @@ -15,9 +15,11 @@ // specific language governing permissions and limitations // under the License. -use crate::AccessLogOpt; -use crate::BenchmarkRun; -use crate::CommonOpt; +use std::path::PathBuf; +use std::sync::Arc; + +use crate::{AccessLogOpt, BenchmarkRun, CommonOpt}; + use arrow::util::pretty; use datafusion::common::Result; use datafusion::physical_expr::PhysicalSortExpr; @@ -26,8 +28,7 @@ use datafusion::physical_plan::sorts::sort::SortExec; use datafusion::prelude::{SessionConfig, SessionContext}; use datafusion::test_util::parquet::TestParquetFile; use datafusion_common::instant::Instant; -use std::path::PathBuf; -use std::sync::Arc; + use structopt::StructOpt; /// Test performance of sorting large datasets @@ -174,7 +175,7 @@ async fn exec_sort( debug: bool, ) -> Result<(usize, std::time::Duration)> { let start = Instant::now(); - let scan = test_file.create_scan(None).await?; + let scan = test_file.create_scan(ctx, None).await?; let exec = Arc::new(SortExec::new(expr.to_owned(), scan)); let task_ctx = ctx.task_ctx(); let result = collect(exec, task_ctx).await?; diff --git a/benchmarks/src/tpch/run.rs b/benchmarks/src/tpch/run.rs index 5497315fa3ba..564a2f05b6fe 100644 --- a/benchmarks/src/tpch/run.rs +++ b/benchmarks/src/tpch/run.rs @@ -15,8 +15,14 @@ // specific language governing permissions and limitations // under the License. 
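The two benchmark hunks above change `TestParquetFile::create_scan` so it now receives the session context in addition to the optional pushdown filter. A minimal sketch of the new call shape, mirroring `exec_scan` above (the helper name `scan_with_filter` is illustrative and not part of the patch):

```rust
use datafusion::common::Result;
use datafusion::physical_plan::collect;
use datafusion::prelude::{Expr, SessionContext};
use datafusion::test_util::parquet::TestParquetFile;

// Mirrors `exec_scan` above: the session context is now threaded through to
// `create_scan` alongside the optional pushdown filter.
async fn scan_with_filter(
    ctx: &SessionContext,
    test_file: &TestParquetFile,
    filter: Expr,
) -> Result<usize> {
    let exec = test_file.create_scan(ctx, Some(filter)).await?;
    let batches = collect(exec, ctx.task_ctx()).await?;
    Ok(batches.iter().map(|b| b.num_rows()).sum())
}
```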
-use super::get_query_sql; +use std::path::PathBuf; +use std::sync::Arc; + +use super::{ + get_query_sql, get_tbl_tpch_table_schema, get_tpch_table_schema, TPCH_TABLES, +}; use crate::{BenchmarkRun, CommonOpt}; + use arrow::record_batch::RecordBatch; use arrow::util::pretty::{self, pretty_format_batches}; use datafusion::datasource::file_format::csv::CsvFormat; @@ -26,21 +32,16 @@ use datafusion::datasource::listing::{ ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl, }; use datafusion::datasource::{MemTable, TableProvider}; +use datafusion::error::Result; use datafusion::physical_plan::display::DisplayableExecutionPlan; use datafusion::physical_plan::{collect, displayable}; -use datafusion_common::{DEFAULT_CSV_EXTENSION, DEFAULT_PARQUET_EXTENSION}; -use log::info; - +use datafusion::prelude::*; use datafusion_common::instant::Instant; -use std::path::PathBuf; -use std::sync::Arc; +use datafusion_common::{DEFAULT_CSV_EXTENSION, DEFAULT_PARQUET_EXTENSION}; -use datafusion::error::Result; -use datafusion::prelude::*; +use log::info; use structopt::StructOpt; -use super::{get_tbl_tpch_table_schema, get_tpch_table_schema, TPCH_TABLES}; - /// Run the tpch benchmark. /// /// This benchmarks is derived from the [TPC-H][1] version @@ -253,7 +254,7 @@ impl RunOpt { } "parquet" => { let path = format!("{path}/{table}"); - let format = ParquetFormat::default().with_enable_pruning(Some(true)); + let format = ParquetFormat::default().with_enable_pruning(true); (Arc::new(format), path, DEFAULT_PARQUET_EXTENSION) } @@ -298,11 +299,12 @@ struct QueryResult { // Only run with "ci" mode when we have the data #[cfg(feature = "ci")] mod tests { + use std::path::Path; + use super::*; + use datafusion::common::exec_err; use datafusion::error::{DataFusionError, Result}; - use std::path::Path; - use datafusion_proto::bytes::{ logical_plan_from_bytes, logical_plan_to_bytes, physical_plan_from_bytes, physical_plan_to_bytes, diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 6efb657ea899..a0f68c76e4a8 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -1238,8 +1238,8 @@ dependencies = [ "datafusion-common", "paste", "sqlparser", - "strum 0.26.1", - "strum_macros 0.26.1", + "strum 0.26.2", + "strum_macros 0.26.2", ] [[package]] @@ -1265,6 +1265,9 @@ name = "datafusion-functions-array" version = "36.0.0" dependencies = [ "arrow", + "arrow-array", + "arrow-buffer", + "arrow-schema", "datafusion-common", "datafusion-execution", "datafusion-expr", @@ -1363,7 +1366,7 @@ dependencies = [ "datafusion-expr", "log", "sqlparser", - "strum 0.26.1", + "strum 0.26.2", ] [[package]] @@ -3257,11 +3260,11 @@ checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" [[package]] name = "strum" -version = "0.26.1" +version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "723b93e8addf9aa965ebe2d11da6d7540fa2283fcea14b3371ff055f7ba13f5f" +checksum = "5d8cec3501a5194c432b2b7976db6b7d10ec95c253208b45f83f7136aa985e29" dependencies = [ - "strum_macros 0.26.1", + "strum_macros 0.26.2", ] [[package]] @@ -3279,9 +3282,9 @@ dependencies = [ [[package]] name = "strum_macros" -version = "0.26.1" +version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a3417fc93d76740d974a01654a09777cb500428cc874ca9f45edfe0c4d4cd18" +checksum = "c6cf59daf282c0a494ba14fd21610a0325f9f90ec9d1231dea26bcb1d696c946" dependencies = [ "heck", "proc-macro2", diff --git a/datafusion-cli/src/catalog.rs 
b/datafusion-cli/src/catalog.rs index bcedf7248cec..a8ecb98637cb 100644 --- a/datafusion-cli/src/catalog.rs +++ b/datafusion-cli/src/catalog.rs @@ -15,8 +15,11 @@ // specific language governing permissions and limitations // under the License. -use crate::object_storage::get_object_store; -use async_trait::async_trait; +use std::any::Any; +use std::sync::{Arc, Weak}; + +use crate::object_storage::{get_object_store, AwsOptions, GcpOptions}; + use datafusion::catalog::schema::SchemaProvider; use datafusion::catalog::{CatalogProvider, CatalogProviderList}; use datafusion::common::plan_datafusion_err; @@ -26,12 +29,10 @@ use datafusion::datasource::listing::{ use datafusion::datasource::TableProvider; use datafusion::error::Result; use datafusion::execution::context::SessionState; + +use async_trait::async_trait; use dirs::home_dir; use parking_lot::RwLock; -use std::any::Any; -use std::collections::HashMap; -use std::sync::{Arc, Weak}; -use url::Url; /// Wraps another catalog, automatically creating table providers /// for local files if needed @@ -155,7 +156,7 @@ impl SchemaProvider for DynamicFileSchemaProvider { // if the inner schema provider didn't have a table by // that name, try to treat it as a listing table - let state = self + let mut state = self .state .upgrade() .ok_or_else(|| plan_datafusion_err!("locking error"))? @@ -163,7 +164,8 @@ impl SchemaProvider for DynamicFileSchemaProvider { .clone(); let optimized_name = substitute_tilde(name.to_owned()); let table_url = ListingTableUrl::parse(optimized_name.as_str())?; - let url: &Url = table_url.as_ref(); + let scheme = table_url.scheme(); + let url = table_url.as_ref(); // If the store is already registered for this URL then `get_store` // will return `Ok` which means we don't need to register it again. However, @@ -174,10 +176,22 @@ impl SchemaProvider for DynamicFileSchemaProvider { Err(_) => { // Register the store for this URL. Here we don't have access // to any command options so the only choice is to use an empty collection - let mut options = HashMap::new(); - let store = - get_object_store(&state, &mut options, table_url.scheme(), url) - .await?; + match scheme { + "s3" | "oss" => { + state = state.add_table_options_extension(AwsOptions::default()); + } + "gs" | "gcs" => { + state = state.add_table_options_extension(GcpOptions::default()) + } + _ => {} + }; + let store = get_object_store( + &state, + table_url.scheme(), + url, + state.default_table_options(), + ) + .await?; state.runtime_env().register_object_store(url, store); } } @@ -215,6 +229,7 @@ fn substitute_tilde(cur: String) -> String { #[cfg(test)] mod tests { use super::*; + use datafusion::catalog::schema::SchemaProvider; use datafusion::prelude::SessionContext; diff --git a/datafusion-cli/src/exec.rs b/datafusion-cli/src/exec.rs index 4eae5ffdd7e7..b11f1c202284 100644 --- a/datafusion-cli/src/exec.rs +++ b/datafusion-cli/src/exec.rs @@ -17,7 +17,6 @@ //! 
Execution functions -use datafusion_common::instant::Instant; use std::collections::HashMap; use std::fs::File; use std::io::prelude::*; @@ -27,15 +26,15 @@ use crate::print_format::PrintFormat; use crate::{ command::{Command, OutputFormat}, helper::{unescape_input, CliHelper}, - object_storage::get_object_store, + object_storage::{get_object_store, register_options}, print_options::{MaxRows, PrintOptions}, }; +use datafusion::common::instant::Instant; use datafusion::common::plan_datafusion_err; use datafusion::datasource::listing::ListingTableUrl; use datafusion::error::{DataFusionError, Result}; -use datafusion::logical_expr::dml::CopyTo; -use datafusion::logical_expr::{CreateExternalTable, DdlStatement, LogicalPlan}; +use datafusion::logical_expr::{DdlStatement, LogicalPlan}; use datafusion::physical_plan::{collect, execute_stream, ExecutionPlanProperties}; use datafusion::prelude::SessionContext; use datafusion::sql::parser::{DFParser, Statement}; @@ -44,7 +43,6 @@ use datafusion::sql::sqlparser::dialect::dialect_from_str; use rustyline::error::ReadlineError; use rustyline::Editor; use tokio::signal; -use url::Url; /// run and execute SQL statements and commands, against a context with the given print options pub async fn exec_from_commands( @@ -258,42 +256,74 @@ async fn create_plan( // Note that cmd is a mutable reference so that create_external_table function can remove all // datafusion-cli specific options before passing through to datafusion. Otherwise, datafusion // will raise Configuration errors. - if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &mut plan { - create_external_table(ctx, cmd).await?; + if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &plan { + register_object_store_and_config_extensions(ctx, &cmd.location, &cmd.options) + .await?; } if let LogicalPlan::Copy(copy_to) = &mut plan { - register_object_store(ctx, copy_to).await?; + register_object_store_and_config_extensions( + ctx, + &copy_to.output_url, + &copy_to.options, + ) + .await?; } Ok(plan) } -async fn register_object_store( - ctx: &SessionContext, - copy_to: &mut CopyTo, -) -> Result<(), DataFusionError> { - let url = ListingTableUrl::parse(copy_to.output_url.as_str())?; - let store = get_object_store( - &ctx.state(), - &mut HashMap::new(), - url.scheme(), - url.as_ref(), - ) - .await?; - ctx.runtime_env().register_object_store(url.as_ref(), store); - Ok(()) -} - -async fn create_external_table( +/// Asynchronously registers an object store and its configuration extensions +/// to the session context. +/// +/// This function dynamically registers a cloud object store based on the given +/// location and options. It first parses the location to determine the scheme +/// and constructs the URL accordingly. Depending on the scheme, it also registers +/// relevant options. The function then alters the default table options with the +/// given custom options. Finally, it retrieves and registers the object store +/// in the session context. +/// +/// # Parameters +/// +/// * `ctx`: A reference to the `SessionContext` for registering the object store. +/// * `location`: A string reference representing the location of the object store. +/// * `options`: A reference to a hash map containing configuration options for +/// the object store. +/// +/// # Returns +/// +/// A `Result<()>` which is an Ok value indicating successful registration, or +/// an error upon failure.
+/// +/// # Errors +/// +/// This function can return an error if the location parsing fails, options +/// alteration fails, or if the object store cannot be retrieved and registered +/// successfully. +pub(crate) async fn register_object_store_and_config_extensions( ctx: &SessionContext, - cmd: &mut CreateExternalTable, + location: &String, + options: &HashMap, ) -> Result<()> { - let table_path = ListingTableUrl::parse(&cmd.location)?; + // Parse the location URL to extract the scheme and other components + let table_path = ListingTableUrl::parse(location)?; + + // Extract the scheme (e.g., "s3", "gcs") from the parsed URL let scheme = table_path.scheme(); - let url: &Url = table_path.as_ref(); - // registering the cloud object store dynamically using cmd.options - let store = get_object_store(&ctx.state(), &mut cmd.options, scheme, url).await?; + // Obtain a reference to the URL + let url = table_path.as_ref(); + + // Register the options based on the scheme extracted from the location + register_options(ctx, scheme); + + // Clone and modify the default table options based on the provided options + let mut table_options = ctx.state().default_table_options().clone(); + table_options.alter_with_string_hash_map(options)?; + + // Retrieve the appropriate object store based on the scheme, URL, and modified table options + let store = get_object_store(&ctx.state(), scheme, url, &table_options).await?; + + // Register the retrieved object store in the session context's runtime environment ctx.runtime_env().register_object_store(url, store); Ok(()) @@ -301,33 +331,48 @@ async fn create_external_table( #[cfg(test)] mod tests { - use std::str::FromStr; - use super::*; - use datafusion::common::{plan_err, FileType, FileTypeWriterOptions}; - use datafusion_common::file_options::StatementOptions; + use datafusion_common::config::FormatOptions; + use datafusion_common::plan_err; + + use url::Url; async fn create_external_table_test(location: &str, sql: &str) -> Result<()> { let ctx = SessionContext::new(); - let mut plan = ctx.state().create_logical_plan(sql).await?; - - if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &mut plan { - create_external_table(&ctx, cmd).await?; - let options: Vec<_> = cmd - .options - .iter() - .map(|(k, v)| (k.clone(), v.clone())) - .collect(); - let statement_options = StatementOptions::new(options); - let file_type = - datafusion_common::FileType::from_str(cmd.file_type.as_str())?; - - let _file_type_writer_options = FileTypeWriterOptions::build( - &file_type, - ctx.state().config_options(), - &statement_options, - )?; + let plan = ctx.state().create_logical_plan(sql).await?; + + if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &plan { + register_object_store_and_config_extensions( + &ctx, + &cmd.location, + &cmd.options, + ) + .await?; + } else { + return plan_err!("LogicalPlan is not a CreateExternalTable"); + } + + // Ensure the URL is supported by the object store + ctx.runtime_env() + .object_store(ListingTableUrl::parse(location)?)?; + + Ok(()) + } + + async fn copy_to_table_test(location: &str, sql: &str) -> Result<()> { + let ctx = SessionContext::new(); + // AWS CONFIG register. 
+ + let plan = ctx.state().create_logical_plan(sql).await?; + + if let LogicalPlan::Copy(cmd) = &plan { + register_object_store_and_config_extensions( + &ctx, + &cmd.output_url, + &cmd.options, + ) + .await?; } else { return plan_err!("LogicalPlan is not a CreateExternalTable"); } @@ -374,7 +419,7 @@ mod tests { let mut plan = create_plan(&mut ctx, statement).await?; if let LogicalPlan::Copy(copy_to) = &mut plan { assert_eq!(copy_to.output_url, location); - assert_eq!(copy_to.file_format, FileType::PARQUET); + assert!(matches!(copy_to.format_options, FormatOptions::PARQUET(_))); ctx.runtime_env() .object_store_registry .get_store(&Url::parse(©_to.output_url).unwrap())?; @@ -386,6 +431,20 @@ mod tests { Ok(()) } + #[tokio::test] + async fn copy_to_object_store_table_s3() -> Result<()> { + let access_key_id = "fake_access_key_id"; + let secret_access_key = "fake_secret_access_key"; + let location = "s3://bucket/path/file.parquet"; + + // Missing region, use object_store defaults + let sql = format!("COPY (values (1,2)) TO '{location}' + (format parquet, 'aws.access_key_id' '{access_key_id}', 'aws.secret_access_key' '{secret_access_key}')"); + copy_to_table_test(location, &sql).await?; + + Ok(()) + } + #[tokio::test] async fn create_object_store_table_s3() -> Result<()> { let access_key_id = "fake_access_key_id"; @@ -396,12 +455,12 @@ mod tests { // Missing region, use object_store defaults let sql = format!("CREATE EXTERNAL TABLE test STORED AS PARQUET - OPTIONS('access_key_id' '{access_key_id}', 'secret_access_key' '{secret_access_key}') LOCATION '{location}'"); + OPTIONS('aws.access_key_id' '{access_key_id}', 'aws.secret_access_key' '{secret_access_key}') LOCATION '{location}'"); create_external_table_test(location, &sql).await?; // Should be OK let sql = format!("CREATE EXTERNAL TABLE test STORED AS PARQUET - OPTIONS('access_key_id' '{access_key_id}', 'secret_access_key' '{secret_access_key}', 'region' '{region}', 'session_token' '{session_token}') LOCATION '{location}'"); + OPTIONS('aws.access_key_id' '{access_key_id}', 'aws.secret_access_key' '{secret_access_key}', 'aws.region' '{region}', 'aws.session_token' '{session_token}') LOCATION '{location}'"); create_external_table_test(location, &sql).await?; Ok(()) @@ -416,7 +475,7 @@ mod tests { // Should be OK let sql = format!("CREATE EXTERNAL TABLE test STORED AS PARQUET - OPTIONS('access_key_id' '{access_key_id}', 'secret_access_key' '{secret_access_key}', 'endpoint' '{endpoint}') LOCATION '{location}'"); + OPTIONS('aws.access_key_id' '{access_key_id}', 'aws.secret_access_key' '{secret_access_key}', 'aws.oss.endpoint' '{endpoint}') LOCATION '{location}'"); create_external_table_test(location, &sql).await?; Ok(()) @@ -432,14 +491,14 @@ mod tests { // for service_account_path let sql = format!("CREATE EXTERNAL TABLE test STORED AS PARQUET - OPTIONS('service_account_path' '{service_account_path}') LOCATION '{location}'"); + OPTIONS('gcp.service_account_path' '{service_account_path}') LOCATION '{location}'"); let err = create_external_table_test(location, &sql) .await .unwrap_err(); assert!(err.to_string().contains("os error 2")); // for service_account_key - let sql = format!("CREATE EXTERNAL TABLE test STORED AS PARQUET OPTIONS('service_account_key' '{service_account_key}') LOCATION '{location}'"); + let sql = format!("CREATE EXTERNAL TABLE test STORED AS PARQUET OPTIONS('gcp.service_account_key' '{service_account_key}') LOCATION '{location}'"); let err = create_external_table_test(location, &sql) .await .unwrap_err() @@ -448,7 +507,7 @@ 
mod tests { // for application_credentials_path let sql = format!("CREATE EXTERNAL TABLE test STORED AS PARQUET - OPTIONS('application_credentials_path' '{application_credentials_path}') LOCATION '{location}'"); + OPTIONS('gcp.application_credentials_path' '{application_credentials_path}') LOCATION '{location}'"); let err = create_external_table_test(location, &sql) .await .unwrap_err(); diff --git a/datafusion-cli/src/object_storage.rs b/datafusion-cli/src/object_storage.rs index 897f3796550d..033c8f839ab2 100644 --- a/datafusion-cli/src/object_storage.rs +++ b/datafusion-cli/src/object_storage.rs @@ -15,40 +15,41 @@ // specific language governing permissions and limitations // under the License. -use async_trait::async_trait; -use aws_credential_types::provider::ProvideCredentials; -use datafusion::common::exec_datafusion_err; +use std::any::Any; +use std::fmt::{Debug, Display}; +use std::sync::Arc; + +use datafusion::common::{config_namespace, exec_datafusion_err, exec_err, internal_err}; use datafusion::error::{DataFusionError, Result}; use datafusion::execution::context::SessionState; -use object_store::aws::AwsCredential; -use object_store::http::HttpBuilder; -use object_store::ObjectStore; -use object_store::{ - aws::AmazonS3Builder, gcp::GoogleCloudStorageBuilder, CredentialProvider, +use datafusion::prelude::SessionContext; +use datafusion_common::config::{ + ConfigEntry, ConfigExtension, ConfigField, ExtensionOptions, TableOptions, Visit, }; -use std::collections::HashMap; -use std::sync::Arc; + +use async_trait::async_trait; +use aws_credential_types::provider::ProvideCredentials; +use object_store::aws::{AmazonS3Builder, AwsCredential}; +use object_store::gcp::GoogleCloudStorageBuilder; +use object_store::http::HttpBuilder; +use object_store::{CredentialProvider, ObjectStore}; use url::Url; pub async fn get_s3_object_store_builder( url: &Url, - options: &mut HashMap, + aws_options: &AwsOptions, ) -> Result { let bucket_name = get_bucket_name(url)?; let mut builder = AmazonS3Builder::from_env().with_bucket_name(bucket_name); - if let (Some(access_key_id), Some(secret_access_key)) = ( - // These options are datafusion-cli specific and must be removed before passing through to datafusion. - // Otherwise, a Configuration error will be raised. - options.remove("access_key_id"), - options.remove("secret_access_key"), - ) { - println!("removing secret access key!"); + if let (Some(access_key_id), Some(secret_access_key)) = + (&aws_options.access_key_id, &aws_options.secret_access_key) + { builder = builder .with_access_key_id(access_key_id) .with_secret_access_key(secret_access_key); - if let Some(session_token) = options.remove("session_token") { + if let Some(session_token) = &aws_options.session_token { builder = builder.with_token(session_token); } } else { @@ -62,7 +63,7 @@ pub async fn get_s3_object_store_builder( .ok_or_else(|| { DataFusionError::ObjectStore(object_store::Error::Generic { store: "S3", - source: "Failed to get S3 credentials from environment".into(), + source: "Failed to get S3 credentials from the environment".into(), }) })? 
.clone(); @@ -71,7 +72,7 @@ builder = builder.with_credentials(credentials); } - if let Some(region) = options.remove("region") { + if let Some(region) = &aws_options.region { builder = builder.with_region(region); } @@ -104,7 +105,7 @@ impl CredentialProvider for S3CredentialProvider { pub fn get_oss_object_store_builder( url: &Url, - cmd: &mut HashMap<String, String>, + aws_options: &AwsOptions, ) -> Result<AmazonS3Builder> { let bucket_name = get_bucket_name(url)?; let mut builder = AmazonS3Builder::from_env() @@ -114,14 +115,14 @@ .with_virtual_hosted_style_request(false) .with_bucket_name(bucket_name) .with_region("do_not_care"); if let (Some(access_key_id), Some(secret_access_key)) = - (cmd.remove("access_key_id"), cmd.remove("secret_access_key")) + (&aws_options.access_key_id, &aws_options.secret_access_key) { builder = builder .with_access_key_id(access_key_id) .with_secret_access_key(secret_access_key); } - if let Some(endpoint) = cmd.remove("endpoint") { + if let Some(endpoint) = &aws_options.oss.endpoint { builder = builder.with_endpoint(endpoint); } @@ -130,21 +131,20 @@ pub fn get_gcs_object_store_builder( url: &Url, - cmd: &mut HashMap<String, String>, + gs_options: &GcpOptions, ) -> Result<GoogleCloudStorageBuilder> { let bucket_name = get_bucket_name(url)?; let mut builder = GoogleCloudStorageBuilder::from_env().with_bucket_name(bucket_name); - if let Some(service_account_path) = cmd.remove("service_account_path") { + if let Some(service_account_path) = &gs_options.service_account_path { builder = builder.with_service_account_path(service_account_path); } - if let Some(service_account_key) = cmd.remove("service_account_key") { + if let Some(service_account_key) = &gs_options.service_account_key { builder = builder.with_service_account_key(service_account_key); } - if let Some(application_credentials_path) = cmd.remove("application_credentials_path") - { + if let Some(application_credentials_path) = &gs_options.application_credentials_path { builder = builder.with_application_credentials(application_credentials_path); } @@ -160,32 +160,277 @@ fn get_bucket_name(url: &Url) -> Result<&str> { }) } +/// This struct encapsulates AWS options one uses when setting up object storage. +#[derive(Default, Debug, Clone)] +pub struct AwsOptions { + /// Access Key ID + pub access_key_id: Option<String>, + /// Secret Access Key + pub secret_access_key: Option<String>, + /// Session token + pub session_token: Option<String>, + /// AWS Region + pub region: Option<String>, + /// Object Storage Service options + pub oss: OssOptions, +} + +config_namespace!
{ + pub struct OssOptions { + pub endpoint: Option, default = None + } +} + +impl ExtensionOptions for AwsOptions { + fn as_any(&self) -> &dyn Any { + self + } + + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + + fn cloned(&self) -> Box { + Box::new(self.clone()) + } + + fn set(&mut self, key: &str, value: &str) -> Result<()> { + let (_key, aws_key) = key.split_once('.').unwrap_or((key, "")); + let (key, rem) = aws_key.split_once('.').unwrap_or((aws_key, "")); + match key { + "access_key_id" => { + self.access_key_id.set(rem, value)?; + } + "secret_access_key" => { + self.secret_access_key.set(rem, value)?; + } + "session_token" => { + self.session_token.set(rem, value)?; + } + "region" => { + self.region.set(rem, value)?; + } + "oss" => { + self.oss.set(rem, value)?; + } + _ => { + return internal_err!("Config value \"{}\" not found on AwsOptions", rem); + } + } + Ok(()) + } + + fn entries(&self) -> Vec { + struct Visitor(Vec); + + impl Visit for Visitor { + fn some( + &mut self, + key: &str, + value: V, + description: &'static str, + ) { + self.0.push(ConfigEntry { + key: key.to_string(), + value: Some(value.to_string()), + description, + }) + } + + fn none(&mut self, key: &str, description: &'static str) { + self.0.push(ConfigEntry { + key: key.to_string(), + value: None, + description, + }) + } + } + + let mut v = Visitor(vec![]); + self.access_key_id.visit(&mut v, "access_key_id", ""); + self.secret_access_key + .visit(&mut v, "secret_access_key", ""); + self.session_token.visit(&mut v, "session_token", ""); + self.region.visit(&mut v, "region", ""); + self.oss.visit(&mut v, "oss", ""); + v.0 + } +} + +impl ConfigExtension for AwsOptions { + const PREFIX: &'static str = "aws"; +} + +/// This struct encapsulates GCP options one uses when setting up object storage. 
+#[derive(Debug, Clone, Default)] +pub struct GcpOptions { + /// Service account path + pub service_account_path: Option, + /// Service account key + pub service_account_key: Option, + /// Application credentials path + pub application_credentials_path: Option, +} + +impl ExtensionOptions for GcpOptions { + fn as_any(&self) -> &dyn Any { + self + } + + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + + fn cloned(&self) -> Box { + Box::new(self.clone()) + } + + fn set(&mut self, key: &str, value: &str) -> Result<()> { + let (_key, rem) = key.split_once('.').unwrap_or((key, "")); + match rem { + "service_account_path" => { + self.service_account_path.set(rem, value)?; + } + "service_account_key" => { + self.service_account_key.set(rem, value)?; + } + "application_credentials_path" => { + self.application_credentials_path.set(rem, value)?; + } + _ => { + return internal_err!("Config value \"{}\" not found on GcpOptions", rem); + } + } + Ok(()) + } + + fn entries(&self) -> Vec { + struct Visitor(Vec); + + impl Visit for Visitor { + fn some( + &mut self, + key: &str, + value: V, + description: &'static str, + ) { + self.0.push(ConfigEntry { + key: key.to_string(), + value: Some(value.to_string()), + description, + }) + } + + fn none(&mut self, key: &str, description: &'static str) { + self.0.push(ConfigEntry { + key: key.to_string(), + value: None, + description, + }) + } + } + + let mut v = Visitor(vec![]); + self.service_account_path + .visit(&mut v, "service_account_path", ""); + self.service_account_key + .visit(&mut v, "service_account_key", ""); + self.application_credentials_path.visit( + &mut v, + "application_credentials_path", + "", + ); + v.0 + } +} + +impl ConfigExtension for GcpOptions { + const PREFIX: &'static str = "gcp"; +} + +/// Registers storage options for different cloud storage schemes in a given +/// session context. +/// +/// This function is responsible for extending the session context with specific +/// options based on the storage scheme being used. These options are essential +/// for handling interactions with different cloud storage services such as Amazon +/// S3, Alibaba Cloud OSS, Google Cloud Storage, etc. +/// +/// # Parameters +/// +/// * `ctx` - A mutable reference to the session context where table options are +/// to be registered. The session context holds configuration and environment +/// for the current session. +/// * `scheme` - A string slice that represents the cloud storage scheme. This +/// determines which set of options will be registered in the session context. +/// +/// # Supported Schemes +/// +/// * `s3` or `oss` - Registers `AwsOptions` which are configurations specific to +/// Amazon S3 and Alibaba Cloud OSS. +/// * `gs` or `gcs` - Registers `GcpOptions` which are configurations specific to +/// Google Cloud Storage. +/// +/// NOTE: This function will not perform any action when given an unsupported scheme. 
+pub(crate) fn register_options(ctx: &SessionContext, scheme: &str) { + // Match the provided scheme against supported cloud storage schemes: + match scheme { + // For Amazon S3 or Alibaba Cloud OSS + "s3" | "oss" => { + // Register AWS specific table options in the session context: + ctx.register_table_options_extension(AwsOptions::default()) + } + // For Google Cloud Storage + "gs" | "gcs" => { + // Register GCP specific table options in the session context: + ctx.register_table_options_extension(GcpOptions::default()) + } + // For unsupported schemes, do nothing: + _ => {} + } +} + pub(crate) async fn get_object_store( state: &SessionState, - options: &mut HashMap, scheme: &str, url: &Url, + table_options: &TableOptions, ) -> Result, DataFusionError> { - let store = match scheme { + let store: Arc = match scheme { "s3" => { + let Some(options) = table_options.extensions.get::() else { + return exec_err!( + "Given table options incompatible with the 's3' scheme" + ); + }; let builder = get_s3_object_store_builder(url, options).await?; - Arc::new(builder.build()?) as Arc + Arc::new(builder.build()?) } "oss" => { + let Some(options) = table_options.extensions.get::() else { + return exec_err!( + "Given table options incompatible with the 'oss' scheme" + ); + }; let builder = get_oss_object_store_builder(url, options)?; - Arc::new(builder.build()?) as Arc + Arc::new(builder.build()?) } "gs" | "gcs" => { + let Some(options) = table_options.extensions.get::() else { + return exec_err!( + "Given table options incompatible with the 'gs'/'gcs' scheme" + ); + }; let builder = get_gcs_object_store_builder(url, options)?; - Arc::new(builder.build()?) as Arc + Arc::new(builder.build()?) } "http" | "https" => Arc::new( HttpBuilder::new() .with_url(url.origin().ascii_serialization()) .build()?, - ) as Arc, + ), _ => { - // for other types, try to get from the object_store_registry + // For other types, try to get from `object_store_registry`: state .runtime_env() .object_store_registry @@ -201,12 +446,14 @@ pub(crate) async fn get_object_store( #[cfg(test)] mod tests { use super::*; + use datafusion::common::plan_err; use datafusion::{ datasource::listing::ListingTableUrl, logical_expr::{DdlStatement, LogicalPlan}, prelude::SessionContext, }; + use object_store::{aws::AmazonS3ConfigKey, gcp::GoogleConfigKey}; #[tokio::test] @@ -218,14 +465,19 @@ mod tests { let location = "s3://bucket/path/file.parquet"; let table_url = ListingTableUrl::parse(location)?; - let sql = format!("CREATE EXTERNAL TABLE test STORED AS PARQUET OPTIONS('access_key_id' '{access_key_id}', 'secret_access_key' '{secret_access_key}', 'region' '{region}', 'session_token' {session_token}) LOCATION '{location}'"); + let scheme = table_url.scheme(); + let sql = format!("CREATE EXTERNAL TABLE test STORED AS PARQUET OPTIONS('aws.access_key_id' '{access_key_id}', 'aws.secret_access_key' '{secret_access_key}', 'aws.region' '{region}', 'aws.session_token' {session_token}) LOCATION '{location}'"); let ctx = SessionContext::new(); let mut plan = ctx.state().create_logical_plan(&sql).await?; if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &mut plan { + register_options(&ctx, scheme); + let mut table_options = ctx.state().default_table_options().clone(); + table_options.alter_with_string_hash_map(&cmd.options)?; + let aws_options = table_options.extensions.get::().unwrap(); let builder = - get_s3_object_store_builder(table_url.as_ref(), &mut cmd.options).await?; + get_s3_object_store_builder(table_url.as_ref(), 
aws_options).await?; // get the actual configuration information, then assert_eq! let config = [ (AmazonS3ConfigKey::AccessKeyId, access_key_id), @@ -251,14 +503,18 @@ mod tests { let location = "oss://bucket/path/file.parquet"; let table_url = ListingTableUrl::parse(location)?; - let sql = format!("CREATE EXTERNAL TABLE test STORED AS PARQUET OPTIONS('access_key_id' '{access_key_id}', 'secret_access_key' '{secret_access_key}', 'endpoint' '{endpoint}') LOCATION '{location}'"); + let scheme = table_url.scheme(); + let sql = format!("CREATE EXTERNAL TABLE test STORED AS PARQUET OPTIONS('aws.access_key_id' '{access_key_id}', 'aws.secret_access_key' '{secret_access_key}', 'aws.oss.endpoint' '{endpoint}') LOCATION '{location}'"); let ctx = SessionContext::new(); let mut plan = ctx.state().create_logical_plan(&sql).await?; if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &mut plan { - let builder = - get_oss_object_store_builder(table_url.as_ref(), &mut cmd.options)?; + register_options(&ctx, scheme); + let mut table_options = ctx.state().default_table_options().clone(); + table_options.alter_with_string_hash_map(&cmd.options)?; + let aws_options = table_options.extensions.get::().unwrap(); + let builder = get_oss_object_store_builder(table_url.as_ref(), aws_options)?; // get the actual configuration information, then assert_eq! let config = [ (AmazonS3ConfigKey::AccessKeyId, access_key_id), @@ -284,14 +540,18 @@ mod tests { let location = "gcs://bucket/path/file.parquet"; let table_url = ListingTableUrl::parse(location)?; - let sql = format!("CREATE EXTERNAL TABLE test STORED AS PARQUET OPTIONS('service_account_path' '{service_account_path}', 'service_account_key' '{service_account_key}', 'application_credentials_path' '{application_credentials_path}') LOCATION '{location}'"); + let scheme = table_url.scheme(); + let sql = format!("CREATE EXTERNAL TABLE test STORED AS PARQUET OPTIONS('gcp.service_account_path' '{service_account_path}', 'gcp.service_account_key' '{service_account_key}', 'gcp.application_credentials_path' '{application_credentials_path}') LOCATION '{location}'"); let ctx = SessionContext::new(); let mut plan = ctx.state().create_logical_plan(&sql).await?; if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &mut plan { - let builder = - get_gcs_object_store_builder(table_url.as_ref(), &mut cmd.options)?; + register_options(&ctx, scheme); + let mut table_options = ctx.state().default_table_options().clone(); + table_options.alter_with_string_hash_map(&cmd.options)?; + let gcp_options = table_options.extensions.get::().unwrap(); + let builder = get_gcs_object_store_builder(table_url.as_ref(), gcp_options)?; // get the actual configuration information, then assert_eq! 
let config = [ (GoogleConfigKey::ServiceAccount, service_account_path), diff --git a/datafusion-examples/README.md b/datafusion-examples/README.md index e1fb401e7b73..dbc8050555b9 100644 --- a/datafusion-examples/README.md +++ b/datafusion-examples/README.md @@ -45,7 +45,7 @@ cargo run --example csv_sql - [`avro_sql.rs`](examples/avro_sql.rs): Build and run a query plan from a SQL statement against a local AVRO file - [`csv_sql.rs`](examples/csv_sql.rs): Build and run a query plan from a SQL statement against a local CSV file - [`csv_sql_streaming.rs`](examples/csv_sql_streaming.rs): Build and run a streaming query plan from a SQL statement against a local CSV file -- [`catalog.rs`](examples/external_dependency/catalog.rs): Register the table into a custom catalog +- [`catalog.rs`](examples/catalog.rs): Register the table into a custom catalog - [`custom_datasource.rs`](examples/custom_datasource.rs): Run queries against a custom datasource (TableProvider) - [`dataframe.rs`](examples/dataframe.rs): Run a query using a DataFrame against a local parquet file - [`dataframe-to-s3.rs`](examples/external_dependency/dataframe-to-s3.rs): Run a query using a DataFrame against a parquet file from s3 and writing back to s3 diff --git a/datafusion-examples/examples/dataframe_output.rs b/datafusion-examples/examples/dataframe_output.rs index c773384dfcd5..60ca090d722d 100644 --- a/datafusion-examples/examples/dataframe_output.rs +++ b/datafusion-examples/examples/dataframe_output.rs @@ -16,6 +16,7 @@ // under the License. use datafusion::{dataframe::DataFrameWriteOptions, prelude::*}; +use datafusion_common::config::CsvOptions; use datafusion_common::{parsers::CompressionTypeVariant, DataFusionError}; /// This example demonstrates the various methods to write out a DataFrame to local storage. @@ -60,8 +61,8 @@ async fn main() -> Result<(), DataFusionError> { "./datafusion-examples/test_csv/", // DataFrameWriteOptions contains options which control how data is written // such as compression codec - DataFrameWriteOptions::new().with_compression(CompressionTypeVariant::GZIP), - None, + DataFrameWriteOptions::new(), + Some(CsvOptions::default().with_compression(CompressionTypeVariant::GZIP)), ) .await?; @@ -69,6 +70,7 @@ async fn main() -> Result<(), DataFusionError> { .write_json( "./datafusion-examples/test_json/", DataFrameWriteOptions::new(), + None, ) .await?; diff --git a/datafusion-examples/examples/external_dependency/dataframe-to-s3.rs b/datafusion-examples/examples/external_dependency/dataframe-to-s3.rs index 883da7d0d13d..8d56c440da36 100644 --- a/datafusion-examples/examples/external_dependency/dataframe-to-s3.rs +++ b/datafusion-examples/examples/external_dependency/dataframe-to-s3.rs @@ -15,6 +15,9 @@ // specific language governing permissions and limitations // under the License. 
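The `dataframe_output.rs` hunk above and the `dataframe-to-s3.rs` hunk below show the new DataFrame write API shape: per-format options such as `CsvOptions` are now passed as a third argument instead of being configured on `DataFrameWriteOptions`. A minimal sketch under that assumption; the function name and output paths are illustrative and not part of the patch:

```rust
use datafusion::dataframe::{DataFrame, DataFrameWriteOptions};
use datafusion::error::Result;
use datafusion_common::config::CsvOptions;
use datafusion_common::parsers::CompressionTypeVariant;

// Writes the same DataFrame twice, showing where format options now go.
async fn write_both(df: DataFrame) -> Result<()> {
    // CSV: compression now travels in `CsvOptions`, not `DataFrameWriteOptions`.
    df.clone()
        .write_csv(
            "./out/csv/",
            DataFrameWriteOptions::new(),
            Some(CsvOptions::default().with_compression(CompressionTypeVariant::GZIP)),
        )
        .await?;

    // JSON: pass `None` when no format-specific options are needed.
    df.write_json("./out/json/", DataFrameWriteOptions::new(), None)
        .await?;
    Ok(())
}
```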
+use std::env; +use std::sync::Arc; + use datafusion::dataframe::DataFrameWriteOptions; use datafusion::datasource::file_format::parquet::ParquetFormat; use datafusion::datasource::listing::ListingOptions; @@ -23,8 +26,6 @@ use datafusion::prelude::*; use datafusion_common::{FileType, GetExt}; use object_store::aws::AmazonS3Builder; -use std::env; -use std::sync::Arc; use url::Url; /// This example demonstrates querying data from AmazonS3 and writing @@ -52,7 +53,7 @@ async fn main() -> Result<()> { .register_object_store(&s3_url, arc_s3.clone()); let path = format!("s3://{bucket_name}/test_data/"); - let file_format = ParquetFormat::default().with_enable_pruning(Some(true)); + let file_format = ParquetFormat::default().with_enable_pruning(true); let listing_options = ListingOptions::new(Arc::new(file_format)) .with_file_extension(FileType::PARQUET.get_ext()); ctx.register_listing_table("test", &path, listing_options, None, None) @@ -69,7 +70,7 @@ async fn main() -> Result<()> { //write as JSON to s3 let json_out = format!("s3://{bucket_name}/json_out"); df.clone() - .write_json(&json_out, DataFrameWriteOptions::new()) + .write_json(&json_out, DataFrameWriteOptions::new(), None) .await?; //write as csv to s3 @@ -77,7 +78,7 @@ async fn main() -> Result<()> { df.write_csv(&csv_out, DataFrameWriteOptions::new(), None) .await?; - let file_format = ParquetFormat::default().with_enable_pruning(Some(true)); + let file_format = ParquetFormat::default().with_enable_pruning(true); let listing_options = ListingOptions::new(Arc::new(file_format)) .with_file_extension(FileType::PARQUET.get_ext()); ctx.register_listing_table("test2", &out_path, listing_options, None, None) diff --git a/datafusion-examples/examples/parquet_sql_multiple_files.rs b/datafusion-examples/examples/parquet_sql_multiple_files.rs index 0e2968f20356..30ca1df73d91 100644 --- a/datafusion-examples/examples/parquet_sql_multiple_files.rs +++ b/datafusion-examples/examples/parquet_sql_multiple_files.rs @@ -15,12 +15,14 @@ // specific language governing permissions and limitations // under the License. +use std::path::Path; +use std::sync::Arc; + use datafusion::datasource::file_format::parquet::ParquetFormat; use datafusion::datasource::listing::ListingOptions; use datafusion::prelude::*; + use object_store::local::LocalFileSystem; -use std::path::Path; -use std::sync::Arc; /// This example demonstrates executing a simple query against an Arrow data source (a directory /// with multiple Parquet files) and fetching results. The query is run twice, once showing @@ -34,7 +36,7 @@ async fn main() -> Result<(), Box> { let test_data = datafusion::test_util::parquet_test_data(); // Configure listing options - let file_format = ParquetFormat::default().with_enable_pruning(Some(true)); + let file_format = ParquetFormat::default().with_enable_pruning(true); let listing_options = ListingOptions::new(Arc::new(file_format)) // This is a workaround for this example since `test_data` contains // many different parquet different files, diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 181f318d3eb3..72d51cb15a88 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -16,11 +16,15 @@ // under the License. //! 
Runtime configuration, via [`ConfigOptions`] -use crate::error::_internal_err; -use crate::{DataFusionError, Result}; + use std::any::Any; use std::collections::{BTreeMap, HashMap}; -use std::fmt::Display; +use std::fmt::{self, Display}; +use std::str::FromStr; + +use crate::error::_config_err; +use crate::parsers::CompressionTypeVariant; +use crate::{DataFusionError, FileType, Result}; /// A macro that wraps a configuration struct and automatically derives /// [`Default`] and [`ConfigField`] for it, allowing it to be used @@ -98,6 +102,7 @@ use std::fmt::Display; /// /// NB: Misplaced commas may result in nonsensical errors /// +#[macro_export] macro_rules! config_namespace { ( $(#[doc = $struct_d:tt])* @@ -110,8 +115,7 @@ macro_rules! config_namespace { ) => { $(#[doc = $struct_d])* - #[derive(Debug, Clone)] - #[non_exhaustive] + #[derive(Debug, Clone, PartialEq)] $vis struct $struct_name{ $( $(#[doc = $d])* @@ -126,9 +130,9 @@ macro_rules! config_namespace { $( stringify!($field_name) => self.$field_name.set(rem, value), )* - _ => _internal_err!( + _ => return Err(DataFusionError::Configuration(format!( "Config value \"{}\" not found on {}", key, stringify!($struct_name) - ) + ))) } } @@ -635,7 +639,7 @@ impl ConfigField for ConfigOptions { "optimizer" => self.optimizer.set(rem, value), "explain" => self.explain.set(rem, value), "sql_parser" => self.sql_parser.set(rem, value), - _ => _internal_err!("Config value \"{key}\" not found on ConfigOptions"), + _ => _config_err!("Config value \"{key}\" not found on ConfigOptions"), } } @@ -663,9 +667,9 @@ impl ConfigOptions { /// Set a configuration option pub fn set(&mut self, key: &str, value: &str) -> Result<()> { let (prefix, key) = key.split_once('.').ok_or_else(|| { - DataFusionError::External( - format!("could not find config namespace for key \"{key}\"",).into(), - ) + DataFusionError::Configuration(format!( + "could not find config namespace for key \"{key}\"", + )) })?; if prefix == "datafusion" { @@ -674,9 +678,9 @@ impl ConfigOptions { let e = self.extensions.0.get_mut(prefix); let e = e.ok_or_else(|| { - DataFusionError::External( - format!("Could not find config namespace \"{prefix}\"",).into(), - ) + DataFusionError::Configuration(format!( + "Could not find config namespace \"{prefix}\"" + )) })?; e.0.set(key, value) } @@ -886,7 +890,7 @@ impl Clone for ExtensionBox { /// A trait implemented by `config_namespace` and for field types that provides /// the ability to walk and mutate the configuration tree -trait ConfigField { +pub trait ConfigField { fn visit(&self, v: &mut V, key: &str, description: &'static str); fn set(&mut self, key: &str, value: &str) -> Result<()>; @@ -905,6 +909,7 @@ impl ConfigField for Option { } } +#[macro_export] macro_rules! config_field { ($t:ty) => { impl ConfigField for $t { @@ -929,11 +934,52 @@ config_field!(String); config_field!(bool); config_field!(usize); config_field!(f64); -config_field!(u8); config_field!(u64); +impl ConfigField for u8 { + fn visit(&self, v: &mut V, key: &str, description: &'static str) { + v.some(key, self, description) + } + + fn set(&mut self, key: &str, value: &str) -> Result<()> { + if value.is_empty() { + return Err(DataFusionError::Configuration(format!( + "Input string for {} key is empty", + key + ))); + } + // Check if the string is a valid number + if let Ok(num) = value.parse::() { + // TODO: Let's decide how we treat the numerical strings. 
+ *self = num; + } else { + let bytes = value.as_bytes(); + // Check if the first character is ASCII (single byte) + if bytes.len() > 1 || !value.chars().next().unwrap().is_ascii() { + return Err(DataFusionError::Configuration(format!( + "Error parsing {} as u8. Non-ASCII string provided", + value + ))); + } + *self = bytes[0]; + } + Ok(()) + } +} + +impl ConfigField for CompressionTypeVariant { + fn visit(&self, v: &mut V, key: &str, description: &'static str) { + v.some(key, self, description) + } + + fn set(&mut self, _: &str, value: &str) -> Result<()> { + *self = CompressionTypeVariant::from_str(value)?; + Ok(()) + } +} + /// An implementation trait used to recursively walk configuration -trait Visit { +pub trait Visit { fn some(&mut self, key: &str, value: V, description: &'static str); fn none(&mut self, key: &str, description: &'static str); @@ -1044,7 +1090,7 @@ macro_rules! extensions_options { Ok(()) } )* - _ => Err($crate::DataFusionError::Internal( + _ => Err($crate::DataFusionError::Configuration( format!(concat!("Config value \"{}\" not found on ", stringify!($struct_name)), key) )) } @@ -1064,3 +1110,556 @@ macro_rules! extensions_options { } } } + +#[derive(Debug, Clone, Default)] +pub struct TableOptions { + pub csv: CsvOptions, + pub parquet: TableParquetOptions, + pub json: JsonOptions, + pub current_format: Option, + /// Optional extensions registered using [`Extensions::insert`] + pub extensions: Extensions, +} + +impl ConfigField for TableOptions { + fn visit(&self, v: &mut V, _key_prefix: &str, _description: &'static str) { + self.csv.visit(v, "csv", ""); + self.parquet.visit(v, "parquet", ""); + self.json.visit(v, "json", ""); + } + + fn set(&mut self, key: &str, value: &str) -> Result<()> { + // Extensions are handled in the public `ConfigOptions::set` + let (key, rem) = key.split_once('.').unwrap_or((key, "")); + match key { + "csv" => self.csv.set(rem, value), + "parquet" => self.parquet.set(rem, value), + "json" => self.json.set(rem, value), + _ => _config_err!("Config value \"{key}\" not found on TableOptions"), + } + } +} + +impl TableOptions { + /// Creates a new [`ConfigOptions`] with default values + pub fn new() -> Self { + Self::default() + } + + pub fn set_file_format(&mut self, format: FileType) { + self.current_format = Some(format); + } + + pub fn default_from_session_config(config: &ConfigOptions) -> Self { + let mut initial = TableOptions::default(); + initial.parquet.global = config.execution.parquet.clone(); + initial + } + + /// Set extensions to provided value + pub fn with_extensions(mut self, extensions: Extensions) -> Self { + self.extensions = extensions; + self + } + + /// Set a configuration option + pub fn set(&mut self, key: &str, value: &str) -> Result<()> { + let (prefix, _) = key.split_once('.').ok_or_else(|| { + DataFusionError::Configuration(format!( + "could not find config namespace for key \"{key}\"" + )) + })?; + + if prefix == "csv" || prefix == "json" || prefix == "parquet" { + if let Some(format) = &self.current_format { + match format { + FileType::CSV if prefix != "csv" => { + return Err(DataFusionError::Configuration(format!( + "Key \"{key}\" is not applicable for CSV format" + ))) + } + #[cfg(feature = "parquet")] + FileType::PARQUET if prefix != "parquet" => { + return Err(DataFusionError::Configuration(format!( + "Key \"{key}\" is not applicable for PARQUET format" + ))) + } + FileType::JSON if prefix != "json" => { + return Err(DataFusionError::Configuration(format!( + "Key \"{key}\" is not applicable for JSON format" 
+ ))) + } + _ => {} + } + } + return ConfigField::set(self, key, value); + } + + let e = self.extensions.0.get_mut(prefix); + let e = e.ok_or_else(|| { + DataFusionError::Configuration(format!( + "Could not find config namespace \"{prefix}\"" + )) + })?; + e.0.set(key, value) + } + + pub fn from_string_hash_map(settings: &HashMap) -> Result { + let mut ret = Self::default(); + for (k, v) in settings { + ret.set(k, v)?; + } + + Ok(ret) + } + + pub fn alter_with_string_hash_map( + &mut self, + settings: &HashMap, + ) -> Result<()> { + for (k, v) in settings { + self.set(k, v)?; + } + Ok(()) + } + + /// Returns the [`ConfigEntry`] stored within this [`ConfigOptions`] + pub fn entries(&self) -> Vec { + struct Visitor(Vec); + + impl Visit for Visitor { + fn some( + &mut self, + key: &str, + value: V, + description: &'static str, + ) { + self.0.push(ConfigEntry { + key: key.to_string(), + value: Some(value.to_string()), + description, + }) + } + + fn none(&mut self, key: &str, description: &'static str) { + self.0.push(ConfigEntry { + key: key.to_string(), + value: None, + description, + }) + } + } + + let mut v = Visitor(vec![]); + self.visit(&mut v, "csv", ""); + self.visit(&mut v, "json", ""); + self.visit(&mut v, "parquet", ""); + + v.0.extend(self.extensions.0.values().flat_map(|e| e.0.entries())); + v.0 + } +} + +#[derive(Clone, Default, Debug, PartialEq)] +pub struct TableParquetOptions { + /// Global Parquet options that propagates to all columns. + pub global: ParquetOptions, + /// Column specific options. Default usage is parquet.XX::column. + pub column_specific_options: HashMap, +} + +impl ConfigField for TableParquetOptions { + fn visit(&self, v: &mut V, key_prefix: &str, description: &'static str) { + self.global.visit(v, key_prefix, description); + self.column_specific_options + .visit(v, key_prefix, description) + } + + fn set(&mut self, key: &str, value: &str) -> Result<()> { + // Determine the key if it's a global or column-specific setting + if key.contains("::") { + self.column_specific_options.set(key, value) + } else { + self.global.set(key, value) + } + } +} + +macro_rules! 
config_namespace_with_hashmap { + ( + $(#[doc = $struct_d:tt])* + $vis:vis struct $struct_name:ident { + $( + $(#[doc = $d:tt])* + $field_vis:vis $field_name:ident : $field_type:ty, default = $default:expr + )*$(,)* + } + ) => { + + $(#[doc = $struct_d])* + #[derive(Debug, Clone, PartialEq)] + $vis struct $struct_name{ + $( + $(#[doc = $d])* + $field_vis $field_name : $field_type, + )* + } + + impl ConfigField for $struct_name { + fn set(&mut self, key: &str, value: &str) -> Result<()> { + let (key, rem) = key.split_once('.').unwrap_or((key, "")); + match key { + $( + stringify!($field_name) => self.$field_name.set(rem, value), + )* + _ => _config_err!( + "Config value \"{}\" not found on {}", key, stringify!($struct_name) + ) + } + } + + fn visit(&self, v: &mut V, key_prefix: &str, _description: &'static str) { + $( + let key = format!(concat!("{}.", stringify!($field_name)), key_prefix); + let desc = concat!($($d),*).trim(); + self.$field_name.visit(v, key.as_str(), desc); + )* + } + } + + impl Default for $struct_name { + fn default() -> Self { + Self { + $($field_name: $default),* + } + } + } + + impl ConfigField for HashMap { + fn set(&mut self, key: &str, value: &str) -> Result<()> { + let parts: Vec<&str> = key.splitn(2, "::").collect(); + match parts.as_slice() { + [inner_key, hashmap_key] => { + // Get or create the ColumnOptions for the specified column + let inner_value = self + .entry((*hashmap_key).to_owned()) + .or_insert_with($struct_name::default); + + inner_value.set(inner_key, value) + } + _ => Err(DataFusionError::Configuration(format!( + "Unrecognized key '{}'.", + key + ))), + } + } + + fn visit(&self, v: &mut V, key_prefix: &str, _description: &'static str) { + for (column_name, col_options) in self { + $( + let key = format!("{}.{field}::{}", key_prefix, column_name, field = stringify!($field_name)); + let desc = concat!($($d),*).trim(); + col_options.$field_name.visit(v, key.as_str(), desc); + )* + } + } + } + } +} + +config_namespace_with_hashmap! { + pub struct ColumnOptions { + /// Sets if bloom filter is enabled for the column path. + pub bloom_filter_enabled: Option, default = None + + /// Sets encoding for the column path. + /// Valid values are: plain, plain_dictionary, rle, + /// bit_packed, delta_binary_packed, delta_length_byte_array, + /// delta_byte_array, rle_dictionary, and byte_stream_split. + /// These values are not case-sensitive. If NULL, uses + /// default parquet options + pub encoding: Option, default = None + + /// Sets if dictionary encoding is enabled for the column path. If NULL, uses + /// default parquet options + pub dictionary_enabled: Option, default = None + + /// Sets default parquet compression codec for the column path. + /// Valid values are: uncompressed, snappy, gzip(level), + /// lzo, brotli(level), lz4, zstd(level), and lz4_raw. + /// These values are not case-sensitive. If NULL, uses + /// default parquet options + pub compression: Option, default = None + + /// Sets if statistics are enabled for the column + /// Valid values are: "none", "chunk", and "page" + /// These values are not case sensitive. If NULL, uses + /// default parquet options + pub statistics_enabled: Option, default = None + + /// Sets bloom filter false positive probability for the column path. If NULL, uses + /// default parquet options + pub bloom_filter_fpp: Option, default = None + + /// Sets bloom filter number of distinct values. 
If NULL, uses + /// default parquet options + pub bloom_filter_ndv: Option<u64>, default = None + + /// Sets max statistics size for the column path. If NULL, uses + /// default parquet options + pub max_statistics_size: Option<usize>, default = None + } +} + +config_namespace! { + /// Options controlling CSV format + pub struct CsvOptions { + pub has_header: bool, default = true + pub delimiter: u8, default = b',' + pub quote: u8, default = b'"' + pub escape: Option<u8>, default = None + pub compression: CompressionTypeVariant, default = CompressionTypeVariant::UNCOMPRESSED + pub schema_infer_max_rec: usize, default = 100 + pub date_format: Option<String>, default = None + pub datetime_format: Option<String>, default = None + pub timestamp_format: Option<String>, default = None + pub timestamp_tz_format: Option<String>, default = None + pub time_format: Option<String>, default = None + pub null_value: Option<String>, default = None + } +} + +impl CsvOptions { + /// Set the CSV compression type. + /// - defaults to `CompressionTypeVariant::UNCOMPRESSED` + pub fn with_compression( + mut self, + compression_type_variant: CompressionTypeVariant, + ) -> Self { + self.compression = compression_type_variant; + self + } + + /// Set a limit in terms of records to scan to infer the schema + /// - default to `DEFAULT_SCHEMA_INFER_MAX_RECORD` + pub fn with_schema_infer_max_rec(mut self, max_rec: usize) -> Self { + self.schema_infer_max_rec = max_rec; + self + } + + /// Set true to indicate that the first line is a header. + /// - default to true + pub fn with_has_header(mut self, has_header: bool) -> Self { + self.has_header = has_header; + self + } + + /// True if the first line is a header. + pub fn has_header(&self) -> bool { + self.has_header + } + + /// The character separating values within a row. + /// - default to ',' + pub fn with_delimiter(mut self, delimiter: u8) -> Self { + self.delimiter = delimiter; + self + } + + /// The quote character in a row. + /// - default to '"' + pub fn with_quote(mut self, quote: u8) -> Self { + self.quote = quote; + self + } + + /// The escape character in a row. + /// - default is None + pub fn with_escape(mut self, escape: Option<u8>) -> Self { + self.escape = escape; + self + } + + /// Set a `CompressionTypeVariant` of CSV + /// - defaults to `CompressionTypeVariant::UNCOMPRESSED` + pub fn with_file_compression_type( + mut self, + compression: CompressionTypeVariant, + ) -> Self { + self.compression = compression; + self + } + + /// The delimiter character. + pub fn delimiter(&self) -> u8 { + self.delimiter + } + + /// The quote character. + pub fn quote(&self) -> u8 { + self.quote + } + + /// The escape character. + pub fn escape(&self) -> Option<u8> { + self.escape + } +} + +config_namespace!
{ + /// Options controlling JSON format + pub struct JsonOptions { + pub compression: CompressionTypeVariant, default = CompressionTypeVariant::UNCOMPRESSED + pub schema_infer_max_rec: usize, default = 100 + } +} + +#[derive(Debug, Clone, PartialEq)] +pub enum FormatOptions { + CSV(CsvOptions), + JSON(JsonOptions), + #[cfg(feature = "parquet")] + PARQUET(TableParquetOptions), + AVRO, + ARROW, +} +impl Display for FormatOptions { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let out = match self { + FormatOptions::CSV(_) => "csv", + FormatOptions::JSON(_) => "json", + #[cfg(feature = "parquet")] + FormatOptions::PARQUET(_) => "parquet", + FormatOptions::AVRO => "avro", + FormatOptions::ARROW => "arrow", + }; + write!(f, "{}", out) + } +} + +impl From for FormatOptions { + fn from(value: FileType) -> Self { + match value { + FileType::ARROW => FormatOptions::ARROW, + FileType::AVRO => FormatOptions::AVRO, + #[cfg(feature = "parquet")] + FileType::PARQUET => FormatOptions::PARQUET(TableParquetOptions::default()), + FileType::CSV => FormatOptions::CSV(CsvOptions::default()), + FileType::JSON => FormatOptions::JSON(JsonOptions::default()), + } + } +} + +#[cfg(test)] +mod tests { + use std::any::Any; + use std::collections::HashMap; + + use crate::config::{ + ConfigEntry, ConfigExtension, ExtensionOptions, Extensions, TableOptions, + }; + + #[derive(Default, Debug, Clone)] + pub struct TestExtensionConfig { + /// Should "foo" be replaced by "bar"? + pub properties: HashMap, + } + + impl ExtensionOptions for TestExtensionConfig { + fn as_any(&self) -> &dyn Any { + self + } + + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + + fn cloned(&self) -> Box { + Box::new(self.clone()) + } + + fn set(&mut self, key: &str, value: &str) -> crate::Result<()> { + let (key, rem) = key.split_once('.').unwrap_or((key, "")); + assert_eq!(key, "test"); + self.properties.insert(rem.to_owned(), value.to_owned()); + Ok(()) + } + + fn entries(&self) -> Vec { + self.properties + .iter() + .map(|(k, v)| ConfigEntry { + key: k.into(), + value: Some(v.into()), + description: "", + }) + .collect() + } + } + + impl ConfigExtension for TestExtensionConfig { + const PREFIX: &'static str = "test"; + } + + #[test] + fn create_table_config() { + let mut extension = Extensions::new(); + extension.insert(TestExtensionConfig::default()); + let table_config = TableOptions::new().with_extensions(extension); + let kafka_config = table_config.extensions.get::(); + assert!(kafka_config.is_some()) + } + + #[test] + fn alter_kafka_config() { + let mut extension = Extensions::new(); + extension.insert(TestExtensionConfig::default()); + let mut table_config = TableOptions::new().with_extensions(extension); + table_config.set("parquet.write_batch_size", "10").unwrap(); + assert_eq!(table_config.parquet.global.write_batch_size, 10); + table_config.set("test.bootstrap.servers", "asd").unwrap(); + let kafka_config = table_config + .extensions + .get::() + .unwrap(); + assert_eq!( + kafka_config.properties.get("bootstrap.servers").unwrap(), + "asd" + ); + } + + #[test] + fn parquet_table_options() { + let mut table_config = TableOptions::new(); + table_config + .set("parquet.bloom_filter_enabled::col1", "true") + .unwrap(); + assert_eq!( + table_config.parquet.column_specific_options["col1"].bloom_filter_enabled, + Some(true) + ); + } + + #[test] + fn csv_u8_table_options() { + let mut table_config = TableOptions::new(); + table_config.set("csv.delimiter", ";").unwrap(); + assert_eq!(table_config.csv.delimiter as char, 
';'); + table_config.set("csv.escape", "\"").unwrap(); + assert_eq!(table_config.csv.escape.unwrap() as char, '"'); + table_config.set("csv.escape", "\'").unwrap(); + assert_eq!(table_config.csv.escape.unwrap() as char, '\''); + } + + #[test] + fn parquet_table_options_config_entry() { + let mut table_config = TableOptions::new(); + table_config + .set("parquet.bloom_filter_enabled::col1", "true") + .unwrap(); + let entries = table_config.entries(); + assert!(entries + .iter() + .any(|item| item.key == "parquet.bloom_filter_enabled::col1")) + } +} diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs index 0f4e97905938..1ecd5b62bee8 100644 --- a/datafusion/common/src/error.rs +++ b/datafusion/common/src/error.rs @@ -535,6 +535,9 @@ make_error!(not_impl_err, not_impl_datafusion_err, NotImplemented); // Exposes a macro to create `DataFusionError::Execution` with optional backtrace make_error!(exec_err, exec_datafusion_err, Execution); +// Exposes a macro to create `DataFusionError::Configuration` with optional backtrace +make_error!(config_err, config_datafusion_err, Configuration); + // Exposes a macro to create `DataFusionError::Substrait` with optional backtrace make_error!(substrait_err, substrait_datafusion_err, Substrait); @@ -594,6 +597,7 @@ macro_rules! schema_err { // To avoid compiler error when using macro in the same crate: // macros from the current crate cannot be referred to by absolute paths +pub use config_err as _config_err; pub use internal_datafusion_err as _internal_datafusion_err; pub use internal_err as _internal_err; pub use not_impl_err as _not_impl_err; diff --git a/datafusion/common/src/file_options/arrow_writer.rs b/datafusion/common/src/file_options/arrow_writer.rs index cb921535aba5..99513eecf3f1 100644 --- a/datafusion/common/src/file_options/arrow_writer.rs +++ b/datafusion/common/src/file_options/arrow_writer.rs @@ -17,13 +17,6 @@ //! Options related to how Arrow files should be written -use crate::{ - config::ConfigOptions, - error::{DataFusionError, Result}, -}; - -use super::StatementOptions; - #[derive(Clone, Debug)] pub struct ArrowWriterOptions {} @@ -38,11 +31,3 @@ impl Default for ArrowWriterOptions { Self::new() } } - -impl TryFrom<(&ConfigOptions, &StatementOptions)> for ArrowWriterOptions { - type Error = DataFusionError; - - fn try_from(_value: (&ConfigOptions, &StatementOptions)) -> Result { - Ok(ArrowWriterOptions {}) - } -} diff --git a/datafusion/common/src/file_options/avro_writer.rs b/datafusion/common/src/file_options/avro_writer.rs index 2e3a64705842..51d923e2c315 100644 --- a/datafusion/common/src/file_options/avro_writer.rs +++ b/datafusion/common/src/file_options/avro_writer.rs @@ -17,20 +17,5 @@ //! Options related to how avro files should be written -use crate::{ - config::ConfigOptions, - error::{DataFusionError, Result}, -}; - -use super::StatementOptions; - #[derive(Clone, Debug)] pub struct AvroWriterOptions {} - -impl TryFrom<(&ConfigOptions, &StatementOptions)> for AvroWriterOptions { - type Error = DataFusionError; - - fn try_from(_value: (&ConfigOptions, &StatementOptions)) -> Result { - Ok(AvroWriterOptions {}) - } -} diff --git a/datafusion/common/src/file_options/csv_writer.rs b/datafusion/common/src/file_options/csv_writer.rs index d6046f0219dd..5f1a62682f8d 100644 --- a/datafusion/common/src/file_options/csv_writer.rs +++ b/datafusion/common/src/file_options/csv_writer.rs @@ -17,18 +17,12 @@ //! 
Options related to how csv files should be written -use std::str::FromStr; +use crate::config::CsvOptions; +use crate::error::{DataFusionError, Result}; +use crate::parsers::CompressionTypeVariant; use arrow::csv::WriterBuilder; -use crate::{ - config::ConfigOptions, - error::{DataFusionError, Result}, - parsers::CompressionTypeVariant, -}; - -use super::StatementOptions; - /// Options for writing CSV files #[derive(Clone, Debug)] pub struct CsvWriterOptions { @@ -51,58 +45,32 @@ impl CsvWriterOptions { } } -impl TryFrom<(&ConfigOptions, &StatementOptions)> for CsvWriterOptions { +impl TryFrom<&CsvOptions> for CsvWriterOptions { type Error = DataFusionError; - fn try_from(value: (&ConfigOptions, &StatementOptions)) -> Result { - let _configs = value.0; - let statement_options = value.1; - let mut builder = WriterBuilder::default(); - let mut compression = CompressionTypeVariant::UNCOMPRESSED; - for (option, value) in &statement_options.options { - builder = match option.to_lowercase().as_str(){ - "header" => { - let has_header = value.parse() - .map_err(|_| DataFusionError::Configuration(format!("Unable to parse {value} as bool as required for {option}!")))?; - builder.with_header(has_header) - }, - "date_format" => builder.with_date_format(value.to_owned()), - "datetime_format" => builder.with_datetime_format(value.to_owned()), - "timestamp_format" => builder.with_timestamp_format(value.to_owned()), - "time_format" => builder.with_time_format(value.to_owned()), - "rfc3339" => builder, // No-op - "null_value" => builder.with_null(value.to_owned()), - "compression" => { - compression = CompressionTypeVariant::from_str(value.replace('\'', "").as_str())?; - builder - }, - "delimiter" => { - // Ignore string literal single quotes passed from sql parsing - let value = value.replace('\'', ""); - let chars: Vec = value.chars().collect(); - if chars.len()>1{ - return Err(DataFusionError::Configuration(format!( - "CSV Delimiter Option must be a single char, got: {}", value - ))) - } - builder.with_delimiter(chars[0].try_into().map_err(|_| { - DataFusionError::Internal( - "Unable to convert CSV delimiter into u8".into(), - ) - })?) - }, - "quote" | "escape" => { - // https://github.com/apache/arrow-rs/issues/5146 - // These two attributes are only available when reading csv files. - // To avoid error - builder - }, - _ => return Err(DataFusionError::Configuration(format!("Found unsupported option {option} with value {value} for CSV format!"))) - } + fn try_from(value: &CsvOptions) -> Result { + let mut builder = WriterBuilder::default() + .with_header(value.has_header) + .with_delimiter(value.delimiter); + + if let Some(v) = &value.date_format { + builder = builder.with_date_format(v.into()) + } + if let Some(v) = &value.datetime_format { + builder = builder.with_datetime_format(v.into()) + } + if let Some(v) = &value.timestamp_format { + builder = builder.with_timestamp_format(v.into()) + } + if let Some(v) = &value.time_format { + builder = builder.with_time_format(v.into()) + } + if let Some(v) = &value.null_value { + builder = builder.with_null(v.into()) } Ok(CsvWriterOptions { writer_options: builder, - compression, + compression: value.compression, }) } } diff --git a/datafusion/common/src/file_options/file_type.rs b/datafusion/common/src/file_options/file_type.rs index 97362bdad3cc..812cb02a5f77 100644 --- a/datafusion/common/src/file_options/file_type.rs +++ b/datafusion/common/src/file_options/file_type.rs @@ -17,12 +17,11 @@ //! 
File type abstraction -use crate::error::{DataFusionError, Result}; - -use core::fmt; -use std::fmt::Display; +use std::fmt::{self, Display}; use std::str::FromStr; +use crate::error::{DataFusionError, Result}; + /// The default file extension of arrow files pub const DEFAULT_ARROW_EXTENSION: &str = ".arrow"; /// The default file extension of avro files @@ -105,10 +104,11 @@ impl FromStr for FileType { #[cfg(test)] #[cfg(feature = "parquet")] mod tests { - use crate::error::DataFusionError; - use crate::file_options::FileType; use std::str::FromStr; + use crate::error::DataFusionError; + use crate::FileType; + #[test] fn from_str() { for (ext, file_type) in [ diff --git a/datafusion/common/src/file_options/json_writer.rs b/datafusion/common/src/file_options/json_writer.rs index 7f988016c69d..750d2972329b 100644 --- a/datafusion/common/src/file_options/json_writer.rs +++ b/datafusion/common/src/file_options/json_writer.rs @@ -17,16 +17,12 @@ //! Options related to how json files should be written -use std::str::FromStr; - use crate::{ - config::ConfigOptions, + config::JsonOptions, error::{DataFusionError, Result}, parsers::CompressionTypeVariant, }; -use super::StatementOptions; - /// Options for writing JSON files #[derive(Clone, Debug)] pub struct JsonWriterOptions { @@ -39,21 +35,12 @@ impl JsonWriterOptions { } } -impl TryFrom<(&ConfigOptions, &StatementOptions)> for JsonWriterOptions { +impl TryFrom<&JsonOptions> for JsonWriterOptions { type Error = DataFusionError; - fn try_from(value: (&ConfigOptions, &StatementOptions)) -> Result { - let _configs = value.0; - let statement_options = value.1; - let mut compression = CompressionTypeVariant::UNCOMPRESSED; - for (option, value) in &statement_options.options { - match option.to_lowercase().as_str(){ - "compression" => { - compression = CompressionTypeVariant::from_str(value.replace('\'', "").as_str())?; - }, - _ => return Err(DataFusionError::Configuration(format!("Found unsupported option {option} with value {value} for JSON format!"))) - } - } - Ok(JsonWriterOptions { compression }) + fn try_from(value: &JsonOptions) -> Result { + Ok(JsonWriterOptions { + compression: value.compression, + }) } } diff --git a/datafusion/common/src/file_options/mod.rs b/datafusion/common/src/file_options/mod.rs index 3a48f188fb97..a72b812adc8d 100644 --- a/datafusion/common/src/file_options/mod.rs +++ b/datafusion/common/src/file_options/mod.rs @@ -24,346 +24,61 @@ pub mod file_type; pub mod json_writer; #[cfg(feature = "parquet")] pub mod parquet_writer; -pub(crate) mod parse_utils; - -use std::{ - collections::HashMap, - fmt::{self, Display}, - path::Path, - str::FromStr, -}; - -use crate::{ - config::ConfigOptions, file_options::parse_utils::parse_boolean_string, - DataFusionError, FileType, Result, -}; - -#[cfg(feature = "parquet")] -use self::parquet_writer::ParquetWriterOptions; - -use self::{ - arrow_writer::ArrowWriterOptions, avro_writer::AvroWriterOptions, - csv_writer::CsvWriterOptions, json_writer::JsonWriterOptions, -}; - -/// Represents a single arbitrary setting in a -/// [StatementOptions] where OptionTuple.0 determines -/// the specific setting to be modified and OptionTuple.1 -/// determines the value which should be applied -pub type OptionTuple = (String, String); - -/// Represents arbitrary tuples of options passed as String -/// tuples from SQL statements. As in the following statement: -/// COPY ... TO ... (setting1 value1, setting2 value2, ...) 
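
Note on what replaces the string-keyed `StatementOptions` being removed here: the same key/value strings now flow through `TableOptions::set`, and the `::column` suffix is routed into `column_specific_options` by the `config_namespace_with_hashmap!` macro introduced earlier in this patch. A minimal sketch mirroring the `parquet_table_options` test above; the `datafusion_common::config` import path is assumed from the diff context.

```rust
use datafusion_common::config::TableOptions;
use datafusion_common::Result;

fn main() -> Result<()> {
    let mut table_options = TableOptions::new();

    // Global option: applies to the whole parquet writer.
    table_options.set("parquet.max_row_group_size", "8192")?;

    // Column-specific option: the `::<column path>` suffix is split off by the
    // HashMap-aware ConfigField impl and stored in `column_specific_options`.
    table_options.set("parquet.bloom_filter_enabled::col1", "true")?;

    assert_eq!(
        table_options.parquet.column_specific_options["col1"].bloom_filter_enabled,
        Some(true)
    );
    Ok(())
}
```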
-#[derive(Clone, PartialEq, Eq, Hash, Debug)] -pub struct StatementOptions { - options: Vec, -} - -/// Useful for conversion from external tables which use Hashmap -impl From<&HashMap> for StatementOptions { - fn from(value: &HashMap) -> Self { - Self { - options: value - .iter() - .map(|(k, v)| (k.to_owned(), v.to_owned())) - .collect::>(), - } - } -} - -impl StatementOptions { - pub fn new(options: Vec) -> Self { - Self { options } - } - - pub fn into_inner(self) -> Vec { - self.options - } - - /// Scans for option and if it exists removes it and attempts to parse as a boolean - /// Returns none if it does not exist. - pub fn take_bool_option(&mut self, find: &str) -> Result> { - let maybe_option = self.scan_and_remove_option(find); - maybe_option - .map(|(_, v)| parse_boolean_string(find, v)) - .transpose() - } - - /// Scans for option and if it exists removes it and returns it - /// Returns none if it does not exist - pub fn take_str_option(&mut self, find: &str) -> Option { - let maybe_option = self.scan_and_remove_option(find); - maybe_option.map(|(_, v)| v) - } - - /// Finds partition_by option if exists and parses into a `Vec`. - /// If option doesn't exist, returns empty `vec![]`. - /// E.g. (partition_by 'colA, colB, colC') -> `vec!['colA','colB','colC']` - pub fn take_partition_by(&mut self) -> Vec { - let partition_by = self.take_str_option("partition_by"); - match partition_by { - Some(part_cols) => { - let dequoted = part_cols - .chars() - .enumerate() - .filter(|(idx, c)| { - !((*idx == 0 || *idx == part_cols.len() - 1) - && (*c == '\'' || *c == '"')) - }) - .map(|(_idx, c)| c) - .collect::(); - dequoted - .split(',') - .map(|s| s.trim().replace("''", "'")) - .collect::>() - } - None => vec![], - } - } - - /// Infers the file_type given a target and arbitrary options. - /// If the options contain an explicit "format" option, that will be used. - /// Otherwise, attempt to infer file_type from the extension of target. - /// Finally, return an error if unable to determine the file_type - /// If found, format is removed from the options list. - pub fn try_infer_file_type(&mut self, target: &str) -> Result { - let explicit_format = self.scan_and_remove_option("format"); - let format = match explicit_format { - Some(s) => FileType::from_str(s.1.as_str()), - None => { - // try to infer file format from file extension - let extension: &str = &Path::new(target) - .extension() - .ok_or(DataFusionError::Configuration( - "Format not explicitly set and unable to get file extension!" - .to_string(), - ))? - .to_str() - .ok_or(DataFusionError::Configuration( - "Format not explicitly set and failed to parse file extension!" - .to_string(), - ))? - .to_lowercase(); - - FileType::from_str(extension) - } - }?; - - Ok(format) - } - - /// Finds an option in StatementOptions if exists, removes and returns it - /// along with the vec of remaining options. - fn scan_and_remove_option(&mut self, find: &str) -> Option { - let idx = self - .options - .iter() - .position(|(k, _)| k.to_lowercase() == find.to_lowercase()); - match idx { - Some(i) => Some(self.options.swap_remove(i)), - None => None, - } - } -} - -/// This type contains all options needed to initialize a particular -/// RecordBatchWriter type. Each element in the enum contains a thin wrapper -/// around a "writer builder" type (e.g. arrow::csv::WriterBuilder) -/// plus any DataFusion specific writing options (e.g. 
CSV compression) -#[derive(Clone, Debug)] -pub enum FileTypeWriterOptions { - #[cfg(feature = "parquet")] - Parquet(ParquetWriterOptions), - CSV(CsvWriterOptions), - JSON(JsonWriterOptions), - Avro(AvroWriterOptions), - Arrow(ArrowWriterOptions), -} - -impl FileTypeWriterOptions { - /// Constructs a FileTypeWriterOptions given a FileType to be written - /// and arbitrary String tuple options. May return an error if any - /// string setting is unrecognized or unsupported. - pub fn build( - file_type: &FileType, - config_defaults: &ConfigOptions, - statement_options: &StatementOptions, - ) -> Result { - let options = (config_defaults, statement_options); - - let file_type_write_options = match file_type { - #[cfg(feature = "parquet")] - FileType::PARQUET => { - FileTypeWriterOptions::Parquet(ParquetWriterOptions::try_from(options)?) - } - FileType::CSV => { - FileTypeWriterOptions::CSV(CsvWriterOptions::try_from(options)?) - } - FileType::JSON => { - FileTypeWriterOptions::JSON(JsonWriterOptions::try_from(options)?) - } - FileType::AVRO => { - FileTypeWriterOptions::Avro(AvroWriterOptions::try_from(options)?) - } - FileType::ARROW => { - FileTypeWriterOptions::Arrow(ArrowWriterOptions::try_from(options)?) - } - }; - - Ok(file_type_write_options) - } - - /// Constructs a FileTypeWriterOptions from session defaults only. - pub fn build_default( - file_type: &FileType, - config_defaults: &ConfigOptions, - ) -> Result { - let empty_statement = StatementOptions::new(vec![]); - let options = (config_defaults, &empty_statement); - - let file_type_write_options = match file_type { - #[cfg(feature = "parquet")] - FileType::PARQUET => { - FileTypeWriterOptions::Parquet(ParquetWriterOptions::try_from(options)?) - } - FileType::CSV => { - FileTypeWriterOptions::CSV(CsvWriterOptions::try_from(options)?) - } - FileType::JSON => { - FileTypeWriterOptions::JSON(JsonWriterOptions::try_from(options)?) - } - FileType::AVRO => { - FileTypeWriterOptions::Avro(AvroWriterOptions::try_from(options)?) - } - FileType::ARROW => { - FileTypeWriterOptions::Arrow(ArrowWriterOptions::try_from(options)?) - } - }; - - Ok(file_type_write_options) - } - - /// Tries to extract ParquetWriterOptions from this FileTypeWriterOptions enum. - /// Returns an error if a different type from parquet is set. - #[cfg(feature = "parquet")] - pub fn try_into_parquet(&self) -> Result<&ParquetWriterOptions> { - match self { - FileTypeWriterOptions::Parquet(opt) => Ok(opt), - _ => Err(DataFusionError::Internal(format!( - "Expected parquet options but found options for: {}", - self - ))), - } - } - - /// Tries to extract CsvWriterOptions from this FileTypeWriterOptions enum. - /// Returns an error if a different type from csv is set. - pub fn try_into_csv(&self) -> Result<&CsvWriterOptions> { - match self { - FileTypeWriterOptions::CSV(opt) => Ok(opt), - _ => Err(DataFusionError::Internal(format!( - "Expected csv options but found options for {}", - self - ))), - } - } - - /// Tries to extract JsonWriterOptions from this FileTypeWriterOptions enum. - /// Returns an error if a different type from json is set. - pub fn try_into_json(&self) -> Result<&JsonWriterOptions> { - match self { - FileTypeWriterOptions::JSON(opt) => Ok(opt), - _ => Err(DataFusionError::Internal(format!( - "Expected json options but found options for {}", - self, - ))), - } - } - - /// Tries to extract AvroWriterOptions from this FileTypeWriterOptions enum. - /// Returns an error if a different type from avro is set. 
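
For context on what replaces the `FileTypeWriterOptions::build` dispatch and the `try_into_*` accessors being deleted here: each format's writer options are now built directly from its typed options struct via the `TryFrom` impls added earlier in this diff. A hedged sketch, assuming `CsvWriterOptions` and `JsonWriterOptions` keep public `writer_options`/`compression` fields as the test code elsewhere in this patch suggests.

```rust
use datafusion_common::config::{CsvOptions, JsonOptions};
use datafusion_common::file_options::csv_writer::CsvWriterOptions;
use datafusion_common::file_options::json_writer::JsonWriterOptions;
use datafusion_common::parsers::CompressionTypeVariant;
use datafusion_common::Result;

fn main() -> Result<()> {
    // Typed options in, writer options out; no per-statement string parsing loop.
    let csv = CsvOptions::default()
        .with_delimiter(b';')
        .with_has_header(false)
        .with_compression(CompressionTypeVariant::GZIP);
    let csv_writer = CsvWriterOptions::try_from(&csv)?;
    assert_eq!(csv_writer.compression, CompressionTypeVariant::GZIP);

    // JSON takes the same shape; defaults carry UNCOMPRESSED through unchanged.
    let json = JsonOptions::default();
    let json_writer = JsonWriterOptions::try_from(&json)?;
    assert_eq!(json_writer.compression, CompressionTypeVariant::UNCOMPRESSED);
    Ok(())
}
```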
- pub fn try_into_avro(&self) -> Result<&AvroWriterOptions> { - match self { - FileTypeWriterOptions::Avro(opt) => Ok(opt), - _ => Err(DataFusionError::Internal(format!( - "Expected avro options but found options for {}!", - self - ))), - } - } - - /// Tries to extract ArrowWriterOptions from this FileTypeWriterOptions enum. - /// Returns an error if a different type from arrow is set. - pub fn try_into_arrow(&self) -> Result<&ArrowWriterOptions> { - match self { - FileTypeWriterOptions::Arrow(opt) => Ok(opt), - _ => Err(DataFusionError::Internal(format!( - "Expected arrow options but found options for {}", - self - ))), - } - } -} - -impl Display for FileTypeWriterOptions { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let name = match self { - FileTypeWriterOptions::Arrow(_) => "ArrowWriterOptions", - FileTypeWriterOptions::Avro(_) => "AvroWriterOptions", - FileTypeWriterOptions::CSV(_) => "CsvWriterOptions", - FileTypeWriterOptions::JSON(_) => "JsonWriterOptions", - #[cfg(feature = "parquet")] - FileTypeWriterOptions::Parquet(_) => "ParquetWriterOptions", - }; - write!(f, "{}", name) - } -} #[cfg(test)] #[cfg(feature = "parquet")] mod tests { use std::collections::HashMap; - use parquet::{ - basic::{Compression, Encoding, ZstdLevel}, - file::properties::{EnabledStatistics, WriterVersion}, - schema::types::ColumnPath, - }; - + use super::parquet_writer::ParquetWriterOptions; use crate::{ - config::ConfigOptions, + config::TableOptions, file_options::{csv_writer::CsvWriterOptions, json_writer::JsonWriterOptions}, parsers::CompressionTypeVariant, + Result, }; - use crate::Result; - - use super::{parquet_writer::ParquetWriterOptions, StatementOptions}; + use parquet::{ + basic::{Compression, Encoding, ZstdLevel}, + file::properties::{EnabledStatistics, WriterVersion}, + schema::types::ColumnPath, + }; #[test] fn test_writeroptions_parquet_from_statement_options() -> Result<()> { let mut option_map: HashMap = HashMap::new(); - option_map.insert("max_row_group_size".to_owned(), "123".to_owned()); - option_map.insert("data_pagesize_limit".to_owned(), "123".to_owned()); - option_map.insert("write_batch_size".to_owned(), "123".to_owned()); - option_map.insert("writer_version".to_owned(), "2.0".to_owned()); - option_map.insert("dictionary_page_size_limit".to_owned(), "123".to_owned()); - option_map.insert("created_by".to_owned(), "df write unit test".to_owned()); - option_map.insert("column_index_truncate_length".to_owned(), "123".to_owned()); - option_map.insert("data_page_row_count_limit".to_owned(), "123".to_owned()); - option_map.insert("bloom_filter_enabled".to_owned(), "true".to_owned()); - option_map.insert("encoding".to_owned(), "plain".to_owned()); - option_map.insert("dictionary_enabled".to_owned(), "true".to_owned()); - option_map.insert("compression".to_owned(), "zstd(4)".to_owned()); - option_map.insert("statistics_enabled".to_owned(), "page".to_owned()); - option_map.insert("bloom_filter_fpp".to_owned(), "0.123".to_owned()); - option_map.insert("bloom_filter_ndv".to_owned(), "123".to_owned()); - - let options = StatementOptions::from(&option_map); - let config = ConfigOptions::new(); - - let parquet_options = ParquetWriterOptions::try_from((&config, &options))?; + option_map.insert("parquet.max_row_group_size".to_owned(), "123".to_owned()); + option_map.insert("parquet.data_pagesize_limit".to_owned(), "123".to_owned()); + option_map.insert("parquet.write_batch_size".to_owned(), "123".to_owned()); + option_map.insert("parquet.writer_version".to_owned(), 
"2.0".to_owned()); + option_map.insert( + "parquet.dictionary_page_size_limit".to_owned(), + "123".to_owned(), + ); + option_map.insert( + "parquet.created_by".to_owned(), + "df write unit test".to_owned(), + ); + option_map.insert( + "parquet.column_index_truncate_length".to_owned(), + "123".to_owned(), + ); + option_map.insert( + "parquet.data_page_row_count_limit".to_owned(), + "123".to_owned(), + ); + option_map.insert("parquet.bloom_filter_enabled".to_owned(), "true".to_owned()); + option_map.insert("parquet.encoding".to_owned(), "plain".to_owned()); + option_map.insert("parquet.dictionary_enabled".to_owned(), "true".to_owned()); + option_map.insert("parquet.compression".to_owned(), "zstd(4)".to_owned()); + option_map.insert("parquet.statistics_enabled".to_owned(), "page".to_owned()); + option_map.insert("parquet.bloom_filter_fpp".to_owned(), "0.123".to_owned()); + option_map.insert("parquet.bloom_filter_ndv".to_owned(), "123".to_owned()); + + let mut table_config = TableOptions::new(); + table_config.alter_with_string_hash_map(&option_map)?; + + let parquet_options = ParquetWriterOptions::try_from(&table_config.parquet)?; let properties = parquet_options.writer_options(); // Verify the expected options propagated down to parquet crate WriterProperties struct @@ -415,37 +130,58 @@ mod tests { fn test_writeroptions_parquet_column_specific() -> Result<()> { let mut option_map: HashMap = HashMap::new(); - option_map.insert("bloom_filter_enabled::col1".to_owned(), "true".to_owned()); option_map.insert( - "bloom_filter_enabled::col2.nested".to_owned(), + "parquet.bloom_filter_enabled::col1".to_owned(), + "true".to_owned(), + ); + option_map.insert( + "parquet.bloom_filter_enabled::col2.nested".to_owned(), + "true".to_owned(), + ); + option_map.insert("parquet.encoding::col1".to_owned(), "plain".to_owned()); + option_map.insert("parquet.encoding::col2.nested".to_owned(), "rle".to_owned()); + option_map.insert( + "parquet.dictionary_enabled::col1".to_owned(), "true".to_owned(), ); - option_map.insert("encoding::col1".to_owned(), "plain".to_owned()); - option_map.insert("encoding::col2.nested".to_owned(), "rle".to_owned()); - option_map.insert("dictionary_enabled::col1".to_owned(), "true".to_owned()); option_map.insert( - "dictionary_enabled::col2.nested".to_owned(), + "parquet.dictionary_enabled::col2.nested".to_owned(), "true".to_owned(), ); - option_map.insert("compression::col1".to_owned(), "zstd(4)".to_owned()); - option_map.insert("compression::col2.nested".to_owned(), "zstd(10)".to_owned()); - option_map.insert("statistics_enabled::col1".to_owned(), "page".to_owned()); + option_map.insert("parquet.compression::col1".to_owned(), "zstd(4)".to_owned()); + option_map.insert( + "parquet.compression::col2.nested".to_owned(), + "zstd(10)".to_owned(), + ); + option_map.insert( + "parquet.statistics_enabled::col1".to_owned(), + "page".to_owned(), + ); option_map.insert( - "statistics_enabled::col2.nested".to_owned(), + "parquet.statistics_enabled::col2.nested".to_owned(), "none".to_owned(), ); - option_map.insert("bloom_filter_fpp::col1".to_owned(), "0.123".to_owned()); option_map.insert( - "bloom_filter_fpp::col2.nested".to_owned(), + "parquet.bloom_filter_fpp::col1".to_owned(), + "0.123".to_owned(), + ); + option_map.insert( + "parquet.bloom_filter_fpp::col2.nested".to_owned(), "0.456".to_owned(), ); - option_map.insert("bloom_filter_ndv::col1".to_owned(), "123".to_owned()); - option_map.insert("bloom_filter_ndv::col2.nested".to_owned(), "456".to_owned()); + option_map.insert( + 
"parquet.bloom_filter_ndv::col1".to_owned(), + "123".to_owned(), + ); + option_map.insert( + "parquet.bloom_filter_ndv::col2.nested".to_owned(), + "456".to_owned(), + ); - let options = StatementOptions::from(&option_map); - let config = ConfigOptions::new(); + let mut table_config = TableOptions::new(); + table_config.alter_with_string_hash_map(&option_map)?; - let parquet_options = ParquetWriterOptions::try_from((&config, &options))?; + let parquet_options = ParquetWriterOptions::try_from(&table_config.parquet)?; let properties = parquet_options.writer_options(); let col1 = ColumnPath::from(vec!["col1".to_owned()]); @@ -535,20 +271,20 @@ mod tests { // for StatementOptions fn test_writeroptions_csv_from_statement_options() -> Result<()> { let mut option_map: HashMap = HashMap::new(); - option_map.insert("header".to_owned(), "true".to_owned()); - option_map.insert("date_format".to_owned(), "123".to_owned()); - option_map.insert("datetime_format".to_owned(), "123".to_owned()); - option_map.insert("timestamp_format".to_owned(), "2.0".to_owned()); - option_map.insert("time_format".to_owned(), "123".to_owned()); - option_map.insert("rfc3339".to_owned(), "true".to_owned()); - option_map.insert("null_value".to_owned(), "123".to_owned()); - option_map.insert("compression".to_owned(), "gzip".to_owned()); - option_map.insert("delimiter".to_owned(), ";".to_owned()); - - let options = StatementOptions::from(&option_map); - let config = ConfigOptions::new(); - - let csv_options = CsvWriterOptions::try_from((&config, &options))?; + option_map.insert("csv.has_header".to_owned(), "true".to_owned()); + option_map.insert("csv.date_format".to_owned(), "123".to_owned()); + option_map.insert("csv.datetime_format".to_owned(), "123".to_owned()); + option_map.insert("csv.timestamp_format".to_owned(), "2.0".to_owned()); + option_map.insert("csv.time_format".to_owned(), "123".to_owned()); + option_map.insert("csv.null_value".to_owned(), "123".to_owned()); + option_map.insert("csv.compression".to_owned(), "gzip".to_owned()); + option_map.insert("csv.delimiter".to_owned(), ";".to_owned()); + + let mut table_config = TableOptions::new(); + table_config.alter_with_string_hash_map(&option_map)?; + + let csv_options = CsvWriterOptions::try_from(&table_config.csv)?; + let builder = csv_options.writer_options; assert!(builder.header()); let buff = Vec::new(); @@ -563,12 +299,12 @@ mod tests { // for StatementOptions fn test_writeroptions_json_from_statement_options() -> Result<()> { let mut option_map: HashMap = HashMap::new(); - option_map.insert("compression".to_owned(), "gzip".to_owned()); + option_map.insert("json.compression".to_owned(), "gzip".to_owned()); - let options = StatementOptions::from(&option_map); - let config = ConfigOptions::new(); + let mut table_config = TableOptions::new(); + table_config.alter_with_string_hash_map(&option_map)?; - let json_options = JsonWriterOptions::try_from((&config, &options))?; + let json_options = JsonWriterOptions::try_from(&table_config.json)?; assert_eq!(json_options.compression, CompressionTypeVariant::GZIP); Ok(()) diff --git a/datafusion/common/src/file_options/parquet_writer.rs b/datafusion/common/src/file_options/parquet_writer.rs index 80fa023587ee..e8a350e8d389 100644 --- a/datafusion/common/src/file_options/parquet_writer.rs +++ b/datafusion/common/src/file_options/parquet_writer.rs @@ -17,15 +17,11 @@ //! 
Options related to how parquet files should be written -use parquet::file::properties::{WriterProperties, WriterPropertiesBuilder}; - -use crate::{config::ConfigOptions, DataFusionError, Result}; - -use super::StatementOptions; +use crate::{config::TableParquetOptions, DataFusionError, Result}; use parquet::{ basic::{BrotliLevel, GzipLevel, ZstdLevel}, - file::properties::{EnabledStatistics, WriterVersion}, + file::properties::{EnabledStatistics, WriterProperties, WriterVersion}, schema::types::ColumnPath, }; @@ -47,165 +43,102 @@ impl ParquetWriterOptions { } } -/// Constructs a default Parquet WriterPropertiesBuilder using -/// Session level ConfigOptions to initialize settings -pub fn default_builder(options: &ConfigOptions) -> Result { - let parquet_session_options = &options.execution.parquet; - let mut builder = WriterProperties::builder() - .set_data_page_size_limit(parquet_session_options.data_pagesize_limit) - .set_write_batch_size(parquet_session_options.write_batch_size) - .set_writer_version(parse_version_string( - &parquet_session_options.writer_version, - )?) - .set_dictionary_page_size_limit( - parquet_session_options.dictionary_page_size_limit, - ) - .set_max_row_group_size(parquet_session_options.max_row_group_size) - .set_created_by(parquet_session_options.created_by.clone()) - .set_column_index_truncate_length( - parquet_session_options.column_index_truncate_length, - ) - .set_data_page_row_count_limit(parquet_session_options.data_page_row_count_limit) - .set_bloom_filter_enabled(parquet_session_options.bloom_filter_enabled); - - builder = match &parquet_session_options.encoding { - Some(encoding) => builder.set_encoding(parse_encoding_string(encoding)?), - None => builder, - }; - - builder = match &parquet_session_options.dictionary_enabled { - Some(enabled) => builder.set_dictionary_enabled(*enabled), - None => builder, - }; - - builder = match &parquet_session_options.compression { - Some(compression) => { - builder.set_compression(parse_compression_string(compression)?) +impl TryFrom<&TableParquetOptions> for ParquetWriterOptions { + type Error = DataFusionError; + + fn try_from(parquet_options: &TableParquetOptions) -> Result { + let parquet_session_options = &parquet_options.global; + let mut builder = WriterProperties::builder() + .set_data_page_size_limit(parquet_session_options.data_pagesize_limit) + .set_write_batch_size(parquet_session_options.write_batch_size) + .set_writer_version(parse_version_string( + &parquet_session_options.writer_version, + )?) 
+ .set_dictionary_page_size_limit( + parquet_session_options.dictionary_page_size_limit, + ) + .set_max_row_group_size(parquet_session_options.max_row_group_size) + .set_created_by(parquet_session_options.created_by.clone()) + .set_column_index_truncate_length( + parquet_session_options.column_index_truncate_length, + ) + .set_data_page_row_count_limit( + parquet_session_options.data_page_row_count_limit, + ) + .set_bloom_filter_enabled(parquet_session_options.bloom_filter_enabled); + + if let Some(encoding) = &parquet_session_options.encoding { + builder = builder.set_encoding(parse_encoding_string(encoding)?); + } + + if let Some(enabled) = parquet_session_options.dictionary_enabled { + builder = builder.set_dictionary_enabled(enabled); + } + + if let Some(compression) = &parquet_session_options.compression { + builder = builder.set_compression(parse_compression_string(compression)?); + } + + if let Some(statistics) = &parquet_session_options.statistics_enabled { + builder = + builder.set_statistics_enabled(parse_statistics_string(statistics)?); } - None => builder, - }; - builder = match &parquet_session_options.statistics_enabled { - Some(statistics) => { - builder.set_statistics_enabled(parse_statistics_string(statistics)?) + if let Some(size) = parquet_session_options.max_statistics_size { + builder = builder.set_max_statistics_size(size); } - None => builder, - }; - builder = match &parquet_session_options.max_statistics_size { - Some(size) => builder.set_max_statistics_size(*size), - None => builder, - }; + if let Some(fpp) = parquet_session_options.bloom_filter_fpp { + builder = builder.set_bloom_filter_fpp(fpp); + } - builder = match &parquet_session_options.bloom_filter_fpp { - Some(fpp) => builder.set_bloom_filter_fpp(*fpp), - None => builder, - }; + if let Some(ndv) = parquet_session_options.bloom_filter_ndv { + builder = builder.set_bloom_filter_ndv(ndv); + } - builder = match &parquet_session_options.bloom_filter_ndv { - Some(ndv) => builder.set_bloom_filter_ndv(*ndv), - None => builder, - }; + for (column, options) in &parquet_options.column_specific_options { + let path = ColumnPath::new(column.split('.').map(|s| s.to_owned()).collect()); - Ok(builder) -} + if let Some(bloom_filter_enabled) = options.bloom_filter_enabled { + builder = builder + .set_column_bloom_filter_enabled(path.clone(), bloom_filter_enabled); + } -impl TryFrom<(&ConfigOptions, &StatementOptions)> for ParquetWriterOptions { - type Error = DataFusionError; + if let Some(encoding) = &options.encoding { + let parsed_encoding = parse_encoding_string(encoding)?; + builder = builder.set_column_encoding(path.clone(), parsed_encoding); + } + + if let Some(dictionary_enabled) = options.dictionary_enabled { + builder = builder + .set_column_dictionary_enabled(path.clone(), dictionary_enabled); + } + + if let Some(compression) = &options.compression { + let parsed_compression = parse_compression_string(compression)?; + builder = + builder.set_column_compression(path.clone(), parsed_compression); + } + + if let Some(statistics_enabled) = &options.statistics_enabled { + let parsed_value = parse_statistics_string(statistics_enabled)?; + builder = + builder.set_column_statistics_enabled(path.clone(), parsed_value); + } + + if let Some(bloom_filter_fpp) = options.bloom_filter_fpp { + builder = + builder.set_column_bloom_filter_fpp(path.clone(), bloom_filter_fpp); + } + + if let Some(bloom_filter_ndv) = options.bloom_filter_ndv { + builder = + builder.set_column_bloom_filter_ndv(path.clone(), bloom_filter_ndv); + } - fn 
try_from( - configs_and_statement_options: (&ConfigOptions, &StatementOptions), - ) -> Result { - let configs = configs_and_statement_options.0; - let statement_options = configs_and_statement_options.1; - let mut builder = default_builder(configs)?; - for (option, value) in &statement_options.options { - let (option, col_path) = split_option_and_column_path(option); - builder = match option.to_lowercase().as_str(){ - "max_row_group_size" => builder - .set_max_row_group_size(value.parse() - .map_err(|_| DataFusionError::Configuration(format!("Unable to parse {value} as u64 as required for {option}!")))?), - "data_pagesize_limit" => builder - .set_data_page_size_limit(value.parse() - .map_err(|_| DataFusionError::Configuration(format!("Unable to parse {value} as usize as required for {option}!")))?), - "write_batch_size" => builder - .set_write_batch_size(value.parse() - .map_err(|_| DataFusionError::Configuration(format!("Unable to parse {value} as usize as required for {option}!")))?), - "writer_version" => builder - .set_writer_version(parse_version_string(value)?), - "dictionary_page_size_limit" => builder - .set_dictionary_page_size_limit(value.parse() - .map_err(|_| DataFusionError::Configuration(format!("Unable to parse {value} as usize as required for {option}!")))?), - "created_by" => builder - .set_created_by(value.to_owned()), - "column_index_truncate_length" => builder - .set_column_index_truncate_length(Some(value.parse() - .map_err(|_| DataFusionError::Configuration(format!("Unable to parse {value} as usize as required for {option}!")))?)), - "data_page_row_count_limit" => builder - .set_data_page_row_count_limit(value.parse() - .map_err(|_| DataFusionError::Configuration(format!("Unable to parse {value} as usize as required for {option}!")))?), - "bloom_filter_enabled" => { - let parsed_value = value.parse() - .map_err(|_| DataFusionError::Configuration(format!("Unable to parse {value} as bool as required for {option}!")))?; - match col_path{ - Some(path) => builder.set_column_bloom_filter_enabled(path, parsed_value), - None => builder.set_bloom_filter_enabled(parsed_value) - } - }, - "encoding" => { - let parsed_encoding = parse_encoding_string(value)?; - match col_path{ - Some(path) => builder.set_column_encoding(path, parsed_encoding), - None => builder.set_encoding(parsed_encoding) - } - }, - "dictionary_enabled" => { - let parsed_value = value.parse() - .map_err(|_| DataFusionError::Configuration(format!("Unable to parse {value} as bool as required for {option}!")))?; - match col_path{ - Some(path) => builder.set_column_dictionary_enabled(path, parsed_value), - None => builder.set_dictionary_enabled(parsed_value) - } - }, - "compression" => { - let parsed_compression = parse_compression_string(value)?; - match col_path{ - Some(path) => builder.set_column_compression(path, parsed_compression), - None => builder.set_compression(parsed_compression) - } - }, - "statistics_enabled" => { - let parsed_value = parse_statistics_string(value)?; - match col_path{ - Some(path) => builder.set_column_statistics_enabled(path, parsed_value), - None => builder.set_statistics_enabled(parsed_value) - } - }, - "max_statistics_size" => { - let parsed_value = value.parse() - .map_err(|_| DataFusionError::Configuration(format!("Unable to parse {value} as usize as required for {option}!")))?; - match col_path{ - Some(path) => builder.set_column_max_statistics_size(path, parsed_value), - None => builder.set_max_statistics_size(parsed_value) - } - }, - "bloom_filter_fpp" => { - let parsed_value = 
value.parse() - .map_err(|_| DataFusionError::Configuration(format!("Unable to parse {value} as f64 as required for {option}!")))?; - match col_path{ - Some(path) => builder.set_column_bloom_filter_fpp(path, parsed_value), - None => builder.set_bloom_filter_fpp(parsed_value) - } - }, - "bloom_filter_ndv" => { - let parsed_value = value.parse() - .map_err(|_| DataFusionError::Configuration(format!("Unable to parse {value} as u64 as required for {option}!")))?; - match col_path{ - Some(path) => builder.set_column_bloom_filter_ndv(path, parsed_value), - None => builder.set_bloom_filter_ndv(parsed_value) - } - }, - _ => return Err(DataFusionError::Configuration(format!("Found unsupported option {option} with value {value} for Parquet format!"))) + if let Some(max_statistics_size) = options.max_statistics_size { + builder = + builder.set_column_max_statistics_size(path, max_statistics_size); } } Ok(ParquetWriterOptions { @@ -282,7 +215,7 @@ fn require_level(codec: &str, level: Option) -> Result { } /// Parses datafusion.execution.parquet.compression String to a parquet::basic::Compression -pub(crate) fn parse_compression_string( +pub fn parse_compression_string( str_setting: &str, ) -> Result { let str_setting_lower: &str = &str_setting.to_lowercase(); @@ -359,15 +292,3 @@ pub(crate) fn parse_statistics_string(str_setting: &str) -> Result (String, Option) { - match str_setting.replace('\'', "").split_once("::") { - Some((s1, s2)) => { - let col_path = ColumnPath::new(s2.split('.').map(|s| s.to_owned()).collect()); - (s1.to_owned(), Some(col_path)) - } - None => (str_setting.to_owned(), None), - } -} diff --git a/datafusion/common/src/lib.rs b/datafusion/common/src/lib.rs index ef77d25f1ec0..da7d6579bfe6 100644 --- a/datafusion/common/src/lib.rs +++ b/datafusion/common/src/lib.rs @@ -55,7 +55,6 @@ pub use file_options::file_type::{ FileType, GetExt, DEFAULT_ARROW_EXTENSION, DEFAULT_AVRO_EXTENSION, DEFAULT_CSV_EXTENSION, DEFAULT_JSON_EXTENSION, DEFAULT_PARQUET_EXTENSION, }; -pub use file_options::FileTypeWriterOptions; pub use functional_dependencies::{ aggregate_functional_dependencies, get_required_group_by_exprs_indices, get_target_functional_dependencies, Constraint, Constraints, Dependency, diff --git a/datafusion/common/src/parsers.rs b/datafusion/common/src/parsers.rs index 9583ecbdb733..e23edb4e2adb 100644 --- a/datafusion/common/src/parsers.rs +++ b/datafusion/common/src/parsers.rs @@ -16,12 +16,13 @@ // under the License. //! 
Interval parsing logic -use sqlparser::parser::ParserError; -use std::fmt::Display; +use std::fmt::Display; use std::result; use std::str::FromStr; +use sqlparser::parser::ParserError; + /// Readable file compression type #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum CompressionTypeVariant { diff --git a/datafusion/core/benches/sql_planner.rs b/datafusion/core/benches/sql_planner.rs index d800bcfe5bfc..3f7d66f5cc15 100644 --- a/datafusion/core/benches/sql_planner.rs +++ b/datafusion/core/benches/sql_planner.rs @@ -166,6 +166,8 @@ fn create_context() -> SessionContext { .unwrap(); ctx.register_table("t700", create_table_provider("c", 700)) .unwrap(); + ctx.register_table("t1000", create_table_provider("d", 1000)) + .unwrap(); let tpch_schemas = create_tpch_schemas(); tpch_schemas.iter().for_each(|(name, schema)| { @@ -194,6 +196,16 @@ fn criterion_benchmark(c: &mut Criterion) { b.iter(|| physical_plan(&ctx, "SELECT c1 FROM t700")) }); + // Test simplest + c.bench_function("logical_select_all_from_1000", |b| { + b.iter(|| logical_plan(&ctx, "SELECT * FROM t1000")) + }); + + // Test simplest + c.bench_function("physical_select_all_from_1000", |b| { + b.iter(|| physical_plan(&ctx, "SELECT * FROM t1000")) + }); + c.bench_function("logical_trivial_join_low_numbered_columns", |b| { b.iter(|| { logical_plan( diff --git a/datafusion/core/benches/sql_query_with_io.rs b/datafusion/core/benches/sql_query_with_io.rs index c7a838385bd6..916f48ce40c6 100644 --- a/datafusion/core/benches/sql_query_with_io.rs +++ b/datafusion/core/benches/sql_query_with_io.rs @@ -123,7 +123,7 @@ async fn setup_context(object_store: Arc) -> SessionContext { for table_id in 0..TABLES { let table_name = table_name(table_id); - let file_format = ParquetFormat::default().with_enable_pruning(Some(true)); + let file_format = ParquetFormat::default().with_enable_pruning(true); let options = ListingOptions::new(Arc::new(file_format)) .with_table_partition_cols(vec![(String::from("partition"), DataType::UInt8)]) .with_target_partitions(THREADS); diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index 3bdf2af4552d..5f192b83fdd9 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -21,6 +21,7 @@ mod parquet; use std::any::Any; +use std::collections::HashMap; use std::sync::Arc; use crate::arrow::record_batch::RecordBatch; @@ -41,16 +42,12 @@ use crate::prelude::SessionContext; use arrow::array::{Array, ArrayRef, Int64Array, StringArray}; use arrow::compute::{cast, concat}; -use arrow::csv::WriterBuilder; -use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; -use datafusion_common::file_options::csv_writer::CsvWriterOptions; -use datafusion_common::file_options::json_writer::JsonWriterOptions; -use datafusion_common::parsers::CompressionTypeVariant; +use arrow::datatypes::{DataType, Field}; +use arrow_schema::{Schema, SchemaRef}; +use datafusion_common::config::{CsvOptions, FormatOptions, JsonOptions}; use datafusion_common::{ - plan_err, Column, DFSchema, DataFusionError, FileType, FileTypeWriterOptions, - ParamValues, SchemaError, UnnestOptions, + plan_err, Column, DFSchema, DataFusionError, ParamValues, SchemaError, UnnestOptions, }; -use datafusion_expr::dml::CopyOptions; use datafusion_expr::{ avg, count, is_null, max, median, min, stddev, utils::COUNT_STAR_EXPANSION, TableProviderFilterPushDown, UNNAMED_TABLE, @@ -66,10 +63,6 @@ pub struct DataFrameWriteOptions { /// Controls if all partitions should be coalesced into a single 
output file /// Generally will have slower performance when set to true. single_file_output: bool, - /// Sets compression by DataFusion applied after file serialization. - /// Allows compression of CSV and JSON. - /// Not supported for parquet. - compression: CompressionTypeVariant, /// Sets which columns should be used for hive-style partitioned writes by name. /// Can be set to empty vec![] for non-partitioned writes. partition_by: Vec, @@ -81,7 +74,6 @@ impl DataFrameWriteOptions { DataFrameWriteOptions { overwrite: false, single_file_output: false, - compression: CompressionTypeVariant::UNCOMPRESSED, partition_by: vec![], } } @@ -97,12 +89,6 @@ impl DataFrameWriteOptions { self } - /// Sets the compression type applied to the output file(s) - pub fn with_compression(mut self, compression: CompressionTypeVariant) -> Self { - self.compression = compression; - self - } - /// Sets the partition_by columns for output partitioning pub fn with_partition_by(mut self, partition_by: Vec) -> Self { self.partition_by = partition_by; @@ -1168,28 +1154,22 @@ impl DataFrame { self, path: &str, options: DataFrameWriteOptions, - writer_properties: Option, + writer_options: Option, ) -> Result, DataFusionError> { if options.overwrite { return Err(DataFusionError::NotImplemented( "Overwrites are not implemented for DataFrame::write_csv.".to_owned(), )); } - let props = match writer_properties { - Some(props) => props, - None => WriterBuilder::new(), - }; - - let file_type_writer_options = - FileTypeWriterOptions::CSV(CsvWriterOptions::new(props, options.compression)); - let copy_options = CopyOptions::WriterOptions(Box::new(file_type_writer_options)); + let table_options = self.session_state.default_table_options(); + let props = writer_options.unwrap_or_else(|| table_options.csv.clone()); let plan = LogicalPlanBuilder::copy_to( self.plan, path.into(), - FileType::CSV, + FormatOptions::CSV(props), + HashMap::new(), options.partition_by, - copy_options, )? .build()?; DataFrame::new(self.session_state, plan).collect().await @@ -1212,6 +1192,7 @@ impl DataFrame { /// .write_json( /// "output.json", /// DataFrameWriteOptions::new(), + /// None /// ).await?; /// # fs::remove_file("output.json")?; /// # Ok(()) @@ -1221,21 +1202,24 @@ impl DataFrame { self, path: &str, options: DataFrameWriteOptions, + writer_options: Option, ) -> Result, DataFusionError> { if options.overwrite { return Err(DataFusionError::NotImplemented( "Overwrites are not implemented for DataFrame::write_json.".to_owned(), )); } - let file_type_writer_options = - FileTypeWriterOptions::JSON(JsonWriterOptions::new(options.compression)); - let copy_options = CopyOptions::WriterOptions(Box::new(file_type_writer_options)); + + let table_options = self.session_state.default_table_options(); + + let props = writer_options.unwrap_or_else(|| table_options.json.clone()); + let plan = LogicalPlanBuilder::copy_to( self.plan, path.into(), - FileType::JSON, + FormatOptions::JSON(props), + Default::default(), options.partition_by, - copy_options, )? .build()?; DataFrame::new(self.session_state, plan).collect().await diff --git a/datafusion/core/src/dataframe/parquet.rs b/datafusion/core/src/dataframe/parquet.rs index b7d63bf0a4b7..f4e8c9dfcd6f 100644 --- a/datafusion/core/src/dataframe/parquet.rs +++ b/datafusion/core/src/dataframe/parquet.rs @@ -15,16 +15,12 @@ // specific language governing permissions and limitations // under the License. 
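
To show how the reworked DataFrame write API above is called: `write_csv` and `write_json` now take an optional typed options struct, and compression rides on those options rather than on `DataFrameWriteOptions`. A sketch under assumed import paths and a placeholder input file; passing `None` falls back to the session's default table options.

```rust
use datafusion::dataframe::DataFrameWriteOptions;
use datafusion::error::Result;
use datafusion::prelude::{CsvReadOptions, SessionContext};
use datafusion_common::config::CsvOptions;
use datafusion_common::parsers::CompressionTypeVariant;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();
    // `example.csv` is a placeholder input path.
    let df = ctx.read_csv("example.csv", CsvReadOptions::new()).await?;

    // Compression now lives on CsvOptions, not on DataFrameWriteOptions.
    let csv_options = CsvOptions::default()
        .with_delimiter(b'|')
        .with_compression(CompressionTypeVariant::GZIP);

    df.clone()
        .write_csv("out_csv/", DataFrameWriteOptions::new(), Some(csv_options))
        .await?;

    // None uses the session's default JSON table options.
    df.write_json("out_json/", DataFrameWriteOptions::new(), None)
        .await?;
    Ok(())
}
```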
-use datafusion_common::file_options::parquet_writer::{ - default_builder, ParquetWriterOptions, -}; -use parquet::file::properties::WriterProperties; - use super::{ - CompressionTypeVariant, CopyOptions, DataFrame, DataFrameWriteOptions, - DataFusionError, FileType, FileTypeWriterOptions, LogicalPlanBuilder, RecordBatch, + DataFrame, DataFrameWriteOptions, DataFusionError, LogicalPlanBuilder, RecordBatch, }; +use datafusion_common::config::{FormatOptions, TableParquetOptions}; + impl DataFrame { /// Execute the `DataFrame` and write the results to Parquet file(s). /// @@ -53,30 +49,24 @@ impl DataFrame { self, path: &str, options: DataFrameWriteOptions, - writer_properties: Option, + writer_options: Option, ) -> Result, DataFusionError> { if options.overwrite { return Err(DataFusionError::NotImplemented( "Overwrites are not implemented for DataFrame::write_parquet.".to_owned(), )); } - match options.compression{ - CompressionTypeVariant::UNCOMPRESSED => (), - _ => return Err(DataFusionError::Configuration("DataFrame::write_parquet method does not support compression set via DataFrameWriteOptions. Set parquet compression via writer_properties instead.".to_owned())) - } - let props = match writer_properties { - Some(props) => props, - None => default_builder(self.session_state.config_options())?.build(), - }; - let file_type_writer_options = - FileTypeWriterOptions::Parquet(ParquetWriterOptions::new(props)); - let copy_options = CopyOptions::WriterOptions(Box::new(file_type_writer_options)); + + let table_options = self.session_state.default_table_options(); + + let props = writer_options.unwrap_or_else(|| table_options.parquet.clone()); + let plan = LogicalPlanBuilder::copy_to( self.plan, path.into(), - FileType::PARQUET, + FormatOptions::PARQUET(props), + Default::default(), options.partition_by, - copy_options, )? 
.build()?; DataFrame::new(self.session_state, plan).collect().await @@ -87,21 +77,20 @@ impl DataFrame { mod tests { use std::sync::Arc; - use object_store::local::LocalFileSystem; - use parquet::basic::{BrotliLevel, GzipLevel, ZstdLevel}; - use parquet::file::reader::FileReader; - use tempfile::TempDir; - use url::Url; - - use datafusion_expr::{col, lit}; - + use super::super::Result; + use super::*; use crate::arrow::util::pretty; use crate::execution::context::SessionContext; use crate::execution::options::ParquetReadOptions; use crate::test_util; - use super::super::Result; - use super::*; + use datafusion_common::file_options::parquet_writer::parse_compression_string; + use datafusion_expr::{col, lit}; + + use object_store::local::LocalFileSystem; + use parquet::file::reader::FileReader; + use tempfile::TempDir; + use url::Url; #[tokio::test] async fn filter_pushdown_dataframe() -> Result<()> { @@ -136,15 +125,14 @@ mod tests { #[tokio::test] async fn write_parquet_with_compression() -> Result<()> { let test_df = test_util::test_table().await?; - let output_path = "file://local/test.parquet"; let test_compressions = vec![ - parquet::basic::Compression::SNAPPY, - parquet::basic::Compression::LZ4, - parquet::basic::Compression::LZ4_RAW, - parquet::basic::Compression::GZIP(GzipLevel::default()), - parquet::basic::Compression::BROTLI(BrotliLevel::default()), - parquet::basic::Compression::ZSTD(ZstdLevel::default()), + "snappy", + "brotli(1)", + "lz4", + "lz4_raw", + "gzip(6)", + "zstd(1)", ]; for compression in test_compressions.into_iter() { let df = test_df.clone(); @@ -153,14 +141,12 @@ mod tests { let local_url = Url::parse("file://local").unwrap(); let ctx = &test_df.session_state; ctx.runtime_env().register_object_store(&local_url, local); + let mut options = TableParquetOptions::default(); + options.global.compression = Some(compression.to_string()); df.write_parquet( output_path, DataFrameWriteOptions::new().with_single_file_output(true), - Some( - WriterProperties::builder() - .set_compression(compression) - .build(), - ), + Some(options), ) .await?; @@ -176,7 +162,7 @@ mod tests { let written_compression = parquet_metadata.row_group(0).column(0).compression(); - assert_eq!(written_compression, compression); + assert_eq!(written_compression, parse_compression_string(compression)?); } Ok(()) diff --git a/datafusion/core/src/datasource/file_format/arrow.rs b/datafusion/core/src/datasource/file_format/arrow.rs index 90417a978137..99bfbbad9d10 100644 --- a/datafusion/core/src/datasource/file_format/arrow.rs +++ b/datafusion/core/src/datasource/file_format/arrow.rs @@ -24,40 +24,36 @@ use std::borrow::Cow; use std::fmt::{self, Debug}; use std::sync::Arc; +use super::file_compression_type::FileCompressionType; +use super::write::demux::start_demuxer_task; +use super::write::{create_writer, SharedBuffer}; use crate::datasource::file_format::FileFormat; use crate::datasource::physical_plan::{ ArrowExec, FileGroupDisplay, FileScanConfig, FileSinkConfig, }; use crate::error::Result; use crate::execution::context::SessionState; -use crate::physical_plan::ExecutionPlan; +use crate::physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan}; use arrow::ipc::convert::fb_to_schema; use arrow::ipc::reader::FileReader; -use arrow::ipc::root_as_message; -use arrow_ipc::writer::IpcWriteOptions; -use arrow_ipc::CompressionType; +use arrow::ipc::writer::IpcWriteOptions; +use arrow::ipc::{root_as_message, CompressionType}; use arrow_schema::{ArrowError, Schema, SchemaRef}; - -use bytes::Bytes; use 
datafusion_common::{not_impl_err, DataFusionError, FileType, Statistics}; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_physical_expr::{PhysicalExpr, PhysicalSortRequirement}; - -use crate::physical_plan::{DisplayAs, DisplayFormatType}; -use async_trait::async_trait; use datafusion_physical_plan::insert::{DataSink, FileSinkExec}; use datafusion_physical_plan::metrics::MetricsSet; + +use async_trait::async_trait; +use bytes::Bytes; use futures::stream::BoxStream; use futures::StreamExt; use object_store::{GetResultPayload, ObjectMeta, ObjectStore}; use tokio::io::AsyncWriteExt; use tokio::task::JoinSet; -use super::file_compression_type::FileCompressionType; -use super::write::demux::start_demuxer_task; -use super::write::{create_writer, SharedBuffer}; - /// Initial writing buffer size. Note this is just a size hint for efficiency. It /// will grow beyond the set value if needed. const INITIAL_BUFFER_BYTES: usize = 1048576; @@ -215,11 +211,6 @@ impl DataSink for ArrowFileSink { data: SendableRecordBatchStream, context: &Arc, ) -> Result { - // No props are supported yet, but can be by updating FileTypeWriterOptions - // to populate this struct and use those options to initialize the arrow_ipc::writer::FileWriter - // https://github.com/apache/arrow-datafusion/issues/8635 - let _arrow_props = self.config.file_type_writer_options.try_into_arrow()?; - let object_store = context .runtime_env() .object_store(&self.config.object_store_url)?; @@ -390,12 +381,11 @@ async fn collect_at_least_n_bytes( #[cfg(test)] mod tests { - use chrono::DateTime; - use object_store::{chunked::ChunkedStore, memory::InMemory, path::Path}; - + use super::*; use crate::execution::context::SessionContext; - use super::*; + use chrono::DateTime; + use object_store::{chunked::ChunkedStore, memory::InMemory, path::Path}; #[tokio::test] async fn test_infer_schema_stream() -> Result<()> { diff --git a/datafusion/core/src/datasource/file_format/csv.rs b/datafusion/core/src/datasource/file_format/csv.rs index 9cae6675e825..a7849258329b 100644 --- a/datafusion/core/src/datasource/file_format/csv.rs +++ b/datafusion/core/src/datasource/file_format/csv.rs @@ -23,7 +23,7 @@ use std::fmt::{self, Debug}; use std::sync::Arc; use super::write::orchestration::stateless_multipart_put; -use super::{FileFormat, DEFAULT_SCHEMA_INFER_MAX_RECORD}; +use super::FileFormat; use crate::datasource::file_format::file_compression_type::FileCompressionType; use crate::datasource::file_format::write::BatchSerializer; use crate::datasource::physical_plan::{ @@ -39,6 +39,8 @@ use arrow::array::RecordBatch; use arrow::csv::WriterBuilder; use arrow::datatypes::{DataType, Field, Fields, Schema}; use arrow::{self, datatypes::SchemaRef}; +use datafusion_common::config::CsvOptions; +use datafusion_common::file_options::csv_writer::CsvWriterOptions; use datafusion_common::{exec_err, not_impl_err, DataFusionError, FileType}; use datafusion_execution::TaskContext; use datafusion_physical_expr::{PhysicalExpr, PhysicalSortRequirement}; @@ -51,27 +53,9 @@ use futures::{pin_mut, Stream, StreamExt, TryStreamExt}; use object_store::{delimited::newline_delimited_stream, ObjectMeta, ObjectStore}; /// Character Separated Value `FileFormat` implementation. 
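
Tying together the `write_parquet` changes shown above: parquet writer settings, including compression, are now expressed as a `TableParquetOptions` whose string values go through `parse_compression_string`, rather than as a prebuilt parquet `WriterProperties`. A minimal sketch mirroring the rewritten `write_parquet_with_compression` test, with assumed paths and placeholder file names.

```rust
use datafusion::dataframe::DataFrameWriteOptions;
use datafusion::error::Result;
use datafusion::prelude::{ParquetReadOptions, SessionContext};
use datafusion_common::config::TableParquetOptions;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();
    // `input.parquet` is a placeholder input path.
    let df = ctx
        .read_parquet("input.parquet", ParquetReadOptions::default())
        .await?;

    // Compression is a string setting, parsed the same way as the session-level option.
    let mut parquet_options = TableParquetOptions::default();
    parquet_options.global.compression = Some("zstd(3)".to_string());

    df.write_parquet(
        "out/",
        DataFrameWriteOptions::new().with_single_file_output(true),
        Some(parquet_options),
    )
    .await?;
    Ok(())
}
```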
-#[derive(Debug)] +#[derive(Debug, Default)] pub struct CsvFormat { - has_header: bool, - delimiter: u8, - quote: u8, - escape: Option, - schema_infer_max_rec: Option, - file_compression_type: FileCompressionType, -} - -impl Default for CsvFormat { - fn default() -> Self { - Self { - schema_infer_max_rec: Some(DEFAULT_SCHEMA_INFER_MAX_RECORD), - has_header: true, - delimiter: b',', - quote: b'"', - escape: None, - file_compression_type: FileCompressionType::UNCOMPRESSED, - } - } + options: CsvOptions, } impl CsvFormat { @@ -110,7 +94,7 @@ impl CsvFormat { &self, stream: BoxStream<'static, Result>, ) -> BoxStream<'static, Result> { - let file_compression_type = self.file_compression_type.to_owned(); + let file_compression_type: FileCompressionType = self.options.compression.into(); let decoder = file_compression_type.convert_stream(stream); let steam = match decoder { Ok(decoded_stream) => { @@ -131,43 +115,54 @@ impl CsvFormat { steam.boxed() } + /// Set the csv options + pub fn with_options(mut self, options: CsvOptions) -> Self { + self.options = options; + self + } + + /// Retrieve the csv options + pub fn options(&self) -> &CsvOptions { + &self.options + } + /// Set a limit in terms of records to scan to infer the schema /// - default to `DEFAULT_SCHEMA_INFER_MAX_RECORD` - pub fn with_schema_infer_max_rec(mut self, max_rec: Option) -> Self { - self.schema_infer_max_rec = max_rec; + pub fn with_schema_infer_max_rec(mut self, max_rec: usize) -> Self { + self.options.schema_infer_max_rec = max_rec; self } /// Set true to indicate that the first line is a header. /// - default to true pub fn with_has_header(mut self, has_header: bool) -> Self { - self.has_header = has_header; + self.options.has_header = has_header; self } /// True if the first line is a header. pub fn has_header(&self) -> bool { - self.has_header + self.options.has_header } /// The character separating values within a row. /// - default to ',' pub fn with_delimiter(mut self, delimiter: u8) -> Self { - self.delimiter = delimiter; + self.options.delimiter = delimiter; self } /// The quote character in a row. /// - default to '"' pub fn with_quote(mut self, quote: u8) -> Self { - self.quote = quote; + self.options.quote = quote; self } /// The escape character in a row. /// - default is None pub fn with_escape(mut self, escape: Option) -> Self { - self.escape = escape; + self.options.escape = escape; self } @@ -177,23 +172,23 @@ impl CsvFormat { mut self, file_compression_type: FileCompressionType, ) -> Self { - self.file_compression_type = file_compression_type; + self.options.compression = file_compression_type.into(); self } /// The delimiter character. pub fn delimiter(&self) -> u8 { - self.delimiter + self.options.delimiter } /// The quote character. pub fn quote(&self) -> u8 { - self.quote + self.options.quote } /// The escape character. 
pub fn escape(&self) -> Option { - self.escape + self.options.escape } } @@ -211,7 +206,7 @@ impl FileFormat for CsvFormat { ) -> Result { let mut schemas = vec![]; - let mut records_to_read = self.schema_infer_max_rec.unwrap_or(usize::MAX); + let mut records_to_read = self.options.schema_infer_max_rec; for object in objects { let stream = self.read_to_delimited_chunks(store, object).await; @@ -247,11 +242,11 @@ impl FileFormat for CsvFormat { ) -> Result> { let exec = CsvExec::new( conf, - self.has_header, - self.delimiter, - self.quote, - self.escape, - self.file_compression_type.to_owned(), + self.options.has_header, + self.options.delimiter, + self.options.quote, + self.options.escape, + self.options.compression.into(), ); Ok(Arc::new(exec)) } @@ -267,12 +262,10 @@ impl FileFormat for CsvFormat { return not_impl_err!("Overwrites are not implemented yet for CSV"); } - if self.file_compression_type != FileCompressionType::UNCOMPRESSED { - return not_impl_err!("Inserting compressed CSV is not implemented yet."); - } + let writer_options = CsvWriterOptions::try_from(&self.options)?; let sink_schema = conf.output_schema().clone(); - let sink = Arc::new(CsvSink::new(conf)); + let sink = Arc::new(CsvSink::new(conf, writer_options)); Ok(Arc::new(FileSinkExec::new( input, @@ -305,8 +298,8 @@ impl CsvFormat { while let Some(chunk) = stream.next().await.transpose()? { let format = arrow::csv::reader::Format::default() - .with_header(self.has_header && first_chunk) - .with_delimiter(self.delimiter); + .with_header(self.options.has_header && first_chunk) + .with_delimiter(self.options.delimiter); let (Schema { fields, .. }, records_read) = format.infer_schema(chunk.reader(), Some(records_to_read))?; @@ -439,6 +432,7 @@ impl BatchSerializer for CsvSerializer { pub struct CsvSink { /// Config options for writing data config: FileSinkConfig, + writer_options: CsvWriterOptions, } impl Debug for CsvSink { @@ -461,8 +455,11 @@ impl DisplayAs for CsvSink { impl CsvSink { /// Create from config. - pub fn new(config: FileSinkConfig) -> Self { - Self { config } + pub fn new(config: FileSinkConfig, writer_options: CsvWriterOptions) -> Self { + Self { + config, + writer_options, + } } /// Retrieve the inner [`FileSinkConfig`]. 
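For orientation, a minimal sketch (not part of this patch) of how the options-backed CsvFormat builder introduced above might be used; the CsvOptions field names follow the diff, while the import paths and the wrapper function are illustrative assumptions.

use datafusion::datasource::file_format::csv::CsvFormat;
use datafusion_common::config::CsvOptions;

fn example_csv_format() -> CsvFormat {
    // Start from a CsvOptions value (for example one taken from the session's
    // TableOptions) and then adjust individual settings; the builder methods
    // now write into the same embedded options struct.
    let mut csv_opts = CsvOptions::default();
    csv_opts.has_header = false;
    csv_opts.delimiter = b'|';
    CsvFormat::default()
        .with_options(csv_opts)
        .with_schema_infer_max_rec(100)
}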
@@ -475,11 +472,10 @@ impl CsvSink { data: SendableRecordBatchStream, context: &Arc, ) -> Result { - let writer_options = self.config.file_type_writer_options.try_into_csv()?; - let builder = &writer_options.writer_options; + let builder = &self.writer_options.writer_options; let builder_clone = builder.clone(); - let options_clone = writer_options.clone(); + let options_clone = self.writer_options.clone(); let get_serializer = move || { Arc::new( CsvSerializer::new() @@ -494,10 +490,15 @@ impl CsvSink { "csv".into(), Box::new(get_serializer), &self.config, - writer_options.compression.into(), + self.writer_options.compression.into(), ) .await } + + /// Retrieve the writer options + pub fn writer_options(&self) -> &CsvWriterOptions { + &self.writer_options + } } #[async_trait] @@ -668,11 +669,9 @@ mod tests { }; let num_rows_to_read = 100; - let csv_format = CsvFormat { - has_header: false, - schema_infer_max_rec: Some(num_rows_to_read), - ..Default::default() - }; + let csv_format = CsvFormat::default() + .with_has_header(false) + .with_schema_infer_max_rec(num_rows_to_read); let inferred_schema = csv_format .infer_schema( &state, @@ -723,7 +722,7 @@ mod tests { let path = Path::from("csv/aggregate_test_100.csv"); let csv = CsvFormat::default().with_has_header(true); - let records_to_read = csv.schema_infer_max_rec.unwrap_or(usize::MAX); + let records_to_read = csv.options().schema_infer_max_rec; let store = Arc::new(integration) as Arc; let original_stream = store.get(&path).await?; diff --git a/datafusion/core/src/datasource/file_format/file_compression_type.rs b/datafusion/core/src/datasource/file_format/file_compression_type.rs index 48094eede87b..c538819e2684 100644 --- a/datafusion/core/src/datasource/file_format/file_compression_type.rs +++ b/datafusion/core/src/datasource/file_format/file_compression_type.rs @@ -17,7 +17,13 @@ //! 
File Compression type abstraction +use std::str::FromStr; + use crate::error::{DataFusionError, Result}; + +use datafusion_common::parsers::CompressionTypeVariant::{self, *}; +use datafusion_common::{FileType, GetExt}; + #[cfg(feature = "compression")] use async_compression::tokio::bufread::{ BzDecoder as AsyncBzDecoder, BzEncoder as AsyncBzEncoder, @@ -31,15 +37,12 @@ use async_compression::tokio::write::{BzEncoder, GzipEncoder, XzEncoder, ZstdEnc use bytes::Bytes; #[cfg(feature = "compression")] use bzip2::read::MultiBzDecoder; -use datafusion_common::{parsers::CompressionTypeVariant, FileType, GetExt}; #[cfg(feature = "compression")] use flate2::read::MultiGzDecoder; - use futures::stream::BoxStream; use futures::StreamExt; #[cfg(feature = "compression")] use futures::TryStreamExt; -use std::str::FromStr; use tokio::io::AsyncWrite; #[cfg(feature = "compression")] use tokio_util::io::{ReaderStream, StreamReader}; @@ -47,7 +50,6 @@ use tokio_util::io::{ReaderStream, StreamReader}; use xz2::read::XzDecoder; #[cfg(feature = "compression")] use zstd::Decoder as ZstdDecoder; -use CompressionTypeVariant::*; /// Readable file compression type #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -73,6 +75,12 @@ impl From for FileCompressionType { } } +impl From for CompressionTypeVariant { + fn from(t: FileCompressionType) -> Self { + t.variant + } +} + impl FromStr for FileCompressionType { type Err = DataFusionError; @@ -261,14 +269,17 @@ impl FileTypeExt for FileType { #[cfg(test)] mod tests { + use std::str::FromStr; + use crate::datasource::file_format::file_compression_type::{ FileCompressionType, FileTypeExt, }; use crate::error::DataFusionError; - use bytes::Bytes; + use datafusion_common::file_options::file_type::FileType; + + use bytes::Bytes; use futures::StreamExt; - use std::str::FromStr; #[test] fn get_ext_with_compression() { diff --git a/datafusion/core/src/datasource/file_format/json.rs b/datafusion/core/src/datasource/file_format/json.rs index 121fe5e8dcb1..0cc38bbb5554 100644 --- a/datafusion/core/src/datasource/file_format/json.rs +++ b/datafusion/core/src/datasource/file_format/json.rs @@ -27,7 +27,6 @@ use super::write::orchestration::stateless_multipart_put; use super::{FileFormat, FileScanConfig}; use crate::datasource::file_format::file_compression_type::FileCompressionType; use crate::datasource::file_format::write::BatchSerializer; -use crate::datasource::file_format::DEFAULT_SCHEMA_INFER_MAX_RECORD; use crate::datasource::physical_plan::FileGroupDisplay; use crate::datasource::physical_plan::{FileSinkConfig, NdJsonExec}; use crate::error::Result; @@ -42,6 +41,8 @@ use arrow::datatypes::SchemaRef; use arrow::json; use arrow::json::reader::{infer_json_schema_from_iterator, ValueIter}; use arrow_array::RecordBatch; +use datafusion_common::config::JsonOptions; +use datafusion_common::file_options::json_writer::JsonWriterOptions; use datafusion_common::{not_impl_err, FileType}; use datafusion_execution::TaskContext; use datafusion_physical_expr::{PhysicalExpr, PhysicalSortRequirement}; @@ -53,26 +54,27 @@ use bytes::{Buf, Bytes}; use object_store::{GetResultPayload, ObjectMeta, ObjectStore}; /// New line delimited JSON `FileFormat` implementation. 
-#[derive(Debug)] +#[derive(Debug, Default)] pub struct JsonFormat { - schema_infer_max_rec: Option, - file_compression_type: FileCompressionType, + options: JsonOptions, } -impl Default for JsonFormat { - fn default() -> Self { - Self { - schema_infer_max_rec: Some(DEFAULT_SCHEMA_INFER_MAX_RECORD), - file_compression_type: FileCompressionType::UNCOMPRESSED, - } +impl JsonFormat { + /// Set JSON options + pub fn with_options(mut self, options: JsonOptions) -> Self { + self.options = options; + self + } + + /// Retrieve JSON options + pub fn options(&self) -> &JsonOptions { + &self.options } -} -impl JsonFormat { /// Set a limit in terms of records to scan to infer the schema /// - defaults to `DEFAULT_SCHEMA_INFER_MAX_RECORD` - pub fn with_schema_infer_max_rec(mut self, max_rec: Option) -> Self { - self.schema_infer_max_rec = max_rec; + pub fn with_schema_infer_max_rec(mut self, max_rec: usize) -> Self { + self.options.schema_infer_max_rec = max_rec; self } @@ -82,7 +84,7 @@ impl JsonFormat { mut self, file_compression_type: FileCompressionType, ) -> Self { - self.file_compression_type = file_compression_type; + self.options.compression = file_compression_type.into(); self } } @@ -100,8 +102,8 @@ impl FileFormat for JsonFormat { objects: &[ObjectMeta], ) -> Result { let mut schemas = Vec::new(); - let mut records_to_read = self.schema_infer_max_rec.unwrap_or(usize::MAX); - let file_compression_type = self.file_compression_type.to_owned(); + let mut records_to_read = self.options.schema_infer_max_rec; + let file_compression_type = FileCompressionType::from(self.options.compression); for object in objects { let mut take_while = || { let should_take = records_to_read > 0; @@ -154,7 +156,8 @@ impl FileFormat for JsonFormat { conf: FileScanConfig, _filters: Option<&Arc>, ) -> Result> { - let exec = NdJsonExec::new(conf, self.file_compression_type.to_owned()); + let exec = + NdJsonExec::new(conf, FileCompressionType::from(self.options.compression)); Ok(Arc::new(exec)) } @@ -169,11 +172,10 @@ impl FileFormat for JsonFormat { return not_impl_err!("Overwrites are not implemented yet for Json"); } - if self.file_compression_type != FileCompressionType::UNCOMPRESSED { - return not_impl_err!("Inserting compressed JSON is not implemented yet."); - } + let writer_options = JsonWriterOptions::try_from(&self.options)?; + let sink_schema = conf.output_schema().clone(); - let sink = Arc::new(JsonSink::new(conf)); + let sink = Arc::new(JsonSink::new(conf, writer_options)); Ok(Arc::new(FileSinkExec::new( input, @@ -217,6 +219,8 @@ impl BatchSerializer for JsonSerializer { pub struct JsonSink { /// Config options for writing data config: FileSinkConfig, + /// + writer_options: JsonWriterOptions, } impl Debug for JsonSink { @@ -239,8 +243,11 @@ impl DisplayAs for JsonSink { impl JsonSink { /// Create from config. - pub fn new(config: FileSinkConfig) -> Self { - Self { config } + pub fn new(config: FileSinkConfig, writer_options: JsonWriterOptions) -> Self { + Self { + config, + writer_options, + } } /// Retrieve the inner [`FileSinkConfig`]. 
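By analogy, a small sketch (again not part of this patch) of the JsonFormat equivalent; the JsonOptions fields mirror the diff, while the compression variant and import paths are assumptions.

use datafusion::datasource::file_format::json::JsonFormat;
use datafusion_common::config::JsonOptions;
use datafusion_common::parsers::CompressionTypeVariant;

fn example_json_format() -> JsonFormat {
    // JsonOptions now carries both schema-inference and writer settings, so the
    // compression chosen here reaches the scan as well as the new JsonSink.
    let mut json_opts = JsonOptions::default();
    json_opts.compression = CompressionTypeVariant::GZIP;
    JsonFormat::default()
        .with_options(json_opts)
        .with_schema_infer_max_rec(1000)
}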
@@ -253,9 +260,6 @@ impl JsonSink { data: SendableRecordBatchStream, context: &Arc, ) -> Result { - let writer_options = self.config.file_type_writer_options.try_into_json()?; - let compression = &writer_options.compression; - let get_serializer = move || Arc::new(JsonSerializer::new()) as _; stateless_multipart_put( @@ -264,10 +268,14 @@ impl JsonSink { "json".into(), Box::new(get_serializer), &self.config, - (*compression).into(), + self.writer_options.compression.into(), ) .await } + /// Retrieve the writer options + pub fn writer_options(&self) -> &JsonWriterOptions { + &self.writer_options + } } #[async_trait] @@ -293,21 +301,22 @@ impl DataSink for JsonSink { #[cfg(test)] mod tests { use super::super::test_util::scan_format; + use super::*; + use crate::execution::options::NdJsonReadOptions; + use crate::physical_plan::collect; + use crate::prelude::{SessionConfig, SessionContext}; + use crate::test::object_store::local_unpartitioned_file; + use arrow::util::pretty; use datafusion_common::cast::as_int64_array; use datafusion_common::stats::Precision; use datafusion_common::{assert_batches_eq, internal_err}; + use futures::StreamExt; use object_store::local::LocalFileSystem; use regex::Regex; use rstest::rstest; - use super::*; - use crate::execution::options::NdJsonReadOptions; - use crate::physical_plan::collect; - use crate::prelude::{SessionConfig, SessionContext}; - use crate::test::object_store::local_unpartitioned_file; - #[tokio::test] async fn read_small_batches() -> Result<()> { let config = SessionConfig::new().with_batch_size(2); @@ -413,7 +422,7 @@ mod tests { let ctx = session.state(); let store = Arc::new(LocalFileSystem::new()) as _; let filename = "tests/data/schema_infer_limit.json"; - let format = JsonFormat::default().with_schema_infer_max_rec(Some(3)); + let format = JsonFormat::default().with_schema_infer_max_rec(3); let file_schema = format .infer_schema(&ctx, &store, &[local_unpartitioned_file(filename)]) diff --git a/datafusion/core/src/datasource/file_format/options.rs b/datafusion/core/src/datasource/file_format/options.rs index d389137785ff..f66683c311c1 100644 --- a/datafusion/core/src/datasource/file_format/options.rs +++ b/datafusion/core/src/datasource/file_format/options.rs @@ -19,9 +19,6 @@ use std::sync::Arc; -use arrow::datatypes::{DataType, Schema, SchemaRef}; -use async_trait::async_trait; - use crate::datasource::file_format::arrow::ArrowFormat; use crate::datasource::file_format::file_compression_type::FileCompressionType; #[cfg(feature = "parquet")] @@ -35,11 +32,16 @@ use crate::datasource::{ use crate::error::Result; use crate::execution::context::{SessionConfig, SessionState}; use crate::logical_expr::Expr; + +use arrow::datatypes::{DataType, Schema, SchemaRef}; +use datafusion_common::config::TableOptions; use datafusion_common::{ DEFAULT_ARROW_EXTENSION, DEFAULT_AVRO_EXTENSION, DEFAULT_CSV_EXTENSION, DEFAULT_JSON_EXTENSION, DEFAULT_PARQUET_EXTENSION, }; +use async_trait::async_trait; + /// Options that control the reading of CSV files. /// /// Note this structure is supplied when a datasource is created and @@ -430,7 +432,11 @@ impl<'a> NdJsonReadOptions<'a> { /// ['ReadOptions'] is implemented by Options like ['CsvReadOptions'] that control the reading of respective files/sources. 
pub trait ReadOptions<'a> { /// Helper to convert these user facing options to `ListingTable` options - fn to_listing_options(&self, config: &SessionConfig) -> ListingOptions; + fn to_listing_options( + &self, + config: &SessionConfig, + table_options: TableOptions, + ) -> ListingOptions; /// Infer and resolve the schema from the files/sources provided. async fn get_resolved_schema( @@ -455,7 +461,7 @@ pub trait ReadOptions<'a> { return Ok(Arc::new(s.to_owned())); } - self.to_listing_options(config) + self.to_listing_options(config, state.default_table_options().clone()) .infer_schema(&state, &table_path) .await } @@ -463,13 +469,18 @@ pub trait ReadOptions<'a> { #[async_trait] impl ReadOptions<'_> for CsvReadOptions<'_> { - fn to_listing_options(&self, config: &SessionConfig) -> ListingOptions { + fn to_listing_options( + &self, + config: &SessionConfig, + table_options: TableOptions, + ) -> ListingOptions { let file_format = CsvFormat::default() + .with_options(table_options.csv) .with_has_header(self.has_header) .with_delimiter(self.delimiter) .with_quote(self.quote) .with_escape(self.escape) - .with_schema_infer_max_rec(Some(self.schema_infer_max_records)) + .with_schema_infer_max_rec(self.schema_infer_max_records) .with_file_compression_type(self.file_compression_type.to_owned()); ListingOptions::new(Arc::new(file_format)) @@ -493,10 +504,19 @@ impl ReadOptions<'_> for CsvReadOptions<'_> { #[cfg(feature = "parquet")] #[async_trait] impl ReadOptions<'_> for ParquetReadOptions<'_> { - fn to_listing_options(&self, config: &SessionConfig) -> ListingOptions { - let file_format = ParquetFormat::new() - .with_enable_pruning(self.parquet_pruning) - .with_skip_metadata(self.skip_metadata); + fn to_listing_options( + &self, + config: &SessionConfig, + table_options: TableOptions, + ) -> ListingOptions { + let mut file_format = ParquetFormat::new().with_options(table_options.parquet); + + if let Some(parquet_pruning) = self.parquet_pruning { + file_format = file_format.with_enable_pruning(parquet_pruning) + } + if let Some(skip_metadata) = self.skip_metadata { + file_format = file_format.with_skip_metadata(skip_metadata) + } ListingOptions::new(Arc::new(file_format)) .with_file_extension(self.file_extension) @@ -518,9 +538,14 @@ impl ReadOptions<'_> for ParquetReadOptions<'_> { #[async_trait] impl ReadOptions<'_> for NdJsonReadOptions<'_> { - fn to_listing_options(&self, config: &SessionConfig) -> ListingOptions { + fn to_listing_options( + &self, + config: &SessionConfig, + table_options: TableOptions, + ) -> ListingOptions { let file_format = JsonFormat::default() - .with_schema_infer_max_rec(Some(self.schema_infer_max_records)) + .with_options(table_options.json) + .with_schema_infer_max_rec(self.schema_infer_max_records) .with_file_compression_type(self.file_compression_type.to_owned()); ListingOptions::new(Arc::new(file_format)) @@ -543,7 +568,11 @@ impl ReadOptions<'_> for NdJsonReadOptions<'_> { #[async_trait] impl ReadOptions<'_> for AvroReadOptions<'_> { - fn to_listing_options(&self, config: &SessionConfig) -> ListingOptions { + fn to_listing_options( + &self, + config: &SessionConfig, + _table_options: TableOptions, + ) -> ListingOptions { let file_format = AvroFormat; ListingOptions::new(Arc::new(file_format)) @@ -565,7 +594,11 @@ impl ReadOptions<'_> for AvroReadOptions<'_> { #[async_trait] impl ReadOptions<'_> for ArrowReadOptions<'_> { - fn to_listing_options(&self, config: &SessionConfig) -> ListingOptions { + fn to_listing_options( + &self, + config: &SessionConfig, + 
_table_options: TableOptions, + ) -> ListingOptions { let file_format = ArrowFormat; ListingOptions::new(Arc::new(file_format)) diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet.rs index 3824177cb363..c04c536e7ca6 100644 --- a/datafusion/core/src/datasource/file_format/parquet.rs +++ b/datafusion/core/src/datasource/file_format/parquet.rs @@ -17,56 +17,23 @@ //! [`ParquetFormat`]: Parquet [`FileFormat`] abstractions -use arrow_array::RecordBatch; -use async_trait::async_trait; -use datafusion_common::stats::Precision; -use datafusion_physical_plan::metrics::MetricsSet; -use parquet::arrow::arrow_writer::{ - compute_leaves, get_column_writers, ArrowColumnChunk, ArrowColumnWriter, - ArrowLeafColumn, -}; -use parquet::file::writer::SerializedFileWriter; use std::any::Any; use std::fmt; use std::fmt::Debug; use std::sync::Arc; -use tokio::io::{AsyncWrite, AsyncWriteExt}; -use tokio::sync::mpsc::{self, Receiver, Sender}; -use tokio::task::JoinSet; - -use crate::datasource::file_format::file_compression_type::FileCompressionType; -use crate::datasource::statistics::{create_max_min_accs, get_col_stats}; -use arrow::datatypes::SchemaRef; -use arrow::datatypes::{Fields, Schema}; -use bytes::{BufMut, BytesMut}; -use datafusion_common::{exec_err, not_impl_err, DataFusionError, FileType}; -use datafusion_common_runtime::SpawnedTask; -use datafusion_execution::TaskContext; -use datafusion_physical_expr::{PhysicalExpr, PhysicalSortRequirement}; -use futures::{StreamExt, TryStreamExt}; -use hashbrown::HashMap; -use object_store::path::Path; -use object_store::{ObjectMeta, ObjectStore}; -use parquet::arrow::{ - arrow_to_parquet_schema, parquet_to_arrow_schema, AsyncArrowWriter, -}; -use parquet::file::footer::{decode_footer, decode_metadata}; -use parquet::file::metadata::ParquetMetaData; -use parquet::file::properties::WriterProperties; -use parquet::file::statistics::Statistics as ParquetStatistics; use super::write::demux::start_demuxer_task; use super::write::{create_writer, AbortableWrite, SharedBuffer}; use super::{FileFormat, FileScanConfig}; use crate::arrow::array::{ - BooleanArray, Float32Array, Float64Array, Int32Array, Int64Array, + BooleanArray, Float32Array, Float64Array, Int32Array, Int64Array, RecordBatch, }; -use crate::arrow::datatypes::DataType; -use crate::config::ConfigOptions; - +use crate::arrow::datatypes::{DataType, Fields, Schema, SchemaRef}; +use crate::datasource::file_format::file_compression_type::FileCompressionType; use crate::datasource::physical_plan::{ FileGroupDisplay, FileSinkConfig, ParquetExec, SchemaAdapter, }; +use crate::datasource::statistics::{create_max_min_accs, get_col_stats}; use crate::error::Result; use crate::execution::context::SessionState; use crate::physical_plan::expressions::{MaxAccumulator, MinAccumulator}; @@ -76,6 +43,41 @@ use crate::physical_plan::{ Statistics, }; +use datafusion_common::config::TableParquetOptions; +use datafusion_common::file_options::parquet_writer::ParquetWriterOptions; +use datafusion_common::stats::Precision; +use datafusion_common::{ + exec_err, internal_datafusion_err, not_impl_err, DataFusionError, FileType, +}; +use datafusion_common_runtime::SpawnedTask; +use datafusion_execution::TaskContext; +use datafusion_physical_expr::{PhysicalExpr, PhysicalSortRequirement}; +use datafusion_physical_plan::metrics::MetricsSet; + +use async_trait::async_trait; +use bytes::{BufMut, BytesMut}; +use parquet::arrow::arrow_writer::{ + compute_leaves, 
get_column_writers, ArrowColumnChunk, ArrowColumnWriter, + ArrowLeafColumn, +}; +use parquet::arrow::{ + arrow_to_parquet_schema, parquet_to_arrow_schema, AsyncArrowWriter, +}; +use parquet::file::footer::{decode_footer, decode_metadata}; +use parquet::file::metadata::ParquetMetaData; +use parquet::file::properties::WriterProperties; +use parquet::file::statistics::Statistics as ParquetStatistics; +use parquet::file::writer::SerializedFileWriter; +use parquet::format::FileMetaData; +use tokio::io::{AsyncWrite, AsyncWriteExt}; +use tokio::sync::mpsc::{self, Receiver, Sender}; +use tokio::task::JoinSet; + +use futures::{StreamExt, TryStreamExt}; +use hashbrown::HashMap; +use object_store::path::Path; +use object_store::{ObjectMeta, ObjectStore}; + /// Size of the buffer for [`AsyncArrowWriter`]. const PARQUET_WRITER_BUFFER_SIZE: usize = 10485760; @@ -88,20 +90,9 @@ const INITIAL_BUFFER_BYTES: usize = 1048576; const BUFFER_FLUSH_BYTES: usize = 1024000; /// The Apache Parquet `FileFormat` implementation -/// -/// Note it is recommended these are instead configured on the [`ConfigOptions`] -/// associated with the [`SessionState`] instead of overridden on a format-basis -/// -/// TODO: Deprecate and remove overrides -/// #[derive(Debug, Default)] pub struct ParquetFormat { - /// Override the global setting for `enable_pruning` - enable_pruning: Option, - /// Override the global setting for `metadata_size_hint` - metadata_size_hint: Option, - /// Override the global setting for `skip_metadata` - skip_metadata: Option, + options: TableParquetOptions, } impl ParquetFormat { @@ -112,15 +103,14 @@ impl ParquetFormat { /// Activate statistics based row group level pruning /// - If `None`, defaults to value on `config_options` - pub fn with_enable_pruning(mut self, enable: Option) -> Self { - self.enable_pruning = enable; + pub fn with_enable_pruning(mut self, enable: bool) -> Self { + self.options.global.pruning = enable; self } /// Return `true` if pruning is enabled - pub fn enable_pruning(&self, config_options: &ConfigOptions) -> bool { - self.enable_pruning - .unwrap_or(config_options.execution.parquet.pruning) + pub fn enable_pruning(&self) -> bool { + self.options.global.pruning } /// Provide a hint to the size of the file metadata. If a hint is provided @@ -130,14 +120,13 @@ impl ParquetFormat { /// /// - If `None`, defaults to value on `config_options` pub fn with_metadata_size_hint(mut self, size_hint: Option) -> Self { - self.metadata_size_hint = size_hint; + self.options.global.metadata_size_hint = size_hint; self } /// Return the metadata size hint if set - pub fn metadata_size_hint(&self, config_options: &ConfigOptions) -> Option { - let hint = config_options.execution.parquet.metadata_size_hint; - self.metadata_size_hint.or(hint) + pub fn metadata_size_hint(&self) -> Option { + self.options.global.metadata_size_hint } /// Tell the parquet reader to skip any metadata that may be in @@ -145,16 +134,26 @@ impl ParquetFormat { /// metadata. /// /// - If `None`, defaults to value on `config_options` - pub fn with_skip_metadata(mut self, skip_metadata: Option) -> Self { - self.skip_metadata = skip_metadata; + pub fn with_skip_metadata(mut self, skip_metadata: bool) -> Self { + self.options.global.skip_metadata = skip_metadata; self } /// Returns `true` if schema metadata will be cleared prior to /// schema merging. 
- pub fn skip_metadata(&self, config_options: &ConfigOptions) -> bool { - self.skip_metadata - .unwrap_or(config_options.execution.parquet.skip_metadata) + pub fn skip_metadata(&self) -> bool { + self.options.global.skip_metadata + } + + /// Set Parquet options for the ParquetFormat + pub fn with_options(mut self, options: TableParquetOptions) -> Self { + self.options = options; + self + } + + /// Parquet options + pub fn options(&self) -> &TableParquetOptions { + &self.options } } @@ -202,7 +201,7 @@ impl FileFormat for ParquetFormat { fetch_schema_with_location( store.as_ref(), object, - self.metadata_size_hint, + self.metadata_size_hint(), ) }) .boxed() // Workaround https://github.com/rust-lang/rust/issues/64552 @@ -223,7 +222,7 @@ impl FileFormat for ParquetFormat { .map(|(_, schema)| schema) .collect::>(); - let schema = if self.skip_metadata(state.config_options()) { + let schema = if self.skip_metadata() { Schema::try_merge(clear_metadata(schemas)) } else { Schema::try_merge(schemas) @@ -243,7 +242,7 @@ impl FileFormat for ParquetFormat { store.as_ref(), table_schema, object, - self.metadata_size_hint, + self.metadata_size_hint(), ) .await?; Ok(stats) @@ -251,22 +250,20 @@ impl FileFormat for ParquetFormat { async fn create_physical_plan( &self, - state: &SessionState, + _state: &SessionState, conf: FileScanConfig, filters: Option<&Arc>, ) -> Result> { // If enable pruning then combine the filters to build the predicate. // If disable pruning then set the predicate to None, thus readers // will not prune data based on the statistics. - let predicate = self - .enable_pruning(state.config_options()) - .then(|| filters.cloned()) - .flatten(); + let predicate = self.enable_pruning().then(|| filters.cloned()).flatten(); Ok(Arc::new(ParquetExec::new( conf, predicate, - self.metadata_size_hint(state.config_options()), + self.metadata_size_hint(), + self.options.clone(), ))) } @@ -282,7 +279,7 @@ impl FileFormat for ParquetFormat { } let sink_schema = conf.output_schema().clone(); - let sink = Arc::new(ParquetSink::new(conf)); + let sink = Arc::new(ParquetSink::new(conf, self.options.clone())); Ok(Arc::new(FileSinkExec::new( input, @@ -541,6 +538,11 @@ async fn fetch_statistics( pub struct ParquetSink { /// Config options for writing data config: FileSinkConfig, + /// + parquet_options: TableParquetOptions, + /// File metadata from successfully produced parquet files. The Mutex is only used + /// to allow inserting to HashMap from behind borrowed reference in DataSink::write_all. + written: Arc>>, } impl Debug for ParquetSink { @@ -563,14 +565,25 @@ impl DisplayAs for ParquetSink { impl ParquetSink { /// Create from config. - pub fn new(config: FileSinkConfig) -> Self { - Self { config } + pub fn new(config: FileSinkConfig, parquet_options: TableParquetOptions) -> Self { + Self { + config, + parquet_options, + written: Default::default(), + } } /// Retrieve the inner [`FileSinkConfig`]. pub fn config(&self) -> &FileSinkConfig { &self.config } + + /// Retrieve the file metadata for the written files, keyed to the path + /// which may be partitioned (in the case of hive style partitioning). + pub fn written(&self) -> HashMap { + self.written.lock().clone() + } + /// Converts table schema to writer schema, which may differ in the case /// of hive style partitioning where some columns are removed from the /// underlying files. 
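As a rough illustration (not part of this patch) of the TableParquetOptions plumbing described above: method and field names follow the diff, while the import paths and the metadata hint value are assumptions.

use datafusion::datasource::file_format::parquet::ParquetFormat;
use datafusion_common::config::TableParquetOptions;

fn example_parquet_format() -> ParquetFormat {
    // The former per-format overrides (pruning, metadata size hint, skip_metadata)
    // now live in TableParquetOptions::global instead of Option fields on the format.
    let mut parquet_opts = TableParquetOptions::default();
    parquet_opts.global.metadata_size_hint = Some(512 * 1024);
    ParquetFormat::default()
        .with_options(parquet_opts)
        .with_enable_pruning(true)
        .with_skip_metadata(false)
}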
@@ -616,8 +629,14 @@ impl ParquetSink { PARQUET_WRITER_BUFFER_SIZE, Some(parquet_props), )?; + Ok(writer) } + + /// Parquet options + pub fn parquet_options(&self) -> &TableParquetOptions { + &self.parquet_options + } } #[async_trait] @@ -635,18 +654,15 @@ impl DataSink for ParquetSink { data: SendableRecordBatchStream, context: &Arc, ) -> Result { - let parquet_props = self - .config - .file_type_writer_options - .try_into_parquet()? - .writer_options(); + let parquet_props = ParquetWriterOptions::try_from(&self.parquet_options)?; let object_store = context .runtime_env() .object_store(&self.config.object_store_url)?; - let parquet_opts = &context.session_config().options().execution.parquet; - let allow_single_file_parallelism = parquet_opts.allow_single_file_parallelism; + let parquet_opts = &self.parquet_options; + let allow_single_file_parallelism = + parquet_opts.global.allow_single_file_parallelism; let part_col = if !self.config.table_partition_cols.is_empty() { Some(self.config.table_partition_cols.clone()) @@ -655,8 +671,11 @@ impl DataSink for ParquetSink { }; let parallel_options = ParallelParquetWriterOptions { - max_parallel_row_groups: parquet_opts.maximum_parallel_row_group_writers, + max_parallel_row_groups: parquet_opts + .global + .maximum_parallel_row_group_writers, max_buffered_record_batches_per_stream: parquet_opts + .global .maximum_buffered_record_batches_per_stream, }; @@ -668,25 +687,28 @@ impl DataSink for ParquetSink { "parquet".into(), ); - let mut file_write_tasks: JoinSet> = - JoinSet::new(); + let mut file_write_tasks: JoinSet< + std::result::Result<(Path, FileMetaData), DataFusionError>, + > = JoinSet::new(); + while let Some((path, mut rx)) = file_stream_rx.recv().await { if !allow_single_file_parallelism { let mut writer = self .create_async_arrow_writer( &path, object_store.clone(), - parquet_props.clone(), + parquet_props.writer_options().clone(), ) .await?; file_write_tasks.spawn(async move { - let mut row_count = 0; while let Some(batch) = rx.recv().await { - row_count += batch.num_rows(); writer.write(&batch).await?; } - writer.close().await?; - Ok(row_count) + let file_metadata = writer + .close() + .await + .map_err(DataFusionError::ParquetError)?; + Ok((path, file_metadata)) }); } else { let writer = create_writer( @@ -701,14 +723,15 @@ impl DataSink for ParquetSink { let props = parquet_props.clone(); let parallel_options_clone = parallel_options.clone(); file_write_tasks.spawn(async move { - output_single_parquet_file_parallelized( + let file_metadata = output_single_parquet_file_parallelized( writer, rx, schema, - &props, + props.writer_options(), parallel_options_clone, ) - .await + .await?; + Ok((path, file_metadata)) }); } } @@ -717,7 +740,13 @@ impl DataSink for ParquetSink { while let Some(result) = file_write_tasks.join_next().await { match result { Ok(r) => { - row_count += r?; + let (path, file_metadata) = r?; + row_count += file_metadata.num_rows; + let mut written_files = self.written.lock(); + written_files + .try_insert(path.clone(), file_metadata) + .map_err(|e| internal_datafusion_err!("duplicate entry detected for partitioned file {path}: {e}"))?; + drop(written_files); } Err(e) => { if e.is_panic() { @@ -919,7 +948,7 @@ async fn concatenate_parallel_row_groups( schema: Arc, writer_props: Arc, mut object_store_writer: AbortableWrite>, -) -> Result { +) -> Result { let merged_buff = SharedBuffer::new(INITIAL_BUFFER_BYTES); let schema_desc = arrow_to_parquet_schema(schema.as_ref())?; @@ -929,13 +958,10 @@ async fn 
concatenate_parallel_row_groups( writer_props, )?; - let mut row_count = 0; - while let Some(task) = serialize_rx.recv().await { let result = task.join_unwind().await; let mut rg_out = parquet_writer.next_row_group()?; - let (serialized_columns, cnt) = result?; - row_count += cnt; + let (serialized_columns, _cnt) = result?; for chunk in serialized_columns { chunk.append_to_row_group(&mut rg_out)?; let mut buff_to_flush = merged_buff.buffer.try_lock().unwrap(); @@ -949,13 +975,13 @@ async fn concatenate_parallel_row_groups( rg_out.close()?; } - let inner_writer = parquet_writer.into_inner()?; - let final_buff = inner_writer.buffer.try_lock().unwrap(); + let file_metadata = parquet_writer.close()?; + let final_buff = merged_buff.buffer.try_lock().unwrap(); object_store_writer.write_all(final_buff.as_slice()).await?; object_store_writer.shutdown().await?; - Ok(row_count) + Ok(file_metadata) } /// Parallelizes the serialization of a single parquet file, by first serializing N @@ -968,7 +994,7 @@ async fn output_single_parquet_file_parallelized( output_schema: Arc, parquet_props: &WriterProperties, parallel_options: ParallelParquetWriterOptions, -) -> Result { +) -> Result { let max_rowgroups = parallel_options.max_parallel_row_groups; // Buffer size of this channel limits maximum number of RowGroups being worked on in parallel let (serialize_tx, serialize_rx) = @@ -982,7 +1008,7 @@ async fn output_single_parquet_file_parallelized( arc_props.clone(), parallel_options, ); - let row_count = concatenate_parallel_row_groups( + let file_metadata = concatenate_parallel_row_groups( serialize_rx, output_schema.clone(), arc_props.clone(), @@ -991,14 +1017,16 @@ async fn output_single_parquet_file_parallelized( .await?; launch_serialization_task.join_unwind().await?; - Ok(row_count) + Ok(file_metadata) } #[cfg(test)] pub(crate) mod test_util { use super::*; use crate::test::object_store::local_unpartitioned_file; + use arrow::record_batch::RecordBatch; + use parquet::arrow::ArrowWriter; use parquet::file::properties::WriterProperties; use tempfile::NamedTempFile; @@ -1077,6 +1105,7 @@ pub(crate) mod test_util { #[cfg(test)] mod tests { use super::super::test_util::scan_format; + use crate::datasource::listing::{ListingTableUrl, PartitionedFile}; use crate::physical_plan::collect; use std::fmt::{Display, Formatter}; use std::sync::atomic::{AtomicUsize, Ordering}; @@ -1088,13 +1117,19 @@ mod tests { use crate::prelude::{SessionConfig, SessionContext}; use arrow::array::{Array, ArrayRef, StringArray}; use arrow::record_batch::RecordBatch; + use arrow_schema::Field; use async_trait::async_trait; use bytes::Bytes; use datafusion_common::cast::{ as_binary_array, as_boolean_array, as_float32_array, as_float64_array, as_int32_array, as_timestamp_nanosecond_array, }; + use datafusion_common::config::ParquetOptions; + use datafusion_common::config::TableParquetOptions; use datafusion_common::ScalarValue; + use datafusion_execution::object_store::ObjectStoreUrl; + use datafusion_execution::runtime_env::RuntimeEnv; + use datafusion_physical_plan::stream::RecordBatchStreamAdapter; use futures::stream::BoxStream; use futures::StreamExt; use log::error; @@ -1789,4 +1824,183 @@ mod tests { let format = ParquetFormat::default(); scan_format(state, &format, &testdata, file_name, projection, limit).await } + + fn build_ctx(store_url: &url::Url) -> Arc { + let tmp_dir = tempfile::TempDir::new().unwrap(); + let local = Arc::new( + LocalFileSystem::new_with_prefix(&tmp_dir) + .expect("should create object store"), + ); + + 
let mut session = SessionConfig::default(); + let mut parquet_opts = ParquetOptions { + allow_single_file_parallelism: true, + ..Default::default() + }; + parquet_opts.allow_single_file_parallelism = true; + session.options_mut().execution.parquet = parquet_opts; + + let runtime = RuntimeEnv::default(); + runtime + .object_store_registry + .register_store(store_url, local); + + Arc::new( + TaskContext::default() + .with_session_config(session) + .with_runtime(Arc::new(runtime)), + ) + } + + #[tokio::test] + async fn parquet_sink_write() -> Result<()> { + let field_a = Field::new("a", DataType::Utf8, false); + let field_b = Field::new("b", DataType::Utf8, false); + let schema = Arc::new(Schema::new(vec![field_a, field_b])); + let object_store_url = ObjectStoreUrl::local_filesystem(); + + let file_sink_config = FileSinkConfig { + object_store_url: object_store_url.clone(), + file_groups: vec![PartitionedFile::new("/tmp".to_string(), 1)], + table_paths: vec![ListingTableUrl::parse("file:///")?], + output_schema: schema.clone(), + table_partition_cols: vec![], + overwrite: true, + }; + let parquet_sink = Arc::new(ParquetSink::new( + file_sink_config, + TableParquetOptions::default(), + )); + + // create data + let col_a: ArrayRef = Arc::new(StringArray::from(vec!["foo", "bar"])); + let col_b: ArrayRef = Arc::new(StringArray::from(vec!["baz", "baz"])); + let batch = RecordBatch::try_from_iter(vec![("a", col_a), ("b", col_b)]).unwrap(); + + // write stream + parquet_sink + .write_all( + Box::pin(RecordBatchStreamAdapter::new( + schema, + futures::stream::iter(vec![Ok(batch)]), + )), + &build_ctx(object_store_url.as_ref()), + ) + .await + .unwrap(); + + // assert written + let mut written = parquet_sink.written(); + let written = written.drain(); + assert_eq!( + written.len(), + 1, + "expected a single parquet file to be written, instead found {}", + written.len() + ); + + // check the file metadata + let ( + path, + FileMetaData { + num_rows, schema, .. 
+ }, + ) = written.take(1).next().unwrap(); + let path_parts = path.parts().collect::<Vec<_>>(); + assert_eq!(path_parts.len(), 1, "should not have path prefix"); + + assert_eq!(num_rows, 2, "file metadata to have 2 rows"); + assert!( + schema.iter().any(|col_schema| col_schema.name == "a"), + "output file metadata should contain col a" + ); + assert!( + schema.iter().any(|col_schema| col_schema.name == "b"), + "output file metadata should contain col b" + ); + + Ok(()) + } + + #[tokio::test] + async fn parquet_sink_write_partitions() -> Result<()> { + let field_a = Field::new("a", DataType::Utf8, false); + let field_b = Field::new("b", DataType::Utf8, false); + let schema = Arc::new(Schema::new(vec![field_a, field_b])); + let object_store_url = ObjectStoreUrl::local_filesystem(); + + // set file config to include partitioning on field_a + let file_sink_config = FileSinkConfig { + object_store_url: object_store_url.clone(), + file_groups: vec![PartitionedFile::new("/tmp".to_string(), 1)], + table_paths: vec![ListingTableUrl::parse("file:///")?], + output_schema: schema.clone(), + table_partition_cols: vec![("a".to_string(), DataType::Utf8)], // add partitioning + overwrite: true, + }; + let parquet_sink = Arc::new(ParquetSink::new( + file_sink_config, + TableParquetOptions::default(), + )); + + // create data with 2 partitions + let col_a: ArrayRef = Arc::new(StringArray::from(vec!["foo", "bar"])); + let col_b: ArrayRef = Arc::new(StringArray::from(vec!["baz", "baz"])); + let batch = RecordBatch::try_from_iter(vec![("a", col_a), ("b", col_b)]).unwrap(); + + // write stream + parquet_sink + .write_all( + Box::pin(RecordBatchStreamAdapter::new( + schema, + futures::stream::iter(vec![Ok(batch)]), + )), + &build_ctx(object_store_url.as_ref()), + ) + .await + .unwrap(); + + // assert written + let mut written = parquet_sink.written(); + let written = written.drain(); + assert_eq!( + written.len(), + 2, + "expected two parquet files to be written, instead found {}", + written.len() + ); + + // check the file metadata includes partitions + let mut expected_partitions = std::collections::HashSet::from(["a=foo", "a=bar"]); + for ( + path, + FileMetaData { + num_rows, schema, .. 
+ }, + ) in written.take(2) + { + let path_parts = path.parts().collect::<Vec<_>>(); + assert_eq!(path_parts.len(), 2, "should have path prefix"); + + let prefix = path_parts[0].as_ref(); + assert!( + expected_partitions.contains(prefix), + "expected path prefix to match partition, instead found {:?}", + prefix + ); + expected_partitions.remove(prefix); + + assert_eq!(num_rows, 1, "file metadata to have 1 row"); + assert!( + !schema.iter().any(|col_schema| col_schema.name == "a"), + "output file metadata will not contain partitioned col a" + ); + assert!( + schema.iter().any(|col_schema| col_schema.name == "b"), + "output file metadata should contain col b" + ); + } + + Ok(()) + } } diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index 88476ffb0966..2a2551236e1b 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -51,8 +51,7 @@ use crate::{ use arrow::datatypes::{DataType, Field, SchemaBuilder, SchemaRef}; use arrow_schema::Schema; use datafusion_common::{ - internal_err, plan_err, project_schema, Constraints, FileType, FileTypeWriterOptions, - SchemaExt, ToDFSchema, + internal_err, plan_err, project_schema, Constraints, FileType, SchemaExt, ToDFSchema, }; use datafusion_execution::cache::cache_manager::FileStatisticsCache; use datafusion_execution::cache::cache_unit::DefaultFileStatisticsCache; @@ -247,9 +246,6 @@ pub struct ListingOptions { /// multiple equivalent orderings, the outer `Vec` will have a /// single element. pub file_sort_order: Vec<Vec<Expr>>, - /// This setting holds file format specific options which should be used - /// when inserting into this table. - pub file_type_write_options: Option<FileTypeWriterOptions>, } impl ListingOptions { @@ -267,7 +263,6 @@ impl ListingOptions { collect_stat: true, target_partitions: 1, file_sort_order: vec![], - file_type_write_options: None, } } @@ -418,15 +413,6 @@ impl ListingOptions { self } - /// Configure file format specific writing options. - pub fn with_write_options( - mut self, - file_type_write_options: FileTypeWriterOptions, - ) -> Self { - self.file_type_write_options = Some(file_type_write_options); - self - } - /// Infer the schema of the files at the given path on the provided object store. /// The inferred schema does not include the partitioning columns. 
/// @@ -760,15 +746,6 @@ impl TableProvider for ListingTable { .await?; let file_groups = file_list_stream.try_collect::>().await?; - let file_format = self.options().format.as_ref(); - - let file_type_writer_options = match &self.options().file_type_write_options { - Some(opt) => opt.clone(), - None => FileTypeWriterOptions::build_default( - &file_format.file_type(), - state.config_options(), - )?, - }; // Sink related option, apart from format let config = FileSinkConfig { @@ -778,7 +755,6 @@ impl TableProvider for ListingTable { output_schema: self.schema(), table_partition_cols: self.options.table_partition_cols.clone(), overwrite, - file_type_writer_options, }; let unsorted: Vec> = vec![]; diff --git a/datafusion/core/src/datasource/listing_table_factory.rs b/datafusion/core/src/datasource/listing_table_factory.rs index bcf1f81b3a0b..4e126bbba9f9 100644 --- a/datafusion/core/src/datasource/listing_table_factory.rs +++ b/datafusion/core/src/datasource/listing_table_factory.rs @@ -24,8 +24,7 @@ use std::sync::Arc; #[cfg(feature = "parquet")] use crate::datasource::file_format::parquet::ParquetFormat; use crate::datasource::file_format::{ - arrow::ArrowFormat, avro::AvroFormat, csv::CsvFormat, - file_compression_type::FileCompressionType, json::JsonFormat, FileFormat, + arrow::ArrowFormat, avro::AvroFormat, csv::CsvFormat, json::JsonFormat, FileFormat, }; use crate::datasource::listing::{ ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl, @@ -35,7 +34,7 @@ use crate::datasource::TableProvider; use crate::execution::context::SessionState; use arrow::datatypes::{DataType, SchemaRef}; -use datafusion_common::file_options::{FileTypeWriterOptions, StatementOptions}; +use datafusion_common::config::TableOptions; use datafusion_common::{arrow_datafusion_err, DataFusionError, FileType}; use datafusion_expr::CreateExternalTable; @@ -59,34 +58,32 @@ impl TableProviderFactory for ListingTableFactory { state: &SessionState, cmd: &CreateExternalTable, ) -> datafusion_common::Result> { - let file_compression_type = FileCompressionType::from(cmd.file_compression_type); + let mut table_options = + TableOptions::default_from_session_config(state.config_options()); let file_type = FileType::from_str(cmd.file_type.as_str()).map_err(|_| { DataFusionError::Execution(format!("Unknown FileType {}", cmd.file_type)) })?; - + table_options.set_file_format(file_type.clone()); + table_options.alter_with_string_hash_map(&cmd.options)?; let file_extension = get_extension(cmd.location.as_str()); - let file_format: Arc = match file_type { FileType::CSV => { - let mut statement_options = StatementOptions::from(&cmd.options); - let mut csv_format = CsvFormat::default() - .with_has_header(cmd.has_header) - .with_delimiter(cmd.delimiter as u8) - .with_file_compression_type(file_compression_type); - if let Some(quote) = statement_options.take_str_option("quote") { - csv_format = csv_format.with_quote(quote.as_bytes()[0]) - } - if let Some(escape) = statement_options.take_str_option("escape") { - csv_format = csv_format.with_escape(Some(escape.as_bytes()[0])) - } - Arc::new(csv_format) + let mut csv_options = table_options.csv; + csv_options.has_header = cmd.has_header; + csv_options.delimiter = cmd.delimiter as u8; + csv_options.compression = cmd.file_compression_type; + Arc::new(CsvFormat::default().with_options(csv_options)) } #[cfg(feature = "parquet")] - FileType::PARQUET => Arc::new(ParquetFormat::default()), + FileType::PARQUET => { + 
Arc::new(ParquetFormat::default().with_options(table_options.parquet)) + } FileType::AVRO => Arc::new(AvroFormat), - FileType::JSON => Arc::new( - JsonFormat::default().with_file_compression_type(file_compression_type), - ), + FileType::JSON => { + let mut json_options = table_options.json; + json_options.compression = cmd.file_compression_type; + Arc::new(JsonFormat::default().with_options(json_options)) + } FileType::ARROW => Arc::new(ArrowFormat), }; @@ -133,48 +130,6 @@ impl TableProviderFactory for ListingTableFactory { (Some(schema), table_partition_cols) }; - let mut statement_options = StatementOptions::from(&cmd.options); - - statement_options.take_str_option("unbounded"); - - let file_type = file_format.file_type(); - - // Use remaining options and session state to build FileTypeWriterOptions - let file_type_writer_options = FileTypeWriterOptions::build( - &file_type, - state.config_options(), - &statement_options, - )?; - - // Some options have special syntax which takes precedence - // e.g. "WITH HEADER ROW" overrides (header false, ...) - let file_type_writer_options = match file_type { - FileType::CSV => { - let mut csv_writer_options = - file_type_writer_options.try_into_csv()?.clone(); - csv_writer_options.writer_options = csv_writer_options - .writer_options - .with_header(cmd.has_header) - .with_delimiter(cmd.delimiter.try_into().map_err(|_| { - DataFusionError::Internal( - "Unable to convert CSV delimiter into u8".into(), - ) - })?); - csv_writer_options.compression = cmd.file_compression_type; - FileTypeWriterOptions::CSV(csv_writer_options) - } - FileType::JSON => { - let mut json_writer_options = - file_type_writer_options.try_into_json()?.clone(); - json_writer_options.compression = cmd.file_compression_type; - FileTypeWriterOptions::JSON(json_writer_options) - } - #[cfg(feature = "parquet")] - FileType::PARQUET => file_type_writer_options, - FileType::ARROW => file_type_writer_options, - FileType::AVRO => file_type_writer_options, - }; - let table_path = ListingTableUrl::parse(&cmd.location)?; let options = ListingOptions::new(file_format) @@ -182,8 +137,7 @@ impl TableProviderFactory for ListingTableFactory { .with_file_extension(file_extension) .with_target_partitions(state.config().target_partitions()) .with_table_partition_cols(table_partition_cols) - .with_file_sort_order(cmd.order_exprs.clone()) - .with_write_options(file_type_writer_options); + .with_file_sort_order(cmd.order_exprs.clone()); let resolved_schema = match provided_schema { None => options.infer_schema(state, &table_path).await?, @@ -258,4 +212,50 @@ mod tests { let listing_options = listing_table.options(); assert_eq!(".tbl", listing_options.file_extension); } + + #[tokio::test] + async fn test_create_using_non_std_file_ext_csv_options() { + let csv_file = tempfile::Builder::new() + .prefix("foo") + .suffix(".tbl") + .tempfile() + .unwrap(); + + let factory = ListingTableFactory::new(); + let context = SessionContext::new(); + let state = context.state(); + let name = OwnedTableReference::bare("foo".to_string()); + + let mut options = HashMap::new(); + options.insert("csv.schema_infer_max_rec".to_owned(), "1000".to_owned()); + let cmd = CreateExternalTable { + name, + location: csv_file.path().to_str().unwrap().to_string(), + file_type: "csv".to_string(), + has_header: true, + delimiter: ',', + schema: Arc::new(DFSchema::empty()), + table_partition_cols: vec![], + if_not_exists: false, + file_compression_type: CompressionTypeVariant::UNCOMPRESSED, + definition: None, + order_exprs: vec![], + 
unbounded: false, + options, + constraints: Constraints::empty(), + column_defaults: HashMap::new(), + }; + let table_provider = factory.create(&state, &cmd).await.unwrap(); + let listing_table = table_provider + .as_any() + .downcast_ref::() + .unwrap(); + + let format = listing_table.options().format.clone(); + let csv_format = format.as_any().downcast_ref::().unwrap(); + let csv_options = csv_format.options().clone(); + assert_eq!(csv_options.schema_infer_max_rec, 1000); + let listing_options = listing_table.options(); + assert_eq!(".tbl", listing_options.file_extension); + } } diff --git a/datafusion/core/src/datasource/physical_plan/json.rs b/datafusion/core/src/datasource/physical_plan/json.rs index ca466b5c6a92..068426e0fdcb 100644 --- a/datafusion/core/src/datasource/physical_plan/json.rs +++ b/datafusion/core/src/datasource/physical_plan/json.rs @@ -756,7 +756,7 @@ mod tests { let out_dir = tmp_dir.as_ref().to_str().unwrap().to_string() + "/out/"; let out_dir_url = "file://local/out/"; let df = ctx.sql("SELECT a, b FROM test").await?; - df.write_json(out_dir_url, DataFrameWriteOptions::new()) + df.write_json(out_dir_url, DataFrameWriteOptions::new(), None) .await?; // create a new context and verify that the results were saved to a partitioned csv file @@ -850,7 +850,7 @@ mod tests { let df = ctx.read_csv("tests/data/corrupt.csv", options).await?; let out_dir_url = "file://local/out"; let e = df - .write_json(out_dir_url, DataFrameWriteOptions::new()) + .write_json(out_dir_url, DataFrameWriteOptions::new(), None) .await .expect_err("should fail because input file does not match inferred schema"); assert_eq!(e.strip_backtrace(), "Arrow error: Parser error: Error while parsing value d for column 0 at line 4"); diff --git a/datafusion/core/src/datasource/physical_plan/mod.rs b/datafusion/core/src/datasource/physical_plan/mod.rs index 08f1cc9f2726..ddb8d032f3d8 100644 --- a/datafusion/core/src/datasource/physical_plan/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/mod.rs @@ -26,16 +26,16 @@ mod file_stream; mod json; #[cfg(feature = "parquet")] pub mod parquet; -pub use file_groups::FileGroupPartitioner; pub(crate) use self::csv::plan_to_csv; -pub use self::csv::{CsvConfig, CsvExec, CsvOpener}; pub(crate) use self::json::plan_to_json; #[cfg(feature = "parquet")] pub use self::parquet::{ParquetExec, ParquetFileMetrics, ParquetFileReaderFactory}; pub use arrow_file::ArrowExec; pub use avro::AvroExec; +pub use csv::{CsvConfig, CsvExec, CsvOpener}; +pub use file_groups::FileGroupPartitioner; pub use file_scan_config::{ wrap_partition_type_in_dict, wrap_partition_value_in_dict, FileScanConfig, }; @@ -66,7 +66,7 @@ use arrow::{ datatypes::{DataType, Schema, SchemaRef}, record_batch::{RecordBatch, RecordBatchOptions}, }; -use datafusion_common::{file_options::FileTypeWriterOptions, plan_err}; +use datafusion_common::plan_err; use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr::PhysicalSortExpr; @@ -90,8 +90,6 @@ pub struct FileSinkConfig { pub table_partition_cols: Vec<(String, DataType)>, /// Controls whether existing data should be overwritten by this sink pub overwrite: bool, - /// Contains settings specific to writing a given FileType, e.g. 
parquet max_row_group_size - pub file_type_writer_options: FileTypeWriterOptions, } impl FileSinkConfig { diff --git a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs index 2f3b151e7763..2cfbb578da66 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs @@ -31,7 +31,7 @@ use crate::datasource::physical_plan::{ FileMeta, FileScanConfig, SchemaAdapter, }; use crate::{ - config::ConfigOptions, + config::{ConfigOptions, TableParquetOptions}, datasource::listing::ListingTableUrl, error::{DataFusionError, Result}, execution::context::TaskContext, @@ -73,18 +73,6 @@ pub use metrics::ParquetFileMetrics; /// Execution plan for scanning one or more Parquet partitions #[derive(Debug, Clone)] pub struct ParquetExec { - /// Override for `Self::with_pushdown_filters`. If None, uses - /// values from base_config - pushdown_filters: Option, - /// Override for `Self::with_reorder_filters`. If None, uses - /// values from base_config - reorder_filters: Option, - /// Override for `Self::with_enable_page_index`. If None, uses - /// values from base_config - enable_page_index: Option, - /// Override for `Self::with_enable_bloom_filter`. If None, uses - /// values from base_config - enable_bloom_filter: Option, /// Base configuration for this scan base_config: FileScanConfig, projected_statistics: Statistics, @@ -101,6 +89,8 @@ pub struct ParquetExec { /// Optional user defined parquet file reader factory parquet_file_reader_factory: Option>, cache: PlanProperties, + /// Parquet Options + parquet_options: TableParquetOptions, } impl ParquetExec { @@ -109,6 +99,7 @@ impl ParquetExec { base_config: FileScanConfig, predicate: Option>, metadata_size_hint: Option, + parquet_options: TableParquetOptions, ) -> Self { debug!("Creating ParquetExec, files: {:?}, projection {:?}, predicate: {:?}, limit: {:?}", base_config.file_groups, base_config.projection, predicate, base_config.limit); @@ -154,10 +145,6 @@ impl ParquetExec { &base_config, ); Self { - pushdown_filters: None, - reorder_filters: None, - enable_page_index: None, - enable_bloom_filter: None, base_config, projected_statistics, metrics, @@ -167,6 +154,7 @@ impl ParquetExec { metadata_size_hint, parquet_file_reader_factory: None, cache, + parquet_options, } } @@ -208,14 +196,13 @@ impl ParquetExec { /// /// [`Expr`]: datafusion_expr::Expr pub fn with_pushdown_filters(mut self, pushdown_filters: bool) -> Self { - self.pushdown_filters = Some(pushdown_filters); + self.parquet_options.global.pushdown_filters = pushdown_filters; self } /// Return the value described in [`Self::with_pushdown_filters`] - fn pushdown_filters(&self, config_options: &ConfigOptions) -> bool { - self.pushdown_filters - .unwrap_or(config_options.execution.parquet.pushdown_filters) + fn pushdown_filters(&self) -> bool { + self.parquet_options.global.pushdown_filters } /// If true, the `RowFilter` made by `pushdown_filters` may try to @@ -225,14 +212,13 @@ impl ParquetExec { /// /// [`Expr`]: datafusion_expr::Expr pub fn with_reorder_filters(mut self, reorder_filters: bool) -> Self { - self.reorder_filters = Some(reorder_filters); + self.parquet_options.global.reorder_filters = reorder_filters; self } /// Return the value described in [`Self::with_reorder_filters`] - fn reorder_filters(&self, config_options: &ConfigOptions) -> bool { - self.reorder_filters - .unwrap_or(config_options.execution.parquet.reorder_filters) + fn 
reorder_filters(&self) -> bool { + self.parquet_options.global.reorder_filters } /// If enabled, the reader will read the page index @@ -240,26 +226,24 @@ impl ParquetExec { /// via `RowSelector` and `RowFilter` by /// eliminating unnecessary IO and decoding pub fn with_enable_page_index(mut self, enable_page_index: bool) -> Self { - self.enable_page_index = Some(enable_page_index); + self.parquet_options.global.enable_page_index = enable_page_index; self } /// Return the value described in [`Self::with_enable_page_index`] - fn enable_page_index(&self, config_options: &ConfigOptions) -> bool { - self.enable_page_index - .unwrap_or(config_options.execution.parquet.enable_page_index) + fn enable_page_index(&self) -> bool { + self.parquet_options.global.enable_page_index } /// If enabled, the reader will read by the bloom filter pub fn with_enable_bloom_filter(mut self, enable_bloom_filter: bool) -> Self { - self.enable_bloom_filter = Some(enable_bloom_filter); + self.parquet_options.global.bloom_filter_enabled = enable_bloom_filter; self } /// Return the value described in [`Self::with_enable_bloom_filter`] - fn enable_bloom_filter(&self, config_options: &ConfigOptions) -> bool { - self.enable_bloom_filter - .unwrap_or(config_options.execution.parquet.bloom_filter_enabled) + fn enable_bloom_filter(&self) -> bool { + self.parquet_options.global.bloom_filter_enabled } fn output_partitioning_helper(file_config: &FileScanConfig) -> Partitioning { @@ -397,8 +381,6 @@ impl ExecutionPlan for ParquetExec { }) })?; - let config_options = ctx.session_config().options(); - let opener = ParquetOpener { partition_index, projection: Arc::from(projection), @@ -411,10 +393,10 @@ impl ExecutionPlan for ParquetExec { metadata_size_hint: self.metadata_size_hint, metrics: self.metrics.clone(), parquet_file_reader_factory, - pushdown_filters: self.pushdown_filters(config_options), - reorder_filters: self.reorder_filters(config_options), - enable_page_index: self.enable_page_index(config_options), - enable_bloom_filter: self.enable_bloom_filter(config_options), + pushdown_filters: self.pushdown_filters(), + reorder_filters: self.reorder_filters(), + enable_page_index: self.enable_page_index(), + enable_bloom_filter: self.enable_bloom_filter(), }; let stream = @@ -917,6 +899,7 @@ mod tests { }, predicate, None, + Default::default(), ); if pushdown_predicate { @@ -1573,6 +1556,7 @@ mod tests { }, None, None, + Default::default(), ); assert_eq!( parquet_exec @@ -1693,6 +1677,7 @@ mod tests { }, None, None, + Default::default(), ); assert_eq!( parquet_exec.cache.output_partitioning().partition_count(), @@ -1759,6 +1744,7 @@ mod tests { }, None, None, + Default::default(), ); let mut results = parquet_exec.execute(0, state.task_ctx())?; @@ -2021,7 +2007,7 @@ mod tests { ctx.runtime_env().register_object_store(&local_url, local); // Configure listing options - let file_format = ParquetFormat::default().with_enable_pruning(Some(true)); + let file_format = ParquetFormat::default().with_enable_pruning(true); let listing_options = ListingOptions::new(Arc::new(file_format)) .with_file_extension(FileType::PARQUET.get_ext()); diff --git a/datafusion/core/src/datasource/provider.rs b/datafusion/core/src/datasource/provider.rs index e769084df636..f2e3e907e5ce 100644 --- a/datafusion/core/src/datasource/provider.rs +++ b/datafusion/core/src/datasource/provider.rs @@ -166,6 +166,7 @@ pub trait TableProvider: Sync + Send { /// Tests whether the table provider can make use of any or all filter expressions /// to optimise data 
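A short sketch of the changed `with_enable_pruning` signature, which now takes a plain `bool` instead of `Some(true)`; the extension string stands in for the `FileType::PARQUET.get_ext()` used in the test:

    use std::sync::Arc;
    use datafusion::datasource::file_format::parquet::ParquetFormat;
    use datafusion::datasource::listing::ListingOptions;

    let file_format = ParquetFormat::default().with_enable_pruning(true);
    let listing_options =
        ListingOptions::new(Arc::new(file_format)).with_file_extension(".parquet");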
retrieval. + /// Note: the returned vector much have the same size as the filters argument. #[allow(deprecated)] fn supports_filters_pushdown( &self, diff --git a/datafusion/core/src/execution/context/avro.rs b/datafusion/core/src/execution/context/avro.rs index d60e79862ef2..1eca3b133757 100644 --- a/datafusion/core/src/execution/context/avro.rs +++ b/datafusion/core/src/execution/context/avro.rs @@ -43,7 +43,8 @@ impl SessionContext { table_path: &str, options: AvroReadOptions<'_>, ) -> Result<()> { - let listing_options = options.to_listing_options(&self.copied_config()); + let listing_options = options + .to_listing_options(&self.copied_config(), self.copied_table_options()); self.register_listing_table( name, @@ -60,6 +61,7 @@ impl SessionContext { #[cfg(test)] mod tests { use super::*; + use async_trait::async_trait; // Test for compilation error when calling read_* functions from an #[async_trait] function. diff --git a/datafusion/core/src/execution/context/csv.rs b/datafusion/core/src/execution/context/csv.rs index f3675422c7d5..f59d77664645 100644 --- a/datafusion/core/src/execution/context/csv.rs +++ b/datafusion/core/src/execution/context/csv.rs @@ -59,7 +59,8 @@ impl SessionContext { table_path: &str, options: CsvReadOptions<'_>, ) -> Result<()> { - let listing_options = options.to_listing_options(&self.copied_config()); + let listing_options = options + .to_listing_options(&self.copied_config(), self.copied_table_options()); self.register_listing_table( name, @@ -88,6 +89,7 @@ mod tests { use super::*; use crate::assert_batches_eq; use crate::test_util::{plan_and_collect, populate_csv_partitions}; + use async_trait::async_trait; use tempfile::TempDir; diff --git a/datafusion/core/src/execution/context/json.rs b/datafusion/core/src/execution/context/json.rs index f67693aa8f31..c21e32cfdefb 100644 --- a/datafusion/core/src/execution/context/json.rs +++ b/datafusion/core/src/execution/context/json.rs @@ -45,7 +45,8 @@ impl SessionContext { table_path: &str, options: NdJsonReadOptions<'_>, ) -> Result<()> { - let listing_options = options.to_listing_options(&self.copied_config()); + let listing_options = options + .to_listing_options(&self.copied_config(), self.copied_table_options()); self.register_listing_table( name, diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index dc4e39d37c5f..8bc65a0ca2cc 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -17,109 +17,85 @@ //! 
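A hedged sketch of the `supports_filters_pushdown` contract noted above, inside a hypothetical `TableProvider` impl (types as used elsewhere in this module): whatever the provider decides, it returns exactly one entry per input filter:

    fn supports_filters_pushdown(
        &self,
        filters: &[&Expr],
    ) -> Result<Vec<TableProviderFilterPushDown>> {
        // Same length as `filters`; this placeholder declines pushdown for all of them.
        Ok(vec![TableProviderFilterPushDown::Unsupported; filters.len()])
    }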
[`SessionContext`] contains methods for registering data sources and executing queries -mod avro; -mod csv; -mod json; -#[cfg(feature = "parquet")] -mod parquet; +use std::collections::{hash_map::Entry, HashMap, HashSet}; +use std::fmt::Debug; +use std::ops::ControlFlow; +use std::string::String; +use std::sync::{Arc, Weak}; +use super::options::ReadOptions; use crate::{ - catalog::{CatalogProviderList, MemoryCatalogProviderList}, + catalog::information_schema::{InformationSchemaProvider, INFORMATION_SCHEMA}, + catalog::listing_schema::ListingSchemaProvider, + catalog::schema::{MemorySchemaProvider, SchemaProvider}, + catalog::{ + CatalogProvider, CatalogProviderList, MemoryCatalogProvider, + MemoryCatalogProviderList, + }, + config::ConfigOptions, + dataframe::DataFrame, datasource::{ cte_worktable::CteWorkTable, function::{TableFunction, TableFunctionImpl}, - listing::{ListingOptions, ListingTable}, - provider::TableProviderFactory, + listing::{ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl}, + object_store::ObjectStoreUrl, + provider::{DefaultTableFactory, TableProviderFactory}, + }, + datasource::{provider_as_source, MemTable, TableProvider, ViewTable}, + error::{DataFusionError, Result}, + execution::{options::ArrowReadOptions, runtime_env::RuntimeEnv, FunctionRegistry}, + logical_expr::{ + CreateCatalog, CreateCatalogSchema, CreateExternalTable, CreateFunction, + CreateMemoryTable, CreateView, DropCatalogSchema, DropFunction, DropTable, + DropView, Explain, LogicalPlan, LogicalPlanBuilder, PlanType, SetVariable, + TableSource, TableType, ToStringifiedPlan, UNNAMED_TABLE, }, - datasource::{MemTable, ViewTable}, - logical_expr::{PlanType, ToStringifiedPlan}, - optimizer::optimizer::Optimizer, + optimizer::analyzer::{Analyzer, AnalyzerRule}, + optimizer::optimizer::{Optimizer, OptimizerConfig, OptimizerRule}, physical_optimizer::optimizer::{PhysicalOptimizer, PhysicalOptimizerRule}, + physical_plan::{udaf::AggregateUDF, udf::ScalarUDF, ExecutionPlan}, + physical_planner::{DefaultPhysicalPlanner, PhysicalPlanner}, + variable::{VarProvider, VarType}, }; + +use arrow::datatypes::{DataType, SchemaRef}; +use arrow::record_batch::RecordBatch; use arrow_schema::Schema; use datafusion_common::{ alias::AliasGenerator, + config::{ConfigExtension, TableOptions}, exec_err, not_impl_err, plan_datafusion_err, plan_err, tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor}, + OwnedTableReference, SchemaReference, }; use datafusion_execution::registry::SerializerRegistry; -pub use datafusion_expr::execution_props::ExecutionProps; -use datafusion_expr::var_provider::is_system_variables; use datafusion_expr::{ logical_plan::{DdlStatement, Statement}, + var_provider::is_system_variables, Expr, StringifiedPlan, UserDefinedLogicalNode, WindowUDF, }; -use parking_lot::RwLock; -use std::collections::hash_map::Entry; -use std::string::String; -use std::sync::Arc; -use std::{ - collections::{HashMap, HashSet}, - fmt::Debug, -}; -use std::{ops::ControlFlow, sync::Weak}; - -use arrow::datatypes::{DataType, SchemaRef}; -use arrow::record_batch::RecordBatch; - -use crate::catalog::{ - schema::{MemorySchemaProvider, SchemaProvider}, - {CatalogProvider, MemoryCatalogProvider}, -}; -use crate::dataframe::DataFrame; -use crate::datasource::{ - listing::{ListingTableConfig, ListingTableUrl}, - provider_as_source, TableProvider, -}; -use crate::error::{DataFusionError, Result}; -use crate::logical_expr::{ - CreateCatalog, CreateCatalogSchema, CreateExternalTable, CreateFunction, - 
CreateMemoryTable, CreateView, DropCatalogSchema, DropFunction, DropTable, DropView, - Explain, LogicalPlan, LogicalPlanBuilder, SetVariable, TableSource, TableType, - UNNAMED_TABLE, -}; -use crate::optimizer::OptimizerRule; use datafusion_sql::{ - parser::{CopyToSource, CopyToStatement}, - planner::ParserOptions, + parser::{CopyToSource, CopyToStatement, DFParser}, + planner::{object_name_to_table_reference, ContextProvider, ParserOptions, SqlToRel}, ResolvedTableReference, TableReference, }; -use sqlparser::dialect::dialect_from_str; -use crate::config::ConfigOptions; -use crate::execution::{runtime_env::RuntimeEnv, FunctionRegistry}; -use crate::physical_plan::udaf::AggregateUDF; -use crate::physical_plan::udf::ScalarUDF; -use crate::physical_plan::ExecutionPlan; -use crate::physical_planner::DefaultPhysicalPlanner; -use crate::physical_planner::PhysicalPlanner; -use crate::variable::{VarProvider, VarType}; use async_trait::async_trait; use chrono::{DateTime, Utc}; -use datafusion_common::{OwnedTableReference, SchemaReference}; -use datafusion_sql::{ - parser::DFParser, - planner::{ContextProvider, SqlToRel}, -}; +use parking_lot::RwLock; +use sqlparser::dialect::dialect_from_str; use url::Url; - -use crate::catalog::information_schema::{InformationSchemaProvider, INFORMATION_SCHEMA}; -use crate::catalog::listing_schema::ListingSchemaProvider; -use crate::datasource::object_store::ObjectStoreUrl; -use datafusion_optimizer::{ - analyzer::{Analyzer, AnalyzerRule}, - OptimizerConfig, -}; -use datafusion_sql::planner::object_name_to_table_reference; use uuid::Uuid; -// backwards compatibility -use crate::datasource::provider::DefaultTableFactory; -use crate::execution::options::ArrowReadOptions; pub use datafusion_execution::config::SessionConfig; pub use datafusion_execution::TaskContext; +pub use datafusion_expr::execution_props::ExecutionProps; -use super::options::ReadOptions; +mod avro; +mod csv; +mod json; +#[cfg(feature = "parquet")] +mod parquet; /// DataFilePaths adds a method to convert strings and vector of strings to vector of [`ListingTableUrl`] URLs. /// This allows methods such [`SessionContext::read_csv`] and [`SessionContext::read_avro`] @@ -407,6 +383,11 @@ impl SessionContext { self.state.read().config.clone() } + /// Return a copied version of config for this Session + pub fn copied_table_options(&self) -> TableOptions { + self.state.read().default_table_options().clone() + } + /// Creates a [`DataFrame`] from SQL query text. 
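A minimal sketch of the pattern these register_*/read_* paths now share: listing options are resolved from the copied session config plus the session's table options (here using `CsvReadOptions` as a representative read-options type):

    use datafusion::prelude::{CsvReadOptions, SessionContext};

    let ctx = SessionContext::new();
    let options = CsvReadOptions::new();
    let listing_options =
        options.to_listing_options(&ctx.copied_config(), ctx.copied_table_options());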
/// /// Note: This API implements DDL statements such as `CREATE TABLE` and @@ -936,7 +917,8 @@ impl SessionContext { ) -> Result { let table_paths = table_paths.to_urls()?; let session_config = self.copied_config(); - let listing_options = options.to_listing_options(&session_config); + let listing_options = + options.to_listing_options(&session_config, self.copied_table_options()); let option_extension = listing_options.file_extension.clone(); @@ -1073,7 +1055,8 @@ impl SessionContext { table_path: &str, options: ArrowReadOptions<'_>, ) -> Result<()> { - let listing_options = options.to_listing_options(&self.copied_config()); + let listing_options = options + .to_listing_options(&self.copied_config(), self.copied_table_options()); self.register_listing_table( name, @@ -1262,6 +1245,16 @@ impl SessionContext { pub fn register_catalog_list(&mut self, catalog_list: Arc) { self.state.write().catalog_list = catalog_list; } + + /// Registers a [`ConfigExtension`] as a table option extention that can be + /// referenced from SQL statements executed against this context. + pub fn register_table_options_extension(&self, extension: T) { + self.state + .write() + .table_option_namespace + .extensions + .insert(extension) + } } impl FunctionRegistry for SessionContext { @@ -1378,6 +1371,8 @@ pub struct SessionState { serializer_registry: Arc, /// Session configuration config: SessionConfig, + /// Table options + table_option_namespace: TableOptions, /// Execution properties execution_props: ExecutionProps, /// TableProviderFactories for different file formats. @@ -1478,6 +1473,9 @@ impl SessionState { aggregate_functions: HashMap::new(), window_functions: HashMap::new(), serializer_registry: Arc::new(EmptySerializerRegistry), + table_option_namespace: TableOptions::default_from_session_config( + config.options(), + ), config, execution_props: ExecutionProps::new(), runtime_env: runtime, @@ -1662,6 +1660,15 @@ impl SessionState { self } + /// Adds a new [`ConfigExtension`] to TableOptions + pub fn add_table_options_extension( + mut self, + extension: T, + ) -> Self { + self.table_option_namespace.extensions.insert(extension); + self + } + /// Registers a [`FunctionFactory`] to handle `CREATE FUNCTION` statements pub fn with_function_factory( mut self, @@ -1990,6 +1997,11 @@ impl SessionState { self.config.options() } + /// return the TableOptions options with its extensions + pub fn default_table_options(&self) -> &TableOptions { + &self.table_option_namespace + } + /// Get a new TaskContext to run in this session pub fn task_ctx(&self) -> Arc { Arc::new(TaskContext::from(self)) @@ -2155,10 +2167,16 @@ impl FunctionRegistry for SessionState { &mut self, udaf: Arc, ) -> Result>> { + udaf.aliases().iter().for_each(|alias| { + self.aggregate_functions.insert(alias.clone(), udaf.clone()); + }); Ok(self.aggregate_functions.insert(udaf.name().into(), udaf)) } fn register_udwf(&mut self, udwf: Arc) -> Result>> { + udwf.aliases().iter().for_each(|alias| { + self.window_functions.insert(alias.clone(), udwf.clone()); + }); Ok(self.window_functions.insert(udwf.name().into(), udwf)) } @@ -2173,11 +2191,23 @@ impl FunctionRegistry for SessionState { } fn deregister_udaf(&mut self, name: &str) -> Result>> { - Ok(self.aggregate_functions.remove(name)) + let udaf = self.aggregate_functions.remove(name); + if let Some(udaf) = &udaf { + for alias in udaf.aliases() { + self.aggregate_functions.remove(alias); + } + } + Ok(udaf) } fn deregister_udwf(&mut self, name: &str) -> Result>> { - 
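A hedged sketch of wiring a custom `ConfigExtension` into the new table-option namespace; `MyFormatExt` is hypothetical and stands for any type implementing `ConfigExtension`:

    // `MyFormatExt` is hypothetical; any ConfigExtension implementation works here.
    let ctx = SessionContext::new();
    ctx.register_table_options_extension(MyFormatExt::default());

    // Equivalent at SessionState build time:
    // let state = state.add_table_options_extension(MyFormatExt::default());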
Ok(self.window_functions.remove(name)) + let udwf = self.window_functions.remove(name); + if let Some(udwf) = &udwf { + for alias in udwf.aliases() { + self.window_functions.remove(alias); + } + } + Ok(udwf) } } @@ -2332,8 +2362,11 @@ impl<'a> TreeNodeVisitor for BadPlanVisitor<'a> { #[cfg(test)] mod tests { - use super::super::options::CsvReadOptions; - use super::*; + use std::env; + use std::path::PathBuf; + use std::sync::Weak; + + use super::{super::options::CsvReadOptions, *}; use crate::assert_batches_eq; use crate::execution::context::QueryPlanner; use crate::execution::memory_pool::MemoryConsumer; @@ -2341,12 +2374,11 @@ mod tests { use crate::test; use crate::test_util::{plan_and_collect, populate_csv_partitions}; use crate::variable::VarType; - use async_trait::async_trait; + use datafusion_common_runtime::SpawnedTask; use datafusion_expr::Expr; - use std::env; - use std::path::PathBuf; - use std::sync::Weak; + + use async_trait::async_trait; use tempfile::TempDir; #[tokio::test] diff --git a/datafusion/core/src/execution/context/parquet.rs b/datafusion/core/src/execution/context/parquet.rs index 7825d9b88297..528bb0fa05af 100644 --- a/datafusion/core/src/execution/context/parquet.rs +++ b/datafusion/core/src/execution/context/parquet.rs @@ -17,11 +17,11 @@ use std::sync::Arc; -use crate::datasource::physical_plan::parquet::plan_to_parquet; -use parquet::file::properties::WriterProperties; - use super::super::options::{ParquetReadOptions, ReadOptions}; use super::{DataFilePaths, DataFrame, ExecutionPlan, Result, SessionContext}; +use crate::datasource::physical_plan::parquet::plan_to_parquet; + +use parquet::file::properties::WriterProperties; impl SessionContext { /// Creates a [`DataFrame`] for reading a Parquet data source. @@ -46,7 +46,8 @@ impl SessionContext { table_path: &str, options: ParquetReadOptions<'_>, ) -> Result<()> { - let listing_options = options.to_listing_options(&self.state.read().config); + let listing_options = options + .to_listing_options(&self.copied_config(), self.copied_table_options()); self.register_listing_table( name, @@ -72,18 +73,19 @@ impl SessionContext { #[cfg(test)] mod tests { - use async_trait::async_trait; - + use super::*; use crate::arrow::array::{Float32Array, Int32Array}; use crate::arrow::datatypes::{DataType, Field, Schema}; use crate::arrow::record_batch::RecordBatch; use crate::dataframe::DataFrameWriteOptions; use crate::parquet::basic::Compression; use crate::test_util::parquet_test_data; + + use datafusion_common::config::TableParquetOptions; use datafusion_execution::config::SessionConfig; - use tempfile::tempdir; - use super::*; + use async_trait::async_trait; + use tempfile::tempdir; #[tokio::test] async fn read_with_glob_path() -> Result<()> { @@ -199,17 +201,16 @@ mod tests { .to_string(); std::fs::create_dir(dir).expect("create dir failed"); + let mut options = TableParquetOptions::default(); + options.global.compression = Some(Compression::SNAPPY.to_string()); + // Write the dataframe to a parquet file named 'output1.parquet' write_df .clone() .write_parquet( &path1, DataFrameWriteOptions::new().with_single_file_output(true), - Some( - WriterProperties::builder() - .set_compression(Compression::SNAPPY) - .build(), - ), + Some(options.clone()), ) .await?; @@ -219,11 +220,7 @@ mod tests { .write_parquet( &path2, DataFrameWriteOptions::new().with_single_file_output(true), - Some( - WriterProperties::builder() - .set_compression(Compression::SNAPPY) - .build(), - ), + Some(options.clone()), ) .await?; @@ -233,11 +230,7 @@ 
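A sketch of the updated `write_parquet` call: writer settings now come from a `TableParquetOptions` value rather than a `WriterProperties` builder (the `df` DataFrame and the output path are assumed):

    use datafusion::dataframe::DataFrameWriteOptions;
    use datafusion::parquet::basic::Compression;
    use datafusion_common::config::TableParquetOptions;

    let mut options = TableParquetOptions::default();
    options.global.compression = Some(Compression::SNAPPY.to_string());

    df.write_parquet(
        "/tmp/output1.parquet",
        DataFrameWriteOptions::new().with_single_file_output(true),
        Some(options),
    )
    .await?;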
mod tests { .write_parquet( &path3, DataFrameWriteOptions::new().with_single_file_output(true), - Some( - WriterProperties::builder() - .set_compression(Compression::SNAPPY) - .build(), - ), + Some(options.clone()), ) .await?; @@ -246,11 +239,7 @@ mod tests { .write_parquet( &path5, DataFrameWriteOptions::new().with_single_file_output(true), - Some( - WriterProperties::builder() - .set_compression(Compression::SNAPPY) - .build(), - ), + Some(options), ) .await?; diff --git a/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs b/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs index c45e14100e82..1cba8f025895 100644 --- a/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs +++ b/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs @@ -262,6 +262,7 @@ mod tests { }, None, None, + Default::default(), )) } diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs b/datafusion/core/src/physical_optimizer/enforce_distribution.rs index 3b4f7acf1be6..60ee1e20a9ac 100644 --- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/src/physical_optimizer/enforce_distribution.rs @@ -1474,6 +1474,7 @@ pub(crate) mod tests { }, None, None, + Default::default(), )) } @@ -1501,6 +1502,7 @@ pub(crate) mod tests { }, None, None, + Default::default(), )) } diff --git a/datafusion/core/src/physical_optimizer/test_utils.rs b/datafusion/core/src/physical_optimizer/test_utils.rs index d280726d5acd..2e6e3af5dfe2 100644 --- a/datafusion/core/src/physical_optimizer/test_utils.rs +++ b/datafusion/core/src/physical_optimizer/test_utils.rs @@ -287,6 +287,7 @@ pub fn parquet_exec(schema: &SchemaRef) -> Arc { }, None, None, + Default::default(), )) } @@ -310,6 +311,7 @@ pub fn parquet_exec_sorted( }, None, None, + Default::default(), )) } diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 6d49287debb4..0feff860fd93 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -75,11 +75,10 @@ use arrow::datatypes::{Schema, SchemaRef}; use arrow_array::builder::StringBuilder; use arrow_array::RecordBatch; use datafusion_common::display::ToStringifiedPlan; -use datafusion_common::file_options::FileTypeWriterOptions; use datafusion_common::{ exec_err, internal_err, not_impl_err, plan_err, DFSchema, FileType, ScalarValue, }; -use datafusion_expr::dml::{CopyOptions, CopyTo}; +use datafusion_expr::dml::CopyTo; use datafusion_expr::expr::{ self, AggregateFunction, AggregateFunctionDefinition, Alias, Between, BinaryExpr, Cast, GetFieldAccess, GetIndexedField, GroupingSet, InList, Like, TryCast, @@ -96,6 +95,7 @@ use datafusion_physical_plan::placeholder_row::PlaceholderRowExec; use datafusion_sql::utils::window_expr_common_partition_keys; use async_trait::async_trait; +use datafusion_common::config::FormatOptions; use futures::future::BoxFuture; use futures::{FutureExt, StreamExt, TryStreamExt}; use itertools::{multiunzip, Itertools}; @@ -568,9 +568,9 @@ impl DefaultPhysicalPlanner { LogicalPlan::Copy(CopyTo{ input, output_url, - file_format, - copy_options, + format_options, partition_by, + options: source_option_tuples }) => { let input_exec = self.create_initial_plan(input, session_state).await?; let parsed_url = ListingTableUrl::parse(output_url)?; @@ -578,16 +578,6 @@ impl DefaultPhysicalPlanner { let schema: Schema = (**input.schema()).clone().into(); - let file_type_writer_options = match copy_options{ - 
CopyOptions::SQLOptions(statement_options) => { - FileTypeWriterOptions::build( - file_format, - session_state.config_options(), - statement_options)? - }, - CopyOptions::WriterOptions(writer_options) => *writer_options.clone() - }; - // Note: the DataType passed here is ignored for the purposes of writing and inferred instead // from the schema of the RecordBatch being written. This allows COPY statements to specify only // the column name rather than column name + explicit data type. @@ -603,16 +593,30 @@ impl DefaultPhysicalPlanner { output_schema: Arc::new(schema), table_partition_cols, overwrite: false, - file_type_writer_options }; - - let sink_format: Arc = match file_format { - FileType::CSV => Arc::new(CsvFormat::default()), + let mut table_options = session_state.default_table_options().clone(); + let sink_format: Arc = match format_options { + FormatOptions::CSV(options) => { + table_options.csv = options.clone(); + table_options.set_file_format(FileType::CSV); + table_options.alter_with_string_hash_map(source_option_tuples)?; + Arc::new(CsvFormat::default().with_options(table_options.csv)) + }, + FormatOptions::JSON(options) => { + table_options.json = options.clone(); + table_options.set_file_format(FileType::JSON); + table_options.alter_with_string_hash_map(source_option_tuples)?; + Arc::new(JsonFormat::default().with_options(table_options.json)) + }, #[cfg(feature = "parquet")] - FileType::PARQUET => Arc::new(ParquetFormat::default()), - FileType::JSON => Arc::new(JsonFormat::default()), - FileType::AVRO => Arc::new(AvroFormat {} ), - FileType::ARROW => Arc::new(ArrowFormat {}), + FormatOptions::PARQUET(options) => { + table_options.parquet = options.clone(); + table_options.set_file_format(FileType::PARQUET); + table_options.alter_with_string_hash_map(source_option_tuples)?; + Arc::new(ParquetFormat::default().with_options(table_options.parquet)) + }, + FormatOptions::AVRO => Arc::new(AvroFormat {} ), + FormatOptions::ARROW => Arc::new(ArrowFormat {}), }; sink_format.create_writer_physical_plan(input_exec, session_state, config, None).await diff --git a/datafusion/core/src/test_util/parquet.rs b/datafusion/core/src/test_util/parquet.rs index 6d0711610b5a..7a466a666d8d 100644 --- a/datafusion/core/src/test_util/parquet.rs +++ b/datafusion/core/src/test_util/parquet.rs @@ -35,7 +35,7 @@ use crate::physical_expr::create_physical_expr; use crate::physical_plan::filter::FilterExec; use crate::physical_plan::metrics::MetricsSet; use crate::physical_plan::ExecutionPlan; -use crate::prelude::{Expr, SessionConfig}; +use crate::prelude::{Expr, SessionConfig, SessionContext}; use datafusion_common::Statistics; @@ -141,6 +141,7 @@ impl TestParquetFile { /// Otherwise if `maybe_filter` is None, return just a `ParquetExec` pub async fn create_scan( &self, + ctx: &SessionContext, maybe_filter: Option, ) -> Result> { let scan_config = FileScanConfig { @@ -164,6 +165,7 @@ impl TestParquetFile { // run coercion on the filters to coerce types etc. 
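A condensed sketch of the CSV branch above: statement-level option tuples are layered over the session's default table options before the sink format is built (names follow the diff; the JSON and parquet branches are analogous):

    let mut table_options = session_state.default_table_options().clone();
    table_options.csv = csv_options.clone();                         // from FormatOptions::CSV(csv_options)
    table_options.set_file_format(FileType::CSV);
    table_options.alter_with_string_hash_map(source_option_tuples)?; // COPY statement key/values
    let sink_format: Arc<dyn FileFormat> =
        Arc::new(CsvFormat::default().with_options(table_options.csv));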
let props = ExecutionProps::new(); let context = SimplifyContext::new(&props).with_schema(df_schema.clone()); + let parquet_options = ctx.state().default_table_options().parquet.clone(); if let Some(filter) = maybe_filter { let simplifier = ExprSimplifier::new(context); let filter = simplifier.coerce(filter, df_schema.clone()).unwrap(); @@ -173,12 +175,18 @@ impl TestParquetFile { scan_config, Some(physical_filter_expr.clone()), None, + parquet_options, )); let exec = Arc::new(FilterExec::try_new(physical_filter_expr, parquet_exec)?); Ok(exec) } else { - Ok(Arc::new(ParquetExec::new(scan_config, None, None))) + Ok(Arc::new(ParquetExec::new( + scan_config, + None, + None, + parquet_options, + ))) } } diff --git a/datafusion/core/tests/parquet/custom_reader.rs b/datafusion/core/tests/parquet/custom_reader.rs index e76b201e0222..4bacc80579ed 100644 --- a/datafusion/core/tests/parquet/custom_reader.rs +++ b/datafusion/core/tests/parquet/custom_reader.rs @@ -23,7 +23,6 @@ use std::time::SystemTime; use arrow::array::{ArrayRef, Int64Array, Int8Array, StringArray}; use arrow::datatypes::{Field, Schema, SchemaBuilder}; use arrow::record_batch::RecordBatch; -use bytes::Bytes; use datafusion::assert_batches_sorted_eq; use datafusion::datasource::file_format::parquet::fetch_parquet_metadata; use datafusion::datasource::listing::PartitionedFile; @@ -36,6 +35,7 @@ use datafusion::physical_plan::{collect, Statistics}; use datafusion::prelude::SessionContext; use datafusion_common::Result; +use bytes::Bytes; use futures::future::BoxFuture; use futures::{FutureExt, TryFutureExt}; use object_store::memory::InMemory; @@ -88,6 +88,7 @@ async fn route_data_access_ops_to_parquet_file_reader_factory() { }, None, None, + Default::default(), ) .with_parquet_file_reader_factory(Arc::new(InMemoryParquetFileReaderFactory( Arc::clone(&in_memory_object_store), diff --git a/datafusion/core/tests/parquet/filter_pushdown.rs b/datafusion/core/tests/parquet/filter_pushdown.rs index 64d3f45dee12..c0193fe04f04 100644 --- a/datafusion/core/tests/parquet/filter_pushdown.rs +++ b/datafusion/core/tests/parquet/filter_pushdown.rs @@ -34,6 +34,7 @@ use datafusion::prelude::{col, lit, lit_timestamp_nano, Expr, SessionContext}; use datafusion::test_util::parquet::{ParquetScanOptions, TestParquetFile}; use datafusion_common::instant::Instant; use datafusion_expr::utils::{conjunction, disjunction, split_conjunction}; + use itertools::Itertools; use parquet::file::properties::WriterProperties; use tempfile::TempDir; @@ -509,7 +510,7 @@ impl<'a> TestCase<'a> { let ctx = SessionContext::new_with_config(scan_options.config()); let exec = self .test_parquet_file - .create_scan(Some(filter.clone())) + .create_scan(&ctx, Some(filter.clone())) .await .unwrap(); let result = collect(exec.clone(), ctx.task_ctx()).await.unwrap(); diff --git a/datafusion/core/tests/parquet/page_pruning.rs b/datafusion/core/tests/parquet/page_pruning.rs index ac66d34798e4..3a43428f5bcf 100644 --- a/datafusion/core/tests/parquet/page_pruning.rs +++ b/datafusion/core/tests/parquet/page_pruning.rs @@ -83,6 +83,7 @@ async fn get_parquet_exec(state: &SessionState, filter: Expr) -> ParquetExec { }, Some(predicate), None, + Default::default(), ); parquet_exec.with_enable_page_index(true) } diff --git a/datafusion/core/tests/parquet/schema_coercion.rs b/datafusion/core/tests/parquet/schema_coercion.rs index 00f3eada496e..88f795d2a4fe 100644 --- a/datafusion/core/tests/parquet/schema_coercion.rs +++ b/datafusion/core/tests/parquet/schema_coercion.rs @@ -72,6 +72,7 @@ async 
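A minimal sketch of the updated `TestParquetFile::create_scan`, which now takes the `SessionContext` so the scan picks up that session's parquet options (the `test_parquet_file`, `scan_options`, and `filter` values are assumed to exist as in the test above):

    let ctx = SessionContext::new_with_config(scan_options.config());
    let exec = test_parquet_file
        .create_scan(&ctx, Some(filter.clone()))
        .await?;
    let result = collect(exec.clone(), ctx.task_ctx()).await?;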
fn multi_parquet_coercion() { }, None, None, + Default::default(), ); let session_ctx = SessionContext::new(); @@ -135,6 +136,7 @@ async fn multi_parquet_coercion_projection() { }, None, None, + Default::default(), ); let session_ctx = SessionContext::new(); diff --git a/datafusion/core/tests/user_defined/user_defined_aggregates.rs b/datafusion/core/tests/user_defined/user_defined_aggregates.rs index 9e231d25f298..3f40c55a3ed7 100644 --- a/datafusion/core/tests/user_defined/user_defined_aggregates.rs +++ b/datafusion/core/tests/user_defined/user_defined_aggregates.rs @@ -27,6 +27,7 @@ use std::sync::{ }; use datafusion::datasource::MemTable; +use datafusion::test_util::plan_and_collect; use datafusion::{ arrow::{ array::{ArrayRef, Float64Array, TimestampNanosecondArray}, @@ -320,6 +321,42 @@ async fn case_sensitive_identifiers_user_defined_aggregates() -> Result<()> { Ok(()) } +#[tokio::test] +async fn test_user_defined_functions_with_alias() -> Result<()> { + let ctx = SessionContext::new(); + let arr = Int32Array::from(vec![1]); + let batch = RecordBatch::try_from_iter(vec![("i", Arc::new(arr) as _)])?; + ctx.register_batch("t", batch).unwrap(); + + let my_avg = create_udaf( + "dummy", + vec![DataType::Float64], + Arc::new(DataType::Float64), + Volatility::Immutable, + Arc::new(|_| Ok(Box::::default())), + Arc::new(vec![DataType::UInt64, DataType::Float64]), + ) + .with_aliases(vec!["dummy_alias"]); + + ctx.register_udaf(my_avg); + + let expected = [ + "+------------+", + "| dummy(t.i) |", + "+------------+", + "| 1.0 |", + "+------------+", + ]; + + let result = plan_and_collect(&ctx, "SELECT dummy(i) FROM t").await?; + assert_batches_eq!(expected, &result); + + let alias_result = plan_and_collect(&ctx, "SELECT dummy_alias(i) FROM t").await?; + assert_batches_eq!(expected, &alias_result); + + Ok(()) +} + #[tokio::test] async fn test_groups_accumulator() -> Result<()> { let ctx = SessionContext::new(); diff --git a/datafusion/core/tests/user_defined/user_defined_window_functions.rs b/datafusion/core/tests/user_defined/user_defined_window_functions.rs index cfd74f8861e3..3c607301fc98 100644 --- a/datafusion/core/tests/user_defined/user_defined_window_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_window_functions.rs @@ -41,6 +41,10 @@ const UNBOUNDED_WINDOW_QUERY: &str = "SELECT x, y, val, \ odd_counter(val) OVER (PARTITION BY x ORDER BY y) \ from t ORDER BY x, y"; +const UNBOUNDED_WINDOW_QUERY_WITH_ALIAS: &str = "SELECT x, y, val, \ + odd_counter_alias(val) OVER (PARTITION BY x ORDER BY y) \ + from t ORDER BY x, y"; + /// A query with a window function evaluated over a moving window const BOUNDED_WINDOW_QUERY: &str = "SELECT x, y, val, \ @@ -118,6 +122,35 @@ async fn test_deregister_udwf() -> Result<()> { Ok(()) } +#[tokio::test] +async fn test_udwf_with_alias() { + let test_state = TestState::new(); + let TestContext { ctx, .. 
} = TestContext::new(test_state); + + let expected = vec![ + "+---+---+-----+-----------------------------------------------------------------------------------------------------------------------+", + "| x | y | val | odd_counter(t.val) PARTITION BY [t.x] ORDER BY [t.y ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |", + "+---+---+-----+-----------------------------------------------------------------------------------------------------------------------+", + "| 1 | a | 0 | 1 |", + "| 1 | b | 1 | 1 |", + "| 1 | c | 2 | 1 |", + "| 2 | d | 3 | 2 |", + "| 2 | e | 4 | 2 |", + "| 2 | f | 5 | 2 |", + "| 2 | g | 6 | 2 |", + "| 2 | h | 6 | 2 |", + "| 2 | i | 6 | 2 |", + "| 2 | j | 6 | 2 |", + "+---+---+-----+-----------------------------------------------------------------------------------------------------------------------+", + ]; + assert_batches_eq!( + expected, + &execute(&ctx, UNBOUNDED_WINDOW_QUERY_WITH_ALIAS) + .await + .unwrap() + ); +} + /// Basic user defined window function with bounded window #[tokio::test] async fn test_udwf_bounded_window_ignores_frame() { @@ -491,6 +524,7 @@ impl OddCounter { signature: Signature, return_type: DataType, test_state: Arc, + aliases: Vec, } impl SimpleWindowUDF { @@ -502,6 +536,7 @@ impl OddCounter { signature, return_type, test_state, + aliases: vec!["odd_counter_alias".to_string()], } } } @@ -526,6 +561,10 @@ impl OddCounter { fn partition_evaluator(&self) -> Result> { Ok(Box::new(OddCounter::new(Arc::clone(&self.test_state)))) } + + fn aliases(&self) -> &[String] { + &self.aliases + } } ctx.register_udwf(WindowUDF::from(SimpleWindowUDF::new(test_state))) diff --git a/datafusion/execution/src/task.rs b/datafusion/execution/src/task.rs index b39b4a00327b..cae410655d10 100644 --- a/datafusion/execution/src/task.rs +++ b/datafusion/execution/src/task.rs @@ -207,9 +207,15 @@ impl FunctionRegistry for TaskContext { &mut self, udaf: Arc, ) -> Result>> { + udaf.aliases().iter().for_each(|alias| { + self.aggregate_functions.insert(alias.clone(), udaf.clone()); + }); Ok(self.aggregate_functions.insert(udaf.name().into(), udaf)) } fn register_udwf(&mut self, udwf: Arc) -> Result>> { + udwf.aliases().iter().for_each(|alias| { + self.window_functions.insert(alias.clone(), udwf.clone()); + }); Ok(self.window_functions.insert(udwf.name().into(), udwf)) } fn register_udf(&mut self, udf: Arc) -> Result>> { diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index 15dfe48b34f6..b881af18d92c 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -17,18 +17,16 @@ //! Built-in functions module contains all the built-in functions definitions. 
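A hedged sketch of what the alias bookkeeping above provides: once registered, the registry resolves the window function by either name (assuming the context's `FunctionRegistry` lookup method `udwf`):

    ctx.register_udwf(WindowUDF::from(SimpleWindowUDF::new(test_state)));

    // Both the primary name and the alias resolve to the same function.
    let by_name = ctx.udwf("odd_counter")?;
    let by_alias = ctx.udwf("odd_counter_alias")?;
    assert_eq!(by_name.name(), by_alias.name());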
-use std::cmp::Ordering; use std::collections::HashMap; use std::fmt; use std::str::FromStr; use std::sync::{Arc, OnceLock}; use crate::signature::TIMEZONE_WILDCARD; -use crate::type_coercion::binary::get_wider_type; use crate::type_coercion::functions::data_types; use crate::{FuncMonotonicity, Signature, TypeSignature, Volatility}; -use arrow::datatypes::{DataType, Field, Fields, TimeUnit}; +use arrow::datatypes::{DataType, Field, TimeUnit}; use datafusion_common::{plan_err, DataFusionError, Result}; use strum::IntoEnumIterator; @@ -102,44 +100,28 @@ pub enum BuiltinScalarFunction { Sinh, /// sqrt Sqrt, - /// tan - Tan, - /// tanh - Tanh, /// trunc Trunc, /// cot Cot, // array functions - /// array_append - ArrayAppend, - /// array_sort - ArraySort, - /// array_concat - ArrayConcat, /// array_pop_front ArrayPopFront, /// array_pop_back ArrayPopBack, - /// array_distinct - ArrayDistinct, /// array_element ArrayElement, /// array_position ArrayPosition, /// array_positions ArrayPositions, - /// array_prepend - ArrayPrepend, /// array_remove ArrayRemove, /// array_remove_n ArrayRemoveN, /// array_remove_all ArrayRemoveAll, - /// array_repeat - ArrayRepeat, /// array_replace ArrayReplace, /// array_replace_n @@ -158,12 +140,6 @@ pub enum BuiltinScalarFunction { ArrayExcept, /// array_resize ArrayResize, - /// construct an array from columns - MakeArray, - - // struct functions - /// struct - Struct, // string functions /// ascii @@ -220,8 +196,6 @@ pub enum BuiltinScalarFunction { SHA512, /// split_part SplitPart, - /// string_to_array - StringToArray, /// starts_with StartsWith, /// strpos @@ -240,8 +214,6 @@ pub enum BuiltinScalarFunction { Upper, /// uuid Uuid, - /// arrow_typeof - ArrowTypeof, /// overlay OverLay, /// levenshtein @@ -334,21 +306,13 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::Sqrt => Volatility::Immutable, BuiltinScalarFunction::Cbrt => Volatility::Immutable, BuiltinScalarFunction::Cot => Volatility::Immutable, - BuiltinScalarFunction::Tan => Volatility::Immutable, - BuiltinScalarFunction::Tanh => Volatility::Immutable, BuiltinScalarFunction::Trunc => Volatility::Immutable, - BuiltinScalarFunction::ArrayAppend => Volatility::Immutable, - BuiltinScalarFunction::ArraySort => Volatility::Immutable, - BuiltinScalarFunction::ArrayConcat => Volatility::Immutable, - BuiltinScalarFunction::ArrayDistinct => Volatility::Immutable, BuiltinScalarFunction::ArrayElement => Volatility::Immutable, BuiltinScalarFunction::ArrayExcept => Volatility::Immutable, BuiltinScalarFunction::ArrayPopFront => Volatility::Immutable, BuiltinScalarFunction::ArrayPopBack => Volatility::Immutable, BuiltinScalarFunction::ArrayPosition => Volatility::Immutable, BuiltinScalarFunction::ArrayPositions => Volatility::Immutable, - BuiltinScalarFunction::ArrayPrepend => Volatility::Immutable, - BuiltinScalarFunction::ArrayRepeat => Volatility::Immutable, BuiltinScalarFunction::ArrayRemove => Volatility::Immutable, BuiltinScalarFunction::ArrayRemoveN => Volatility::Immutable, BuiltinScalarFunction::ArrayRemoveAll => Volatility::Immutable, @@ -360,7 +324,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayIntersect => Volatility::Immutable, BuiltinScalarFunction::ArrayUnion => Volatility::Immutable, BuiltinScalarFunction::ArrayResize => Volatility::Immutable, - BuiltinScalarFunction::MakeArray => Volatility::Immutable, BuiltinScalarFunction::Ascii => Volatility::Immutable, BuiltinScalarFunction::BitLength => Volatility::Immutable, BuiltinScalarFunction::Btrim => Volatility::Immutable, @@ -389,7 
+352,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::SHA512 => Volatility::Immutable, BuiltinScalarFunction::Digest => Volatility::Immutable, BuiltinScalarFunction::SplitPart => Volatility::Immutable, - BuiltinScalarFunction::StringToArray => Volatility::Immutable, BuiltinScalarFunction::StartsWith => Volatility::Immutable, BuiltinScalarFunction::Strpos => Volatility::Immutable, BuiltinScalarFunction::Substr => Volatility::Immutable, @@ -399,9 +361,7 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::Translate => Volatility::Immutable, BuiltinScalarFunction::Trim => Volatility::Immutable, BuiltinScalarFunction::Upper => Volatility::Immutable, - BuiltinScalarFunction::Struct => Volatility::Immutable, - BuiltinScalarFunction::ArrowTypeof => Volatility::Immutable, - BuiltinScalarFunction::OverLay => Volatility::Immutable, + BuiltinScalarFunction::OverLay => Volatility::Immutable, BuiltinScalarFunction::Levenshtein => Volatility::Immutable, BuiltinScalarFunction::SubstrIndex => Volatility::Immutable, BuiltinScalarFunction::FindInSet => Volatility::Immutable, @@ -412,25 +372,6 @@ impl BuiltinScalarFunction { } } - /// Returns the dimension [`DataType`] of [`DataType::List`] if - /// treated as a N-dimensional array. - /// - /// ## Examples: - /// - /// * `Int64` has dimension 1 - /// * `List(Int64)` has dimension 2 - /// * `List(List(Int64))` has dimension 3 - /// * etc. - fn return_dimension(self, input_expr_type: &DataType) -> u64 { - let mut result: u64 = 1; - let mut current_data_type = input_expr_type; - while let DataType::List(field) = current_data_type { - current_data_type = field.data_type(); - result += 1; - } - result - } - /// Returns the output [`DataType`] of this function /// /// This method should be invoked only after `input_expr_types` have been validated @@ -448,39 +389,6 @@ impl BuiltinScalarFunction { // the return type of the built in function. // Some built-in functions' return type depends on the incoming type. match self { - BuiltinScalarFunction::ArrayAppend => Ok(input_expr_types[0].clone()), - BuiltinScalarFunction::ArraySort => Ok(input_expr_types[0].clone()), - BuiltinScalarFunction::ArrayConcat => { - let mut expr_type = Null; - let mut max_dims = 0; - for input_expr_type in input_expr_types { - match input_expr_type { - List(field) => { - if !field.data_type().equals_datatype(&Null) { - let dims = self.return_dimension(input_expr_type); - expr_type = match max_dims.cmp(&dims) { - Ordering::Greater => expr_type, - Ordering::Equal => { - get_wider_type(&expr_type, input_expr_type)? - } - Ordering::Less => { - max_dims = dims; - input_expr_type.clone() - } - }; - } - } - _ => { - return plan_err!( - "The {self} function can only accept list as the args." 
- ); - } - } - } - - Ok(expr_type) - } - BuiltinScalarFunction::ArrayDistinct => Ok(input_expr_types[0].clone()), BuiltinScalarFunction::ArrayElement => match &input_expr_types[0] { List(field) | LargeList(field) @@ -495,12 +403,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayPositions => { Ok(List(Arc::new(Field::new("item", UInt64, true)))) } - BuiltinScalarFunction::ArrayPrepend => Ok(input_expr_types[1].clone()), - BuiltinScalarFunction::ArrayRepeat => Ok(List(Arc::new(Field::new( - "item", - input_expr_types[0].clone(), - true, - )))), BuiltinScalarFunction::ArrayRemove => Ok(input_expr_types[0].clone()), BuiltinScalarFunction::ArrayRemoveN => Ok(input_expr_types[0].clone()), BuiltinScalarFunction::ArrayRemoveAll => Ok(input_expr_types[0].clone()), @@ -536,20 +438,6 @@ impl BuiltinScalarFunction { (dt, _) => Ok(dt), } } - BuiltinScalarFunction::MakeArray => match input_expr_types.len() { - 0 => Ok(List(Arc::new(Field::new("item", Null, true)))), - _ => { - let mut expr_type = Null; - for input_expr_type in input_expr_types { - if !input_expr_type.equals_datatype(&Null) { - expr_type = input_expr_type.clone(); - break; - } - } - - Ok(List(Arc::new(Field::new("item", expr_type, true)))) - } - }, BuiltinScalarFunction::Ascii => Ok(Int32), BuiltinScalarFunction::BitLength => { utf8_to_int_type(&input_expr_types[0], "bit_length") @@ -620,11 +508,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::SplitPart => { utf8_to_str_type(&input_expr_types[0], "split_part") } - BuiltinScalarFunction::StringToArray => Ok(List(Arc::new(Field::new( - "item", - input_expr_types[0].clone(), - true, - )))), BuiltinScalarFunction::StartsWith => Ok(Boolean), BuiltinScalarFunction::EndsWith => Ok(Boolean), BuiltinScalarFunction::Strpos => { @@ -664,14 +547,7 @@ impl BuiltinScalarFunction { _ => Ok(Float64), }, - BuiltinScalarFunction::Struct => { - let return_fields = input_expr_types - .iter() - .enumerate() - .map(|(pos, dt)| Field::new(format!("c{pos}"), dt.clone(), true)) - .collect::>(); - Ok(Struct(Fields::from(return_fields))) - } + BuiltinScalarFunction::Atan2 => match &input_expr_types[0] { Float32 => Ok(Float32), @@ -690,8 +566,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::Iszero => Ok(Boolean), - BuiltinScalarFunction::ArrowTypeof => Ok(Utf8), - BuiltinScalarFunction::OverLay => { utf8_to_str_type(&input_expr_types[0], "overlay") } @@ -720,8 +594,6 @@ impl BuiltinScalarFunction { | BuiltinScalarFunction::Sinh | BuiltinScalarFunction::Sqrt | BuiltinScalarFunction::Cbrt - | BuiltinScalarFunction::Tan - | BuiltinScalarFunction::Tanh | BuiltinScalarFunction::Trunc | BuiltinScalarFunction::Cot => match input_expr_types[0] { Float32 => Ok(Float32), @@ -739,36 +611,18 @@ impl BuiltinScalarFunction { // for now, the list is small, as we do not have many built-in functions. 
match self { - BuiltinScalarFunction::ArraySort => { - Signature::variadic_any(self.volatility()) - } - BuiltinScalarFunction::ArrayAppend => { - Signature::array_and_element(self.volatility()) - } - BuiltinScalarFunction::MakeArray => { - // 0 or more arguments of arbitrary type - Signature::one_of(vec![VariadicEqual, Any(0)], self.volatility()) - } BuiltinScalarFunction::ArrayPopFront => Signature::array(self.volatility()), BuiltinScalarFunction::ArrayPopBack => Signature::array(self.volatility()), - BuiltinScalarFunction::ArrayConcat => { - Signature::variadic_any(self.volatility()) - } BuiltinScalarFunction::ArrayElement => { Signature::array_and_index(self.volatility()) } BuiltinScalarFunction::ArrayExcept => Signature::any(2, self.volatility()), - BuiltinScalarFunction::ArrayDistinct => Signature::array(self.volatility()), BuiltinScalarFunction::ArrayPosition => { Signature::array_and_element_and_optional_index(self.volatility()) } BuiltinScalarFunction::ArrayPositions => { Signature::array_and_element(self.volatility()) } - BuiltinScalarFunction::ArrayPrepend => { - Signature::element_and_array(self.volatility()) - } - BuiltinScalarFunction::ArrayRepeat => Signature::any(2, self.volatility()), BuiltinScalarFunction::ArrayRemove => { Signature::array_and_element(self.volatility()) } @@ -792,7 +646,6 @@ impl BuiltinScalarFunction { Signature::variadic_any(self.volatility()) } - BuiltinScalarFunction::Struct => Signature::variadic_any(self.volatility()), BuiltinScalarFunction::Concat | BuiltinScalarFunction::ConcatWithSeparator => { Signature::variadic(vec![Utf8], self.volatility()) @@ -901,13 +754,6 @@ impl BuiltinScalarFunction { ], self.volatility(), ), - BuiltinScalarFunction::StringToArray => Signature::one_of( - vec![ - TypeSignature::Uniform(2, vec![Utf8, LargeUtf8]), - TypeSignature::Uniform(3, vec![Utf8, LargeUtf8]), - ], - self.volatility(), - ), BuiltinScalarFunction::EndsWith | BuiltinScalarFunction::Strpos @@ -994,7 +840,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::Gcd | BuiltinScalarFunction::Lcm => { Signature::uniform(2, vec![Int64], self.volatility()) } - BuiltinScalarFunction::ArrowTypeof => Signature::any(1, self.volatility()), BuiltinScalarFunction::OverLay => Signature::one_of( vec![ Exact(vec![Utf8, Utf8, Int64, Int64]), @@ -1027,8 +872,6 @@ impl BuiltinScalarFunction { | BuiltinScalarFunction::Sin | BuiltinScalarFunction::Sinh | BuiltinScalarFunction::Sqrt - | BuiltinScalarFunction::Tan - | BuiltinScalarFunction::Tanh | BuiltinScalarFunction::Cot => { // math expressions expect 1 argument of type f64 or f32 // priority is given to f64 because e.g. 
`sqrt(1i32)` is in IR (real numbers) and thus we @@ -1073,7 +916,6 @@ impl BuiltinScalarFunction { | BuiltinScalarFunction::Sinh | BuiltinScalarFunction::Sqrt | BuiltinScalarFunction::Cbrt - | BuiltinScalarFunction::Tanh | BuiltinScalarFunction::Trunc | BuiltinScalarFunction::Pi ) { @@ -1119,8 +961,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::Sin => &["sin"], BuiltinScalarFunction::Sinh => &["sinh"], BuiltinScalarFunction::Sqrt => &["sqrt"], - BuiltinScalarFunction::Tan => &["tan"], - BuiltinScalarFunction::Tanh => &["tanh"], BuiltinScalarFunction::Trunc => &["trunc"], // conditional functions @@ -1150,9 +990,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::Rpad => &["rpad"], BuiltinScalarFunction::Rtrim => &["rtrim"], BuiltinScalarFunction::SplitPart => &["split_part"], - BuiltinScalarFunction::StringToArray => { - &["string_to_array", "string_to_list"] - } BuiltinScalarFunction::StartsWith => &["starts_with"], BuiltinScalarFunction::Strpos => &["strpos", "instr", "position"], BuiltinScalarFunction::Substr => &["substr"], @@ -1176,22 +1013,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::SHA256 => &["sha256"], BuiltinScalarFunction::SHA384 => &["sha384"], BuiltinScalarFunction::SHA512 => &["sha512"], - - // other functions - BuiltinScalarFunction::ArrowTypeof => &["arrow_typeof"], - - // array functions - BuiltinScalarFunction::ArrayAppend => &[ - "array_append", - "list_append", - "array_push_back", - "list_push_back", - ], - BuiltinScalarFunction::ArraySort => &["array_sort", "list_sort"], - BuiltinScalarFunction::ArrayConcat => { - &["array_concat", "array_cat", "list_concat", "list_cat"] - } - BuiltinScalarFunction::ArrayDistinct => &["array_distinct", "list_distinct"], BuiltinScalarFunction::ArrayElement => &[ "array_element", "array_extract", @@ -1212,13 +1033,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayPositions => { &["array_positions", "list_positions"] } - BuiltinScalarFunction::ArrayPrepend => &[ - "array_prepend", - "list_prepend", - "array_push_front", - "list_push_front", - ], - BuiltinScalarFunction::ArrayRepeat => &["array_repeat", "list_repeat"], BuiltinScalarFunction::ArrayRemove => &["array_remove", "list_remove"], BuiltinScalarFunction::ArrayRemoveN => &["array_remove_n", "list_remove_n"], BuiltinScalarFunction::ArrayRemoveAll => { @@ -1235,14 +1049,10 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArraySlice => &["array_slice", "list_slice"], BuiltinScalarFunction::ArrayUnion => &["array_union", "list_union"], BuiltinScalarFunction::ArrayResize => &["array_resize", "list_resize"], - BuiltinScalarFunction::MakeArray => &["make_array", "make_list"], BuiltinScalarFunction::ArrayIntersect => { &["array_intersect", "list_intersect"] } BuiltinScalarFunction::OverLay => &["overlay"], - - // struct functions - BuiltinScalarFunction::Struct => &["struct"], } } } diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index d32d1e9c5cce..8212f75583ea 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -538,11 +538,9 @@ scalar_expr!(Sqrt, sqrt, num, "square root of a number"); scalar_expr!(Cbrt, cbrt, num, "cube root of a number"); scalar_expr!(Sin, sin, num, "sine"); scalar_expr!(Cos, cos, num, "cosine"); -scalar_expr!(Tan, tan, num, "tangent"); scalar_expr!(Cot, cot, num, "cotangent"); scalar_expr!(Sinh, sinh, num, "hyperbolic sine"); scalar_expr!(Cosh, cosh, num, "hyperbolic cosine"); -scalar_expr!(Tanh, tanh, num, "hyperbolic tangent"); scalar_expr!(Atan, atan, num, 
"inverse tangent"); scalar_expr!(Asinh, asinh, num, "inverse hyperbolic sine"); scalar_expr!(Acosh, acosh, num, "inverse hyperbolic cosine"); @@ -586,16 +584,6 @@ scalar_expr!( scalar_expr!(Uuid, uuid, , "returns uuid v4 as a string value"); scalar_expr!(Log, log, base x, "logarithm of a `x` for a particular `base`"); -// array functions -scalar_expr!( - ArrayAppend, - array_append, - array element, - "appends an element to the end of an array." -); - -scalar_expr!(ArraySort, array_sort, array desc null_first, "returns sorted array."); - scalar_expr!( ArrayPopBack, array_pop_back, @@ -610,7 +598,6 @@ scalar_expr!( "returns the array without the first element." ); -nary_scalar_expr!(ArrayConcat, array_concat, "concatenates arrays."); scalar_expr!( ArrayElement, array_element, @@ -623,12 +610,6 @@ scalar_expr!( first_array second_array, "Returns an array of the elements that appear in the first array but not in the second." ); -scalar_expr!( - ArrayDistinct, - array_distinct, - array, - "return distinct values from the array after removing duplicates." -); scalar_expr!( ArrayPosition, array_position, @@ -641,18 +622,6 @@ scalar_expr!( array element, "searches for an element in the array, returns all occurrences." ); -scalar_expr!( - ArrayPrepend, - array_prepend, - array element, - "prepends an element to the beginning of an array." -); -scalar_expr!( - ArrayRepeat, - array_repeat, - element count, - "returns an array containing element `count` times." -); scalar_expr!( ArrayRemove, array_remove, @@ -710,11 +679,6 @@ scalar_expr!( "returns an array with the specified size filled with the given value." ); -nary_scalar_expr!( - MakeArray, - array, - "returns an Arrow array using the specified input expressions." -); scalar_expr!( ArrayIntersect, array_intersect, @@ -774,7 +738,6 @@ scalar_expr!(SHA256, sha256, string, "SHA-256 hash"); scalar_expr!(SHA384, sha384, string, "SHA-384 hash"); scalar_expr!(SHA512, sha512, string, "SHA-512 hash"); scalar_expr!(SplitPart, split_part, string delimiter index, "splits a string based on a delimiter and picks out the desired field based on the index."); -scalar_expr!(StringToArray, string_to_array, string delimiter null_string, "splits a `string` based on a `delimiter` and returns an array of parts. Any parts matching the optional `null_string` will be replaced with `NULL`"); scalar_expr!(StartsWith, starts_with, string prefix, "whether the `string` starts with the `prefix`"); scalar_expr!(EndsWith, ends_with, string suffix, "whether the `string` ends with the `suffix`"); scalar_expr!(Strpos, strpos, string substring, "finds the position from where the `substring` matches the `string`"); @@ -834,18 +797,10 @@ scalar_expr!( "returns true if a given number is +0.0 or -0.0 otherwise returns false" ); -scalar_expr!(ArrowTypeof, arrow_typeof, val, "data type"); scalar_expr!(Levenshtein, levenshtein, string1 string2, "Returns the Levenshtein distance between the two given strings"); scalar_expr!(SubstrIndex, substr_index, string delimiter count, "Returns the substring from str before count occurrences of the delimiter"); scalar_expr!(FindInSet, find_in_set, str strlist, "Returns a value in the range of 1 to N if the string str is in the string list strlist consisting of N substrings"); -scalar_expr!( - Struct, - struct_fun, - val, - "returns a vector of fields from the struct" -); - /// Create a CASE WHEN statement with literal WHEN expressions for comparison to the base expression. 
pub fn case(expr: Expr) -> CaseBuilder { CaseBuilder::new(Some(Box::new(expr)), vec![], vec![], None) @@ -1230,11 +1185,9 @@ mod test { test_unary_scalar_expr!(Cbrt, cbrt); test_unary_scalar_expr!(Sin, sin); test_unary_scalar_expr!(Cos, cos); - test_unary_scalar_expr!(Tan, tan); test_unary_scalar_expr!(Cot, cot); test_unary_scalar_expr!(Sinh, sinh); test_unary_scalar_expr!(Cosh, cosh); - test_unary_scalar_expr!(Tanh, tanh); test_unary_scalar_expr!(Atan, atan); test_unary_scalar_expr!(Asinh, asinh); test_unary_scalar_expr!(Acosh, acosh); @@ -1286,7 +1239,6 @@ mod test { test_scalar_expr!(SHA384, sha384, string); test_scalar_expr!(SHA512, sha512, string); test_scalar_expr!(SplitPart, split_part, expr, delimiter, index); - test_scalar_expr!(StringToArray, string_to_array, expr, delimiter, null_value); test_scalar_expr!(StartsWith, starts_with, string, characters); test_scalar_expr!(EndsWith, ends_with, string, characters); test_scalar_expr!(Strpos, strpos, string, substring); @@ -1297,23 +1249,17 @@ mod test { test_scalar_expr!(Trim, trim, string); test_scalar_expr!(Upper, upper, string); - test_scalar_expr!(ArrayAppend, array_append, array, element); - test_scalar_expr!(ArraySort, array_sort, array, desc, null_first); test_scalar_expr!(ArrayPopFront, array_pop_front, array); test_scalar_expr!(ArrayPopBack, array_pop_back, array); test_scalar_expr!(ArrayPosition, array_position, array, element, index); test_scalar_expr!(ArrayPositions, array_positions, array, element); - test_scalar_expr!(ArrayPrepend, array_prepend, array, element); - test_scalar_expr!(ArrayRepeat, array_repeat, element, count); test_scalar_expr!(ArrayRemove, array_remove, array, element); test_scalar_expr!(ArrayRemoveN, array_remove_n, array, element, max); test_scalar_expr!(ArrayRemoveAll, array_remove_all, array, element); test_scalar_expr!(ArrayReplace, array_replace, array, from, to); test_scalar_expr!(ArrayReplaceN, array_replace_n, array, from, to, max); test_scalar_expr!(ArrayReplaceAll, array_replace_all, array, from, to); - test_nary_scalar_expr!(MakeArray, array, input); - test_unary_scalar_expr!(ArrowTypeof, arrow_typeof); test_nary_scalar_expr!(OverLay, overlay, string, characters, position, len); test_nary_scalar_expr!(OverLay, overlay, string, characters, position); test_scalar_expr!(Levenshtein, levenshtein, string1, string2); diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index 0662396f611b..01e6af948762 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -24,7 +24,7 @@ use std::convert::TryFrom; use std::iter::zip; use std::sync::Arc; -use crate::dml::{CopyOptions, CopyTo}; +use crate::dml::CopyTo; use crate::expr::Alias; use crate::expr_rewriter::{ coerce_plan_expr_for_schema, normalize_col, @@ -43,20 +43,19 @@ use crate::utils::{ expand_wildcard, find_valid_equijoin_key_pair, group_window_expr_by_sort_keys, }; use crate::{ - and, binary_expr, DmlStatement, Expr, ExprSchemable, Operator, + and, binary_expr, DmlStatement, Expr, ExprSchemable, Operator, RecursiveQuery, TableProviderFilterPushDown, TableSource, WriteOp, }; use arrow::datatypes::{DataType, Schema, SchemaRef}; +use datafusion_common::config::FormatOptions; use datafusion_common::display::ToStringifiedPlan; use datafusion_common::{ get_target_functional_dependencies, plan_datafusion_err, plan_err, Column, DFField, - DFSchema, DFSchemaRef, DataFusionError, FileType, OwnedTableReference, Result, - ScalarValue, TableReference, 
ToDFSchema, UnnestOptions, + DFSchema, DFSchemaRef, DataFusionError, OwnedTableReference, Result, ScalarValue, + TableReference, ToDFSchema, UnnestOptions, }; -use super::plan::RecursiveQuery; - /// Default table name for unnamed table pub const UNNAMED_TABLE: &str = "?table?"; @@ -262,16 +261,16 @@ impl LogicalPlanBuilder { pub fn copy_to( input: LogicalPlan, output_url: String, - file_format: FileType, + format_options: FormatOptions, + options: HashMap, partition_by: Vec, - copy_options: CopyOptions, ) -> Result { Ok(Self::from(LogicalPlan::Copy(CopyTo { input: Arc::new(input), output_url, - file_format, + format_options, + options, partition_by, - copy_options, }))) } diff --git a/datafusion/expr/src/logical_plan/dml.rs b/datafusion/expr/src/logical_plan/dml.rs index 7f04bd8973d6..6ab06a57c1c2 100644 --- a/datafusion/expr/src/logical_plan/dml.rs +++ b/datafusion/expr/src/logical_plan/dml.rs @@ -15,70 +15,46 @@ // specific language governing permissions and limitations // under the License. -use std::{ - fmt::{self, Display}, - sync::Arc, -}; +use std::collections::HashMap; +use std::fmt::{self, Display}; +use std::hash::{Hash, Hasher}; +use std::sync::Arc; -use datafusion_common::{ - file_options::StatementOptions, DFSchemaRef, FileType, FileTypeWriterOptions, - OwnedTableReference, -}; +use datafusion_common::config::FormatOptions; +use datafusion_common::{DFSchemaRef, OwnedTableReference}; use crate::LogicalPlan; /// Operator that copies the contents of a database to file(s) -#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Clone)] pub struct CopyTo { /// The relation that determines the tuples to write to the output file(s) pub input: Arc, /// The location to write the file(s) pub output_url: String, - /// The file format to output (explicitly defined or inferred from file extension) - pub file_format: FileType, /// Determines which, if any, columns should be used for hive-style partitioned writes pub partition_by: Vec, - /// Arbitrary options as tuples - pub copy_options: CopyOptions, -} - -/// When the logical plan is constructed from SQL, CopyOptions -/// will contain arbitrary string tuples which must be parsed into -/// FileTypeWriterOptions. When the logical plan is constructed directly -/// from rust code (such as via the DataFrame API), FileTypeWriterOptions -/// can be provided directly, avoiding the run time cost and fallibility of -/// parsing string based options. -#[derive(Clone)] -pub enum CopyOptions { - /// Holds StatementOptions parsed from a SQL statement - SQLOptions(StatementOptions), - /// Holds FileTypeWriterOptions directly provided - WriterOptions(Box), + /// File format options. 
+ pub format_options: FormatOptions, + /// SQL Options that can affect the formats + pub options: HashMap, } -impl PartialEq for CopyOptions { - fn eq(&self, other: &CopyOptions) -> bool { - match self { - Self::SQLOptions(statement1) => match other { - Self::SQLOptions(statement2) => statement1.eq(statement2), - Self::WriterOptions(_) => false, - }, - Self::WriterOptions(_) => false, - } +// Implement PartialEq manually +impl PartialEq for CopyTo { + fn eq(&self, other: &Self) -> bool { + self.input == other.input && self.output_url == other.output_url } } -impl Eq for CopyOptions {} +// Implement Eq (no need for additional logic over PartialEq) +impl Eq for CopyTo {} -impl std::hash::Hash for CopyOptions { - fn hash(&self, hasher: &mut H) - where - H: std::hash::Hasher, - { - match self { - Self::SQLOptions(statement) => statement.hash(hasher), - Self::WriterOptions(_) => (), - } +// Implement Hash manually +impl Hash for CopyTo { + fn hash(&self, state: &mut H) { + self.input.hash(state); + self.output_url.hash(state); } } diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index ca021c4bfc28..a3f027d9fdb2 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -25,7 +25,6 @@ use std::sync::Arc; use super::dml::CopyTo; use super::DdlStatement; use crate::builder::change_redundant_column; -use crate::dml::CopyOptions; use crate::expr::{ Alias, Exists, InSubquery, Placeholder, Sort as SortExpr, WindowFunction, }; @@ -613,15 +612,15 @@ impl LogicalPlan { LogicalPlan::Copy(CopyTo { input: _, output_url, - file_format, + format_options, + options, partition_by, - copy_options, }) => Ok(LogicalPlan::Copy(CopyTo { input: Arc::new(inputs.swap_remove(0)), output_url: output_url.clone(), - file_format: file_format.clone(), + format_options: format_options.clone(), + options: options.clone(), partition_by: partition_by.clone(), - copy_options: copy_options.clone(), })), LogicalPlan::Values(Values { schema, .. }) => { Ok(LogicalPlan::Values(Values { @@ -1544,22 +1543,17 @@ impl LogicalPlan { LogicalPlan::Copy(CopyTo { input: _, output_url, - file_format, - partition_by: _, - copy_options, + format_options, + options, + .. }) => { - let op_str = match copy_options { - CopyOptions::SQLOptions(statement) => statement - .clone() - .into_inner() - .iter() - .map(|(k, v)| format!("{k} {v}")) - .collect::>() - .join(", "), - CopyOptions::WriterOptions(_) => "".into(), - }; + let op_str = options + .iter() + .map(|(k, v)| format!("{k} {v}")) + .collect::>() + .join(", "); - write!(f, "CopyTo: format={file_format} output_url={output_url} options: ({op_str})") + write!(f, "CopyTo: format={format_options} output_url={output_url} options: ({op_str})") } LogicalPlan::Ddl(ddl) => { write!(f, "{}", ddl.display()) diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs index e56723063e41..c46dd9cd3a6f 100644 --- a/datafusion/expr/src/udaf.rs +++ b/datafusion/expr/src/udaf.rs @@ -118,6 +118,14 @@ impl AggregateUDF { self.inner.clone() } + /// Adds additional names that can be used to invoke this function, in + /// addition to `name` + /// + /// If you implement [`AggregateUDFImpl`] directly you should return aliases directly. + pub fn with_aliases(self, aliases: impl IntoIterator) -> Self { + Self::new_from_impl(AliasedAggregateUDFImpl::new(self.inner.clone(), aliases)) + } + /// creates an [`Expr`] that calls the aggregate function. 
/// /// This utility allows using the UDAF without requiring access to @@ -139,6 +147,11 @@ impl AggregateUDF { self.inner.name() } + /// Returns the aliases for this function. + pub fn aliases(&self) -> &[String] { + self.inner.aliases() + } + /// Returns this function's signature (what input types are accepted) /// /// See [`AggregateUDFImpl::signature`] for more details. @@ -277,6 +290,64 @@ pub trait AggregateUDFImpl: Debug + Send + Sync { fn create_groups_accumulator(&self) -> Result> { not_impl_err!("GroupsAccumulator hasn't been implemented for {self:?} yet") } + + /// Returns any aliases (alternate names) for this function. + /// + /// Note: `aliases` should only include names other than [`Self::name`]. + /// Defaults to `[]` (no aliases) + fn aliases(&self) -> &[String] { + &[] + } +} + +/// AggregateUDF that adds an alias to the underlying function. It is better to +/// implement [`AggregateUDFImpl`], which supports aliases, directly if possible. +#[derive(Debug)] +struct AliasedAggregateUDFImpl { + inner: Arc, + aliases: Vec, +} + +impl AliasedAggregateUDFImpl { + pub fn new( + inner: Arc, + new_aliases: impl IntoIterator, + ) -> Self { + let mut aliases = inner.aliases().to_vec(); + aliases.extend(new_aliases.into_iter().map(|s| s.to_string())); + + Self { inner, aliases } + } +} + +impl AggregateUDFImpl for AliasedAggregateUDFImpl { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + self.inner.name() + } + + fn signature(&self) -> &Signature { + self.inner.signature() + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + self.inner.return_type(arg_types) + } + + fn accumulator(&self, arg: &DataType) -> Result> { + self.inner.accumulator(arg) + } + + fn state_type(&self, return_type: &DataType) -> Result> { + self.inner.state_type(return_type) + } + + fn aliases(&self) -> &[String] { + &self.aliases + } } /// Implementation of [`AggregateUDFImpl`] that wraps the function style pointers diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index 3ab40fe70a91..d3925f2e1925 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -80,7 +80,7 @@ impl WindowUDF { /// /// See [`WindowUDFImpl`] for a more convenient way to create a /// `WindowUDF` using trait objects - #[deprecated(since = "34.0.0", note = "please implement ScalarUDFImpl instead")] + #[deprecated(since = "34.0.0", note = "please implement WindowUDFImpl instead")] pub fn new( name: &str, signature: &Signature, @@ -112,6 +112,14 @@ impl WindowUDF { self.inner.clone() } + /// Adds additional names that can be used to invoke this function, in + /// addition to `name` + /// + /// If you implement [`WindowUDFImpl`] directly you should return aliases directly. + pub fn with_aliases(self, aliases: impl IntoIterator) -> Self { + Self::new_from_impl(AliasedWindowUDFImpl::new(self.inner.clone(), aliases)) + } + /// creates a [`Expr`] that calls the window function given /// the `partition_by`, `order_by`, and `window_frame` definition /// @@ -143,6 +151,11 @@ impl WindowUDF { self.inner.name() } + /// Returns the aliases for this function. + pub fn aliases(&self) -> &[String] { + self.inner.aliases() + } + /// Returns this function's signature (what input types are accepted) /// /// See [`WindowUDFImpl::signature`] for more details. 
@@ -217,7 +230,7 @@ where /// fn partition_evaluator(&self) -> Result> { unimplemented!() } /// } /// -/// // Create a new ScalarUDF from the implementation +/// // Create a new WindowUDF from the implementation /// let smooth_it = WindowUDF::from(SmoothIt::new()); /// /// // Call the function `add_one(col)` @@ -245,6 +258,60 @@ pub trait WindowUDFImpl: Debug + Send + Sync { /// Invoke the function, returning the [`PartitionEvaluator`] instance fn partition_evaluator(&self) -> Result>; + + /// Returns any aliases (alternate names) for this function. + /// + /// Note: `aliases` should only include names other than [`Self::name`]. + /// Defaults to `[]` (no aliases) + fn aliases(&self) -> &[String] { + &[] + } +} + +/// WindowUDF that adds an alias to the underlying function. It is better to +/// implement [`WindowUDFImpl`], which supports aliases, directly if possible. +#[derive(Debug)] +struct AliasedWindowUDFImpl { + inner: Arc, + aliases: Vec, +} + +impl AliasedWindowUDFImpl { + pub fn new( + inner: Arc, + new_aliases: impl IntoIterator, + ) -> Self { + let mut aliases = inner.aliases().to_vec(); + aliases.extend(new_aliases.into_iter().map(|s| s.to_string())); + + Self { inner, aliases } + } +} + +impl WindowUDFImpl for AliasedWindowUDFImpl { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + self.inner.name() + } + + fn signature(&self) -> &Signature { + self.inner.signature() + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + self.inner.return_type(arg_types) + } + + fn partition_evaluator(&self) -> Result> { + self.inner.partition_evaluator() + } + + fn aliases(&self) -> &[String] { + &self.aliases + } } /// Implementation of [`WindowUDFImpl`] that wraps the function style pointers diff --git a/datafusion/functions-array/Cargo.toml b/datafusion/functions-array/Cargo.toml index 17be817238c2..ba7d9e26ecaf 100644 --- a/datafusion/functions-array/Cargo.toml +++ b/datafusion/functions-array/Cargo.toml @@ -38,6 +38,9 @@ path = "src/lib.rs" [dependencies] arrow = { workspace = true } +arrow-array = { workspace = true } +arrow-buffer = { workspace = true } +arrow-schema = { workspace = true } datafusion-common = { workspace = true } datafusion-execution = { workspace = true } datafusion-expr = { workspace = true } diff --git a/datafusion/functions-array/src/concat.rs b/datafusion/functions-array/src/concat.rs new file mode 100644 index 000000000000..a8e7d1008f46 --- /dev/null +++ b/datafusion/functions-array/src/concat.rs @@ -0,0 +1,436 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
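// An illustrative, self-contained sketch (not part of this patch) of the
// `with_aliases` builder added above, using a made-up minimal window UDF.
// Crate paths follow the datafusion_expr API shown in this diff; anything not
// visible in the diff (the `SmoothIt` field, the no-op evaluator, the return
// type) is an assumption for the example only.
use std::any::Any;

use arrow::datatypes::DataType;
use datafusion_common::Result;
use datafusion_expr::{
    PartitionEvaluator, Signature, Volatility, WindowUDF, WindowUDFImpl,
};

#[derive(Debug)]
struct NoopEvaluator;
impl PartitionEvaluator for NoopEvaluator {}

#[derive(Debug)]
struct SmoothIt {
    signature: Signature,
}

impl WindowUDFImpl for SmoothIt {
    fn as_any(&self) -> &dyn Any {
        self
    }
    fn name(&self) -> &str {
        "smooth_it"
    }
    fn signature(&self) -> &Signature {
        &self.signature
    }
    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
        Ok(DataType::Float64)
    }
    fn partition_evaluator(&self) -> Result<Box<dyn PartitionEvaluator>> {
        Ok(Box::new(NoopEvaluator))
    }
}

fn main() {
    let udf = WindowUDF::from(SmoothIt {
        signature: Signature::any(1, Volatility::Immutable),
    })
    // wraps the inner impl in `AliasedWindowUDFImpl`, extending its alias list
    .with_aliases(["smooth", "smooth_it_out"]);

    // the primary name is unchanged; the extra names are reported by `aliases()`
    assert_eq!(udf.name(), "smooth_it");
    assert_eq!(
        udf.aliases(),
        &["smooth".to_string(), "smooth_it_out".to_string()]
    );
}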
+ +// Includes `array append`, `array prepend`, and `array concat` functions + +use std::{any::Any, cmp::Ordering, sync::Arc}; + +use arrow::array::{Capacities, MutableArrayData}; +use arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait}; +use arrow_buffer::{BooleanBufferBuilder, NullBuffer, OffsetBuffer}; +use arrow_schema::{DataType, Field}; +use datafusion_common::Result; +use datafusion_common::{ + cast::as_generic_list_array, exec_err, not_impl_err, plan_err, utils::list_ndims, +}; +use datafusion_expr::expr::ScalarFunction; +use datafusion_expr::Expr; +use datafusion_expr::{ + type_coercion::binary::get_wider_type, ColumnarValue, ScalarUDFImpl, Signature, + Volatility, +}; + +use crate::utils::{align_array_dimensions, check_datatypes, make_scalar_function}; + +make_udf_function!( + ArrayAppend, + array_append, + array element, // arg name + "appends an element to the end of an array.", // doc + array_append_udf // internal function name +); + +#[derive(Debug)] +pub(super) struct ArrayAppend { + signature: Signature, + aliases: Vec, +} + +impl ArrayAppend { + pub fn new() -> Self { + Self { + signature: Signature::array_and_element(Volatility::Immutable), + aliases: vec![ + String::from("array_append"), + String::from("list_append"), + String::from("array_push_back"), + String::from("list_push_back"), + ], + } + } +} + +impl ScalarUDFImpl for ArrayAppend { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "array_append" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + Ok(arg_types[0].clone()) + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + make_scalar_function(array_append_inner)(args) + } + + fn aliases(&self) -> &[String] { + &self.aliases + } +} + +make_udf_function!( + ArrayPrepend, + array_prepend, + element array, + "Prepends an element to the beginning of an array.", + array_prepend_udf +); + +#[derive(Debug)] +pub(super) struct ArrayPrepend { + signature: Signature, + aliases: Vec, +} + +impl ArrayPrepend { + pub fn new() -> Self { + Self { + signature: Signature::element_and_array(Volatility::Immutable), + aliases: vec![ + String::from("array_prepend"), + String::from("list_prepend"), + String::from("array_push_front"), + String::from("list_push_front"), + ], + } + } +} + +impl ScalarUDFImpl for ArrayPrepend { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "array_prepend" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + Ok(arg_types[1].clone()) + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + make_scalar_function(array_prepend_inner)(args) + } + + fn aliases(&self) -> &[String] { + &self.aliases + } +} + +make_udf_function!( + ArrayConcat, + array_concat, + "Concatenates arrays.", + array_concat_udf +); + +#[derive(Debug)] +pub(super) struct ArrayConcat { + signature: Signature, + aliases: Vec, +} + +impl ArrayConcat { + pub fn new() -> Self { + Self { + signature: Signature::variadic_any(Volatility::Immutable), + aliases: vec![ + String::from("array_concat"), + String::from("array_cat"), + String::from("list_concat"), + String::from("list_cat"), + ], + } + } +} + +impl ScalarUDFImpl for ArrayConcat { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "array_concat" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + let 
mut expr_type = DataType::Null; + let mut max_dims = 0; + for arg_type in arg_types { + match arg_type { + DataType::List(field) => { + if !field.data_type().equals_datatype(&DataType::Null) { + let dims = list_ndims(arg_type); + expr_type = match max_dims.cmp(&dims) { + Ordering::Greater => expr_type, + Ordering::Equal => get_wider_type(&expr_type, arg_type)?, + Ordering::Less => { + max_dims = dims; + arg_type.clone() + } + }; + } + } + _ => { + return plan_err!( + "The array_concat function can only accept list as the args." + ) + } + } + } + + Ok(expr_type) + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + make_scalar_function(array_concat_inner)(args) + } + + fn aliases(&self) -> &[String] { + &self.aliases + } +} + +/// Array_concat/Array_cat SQL function +pub(crate) fn array_concat_inner(args: &[ArrayRef]) -> Result { + if args.is_empty() { + return exec_err!("array_concat expects at least one arguments"); + } + + let mut new_args = vec![]; + for arg in args { + let ndim = list_ndims(arg.data_type()); + let base_type = datafusion_common::utils::base_type(arg.data_type()); + if ndim == 0 { + return not_impl_err!("Array is not type '{base_type:?}'."); + } + if !base_type.eq(&DataType::Null) { + new_args.push(arg.clone()); + } + } + + match &args[0].data_type() { + DataType::LargeList(_) => concat_internal::(new_args.as_slice()), + _ => concat_internal::(new_args.as_slice()), + } +} + +fn concat_internal(args: &[ArrayRef]) -> Result { + let args = align_array_dimensions::(args.to_vec())?; + + let list_arrays = args + .iter() + .map(|arg| as_generic_list_array::(arg)) + .collect::>>()?; + // Assume number of rows is the same for all arrays + let row_count = list_arrays[0].len(); + + let mut array_lengths = vec![]; + let mut arrays = vec![]; + let mut valid = BooleanBufferBuilder::new(row_count); + for i in 0..row_count { + let nulls = list_arrays + .iter() + .map(|arr| arr.is_null(i)) + .collect::>(); + + // If all the arrays are null, the concatenated array is null + let is_null = nulls.iter().all(|&x| x); + if is_null { + array_lengths.push(0); + valid.append(false); + } else { + // Get all the arrays on i-th row + let values = list_arrays + .iter() + .map(|arr| arr.value(i)) + .collect::>(); + + let elements = values + .iter() + .map(|a| a.as_ref()) + .collect::>(); + + // Concatenated array on i-th row + let concated_array = arrow::compute::concat(elements.as_slice())?; + array_lengths.push(concated_array.len()); + arrays.push(concated_array); + valid.append(true); + } + } + // Assume all arrays have the same data type + let data_type = list_arrays[0].value_type(); + let buffer = valid.finish(); + + let elements = arrays + .iter() + .map(|a| a.as_ref()) + .collect::>(); + + let list_arr = GenericListArray::::new( + Arc::new(Field::new("item", data_type, true)), + OffsetBuffer::from_lengths(array_lengths), + Arc::new(arrow::compute::concat(elements.as_slice())?), + Some(NullBuffer::new(buffer)), + ); + + Ok(Arc::new(list_arr)) +} + +/// Kernal functions + +/// Array_append SQL function +pub(crate) fn array_append_inner(args: &[ArrayRef]) -> Result { + if args.len() != 2 { + return exec_err!("array_append expects two arguments"); + } + + match args[0].data_type() { + DataType::LargeList(_) => general_append_and_prepend::(args, true), + _ => general_append_and_prepend::(args, true), + } +} + +/// Array_prepend SQL function +pub(crate) fn array_prepend_inner(args: &[ArrayRef]) -> Result { + if args.len() != 2 { + return exec_err!("array_prepend expects two arguments"); + } 
+ + match args[1].data_type() { + DataType::LargeList(_) => general_append_and_prepend::(args, false), + _ => general_append_and_prepend::(args, false), + } +} + +fn general_append_and_prepend( + args: &[ArrayRef], + is_append: bool, +) -> Result +where + i64: TryInto, +{ + let (list_array, element_array) = if is_append { + let list_array = as_generic_list_array::(&args[0])?; + let element_array = &args[1]; + check_datatypes("array_append", &[element_array, list_array.values()])?; + (list_array, element_array) + } else { + let list_array = as_generic_list_array::(&args[1])?; + let element_array = &args[0]; + check_datatypes("array_prepend", &[list_array.values(), element_array])?; + (list_array, element_array) + }; + + let res = match list_array.value_type() { + DataType::List(_) => concat_internal::(args)?, + DataType::LargeList(_) => concat_internal::(args)?, + data_type => { + return generic_append_and_prepend::( + list_array, + element_array, + &data_type, + is_append, + ); + } + }; + + Ok(res) +} + +/// Appends or prepends elements to a ListArray. +/// +/// This function takes a ListArray, an ArrayRef, a FieldRef, and a boolean flag +/// indicating whether to append or prepend the elements. It returns a `Result` +/// representing the resulting ListArray after the operation. +/// +/// # Arguments +/// +/// * `list_array` - A reference to the ListArray to which elements will be appended/prepended. +/// * `element_array` - A reference to the Array containing elements to be appended/prepended. +/// * `field` - A reference to the Field describing the data type of the arrays. +/// * `is_append` - A boolean flag indicating whether to append (`true`) or prepend (`false`) elements. +/// +/// # Examples +/// +/// generic_append_and_prepend( +/// [1, 2, 3], 4, append => [1, 2, 3, 4] +/// 5, [6, 7, 8], prepend => [5, 6, 7, 8] +/// ) +fn generic_append_and_prepend( + list_array: &GenericListArray, + element_array: &ArrayRef, + data_type: &DataType, + is_append: bool, +) -> Result +where + i64: TryInto, +{ + let mut offsets = vec![O::usize_as(0)]; + let values = list_array.values(); + let original_data = values.to_data(); + let element_data = element_array.to_data(); + let capacity = Capacities::Array(original_data.len() + element_data.len()); + + let mut mutable = MutableArrayData::with_capacities( + vec![&original_data, &element_data], + false, + capacity, + ); + + let values_index = 0; + let element_index = 1; + + for (row_index, offset_window) in list_array.offsets().windows(2).enumerate() { + let start = offset_window[0].to_usize().unwrap(); + let end = offset_window[1].to_usize().unwrap(); + if is_append { + mutable.extend(values_index, start, end); + mutable.extend(element_index, row_index, row_index + 1); + } else { + mutable.extend(element_index, row_index, row_index + 1); + mutable.extend(values_index, start, end); + } + offsets.push(offsets[row_index] + O::usize_as(end - start + 1)); + } + + let data = mutable.freeze(); + + Ok(Arc::new(GenericListArray::::try_new( + Arc::new(Field::new("item", data_type.to_owned(), true)), + OffsetBuffer::new(offsets.into()), + arrow_array::make_array(data), + None, + )?)) +} diff --git a/datafusion/functions-array/src/kernels.rs b/datafusion/functions-array/src/kernels.rs index ad96d232aa4a..1a6ebdd9029d 100644 --- a/datafusion/functions-array/src/kernels.rs +++ b/datafusion/functions-array/src/kernels.rs @@ -18,22 +18,33 @@ //! 
implementation kernels for array functions use arrow::array::{ - Array, ArrayRef, BooleanArray, Date32Array, Float32Array, Float64Array, - GenericListArray, Int16Array, Int32Array, Int64Array, Int8Array, LargeStringArray, - OffsetSizeTrait, StringArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, + Array, ArrayRef, BooleanArray, Capacities, Date32Array, Float32Array, Float64Array, + GenericListArray, Int16Array, Int32Array, Int64Array, Int8Array, LargeListArray, + LargeStringArray, ListArray, ListBuilder, MutableArrayData, OffsetSizeTrait, + StringArray, StringBuilder, UInt16Array, UInt32Array, UInt64Array, UInt8Array, }; -use arrow::array::{LargeListArray, ListArray}; -use arrow::buffer::OffsetBuffer; -use arrow::datatypes::Field; -use arrow::datatypes::UInt64Type; -use arrow::datatypes::{DataType, Date32Type, IntervalMonthDayNanoType}; +use arrow::compute; +use arrow::datatypes::{ + DataType, Date32Type, Field, IntervalMonthDayNanoType, UInt64Type, +}; +use arrow::row::{RowConverter, SortField}; +use arrow_array::new_null_array; +use arrow_buffer::{BooleanBufferBuilder, NullBuffer, OffsetBuffer}; +use arrow_schema::FieldRef; +use arrow_schema::SortOptions; + use datafusion_common::cast::{ - as_date32_array, as_generic_list_array, as_int64_array, as_interval_mdn_array, - as_large_list_array, as_list_array, as_null_array, as_string_array, + as_date32_array, as_generic_list_array, as_generic_string_array, as_int64_array, + as_interval_mdn_array, as_large_list_array, as_list_array, as_null_array, + as_string_array, }; -use datafusion_common::{exec_err, not_impl_datafusion_err, DataFusionError, Result}; +use datafusion_common::{ + exec_err, internal_err, not_impl_datafusion_err, DataFusionError, Result, +}; +use itertools::Itertools; use std::any::type_name; use std::sync::Arc; + macro_rules! 
downcast_arg { ($ARG:expr, $ARRAY_TYPE:ident) => {{ $ARG.as_any().downcast_ref::<$ARRAY_TYPE>().ok_or_else(|| { @@ -259,6 +270,98 @@ pub(super) fn array_to_string(args: &[ArrayRef]) -> Result { Ok(Arc::new(string_arr)) } +/// Splits string at occurrences of delimiter and returns an array of parts +/// string_to_array('abc~@~def~@~ghi', '~@~') = '["abc", "def", "ghi"]' +pub fn string_to_array(args: &[ArrayRef]) -> Result { + if args.len() < 2 || args.len() > 3 { + return exec_err!("string_to_array expects two or three arguments"); + } + let string_array = as_generic_string_array::(&args[0])?; + let delimiter_array = as_generic_string_array::(&args[1])?; + + let mut list_builder = ListBuilder::new(StringBuilder::with_capacity( + string_array.len(), + string_array.get_buffer_memory_size(), + )); + + match args.len() { + 2 => { + string_array.iter().zip(delimiter_array.iter()).for_each( + |(string, delimiter)| { + match (string, delimiter) { + (Some(string), Some("")) => { + list_builder.values().append_value(string); + list_builder.append(true); + } + (Some(string), Some(delimiter)) => { + string.split(delimiter).for_each(|s| { + list_builder.values().append_value(s); + }); + list_builder.append(true); + } + (Some(string), None) => { + string.chars().map(|c| c.to_string()).for_each(|c| { + list_builder.values().append_value(c); + }); + list_builder.append(true); + } + _ => list_builder.append(false), // null value + } + }, + ); + } + + 3 => { + let null_value_array = as_generic_string_array::(&args[2])?; + string_array + .iter() + .zip(delimiter_array.iter()) + .zip(null_value_array.iter()) + .for_each(|((string, delimiter), null_value)| { + match (string, delimiter) { + (Some(string), Some("")) => { + if Some(string) == null_value { + list_builder.values().append_null(); + } else { + list_builder.values().append_value(string); + } + list_builder.append(true); + } + (Some(string), Some(delimiter)) => { + string.split(delimiter).for_each(|s| { + if Some(s) == null_value { + list_builder.values().append_null(); + } else { + list_builder.values().append_value(s); + } + }); + list_builder.append(true); + } + (Some(string), None) => { + string.chars().map(|c| c.to_string()).for_each(|c| { + if Some(c.as_str()) == null_value { + list_builder.values().append_null(); + } else { + list_builder.values().append_value(c); + } + }); + list_builder.append(true); + } + _ => list_builder.append(false), // null value + } + }); + } + _ => { + return exec_err!( + "Expect string_to_array function to take two or three parameters" + ) + } + } + + let list_array = list_builder.finish(); + Ok(Arc::new(list_array) as ArrayRef) +} + /// Generates an array of integers from start to stop with a given step. /// /// This function takes 1 to 3 ArrayRefs as arguments, representing start, stop, and step values. @@ -291,39 +394,66 @@ pub(super) fn gen_range(args: &[ArrayRef], include_upper: bool) -> Result { + return exec_err!( + "step can't be 0 for function {}(start [, stop, step])", + if include_upper { + "generate_series" + } else { + "range" + } + ); + } + Some((start, stop, step)) => { + // Below, we utilize `usize` to represent steps. + // On 32-bit targets, the absolute value of `i64` may fail to fit into `usize`. 
+ let step_abs = usize::try_from(step.unsigned_abs()).map_err(|_| { + not_impl_datafusion_err!("step {} can't fit into usize", step) + })?; + values.extend( + gen_range_iter(start, stop, step < 0, include_upper) + .step_by(step_abs), + ); + offsets.push(values.len() as i32); + valid.append(true); + } + // If any of the arguments is NULL, append a NULL value to the result. + None => { + offsets.push(values.len() as i32); + valid.append(false); + } + }; } let arr = Arc::new(ListArray::try_new( Arc::new(Field::new("item", DataType::Int64, true)), OffsetBuffer::new(offsets.into()), Arc::new(Int64Array::from(values)), - None, + Some(NullBuffer::new(valid.finish())), )?); Ok(arr) } +/// Get the (start, stop, step) args for the range and generate_series function. +/// If any of the arguments is NULL, returns None. +fn retrieve_range_args( + start_array: Option<&Int64Array>, + stop: Option, + step_array: Option<&Int64Array>, + idx: usize, +) -> Option<(i64, i64, i64)> { + // Default start value is 0 if not provided + let start = + start_array.map_or(Some(0), |arr| arr.is_valid(idx).then(|| arr.value(idx)))?; + let stop = stop?; + // Default step value is 1 if not provided + let step = + step_array.map_or(Some(1), |arr| arr.is_valid(idx).then(|| arr.value(idx)))?; + Some((start, stop, step)) +} + /// Returns an iterator of i64 values from start to stop fn gen_range_iter( start: i64, @@ -604,6 +734,152 @@ fn general_array_length(array: &[ArrayRef]) -> Result Result { + if args.len() != 2 { + return exec_err!("array_repeat expects two arguments"); + } + + let element = &args[0]; + let count_array = as_int64_array(&args[1])?; + + match element.data_type() { + DataType::List(_) => { + let list_array = as_list_array(element)?; + general_list_repeat::(list_array, count_array) + } + DataType::LargeList(_) => { + let list_array = as_large_list_array(element)?; + general_list_repeat::(list_array, count_array) + } + _ => general_repeat::(element, count_array), + } +} + +/// For each element of `array[i]` repeat `count_array[i]` times. +/// +/// Assumption for the input: +/// 1. `count[i] >= 0` +/// 2. `array.len() == count_array.len()` +/// +/// For example, +/// ```text +/// array_repeat( +/// [1, 2, 3], [2, 0, 1] => [[1, 1], [], [3]] +/// ) +/// ``` +fn general_repeat( + array: &ArrayRef, + count_array: &Int64Array, +) -> Result { + let data_type = array.data_type(); + let mut new_values = vec![]; + + let count_vec = count_array + .values() + .to_vec() + .iter() + .map(|x| *x as usize) + .collect::>(); + + for (row_index, &count) in count_vec.iter().enumerate() { + let repeated_array = if array.is_null(row_index) { + new_null_array(data_type, count) + } else { + let original_data = array.to_data(); + let capacity = Capacities::Array(count); + let mut mutable = + MutableArrayData::with_capacities(vec![&original_data], false, capacity); + + for _ in 0..count { + mutable.extend(0, row_index, row_index + 1); + } + + let data = mutable.freeze(); + arrow_array::make_array(data) + }; + new_values.push(repeated_array); + } + + let new_values: Vec<_> = new_values.iter().map(|a| a.as_ref()).collect(); + let values = compute::concat(&new_values)?; + + Ok(Arc::new(GenericListArray::::try_new( + Arc::new(Field::new("item", data_type.to_owned(), true)), + OffsetBuffer::from_lengths(count_vec), + values, + None, + )?)) +} + +/// Handle List version of `general_repeat` +/// +/// For each element of `list_array[i]` repeat `count_array[i]` times. 
+/// +/// For example, +/// ```text +/// array_repeat( +/// [[1, 2, 3], [4, 5], [6]], [2, 0, 1] => [[[1, 2, 3], [1, 2, 3]], [], [[6]]] +/// ) +/// ``` +fn general_list_repeat( + list_array: &GenericListArray, + count_array: &Int64Array, +) -> Result { + let data_type = list_array.data_type(); + let value_type = list_array.value_type(); + let mut new_values = vec![]; + + let count_vec = count_array + .values() + .to_vec() + .iter() + .map(|x| *x as usize) + .collect::>(); + + for (list_array_row, &count) in list_array.iter().zip(count_vec.iter()) { + let list_arr = match list_array_row { + Some(list_array_row) => { + let original_data = list_array_row.to_data(); + let capacity = Capacities::Array(original_data.len() * count); + let mut mutable = MutableArrayData::with_capacities( + vec![&original_data], + false, + capacity, + ); + + for _ in 0..count { + mutable.extend(0, 0, original_data.len()); + } + + let data = mutable.freeze(); + let repeated_array = arrow_array::make_array(data); + + let list_arr = GenericListArray::::try_new( + Arc::new(Field::new("item", value_type.clone(), true)), + OffsetBuffer::::from_lengths(vec![original_data.len(); count]), + repeated_array, + None, + )?; + Arc::new(list_arr) as ArrayRef + } + None => new_null_array(data_type, count), + }; + new_values.push(list_arr); + } + + let lengths = new_values.iter().map(|a| a.len()).collect::>(); + let new_values: Vec<_> = new_values.iter().map(|a| a.as_ref()).collect(); + let values = compute::concat(&new_values)?; + + Ok(Arc::new(ListArray::try_new( + Arc::new(Field::new("item", data_type.to_owned(), true)), + OffsetBuffer::::from_lengths(lengths), + values, + None, + )?)) +} + /// Array_length SQL function pub fn array_length(args: &[ArrayRef]) -> Result { if args.len() != 1 && args.len() != 2 { @@ -617,6 +893,89 @@ pub fn array_length(args: &[ArrayRef]) -> Result { } } +/// Array_sort SQL function +pub fn array_sort(args: &[ArrayRef]) -> Result { + if args.is_empty() || args.len() > 3 { + return exec_err!("array_sort expects one to three arguments"); + } + + let sort_option = match args.len() { + 1 => None, + 2 => { + let sort = as_string_array(&args[1])?.value(0); + Some(SortOptions { + descending: order_desc(sort)?, + nulls_first: true, + }) + } + 3 => { + let sort = as_string_array(&args[1])?.value(0); + let nulls_first = as_string_array(&args[2])?.value(0); + Some(SortOptions { + descending: order_desc(sort)?, + nulls_first: order_nulls_first(nulls_first)?, + }) + } + _ => return exec_err!("array_sort expects 1 to 3 arguments"), + }; + + let list_array = as_list_array(&args[0])?; + let row_count = list_array.len(); + + let mut array_lengths = vec![]; + let mut arrays = vec![]; + let mut valid = BooleanBufferBuilder::new(row_count); + for i in 0..row_count { + if list_array.is_null(i) { + array_lengths.push(0); + valid.append(false); + } else { + let arr_ref = list_array.value(i); + let arr_ref = arr_ref.as_ref(); + + let sorted_array = compute::sort(arr_ref, sort_option)?; + array_lengths.push(sorted_array.len()); + arrays.push(sorted_array); + valid.append(true); + } + } + + // Assume all arrays have the same data type + let data_type = list_array.value_type(); + let buffer = valid.finish(); + + let elements = arrays + .iter() + .map(|a| a.as_ref()) + .collect::>(); + + let list_arr = ListArray::new( + Arc::new(Field::new("item", data_type, true)), + OffsetBuffer::from_lengths(array_lengths), + Arc::new(compute::concat(elements.as_slice())?), + Some(NullBuffer::new(buffer)), + ); + Ok(Arc::new(list_arr)) +} + 
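// A usage sketch for the `array_sort` kernel above (assumed datafusion 36-era
// async SQL API; requires tokio; the exact printed output is the expected
// behavior, not something verified here). The optional second argument is
// parsed by `order_desc` ('ASC'/'DESC') and the optional third by
// `order_nulls_first` ('NULLS FIRST'/'NULLS LAST').
use datafusion::error::Result;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();
    // expected output row: [3, 2, 1, NULL] (descending, nulls last)
    ctx.sql("SELECT array_sort(make_array(2, 1, 3, NULL), 'DESC', 'NULLS LAST')")
        .await?
        .show()
        .await?;
    Ok(())
}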
+fn order_desc(modifier: &str) -> Result { + match modifier.to_uppercase().as_str() { + "DESC" => Ok(true), + "ASC" => Ok(false), + _ => exec_err!("the second parameter of array_sort expects DESC or ASC"), + } +} + +fn order_nulls_first(modifier: &str) -> Result { + match modifier.to_uppercase().as_str() { + "NULLS FIRST" => Ok(true), + "NULLS LAST" => Ok(false), + _ => exec_err!( + "the third parameter of array_sort expects NULLS FIRST or NULLS LAST" + ), + } +} + // Create new offsets that are euqiavlent to `flatten` the array. fn get_offsets_for_flatten( offsets: OffsetBuffer, @@ -685,3 +1044,65 @@ pub fn flatten(args: &[ArrayRef]) -> Result { } } } + +/// array_distinct SQL function +/// example: from list [1, 3, 2, 3, 1, 2, 4] to [1, 2, 3, 4] +pub fn array_distinct(args: &[ArrayRef]) -> Result { + if args.len() != 1 { + return exec_err!("array_distinct needs one argument"); + } + + // handle null + if args[0].data_type() == &DataType::Null { + return Ok(args[0].clone()); + } + + // handle for list & largelist + match args[0].data_type() { + DataType::List(field) => { + let array = as_list_array(&args[0])?; + general_array_distinct(array, field) + } + DataType::LargeList(field) => { + let array = as_large_list_array(&args[0])?; + general_array_distinct(array, field) + } + array_type => exec_err!("array_distinct does not support type '{array_type:?}'"), + } +} + +pub fn general_array_distinct( + array: &GenericListArray, + field: &FieldRef, +) -> Result { + let dt = array.value_type(); + let mut offsets = Vec::with_capacity(array.len()); + offsets.push(OffsetSize::usize_as(0)); + let mut new_arrays = Vec::with_capacity(array.len()); + let converter = RowConverter::new(vec![SortField::new(dt)])?; + // distinct for each list in ListArray + for arr in array.iter().flatten() { + let values = converter.convert_columns(&[arr])?; + // sort elements in list and remove duplicates + let rows = values.iter().sorted().dedup().collect::>(); + let last_offset: OffsetSize = offsets.last().copied().unwrap(); + offsets.push(last_offset + OffsetSize::usize_as(rows.len())); + let arrays = converter.convert_rows(rows)?; + let array = match arrays.first() { + Some(array) => array.clone(), + None => { + return internal_err!("array_distinct: failed to get array from rows") + } + }; + new_arrays.push(array); + } + let offsets = OffsetBuffer::new(offsets.into()); + let new_arrays_ref = new_arrays.iter().map(|v| v.as_ref()).collect::>(); + let values = compute::concat(&new_arrays_ref)?; + Ok(Arc::new(GenericListArray::::try_new( + field.clone(), + offsets, + values, + None, + )?)) +} diff --git a/datafusion/functions-array/src/lib.rs b/datafusion/functions-array/src/lib.rs index 73055966ee46..31b971a42297 100644 --- a/datafusion/functions-array/src/lib.rs +++ b/datafusion/functions-array/src/lib.rs @@ -29,7 +29,9 @@ pub mod macros; mod array_has; +mod concat; mod kernels; +mod make_array; mod udf; mod utils; @@ -44,32 +46,48 @@ pub mod expr_fn { pub use super::array_has::array_has; pub use super::array_has::array_has_all; pub use super::array_has::array_has_any; + pub use super::concat::array_append; + pub use super::concat::array_concat; + pub use super::concat::array_prepend; + pub use super::make_array::make_array; pub use super::udf::array_dims; + pub use super::udf::array_distinct; pub use super::udf::array_empty; pub use super::udf::array_length; pub use super::udf::array_ndims; + pub use super::udf::array_repeat; + pub use super::udf::array_sort; pub use super::udf::array_to_string; pub use 
super::udf::cardinality; pub use super::udf::flatten; pub use super::udf::gen_series; pub use super::udf::range; + pub use super::udf::string_to_array; } /// Registers all enabled packages with a [`FunctionRegistry`] pub fn register_all(registry: &mut dyn FunctionRegistry) -> Result<()> { let functions: Vec> = vec![ udf::array_to_string_udf(), + udf::string_to_array_udf(), udf::range_udf(), udf::gen_series_udf(), udf::array_dims_udf(), udf::cardinality_udf(), udf::array_ndims_udf(), + concat::array_append_udf(), + concat::array_prepend_udf(), + concat::array_concat_udf(), + make_array::make_array_udf(), array_has::array_has_udf(), array_has::array_has_all_udf(), array_has::array_has_any_udf(), udf::array_empty_udf(), udf::array_length_udf(), udf::flatten_udf(), + udf::array_sort_udf(), + udf::array_distinct_udf(), + udf::array_repeat_udf(), ]; functions.into_iter().try_for_each(|udf| { let existing_udf = registry.register_udf(udf)?; diff --git a/datafusion/functions-array/src/macros.rs b/datafusion/functions-array/src/macros.rs index c503fde05b18..c49f5830b8d5 100644 --- a/datafusion/functions-array/src/macros.rs +++ b/datafusion/functions-array/src/macros.rs @@ -76,4 +76,34 @@ macro_rules! make_udf_function { } } }; + ($UDF:ty, $EXPR_FN:ident, $DOC:expr , $SCALAR_UDF_FN:ident) => { + paste::paste! { + // "fluent expr_fn" style function + #[doc = $DOC] + pub fn $EXPR_FN(arg: Vec) -> Expr { + Expr::ScalarFunction(ScalarFunction::new_udf( + $SCALAR_UDF_FN(), + arg, + )) + } + + /// Singleton instance of [`$UDF`], ensures the UDF is only created once + /// named STATIC_$(UDF). For example `STATIC_ArrayToString` + #[allow(non_upper_case_globals)] + static [< STATIC_ $UDF >]: std::sync::OnceLock> = + std::sync::OnceLock::new(); + /// ScalarFunction that returns a [`ScalarUDF`] for [`$UDF`] + /// + /// [`ScalarUDF`]: datafusion_expr::ScalarUDF + pub fn $SCALAR_UDF_FN() -> std::sync::Arc { + [< STATIC_ $UDF >] + .get_or_init(|| { + std::sync::Arc::new(datafusion_expr::ScalarUDF::new_from_impl( + <$UDF>::new(), + )) + }) + .clone() + } + } + }; } diff --git a/datafusion/functions-array/src/make_array.rs b/datafusion/functions-array/src/make_array.rs new file mode 100644 index 000000000000..a371ea767b15 --- /dev/null +++ b/datafusion/functions-array/src/make_array.rs @@ -0,0 +1,221 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
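// Illustrative only: how the two `make_udf_function!` arms above shape the
// generated "fluent" expr_fn API. The fixed-arity arm (used for e.g.
// `array_append`) takes one `Expr` per named argument, while the new
// doc-only arm (used for `array_concat` and `make_array`) takes a single
// `Vec<Expr>`. The column names below are made up for the example.
use datafusion_expr::{col, lit, Expr};
use datafusion_functions_array::expr_fn::{array_append, array_concat, make_array};

fn example_exprs() -> Vec<Expr> {
    // fixed-arity arm: `array_append(array, element)`
    let appended = array_append(col("tags"), lit("new-tag"));

    // variadic arm: a single Vec<Expr> argument
    let concatenated =
        array_concat(vec![col("tags"), make_array(vec![lit("a"), lit("b")])]);

    vec![appended, concatenated]
}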
+ +// core array function like `make_array` + +use std::{any::Any, sync::Arc}; + +use arrow::array::{ArrayData, Capacities, MutableArrayData}; +use arrow_array::{ + new_null_array, Array, ArrayRef, GenericListArray, NullArray, OffsetSizeTrait, +}; +use arrow_buffer::OffsetBuffer; +use arrow_schema::{DataType, Field}; +use datafusion_common::Result; +use datafusion_common::{plan_err, utils::array_into_list_array}; +use datafusion_expr::expr::ScalarFunction; +use datafusion_expr::Expr; +use datafusion_expr::{ + ColumnarValue, ScalarUDFImpl, Signature, TypeSignature, Volatility, +}; + +use crate::utils::make_scalar_function; + +make_udf_function!( + MakeArray, + make_array, + "Returns an Arrow array using the specified input expressions.", + make_array_udf +); + +#[derive(Debug)] +pub struct MakeArray { + signature: Signature, + aliases: Vec, +} + +impl MakeArray { + pub fn new() -> Self { + Self { + signature: Signature::one_of( + vec![TypeSignature::VariadicEqual, TypeSignature::Any(0)], + Volatility::Immutable, + ), + aliases: vec![String::from("make_array"), String::from("make_list")], + } + } +} + +impl ScalarUDFImpl for MakeArray { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "make_array" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result { + match arg_types.len() { + 0 => Ok(DataType::List(Arc::new(Field::new( + "item", + DataType::Null, + true, + )))), + _ => { + let mut expr_type = DataType::Null; + for arg_type in arg_types { + if !arg_type.equals_datatype(&DataType::Null) { + expr_type = arg_type.clone(); + break; + } + } + + Ok(DataType::List(Arc::new(Field::new( + "item", expr_type, true, + )))) + } + } + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { + make_scalar_function(make_array_inner)(args) + } + + fn aliases(&self) -> &[String] { + &self.aliases + } +} + +/// `make_array` SQL function +pub(crate) fn make_array_inner(arrays: &[ArrayRef]) -> Result { + let mut data_type = DataType::Null; + for arg in arrays { + let arg_data_type = arg.data_type(); + if !arg_data_type.equals_datatype(&DataType::Null) { + data_type = arg_data_type.clone(); + break; + } + } + + match data_type { + // Either an empty array or all nulls: + DataType::Null => { + let array = + new_null_array(&DataType::Null, arrays.iter().map(|a| a.len()).sum()); + Ok(Arc::new(array_into_list_array(array))) + } + DataType::LargeList(..) => array_array::(arrays, data_type), + _ => array_array::(arrays, data_type), + } +} + +/// Convert one or more [`ArrayRef`] of the same type into a +/// `ListArray` or 'LargeListArray' depending on the offset size. 
+/// +/// # Example (non nested) +/// +/// Calling `array(col1, col2)` where col1 and col2 are non nested +/// would return a single new `ListArray`, where each row was a list +/// of 2 elements: +/// +/// ```text +/// ┌─────────┐ ┌─────────┐ ┌──────────────┐ +/// │ ┌─────┐ │ │ ┌─────┐ │ │ ┌──────────┐ │ +/// │ │ A │ │ │ │ X │ │ │ │ [A, X] │ │ +/// │ ├─────┤ │ │ ├─────┤ │ │ ├──────────┤ │ +/// │ │NULL │ │ │ │ Y │ │──────────▶│ │[NULL, Y] │ │ +/// │ ├─────┤ │ │ ├─────┤ │ │ ├──────────┤ │ +/// │ │ C │ │ │ │ Z │ │ │ │ [C, Z] │ │ +/// │ └─────┘ │ │ └─────┘ │ │ └──────────┘ │ +/// └─────────┘ └─────────┘ └──────────────┘ +/// col1 col2 output +/// ``` +/// +/// # Example (nested) +/// +/// Calling `array(col1, col2)` where col1 and col2 are lists +/// would return a single new `ListArray`, where each row was a list +/// of the corresponding elements of col1 and col2. +/// +/// ``` text +/// ┌──────────────┐ ┌──────────────┐ ┌─────────────────────────────┐ +/// │ ┌──────────┐ │ │ ┌──────────┐ │ │ ┌────────────────────────┐ │ +/// │ │ [A, X] │ │ │ │ [] │ │ │ │ [[A, X], []] │ │ +/// │ ├──────────┤ │ │ ├──────────┤ │ │ ├────────────────────────┤ │ +/// │ │[NULL, Y] │ │ │ │[Q, R, S] │ │───────▶│ │ [[NULL, Y], [Q, R, S]] │ │ +/// │ ├──────────┤ │ │ ├──────────┤ │ │ ├────────────────────────│ │ +/// │ │ [C, Z] │ │ │ │ NULL │ │ │ │ [[C, Z], NULL] │ │ +/// │ └──────────┘ │ │ └──────────┘ │ │ └────────────────────────┘ │ +/// └──────────────┘ └──────────────┘ └─────────────────────────────┘ +/// col1 col2 output +/// ``` +fn array_array( + args: &[ArrayRef], + data_type: DataType, +) -> Result { + // do not accept 0 arguments. + if args.is_empty() { + return plan_err!("Array requires at least one argument"); + } + + let mut data = vec![]; + let mut total_len = 0; + for arg in args { + let arg_data = if arg.as_any().is::() { + ArrayData::new_empty(&data_type) + } else { + arg.to_data() + }; + total_len += arg_data.len(); + data.push(arg_data); + } + + let mut offsets: Vec = Vec::with_capacity(total_len); + offsets.push(O::usize_as(0)); + + let capacity = Capacities::Array(total_len); + let data_ref = data.iter().collect::>(); + let mut mutable = MutableArrayData::with_capacities(data_ref, true, capacity); + + let num_rows = args[0].len(); + for row_idx in 0..num_rows { + for (arr_idx, arg) in args.iter().enumerate() { + if !arg.as_any().is::() + && !arg.is_null(row_idx) + && arg.is_valid(row_idx) + { + mutable.extend(arr_idx, row_idx, row_idx + 1); + } else { + mutable.extend_nulls(1); + } + } + offsets.push(O::usize_as(mutable.len())); + } + let data = mutable.freeze(); + + Ok(Arc::new(GenericListArray::::try_new( + Arc::new(Field::new("item", data_type, true)), + OffsetBuffer::new(offsets.into()), + arrow_array::make_array(data), + None, + )?)) +} diff --git a/datafusion/functions-array/src/udf.rs b/datafusion/functions-array/src/udf.rs index b2c310e1701d..9fd9e0309bde 100644 --- a/datafusion/functions-array/src/udf.rs +++ b/datafusion/functions-array/src/udf.rs @@ -17,15 +17,17 @@ //! [`ScalarUDFImpl`] definitions for array functions. 
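// A small usage sketch for the `make_array` UDF above (assumed datafusion
// 36-era async SQL API; requires tokio; output shown is the expected result,
// not verified here). The nested call mirrors the second diagram: each input
// list becomes one element of the output list.
use datafusion::error::Result;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();
    // expected output row: [[1, 2], [3]]
    ctx.sql("SELECT make_array(make_array(1, 2), make_array(3))")
        .await?
        .show()
        .await?;
    // `make_list` is expected to resolve to the same UDF via its alias list
    ctx.sql("SELECT make_list(1, 2, 3)").await?.show().await?;
    Ok(())
}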
+use arrow::array::{NullArray, StringArray}; use arrow::datatypes::DataType; use arrow::datatypes::Field; use arrow::datatypes::IntervalUnit::MonthDayNano; +use arrow_schema::DataType::{LargeUtf8, List, Utf8}; use datafusion_common::exec_err; use datafusion_common::plan_err; use datafusion_common::Result; use datafusion_expr::expr::ScalarFunction; use datafusion_expr::Expr; -use datafusion_expr::TypeSignature::Exact; +use datafusion_expr::TypeSignature; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; use std::any::Any; use std::sync::Arc; @@ -89,6 +91,81 @@ impl ScalarUDFImpl for ArrayToString { } } +make_udf_function!(StringToArray, + string_to_array, + string delimiter null_string, // arg name + "splits a `string` based on a `delimiter` and returns an array of parts. Any parts matching the optional `null_string` will be replaced with `NULL`", // doc + string_to_array_udf // internal function name +); +#[derive(Debug)] +pub(super) struct StringToArray { + signature: Signature, + aliases: Vec, +} + +impl StringToArray { + pub fn new() -> Self { + Self { + signature: Signature::variadic_any(Volatility::Immutable), + aliases: vec![ + String::from("string_to_array"), + String::from("string_to_list"), + ], + } + } +} + +impl ScalarUDFImpl for StringToArray { + fn as_any(&self) -> &dyn Any { + self + } + fn name(&self) -> &str { + "string_to_array" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + use DataType::*; + Ok(match arg_types[0] { + Utf8 | LargeUtf8 => { + List(Arc::new(Field::new("item", arg_types[0].clone(), true))) + } + _ => { + return plan_err!( + "The string_to_array function can only accept Utf8 or LargeUtf8." + ); + } + }) + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + let mut args = ColumnarValue::values_to_arrays(args)?; + // Case: delimiter is NULL, needs to be handled as well. 
+ if args[1].as_any().is::() { + args[1] = Arc::new(StringArray::new_null(args[1].len())); + }; + + match args[0].data_type() { + Utf8 => { + crate::kernels::string_to_array::(&args).map(ColumnarValue::Array) + } + LargeUtf8 => { + crate::kernels::string_to_array::(&args).map(ColumnarValue::Array) + } + other => { + exec_err!("unsupported type for string_to_array function as {other}") + } + } + } + + fn aliases(&self) -> &[String] { + &self.aliases + } +} + make_udf_function!( Range, range, @@ -107,10 +184,10 @@ impl Range { Self { signature: Signature::one_of( vec![ - Exact(vec![Int64]), - Exact(vec![Int64, Int64]), - Exact(vec![Int64, Int64, Int64]), - Exact(vec![Date32, Date32, Interval(MonthDayNano)]), + TypeSignature::Exact(vec![Int64]), + TypeSignature::Exact(vec![Int64, Int64]), + TypeSignature::Exact(vec![Int64, Int64, Int64]), + TypeSignature::Exact(vec![Date32, Date32, Interval(MonthDayNano)]), ], Volatility::Immutable, ), @@ -177,10 +254,10 @@ impl GenSeries { Self { signature: Signature::one_of( vec![ - Exact(vec![Int64]), - Exact(vec![Int64, Int64]), - Exact(vec![Int64, Int64, Int64]), - Exact(vec![Date32, Date32, Interval(MonthDayNano)]), + TypeSignature::Exact(vec![Int64]), + TypeSignature::Exact(vec![Int64, Int64]), + TypeSignature::Exact(vec![Int64, Int64, Int64]), + TypeSignature::Exact(vec![Date32, Date32, Interval(MonthDayNano)]), ], Volatility::Immutable, ), @@ -286,6 +363,70 @@ impl ScalarUDFImpl for ArrayDims { } } +make_udf_function!( + ArraySort, + array_sort, + array desc null_first, + "returns sorted array.", + array_sort_udf +); + +#[derive(Debug)] +pub(super) struct ArraySort { + signature: Signature, + aliases: Vec, +} + +impl ArraySort { + pub fn new() -> Self { + Self { + signature: Signature::variadic_any(Volatility::Immutable), + aliases: vec!["array_sort".to_string(), "list_sort".to_string()], + } + } +} + +impl ScalarUDFImpl for ArraySort { + fn as_any(&self) -> &dyn Any { + self + } + fn name(&self) -> &str { + "array_sort" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + use DataType::*; + match &arg_types[0] { + List(field) | FixedSizeList(field, _) => Ok(List(Arc::new(Field::new( + "item", + field.data_type().clone(), + true, + )))), + LargeList(field) => Ok(LargeList(Arc::new(Field::new( + "item", + field.data_type().clone(), + true, + )))), + _ => exec_err!( + "Not reachable, data_type should be List, LargeList or FixedSizeList" + ), + } + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + let args = ColumnarValue::values_to_arrays(args)?; + crate::kernels::array_sort(&args).map(ColumnarValue::Array) + } + + fn aliases(&self) -> &[String] { + &self.aliases + } +} + make_udf_function!( Cardinality, cardinality, @@ -448,6 +589,58 @@ impl ScalarUDFImpl for ArrayEmpty { } } +make_udf_function!( + ArrayRepeat, + array_repeat, + element count, // arg name + "returns an array containing element `count` times.", // doc + array_repeat_udf // internal function name +); +#[derive(Debug)] +pub(super) struct ArrayRepeat { + signature: Signature, + aliases: Vec, +} + +impl ArrayRepeat { + pub fn new() -> Self { + Self { + signature: Signature::variadic_any(Volatility::Immutable), + aliases: vec![String::from("array_repeat"), String::from("list_repeat")], + } + } +} + +impl ScalarUDFImpl for ArrayRepeat { + fn as_any(&self) -> &dyn Any { + self + } + fn name(&self) -> &str { + "array_repeat" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn 
return_type(&self, arg_types: &[DataType]) -> Result { + Ok(List(Arc::new(Field::new( + "item", + arg_types[0].clone(), + true, + )))) + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + let args = ColumnarValue::values_to_arrays(args)?; + crate::kernels::array_repeat(&args).map(ColumnarValue::Array) + } + + fn aliases(&self) -> &[String] { + &self.aliases + } +} + make_udf_function!( ArrayLength, array_length, @@ -569,3 +762,67 @@ impl ScalarUDFImpl for Flatten { &self.aliases } } + +make_udf_function!( + ArrayDistinct, + array_distinct, + array, + "return distinct values from the array after removing duplicates.", + array_distinct_udf +); + +#[derive(Debug)] +pub(super) struct ArrayDistinct { + signature: Signature, + aliases: Vec, +} + +impl crate::udf::ArrayDistinct { + pub fn new() -> Self { + Self { + signature: Signature::array(Volatility::Immutable), + aliases: vec!["array_distinct".to_string(), "list_distinct".to_string()], + } + } +} + +impl ScalarUDFImpl for crate::udf::ArrayDistinct { + fn as_any(&self) -> &dyn Any { + self + } + fn name(&self) -> &str { + "array_distinct" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + use DataType::*; + match &arg_types[0] { + List(field) | FixedSizeList(field, _) => Ok(List(Arc::new(Field::new( + "item", + field.data_type().clone(), + true, + )))), + LargeList(field) => Ok(LargeList(Arc::new(Field::new( + "item", + field.data_type().clone(), + true, + )))), + _ => exec_err!( + "Not reachable, data_type should be List, LargeList or FixedSizeList" + ), + } + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + let args = ColumnarValue::values_to_arrays(args)?; + crate::kernels::array_distinct(&args).map(ColumnarValue::Array) + } + + fn aliases(&self) -> &[String] { + &self.aliases + } +} diff --git a/datafusion/functions-array/src/utils.rs b/datafusion/functions-array/src/utils.rs index d374a9f66be0..3a6bb723c1fa 100644 --- a/datafusion/functions-array/src/utils.rs +++ b/datafusion/functions-array/src/utils.rs @@ -17,8 +17,14 @@ //! array function utils +use std::sync::Arc; + use arrow::{array::ArrayRef, datatypes::DataType}; -use datafusion_common::{plan_err, Result}; +use arrow_array::{GenericListArray, OffsetSizeTrait}; +use arrow_buffer::OffsetBuffer; +use arrow_schema::Field; +use datafusion_common::{plan_err, Result, ScalarValue}; +use datafusion_expr::{ColumnarValue, ScalarFunctionImplementation}; pub(crate) fn check_datatypes(name: &str, args: &[&ArrayRef]) -> Result<()> { let data_type = args[0].data_type(); @@ -32,3 +38,124 @@ pub(crate) fn check_datatypes(name: &str, args: &[&ArrayRef]) -> Result<()> { Ok(()) } + +pub(crate) fn make_scalar_function(inner: F) -> ScalarFunctionImplementation +where + F: Fn(&[ArrayRef]) -> Result + Sync + Send + 'static, +{ + Arc::new(move |args: &[ColumnarValue]| { + // first, identify if any of the arguments is an Array. If yes, store its `len`, + // as any scalar will need to be converted to an array of len `len`. 
+ let len = args + .iter() + .fold(Option::::None, |acc, arg| match arg { + ColumnarValue::Scalar(_) => acc, + ColumnarValue::Array(a) => Some(a.len()), + }); + + let is_scalar = len.is_none(); + + let args = ColumnarValue::values_to_arrays(args)?; + + let result = (inner)(&args); + + if is_scalar { + // If all inputs are scalar, keeps output as scalar + let result = result.and_then(|arr| ScalarValue::try_from_array(&arr, 0)); + result.map(ColumnarValue::Scalar) + } else { + result.map(ColumnarValue::Array) + } + }) +} + +pub(crate) fn align_array_dimensions( + args: Vec, +) -> Result> { + let args_ndim = args + .iter() + .map(|arg| datafusion_common::utils::list_ndims(arg.data_type())) + .collect::>(); + let max_ndim = args_ndim.iter().max().unwrap_or(&0); + + // Align the dimensions of the arrays + let aligned_args: Result> = args + .into_iter() + .zip(args_ndim.iter()) + .map(|(array, ndim)| { + if ndim < max_ndim { + let mut aligned_array = array.clone(); + for _ in 0..(max_ndim - ndim) { + let data_type = aligned_array.data_type().to_owned(); + let array_lengths = vec![1; aligned_array.len()]; + let offsets = OffsetBuffer::::from_lengths(array_lengths); + + aligned_array = Arc::new(GenericListArray::::try_new( + Arc::new(Field::new("item", data_type, true)), + offsets, + aligned_array, + None, + )?) + } + Ok(aligned_array) + } else { + Ok(array.clone()) + } + }) + .collect(); + + aligned_args +} + +#[cfg(test)] +mod tests { + use super::*; + use arrow::datatypes::Int64Type; + use arrow_array::ListArray; + use datafusion_common::{cast::as_list_array, utils::array_into_list_array}; + + /// Only test internal functions, array-related sql functions will be tested in sqllogictest `array.slt` + #[test] + fn test_align_array_dimensions() { + let array1d_1 = + Arc::new(ListArray::from_iter_primitive::(vec![ + Some(vec![Some(1), Some(2), Some(3)]), + Some(vec![Some(4), Some(5)]), + ])); + let array1d_2 = + Arc::new(ListArray::from_iter_primitive::(vec![ + Some(vec![Some(6), Some(7), Some(8)]), + ])); + + let array2d_1 = Arc::new(array_into_list_array(array1d_1.clone())) as ArrayRef; + let array2d_2 = Arc::new(array_into_list_array(array1d_2.clone())) as ArrayRef; + + let res = align_array_dimensions::(vec![ + array1d_1.to_owned(), + array2d_2.to_owned(), + ]) + .unwrap(); + + let expected = as_list_array(&array2d_1).unwrap(); + let expected_dim = datafusion_common::utils::list_ndims(array2d_1.data_type()); + assert_ne!(as_list_array(&res[0]).unwrap(), expected); + assert_eq!( + datafusion_common::utils::list_ndims(res[0].data_type()), + expected_dim + ); + + let array3d_1 = Arc::new(array_into_list_array(array2d_1)) as ArrayRef; + let array3d_2 = array_into_list_array(array2d_2.to_owned()); + let res = + align_array_dimensions::(vec![array1d_1, Arc::new(array3d_2.clone())]) + .unwrap(); + + let expected = as_list_array(&array3d_1).unwrap(); + let expected_dim = datafusion_common::utils::list_ndims(array3d_1.data_type()); + assert_ne!(as_list_array(&res[0]).unwrap(), expected); + assert_eq!( + datafusion_common::utils::list_ndims(res[0].data_type()), + expected_dim + ); + } +} diff --git a/datafusion/functions/src/core/arrowtypeof.rs b/datafusion/functions/src/core/arrowtypeof.rs new file mode 100644 index 000000000000..89702d3267ec --- /dev/null +++ b/datafusion/functions/src/core/arrowtypeof.rs @@ -0,0 +1,66 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::datatypes::DataType; +use datafusion_common::{exec_err, Result, ScalarValue}; +use datafusion_expr::ColumnarValue; +use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; +use std::any::Any; + +#[derive(Debug)] +pub(super) struct ArrowTypeOfFunc { + signature: Signature, +} + +impl ArrowTypeOfFunc { + pub fn new() -> Self { + Self { + signature: Signature::any(1, Volatility::Immutable), + } + } +} + +impl ScalarUDFImpl for ArrowTypeOfFunc { + fn as_any(&self) -> &dyn Any { + self + } + fn name(&self) -> &str { + "arrow_typeof" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + Ok(DataType::Utf8) + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + if args.len() != 1 { + return exec_err!( + "arrow_typeof function requires 1 arguments, got {}", + args.len() + ); + } + + let input_data_type = args[0].data_type(); + Ok(ColumnarValue::Scalar(ScalarValue::from(format!( + "{input_data_type}" + )))) + } +} diff --git a/datafusion/functions/src/core/mod.rs b/datafusion/functions/src/core/mod.rs index 842a1db3e0d0..3f13067a4a07 100644 --- a/datafusion/functions/src/core/mod.rs +++ b/datafusion/functions/src/core/mod.rs @@ -17,18 +17,24 @@ //! "core" DataFusion functions +mod arrowtypeof; mod nullif; mod nvl; mod nvl2; +pub mod r#struct; // create UDFs make_udf_function!(nullif::NullIfFunc, NULLIF, nullif); make_udf_function!(nvl::NVLFunc, NVL, nvl); make_udf_function!(nvl2::NVL2Func, NVL2, nvl2); +make_udf_function!(arrowtypeof::ArrowTypeOfFunc, ARROWTYPEOF, arrow_typeof); +make_udf_function!(r#struct::StructFunc, STRUCT, r#struct); // Export the functions out of this package, both as expr_fn as well as a list of functions export_functions!( (nullif, arg_1 arg_2, "returns NULL if value1 equals value2; otherwise it returns value1. This can be used to perform the inverse operation of the COALESCE expression."), (nvl, arg_1 arg_2, "returns value2 if value1 is NULL; otherwise it returns value1"), - (nvl2, arg_1 arg_2 arg_3, "Returns value2 if value1 is not NULL; otherwise, it returns value3.") + (nvl2, arg_1 arg_2 arg_3, "Returns value2 if value1 is not NULL; otherwise, it returns value3."), + (arrow_typeof, arg_1, "Returns the Arrow type of the input expression."), + (r#struct, args, "Returns a struct with the given arguments") ); diff --git a/datafusion/functions/src/core/nullif.rs b/datafusion/functions/src/core/nullif.rs index 3ff8dbd942ff..1e903d7a881d 100644 --- a/datafusion/functions/src/core/nullif.rs +++ b/datafusion/functions/src/core/nullif.rs @@ -15,8 +15,6 @@ // specific language governing permissions and limitations // under the License. -//! 
Encoding expressions - use arrow::datatypes::DataType; use datafusion_common::{exec_err, Result}; use datafusion_expr::ColumnarValue; diff --git a/datafusion/physical-expr/src/struct_expressions.rs b/datafusion/functions/src/core/struct.rs similarity index 73% rename from datafusion/physical-expr/src/struct_expressions.rs rename to datafusion/functions/src/core/struct.rs index f420e062ef91..6236f98794bb 100644 --- a/datafusion/physical-expr/src/struct_expressions.rs +++ b/datafusion/functions/src/core/struct.rs @@ -15,12 +15,12 @@ // specific language governing permissions and limitations // under the License. -//! Struct expressions - -use arrow::array::*; -use arrow::datatypes::Field; +use arrow::datatypes::{DataType, Field, Fields}; +use arrow_array::{ArrayRef, StructArray}; use datafusion_common::{exec_err, Result}; use datafusion_expr::ColumnarValue; +use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; +use std::any::Any; use std::sync::Arc; fn array_struct(args: &[ArrayRef]) -> Result { @@ -47,10 +47,9 @@ fn array_struct(args: &[ArrayRef]) -> Result { Ok(Arc::new(StructArray::from(vec))) } - /// put values in a struct array. -pub fn struct_expr(values: &[ColumnarValue]) -> Result { - let arrays = values +fn struct_expr(args: &[ColumnarValue]) -> Result { + let arrays = args .iter() .map(|x| { Ok(match x { @@ -61,10 +60,55 @@ pub fn struct_expr(values: &[ColumnarValue]) -> Result { .collect::>>()?; Ok(ColumnarValue::Array(array_struct(arrays.as_slice())?)) } +#[derive(Debug)] +pub struct StructFunc { + signature: Signature, +} + +impl StructFunc { + pub fn new() -> Self { + Self { + signature: Signature::variadic_any(Volatility::Immutable), + } + } +} + +impl Default for StructFunc { + fn default() -> Self { + Self::new() + } +} + +impl ScalarUDFImpl for StructFunc { + fn as_any(&self) -> &dyn Any { + self + } + fn name(&self) -> &str { + "struct" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + let return_fields = arg_types + .iter() + .enumerate() + .map(|(pos, dt)| Field::new(format!("c{pos}"), dt.clone(), true)) + .collect::>(); + Ok(DataType::Struct(Fields::from(return_fields))) + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + struct_expr(args) + } +} #[cfg(test)] mod tests { use super::*; + use arrow_array::Int64Array; use datafusion_common::cast::as_struct_array; use datafusion_common::ScalarValue; diff --git a/datafusion/functions/src/macros.rs b/datafusion/functions/src/macros.rs index 1984ae659a81..859964b5b8d5 100644 --- a/datafusion/functions/src/macros.rs +++ b/datafusion/functions/src/macros.rs @@ -72,7 +72,7 @@ macro_rules! 
make_udf_function { /// Return a [`ScalarUDF`] for [`$UDF`] /// /// [`ScalarUDF`]: datafusion_expr::ScalarUDF - fn $NAME() -> std::sync::Arc { + pub fn $NAME() -> std::sync::Arc { $GNAME .get_or_init(|| { std::sync::Arc::new(datafusion_expr::ScalarUDF::new_from_impl( diff --git a/datafusion/functions/src/math/mod.rs b/datafusion/functions/src/math/mod.rs index 3741cc2802bb..e7ede6043a59 100644 --- a/datafusion/functions/src/math/mod.rs +++ b/datafusion/functions/src/math/mod.rs @@ -21,12 +21,16 @@ mod abs; mod acos; mod asin; mod nans; +mod tan; +mod tanh; // create UDFs make_udf_function!(nans::IsNanFunc, ISNAN, isnan); make_udf_function!(abs::AbsFunc, ABS, abs); make_udf_function!(acos::AcosFunc, ACOS, acos); make_udf_function!(asin::AsinFunc, ASIN, asin); +make_udf_function!(tan::TanFunc, TAN, tan); +make_udf_function!(tanh::TanhFunc, TANH, tanh); // Export the functions out of this package, both as expr_fn as well as a list of functions export_functions!( @@ -45,5 +49,7 @@ export_functions!( asin, num, "returns the arc sine or inverse sine of a number" - ) + ), + (tan, num, "returns the tangent of a number"), + (tanh, num, "returns the hyperbolic tangent of a number") ); diff --git a/datafusion/functions/src/math/tan.rs b/datafusion/functions/src/math/tan.rs new file mode 100644 index 000000000000..ea3e002f8489 --- /dev/null +++ b/datafusion/functions/src/math/tan.rs @@ -0,0 +1,98 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Math function: `tan()`. 
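
For orientation (illustrative, not from the diff): once the default `SessionContext` registers the `datafusion-functions` package, the relocated `tan`/`tanh` behave exactly as the old built-ins did from SQL. A rough sketch, assuming the `datafusion` crate with default features and a tokio runtime:

use datafusion::error::Result;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();
    // `tan` and `tanh` now resolve to the new ScalarUDF implementations.
    let df = ctx.sql("SELECT tan(0.0), tanh(0.0)").await?;
    df.show().await?;
    Ok(())
}
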
+ +use std::any::Any; +use std::sync::Arc; + +use arrow::datatypes::DataType; +use arrow_array::{ArrayRef, Float32Array, Float64Array}; +use datafusion_common::{exec_err, DataFusionError, Result}; +use datafusion_expr::Volatility; +use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature}; + +#[derive(Debug)] +pub struct TanFunc { + signature: Signature, +} + +impl TanFunc { + pub fn new() -> Self { + Self { + signature: Signature::uniform( + 1, + vec![DataType::Float64, DataType::Float32], + Volatility::Immutable, + ), + } + } +} + +impl ScalarUDFImpl for TanFunc { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "tan" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + let arg_type = &arg_types[0]; + + match arg_type { + DataType::Float64 => Ok(DataType::Float64), + DataType::Float32 => Ok(DataType::Float32), + + // For other types (possible values null/int), use Float 64 + _ => Ok(DataType::Float64), + } + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + let args = ColumnarValue::values_to_arrays(args)?; + + let arr: ArrayRef = match args[0].data_type() { + DataType::Float64 => Arc::new(make_function_scalar_inputs_return_type!( + &args[0], + self.name(), + Float64Array, + Float64Array, + { f64::tan } + )), + DataType::Float32 => Arc::new(make_function_scalar_inputs_return_type!( + &args[0], + self.name(), + Float32Array, + Float32Array, + { f32::tan } + )), + other => { + return exec_err!( + "Unsupported data type {other:?} for function {}", + self.name() + ) + } + }; + Ok(ColumnarValue::Array(arr)) + } +} diff --git a/datafusion/functions/src/math/tanh.rs b/datafusion/functions/src/math/tanh.rs new file mode 100644 index 000000000000..af34681919ab --- /dev/null +++ b/datafusion/functions/src/math/tanh.rs @@ -0,0 +1,98 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Math function: `tanh()`. 
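
The return-type rule shared by `TanFunc` and `TanhFunc` can be stated on its own; the helper name below is mine, not part of the API, and this is only a restatement of the match in `return_type`:

use arrow::datatypes::DataType;

// Float32 is preserved; every other accepted input (Float64, integers, Null)
// maps to Float64.
fn unary_float_return_type(arg: &DataType) -> DataType {
    match arg {
        DataType::Float32 => DataType::Float32,
        _ => DataType::Float64,
    }
}

fn main() {
    assert_eq!(unary_float_return_type(&DataType::Float32), DataType::Float32);
    assert_eq!(unary_float_return_type(&DataType::Int64), DataType::Float64);
}
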
+ +use std::any::Any; +use std::sync::Arc; + +use arrow::datatypes::DataType; +use arrow_array::{ArrayRef, Float32Array, Float64Array}; +use datafusion_common::{exec_err, DataFusionError, Result}; +use datafusion_expr::Volatility; +use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature}; + +#[derive(Debug)] +pub struct TanhFunc { + signature: Signature, +} + +impl TanhFunc { + pub fn new() -> Self { + Self { + signature: Signature::uniform( + 1, + vec![DataType::Float64, DataType::Float32], + Volatility::Immutable, + ), + } + } +} + +impl ScalarUDFImpl for TanhFunc { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "tanh" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + let arg_type = &arg_types[0]; + + match arg_type { + DataType::Float64 => Ok(DataType::Float64), + DataType::Float32 => Ok(DataType::Float32), + + // For other types (possible values null/int), use Float 64 + _ => Ok(DataType::Float64), + } + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + let args = ColumnarValue::values_to_arrays(args)?; + + let arr: ArrayRef = match args[0].data_type() { + DataType::Float64 => Arc::new(make_function_scalar_inputs_return_type!( + &args[0], + self.name(), + Float64Array, + Float64Array, + { f64::tanh } + )), + DataType::Float32 => Arc::new(make_function_scalar_inputs_return_type!( + &args[0], + self.name(), + Float32Array, + Float32Array, + { f32::tanh } + )), + other => { + return exec_err!( + "Unsupported data type {other:?} for function {}", + self.name() + ) + } + }; + Ok(ColumnarValue::Array(arr)) + } +} diff --git a/datafusion/optimizer/src/analyzer/rewrite_expr.rs b/datafusion/optimizer/src/analyzer/rewrite_expr.rs index 6f856fa9bd16..99578e91183c 100644 --- a/datafusion/optimizer/src/analyzer/rewrite_expr.rs +++ b/datafusion/optimizer/src/analyzer/rewrite_expr.rs @@ -17,23 +17,27 @@ //! 
Analyzer rule for to replace operators with function calls (e.g `||` to array_concat`) +#[cfg(feature = "array_expressions")] use std::sync::Arc; use super::AnalyzerRule; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TreeNodeRewriter}; -use datafusion_common::utils::list_ndims; -use datafusion_common::{DFSchema, DFSchemaRef, Result}; +#[cfg(feature = "array_expressions")] +use datafusion_common::{utils::list_ndims, DFSchemaRef}; +use datafusion_common::{DFSchema, Result}; use datafusion_expr::expr::ScalarFunction; use datafusion_expr::expr_rewriter::rewrite_preserving_name; use datafusion_expr::utils::merge_schema; use datafusion_expr::BuiltinScalarFunction; use datafusion_expr::GetFieldAccess; use datafusion_expr::GetIndexedField; -use datafusion_expr::Operator; -use datafusion_expr::ScalarFunctionDefinition; -use datafusion_expr::{BinaryExpr, Expr, LogicalPlan}; +#[cfg(feature = "array_expressions")] +use datafusion_expr::{BinaryExpr, Operator, ScalarFunctionDefinition}; +use datafusion_expr::{Expr, LogicalPlan}; +#[cfg(feature = "array_expressions")] +use datafusion_functions_array::expr_fn::{array_append, array_concat, array_prepend}; #[derive(Default)] pub struct OperatorToFunction {} @@ -73,6 +77,7 @@ fn analyze_internal(plan: &LogicalPlan) -> Result { } let mut expr_rewrite = OperatorToFunctionRewriter { + #[cfg(feature = "array_expressions")] schema: Arc::new(schema), }; @@ -90,6 +95,7 @@ fn analyze_internal(plan: &LogicalPlan) -> Result { } pub(crate) struct OperatorToFunctionRewriter { + #[cfg(feature = "array_expressions")] pub(crate) schema: DFSchemaRef, } @@ -97,13 +103,14 @@ impl TreeNodeRewriter for OperatorToFunctionRewriter { type Node = Expr; fn f_up(&mut self, expr: Expr) -> Result> { + #[cfg(feature = "array_expressions")] if let Expr::BinaryExpr(BinaryExpr { ref left, op, ref right, }) = expr { - if let Some(fun) = rewrite_array_concat_operator_to_func_for_column( + if let Some(expr) = rewrite_array_concat_operator_to_func_for_column( left.as_ref(), op, right.as_ref(), @@ -113,12 +120,7 @@ impl TreeNodeRewriter for OperatorToFunctionRewriter { rewrite_array_concat_operator_to_func(left.as_ref(), op, right.as_ref()) }) { // Convert &Box -> Expr - let left = (**left).clone(); - let right = (**right).clone(); - return Ok(Transformed::yes(Expr::ScalarFunction(ScalarFunction { - func_def: ScalarFunctionDefinition::BuiltIn(fun), - args: vec![left, right], - }))); + return Ok(Transformed::yes(expr)); } // TODO: change OperatorToFunction to OperatoToArrayFunction and configure it with array_expressions feature @@ -185,16 +187,14 @@ fn rewrite_array_has_all_operator_to_func( // array1 <@ array2 -> array_has_all(array2, array1) ( Expr::ScalarFunction(ScalarFunction { - func_def: - ScalarFunctionDefinition::BuiltIn(BuiltinScalarFunction::MakeArray), + func_def: ScalarFunctionDefinition::UDF(left_fun), args: _left_args, }), Expr::ScalarFunction(ScalarFunction { - func_def: - ScalarFunctionDefinition::BuiltIn(BuiltinScalarFunction::MakeArray), + func_def: ScalarFunctionDefinition::UDF(right_fun), args: _right_args, }), - ) => { + ) if left_fun.name() == "make_array" && right_fun.name() == "make_array" => { let left = left.clone(); let right = right.clone(); @@ -220,11 +220,12 @@ fn rewrite_array_has_all_operator_to_func( /// 4) (arry concat, array append, array prepend) || array -> array concat /// /// 5) (arry concat, array append, array prepend) || scalar -> array append +#[cfg(feature = "array_expressions")] fn 
rewrite_array_concat_operator_to_func( left: &Expr, op: Operator, right: &Expr, -) -> Option { +) -> Option { // Convert `Array StringConcat Array` to ScalarFunction::ArrayConcat if op != Operator::StringConcat { @@ -236,97 +237,65 @@ fn rewrite_array_concat_operator_to_func( // (arry concat, array append, array prepend) || array -> array concat ( Expr::ScalarFunction(ScalarFunction { - func_def: - ScalarFunctionDefinition::BuiltIn(BuiltinScalarFunction::ArrayConcat), - args: _left_args, - }), - Expr::ScalarFunction(ScalarFunction { - func_def: - ScalarFunctionDefinition::BuiltIn(BuiltinScalarFunction::MakeArray), - args: _right_args, - }), - ) - | ( - Expr::ScalarFunction(ScalarFunction { - func_def: - ScalarFunctionDefinition::BuiltIn(BuiltinScalarFunction::ArrayAppend), + func_def: ScalarFunctionDefinition::UDF(left_fun), args: _left_args, }), Expr::ScalarFunction(ScalarFunction { - func_def: - ScalarFunctionDefinition::BuiltIn(BuiltinScalarFunction::MakeArray), + func_def: ScalarFunctionDefinition::UDF(right_fun), args: _right_args, }), - ) - | ( - Expr::ScalarFunction(ScalarFunction { - func_def: - ScalarFunctionDefinition::BuiltIn(BuiltinScalarFunction::ArrayPrepend), - args: _left_args, - }), - Expr::ScalarFunction(ScalarFunction { - func_def: - ScalarFunctionDefinition::BuiltIn(BuiltinScalarFunction::MakeArray), - args: _right_args, - }), - ) => Some(BuiltinScalarFunction::ArrayConcat), + ) if ["array_append", "array_prepend", "array_concat"] + .contains(&left_fun.name()) + && right_fun.name() == "make_array" => + { + Some(array_concat(vec![left.clone(), right.clone()])) + } // Chain concat operator (a || b) || scalar, // (arry concat, array append, array prepend) || scalar -> array append ( Expr::ScalarFunction(ScalarFunction { - func_def: - ScalarFunctionDefinition::BuiltIn(BuiltinScalarFunction::ArrayConcat), - args: _left_args, - }), - _scalar, - ) - | ( - Expr::ScalarFunction(ScalarFunction { - func_def: - ScalarFunctionDefinition::BuiltIn(BuiltinScalarFunction::ArrayAppend), + func_def: ScalarFunctionDefinition::UDF(left_fun), args: _left_args, }), _scalar, - ) - | ( - Expr::ScalarFunction(ScalarFunction { - func_def: - ScalarFunctionDefinition::BuiltIn(BuiltinScalarFunction::ArrayPrepend), - args: _left_args, - }), - _scalar, - ) => Some(BuiltinScalarFunction::ArrayAppend), + ) if ["array_append", "array_prepend", "array_concat"] + .contains(&left_fun.name()) => + { + Some(array_append(left.clone(), right.clone())) + } // array || array -> array concat ( Expr::ScalarFunction(ScalarFunction { - func_def: - ScalarFunctionDefinition::BuiltIn(BuiltinScalarFunction::MakeArray), + func_def: ScalarFunctionDefinition::UDF(left_fun), args: _left_args, }), Expr::ScalarFunction(ScalarFunction { - func_def: - ScalarFunctionDefinition::BuiltIn(BuiltinScalarFunction::MakeArray), + func_def: ScalarFunctionDefinition::UDF(right_fun), args: _right_args, }), - ) => Some(BuiltinScalarFunction::ArrayConcat), + ) if left_fun.name() == "make_array" && right_fun.name() == "make_array" => { + Some(array_concat(vec![left.clone(), right.clone()])) + } // array || scalar -> array append ( Expr::ScalarFunction(ScalarFunction { - func_def: - ScalarFunctionDefinition::BuiltIn(BuiltinScalarFunction::MakeArray), + func_def: ScalarFunctionDefinition::UDF(left_fun), args: _left_args, }), _right_scalar, - ) => Some(BuiltinScalarFunction::ArrayAppend), + ) if left_fun.name() == "make_array" => { + Some(array_append(left.clone(), right.clone())) + } // scalar || array -> array prepend ( _left_scalar, 
Expr::ScalarFunction(ScalarFunction { - func_def: - ScalarFunctionDefinition::BuiltIn(BuiltinScalarFunction::MakeArray), + func_def: ScalarFunctionDefinition::UDF(right_fun), args: _right_args, }), - ) => Some(BuiltinScalarFunction::ArrayPrepend), + ) if right_fun.name() == "make_array" => { + Some(array_prepend(left.clone(), right.clone())) + } _ => None, } @@ -337,12 +306,13 @@ fn rewrite_array_concat_operator_to_func( /// 1) (arry concat, array append, array prepend) || column -> (array append, array concat) /// /// 2) column1 || column2 -> (array prepend, array append, array concat) +#[cfg(feature = "array_expressions")] fn rewrite_array_concat_operator_to_func_for_column( left: &Expr, op: Operator, right: &Expr, schema: &DFSchema, -) -> Result> { +) -> Result> { if op != Operator::StringConcat { return Ok(None); } @@ -352,33 +322,18 @@ fn rewrite_array_concat_operator_to_func_for_column( // 1) array_prepend/append/concat || column ( Expr::ScalarFunction(ScalarFunction { - func_def: - ScalarFunctionDefinition::BuiltIn(BuiltinScalarFunction::ArrayPrepend), - args: _left_args, - }), - Expr::Column(c), - ) - | ( - Expr::ScalarFunction(ScalarFunction { - func_def: - ScalarFunctionDefinition::BuiltIn(BuiltinScalarFunction::ArrayAppend), + func_def: ScalarFunctionDefinition::UDF(left_fun), args: _left_args, }), Expr::Column(c), - ) - | ( - Expr::ScalarFunction(ScalarFunction { - func_def: - ScalarFunctionDefinition::BuiltIn(BuiltinScalarFunction::ArrayConcat), - args: _left_args, - }), - Expr::Column(c), - ) => { + ) if ["array_append", "array_prepend", "array_concat"] + .contains(&left_fun.name()) => + { let d = schema.field_from_column(c)?.data_type(); let ndim = list_ndims(d); match ndim { - 0 => Ok(Some(BuiltinScalarFunction::ArrayAppend)), - _ => Ok(Some(BuiltinScalarFunction::ArrayConcat)), + 0 => Ok(Some(array_append(left.clone(), right.clone()))), + _ => Ok(Some(array_concat(vec![left.clone(), right.clone()]))), } } // 2) select column1 || column2 @@ -388,9 +343,9 @@ fn rewrite_array_concat_operator_to_func_for_column( let ndim1 = list_ndims(d1); let ndim2 = list_ndims(d2); match (ndim1, ndim2) { - (0, _) => Ok(Some(BuiltinScalarFunction::ArrayPrepend)), - (_, 0) => Ok(Some(BuiltinScalarFunction::ArrayAppend)), - _ => Ok(Some(BuiltinScalarFunction::ArrayConcat)), + (0, _) => Ok(Some(array_prepend(left.clone(), right.clone()))), + (_, 0) => Ok(Some(array_append(left.clone(), right.clone()))), + _ => Ok(Some(array_concat(vec![left.clone(), right.clone()]))), } } _ => Ok(None), diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 496def95e1bc..fabeba439370 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -46,8 +46,8 @@ use datafusion_expr::type_coercion::{is_datetime, is_utf8_or_large_utf8}; use datafusion_expr::utils::merge_schema; use datafusion_expr::{ is_false, is_not_false, is_not_true, is_not_unknown, is_true, is_unknown, not, - type_coercion, AggregateFunction, BuiltinScalarFunction, Expr, ExprSchemable, - LogicalPlan, Operator, Projection, ScalarFunctionDefinition, Signature, WindowFrame, + type_coercion, AggregateFunction, Expr, ExprSchemable, LogicalPlan, Operator, + Projection, ScalarFunctionDefinition, ScalarUDF, Signature, WindowFrame, WindowFrameBound, WindowFrameUnits, }; @@ -316,11 +316,6 @@ impl TreeNodeRewriter for TypeCoercionRewriter { &self.schema, &fun.signature(), )?; - let new_args = coerce_arguments_for_fun( - 
new_args.as_slice(), - &self.schema, - &fun, - )?; Ok(Transformed::yes(Expr::ScalarFunction(ScalarFunction::new( fun, new_args, )))) @@ -331,6 +326,11 @@ impl TreeNodeRewriter for TypeCoercionRewriter { &self.schema, fun.signature(), )?; + let new_expr = coerce_arguments_for_fun( + new_expr.as_slice(), + &self.schema, + &fun, + )?; Ok(Transformed::yes(Expr::ScalarFunction( ScalarFunction::new_udf(fun, new_expr), ))) @@ -583,7 +583,7 @@ fn coerce_arguments_for_signature( fn coerce_arguments_for_fun( expressions: &[Expr], schema: &DFSchema, - fun: &BuiltinScalarFunction, + fun: &Arc, ) -> Result> { if expressions.is_empty() { return Ok(vec![]); @@ -591,7 +591,7 @@ fn coerce_arguments_for_fun( let mut expressions: Vec = expressions.to_vec(); // Cast Fixedsizelist to List for array functions - if *fun == BuiltinScalarFunction::MakeArray { + if fun.name() == "make_array" { expressions = expressions .into_iter() .map(|expr| { @@ -776,6 +776,7 @@ mod test { LogicalPlan, Operator, ScalarUDF, ScalarUDFImpl, Signature, SimpleAggregateUDF, Subquery, Volatility, }; + use datafusion_functions_array::expr_fn::make_array; use datafusion_physical_expr::expressions::AvgAccumulator; fn empty() -> Arc { @@ -1266,10 +1267,7 @@ mod test { None, ), ))); - let expr = Expr::ScalarFunction(ScalarFunction::new( - BuiltinScalarFunction::MakeArray, - vec![val.clone()], - )); + let expr = make_array(vec![val.clone()]); let schema = Arc::new(DFSchema::new_with_metadata( vec![DFField::new_unqualified( "item", @@ -1298,10 +1296,7 @@ mod test { &schema, )?; - let expected = Expr::ScalarFunction(ScalarFunction::new( - BuiltinScalarFunction::MakeArray, - vec![expected_casted_expr], - )); + let expected = make_array(vec![expected_casted_expr]); assert_eq!(result, expected); Ok(()) diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index 30c184a28e33..7b8eccad5133 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -33,9 +33,7 @@ use datafusion_common::{ DataFusionError, Result, }; use datafusion_expr::expr::Alias; -use datafusion_expr::logical_plan::{ - Aggregate, Filter, LogicalPlan, Projection, Sort, Window, -}; +use datafusion_expr::logical_plan::{Aggregate, LogicalPlan, Projection, Window}; use datafusion_expr::{col, Expr, ExprSchemable}; /// A map from expression's identifier to tuple including @@ -44,13 +42,13 @@ use datafusion_expr::{col, Expr, ExprSchemable}; /// - DataType of this expression. type ExprSet = HashMap; -/// Identifier type. Current implementation use describe of a expression (type String) as +/// Identifier type. Current implementation use describe of an expression (type String) as /// Identifier. /// -/// A Identifier should (ideally) be able to "hash", "accumulate", "equal" and "have no +/// An identifier should (ideally) be able to "hash", "accumulate", "equal" and "have no /// collision (as low as possible)" /// -/// Since a identifier is likely to be copied many times, it is better that a identifier +/// Since an identifier is likely to be copied many times, it is better that an identifier /// is small or "copy". otherwise some kinds of reference count is needed. String description /// here is not such a good choose. 
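
A tiny, illustrative aside (not from the patch) on what this identifier is in practice: it is derived from the expression's textual form, so structurally equal sub-expressions land on the same `ExprSet` key, at the cost of building and cloning `String`s:

use datafusion_expr::{col, lit, Expr};

fn main() {
    // Two structurally equal sub-expressions produce the same description,
    // so they collapse onto the same ExprSet entry.
    let e1: Expr = lit(1) + col("a");
    let e2: Expr = lit(1) + col("a");
    assert_eq!(format!("{e1}"), format!("{e2}"));
}
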
type Identifier = String; @@ -108,61 +106,6 @@ impl CommonSubexprEliminate { Ok((rewrite_exprs, new_input)) } - fn try_optimize_projection( - &self, - projection: &Projection, - config: &dyn OptimizerConfig, - ) -> Result { - let Projection { expr, input, .. } = projection; - let input_schema = Arc::clone(input.schema()); - let mut expr_set = ExprSet::new(); - - // Visit expr list and build expr identifier to occuring count map (`expr_set`). - let arrays = to_arrays(expr, input_schema, &mut expr_set, ExprMask::Normal)?; - - let (mut new_expr, new_input) = - self.rewrite_expr(&[expr], &[&arrays], input, &expr_set, config)?; - - // Since projection expr changes, schema changes also. Use try_new method. - Projection::try_new(pop_expr(&mut new_expr)?, Arc::new(new_input)) - .map(LogicalPlan::Projection) - } - - fn try_optimize_filter( - &self, - filter: &Filter, - config: &dyn OptimizerConfig, - ) -> Result { - let mut expr_set = ExprSet::new(); - let predicate = &filter.predicate; - let input_schema = Arc::clone(filter.input.schema()); - let mut id_array = vec![]; - expr_to_identifier( - predicate, - &mut expr_set, - &mut id_array, - input_schema, - ExprMask::Normal, - )?; - - let (mut new_expr, new_input) = self.rewrite_expr( - &[&[predicate.clone()]], - &[&[id_array]], - &filter.input, - &expr_set, - config, - )?; - - if let Some(predicate) = pop_expr(&mut new_expr)?.pop() { - Ok(LogicalPlan::Filter(Filter::try_new( - predicate, - Arc::new(new_input), - )?)) - } else { - internal_err!("Failed to pop predicate expr") - } - } - fn try_optimize_window( &self, window: &Window, @@ -354,25 +297,24 @@ impl CommonSubexprEliminate { } } - fn try_optimize_sort( + fn try_unary_plan( &self, - sort: &Sort, + plan: &LogicalPlan, config: &dyn OptimizerConfig, ) -> Result { - let Sort { expr, input, fetch } = sort; + let expr = plan.expressions(); + let inputs = plan.inputs(); + let input = inputs[0]; + let input_schema = Arc::clone(input.schema()); let mut expr_set = ExprSet::new(); - let input_schema = Arc::clone(input.schema()); - let arrays = to_arrays(expr, input_schema, &mut expr_set, ExprMask::Normal)?; + // Visit expr list and build expr identifier to occuring count map (`expr_set`). + let arrays = to_arrays(&expr, input_schema, &mut expr_set, ExprMask::Normal)?; let (mut new_expr, new_input) = - self.rewrite_expr(&[expr], &[&arrays], input, &expr_set, config)?; + self.rewrite_expr(&[&expr], &[&arrays], input, &expr_set, config)?; - Ok(LogicalPlan::Sort(Sort { - expr: pop_expr(&mut new_expr)?, - input: Arc::new(new_input), - fetch: *fetch, - })) + plan.with_new_exprs(pop_expr(&mut new_expr)?, vec![new_input]) } } @@ -383,19 +325,15 @@ impl OptimizerRule for CommonSubexprEliminate { config: &dyn OptimizerConfig, ) -> Result> { let optimized_plan = match plan { - LogicalPlan::Projection(projection) => { - Some(self.try_optimize_projection(projection, config)?) - } - LogicalPlan::Filter(filter) => { - Some(self.try_optimize_filter(filter, config)?) - } + LogicalPlan::Projection(_) + | LogicalPlan::Sort(_) + | LogicalPlan::Filter(_) => Some(self.try_unary_plan(plan, config)?), LogicalPlan::Window(window) => { Some(self.try_optimize_window(window, config)?) } LogicalPlan::Aggregate(aggregate) => { Some(self.try_optimize_aggregate(aggregate, config)?) 
} - LogicalPlan::Sort(sort) => Some(self.try_optimize_sort(sort, config)?), LogicalPlan::Join(_) | LogicalPlan::CrossJoin(_) | LogicalPlan::Repartition(_) @@ -1321,7 +1259,8 @@ mod test { .build()?; let expected = "Projection: test.a, test.b, test.c\ - \n Filter: Int32(1) + test.atest.aInt32(1) AS Int32(1) + test.a - Int32(10) > Int32(1) + test.atest.aInt32(1) AS Int32(1) + test.a\n Projection: Int32(1) + test.a AS Int32(1) + test.atest.aInt32(1), test.a, test.b, test.c\ + \n Filter: Int32(1) + test.atest.aInt32(1) - Int32(10) > Int32(1) + test.atest.aInt32(1)\ + \n Projection: Int32(1) + test.a AS Int32(1) + test.atest.aInt32(1), test.a, test.b, test.c\ \n TableScan: test"; assert_optimized_plan_eq(expected, &plan); diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs index a63133c5166f..e93e171e0324 100644 --- a/datafusion/optimizer/src/push_down_filter.rs +++ b/datafusion/optimizer/src/push_down_filter.rs @@ -859,6 +859,13 @@ impl OptimizerRule for PushDownFilter { let results = scan .source .supports_filters_pushdown(filter_predicates.as_slice())?; + if filter_predicates.len() != results.len() { + return internal_err!( + "Vec returned length: {} from supports_filters_pushdown is not the same size as the filters passed, which length is: {}", + results.len(), + filter_predicates.len()); + } + let zip = filter_predicates.iter().zip(results); let new_scan_filters = zip diff --git a/datafusion/physical-expr/src/aggregate/approx_percentile_cont.rs b/datafusion/physical-expr/src/aggregate/approx_percentile_cont.rs index b3de7b0b4d36..3dbf1679e230 100644 --- a/datafusion/physical-expr/src/aggregate/approx_percentile_cont.rs +++ b/datafusion/physical-expr/src/aggregate/approx_percentile_cont.rs @@ -30,8 +30,8 @@ use arrow::{ use arrow_array::RecordBatch; use arrow_schema::Schema; use datafusion_common::{ - downcast_value, exec_err, internal_err, not_impl_err, plan_err, DataFusionError, - Result, ScalarValue, + downcast_value, internal_err, not_impl_err, plan_err, DataFusionError, Result, + ScalarValue, }; use datafusion_expr::{Accumulator, ColumnarValue}; use std::{any::Any, iter, sync::Arc}; @@ -391,7 +391,7 @@ impl Accumulator for ApproxPercentileAccumulator { fn evaluate(&mut self) -> Result { if self.digest.count() == 0.0 { - return exec_err!("aggregate function needs at least one non-null element"); + return ScalarValue::try_from(self.return_type.clone()); } let q = self.digest.estimate_quantile(self.percentile); diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index 5be72b0559d3..c846674e752f 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -28,15 +28,14 @@ use arrow::datatypes::{DataType, Field, UInt64Type}; use arrow::row::{RowConverter, SortField}; use arrow_buffer::{ArrowNativeType, NullBuffer}; -use arrow_schema::{FieldRef, SortOptions}; +use arrow_schema::FieldRef; use datafusion_common::cast::{ - as_generic_list_array, as_generic_string_array, as_int64_array, as_large_list_array, - as_list_array, as_string_array, + as_generic_list_array, as_int64_array, as_large_list_array, as_list_array, }; -use datafusion_common::utils::{array_into_list_array, list_ndims}; +use datafusion_common::utils::array_into_list_array; use datafusion_common::{ - exec_err, internal_datafusion_err, internal_err, not_impl_err, plan_err, - DataFusionError, Result, ScalarValue, + exec_err, internal_datafusion_err, 
internal_err, plan_err, DataFusionError, Result, + ScalarValue, }; use itertools::Itertools; @@ -746,484 +745,6 @@ pub fn array_pop_back(args: &[ArrayRef]) -> Result { } } -/// Appends or prepends elements to a ListArray. -/// -/// This function takes a ListArray, an ArrayRef, a FieldRef, and a boolean flag -/// indicating whether to append or prepend the elements. It returns a `Result` -/// representing the resulting ListArray after the operation. -/// -/// # Arguments -/// -/// * `list_array` - A reference to the ListArray to which elements will be appended/prepended. -/// * `element_array` - A reference to the Array containing elements to be appended/prepended. -/// * `field` - A reference to the Field describing the data type of the arrays. -/// * `is_append` - A boolean flag indicating whether to append (`true`) or prepend (`false`) elements. -/// -/// # Examples -/// -/// generic_append_and_prepend( -/// [1, 2, 3], 4, append => [1, 2, 3, 4] -/// 5, [6, 7, 8], prepend => [5, 6, 7, 8] -/// ) -fn generic_append_and_prepend( - list_array: &GenericListArray, - element_array: &ArrayRef, - data_type: &DataType, - is_append: bool, -) -> Result -where - i64: TryInto, -{ - let mut offsets = vec![O::usize_as(0)]; - let values = list_array.values(); - let original_data = values.to_data(); - let element_data = element_array.to_data(); - let capacity = Capacities::Array(original_data.len() + element_data.len()); - - let mut mutable = MutableArrayData::with_capacities( - vec![&original_data, &element_data], - false, - capacity, - ); - - let values_index = 0; - let element_index = 1; - - for (row_index, offset_window) in list_array.offsets().windows(2).enumerate() { - let start = offset_window[0].to_usize().unwrap(); - let end = offset_window[1].to_usize().unwrap(); - if is_append { - mutable.extend(values_index, start, end); - mutable.extend(element_index, row_index, row_index + 1); - } else { - mutable.extend(element_index, row_index, row_index + 1); - mutable.extend(values_index, start, end); - } - offsets.push(offsets[row_index] + O::usize_as(end - start + 1)); - } - - let data = mutable.freeze(); - - Ok(Arc::new(GenericListArray::::try_new( - Arc::new(Field::new("item", data_type.to_owned(), true)), - OffsetBuffer::new(offsets.into()), - arrow_array::make_array(data), - None, - )?)) -} - -/// Array_sort SQL function -pub fn array_sort(args: &[ArrayRef]) -> Result { - if args.is_empty() || args.len() > 3 { - return exec_err!("array_sort expects one to three arguments"); - } - - let sort_option = match args.len() { - 1 => None, - 2 => { - let sort = as_string_array(&args[1])?.value(0); - Some(SortOptions { - descending: order_desc(sort)?, - nulls_first: true, - }) - } - 3 => { - let sort = as_string_array(&args[1])?.value(0); - let nulls_first = as_string_array(&args[2])?.value(0); - Some(SortOptions { - descending: order_desc(sort)?, - nulls_first: order_nulls_first(nulls_first)?, - }) - } - _ => return exec_err!("array_sort expects 1 to 3 arguments"), - }; - - let list_array = as_list_array(&args[0])?; - let row_count = list_array.len(); - - let mut array_lengths = vec![]; - let mut arrays = vec![]; - let mut valid = BooleanBufferBuilder::new(row_count); - for i in 0..row_count { - if list_array.is_null(i) { - array_lengths.push(0); - valid.append(false); - } else { - let arr_ref = list_array.value(i); - let arr_ref = arr_ref.as_ref(); - - let sorted_array = compute::sort(arr_ref, sort_option)?; - array_lengths.push(sorted_array.len()); - arrays.push(sorted_array); - valid.append(true); - } - } 
- - // Assume all arrays have the same data type - let data_type = list_array.value_type(); - let buffer = valid.finish(); - - let elements = arrays - .iter() - .map(|a| a.as_ref()) - .collect::>(); - - let list_arr = ListArray::new( - Arc::new(Field::new("item", data_type, true)), - OffsetBuffer::from_lengths(array_lengths), - Arc::new(compute::concat(elements.as_slice())?), - Some(NullBuffer::new(buffer)), - ); - Ok(Arc::new(list_arr)) -} - -fn order_desc(modifier: &str) -> Result { - match modifier.to_uppercase().as_str() { - "DESC" => Ok(true), - "ASC" => Ok(false), - _ => exec_err!("the second parameter of array_sort expects DESC or ASC"), - } -} - -fn order_nulls_first(modifier: &str) -> Result { - match modifier.to_uppercase().as_str() { - "NULLS FIRST" => Ok(true), - "NULLS LAST" => Ok(false), - _ => exec_err!( - "the third parameter of array_sort expects NULLS FIRST or NULLS LAST" - ), - } -} - -fn general_append_and_prepend( - args: &[ArrayRef], - is_append: bool, -) -> Result -where - i64: TryInto, -{ - let (list_array, element_array) = if is_append { - let list_array = as_generic_list_array::(&args[0])?; - let element_array = &args[1]; - check_datatypes("array_append", &[element_array, list_array.values()])?; - (list_array, element_array) - } else { - let list_array = as_generic_list_array::(&args[1])?; - let element_array = &args[0]; - check_datatypes("array_prepend", &[list_array.values(), element_array])?; - (list_array, element_array) - }; - - let res = match list_array.value_type() { - DataType::List(_) => concat_internal::(args)?, - DataType::LargeList(_) => concat_internal::(args)?, - data_type => { - return generic_append_and_prepend::( - list_array, - element_array, - &data_type, - is_append, - ); - } - }; - - Ok(res) -} - -/// Array_append SQL function -pub fn array_append(args: &[ArrayRef]) -> Result { - if args.len() != 2 { - return exec_err!("array_append expects two arguments"); - } - - match args[0].data_type() { - DataType::LargeList(_) => general_append_and_prepend::(args, true), - _ => general_append_and_prepend::(args, true), - } -} - -/// Array_prepend SQL function -pub fn array_prepend(args: &[ArrayRef]) -> Result { - if args.len() != 2 { - return exec_err!("array_prepend expects two arguments"); - } - - match args[1].data_type() { - DataType::LargeList(_) => general_append_and_prepend::(args, false), - _ => general_append_and_prepend::(args, false), - } -} - -fn align_array_dimensions( - args: Vec, -) -> Result> { - let args_ndim = args - .iter() - .map(|arg| datafusion_common::utils::list_ndims(arg.data_type())) - .collect::>(); - let max_ndim = args_ndim.iter().max().unwrap_or(&0); - - // Align the dimensions of the arrays - let aligned_args: Result> = args - .into_iter() - .zip(args_ndim.iter()) - .map(|(array, ndim)| { - if ndim < max_ndim { - let mut aligned_array = array.clone(); - for _ in 0..(max_ndim - ndim) { - let data_type = aligned_array.data_type().to_owned(); - let array_lengths = vec![1; aligned_array.len()]; - let offsets = OffsetBuffer::::from_lengths(array_lengths); - - aligned_array = Arc::new(GenericListArray::::try_new( - Arc::new(Field::new("item", data_type, true)), - offsets, - aligned_array, - None, - )?) - } - Ok(aligned_array) - } else { - Ok(array.clone()) - } - }) - .collect(); - - aligned_args -} - -// Concatenate arrays on the same row. 
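
For readers following the relocation, a rough end-to-end sketch (assuming the `datafusion` crate with its default array support and a tokio runtime) of the row-wise concatenation the removed `concat_internal` implemented and its replacement still implements:

use datafusion::error::Result;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();
    // Arrays are concatenated row by row; a single literal row is the
    // simplest illustration.
    let df = ctx
        .sql("SELECT array_concat(make_array(1, 2), make_array(3, 4)) AS c")
        .await?;
    df.show().await?; // c = [1, 2, 3, 4]
    Ok(())
}
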
-fn concat_internal(args: &[ArrayRef]) -> Result { - let args = align_array_dimensions::(args.to_vec())?; - - let list_arrays = args - .iter() - .map(|arg| as_generic_list_array::(arg)) - .collect::>>()?; - // Assume number of rows is the same for all arrays - let row_count = list_arrays[0].len(); - - let mut array_lengths = vec![]; - let mut arrays = vec![]; - let mut valid = BooleanBufferBuilder::new(row_count); - for i in 0..row_count { - let nulls = list_arrays - .iter() - .map(|arr| arr.is_null(i)) - .collect::>(); - - // If all the arrays are null, the concatenated array is null - let is_null = nulls.iter().all(|&x| x); - if is_null { - array_lengths.push(0); - valid.append(false); - } else { - // Get all the arrays on i-th row - let values = list_arrays - .iter() - .map(|arr| arr.value(i)) - .collect::>(); - - let elements = values - .iter() - .map(|a| a.as_ref()) - .collect::>(); - - // Concatenated array on i-th row - let concated_array = compute::concat(elements.as_slice())?; - array_lengths.push(concated_array.len()); - arrays.push(concated_array); - valid.append(true); - } - } - // Assume all arrays have the same data type - let data_type = list_arrays[0].value_type(); - let buffer = valid.finish(); - - let elements = arrays - .iter() - .map(|a| a.as_ref()) - .collect::>(); - - let list_arr = GenericListArray::::new( - Arc::new(Field::new("item", data_type, true)), - OffsetBuffer::from_lengths(array_lengths), - Arc::new(compute::concat(elements.as_slice())?), - Some(NullBuffer::new(buffer)), - ); - - Ok(Arc::new(list_arr)) -} - -/// Array_concat/Array_cat SQL function -pub fn array_concat(args: &[ArrayRef]) -> Result { - if args.is_empty() { - return exec_err!("array_concat expects at least one arguments"); - } - - let mut new_args = vec![]; - for arg in args { - let ndim = list_ndims(arg.data_type()); - let base_type = datafusion_common::utils::base_type(arg.data_type()); - if ndim == 0 { - return not_impl_err!("Array is not type '{base_type:?}'."); - } else if !base_type.eq(&DataType::Null) { - new_args.push(arg.clone()); - } - } - - match &args[0].data_type() { - DataType::LargeList(_) => concat_internal::(new_args.as_slice()), - _ => concat_internal::(new_args.as_slice()), - } -} - -/// Array_repeat SQL function -pub fn array_repeat(args: &[ArrayRef]) -> Result { - if args.len() != 2 { - return exec_err!("array_repeat expects two arguments"); - } - - let element = &args[0]; - let count_array = as_int64_array(&args[1])?; - - match element.data_type() { - DataType::List(_) => { - let list_array = as_list_array(element)?; - general_list_repeat::(list_array, count_array) - } - DataType::LargeList(_) => { - let list_array = as_large_list_array(element)?; - general_list_repeat::(list_array, count_array) - } - _ => general_repeat::(element, count_array), - } -} - -/// For each element of `array[i]` repeat `count_array[i]` times. -/// -/// Assumption for the input: -/// 1. `count[i] >= 0` -/// 2. 
`array.len() == count_array.len()` -/// -/// For example, -/// ```text -/// array_repeat( -/// [1, 2, 3], [2, 0, 1] => [[1, 1], [], [3]] -/// ) -/// ``` -fn general_repeat( - array: &ArrayRef, - count_array: &Int64Array, -) -> Result { - let data_type = array.data_type(); - let mut new_values = vec![]; - - let count_vec = count_array - .values() - .to_vec() - .iter() - .map(|x| *x as usize) - .collect::>(); - - for (row_index, &count) in count_vec.iter().enumerate() { - let repeated_array = if array.is_null(row_index) { - new_null_array(data_type, count) - } else { - let original_data = array.to_data(); - let capacity = Capacities::Array(count); - let mut mutable = - MutableArrayData::with_capacities(vec![&original_data], false, capacity); - - for _ in 0..count { - mutable.extend(0, row_index, row_index + 1); - } - - let data = mutable.freeze(); - arrow_array::make_array(data) - }; - new_values.push(repeated_array); - } - - let new_values: Vec<_> = new_values.iter().map(|a| a.as_ref()).collect(); - let values = compute::concat(&new_values)?; - - Ok(Arc::new(GenericListArray::::try_new( - Arc::new(Field::new("item", data_type.to_owned(), true)), - OffsetBuffer::from_lengths(count_vec), - values, - None, - )?)) -} - -/// Handle List version of `general_repeat` -/// -/// For each element of `list_array[i]` repeat `count_array[i]` times. -/// -/// For example, -/// ```text -/// array_repeat( -/// [[1, 2, 3], [4, 5], [6]], [2, 0, 1] => [[[1, 2, 3], [1, 2, 3]], [], [[6]]] -/// ) -/// ``` -fn general_list_repeat( - list_array: &GenericListArray, - count_array: &Int64Array, -) -> Result { - let data_type = list_array.data_type(); - let value_type = list_array.value_type(); - let mut new_values = vec![]; - - let count_vec = count_array - .values() - .to_vec() - .iter() - .map(|x| *x as usize) - .collect::>(); - - for (list_array_row, &count) in list_array.iter().zip(count_vec.iter()) { - let list_arr = match list_array_row { - Some(list_array_row) => { - let original_data = list_array_row.to_data(); - let capacity = Capacities::Array(original_data.len() * count); - let mut mutable = MutableArrayData::with_capacities( - vec![&original_data], - false, - capacity, - ); - - for _ in 0..count { - mutable.extend(0, 0, original_data.len()); - } - - let data = mutable.freeze(); - let repeated_array = arrow_array::make_array(data); - - let list_arr = GenericListArray::::try_new( - Arc::new(Field::new("item", value_type.clone(), true)), - OffsetBuffer::::from_lengths(vec![original_data.len(); count]), - repeated_array, - None, - )?; - Arc::new(list_arr) as ArrayRef - } - None => new_null_array(data_type, count), - }; - new_values.push(list_arr); - } - - let lengths = new_values.iter().map(|a| a.len()).collect::>(); - let new_values: Vec<_> = new_values.iter().map(|a| a.as_ref()).collect(); - let values = compute::concat(&new_values)?; - - Ok(Arc::new(ListArray::try_new( - Arc::new(Field::new("item", data_type.to_owned(), true)), - OffsetBuffer::::from_lengths(lengths), - values, - None, - )?)) -} - /// Array_position SQL function pub fn array_position(args: &[ArrayRef]) -> Result { if args.len() < 2 || args.len() > 3 { @@ -1836,95 +1357,6 @@ pub fn array_intersect(args: &[ArrayRef]) -> Result { general_set_op(array1, array2, SetOp::Intersect) } -/// Splits string at occurrences of delimiter and returns an array of parts -/// string_to_array('abc~@~def~@~ghi', '~@~') = '["abc", "def", "ghi"]' -pub fn string_to_array(args: &[ArrayRef]) -> Result { - let string_array = as_generic_string_array::(&args[0])?; - 
let delimiter_array = as_generic_string_array::(&args[1])?; - - let mut list_builder = ListBuilder::new(StringBuilder::with_capacity( - string_array.len(), - string_array.get_buffer_memory_size(), - )); - - match args.len() { - 2 => { - string_array.iter().zip(delimiter_array.iter()).for_each( - |(string, delimiter)| { - match (string, delimiter) { - (Some(string), Some("")) => { - list_builder.values().append_value(string); - list_builder.append(true); - } - (Some(string), Some(delimiter)) => { - string.split(delimiter).for_each(|s| { - list_builder.values().append_value(s); - }); - list_builder.append(true); - } - (Some(string), None) => { - string.chars().map(|c| c.to_string()).for_each(|c| { - list_builder.values().append_value(c); - }); - list_builder.append(true); - } - _ => list_builder.append(false), // null value - } - }, - ); - } - - 3 => { - let null_value_array = as_generic_string_array::(&args[2])?; - string_array - .iter() - .zip(delimiter_array.iter()) - .zip(null_value_array.iter()) - .for_each(|((string, delimiter), null_value)| { - match (string, delimiter) { - (Some(string), Some("")) => { - if Some(string) == null_value { - list_builder.values().append_null(); - } else { - list_builder.values().append_value(string); - } - list_builder.append(true); - } - (Some(string), Some(delimiter)) => { - string.split(delimiter).for_each(|s| { - if Some(s) == null_value { - list_builder.values().append_null(); - } else { - list_builder.values().append_value(s); - } - }); - list_builder.append(true); - } - (Some(string), None) => { - string.chars().map(|c| c.to_string()).for_each(|c| { - if Some(c.as_str()) == null_value { - list_builder.values().append_null(); - } else { - list_builder.values().append_value(c); - } - }); - list_builder.append(true); - } - _ => list_builder.append(false), // null value - } - }); - } - _ => { - return exec_err!( - "Expect string_to_array function to take two or three parameters" - ) - } - } - - let list_array = list_builder.finish(); - Ok(Arc::new(list_array) as ArrayRef) -} - pub fn general_array_distinct( array: &GenericListArray, field: &FieldRef, @@ -1961,32 +1393,6 @@ pub fn general_array_distinct( )?)) } -/// array_distinct SQL function -/// example: from list [1, 3, 2, 3, 1, 2, 4] to [1, 2, 3, 4] -pub fn array_distinct(args: &[ArrayRef]) -> Result { - if args.len() != 1 { - return exec_err!("array_distinct needs one argument"); - } - - // handle null - if args[0].data_type() == &DataType::Null { - return Ok(args[0].clone()); - } - - // handle for list & largelist - match args[0].data_type() { - DataType::List(field) => { - let array = as_list_array(&args[0])?; - general_array_distinct(array, field) - } - DataType::LargeList(field) => { - let array = as_large_list_array(&args[0])?; - general_array_distinct(array, field) - } - array_type => exec_err!("array_distinct does not support type '{array_type:?}'"), - } -} - /// array_resize SQL function pub fn array_resize(arg: &[ArrayRef]) -> Result { if arg.len() < 2 || arg.len() > 3 { @@ -2149,54 +1555,3 @@ where Some(nulls.into()), )?)) } - -#[cfg(test)] -mod tests { - use super::*; - use arrow::datatypes::Int64Type; - - /// Only test internal functions, array-related sql functions will be tested in sqllogictest `array.slt` - #[test] - fn test_align_array_dimensions() { - let array1d_1 = - Arc::new(ListArray::from_iter_primitive::(vec![ - Some(vec![Some(1), Some(2), Some(3)]), - Some(vec![Some(4), Some(5)]), - ])); - let array1d_2 = - Arc::new(ListArray::from_iter_primitive::(vec![ - 
Some(vec![Some(6), Some(7), Some(8)]), - ])); - - let array2d_1 = Arc::new(array_into_list_array(array1d_1.clone())) as ArrayRef; - let array2d_2 = Arc::new(array_into_list_array(array1d_2.clone())) as ArrayRef; - - let res = align_array_dimensions::(vec![ - array1d_1.to_owned(), - array2d_2.to_owned(), - ]) - .unwrap(); - - let expected = as_list_array(&array2d_1).unwrap(); - let expected_dim = datafusion_common::utils::list_ndims(array2d_1.data_type()); - assert_ne!(as_list_array(&res[0]).unwrap(), expected); - assert_eq!( - datafusion_common::utils::list_ndims(res[0].data_type()), - expected_dim - ); - - let array3d_1 = Arc::new(array_into_list_array(array2d_1)) as ArrayRef; - let array3d_2 = array_into_list_array(array2d_2.to_owned()); - let res = - align_array_dimensions::(vec![array1d_1, Arc::new(array3d_2.clone())]) - .unwrap(); - - let expected = as_list_array(&array3d_1).unwrap(); - let expected_dim = datafusion_common::utils::list_ndims(array3d_1.data_type()); - assert_ne!(as_list_array(&res[0]).unwrap(), expected); - assert_eq!( - datafusion_common::utils::list_ndims(res[0].data_type()), - expected_dim - ); - } -} diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs index 84aa0c94a22d..072e4ba47e24 100644 --- a/datafusion/physical-expr/src/functions.rs +++ b/datafusion/physical-expr/src/functions.rs @@ -33,7 +33,7 @@ use crate::sort_properties::SortProperties; use crate::{ array_expressions, conditional_expressions, datetime_expressions, math_expressions, - string_expressions, struct_expressions, PhysicalExpr, ScalarFunctionExpr, + string_expressions, PhysicalExpr, ScalarFunctionExpr, }; use arrow::{ array::ArrayRef, @@ -282,8 +282,6 @@ pub fn create_physical_fun( BuiltinScalarFunction::Sinh => Arc::new(math_expressions::sinh), BuiltinScalarFunction::Sqrt => Arc::new(math_expressions::sqrt), BuiltinScalarFunction::Cbrt => Arc::new(math_expressions::cbrt), - BuiltinScalarFunction::Tan => Arc::new(math_expressions::tan), - BuiltinScalarFunction::Tanh => Arc::new(math_expressions::tanh), BuiltinScalarFunction::Trunc => { Arc::new(|args| make_scalar_function_inner(math_expressions::trunc)(args)) } @@ -302,18 +300,6 @@ pub fn create_physical_fun( } // array functions - BuiltinScalarFunction::ArrayAppend => Arc::new(|args| { - make_scalar_function_inner(array_expressions::array_append)(args) - }), - BuiltinScalarFunction::ArraySort => Arc::new(|args| { - make_scalar_function_inner(array_expressions::array_sort)(args) - }), - BuiltinScalarFunction::ArrayConcat => Arc::new(|args| { - make_scalar_function_inner(array_expressions::array_concat)(args) - }), - BuiltinScalarFunction::ArrayDistinct => Arc::new(|args| { - make_scalar_function_inner(array_expressions::array_distinct)(args) - }), BuiltinScalarFunction::ArrayElement => Arc::new(|args| { make_scalar_function_inner(array_expressions::array_element)(args) }), @@ -332,12 +318,6 @@ pub fn create_physical_fun( BuiltinScalarFunction::ArrayPositions => Arc::new(|args| { make_scalar_function_inner(array_expressions::array_positions)(args) }), - BuiltinScalarFunction::ArrayPrepend => Arc::new(|args| { - make_scalar_function_inner(array_expressions::array_prepend)(args) - }), - BuiltinScalarFunction::ArrayRepeat => Arc::new(|args| { - make_scalar_function_inner(array_expressions::array_repeat)(args) - }), BuiltinScalarFunction::ArrayRemove => Arc::new(|args| { make_scalar_function_inner(array_expressions::array_remove)(args) }), @@ -368,14 +348,9 @@ pub fn create_physical_fun( 
BuiltinScalarFunction::ArrayResize => Arc::new(|args| { make_scalar_function_inner(array_expressions::array_resize)(args) }), - BuiltinScalarFunction::MakeArray => Arc::new(|args| { - make_scalar_function_inner(array_expressions::make_array)(args) - }), BuiltinScalarFunction::ArrayUnion => Arc::new(|args| { make_scalar_function_inner(array_expressions::array_union)(args) }), - // struct functions - BuiltinScalarFunction::Struct => Arc::new(struct_expressions::struct_expr), // string functions BuiltinScalarFunction::Ascii => Arc::new(|args| match args[0].data_type() { @@ -593,21 +568,6 @@ pub fn create_physical_fun( exec_err!("Unsupported data type {other:?} for function split_part") } }), - BuiltinScalarFunction::StringToArray => { - Arc::new(|args| match args[0].data_type() { - DataType::Utf8 => make_scalar_function_inner( - array_expressions::string_to_array::, - )(args), - DataType::LargeUtf8 => make_scalar_function_inner( - array_expressions::string_to_array::, - )(args), - other => { - exec_err!( - "Unsupported data type {other:?} for function string_to_array" - ) - } - }) - } BuiltinScalarFunction::StartsWith => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { make_scalar_function_inner(string_expressions::starts_with::)(args) @@ -699,19 +659,6 @@ pub fn create_physical_fun( }), BuiltinScalarFunction::Upper => Arc::new(string_expressions::upper), BuiltinScalarFunction::Uuid => Arc::new(string_expressions::uuid), - BuiltinScalarFunction::ArrowTypeof => Arc::new(move |args| { - if args.len() != 1 { - return exec_err!( - "arrow_typeof function requires 1 arguments, got {}", - args.len() - ); - } - - let input_data_type = args[0].data_type(); - Ok(ColumnarValue::Scalar(ScalarValue::from(format!( - "{input_data_type}" - )))) - }), BuiltinScalarFunction::OverLay => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { make_scalar_function_inner(string_expressions::overlay::)(args) diff --git a/datafusion/physical-expr/src/lib.rs b/datafusion/physical-expr/src/lib.rs index b36e5d79bb44..07bccf25c86a 100644 --- a/datafusion/physical-expr/src/lib.rs +++ b/datafusion/physical-expr/src/lib.rs @@ -35,7 +35,6 @@ mod scalar_function; mod sort_expr; pub mod sort_properties; pub mod string_expressions; -pub mod struct_expressions; pub mod tree_node; pub mod udf; #[cfg(feature = "unicode_expressions")] diff --git a/datafusion/physical-expr/src/math_expressions.rs b/datafusion/physical-expr/src/math_expressions.rs index a8c115ba3a82..db8855cb5400 100644 --- a/datafusion/physical-expr/src/math_expressions.rs +++ b/datafusion/physical-expr/src/math_expressions.rs @@ -159,10 +159,8 @@ math_unary_function!("sqrt", sqrt); math_unary_function!("cbrt", cbrt); math_unary_function!("sin", sin); math_unary_function!("cos", cos); -math_unary_function!("tan", tan); math_unary_function!("sinh", sinh); math_unary_function!("cosh", cosh); -math_unary_function!("tanh", tanh); math_unary_function!("asin", asin); math_unary_function!("acos", acos); math_unary_function!("atan", atan); diff --git a/datafusion/physical-expr/src/scalar_function.rs b/datafusion/physical-expr/src/scalar_function.rs index bfe0fdb279f5..1c9f0e609c3c 100644 --- a/datafusion/physical-expr/src/scalar_function.rs +++ b/datafusion/physical-expr/src/scalar_function.rs @@ -153,14 +153,15 @@ impl PhysicalExpr for ScalarFunctionExpr { if scalar_fun .signature() .type_signature - .supports_zero_argument() - && scalar_fun != BuiltinScalarFunction::MakeArray => + .supports_zero_argument() => { 
vec![ColumnarValue::create_null_array(batch.num_rows())] } // If the function supports zero argument, we pass in a null array indicating the batch size. // This is for user-defined functions. - (true, Err(_)) if self.supports_zero_argument => { + (true, Err(_)) + if self.supports_zero_argument && self.name != "make_array" => + { vec![ColumnarValue::create_null_array(batch.num_rows())] } _ => self diff --git a/datafusion/physical-expr/src/window/nth_value.rs b/datafusion/physical-expr/src/window/nth_value.rs index a7bb31b6e109..5c7c891f92d2 100644 --- a/datafusion/physical-expr/src/window/nth_value.rs +++ b/datafusion/physical-expr/src/window/nth_value.rs @@ -42,6 +42,7 @@ pub struct NthValue { /// Output data type data_type: DataType, kind: NthValueKind, + ignore_nulls: bool, } impl NthValue { @@ -50,12 +51,14 @@ impl NthValue { name: impl Into, expr: Arc, data_type: DataType, + ignore_nulls: bool, ) -> Self { Self { name: name.into(), expr, data_type, kind: NthValueKind::First, + ignore_nulls, } } @@ -64,12 +67,14 @@ impl NthValue { name: impl Into, expr: Arc, data_type: DataType, + ignore_nulls: bool, ) -> Self { Self { name: name.into(), expr, data_type, kind: NthValueKind::Last, + ignore_nulls, } } @@ -79,7 +84,11 @@ impl NthValue { expr: Arc, data_type: DataType, n: u32, + ignore_nulls: bool, ) -> Result { + if ignore_nulls { + return exec_err!("NTH_VALUE ignore_nulls is not supported yet"); + } match n { 0 => exec_err!("NTH_VALUE expects n to be non-zero"), _ => Ok(Self { @@ -87,6 +96,7 @@ impl NthValue { expr, data_type, kind: NthValueKind::Nth(n as i64), + ignore_nulls, }), } } @@ -122,7 +132,10 @@ impl BuiltInWindowFunctionExpr for NthValue { finalized_result: None, kind: self.kind, }; - Ok(Box::new(NthValueEvaluator { state })) + Ok(Box::new(NthValueEvaluator { + state, + ignore_nulls: self.ignore_nulls, + })) } fn reverse_expr(&self) -> Option> { @@ -136,6 +149,7 @@ impl BuiltInWindowFunctionExpr for NthValue { expr: self.expr.clone(), data_type: self.data_type.clone(), kind: reversed_kind, + ignore_nulls: self.ignore_nulls, })) } } @@ -144,6 +158,7 @@ impl BuiltInWindowFunctionExpr for NthValue { #[derive(Debug)] pub(crate) struct NthValueEvaluator { state: NthValueState, + ignore_nulls: bool, } impl PartitionEvaluator for NthValueEvaluator { @@ -184,7 +199,8 @@ impl PartitionEvaluator for NthValueEvaluator { } } }; - if is_prunable { + // Do not memoize results when nulls are ignored. + if is_prunable && !self.ignore_nulls { if self.state.finalized_result.is_none() && !is_reverse_direction { let result = ScalarValue::try_from_array(out, size - 1)?; self.state.finalized_result = Some(result); @@ -210,9 +226,39 @@ impl PartitionEvaluator for NthValueEvaluator { // We produce None if the window is empty. return ScalarValue::try_from(arr.data_type()); } + + // Extract valid indices if ignoring nulls. 
+ let (slice, valid_indices) = if self.ignore_nulls { + let slice = arr.slice(range.start, n_range); + let valid_indices = + slice.nulls().unwrap().valid_indices().collect::>(); + if valid_indices.is_empty() { + return ScalarValue::try_from(arr.data_type()); + } + (Some(slice), Some(valid_indices)) + } else { + (None, None) + }; match self.state.kind { - NthValueKind::First => ScalarValue::try_from_array(arr, range.start), - NthValueKind::Last => ScalarValue::try_from_array(arr, range.end - 1), + NthValueKind::First => { + if let Some(slice) = &slice { + let valid_indices = valid_indices.unwrap(); + ScalarValue::try_from_array(slice, valid_indices[0]) + } else { + ScalarValue::try_from_array(arr, range.start) + } + } + NthValueKind::Last => { + if let Some(slice) = &slice { + let valid_indices = valid_indices.unwrap(); + ScalarValue::try_from_array( + slice, + valid_indices[valid_indices.len() - 1], + ) + } else { + ScalarValue::try_from_array(arr, range.end - 1) + } + } NthValueKind::Nth(n) => { match n.cmp(&0) { Ordering::Greater => { @@ -295,6 +341,7 @@ mod tests { "first_value".to_owned(), Arc::new(Column::new("arr", 0)), DataType::Int32, + false, ); test_i32_result(first_value, Int32Array::from(vec![1; 8]))?; Ok(()) @@ -306,6 +353,7 @@ mod tests { "last_value".to_owned(), Arc::new(Column::new("arr", 0)), DataType::Int32, + false, ); test_i32_result( last_value, @@ -330,6 +378,7 @@ mod tests { Arc::new(Column::new("arr", 0)), DataType::Int32, 1, + false, )?; test_i32_result(nth_value, Int32Array::from(vec![1; 8]))?; Ok(()) @@ -342,6 +391,7 @@ mod tests { Arc::new(Column::new("arr", 0)), DataType::Int32, 2, + false, )?; test_i32_result( nth_value, diff --git a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs index 4cba571054de..0349f8f1eeec 100644 --- a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs @@ -1179,15 +1179,19 @@ mod tests { .map(|e| Arc::new(e) as Arc)?; let col_a = col("a", &schema)?; let nth_value_func1 = - NthValue::nth("nth_value(-1)", col_a.clone(), DataType::Int32, 1)? + NthValue::nth("nth_value(-1)", col_a.clone(), DataType::Int32, 1, false)? .reverse_expr() .unwrap(); let nth_value_func2 = - NthValue::nth("nth_value(-2)", col_a.clone(), DataType::Int32, 2)? + NthValue::nth("nth_value(-2)", col_a.clone(), DataType::Int32, 2, false)? .reverse_expr() .unwrap(); - let last_value_func = - Arc::new(NthValue::last("last", col_a.clone(), DataType::Int32)) as _; + let last_value_func = Arc::new(NthValue::last( + "last", + col_a.clone(), + DataType::Int32, + false, + )) as _; let window_exprs = vec![ // LAST_VALUE(a) Arc::new(BuiltInWindowExpr::new( diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index f91b525d6090..6712bc855ffd 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -250,15 +250,21 @@ fn create_built_in_window_expr( .try_into() .map_err(|e| DataFusionError::Execution(format!("{e:?}")))?; let n: u32 = n as u32; - Arc::new(NthValue::nth(name, arg, data_type.clone(), n)?) + Arc::new(NthValue::nth( + name, + arg, + data_type.clone(), + n, + ignore_nulls, + )?) 
} BuiltInWindowFunction::FirstValue => { let arg = args[0].clone(); - Arc::new(NthValue::first(name, arg, data_type.clone())) + Arc::new(NthValue::first(name, arg, data_type.clone(), ignore_nulls)) } BuiltInWindowFunction::LastValue => { let arg = args[0].clone(); - Arc::new(NthValue::last(name, arg, data_type.clone())) + Arc::new(NthValue::last(name, arg, data_type.clone(), ignore_nulls)) } }) } diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index dfdf6bd68276..e6ee41fadb9f 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -90,17 +90,13 @@ message ProjectionColumns { } message CsvFormat { - bool has_header = 1; - string delimiter = 2; - string quote = 3; - oneof optional_escape { - string escape = 4; - } + CsvOptions options = 5; } message ParquetFormat { // Used to be bool enable_pruning = 1; reserved 1; + TableParquetOptions options = 2; } message AvroFormat {} @@ -324,22 +320,18 @@ message DistinctOnNode { message CopyToNode { LogicalPlanNode input = 1; string output_url = 2; - oneof CopyOptions { - SQLOptions sql_options = 4; - FileTypeWriterOptions writer_options = 5; + oneof format_options { + CsvOptions csv = 8; + JsonOptions json = 9; + TableParquetOptions parquet = 10; + AvroOptions avro = 11; + ArrowOptions arrow = 12; } - string file_type = 6; repeated string partition_by = 7; } -message SQLOptions { - repeated SQLOption option = 1; -} - -message SQLOption { - string key = 1; - string value = 2; -} +message AvroOptions {} +message ArrowOptions {} message UnionNode { repeated LogicalPlanNode inputs = 1; @@ -567,9 +559,9 @@ enum ScalarFunction { Signum = 15; Sin = 16; Sqrt = 17; - Tan = 18; + // Tan = 18; Trunc = 19; - Array = 20; + // 20 was Array // RegexpMatch = 21; BitLength = 22; Btrim = 23; @@ -614,11 +606,11 @@ enum ScalarFunction { Upper = 62; Coalesce = 63; Power = 64; - StructFun = 65; + // 65 was StructFun // 66 was FromUnixtime Atan2 = 67; // 68 was DateBin - ArrowTypeof = 69; + // 69 was ArrowTypeof // 70 was CurrentDate // 71 was CurrentTime Uuid = 72; @@ -628,22 +620,22 @@ enum ScalarFunction { Atanh = 76; Sinh = 77; Cosh = 78; - Tanh = 79; + // Tanh = 79; Pi = 80; Degrees = 81; Radians = 82; Factorial = 83; Lcm = 84; Gcd = 85; - ArrayAppend = 86; - ArrayConcat = 87; + // 86 was ArrayAppend + // 87 was ArrayConcat // 88 was ArrayDims - ArrayRepeat = 89; + // 89 was ArrayRepeat // 90 was ArrayLength // 91 was ArrayNdims ArrayPosition = 92; ArrayPositions = 93; - ArrayPrepend = 94; + // 94 was ArrayPrepend ArrayRemove = 95; ArrayReplace = 96; // 97 was ArrayToString @@ -664,7 +656,7 @@ enum ScalarFunction { Iszero = 114; // 115 was ArrayEmpty ArrayPopBack = 116; - StringToArray = 117; + // 117 was StringToArray // 118 was ToTimestampNanos ArrayIntersect = 119; ArrayUnion = 120; @@ -675,8 +667,8 @@ enum ScalarFunction { Levenshtein = 125; SubstrIndex = 126; FindInSet = 127; - ArraySort = 128; - ArrayDistinct = 129; + /// 128 was ArraySort + /// 129 was ArrayDistinct ArrayResize = 130; EndsWith = 131; /// 132 was InStr @@ -1219,22 +1211,11 @@ message PartitionColumn { ArrowType arrow_type = 2; } -message FileTypeWriterOptions { - oneof FileType { - JsonWriterOptions json_options = 1; - ParquetWriterOptions parquet_options = 2; - CsvWriterOptions csv_options = 3; - ArrowWriterOptions arrow_options = 4; - } -} message JsonWriterOptions { CompressionTypeVariant compression = 1; } -message ParquetWriterOptions { - WriterProperties writer_properties = 1; -} message 
CsvWriterOptions { // Compression type @@ -1255,16 +1236,26 @@ message CsvWriterOptions { string null_value = 8; } -message ArrowWriterOptions {} +// Options controlling CSV format +message CsvOptions { + bool has_header = 1; // Indicates if the CSV has a header row + bytes delimiter = 2; // Delimiter character as a byte + bytes quote = 3; // Quote character as a byte + bytes escape = 4; // Optional escape character as a byte + CompressionTypeVariant compression = 5; // Compression type + uint64 schema_infer_max_rec = 6; // Max records for schema inference + string date_format = 7; // Optional date format + string datetime_format = 8; // Optional datetime format + string timestamp_format = 9; // Optional timestamp format + string timestamp_tz_format = 10; // Optional timestamp with timezone format + string time_format = 11; // Optional time format + string null_value = 12; // Optional representation of null value +} -message WriterProperties { - uint64 data_page_size_limit = 1; - uint64 dictionary_page_size_limit = 2; - uint64 data_page_row_count_limit = 3; - uint64 write_batch_size = 4; - uint64 max_row_group_size = 5; - string writer_version = 6; - string created_by = 7; +// Options controlling CSV format +message JsonOptions { + CompressionTypeVariant compression = 1; // Compression type + uint64 schema_infer_max_rec = 2; // Max records for schema inference } message FileSinkConfig { @@ -1276,11 +1267,11 @@ message FileSinkConfig { Schema output_schema = 4; repeated PartitionColumn table_partition_cols = 5; bool overwrite = 8; - FileTypeWriterOptions file_type_writer_options = 9; } message JsonSink { FileSinkConfig config = 1; + JsonWriterOptions writer_options = 2; } message JsonSinkExecNode { @@ -1292,6 +1283,7 @@ message JsonSinkExecNode { message CsvSink { FileSinkConfig config = 1; + CsvWriterOptions writer_options = 2; } message CsvSinkExecNode { @@ -1301,8 +1293,115 @@ message CsvSinkExecNode { PhysicalSortExprNodeCollection sort_order = 4; } +message TableParquetOptions { + ParquetOptions global = 1; + repeated ColumnSpecificOptions column_specific_options = 2; +} + +message ColumnSpecificOptions { + string column_name = 1; + ColumnOptions options = 2; +} + +message ColumnOptions { + oneof bloom_filter_enabled_opt { + bool bloom_filter_enabled = 1; + } + + oneof encoding_opt { + string encoding = 2; + } + + oneof dictionary_enabled_opt { + bool dictionary_enabled = 3; + } + + oneof compression_opt { + string compression = 4; + } + + oneof statistics_enabled_opt { + string statistics_enabled = 5; + } + + oneof bloom_filter_fpp_opt { + double bloom_filter_fpp = 6; + } + + oneof bloom_filter_ndv_opt { + uint64 bloom_filter_ndv = 7; + } + + oneof max_statistics_size_opt { + uint32 max_statistics_size = 8; + } +} + +message ParquetOptions { + // Regular fields + bool enable_page_index = 1; // default = true + bool pruning = 2; // default = true + bool skip_metadata = 3; // default = true + bool pushdown_filters = 5; // default = false + bool reorder_filters = 6; // default = false + uint64 data_pagesize_limit = 7; // default = 1024 * 1024 + uint64 write_batch_size = 8; // default = 1024 + string writer_version = 9; // default = "1.0" + bool bloom_filter_enabled = 20; // default = false + bool allow_single_file_parallelism = 23; // default = true + uint64 maximum_parallel_row_group_writers = 24; // default = 1 + uint64 maximum_buffered_record_batches_per_stream = 25; // default = 2 + + oneof metadata_size_hint_opt { + uint64 metadata_size_hint = 4; + } + + oneof compression_opt { + 
string compression = 10; + } + + oneof dictionary_enabled_opt { + bool dictionary_enabled = 11; + } + + oneof statistics_enabled_opt { + string statistics_enabled = 13; + } + + oneof max_statistics_size_opt { + uint64 max_statistics_size = 14; + } + + oneof column_index_truncate_length_opt { + uint64 column_index_truncate_length = 17; + } + + oneof encoding_opt { + string encoding = 19; + } + + oneof bloom_filter_fpp_opt { + double bloom_filter_fpp = 21; + } + + oneof bloom_filter_ndv_opt { + uint64 bloom_filter_ndv = 22; + } + + uint64 dictionary_page_size_limit = 12; + + uint64 data_page_row_count_limit = 18; + + uint64 max_row_group_size = 15; + + string created_by = 16; +} + + + message ParquetSink { FileSinkConfig config = 1; + TableParquetOptions parquet_options = 2; } message ParquetSinkExecNode { diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 83b0c6813b4b..37cc1a45785b 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -1409,6 +1409,77 @@ impl<'de> serde::Deserialize<'de> for AnalyzedLogicalPlanType { deserializer.deserialize_struct("datafusion.AnalyzedLogicalPlanType", FIELDS, GeneratedVisitor) } } +impl serde::Serialize for ArrowOptions { + #[allow(deprecated)] + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + use serde::ser::SerializeStruct; + let len = 0; + let struct_ser = serializer.serialize_struct("datafusion.ArrowOptions", len)?; + struct_ser.end() + } +} +impl<'de> serde::Deserialize<'de> for ArrowOptions { + #[allow(deprecated)] + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + const FIELDS: &[&str] = &[ + ]; + + #[allow(clippy::enum_variant_names)] + enum GeneratedField { + } + impl<'de> serde::Deserialize<'de> for GeneratedField { + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + struct GeneratedVisitor; + + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = GeneratedField; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(formatter, "expected one of: {:?}", &FIELDS) + } + + #[allow(unused_variables)] + fn visit_str(self, value: &str) -> std::result::Result + where + E: serde::de::Error, + { + Err(serde::de::Error::unknown_field(value, FIELDS)) + } + } + deserializer.deserialize_identifier(GeneratedVisitor) + } + } + struct GeneratedVisitor; + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = ArrowOptions; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + formatter.write_str("struct datafusion.ArrowOptions") + } + + fn visit_map(self, mut map_: V) -> std::result::Result + where + V: serde::de::MapAccess<'de>, + { + while map_.next_key::()?.is_some() { + let _ = map_.next_value::()?; + } + Ok(ArrowOptions { + }) + } + } + deserializer.deserialize_struct("datafusion.ArrowOptions", FIELDS, GeneratedVisitor) + } +} impl serde::Serialize for ArrowType { #[allow(deprecated)] fn serialize(&self, serializer: S) -> std::result::Result @@ -1929,7 +2000,7 @@ impl<'de> serde::Deserialize<'de> for ArrowType { deserializer.deserialize_struct("datafusion.ArrowType", FIELDS, GeneratedVisitor) } } -impl serde::Serialize for ArrowWriterOptions { +impl serde::Serialize for AvroFormat { #[allow(deprecated)] fn serialize(&self, serializer: S) -> std::result::Result where @@ -1937,11 +2008,11 @@ impl 
serde::Serialize for ArrowWriterOptions { { use serde::ser::SerializeStruct; let len = 0; - let struct_ser = serializer.serialize_struct("datafusion.ArrowWriterOptions", len)?; + let struct_ser = serializer.serialize_struct("datafusion.AvroFormat", len)?; struct_ser.end() } } -impl<'de> serde::Deserialize<'de> for ArrowWriterOptions { +impl<'de> serde::Deserialize<'de> for AvroFormat { #[allow(deprecated)] fn deserialize(deserializer: D) -> std::result::Result where @@ -1980,27 +2051,27 @@ impl<'de> serde::Deserialize<'de> for ArrowWriterOptions { } struct GeneratedVisitor; impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { - type Value = ArrowWriterOptions; + type Value = AvroFormat; fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - formatter.write_str("struct datafusion.ArrowWriterOptions") + formatter.write_str("struct datafusion.AvroFormat") } - fn visit_map(self, mut map_: V) -> std::result::Result + fn visit_map(self, mut map_: V) -> std::result::Result where V: serde::de::MapAccess<'de>, { while map_.next_key::()?.is_some() { let _ = map_.next_value::()?; } - Ok(ArrowWriterOptions { + Ok(AvroFormat { }) } } - deserializer.deserialize_struct("datafusion.ArrowWriterOptions", FIELDS, GeneratedVisitor) + deserializer.deserialize_struct("datafusion.AvroFormat", FIELDS, GeneratedVisitor) } } -impl serde::Serialize for AvroFormat { +impl serde::Serialize for AvroOptions { #[allow(deprecated)] fn serialize(&self, serializer: S) -> std::result::Result where @@ -2008,11 +2079,11 @@ impl serde::Serialize for AvroFormat { { use serde::ser::SerializeStruct; let len = 0; - let struct_ser = serializer.serialize_struct("datafusion.AvroFormat", len)?; + let struct_ser = serializer.serialize_struct("datafusion.AvroOptions", len)?; struct_ser.end() } } -impl<'de> serde::Deserialize<'de> for AvroFormat { +impl<'de> serde::Deserialize<'de> for AvroOptions { #[allow(deprecated)] fn deserialize(deserializer: D) -> std::result::Result where @@ -2051,24 +2122,24 @@ impl<'de> serde::Deserialize<'de> for AvroFormat { } struct GeneratedVisitor; impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { - type Value = AvroFormat; + type Value = AvroOptions; fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - formatter.write_str("struct datafusion.AvroFormat") + formatter.write_str("struct datafusion.AvroOptions") } - fn visit_map(self, mut map_: V) -> std::result::Result + fn visit_map(self, mut map_: V) -> std::result::Result where V: serde::de::MapAccess<'de>, { while map_.next_key::()?.is_some() { let _ = map_.next_value::()?; } - Ok(AvroFormat { + Ok(AvroOptions { }) } } - deserializer.deserialize_struct("datafusion.AvroFormat", FIELDS, GeneratedVisitor) + deserializer.deserialize_struct("datafusion.AvroOptions", FIELDS, GeneratedVisitor) } } impl serde::Serialize for AvroScanExecNode { @@ -3260,6 +3331,255 @@ impl<'de> serde::Deserialize<'de> for ColumnIndex { deserializer.deserialize_struct("datafusion.ColumnIndex", FIELDS, GeneratedVisitor) } } +impl serde::Serialize for ColumnOptions { + #[allow(deprecated)] + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + use serde::ser::SerializeStruct; + let mut len = 0; + if self.bloom_filter_enabled_opt.is_some() { + len += 1; + } + if self.encoding_opt.is_some() { + len += 1; + } + if self.dictionary_enabled_opt.is_some() { + len += 1; + } + if self.compression_opt.is_some() { + len += 1; + } + if self.statistics_enabled_opt.is_some() { + len += 
1; + } + if self.bloom_filter_fpp_opt.is_some() { + len += 1; + } + if self.bloom_filter_ndv_opt.is_some() { + len += 1; + } + if self.max_statistics_size_opt.is_some() { + len += 1; + } + let mut struct_ser = serializer.serialize_struct("datafusion.ColumnOptions", len)?; + if let Some(v) = self.bloom_filter_enabled_opt.as_ref() { + match v { + column_options::BloomFilterEnabledOpt::BloomFilterEnabled(v) => { + struct_ser.serialize_field("bloomFilterEnabled", v)?; + } + } + } + if let Some(v) = self.encoding_opt.as_ref() { + match v { + column_options::EncodingOpt::Encoding(v) => { + struct_ser.serialize_field("encoding", v)?; + } + } + } + if let Some(v) = self.dictionary_enabled_opt.as_ref() { + match v { + column_options::DictionaryEnabledOpt::DictionaryEnabled(v) => { + struct_ser.serialize_field("dictionaryEnabled", v)?; + } + } + } + if let Some(v) = self.compression_opt.as_ref() { + match v { + column_options::CompressionOpt::Compression(v) => { + struct_ser.serialize_field("compression", v)?; + } + } + } + if let Some(v) = self.statistics_enabled_opt.as_ref() { + match v { + column_options::StatisticsEnabledOpt::StatisticsEnabled(v) => { + struct_ser.serialize_field("statisticsEnabled", v)?; + } + } + } + if let Some(v) = self.bloom_filter_fpp_opt.as_ref() { + match v { + column_options::BloomFilterFppOpt::BloomFilterFpp(v) => { + struct_ser.serialize_field("bloomFilterFpp", v)?; + } + } + } + if let Some(v) = self.bloom_filter_ndv_opt.as_ref() { + match v { + column_options::BloomFilterNdvOpt::BloomFilterNdv(v) => { + #[allow(clippy::needless_borrow)] + struct_ser.serialize_field("bloomFilterNdv", ToString::to_string(&v).as_str())?; + } + } + } + if let Some(v) = self.max_statistics_size_opt.as_ref() { + match v { + column_options::MaxStatisticsSizeOpt::MaxStatisticsSize(v) => { + struct_ser.serialize_field("maxStatisticsSize", v)?; + } + } + } + struct_ser.end() + } +} +impl<'de> serde::Deserialize<'de> for ColumnOptions { + #[allow(deprecated)] + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + const FIELDS: &[&str] = &[ + "bloom_filter_enabled", + "bloomFilterEnabled", + "encoding", + "dictionary_enabled", + "dictionaryEnabled", + "compression", + "statistics_enabled", + "statisticsEnabled", + "bloom_filter_fpp", + "bloomFilterFpp", + "bloom_filter_ndv", + "bloomFilterNdv", + "max_statistics_size", + "maxStatisticsSize", + ]; + + #[allow(clippy::enum_variant_names)] + enum GeneratedField { + BloomFilterEnabled, + Encoding, + DictionaryEnabled, + Compression, + StatisticsEnabled, + BloomFilterFpp, + BloomFilterNdv, + MaxStatisticsSize, + } + impl<'de> serde::Deserialize<'de> for GeneratedField { + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + struct GeneratedVisitor; + + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = GeneratedField; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(formatter, "expected one of: {:?}", &FIELDS) + } + + #[allow(unused_variables)] + fn visit_str(self, value: &str) -> std::result::Result + where + E: serde::de::Error, + { + match value { + "bloomFilterEnabled" | "bloom_filter_enabled" => Ok(GeneratedField::BloomFilterEnabled), + "encoding" => Ok(GeneratedField::Encoding), + "dictionaryEnabled" | "dictionary_enabled" => Ok(GeneratedField::DictionaryEnabled), + "compression" => Ok(GeneratedField::Compression), + "statisticsEnabled" | "statistics_enabled" => 
Ok(GeneratedField::StatisticsEnabled), + "bloomFilterFpp" | "bloom_filter_fpp" => Ok(GeneratedField::BloomFilterFpp), + "bloomFilterNdv" | "bloom_filter_ndv" => Ok(GeneratedField::BloomFilterNdv), + "maxStatisticsSize" | "max_statistics_size" => Ok(GeneratedField::MaxStatisticsSize), + _ => Err(serde::de::Error::unknown_field(value, FIELDS)), + } + } + } + deserializer.deserialize_identifier(GeneratedVisitor) + } + } + struct GeneratedVisitor; + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = ColumnOptions; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + formatter.write_str("struct datafusion.ColumnOptions") + } + + fn visit_map(self, mut map_: V) -> std::result::Result + where + V: serde::de::MapAccess<'de>, + { + let mut bloom_filter_enabled_opt__ = None; + let mut encoding_opt__ = None; + let mut dictionary_enabled_opt__ = None; + let mut compression_opt__ = None; + let mut statistics_enabled_opt__ = None; + let mut bloom_filter_fpp_opt__ = None; + let mut bloom_filter_ndv_opt__ = None; + let mut max_statistics_size_opt__ = None; + while let Some(k) = map_.next_key()? { + match k { + GeneratedField::BloomFilterEnabled => { + if bloom_filter_enabled_opt__.is_some() { + return Err(serde::de::Error::duplicate_field("bloomFilterEnabled")); + } + bloom_filter_enabled_opt__ = map_.next_value::<::std::option::Option<_>>()?.map(column_options::BloomFilterEnabledOpt::BloomFilterEnabled); + } + GeneratedField::Encoding => { + if encoding_opt__.is_some() { + return Err(serde::de::Error::duplicate_field("encoding")); + } + encoding_opt__ = map_.next_value::<::std::option::Option<_>>()?.map(column_options::EncodingOpt::Encoding); + } + GeneratedField::DictionaryEnabled => { + if dictionary_enabled_opt__.is_some() { + return Err(serde::de::Error::duplicate_field("dictionaryEnabled")); + } + dictionary_enabled_opt__ = map_.next_value::<::std::option::Option<_>>()?.map(column_options::DictionaryEnabledOpt::DictionaryEnabled); + } + GeneratedField::Compression => { + if compression_opt__.is_some() { + return Err(serde::de::Error::duplicate_field("compression")); + } + compression_opt__ = map_.next_value::<::std::option::Option<_>>()?.map(column_options::CompressionOpt::Compression); + } + GeneratedField::StatisticsEnabled => { + if statistics_enabled_opt__.is_some() { + return Err(serde::de::Error::duplicate_field("statisticsEnabled")); + } + statistics_enabled_opt__ = map_.next_value::<::std::option::Option<_>>()?.map(column_options::StatisticsEnabledOpt::StatisticsEnabled); + } + GeneratedField::BloomFilterFpp => { + if bloom_filter_fpp_opt__.is_some() { + return Err(serde::de::Error::duplicate_field("bloomFilterFpp")); + } + bloom_filter_fpp_opt__ = map_.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| column_options::BloomFilterFppOpt::BloomFilterFpp(x.0)); + } + GeneratedField::BloomFilterNdv => { + if bloom_filter_ndv_opt__.is_some() { + return Err(serde::de::Error::duplicate_field("bloomFilterNdv")); + } + bloom_filter_ndv_opt__ = map_.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| column_options::BloomFilterNdvOpt::BloomFilterNdv(x.0)); + } + GeneratedField::MaxStatisticsSize => { + if max_statistics_size_opt__.is_some() { + return Err(serde::de::Error::duplicate_field("maxStatisticsSize")); + } + max_statistics_size_opt__ = map_.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| 
column_options::MaxStatisticsSizeOpt::MaxStatisticsSize(x.0)); + } + } + } + Ok(ColumnOptions { + bloom_filter_enabled_opt: bloom_filter_enabled_opt__, + encoding_opt: encoding_opt__, + dictionary_enabled_opt: dictionary_enabled_opt__, + compression_opt: compression_opt__, + statistics_enabled_opt: statistics_enabled_opt__, + bloom_filter_fpp_opt: bloom_filter_fpp_opt__, + bloom_filter_ndv_opt: bloom_filter_ndv_opt__, + max_statistics_size_opt: max_statistics_size_opt__, + }) + } + } + deserializer.deserialize_struct("datafusion.ColumnOptions", FIELDS, GeneratedVisitor) + } +} impl serde::Serialize for ColumnRelation { #[allow(deprecated)] fn serialize(&self, serializer: S) -> std::result::Result @@ -3351,7 +3671,7 @@ impl<'de> serde::Deserialize<'de> for ColumnRelation { deserializer.deserialize_struct("datafusion.ColumnRelation", FIELDS, GeneratedVisitor) } } -impl serde::Serialize for ColumnStats { +impl serde::Serialize for ColumnSpecificOptions { #[allow(deprecated)] fn serialize(&self, serializer: S) -> std::result::Result where @@ -3359,57 +3679,38 @@ impl serde::Serialize for ColumnStats { { use serde::ser::SerializeStruct; let mut len = 0; - if self.min_value.is_some() { - len += 1; - } - if self.max_value.is_some() { + if !self.column_name.is_empty() { len += 1; } - if self.null_count.is_some() { - len += 1; - } - if self.distinct_count.is_some() { + if self.options.is_some() { len += 1; } - let mut struct_ser = serializer.serialize_struct("datafusion.ColumnStats", len)?; - if let Some(v) = self.min_value.as_ref() { - struct_ser.serialize_field("minValue", v)?; - } - if let Some(v) = self.max_value.as_ref() { - struct_ser.serialize_field("maxValue", v)?; - } - if let Some(v) = self.null_count.as_ref() { - struct_ser.serialize_field("nullCount", v)?; + let mut struct_ser = serializer.serialize_struct("datafusion.ColumnSpecificOptions", len)?; + if !self.column_name.is_empty() { + struct_ser.serialize_field("columnName", &self.column_name)?; } - if let Some(v) = self.distinct_count.as_ref() { - struct_ser.serialize_field("distinctCount", v)?; + if let Some(v) = self.options.as_ref() { + struct_ser.serialize_field("options", v)?; } struct_ser.end() } } -impl<'de> serde::Deserialize<'de> for ColumnStats { +impl<'de> serde::Deserialize<'de> for ColumnSpecificOptions { #[allow(deprecated)] fn deserialize(deserializer: D) -> std::result::Result where D: serde::Deserializer<'de>, { const FIELDS: &[&str] = &[ - "min_value", - "minValue", - "max_value", - "maxValue", - "null_count", - "nullCount", - "distinct_count", - "distinctCount", + "column_name", + "columnName", + "options", ]; #[allow(clippy::enum_variant_names)] enum GeneratedField { - MinValue, - MaxValue, - NullCount, - DistinctCount, + ColumnName, + Options, } impl<'de> serde::Deserialize<'de> for GeneratedField { fn deserialize(deserializer: D) -> std::result::Result @@ -3431,10 +3732,8 @@ impl<'de> serde::Deserialize<'de> for ColumnStats { E: serde::de::Error, { match value { - "minValue" | "min_value" => Ok(GeneratedField::MinValue), - "maxValue" | "max_value" => Ok(GeneratedField::MaxValue), - "nullCount" | "null_count" => Ok(GeneratedField::NullCount), - "distinctCount" | "distinct_count" => Ok(GeneratedField::DistinctCount), + "columnName" | "column_name" => Ok(GeneratedField::ColumnName), + "options" => Ok(GeneratedField::Options), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } } @@ -3444,7 +3743,137 @@ impl<'de> serde::Deserialize<'de> for ColumnStats { } struct GeneratedVisitor; impl<'de> 
serde::de::Visitor<'de> for GeneratedVisitor { - type Value = ColumnStats; + type Value = ColumnSpecificOptions; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + formatter.write_str("struct datafusion.ColumnSpecificOptions") + } + + fn visit_map(self, mut map_: V) -> std::result::Result + where + V: serde::de::MapAccess<'de>, + { + let mut column_name__ = None; + let mut options__ = None; + while let Some(k) = map_.next_key()? { + match k { + GeneratedField::ColumnName => { + if column_name__.is_some() { + return Err(serde::de::Error::duplicate_field("columnName")); + } + column_name__ = Some(map_.next_value()?); + } + GeneratedField::Options => { + if options__.is_some() { + return Err(serde::de::Error::duplicate_field("options")); + } + options__ = map_.next_value()?; + } + } + } + Ok(ColumnSpecificOptions { + column_name: column_name__.unwrap_or_default(), + options: options__, + }) + } + } + deserializer.deserialize_struct("datafusion.ColumnSpecificOptions", FIELDS, GeneratedVisitor) + } +} +impl serde::Serialize for ColumnStats { + #[allow(deprecated)] + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + use serde::ser::SerializeStruct; + let mut len = 0; + if self.min_value.is_some() { + len += 1; + } + if self.max_value.is_some() { + len += 1; + } + if self.null_count.is_some() { + len += 1; + } + if self.distinct_count.is_some() { + len += 1; + } + let mut struct_ser = serializer.serialize_struct("datafusion.ColumnStats", len)?; + if let Some(v) = self.min_value.as_ref() { + struct_ser.serialize_field("minValue", v)?; + } + if let Some(v) = self.max_value.as_ref() { + struct_ser.serialize_field("maxValue", v)?; + } + if let Some(v) = self.null_count.as_ref() { + struct_ser.serialize_field("nullCount", v)?; + } + if let Some(v) = self.distinct_count.as_ref() { + struct_ser.serialize_field("distinctCount", v)?; + } + struct_ser.end() + } +} +impl<'de> serde::Deserialize<'de> for ColumnStats { + #[allow(deprecated)] + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + const FIELDS: &[&str] = &[ + "min_value", + "minValue", + "max_value", + "maxValue", + "null_count", + "nullCount", + "distinct_count", + "distinctCount", + ]; + + #[allow(clippy::enum_variant_names)] + enum GeneratedField { + MinValue, + MaxValue, + NullCount, + DistinctCount, + } + impl<'de> serde::Deserialize<'de> for GeneratedField { + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + struct GeneratedVisitor; + + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = GeneratedField; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(formatter, "expected one of: {:?}", &FIELDS) + } + + #[allow(unused_variables)] + fn visit_str(self, value: &str) -> std::result::Result + where + E: serde::de::Error, + { + match value { + "minValue" | "min_value" => Ok(GeneratedField::MinValue), + "maxValue" | "max_value" => Ok(GeneratedField::MaxValue), + "nullCount" | "null_count" => Ok(GeneratedField::NullCount), + "distinctCount" | "distinct_count" => Ok(GeneratedField::DistinctCount), + _ => Err(serde::de::Error::unknown_field(value, FIELDS)), + } + } + } + deserializer.deserialize_identifier(GeneratedVisitor) + } + } + struct GeneratedVisitor; + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = ColumnStats; fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) 
-> std::fmt::Result { formatter.write_str("struct datafusion.ColumnStats") @@ -3792,13 +4221,10 @@ impl serde::Serialize for CopyToNode { if !self.output_url.is_empty() { len += 1; } - if !self.file_type.is_empty() { - len += 1; - } if !self.partition_by.is_empty() { len += 1; } - if self.copy_options.is_some() { + if self.format_options.is_some() { len += 1; } let mut struct_ser = serializer.serialize_struct("datafusion.CopyToNode", len)?; @@ -3808,19 +4234,25 @@ impl serde::Serialize for CopyToNode { if !self.output_url.is_empty() { struct_ser.serialize_field("outputUrl", &self.output_url)?; } - if !self.file_type.is_empty() { - struct_ser.serialize_field("fileType", &self.file_type)?; - } if !self.partition_by.is_empty() { struct_ser.serialize_field("partitionBy", &self.partition_by)?; } - if let Some(v) = self.copy_options.as_ref() { + if let Some(v) = self.format_options.as_ref() { match v { - copy_to_node::CopyOptions::SqlOptions(v) => { - struct_ser.serialize_field("sqlOptions", v)?; + copy_to_node::FormatOptions::Csv(v) => { + struct_ser.serialize_field("csv", v)?; + } + copy_to_node::FormatOptions::Json(v) => { + struct_ser.serialize_field("json", v)?; + } + copy_to_node::FormatOptions::Parquet(v) => { + struct_ser.serialize_field("parquet", v)?; } - copy_to_node::CopyOptions::WriterOptions(v) => { - struct_ser.serialize_field("writerOptions", v)?; + copy_to_node::FormatOptions::Avro(v) => { + struct_ser.serialize_field("avro", v)?; + } + copy_to_node::FormatOptions::Arrow(v) => { + struct_ser.serialize_field("arrow", v)?; } } } @@ -3837,24 +4269,25 @@ impl<'de> serde::Deserialize<'de> for CopyToNode { "input", "output_url", "outputUrl", - "file_type", - "fileType", "partition_by", "partitionBy", - "sql_options", - "sqlOptions", - "writer_options", - "writerOptions", + "csv", + "json", + "parquet", + "avro", + "arrow", ]; #[allow(clippy::enum_variant_names)] enum GeneratedField { Input, OutputUrl, - FileType, PartitionBy, - SqlOptions, - WriterOptions, + Csv, + Json, + Parquet, + Avro, + Arrow, } impl<'de> serde::Deserialize<'de> for GeneratedField { fn deserialize(deserializer: D) -> std::result::Result @@ -3878,10 +4311,12 @@ impl<'de> serde::Deserialize<'de> for CopyToNode { match value { "input" => Ok(GeneratedField::Input), "outputUrl" | "output_url" => Ok(GeneratedField::OutputUrl), - "fileType" | "file_type" => Ok(GeneratedField::FileType), "partitionBy" | "partition_by" => Ok(GeneratedField::PartitionBy), - "sqlOptions" | "sql_options" => Ok(GeneratedField::SqlOptions), - "writerOptions" | "writer_options" => Ok(GeneratedField::WriterOptions), + "csv" => Ok(GeneratedField::Csv), + "json" => Ok(GeneratedField::Json), + "parquet" => Ok(GeneratedField::Parquet), + "avro" => Ok(GeneratedField::Avro), + "arrow" => Ok(GeneratedField::Arrow), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } } @@ -3903,9 +4338,8 @@ impl<'de> serde::Deserialize<'de> for CopyToNode { { let mut input__ = None; let mut output_url__ = None; - let mut file_type__ = None; let mut partition_by__ = None; - let mut copy_options__ = None; + let mut format_options__ = None; while let Some(k) = map_.next_key()? 
{ match k { GeneratedField::Input => { @@ -3920,30 +4354,45 @@ impl<'de> serde::Deserialize<'de> for CopyToNode { } output_url__ = Some(map_.next_value()?); } - GeneratedField::FileType => { - if file_type__.is_some() { - return Err(serde::de::Error::duplicate_field("fileType")); - } - file_type__ = Some(map_.next_value()?); - } GeneratedField::PartitionBy => { if partition_by__.is_some() { return Err(serde::de::Error::duplicate_field("partitionBy")); } partition_by__ = Some(map_.next_value()?); } - GeneratedField::SqlOptions => { - if copy_options__.is_some() { - return Err(serde::de::Error::duplicate_field("sqlOptions")); + GeneratedField::Csv => { + if format_options__.is_some() { + return Err(serde::de::Error::duplicate_field("csv")); } - copy_options__ = map_.next_value::<::std::option::Option<_>>()?.map(copy_to_node::CopyOptions::SqlOptions) + format_options__ = map_.next_value::<::std::option::Option<_>>()?.map(copy_to_node::FormatOptions::Csv) ; } - GeneratedField::WriterOptions => { - if copy_options__.is_some() { - return Err(serde::de::Error::duplicate_field("writerOptions")); + GeneratedField::Json => { + if format_options__.is_some() { + return Err(serde::de::Error::duplicate_field("json")); + } + format_options__ = map_.next_value::<::std::option::Option<_>>()?.map(copy_to_node::FormatOptions::Json) +; + } + GeneratedField::Parquet => { + if format_options__.is_some() { + return Err(serde::de::Error::duplicate_field("parquet")); + } + format_options__ = map_.next_value::<::std::option::Option<_>>()?.map(copy_to_node::FormatOptions::Parquet) +; + } + GeneratedField::Avro => { + if format_options__.is_some() { + return Err(serde::de::Error::duplicate_field("avro")); } - copy_options__ = map_.next_value::<::std::option::Option<_>>()?.map(copy_to_node::CopyOptions::WriterOptions) + format_options__ = map_.next_value::<::std::option::Option<_>>()?.map(copy_to_node::FormatOptions::Avro) +; + } + GeneratedField::Arrow => { + if format_options__.is_some() { + return Err(serde::de::Error::duplicate_field("arrow")); + } + format_options__ = map_.next_value::<::std::option::Option<_>>()?.map(copy_to_node::FormatOptions::Arrow) ; } } @@ -3951,9 +4400,8 @@ impl<'de> serde::Deserialize<'de> for CopyToNode { Ok(CopyToNode { input: input__, output_url: output_url__.unwrap_or_default(), - file_type: file_type__.unwrap_or_default(), partition_by: partition_by__.unwrap_or_default(), - copy_options: copy_options__, + format_options: format_options__, }) } } @@ -4923,34 +5371,12 @@ impl serde::Serialize for CsvFormat { { use serde::ser::SerializeStruct; let mut len = 0; - if self.has_header { - len += 1; - } - if !self.delimiter.is_empty() { - len += 1; - } - if !self.quote.is_empty() { - len += 1; - } - if self.optional_escape.is_some() { + if self.options.is_some() { len += 1; } let mut struct_ser = serializer.serialize_struct("datafusion.CsvFormat", len)?; - if self.has_header { - struct_ser.serialize_field("hasHeader", &self.has_header)?; - } - if !self.delimiter.is_empty() { - struct_ser.serialize_field("delimiter", &self.delimiter)?; - } - if !self.quote.is_empty() { - struct_ser.serialize_field("quote", &self.quote)?; - } - if let Some(v) = self.optional_escape.as_ref() { - match v { - csv_format::OptionalEscape::Escape(v) => { - struct_ser.serialize_field("escape", v)?; - } - } + if let Some(v) = self.options.as_ref() { + struct_ser.serialize_field("options", v)?; } struct_ser.end() } @@ -4962,19 +5388,12 @@ impl<'de> serde::Deserialize<'de> for CsvFormat { D: serde::Deserializer<'de>, { 
const FIELDS: &[&str] = &[ - "has_header", - "hasHeader", - "delimiter", - "quote", - "escape", + "options", ]; #[allow(clippy::enum_variant_names)] enum GeneratedField { - HasHeader, - Delimiter, - Quote, - Escape, + Options, } impl<'de> serde::Deserialize<'de> for GeneratedField { fn deserialize(deserializer: D) -> std::result::Result @@ -4996,10 +5415,7 @@ impl<'de> serde::Deserialize<'de> for CsvFormat { E: serde::de::Error, { match value { - "hasHeader" | "has_header" => Ok(GeneratedField::HasHeader), - "delimiter" => Ok(GeneratedField::Delimiter), - "quote" => Ok(GeneratedField::Quote), - "escape" => Ok(GeneratedField::Escape), + "options" => Ok(GeneratedField::Options), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } } @@ -5019,50 +5435,26 @@ impl<'de> serde::Deserialize<'de> for CsvFormat { where V: serde::de::MapAccess<'de>, { - let mut has_header__ = None; - let mut delimiter__ = None; - let mut quote__ = None; - let mut optional_escape__ = None; + let mut options__ = None; while let Some(k) = map_.next_key()? { match k { - GeneratedField::HasHeader => { - if has_header__.is_some() { - return Err(serde::de::Error::duplicate_field("hasHeader")); - } - has_header__ = Some(map_.next_value()?); - } - GeneratedField::Delimiter => { - if delimiter__.is_some() { - return Err(serde::de::Error::duplicate_field("delimiter")); - } - delimiter__ = Some(map_.next_value()?); - } - GeneratedField::Quote => { - if quote__.is_some() { - return Err(serde::de::Error::duplicate_field("quote")); - } - quote__ = Some(map_.next_value()?); - } - GeneratedField::Escape => { - if optional_escape__.is_some() { - return Err(serde::de::Error::duplicate_field("escape")); + GeneratedField::Options => { + if options__.is_some() { + return Err(serde::de::Error::duplicate_field("options")); } - optional_escape__ = map_.next_value::<::std::option::Option<_>>()?.map(csv_format::OptionalEscape::Escape); + options__ = map_.next_value()?; } } } Ok(CsvFormat { - has_header: has_header__.unwrap_or_default(), - delimiter: delimiter__.unwrap_or_default(), - quote: quote__.unwrap_or_default(), - optional_escape: optional_escape__, + options: options__, }) } } deserializer.deserialize_struct("datafusion.CsvFormat", FIELDS, GeneratedVisitor) } } -impl serde::Serialize for CsvScanExecNode { +impl serde::Serialize for CsvOptions { #[allow(deprecated)] fn serialize(&self, serializer: S) -> std::result::Result where @@ -5070,9 +5462,6 @@ impl serde::Serialize for CsvScanExecNode { { use serde::ser::SerializeStruct; let mut len = 0; - if self.base_conf.is_some() { - len += 1; - } if self.has_header { len += 1; } @@ -5082,14 +5471,317 @@ impl serde::Serialize for CsvScanExecNode { if !self.quote.is_empty() { len += 1; } - if self.optional_escape.is_some() { + if !self.escape.is_empty() { len += 1; } - let mut struct_ser = serializer.serialize_struct("datafusion.CsvScanExecNode", len)?; - if let Some(v) = self.base_conf.as_ref() { - struct_ser.serialize_field("baseConf", v)?; + if self.compression != 0 { + len += 1; } - if self.has_header { + if self.schema_infer_max_rec != 0 { + len += 1; + } + if !self.date_format.is_empty() { + len += 1; + } + if !self.datetime_format.is_empty() { + len += 1; + } + if !self.timestamp_format.is_empty() { + len += 1; + } + if !self.timestamp_tz_format.is_empty() { + len += 1; + } + if !self.time_format.is_empty() { + len += 1; + } + if !self.null_value.is_empty() { + len += 1; + } + let mut struct_ser = serializer.serialize_struct("datafusion.CsvOptions", len)?; + if self.has_header { 
+ struct_ser.serialize_field("hasHeader", &self.has_header)?; + } + if !self.delimiter.is_empty() { + #[allow(clippy::needless_borrow)] + struct_ser.serialize_field("delimiter", pbjson::private::base64::encode(&self.delimiter).as_str())?; + } + if !self.quote.is_empty() { + #[allow(clippy::needless_borrow)] + struct_ser.serialize_field("quote", pbjson::private::base64::encode(&self.quote).as_str())?; + } + if !self.escape.is_empty() { + #[allow(clippy::needless_borrow)] + struct_ser.serialize_field("escape", pbjson::private::base64::encode(&self.escape).as_str())?; + } + if self.compression != 0 { + let v = CompressionTypeVariant::try_from(self.compression) + .map_err(|_| serde::ser::Error::custom(format!("Invalid variant {}", self.compression)))?; + struct_ser.serialize_field("compression", &v)?; + } + if self.schema_infer_max_rec != 0 { + #[allow(clippy::needless_borrow)] + struct_ser.serialize_field("schemaInferMaxRec", ToString::to_string(&self.schema_infer_max_rec).as_str())?; + } + if !self.date_format.is_empty() { + struct_ser.serialize_field("dateFormat", &self.date_format)?; + } + if !self.datetime_format.is_empty() { + struct_ser.serialize_field("datetimeFormat", &self.datetime_format)?; + } + if !self.timestamp_format.is_empty() { + struct_ser.serialize_field("timestampFormat", &self.timestamp_format)?; + } + if !self.timestamp_tz_format.is_empty() { + struct_ser.serialize_field("timestampTzFormat", &self.timestamp_tz_format)?; + } + if !self.time_format.is_empty() { + struct_ser.serialize_field("timeFormat", &self.time_format)?; + } + if !self.null_value.is_empty() { + struct_ser.serialize_field("nullValue", &self.null_value)?; + } + struct_ser.end() + } +} +impl<'de> serde::Deserialize<'de> for CsvOptions { + #[allow(deprecated)] + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + const FIELDS: &[&str] = &[ + "has_header", + "hasHeader", + "delimiter", + "quote", + "escape", + "compression", + "schema_infer_max_rec", + "schemaInferMaxRec", + "date_format", + "dateFormat", + "datetime_format", + "datetimeFormat", + "timestamp_format", + "timestampFormat", + "timestamp_tz_format", + "timestampTzFormat", + "time_format", + "timeFormat", + "null_value", + "nullValue", + ]; + + #[allow(clippy::enum_variant_names)] + enum GeneratedField { + HasHeader, + Delimiter, + Quote, + Escape, + Compression, + SchemaInferMaxRec, + DateFormat, + DatetimeFormat, + TimestampFormat, + TimestampTzFormat, + TimeFormat, + NullValue, + } + impl<'de> serde::Deserialize<'de> for GeneratedField { + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + struct GeneratedVisitor; + + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = GeneratedField; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(formatter, "expected one of: {:?}", &FIELDS) + } + + #[allow(unused_variables)] + fn visit_str(self, value: &str) -> std::result::Result + where + E: serde::de::Error, + { + match value { + "hasHeader" | "has_header" => Ok(GeneratedField::HasHeader), + "delimiter" => Ok(GeneratedField::Delimiter), + "quote" => Ok(GeneratedField::Quote), + "escape" => Ok(GeneratedField::Escape), + "compression" => Ok(GeneratedField::Compression), + "schemaInferMaxRec" | "schema_infer_max_rec" => Ok(GeneratedField::SchemaInferMaxRec), + "dateFormat" | "date_format" => Ok(GeneratedField::DateFormat), + "datetimeFormat" | "datetime_format" => 
Ok(GeneratedField::DatetimeFormat), + "timestampFormat" | "timestamp_format" => Ok(GeneratedField::TimestampFormat), + "timestampTzFormat" | "timestamp_tz_format" => Ok(GeneratedField::TimestampTzFormat), + "timeFormat" | "time_format" => Ok(GeneratedField::TimeFormat), + "nullValue" | "null_value" => Ok(GeneratedField::NullValue), + _ => Err(serde::de::Error::unknown_field(value, FIELDS)), + } + } + } + deserializer.deserialize_identifier(GeneratedVisitor) + } + } + struct GeneratedVisitor; + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = CsvOptions; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + formatter.write_str("struct datafusion.CsvOptions") + } + + fn visit_map(self, mut map_: V) -> std::result::Result + where + V: serde::de::MapAccess<'de>, + { + let mut has_header__ = None; + let mut delimiter__ = None; + let mut quote__ = None; + let mut escape__ = None; + let mut compression__ = None; + let mut schema_infer_max_rec__ = None; + let mut date_format__ = None; + let mut datetime_format__ = None; + let mut timestamp_format__ = None; + let mut timestamp_tz_format__ = None; + let mut time_format__ = None; + let mut null_value__ = None; + while let Some(k) = map_.next_key()? { + match k { + GeneratedField::HasHeader => { + if has_header__.is_some() { + return Err(serde::de::Error::duplicate_field("hasHeader")); + } + has_header__ = Some(map_.next_value()?); + } + GeneratedField::Delimiter => { + if delimiter__.is_some() { + return Err(serde::de::Error::duplicate_field("delimiter")); + } + delimiter__ = + Some(map_.next_value::<::pbjson::private::BytesDeserialize<_>>()?.0) + ; + } + GeneratedField::Quote => { + if quote__.is_some() { + return Err(serde::de::Error::duplicate_field("quote")); + } + quote__ = + Some(map_.next_value::<::pbjson::private::BytesDeserialize<_>>()?.0) + ; + } + GeneratedField::Escape => { + if escape__.is_some() { + return Err(serde::de::Error::duplicate_field("escape")); + } + escape__ = + Some(map_.next_value::<::pbjson::private::BytesDeserialize<_>>()?.0) + ; + } + GeneratedField::Compression => { + if compression__.is_some() { + return Err(serde::de::Error::duplicate_field("compression")); + } + compression__ = Some(map_.next_value::()? 
as i32); + } + GeneratedField::SchemaInferMaxRec => { + if schema_infer_max_rec__.is_some() { + return Err(serde::de::Error::duplicate_field("schemaInferMaxRec")); + } + schema_infer_max_rec__ = + Some(map_.next_value::<::pbjson::private::NumberDeserialize<_>>()?.0) + ; + } + GeneratedField::DateFormat => { + if date_format__.is_some() { + return Err(serde::de::Error::duplicate_field("dateFormat")); + } + date_format__ = Some(map_.next_value()?); + } + GeneratedField::DatetimeFormat => { + if datetime_format__.is_some() { + return Err(serde::de::Error::duplicate_field("datetimeFormat")); + } + datetime_format__ = Some(map_.next_value()?); + } + GeneratedField::TimestampFormat => { + if timestamp_format__.is_some() { + return Err(serde::de::Error::duplicate_field("timestampFormat")); + } + timestamp_format__ = Some(map_.next_value()?); + } + GeneratedField::TimestampTzFormat => { + if timestamp_tz_format__.is_some() { + return Err(serde::de::Error::duplicate_field("timestampTzFormat")); + } + timestamp_tz_format__ = Some(map_.next_value()?); + } + GeneratedField::TimeFormat => { + if time_format__.is_some() { + return Err(serde::de::Error::duplicate_field("timeFormat")); + } + time_format__ = Some(map_.next_value()?); + } + GeneratedField::NullValue => { + if null_value__.is_some() { + return Err(serde::de::Error::duplicate_field("nullValue")); + } + null_value__ = Some(map_.next_value()?); + } + } + } + Ok(CsvOptions { + has_header: has_header__.unwrap_or_default(), + delimiter: delimiter__.unwrap_or_default(), + quote: quote__.unwrap_or_default(), + escape: escape__.unwrap_or_default(), + compression: compression__.unwrap_or_default(), + schema_infer_max_rec: schema_infer_max_rec__.unwrap_or_default(), + date_format: date_format__.unwrap_or_default(), + datetime_format: datetime_format__.unwrap_or_default(), + timestamp_format: timestamp_format__.unwrap_or_default(), + timestamp_tz_format: timestamp_tz_format__.unwrap_or_default(), + time_format: time_format__.unwrap_or_default(), + null_value: null_value__.unwrap_or_default(), + }) + } + } + deserializer.deserialize_struct("datafusion.CsvOptions", FIELDS, GeneratedVisitor) + } +} +impl serde::Serialize for CsvScanExecNode { + #[allow(deprecated)] + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + use serde::ser::SerializeStruct; + let mut len = 0; + if self.base_conf.is_some() { + len += 1; + } + if self.has_header { + len += 1; + } + if !self.delimiter.is_empty() { + len += 1; + } + if !self.quote.is_empty() { + len += 1; + } + if self.optional_escape.is_some() { + len += 1; + } + let mut struct_ser = serializer.serialize_struct("datafusion.CsvScanExecNode", len)?; + if let Some(v) = self.base_conf.as_ref() { + struct_ser.serialize_field("baseConf", v)?; + } + if self.has_header { struct_ser.serialize_field("hasHeader", &self.has_header)?; } if !self.delimiter.is_empty() { @@ -5238,10 +5930,16 @@ impl serde::Serialize for CsvSink { if self.config.is_some() { len += 1; } + if self.writer_options.is_some() { + len += 1; + } let mut struct_ser = serializer.serialize_struct("datafusion.CsvSink", len)?; if let Some(v) = self.config.as_ref() { struct_ser.serialize_field("config", v)?; } + if let Some(v) = self.writer_options.as_ref() { + struct_ser.serialize_field("writerOptions", v)?; + } struct_ser.end() } } @@ -5253,11 +5951,14 @@ impl<'de> serde::Deserialize<'de> for CsvSink { { const FIELDS: &[&str] = &[ "config", + "writer_options", + "writerOptions", ]; #[allow(clippy::enum_variant_names)] 
enum GeneratedField { Config, + WriterOptions, } impl<'de> serde::Deserialize<'de> for GeneratedField { fn deserialize(deserializer: D) -> std::result::Result @@ -5280,6 +5981,7 @@ impl<'de> serde::Deserialize<'de> for CsvSink { { match value { "config" => Ok(GeneratedField::Config), + "writerOptions" | "writer_options" => Ok(GeneratedField::WriterOptions), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } } @@ -5300,6 +6002,7 @@ impl<'de> serde::Deserialize<'de> for CsvSink { V: serde::de::MapAccess<'de>, { let mut config__ = None; + let mut writer_options__ = None; while let Some(k) = map_.next_key()? { match k { GeneratedField::Config => { @@ -5308,10 +6011,17 @@ impl<'de> serde::Deserialize<'de> for CsvSink { } config__ = map_.next_value()?; } + GeneratedField::WriterOptions => { + if writer_options__.is_some() { + return Err(serde::de::Error::duplicate_field("writerOptions")); + } + writer_options__ = map_.next_value()?; + } } } Ok(CsvSink { config: config__, + writer_options: writer_options__, }) } } @@ -8211,9 +8921,6 @@ impl serde::Serialize for FileSinkConfig { if self.overwrite { len += 1; } - if self.file_type_writer_options.is_some() { - len += 1; - } let mut struct_ser = serializer.serialize_struct("datafusion.FileSinkConfig", len)?; if !self.object_store_url.is_empty() { struct_ser.serialize_field("objectStoreUrl", &self.object_store_url)?; @@ -8233,9 +8940,6 @@ impl serde::Serialize for FileSinkConfig { if self.overwrite { struct_ser.serialize_field("overwrite", &self.overwrite)?; } - if let Some(v) = self.file_type_writer_options.as_ref() { - struct_ser.serialize_field("fileTypeWriterOptions", v)?; - } struct_ser.end() } } @@ -8257,8 +8961,6 @@ impl<'de> serde::Deserialize<'de> for FileSinkConfig { "table_partition_cols", "tablePartitionCols", "overwrite", - "file_type_writer_options", - "fileTypeWriterOptions", ]; #[allow(clippy::enum_variant_names)] @@ -8269,7 +8971,6 @@ impl<'de> serde::Deserialize<'de> for FileSinkConfig { OutputSchema, TablePartitionCols, Overwrite, - FileTypeWriterOptions, } impl<'de> serde::Deserialize<'de> for GeneratedField { fn deserialize(deserializer: D) -> std::result::Result @@ -8297,7 +8998,6 @@ impl<'de> serde::Deserialize<'de> for FileSinkConfig { "outputSchema" | "output_schema" => Ok(GeneratedField::OutputSchema), "tablePartitionCols" | "table_partition_cols" => Ok(GeneratedField::TablePartitionCols), "overwrite" => Ok(GeneratedField::Overwrite), - "fileTypeWriterOptions" | "file_type_writer_options" => Ok(GeneratedField::FileTypeWriterOptions), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } } @@ -8323,7 +9023,6 @@ impl<'de> serde::Deserialize<'de> for FileSinkConfig { let mut output_schema__ = None; let mut table_partition_cols__ = None; let mut overwrite__ = None; - let mut file_type_writer_options__ = None; while let Some(k) = map_.next_key()? 
{ match k { GeneratedField::ObjectStoreUrl => { @@ -8362,12 +9061,6 @@ impl<'de> serde::Deserialize<'de> for FileSinkConfig { } overwrite__ = Some(map_.next_value()?); } - GeneratedField::FileTypeWriterOptions => { - if file_type_writer_options__.is_some() { - return Err(serde::de::Error::duplicate_field("fileTypeWriterOptions")); - } - file_type_writer_options__ = map_.next_value()?; - } } } Ok(FileSinkConfig { @@ -8377,152 +9070,12 @@ impl<'de> serde::Deserialize<'de> for FileSinkConfig { output_schema: output_schema__, table_partition_cols: table_partition_cols__.unwrap_or_default(), overwrite: overwrite__.unwrap_or_default(), - file_type_writer_options: file_type_writer_options__, }) } } deserializer.deserialize_struct("datafusion.FileSinkConfig", FIELDS, GeneratedVisitor) } } -impl serde::Serialize for FileTypeWriterOptions { - #[allow(deprecated)] - fn serialize(&self, serializer: S) -> std::result::Result - where - S: serde::Serializer, - { - use serde::ser::SerializeStruct; - let mut len = 0; - if self.file_type.is_some() { - len += 1; - } - let mut struct_ser = serializer.serialize_struct("datafusion.FileTypeWriterOptions", len)?; - if let Some(v) = self.file_type.as_ref() { - match v { - file_type_writer_options::FileType::JsonOptions(v) => { - struct_ser.serialize_field("jsonOptions", v)?; - } - file_type_writer_options::FileType::ParquetOptions(v) => { - struct_ser.serialize_field("parquetOptions", v)?; - } - file_type_writer_options::FileType::CsvOptions(v) => { - struct_ser.serialize_field("csvOptions", v)?; - } - file_type_writer_options::FileType::ArrowOptions(v) => { - struct_ser.serialize_field("arrowOptions", v)?; - } - } - } - struct_ser.end() - } -} -impl<'de> serde::Deserialize<'de> for FileTypeWriterOptions { - #[allow(deprecated)] - fn deserialize(deserializer: D) -> std::result::Result - where - D: serde::Deserializer<'de>, - { - const FIELDS: &[&str] = &[ - "json_options", - "jsonOptions", - "parquet_options", - "parquetOptions", - "csv_options", - "csvOptions", - "arrow_options", - "arrowOptions", - ]; - - #[allow(clippy::enum_variant_names)] - enum GeneratedField { - JsonOptions, - ParquetOptions, - CsvOptions, - ArrowOptions, - } - impl<'de> serde::Deserialize<'de> for GeneratedField { - fn deserialize(deserializer: D) -> std::result::Result - where - D: serde::Deserializer<'de>, - { - struct GeneratedVisitor; - - impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { - type Value = GeneratedField; - - fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(formatter, "expected one of: {:?}", &FIELDS) - } - - #[allow(unused_variables)] - fn visit_str(self, value: &str) -> std::result::Result - where - E: serde::de::Error, - { - match value { - "jsonOptions" | "json_options" => Ok(GeneratedField::JsonOptions), - "parquetOptions" | "parquet_options" => Ok(GeneratedField::ParquetOptions), - "csvOptions" | "csv_options" => Ok(GeneratedField::CsvOptions), - "arrowOptions" | "arrow_options" => Ok(GeneratedField::ArrowOptions), - _ => Err(serde::de::Error::unknown_field(value, FIELDS)), - } - } - } - deserializer.deserialize_identifier(GeneratedVisitor) - } - } - struct GeneratedVisitor; - impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { - type Value = FileTypeWriterOptions; - - fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - formatter.write_str("struct datafusion.FileTypeWriterOptions") - } - - fn visit_map(self, mut map_: V) -> std::result::Result - where - V: 
serde::de::MapAccess<'de>, - { - let mut file_type__ = None; - while let Some(k) = map_.next_key()? { - match k { - GeneratedField::JsonOptions => { - if file_type__.is_some() { - return Err(serde::de::Error::duplicate_field("jsonOptions")); - } - file_type__ = map_.next_value::<::std::option::Option<_>>()?.map(file_type_writer_options::FileType::JsonOptions) -; - } - GeneratedField::ParquetOptions => { - if file_type__.is_some() { - return Err(serde::de::Error::duplicate_field("parquetOptions")); - } - file_type__ = map_.next_value::<::std::option::Option<_>>()?.map(file_type_writer_options::FileType::ParquetOptions) -; - } - GeneratedField::CsvOptions => { - if file_type__.is_some() { - return Err(serde::de::Error::duplicate_field("csvOptions")); - } - file_type__ = map_.next_value::<::std::option::Option<_>>()?.map(file_type_writer_options::FileType::CsvOptions) -; - } - GeneratedField::ArrowOptions => { - if file_type__.is_some() { - return Err(serde::de::Error::duplicate_field("arrowOptions")); - } - file_type__ = map_.next_value::<::std::option::Option<_>>()?.map(file_type_writer_options::FileType::ArrowOptions) -; - } - } - } - Ok(FileTypeWriterOptions { - file_type: file_type__, - }) - } - } - deserializer.deserialize_struct("datafusion.FileTypeWriterOptions", FIELDS, GeneratedVisitor) - } -} impl serde::Serialize for FilterExecNode { #[allow(deprecated)] fn serialize(&self, serializer: S) -> std::result::Result @@ -11651,7 +12204,121 @@ impl<'de> serde::Deserialize<'de> for JoinType { } } } - deserializer.deserialize_any(GeneratedVisitor) + deserializer.deserialize_any(GeneratedVisitor) + } +} +impl serde::Serialize for JsonOptions { + #[allow(deprecated)] + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + use serde::ser::SerializeStruct; + let mut len = 0; + if self.compression != 0 { + len += 1; + } + if self.schema_infer_max_rec != 0 { + len += 1; + } + let mut struct_ser = serializer.serialize_struct("datafusion.JsonOptions", len)?; + if self.compression != 0 { + let v = CompressionTypeVariant::try_from(self.compression) + .map_err(|_| serde::ser::Error::custom(format!("Invalid variant {}", self.compression)))?; + struct_ser.serialize_field("compression", &v)?; + } + if self.schema_infer_max_rec != 0 { + #[allow(clippy::needless_borrow)] + struct_ser.serialize_field("schemaInferMaxRec", ToString::to_string(&self.schema_infer_max_rec).as_str())?; + } + struct_ser.end() + } +} +impl<'de> serde::Deserialize<'de> for JsonOptions { + #[allow(deprecated)] + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + const FIELDS: &[&str] = &[ + "compression", + "schema_infer_max_rec", + "schemaInferMaxRec", + ]; + + #[allow(clippy::enum_variant_names)] + enum GeneratedField { + Compression, + SchemaInferMaxRec, + } + impl<'de> serde::Deserialize<'de> for GeneratedField { + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + struct GeneratedVisitor; + + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = GeneratedField; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(formatter, "expected one of: {:?}", &FIELDS) + } + + #[allow(unused_variables)] + fn visit_str(self, value: &str) -> std::result::Result + where + E: serde::de::Error, + { + match value { + "compression" => Ok(GeneratedField::Compression), + "schemaInferMaxRec" | "schema_infer_max_rec" => 
Ok(GeneratedField::SchemaInferMaxRec), + _ => Err(serde::de::Error::unknown_field(value, FIELDS)), + } + } + } + deserializer.deserialize_identifier(GeneratedVisitor) + } + } + struct GeneratedVisitor; + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = JsonOptions; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + formatter.write_str("struct datafusion.JsonOptions") + } + + fn visit_map(self, mut map_: V) -> std::result::Result + where + V: serde::de::MapAccess<'de>, + { + let mut compression__ = None; + let mut schema_infer_max_rec__ = None; + while let Some(k) = map_.next_key()? { + match k { + GeneratedField::Compression => { + if compression__.is_some() { + return Err(serde::de::Error::duplicate_field("compression")); + } + compression__ = Some(map_.next_value::()? as i32); + } + GeneratedField::SchemaInferMaxRec => { + if schema_infer_max_rec__.is_some() { + return Err(serde::de::Error::duplicate_field("schemaInferMaxRec")); + } + schema_infer_max_rec__ = + Some(map_.next_value::<::pbjson::private::NumberDeserialize<_>>()?.0) + ; + } + } + } + Ok(JsonOptions { + compression: compression__.unwrap_or_default(), + schema_infer_max_rec: schema_infer_max_rec__.unwrap_or_default(), + }) + } + } + deserializer.deserialize_struct("datafusion.JsonOptions", FIELDS, GeneratedVisitor) } } impl serde::Serialize for JsonSink { @@ -11665,10 +12332,16 @@ impl serde::Serialize for JsonSink { if self.config.is_some() { len += 1; } + if self.writer_options.is_some() { + len += 1; + } let mut struct_ser = serializer.serialize_struct("datafusion.JsonSink", len)?; if let Some(v) = self.config.as_ref() { struct_ser.serialize_field("config", v)?; } + if let Some(v) = self.writer_options.as_ref() { + struct_ser.serialize_field("writerOptions", v)?; + } struct_ser.end() } } @@ -11680,11 +12353,14 @@ impl<'de> serde::Deserialize<'de> for JsonSink { { const FIELDS: &[&str] = &[ "config", + "writer_options", + "writerOptions", ]; #[allow(clippy::enum_variant_names)] enum GeneratedField { Config, + WriterOptions, } impl<'de> serde::Deserialize<'de> for GeneratedField { fn deserialize(deserializer: D) -> std::result::Result @@ -11707,6 +12383,7 @@ impl<'de> serde::Deserialize<'de> for JsonSink { { match value { "config" => Ok(GeneratedField::Config), + "writerOptions" | "writer_options" => Ok(GeneratedField::WriterOptions), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } } @@ -11727,6 +12404,7 @@ impl<'de> serde::Deserialize<'de> for JsonSink { V: serde::de::MapAccess<'de>, { let mut config__ = None; + let mut writer_options__ = None; while let Some(k) = map_.next_key()? 
{ match k { GeneratedField::Config => { @@ -11735,10 +12413,17 @@ impl<'de> serde::Deserialize<'de> for JsonSink { } config__ = map_.next_value()?; } + GeneratedField::WriterOptions => { + if writer_options__.is_some() { + return Err(serde::de::Error::duplicate_field("writerOptions")); + } + writer_options__ = map_.next_value()?; + } } } Ok(JsonSink { config: config__, + writer_options: writer_options__, }) } } @@ -15387,37 +16072,397 @@ impl<'de> serde::Deserialize<'de> for OwnedTableReference { } } } - Ok(OwnedTableReference { - table_reference_enum: table_reference_enum__, - }) + Ok(OwnedTableReference { + table_reference_enum: table_reference_enum__, + }) + } + } + deserializer.deserialize_struct("datafusion.OwnedTableReference", FIELDS, GeneratedVisitor) + } +} +impl serde::Serialize for ParquetFormat { + #[allow(deprecated)] + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + use serde::ser::SerializeStruct; + let mut len = 0; + if self.options.is_some() { + len += 1; + } + let mut struct_ser = serializer.serialize_struct("datafusion.ParquetFormat", len)?; + if let Some(v) = self.options.as_ref() { + struct_ser.serialize_field("options", v)?; + } + struct_ser.end() + } +} +impl<'de> serde::Deserialize<'de> for ParquetFormat { + #[allow(deprecated)] + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + const FIELDS: &[&str] = &[ + "options", + ]; + + #[allow(clippy::enum_variant_names)] + enum GeneratedField { + Options, + } + impl<'de> serde::Deserialize<'de> for GeneratedField { + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + struct GeneratedVisitor; + + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = GeneratedField; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(formatter, "expected one of: {:?}", &FIELDS) + } + + #[allow(unused_variables)] + fn visit_str(self, value: &str) -> std::result::Result + where + E: serde::de::Error, + { + match value { + "options" => Ok(GeneratedField::Options), + _ => Err(serde::de::Error::unknown_field(value, FIELDS)), + } + } + } + deserializer.deserialize_identifier(GeneratedVisitor) + } + } + struct GeneratedVisitor; + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = ParquetFormat; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + formatter.write_str("struct datafusion.ParquetFormat") + } + + fn visit_map(self, mut map_: V) -> std::result::Result + where + V: serde::de::MapAccess<'de>, + { + let mut options__ = None; + while let Some(k) = map_.next_key()? 
{ + match k { + GeneratedField::Options => { + if options__.is_some() { + return Err(serde::de::Error::duplicate_field("options")); + } + options__ = map_.next_value()?; + } + } + } + Ok(ParquetFormat { + options: options__, + }) + } + } + deserializer.deserialize_struct("datafusion.ParquetFormat", FIELDS, GeneratedVisitor) + } +} +impl serde::Serialize for ParquetOptions { + #[allow(deprecated)] + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + use serde::ser::SerializeStruct; + let mut len = 0; + if self.enable_page_index { + len += 1; + } + if self.pruning { + len += 1; + } + if self.skip_metadata { + len += 1; + } + if self.pushdown_filters { + len += 1; + } + if self.reorder_filters { + len += 1; + } + if self.data_pagesize_limit != 0 { + len += 1; + } + if self.write_batch_size != 0 { + len += 1; + } + if !self.writer_version.is_empty() { + len += 1; + } + if self.bloom_filter_enabled { + len += 1; + } + if self.allow_single_file_parallelism { + len += 1; + } + if self.maximum_parallel_row_group_writers != 0 { + len += 1; + } + if self.maximum_buffered_record_batches_per_stream != 0 { + len += 1; + } + if self.dictionary_page_size_limit != 0 { + len += 1; + } + if self.data_page_row_count_limit != 0 { + len += 1; + } + if self.max_row_group_size != 0 { + len += 1; + } + if !self.created_by.is_empty() { + len += 1; + } + if self.metadata_size_hint_opt.is_some() { + len += 1; + } + if self.compression_opt.is_some() { + len += 1; + } + if self.dictionary_enabled_opt.is_some() { + len += 1; + } + if self.statistics_enabled_opt.is_some() { + len += 1; + } + if self.max_statistics_size_opt.is_some() { + len += 1; + } + if self.column_index_truncate_length_opt.is_some() { + len += 1; + } + if self.encoding_opt.is_some() { + len += 1; + } + if self.bloom_filter_fpp_opt.is_some() { + len += 1; + } + if self.bloom_filter_ndv_opt.is_some() { + len += 1; + } + let mut struct_ser = serializer.serialize_struct("datafusion.ParquetOptions", len)?; + if self.enable_page_index { + struct_ser.serialize_field("enablePageIndex", &self.enable_page_index)?; + } + if self.pruning { + struct_ser.serialize_field("pruning", &self.pruning)?; + } + if self.skip_metadata { + struct_ser.serialize_field("skipMetadata", &self.skip_metadata)?; + } + if self.pushdown_filters { + struct_ser.serialize_field("pushdownFilters", &self.pushdown_filters)?; + } + if self.reorder_filters { + struct_ser.serialize_field("reorderFilters", &self.reorder_filters)?; + } + if self.data_pagesize_limit != 0 { + #[allow(clippy::needless_borrow)] + struct_ser.serialize_field("dataPagesizeLimit", ToString::to_string(&self.data_pagesize_limit).as_str())?; + } + if self.write_batch_size != 0 { + #[allow(clippy::needless_borrow)] + struct_ser.serialize_field("writeBatchSize", ToString::to_string(&self.write_batch_size).as_str())?; + } + if !self.writer_version.is_empty() { + struct_ser.serialize_field("writerVersion", &self.writer_version)?; + } + if self.bloom_filter_enabled { + struct_ser.serialize_field("bloomFilterEnabled", &self.bloom_filter_enabled)?; + } + if self.allow_single_file_parallelism { + struct_ser.serialize_field("allowSingleFileParallelism", &self.allow_single_file_parallelism)?; + } + if self.maximum_parallel_row_group_writers != 0 { + #[allow(clippy::needless_borrow)] + struct_ser.serialize_field("maximumParallelRowGroupWriters", ToString::to_string(&self.maximum_parallel_row_group_writers).as_str())?; + } + if self.maximum_buffered_record_batches_per_stream != 0 { + 
#[allow(clippy::needless_borrow)] + struct_ser.serialize_field("maximumBufferedRecordBatchesPerStream", ToString::to_string(&self.maximum_buffered_record_batches_per_stream).as_str())?; + } + if self.dictionary_page_size_limit != 0 { + #[allow(clippy::needless_borrow)] + struct_ser.serialize_field("dictionaryPageSizeLimit", ToString::to_string(&self.dictionary_page_size_limit).as_str())?; + } + if self.data_page_row_count_limit != 0 { + #[allow(clippy::needless_borrow)] + struct_ser.serialize_field("dataPageRowCountLimit", ToString::to_string(&self.data_page_row_count_limit).as_str())?; + } + if self.max_row_group_size != 0 { + #[allow(clippy::needless_borrow)] + struct_ser.serialize_field("maxRowGroupSize", ToString::to_string(&self.max_row_group_size).as_str())?; + } + if !self.created_by.is_empty() { + struct_ser.serialize_field("createdBy", &self.created_by)?; + } + if let Some(v) = self.metadata_size_hint_opt.as_ref() { + match v { + parquet_options::MetadataSizeHintOpt::MetadataSizeHint(v) => { + #[allow(clippy::needless_borrow)] + struct_ser.serialize_field("metadataSizeHint", ToString::to_string(&v).as_str())?; + } + } + } + if let Some(v) = self.compression_opt.as_ref() { + match v { + parquet_options::CompressionOpt::Compression(v) => { + struct_ser.serialize_field("compression", v)?; + } + } + } + if let Some(v) = self.dictionary_enabled_opt.as_ref() { + match v { + parquet_options::DictionaryEnabledOpt::DictionaryEnabled(v) => { + struct_ser.serialize_field("dictionaryEnabled", v)?; + } + } + } + if let Some(v) = self.statistics_enabled_opt.as_ref() { + match v { + parquet_options::StatisticsEnabledOpt::StatisticsEnabled(v) => { + struct_ser.serialize_field("statisticsEnabled", v)?; + } + } + } + if let Some(v) = self.max_statistics_size_opt.as_ref() { + match v { + parquet_options::MaxStatisticsSizeOpt::MaxStatisticsSize(v) => { + #[allow(clippy::needless_borrow)] + struct_ser.serialize_field("maxStatisticsSize", ToString::to_string(&v).as_str())?; + } + } + } + if let Some(v) = self.column_index_truncate_length_opt.as_ref() { + match v { + parquet_options::ColumnIndexTruncateLengthOpt::ColumnIndexTruncateLength(v) => { + #[allow(clippy::needless_borrow)] + struct_ser.serialize_field("columnIndexTruncateLength", ToString::to_string(&v).as_str())?; + } + } + } + if let Some(v) = self.encoding_opt.as_ref() { + match v { + parquet_options::EncodingOpt::Encoding(v) => { + struct_ser.serialize_field("encoding", v)?; + } + } + } + if let Some(v) = self.bloom_filter_fpp_opt.as_ref() { + match v { + parquet_options::BloomFilterFppOpt::BloomFilterFpp(v) => { + struct_ser.serialize_field("bloomFilterFpp", v)?; + } + } + } + if let Some(v) = self.bloom_filter_ndv_opt.as_ref() { + match v { + parquet_options::BloomFilterNdvOpt::BloomFilterNdv(v) => { + #[allow(clippy::needless_borrow)] + struct_ser.serialize_field("bloomFilterNdv", ToString::to_string(&v).as_str())?; + } } } - deserializer.deserialize_struct("datafusion.OwnedTableReference", FIELDS, GeneratedVisitor) - } -} -impl serde::Serialize for ParquetFormat { - #[allow(deprecated)] - fn serialize(&self, serializer: S) -> std::result::Result - where - S: serde::Serializer, - { - use serde::ser::SerializeStruct; - let len = 0; - let struct_ser = serializer.serialize_struct("datafusion.ParquetFormat", len)?; struct_ser.end() } } -impl<'de> serde::Deserialize<'de> for ParquetFormat { +impl<'de> serde::Deserialize<'de> for ParquetOptions { #[allow(deprecated)] fn deserialize(deserializer: D) -> std::result::Result where D: 
serde::Deserializer<'de>, { const FIELDS: &[&str] = &[ + "enable_page_index", + "enablePageIndex", + "pruning", + "skip_metadata", + "skipMetadata", + "pushdown_filters", + "pushdownFilters", + "reorder_filters", + "reorderFilters", + "data_pagesize_limit", + "dataPagesizeLimit", + "write_batch_size", + "writeBatchSize", + "writer_version", + "writerVersion", + "bloom_filter_enabled", + "bloomFilterEnabled", + "allow_single_file_parallelism", + "allowSingleFileParallelism", + "maximum_parallel_row_group_writers", + "maximumParallelRowGroupWriters", + "maximum_buffered_record_batches_per_stream", + "maximumBufferedRecordBatchesPerStream", + "dictionary_page_size_limit", + "dictionaryPageSizeLimit", + "data_page_row_count_limit", + "dataPageRowCountLimit", + "max_row_group_size", + "maxRowGroupSize", + "created_by", + "createdBy", + "metadata_size_hint", + "metadataSizeHint", + "compression", + "dictionary_enabled", + "dictionaryEnabled", + "statistics_enabled", + "statisticsEnabled", + "max_statistics_size", + "maxStatisticsSize", + "column_index_truncate_length", + "columnIndexTruncateLength", + "encoding", + "bloom_filter_fpp", + "bloomFilterFpp", + "bloom_filter_ndv", + "bloomFilterNdv", ]; #[allow(clippy::enum_variant_names)] enum GeneratedField { + EnablePageIndex, + Pruning, + SkipMetadata, + PushdownFilters, + ReorderFilters, + DataPagesizeLimit, + WriteBatchSize, + WriterVersion, + BloomFilterEnabled, + AllowSingleFileParallelism, + MaximumParallelRowGroupWriters, + MaximumBufferedRecordBatchesPerStream, + DictionaryPageSizeLimit, + DataPageRowCountLimit, + MaxRowGroupSize, + CreatedBy, + MetadataSizeHint, + Compression, + DictionaryEnabled, + StatisticsEnabled, + MaxStatisticsSize, + ColumnIndexTruncateLength, + Encoding, + BloomFilterFpp, + BloomFilterNdv, } impl<'de> serde::Deserialize<'de> for GeneratedField { fn deserialize(deserializer: D) -> std::result::Result @@ -15438,7 +16483,34 @@ impl<'de> serde::Deserialize<'de> for ParquetFormat { where E: serde::de::Error, { - Err(serde::de::Error::unknown_field(value, FIELDS)) + match value { + "enablePageIndex" | "enable_page_index" => Ok(GeneratedField::EnablePageIndex), + "pruning" => Ok(GeneratedField::Pruning), + "skipMetadata" | "skip_metadata" => Ok(GeneratedField::SkipMetadata), + "pushdownFilters" | "pushdown_filters" => Ok(GeneratedField::PushdownFilters), + "reorderFilters" | "reorder_filters" => Ok(GeneratedField::ReorderFilters), + "dataPagesizeLimit" | "data_pagesize_limit" => Ok(GeneratedField::DataPagesizeLimit), + "writeBatchSize" | "write_batch_size" => Ok(GeneratedField::WriteBatchSize), + "writerVersion" | "writer_version" => Ok(GeneratedField::WriterVersion), + "bloomFilterEnabled" | "bloom_filter_enabled" => Ok(GeneratedField::BloomFilterEnabled), + "allowSingleFileParallelism" | "allow_single_file_parallelism" => Ok(GeneratedField::AllowSingleFileParallelism), + "maximumParallelRowGroupWriters" | "maximum_parallel_row_group_writers" => Ok(GeneratedField::MaximumParallelRowGroupWriters), + "maximumBufferedRecordBatchesPerStream" | "maximum_buffered_record_batches_per_stream" => Ok(GeneratedField::MaximumBufferedRecordBatchesPerStream), + "dictionaryPageSizeLimit" | "dictionary_page_size_limit" => Ok(GeneratedField::DictionaryPageSizeLimit), + "dataPageRowCountLimit" | "data_page_row_count_limit" => Ok(GeneratedField::DataPageRowCountLimit), + "maxRowGroupSize" | "max_row_group_size" => Ok(GeneratedField::MaxRowGroupSize), + "createdBy" | "created_by" => Ok(GeneratedField::CreatedBy), + "metadataSizeHint" | 
"metadata_size_hint" => Ok(GeneratedField::MetadataSizeHint), + "compression" => Ok(GeneratedField::Compression), + "dictionaryEnabled" | "dictionary_enabled" => Ok(GeneratedField::DictionaryEnabled), + "statisticsEnabled" | "statistics_enabled" => Ok(GeneratedField::StatisticsEnabled), + "maxStatisticsSize" | "max_statistics_size" => Ok(GeneratedField::MaxStatisticsSize), + "columnIndexTruncateLength" | "column_index_truncate_length" => Ok(GeneratedField::ColumnIndexTruncateLength), + "encoding" => Ok(GeneratedField::Encoding), + "bloomFilterFpp" | "bloom_filter_fpp" => Ok(GeneratedField::BloomFilterFpp), + "bloomFilterNdv" | "bloom_filter_ndv" => Ok(GeneratedField::BloomFilterNdv), + _ => Err(serde::de::Error::unknown_field(value, FIELDS)), + } } } deserializer.deserialize_identifier(GeneratedVisitor) @@ -15446,24 +16518,239 @@ impl<'de> serde::Deserialize<'de> for ParquetFormat { } struct GeneratedVisitor; impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { - type Value = ParquetFormat; + type Value = ParquetOptions; fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - formatter.write_str("struct datafusion.ParquetFormat") + formatter.write_str("struct datafusion.ParquetOptions") } - fn visit_map(self, mut map_: V) -> std::result::Result + fn visit_map(self, mut map_: V) -> std::result::Result where V: serde::de::MapAccess<'de>, { - while map_.next_key::()?.is_some() { - let _ = map_.next_value::()?; + let mut enable_page_index__ = None; + let mut pruning__ = None; + let mut skip_metadata__ = None; + let mut pushdown_filters__ = None; + let mut reorder_filters__ = None; + let mut data_pagesize_limit__ = None; + let mut write_batch_size__ = None; + let mut writer_version__ = None; + let mut bloom_filter_enabled__ = None; + let mut allow_single_file_parallelism__ = None; + let mut maximum_parallel_row_group_writers__ = None; + let mut maximum_buffered_record_batches_per_stream__ = None; + let mut dictionary_page_size_limit__ = None; + let mut data_page_row_count_limit__ = None; + let mut max_row_group_size__ = None; + let mut created_by__ = None; + let mut metadata_size_hint_opt__ = None; + let mut compression_opt__ = None; + let mut dictionary_enabled_opt__ = None; + let mut statistics_enabled_opt__ = None; + let mut max_statistics_size_opt__ = None; + let mut column_index_truncate_length_opt__ = None; + let mut encoding_opt__ = None; + let mut bloom_filter_fpp_opt__ = None; + let mut bloom_filter_ndv_opt__ = None; + while let Some(k) = map_.next_key()? 
{ + match k { + GeneratedField::EnablePageIndex => { + if enable_page_index__.is_some() { + return Err(serde::de::Error::duplicate_field("enablePageIndex")); + } + enable_page_index__ = Some(map_.next_value()?); + } + GeneratedField::Pruning => { + if pruning__.is_some() { + return Err(serde::de::Error::duplicate_field("pruning")); + } + pruning__ = Some(map_.next_value()?); + } + GeneratedField::SkipMetadata => { + if skip_metadata__.is_some() { + return Err(serde::de::Error::duplicate_field("skipMetadata")); + } + skip_metadata__ = Some(map_.next_value()?); + } + GeneratedField::PushdownFilters => { + if pushdown_filters__.is_some() { + return Err(serde::de::Error::duplicate_field("pushdownFilters")); + } + pushdown_filters__ = Some(map_.next_value()?); + } + GeneratedField::ReorderFilters => { + if reorder_filters__.is_some() { + return Err(serde::de::Error::duplicate_field("reorderFilters")); + } + reorder_filters__ = Some(map_.next_value()?); + } + GeneratedField::DataPagesizeLimit => { + if data_pagesize_limit__.is_some() { + return Err(serde::de::Error::duplicate_field("dataPagesizeLimit")); + } + data_pagesize_limit__ = + Some(map_.next_value::<::pbjson::private::NumberDeserialize<_>>()?.0) + ; + } + GeneratedField::WriteBatchSize => { + if write_batch_size__.is_some() { + return Err(serde::de::Error::duplicate_field("writeBatchSize")); + } + write_batch_size__ = + Some(map_.next_value::<::pbjson::private::NumberDeserialize<_>>()?.0) + ; + } + GeneratedField::WriterVersion => { + if writer_version__.is_some() { + return Err(serde::de::Error::duplicate_field("writerVersion")); + } + writer_version__ = Some(map_.next_value()?); + } + GeneratedField::BloomFilterEnabled => { + if bloom_filter_enabled__.is_some() { + return Err(serde::de::Error::duplicate_field("bloomFilterEnabled")); + } + bloom_filter_enabled__ = Some(map_.next_value()?); + } + GeneratedField::AllowSingleFileParallelism => { + if allow_single_file_parallelism__.is_some() { + return Err(serde::de::Error::duplicate_field("allowSingleFileParallelism")); + } + allow_single_file_parallelism__ = Some(map_.next_value()?); + } + GeneratedField::MaximumParallelRowGroupWriters => { + if maximum_parallel_row_group_writers__.is_some() { + return Err(serde::de::Error::duplicate_field("maximumParallelRowGroupWriters")); + } + maximum_parallel_row_group_writers__ = + Some(map_.next_value::<::pbjson::private::NumberDeserialize<_>>()?.0) + ; + } + GeneratedField::MaximumBufferedRecordBatchesPerStream => { + if maximum_buffered_record_batches_per_stream__.is_some() { + return Err(serde::de::Error::duplicate_field("maximumBufferedRecordBatchesPerStream")); + } + maximum_buffered_record_batches_per_stream__ = + Some(map_.next_value::<::pbjson::private::NumberDeserialize<_>>()?.0) + ; + } + GeneratedField::DictionaryPageSizeLimit => { + if dictionary_page_size_limit__.is_some() { + return Err(serde::de::Error::duplicate_field("dictionaryPageSizeLimit")); + } + dictionary_page_size_limit__ = + Some(map_.next_value::<::pbjson::private::NumberDeserialize<_>>()?.0) + ; + } + GeneratedField::DataPageRowCountLimit => { + if data_page_row_count_limit__.is_some() { + return Err(serde::de::Error::duplicate_field("dataPageRowCountLimit")); + } + data_page_row_count_limit__ = + Some(map_.next_value::<::pbjson::private::NumberDeserialize<_>>()?.0) + ; + } + GeneratedField::MaxRowGroupSize => { + if max_row_group_size__.is_some() { + return Err(serde::de::Error::duplicate_field("maxRowGroupSize")); + } + max_row_group_size__ = + 
Some(map_.next_value::<::pbjson::private::NumberDeserialize<_>>()?.0) + ; + } + GeneratedField::CreatedBy => { + if created_by__.is_some() { + return Err(serde::de::Error::duplicate_field("createdBy")); + } + created_by__ = Some(map_.next_value()?); + } + GeneratedField::MetadataSizeHint => { + if metadata_size_hint_opt__.is_some() { + return Err(serde::de::Error::duplicate_field("metadataSizeHint")); + } + metadata_size_hint_opt__ = map_.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| parquet_options::MetadataSizeHintOpt::MetadataSizeHint(x.0)); + } + GeneratedField::Compression => { + if compression_opt__.is_some() { + return Err(serde::de::Error::duplicate_field("compression")); + } + compression_opt__ = map_.next_value::<::std::option::Option<_>>()?.map(parquet_options::CompressionOpt::Compression); + } + GeneratedField::DictionaryEnabled => { + if dictionary_enabled_opt__.is_some() { + return Err(serde::de::Error::duplicate_field("dictionaryEnabled")); + } + dictionary_enabled_opt__ = map_.next_value::<::std::option::Option<_>>()?.map(parquet_options::DictionaryEnabledOpt::DictionaryEnabled); + } + GeneratedField::StatisticsEnabled => { + if statistics_enabled_opt__.is_some() { + return Err(serde::de::Error::duplicate_field("statisticsEnabled")); + } + statistics_enabled_opt__ = map_.next_value::<::std::option::Option<_>>()?.map(parquet_options::StatisticsEnabledOpt::StatisticsEnabled); + } + GeneratedField::MaxStatisticsSize => { + if max_statistics_size_opt__.is_some() { + return Err(serde::de::Error::duplicate_field("maxStatisticsSize")); + } + max_statistics_size_opt__ = map_.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| parquet_options::MaxStatisticsSizeOpt::MaxStatisticsSize(x.0)); + } + GeneratedField::ColumnIndexTruncateLength => { + if column_index_truncate_length_opt__.is_some() { + return Err(serde::de::Error::duplicate_field("columnIndexTruncateLength")); + } + column_index_truncate_length_opt__ = map_.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| parquet_options::ColumnIndexTruncateLengthOpt::ColumnIndexTruncateLength(x.0)); + } + GeneratedField::Encoding => { + if encoding_opt__.is_some() { + return Err(serde::de::Error::duplicate_field("encoding")); + } + encoding_opt__ = map_.next_value::<::std::option::Option<_>>()?.map(parquet_options::EncodingOpt::Encoding); + } + GeneratedField::BloomFilterFpp => { + if bloom_filter_fpp_opt__.is_some() { + return Err(serde::de::Error::duplicate_field("bloomFilterFpp")); + } + bloom_filter_fpp_opt__ = map_.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| parquet_options::BloomFilterFppOpt::BloomFilterFpp(x.0)); + } + GeneratedField::BloomFilterNdv => { + if bloom_filter_ndv_opt__.is_some() { + return Err(serde::de::Error::duplicate_field("bloomFilterNdv")); + } + bloom_filter_ndv_opt__ = map_.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| parquet_options::BloomFilterNdvOpt::BloomFilterNdv(x.0)); + } + } } - Ok(ParquetFormat { + Ok(ParquetOptions { + enable_page_index: enable_page_index__.unwrap_or_default(), + pruning: pruning__.unwrap_or_default(), + skip_metadata: skip_metadata__.unwrap_or_default(), + pushdown_filters: pushdown_filters__.unwrap_or_default(), + reorder_filters: reorder_filters__.unwrap_or_default(), + data_pagesize_limit: data_pagesize_limit__.unwrap_or_default(), + write_batch_size: 
write_batch_size__.unwrap_or_default(), + writer_version: writer_version__.unwrap_or_default(), + bloom_filter_enabled: bloom_filter_enabled__.unwrap_or_default(), + allow_single_file_parallelism: allow_single_file_parallelism__.unwrap_or_default(), + maximum_parallel_row_group_writers: maximum_parallel_row_group_writers__.unwrap_or_default(), + maximum_buffered_record_batches_per_stream: maximum_buffered_record_batches_per_stream__.unwrap_or_default(), + dictionary_page_size_limit: dictionary_page_size_limit__.unwrap_or_default(), + data_page_row_count_limit: data_page_row_count_limit__.unwrap_or_default(), + max_row_group_size: max_row_group_size__.unwrap_or_default(), + created_by: created_by__.unwrap_or_default(), + metadata_size_hint_opt: metadata_size_hint_opt__, + compression_opt: compression_opt__, + dictionary_enabled_opt: dictionary_enabled_opt__, + statistics_enabled_opt: statistics_enabled_opt__, + max_statistics_size_opt: max_statistics_size_opt__, + column_index_truncate_length_opt: column_index_truncate_length_opt__, + encoding_opt: encoding_opt__, + bloom_filter_fpp_opt: bloom_filter_fpp_opt__, + bloom_filter_ndv_opt: bloom_filter_ndv_opt__, }) } } - deserializer.deserialize_struct("datafusion.ParquetFormat", FIELDS, GeneratedVisitor) + deserializer.deserialize_struct("datafusion.ParquetOptions", FIELDS, GeneratedVisitor) } } impl serde::Serialize for ParquetScanExecNode { @@ -15586,10 +16873,16 @@ impl serde::Serialize for ParquetSink { if self.config.is_some() { len += 1; } + if self.parquet_options.is_some() { + len += 1; + } let mut struct_ser = serializer.serialize_struct("datafusion.ParquetSink", len)?; if let Some(v) = self.config.as_ref() { struct_ser.serialize_field("config", v)?; } + if let Some(v) = self.parquet_options.as_ref() { + struct_ser.serialize_field("parquetOptions", v)?; + } struct_ser.end() } } @@ -15601,11 +16894,14 @@ impl<'de> serde::Deserialize<'de> for ParquetSink { { const FIELDS: &[&str] = &[ "config", + "parquet_options", + "parquetOptions", ]; #[allow(clippy::enum_variant_names)] enum GeneratedField { Config, + ParquetOptions, } impl<'de> serde::Deserialize<'de> for GeneratedField { fn deserialize(deserializer: D) -> std::result::Result @@ -15628,6 +16924,7 @@ impl<'de> serde::Deserialize<'de> for ParquetSink { { match value { "config" => Ok(GeneratedField::Config), + "parquetOptions" | "parquet_options" => Ok(GeneratedField::ParquetOptions), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } } @@ -15648,6 +16945,7 @@ impl<'de> serde::Deserialize<'de> for ParquetSink { V: serde::de::MapAccess<'de>, { let mut config__ = None; + let mut parquet_options__ = None; while let Some(k) = map_.next_key()? 
{ match k { GeneratedField::Config => { @@ -15656,10 +16954,17 @@ impl<'de> serde::Deserialize<'de> for ParquetSink { } config__ = map_.next_value()?; } + GeneratedField::ParquetOptions => { + if parquet_options__.is_some() { + return Err(serde::de::Error::duplicate_field("parquetOptions")); + } + parquet_options__ = map_.next_value()?; + } } } Ok(ParquetSink { config: config__, + parquet_options: parquet_options__, }) } } @@ -15787,119 +17092,27 @@ impl<'de> serde::Deserialize<'de> for ParquetSinkExecNode { } GeneratedField::SinkSchema => { if sink_schema__.is_some() { - return Err(serde::de::Error::duplicate_field("sinkSchema")); - } - sink_schema__ = map_.next_value()?; - } - GeneratedField::SortOrder => { - if sort_order__.is_some() { - return Err(serde::de::Error::duplicate_field("sortOrder")); - } - sort_order__ = map_.next_value()?; - } - } - } - Ok(ParquetSinkExecNode { - input: input__, - sink: sink__, - sink_schema: sink_schema__, - sort_order: sort_order__, - }) - } - } - deserializer.deserialize_struct("datafusion.ParquetSinkExecNode", FIELDS, GeneratedVisitor) - } -} -impl serde::Serialize for ParquetWriterOptions { - #[allow(deprecated)] - fn serialize(&self, serializer: S) -> std::result::Result - where - S: serde::Serializer, - { - use serde::ser::SerializeStruct; - let mut len = 0; - if self.writer_properties.is_some() { - len += 1; - } - let mut struct_ser = serializer.serialize_struct("datafusion.ParquetWriterOptions", len)?; - if let Some(v) = self.writer_properties.as_ref() { - struct_ser.serialize_field("writerProperties", v)?; - } - struct_ser.end() - } -} -impl<'de> serde::Deserialize<'de> for ParquetWriterOptions { - #[allow(deprecated)] - fn deserialize(deserializer: D) -> std::result::Result - where - D: serde::Deserializer<'de>, - { - const FIELDS: &[&str] = &[ - "writer_properties", - "writerProperties", - ]; - - #[allow(clippy::enum_variant_names)] - enum GeneratedField { - WriterProperties, - } - impl<'de> serde::Deserialize<'de> for GeneratedField { - fn deserialize(deserializer: D) -> std::result::Result - where - D: serde::Deserializer<'de>, - { - struct GeneratedVisitor; - - impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { - type Value = GeneratedField; - - fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(formatter, "expected one of: {:?}", &FIELDS) - } - - #[allow(unused_variables)] - fn visit_str(self, value: &str) -> std::result::Result - where - E: serde::de::Error, - { - match value { - "writerProperties" | "writer_properties" => Ok(GeneratedField::WriterProperties), - _ => Err(serde::de::Error::unknown_field(value, FIELDS)), - } - } - } - deserializer.deserialize_identifier(GeneratedVisitor) - } - } - struct GeneratedVisitor; - impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { - type Value = ParquetWriterOptions; - - fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - formatter.write_str("struct datafusion.ParquetWriterOptions") - } - - fn visit_map(self, mut map_: V) -> std::result::Result - where - V: serde::de::MapAccess<'de>, - { - let mut writer_properties__ = None; - while let Some(k) = map_.next_key()? 
{ - match k { - GeneratedField::WriterProperties => { - if writer_properties__.is_some() { - return Err(serde::de::Error::duplicate_field("writerProperties")); + return Err(serde::de::Error::duplicate_field("sinkSchema")); + } + sink_schema__ = map_.next_value()?; + } + GeneratedField::SortOrder => { + if sort_order__.is_some() { + return Err(serde::de::Error::duplicate_field("sortOrder")); } - writer_properties__ = map_.next_value()?; + sort_order__ = map_.next_value()?; } } } - Ok(ParquetWriterOptions { - writer_properties: writer_properties__, + Ok(ParquetSinkExecNode { + input: input__, + sink: sink__, + sink_schema: sink_schema__, + sort_order: sort_order__, }) } } - deserializer.deserialize_struct("datafusion.ParquetWriterOptions", FIELDS, GeneratedVisitor) + deserializer.deserialize_struct("datafusion.ParquetSinkExecNode", FIELDS, GeneratedVisitor) } } impl serde::Serialize for PartialTableReference { @@ -21414,262 +22627,44 @@ impl<'de> serde::Deserialize<'de> for RepartitionExecNode { let mut input__ = None; let mut partition_method__ = None; while let Some(k) = map_.next_key()? { - match k { - GeneratedField::Input => { - if input__.is_some() { - return Err(serde::de::Error::duplicate_field("input")); - } - input__ = map_.next_value()?; - } - GeneratedField::RoundRobin => { - if partition_method__.is_some() { - return Err(serde::de::Error::duplicate_field("roundRobin")); - } - partition_method__ = map_.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| repartition_exec_node::PartitionMethod::RoundRobin(x.0)); - } - GeneratedField::Hash => { - if partition_method__.is_some() { - return Err(serde::de::Error::duplicate_field("hash")); - } - partition_method__ = map_.next_value::<::std::option::Option<_>>()?.map(repartition_exec_node::PartitionMethod::Hash) -; - } - GeneratedField::Unknown => { - if partition_method__.is_some() { - return Err(serde::de::Error::duplicate_field("unknown")); - } - partition_method__ = map_.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| repartition_exec_node::PartitionMethod::Unknown(x.0)); - } - } - } - Ok(RepartitionExecNode { - input: input__, - partition_method: partition_method__, - }) - } - } - deserializer.deserialize_struct("datafusion.RepartitionExecNode", FIELDS, GeneratedVisitor) - } -} -impl serde::Serialize for RepartitionNode { - #[allow(deprecated)] - fn serialize(&self, serializer: S) -> std::result::Result - where - S: serde::Serializer, - { - use serde::ser::SerializeStruct; - let mut len = 0; - if self.input.is_some() { - len += 1; - } - if self.partition_method.is_some() { - len += 1; - } - let mut struct_ser = serializer.serialize_struct("datafusion.RepartitionNode", len)?; - if let Some(v) = self.input.as_ref() { - struct_ser.serialize_field("input", v)?; - } - if let Some(v) = self.partition_method.as_ref() { - match v { - repartition_node::PartitionMethod::RoundRobin(v) => { - #[allow(clippy::needless_borrow)] - struct_ser.serialize_field("roundRobin", ToString::to_string(&v).as_str())?; - } - repartition_node::PartitionMethod::Hash(v) => { - struct_ser.serialize_field("hash", v)?; - } - } - } - struct_ser.end() - } -} -impl<'de> serde::Deserialize<'de> for RepartitionNode { - #[allow(deprecated)] - fn deserialize(deserializer: D) -> std::result::Result - where - D: serde::Deserializer<'de>, - { - const FIELDS: &[&str] = &[ - "input", - "round_robin", - "roundRobin", - "hash", - ]; - - #[allow(clippy::enum_variant_names)] - enum GeneratedField { - Input, 
- RoundRobin, - Hash, - } - impl<'de> serde::Deserialize<'de> for GeneratedField { - fn deserialize(deserializer: D) -> std::result::Result - where - D: serde::Deserializer<'de>, - { - struct GeneratedVisitor; - - impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { - type Value = GeneratedField; - - fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(formatter, "expected one of: {:?}", &FIELDS) - } - - #[allow(unused_variables)] - fn visit_str(self, value: &str) -> std::result::Result - where - E: serde::de::Error, - { - match value { - "input" => Ok(GeneratedField::Input), - "roundRobin" | "round_robin" => Ok(GeneratedField::RoundRobin), - "hash" => Ok(GeneratedField::Hash), - _ => Err(serde::de::Error::unknown_field(value, FIELDS)), - } - } - } - deserializer.deserialize_identifier(GeneratedVisitor) - } - } - struct GeneratedVisitor; - impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { - type Value = RepartitionNode; - - fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - formatter.write_str("struct datafusion.RepartitionNode") - } - - fn visit_map(self, mut map_: V) -> std::result::Result - where - V: serde::de::MapAccess<'de>, - { - let mut input__ = None; - let mut partition_method__ = None; - while let Some(k) = map_.next_key()? { - match k { - GeneratedField::Input => { - if input__.is_some() { - return Err(serde::de::Error::duplicate_field("input")); - } - input__ = map_.next_value()?; - } - GeneratedField::RoundRobin => { - if partition_method__.is_some() { - return Err(serde::de::Error::duplicate_field("roundRobin")); - } - partition_method__ = map_.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| repartition_node::PartitionMethod::RoundRobin(x.0)); - } - GeneratedField::Hash => { - if partition_method__.is_some() { - return Err(serde::de::Error::duplicate_field("hash")); - } - partition_method__ = map_.next_value::<::std::option::Option<_>>()?.map(repartition_node::PartitionMethod::Hash) -; - } - } - } - Ok(RepartitionNode { - input: input__, - partition_method: partition_method__, - }) - } - } - deserializer.deserialize_struct("datafusion.RepartitionNode", FIELDS, GeneratedVisitor) - } -} -impl serde::Serialize for RollupNode { - #[allow(deprecated)] - fn serialize(&self, serializer: S) -> std::result::Result - where - S: serde::Serializer, - { - use serde::ser::SerializeStruct; - let mut len = 0; - if !self.expr.is_empty() { - len += 1; - } - let mut struct_ser = serializer.serialize_struct("datafusion.RollupNode", len)?; - if !self.expr.is_empty() { - struct_ser.serialize_field("expr", &self.expr)?; - } - struct_ser.end() - } -} -impl<'de> serde::Deserialize<'de> for RollupNode { - #[allow(deprecated)] - fn deserialize(deserializer: D) -> std::result::Result - where - D: serde::Deserializer<'de>, - { - const FIELDS: &[&str] = &[ - "expr", - ]; - - #[allow(clippy::enum_variant_names)] - enum GeneratedField { - Expr, - } - impl<'de> serde::Deserialize<'de> for GeneratedField { - fn deserialize(deserializer: D) -> std::result::Result - where - D: serde::Deserializer<'de>, - { - struct GeneratedVisitor; - - impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { - type Value = GeneratedField; - - fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(formatter, "expected one of: {:?}", &FIELDS) - } - - #[allow(unused_variables)] - fn visit_str(self, value: &str) -> std::result::Result - where - E: serde::de::Error, - { - 
match value { - "expr" => Ok(GeneratedField::Expr), - _ => Err(serde::de::Error::unknown_field(value, FIELDS)), - } - } - } - deserializer.deserialize_identifier(GeneratedVisitor) - } - } - struct GeneratedVisitor; - impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { - type Value = RollupNode; - - fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - formatter.write_str("struct datafusion.RollupNode") - } - - fn visit_map(self, mut map_: V) -> std::result::Result - where - V: serde::de::MapAccess<'de>, - { - let mut expr__ = None; - while let Some(k) = map_.next_key()? { - match k { - GeneratedField::Expr => { - if expr__.is_some() { - return Err(serde::de::Error::duplicate_field("expr")); + match k { + GeneratedField::Input => { + if input__.is_some() { + return Err(serde::de::Error::duplicate_field("input")); } - expr__ = Some(map_.next_value()?); + input__ = map_.next_value()?; + } + GeneratedField::RoundRobin => { + if partition_method__.is_some() { + return Err(serde::de::Error::duplicate_field("roundRobin")); + } + partition_method__ = map_.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| repartition_exec_node::PartitionMethod::RoundRobin(x.0)); + } + GeneratedField::Hash => { + if partition_method__.is_some() { + return Err(serde::de::Error::duplicate_field("hash")); + } + partition_method__ = map_.next_value::<::std::option::Option<_>>()?.map(repartition_exec_node::PartitionMethod::Hash) +; + } + GeneratedField::Unknown => { + if partition_method__.is_some() { + return Err(serde::de::Error::duplicate_field("unknown")); + } + partition_method__ = map_.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| repartition_exec_node::PartitionMethod::Unknown(x.0)); } } } - Ok(RollupNode { - expr: expr__.unwrap_or_default(), + Ok(RepartitionExecNode { + input: input__, + partition_method: partition_method__, }) } } - deserializer.deserialize_struct("datafusion.RollupNode", FIELDS, GeneratedVisitor) + deserializer.deserialize_struct("datafusion.RepartitionExecNode", FIELDS, GeneratedVisitor) } } -impl serde::Serialize for SqlOption { +impl serde::Serialize for RepartitionNode { #[allow(deprecated)] fn serialize(&self, serializer: S) -> std::result::Result where @@ -21677,37 +22672,48 @@ impl serde::Serialize for SqlOption { { use serde::ser::SerializeStruct; let mut len = 0; - if !self.key.is_empty() { + if self.input.is_some() { len += 1; } - if !self.value.is_empty() { + if self.partition_method.is_some() { len += 1; } - let mut struct_ser = serializer.serialize_struct("datafusion.SQLOption", len)?; - if !self.key.is_empty() { - struct_ser.serialize_field("key", &self.key)?; + let mut struct_ser = serializer.serialize_struct("datafusion.RepartitionNode", len)?; + if let Some(v) = self.input.as_ref() { + struct_ser.serialize_field("input", v)?; } - if !self.value.is_empty() { - struct_ser.serialize_field("value", &self.value)?; + if let Some(v) = self.partition_method.as_ref() { + match v { + repartition_node::PartitionMethod::RoundRobin(v) => { + #[allow(clippy::needless_borrow)] + struct_ser.serialize_field("roundRobin", ToString::to_string(&v).as_str())?; + } + repartition_node::PartitionMethod::Hash(v) => { + struct_ser.serialize_field("hash", v)?; + } + } } struct_ser.end() } } -impl<'de> serde::Deserialize<'de> for SqlOption { +impl<'de> serde::Deserialize<'de> for RepartitionNode { #[allow(deprecated)] fn deserialize(deserializer: D) -> std::result::Result where D: 
serde::Deserializer<'de>, { const FIELDS: &[&str] = &[ - "key", - "value", + "input", + "round_robin", + "roundRobin", + "hash", ]; #[allow(clippy::enum_variant_names)] enum GeneratedField { - Key, - Value, + Input, + RoundRobin, + Hash, } impl<'de> serde::Deserialize<'de> for GeneratedField { fn deserialize(deserializer: D) -> std::result::Result @@ -21729,8 +22735,9 @@ impl<'de> serde::Deserialize<'de> for SqlOption { E: serde::de::Error, { match value { - "key" => Ok(GeneratedField::Key), - "value" => Ok(GeneratedField::Value), + "input" => Ok(GeneratedField::Input), + "roundRobin" | "round_robin" => Ok(GeneratedField::RoundRobin), + "hash" => Ok(GeneratedField::Hash), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } } @@ -21740,44 +22747,51 @@ impl<'de> serde::Deserialize<'de> for SqlOption { } struct GeneratedVisitor; impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { - type Value = SqlOption; + type Value = RepartitionNode; fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - formatter.write_str("struct datafusion.SQLOption") + formatter.write_str("struct datafusion.RepartitionNode") } - fn visit_map(self, mut map_: V) -> std::result::Result + fn visit_map(self, mut map_: V) -> std::result::Result where V: serde::de::MapAccess<'de>, { - let mut key__ = None; - let mut value__ = None; + let mut input__ = None; + let mut partition_method__ = None; while let Some(k) = map_.next_key()? { match k { - GeneratedField::Key => { - if key__.is_some() { - return Err(serde::de::Error::duplicate_field("key")); + GeneratedField::Input => { + if input__.is_some() { + return Err(serde::de::Error::duplicate_field("input")); } - key__ = Some(map_.next_value()?); + input__ = map_.next_value()?; } - GeneratedField::Value => { - if value__.is_some() { - return Err(serde::de::Error::duplicate_field("value")); + GeneratedField::RoundRobin => { + if partition_method__.is_some() { + return Err(serde::de::Error::duplicate_field("roundRobin")); + } + partition_method__ = map_.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| repartition_node::PartitionMethod::RoundRobin(x.0)); + } + GeneratedField::Hash => { + if partition_method__.is_some() { + return Err(serde::de::Error::duplicate_field("hash")); } - value__ = Some(map_.next_value()?); + partition_method__ = map_.next_value::<::std::option::Option<_>>()?.map(repartition_node::PartitionMethod::Hash) +; } } } - Ok(SqlOption { - key: key__.unwrap_or_default(), - value: value__.unwrap_or_default(), + Ok(RepartitionNode { + input: input__, + partition_method: partition_method__, }) } } - deserializer.deserialize_struct("datafusion.SQLOption", FIELDS, GeneratedVisitor) + deserializer.deserialize_struct("datafusion.RepartitionNode", FIELDS, GeneratedVisitor) } } -impl serde::Serialize for SqlOptions { +impl serde::Serialize for RollupNode { #[allow(deprecated)] fn serialize(&self, serializer: S) -> std::result::Result where @@ -21785,29 +22799,29 @@ impl serde::Serialize for SqlOptions { { use serde::ser::SerializeStruct; let mut len = 0; - if !self.option.is_empty() { + if !self.expr.is_empty() { len += 1; } - let mut struct_ser = serializer.serialize_struct("datafusion.SQLOptions", len)?; - if !self.option.is_empty() { - struct_ser.serialize_field("option", &self.option)?; + let mut struct_ser = serializer.serialize_struct("datafusion.RollupNode", len)?; + if !self.expr.is_empty() { + struct_ser.serialize_field("expr", &self.expr)?; } struct_ser.end() } } -impl<'de> 
serde::Deserialize<'de> for SqlOptions { +impl<'de> serde::Deserialize<'de> for RollupNode { #[allow(deprecated)] fn deserialize(deserializer: D) -> std::result::Result where D: serde::Deserializer<'de>, { const FIELDS: &[&str] = &[ - "option", + "expr", ]; #[allow(clippy::enum_variant_names)] enum GeneratedField { - Option, + Expr, } impl<'de> serde::Deserialize<'de> for GeneratedField { fn deserialize(deserializer: D) -> std::result::Result @@ -21829,7 +22843,7 @@ impl<'de> serde::Deserialize<'de> for SqlOptions { E: serde::de::Error, { match value { - "option" => Ok(GeneratedField::Option), + "expr" => Ok(GeneratedField::Expr), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } } @@ -21839,33 +22853,33 @@ impl<'de> serde::Deserialize<'de> for SqlOptions { } struct GeneratedVisitor; impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { - type Value = SqlOptions; + type Value = RollupNode; fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - formatter.write_str("struct datafusion.SQLOptions") + formatter.write_str("struct datafusion.RollupNode") } - fn visit_map(self, mut map_: V) -> std::result::Result + fn visit_map(self, mut map_: V) -> std::result::Result where V: serde::de::MapAccess<'de>, { - let mut option__ = None; + let mut expr__ = None; while let Some(k) = map_.next_key()? { match k { - GeneratedField::Option => { - if option__.is_some() { - return Err(serde::de::Error::duplicate_field("option")); + GeneratedField::Expr => { + if expr__.is_some() { + return Err(serde::de::Error::duplicate_field("expr")); } - option__ = Some(map_.next_value()?); + expr__ = Some(map_.next_value()?); } } } - Ok(SqlOptions { - option: option__.unwrap_or_default(), + Ok(RollupNode { + expr: expr__.unwrap_or_default(), }) } } - deserializer.deserialize_struct("datafusion.SQLOptions", FIELDS, GeneratedVisitor) + deserializer.deserialize_struct("datafusion.RollupNode", FIELDS, GeneratedVisitor) } } impl serde::Serialize for ScalarDictionaryValue { @@ -22113,9 +23127,7 @@ impl serde::Serialize for ScalarFunction { Self::Signum => "Signum", Self::Sin => "Sin", Self::Sqrt => "Sqrt", - Self::Tan => "Tan", Self::Trunc => "Trunc", - Self::Array => "Array", Self::BitLength => "BitLength", Self::Btrim => "Btrim", Self::CharacterLength => "CharacterLength", @@ -22145,14 +23157,16 @@ impl serde::Serialize for ScalarFunction { Self::Strpos => "Strpos", Self::Substr => "Substr", Self::ToHex => "ToHex", + Self::Now => "Now", Self::Translate => "Translate", Self::Trim => "Trim", Self::Upper => "Upper", Self::Coalesce => "Coalesce", Self::Power => "Power", - Self::StructFun => "StructFun", + Self::FromUnixtime => "FromUnixtime", Self::Atan2 => "Atan2", - Self::ArrowTypeof => "ArrowTypeof", + Self::CurrentDate => "CurrentDate", + Self::CurrentTime => "CurrentTime", Self::Uuid => "Uuid", Self::Cbrt => "Cbrt", Self::Acosh => "Acosh", @@ -22160,19 +23174,14 @@ impl serde::Serialize for ScalarFunction { Self::Atanh => "Atanh", Self::Sinh => "Sinh", Self::Cosh => "Cosh", - Self::Tanh => "Tanh", Self::Pi => "Pi", Self::Degrees => "Degrees", Self::Radians => "Radians", Self::Factorial => "Factorial", Self::Lcm => "Lcm", Self::Gcd => "Gcd", - Self::ArrayAppend => "ArrayAppend", - Self::ArrayConcat => "ArrayConcat", - Self::ArrayRepeat => "ArrayRepeat", Self::ArrayPosition => "ArrayPosition", Self::ArrayPositions => "ArrayPositions", - Self::ArrayPrepend => "ArrayPrepend", Self::ArrayRemove => "ArrayRemove", Self::ArrayReplace => "ArrayReplace", Self::ArrayElement => "ArrayElement", 
@@ -22185,7 +23194,6 @@ impl serde::Serialize for ScalarFunction { Self::Nanvl => "Nanvl", Self::Iszero => "Iszero", Self::ArrayPopBack => "ArrayPopBack", - Self::StringToArray => "StringToArray", Self::ArrayIntersect => "ArrayIntersect", Self::ArrayUnion => "ArrayUnion", Self::OverLay => "OverLay", @@ -22194,8 +23202,6 @@ impl serde::Serialize for ScalarFunction { Self::Levenshtein => "Levenshtein", Self::SubstrIndex => "SubstrIndex", Self::FindInSet => "FindInSet", - Self::ArraySort => "ArraySort", - Self::ArrayDistinct => "ArrayDistinct", Self::ArrayResize => "ArrayResize", Self::EndsWith => "EndsWith", Self::MakeDate => "MakeDate", @@ -22228,9 +23234,7 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Signum", "Sin", "Sqrt", - "Tan", "Trunc", - "Array", "BitLength", "Btrim", "CharacterLength", @@ -22260,14 +23264,16 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Strpos", "Substr", "ToHex", + "Now", "Translate", "Trim", "Upper", "Coalesce", "Power", - "StructFun", + "FromUnixtime", "Atan2", - "ArrowTypeof", + "CurrentDate", + "CurrentTime", "Uuid", "Cbrt", "Acosh", @@ -22275,19 +23281,14 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Atanh", "Sinh", "Cosh", - "Tanh", "Pi", "Degrees", "Radians", "Factorial", "Lcm", "Gcd", - "ArrayAppend", - "ArrayConcat", - "ArrayRepeat", "ArrayPosition", "ArrayPositions", - "ArrayPrepend", "ArrayRemove", "ArrayReplace", "ArrayElement", @@ -22300,7 +23301,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Nanvl", "Iszero", "ArrayPopBack", - "StringToArray", "ArrayIntersect", "ArrayUnion", "OverLay", @@ -22309,8 +23309,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Levenshtein", "SubstrIndex", "FindInSet", - "ArraySort", - "ArrayDistinct", "ArrayResize", "EndsWith", "MakeDate", @@ -22372,9 +23370,7 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Signum" => Ok(ScalarFunction::Signum), "Sin" => Ok(ScalarFunction::Sin), "Sqrt" => Ok(ScalarFunction::Sqrt), - "Tan" => Ok(ScalarFunction::Tan), "Trunc" => Ok(ScalarFunction::Trunc), - "Array" => Ok(ScalarFunction::Array), "BitLength" => Ok(ScalarFunction::BitLength), "Btrim" => Ok(ScalarFunction::Btrim), "CharacterLength" => Ok(ScalarFunction::CharacterLength), @@ -22404,14 +23400,16 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Strpos" => Ok(ScalarFunction::Strpos), "Substr" => Ok(ScalarFunction::Substr), "ToHex" => Ok(ScalarFunction::ToHex), + "Now" => Ok(ScalarFunction::Now), "Translate" => Ok(ScalarFunction::Translate), "Trim" => Ok(ScalarFunction::Trim), "Upper" => Ok(ScalarFunction::Upper), "Coalesce" => Ok(ScalarFunction::Coalesce), "Power" => Ok(ScalarFunction::Power), - "StructFun" => Ok(ScalarFunction::StructFun), + "FromUnixtime" => Ok(ScalarFunction::FromUnixtime), "Atan2" => Ok(ScalarFunction::Atan2), - "ArrowTypeof" => Ok(ScalarFunction::ArrowTypeof), + "CurrentDate" => Ok(ScalarFunction::CurrentDate), + "CurrentTime" => Ok(ScalarFunction::CurrentTime), "Uuid" => Ok(ScalarFunction::Uuid), "Cbrt" => Ok(ScalarFunction::Cbrt), "Acosh" => Ok(ScalarFunction::Acosh), @@ -22419,19 +23417,14 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Atanh" => Ok(ScalarFunction::Atanh), "Sinh" => Ok(ScalarFunction::Sinh), "Cosh" => Ok(ScalarFunction::Cosh), - "Tanh" => Ok(ScalarFunction::Tanh), "Pi" => Ok(ScalarFunction::Pi), "Degrees" => Ok(ScalarFunction::Degrees), "Radians" => Ok(ScalarFunction::Radians), "Factorial" => Ok(ScalarFunction::Factorial), "Lcm" => Ok(ScalarFunction::Lcm), "Gcd" => Ok(ScalarFunction::Gcd), - 
"ArrayAppend" => Ok(ScalarFunction::ArrayAppend), - "ArrayConcat" => Ok(ScalarFunction::ArrayConcat), - "ArrayRepeat" => Ok(ScalarFunction::ArrayRepeat), "ArrayPosition" => Ok(ScalarFunction::ArrayPosition), "ArrayPositions" => Ok(ScalarFunction::ArrayPositions), - "ArrayPrepend" => Ok(ScalarFunction::ArrayPrepend), "ArrayRemove" => Ok(ScalarFunction::ArrayRemove), "ArrayReplace" => Ok(ScalarFunction::ArrayReplace), "ArrayElement" => Ok(ScalarFunction::ArrayElement), @@ -22444,7 +23437,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Nanvl" => Ok(ScalarFunction::Nanvl), "Iszero" => Ok(ScalarFunction::Iszero), "ArrayPopBack" => Ok(ScalarFunction::ArrayPopBack), - "StringToArray" => Ok(ScalarFunction::StringToArray), "ArrayIntersect" => Ok(ScalarFunction::ArrayIntersect), "ArrayUnion" => Ok(ScalarFunction::ArrayUnion), "OverLay" => Ok(ScalarFunction::OverLay), @@ -22453,8 +23445,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Levenshtein" => Ok(ScalarFunction::Levenshtein), "SubstrIndex" => Ok(ScalarFunction::SubstrIndex), "FindInSet" => Ok(ScalarFunction::FindInSet), - "ArraySort" => Ok(ScalarFunction::ArraySort), - "ArrayDistinct" => Ok(ScalarFunction::ArrayDistinct), "ArrayResize" => Ok(ScalarFunction::ArrayResize), "EndsWith" => Ok(ScalarFunction::EndsWith), "MakeDate" => Ok(ScalarFunction::MakeDate), @@ -25535,76 +26525,185 @@ impl<'de> serde::Deserialize<'de> for SymmetricHashJoinExecNode { let mut right_sort_exprs__ = None; while let Some(k) = map_.next_key()? { match k { - GeneratedField::Left => { - if left__.is_some() { - return Err(serde::de::Error::duplicate_field("left")); - } - left__ = map_.next_value()?; - } - GeneratedField::Right => { - if right__.is_some() { - return Err(serde::de::Error::duplicate_field("right")); - } - right__ = map_.next_value()?; - } - GeneratedField::On => { - if on__.is_some() { - return Err(serde::de::Error::duplicate_field("on")); - } - on__ = Some(map_.next_value()?); - } - GeneratedField::JoinType => { - if join_type__.is_some() { - return Err(serde::de::Error::duplicate_field("joinType")); - } - join_type__ = Some(map_.next_value::()? as i32); - } - GeneratedField::PartitionMode => { - if partition_mode__.is_some() { - return Err(serde::de::Error::duplicate_field("partitionMode")); - } - partition_mode__ = Some(map_.next_value::()? as i32); - } - GeneratedField::NullEqualsNull => { - if null_equals_null__.is_some() { - return Err(serde::de::Error::duplicate_field("nullEqualsNull")); - } - null_equals_null__ = Some(map_.next_value()?); - } - GeneratedField::Filter => { - if filter__.is_some() { - return Err(serde::de::Error::duplicate_field("filter")); + GeneratedField::Left => { + if left__.is_some() { + return Err(serde::de::Error::duplicate_field("left")); + } + left__ = map_.next_value()?; + } + GeneratedField::Right => { + if right__.is_some() { + return Err(serde::de::Error::duplicate_field("right")); + } + right__ = map_.next_value()?; + } + GeneratedField::On => { + if on__.is_some() { + return Err(serde::de::Error::duplicate_field("on")); + } + on__ = Some(map_.next_value()?); + } + GeneratedField::JoinType => { + if join_type__.is_some() { + return Err(serde::de::Error::duplicate_field("joinType")); + } + join_type__ = Some(map_.next_value::()? as i32); + } + GeneratedField::PartitionMode => { + if partition_mode__.is_some() { + return Err(serde::de::Error::duplicate_field("partitionMode")); + } + partition_mode__ = Some(map_.next_value::()? 
as i32); + } + GeneratedField::NullEqualsNull => { + if null_equals_null__.is_some() { + return Err(serde::de::Error::duplicate_field("nullEqualsNull")); + } + null_equals_null__ = Some(map_.next_value()?); + } + GeneratedField::Filter => { + if filter__.is_some() { + return Err(serde::de::Error::duplicate_field("filter")); + } + filter__ = map_.next_value()?; + } + GeneratedField::LeftSortExprs => { + if left_sort_exprs__.is_some() { + return Err(serde::de::Error::duplicate_field("leftSortExprs")); + } + left_sort_exprs__ = Some(map_.next_value()?); + } + GeneratedField::RightSortExprs => { + if right_sort_exprs__.is_some() { + return Err(serde::de::Error::duplicate_field("rightSortExprs")); + } + right_sort_exprs__ = Some(map_.next_value()?); + } + } + } + Ok(SymmetricHashJoinExecNode { + left: left__, + right: right__, + on: on__.unwrap_or_default(), + join_type: join_type__.unwrap_or_default(), + partition_mode: partition_mode__.unwrap_or_default(), + null_equals_null: null_equals_null__.unwrap_or_default(), + filter: filter__, + left_sort_exprs: left_sort_exprs__.unwrap_or_default(), + right_sort_exprs: right_sort_exprs__.unwrap_or_default(), + }) + } + } + deserializer.deserialize_struct("datafusion.SymmetricHashJoinExecNode", FIELDS, GeneratedVisitor) + } +} +impl serde::Serialize for TableParquetOptions { + #[allow(deprecated)] + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + use serde::ser::SerializeStruct; + let mut len = 0; + if self.global.is_some() { + len += 1; + } + if !self.column_specific_options.is_empty() { + len += 1; + } + let mut struct_ser = serializer.serialize_struct("datafusion.TableParquetOptions", len)?; + if let Some(v) = self.global.as_ref() { + struct_ser.serialize_field("global", v)?; + } + if !self.column_specific_options.is_empty() { + struct_ser.serialize_field("columnSpecificOptions", &self.column_specific_options)?; + } + struct_ser.end() + } +} +impl<'de> serde::Deserialize<'de> for TableParquetOptions { + #[allow(deprecated)] + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + const FIELDS: &[&str] = &[ + "global", + "column_specific_options", + "columnSpecificOptions", + ]; + + #[allow(clippy::enum_variant_names)] + enum GeneratedField { + Global, + ColumnSpecificOptions, + } + impl<'de> serde::Deserialize<'de> for GeneratedField { + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + struct GeneratedVisitor; + + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = GeneratedField; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(formatter, "expected one of: {:?}", &FIELDS) + } + + #[allow(unused_variables)] + fn visit_str(self, value: &str) -> std::result::Result + where + E: serde::de::Error, + { + match value { + "global" => Ok(GeneratedField::Global), + "columnSpecificOptions" | "column_specific_options" => Ok(GeneratedField::ColumnSpecificOptions), + _ => Err(serde::de::Error::unknown_field(value, FIELDS)), + } + } + } + deserializer.deserialize_identifier(GeneratedVisitor) + } + } + struct GeneratedVisitor; + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = TableParquetOptions; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + formatter.write_str("struct datafusion.TableParquetOptions") + } + + fn visit_map(self, mut map_: V) -> std::result::Result + where + V: 
serde::de::MapAccess<'de>, + { + let mut global__ = None; + let mut column_specific_options__ = None; + while let Some(k) = map_.next_key()? { + match k { + GeneratedField::Global => { + if global__.is_some() { + return Err(serde::de::Error::duplicate_field("global")); } - filter__ = map_.next_value()?; - } - GeneratedField::LeftSortExprs => { - if left_sort_exprs__.is_some() { - return Err(serde::de::Error::duplicate_field("leftSortExprs")); - } - left_sort_exprs__ = Some(map_.next_value()?); + global__ = map_.next_value()?; } - GeneratedField::RightSortExprs => { - if right_sort_exprs__.is_some() { - return Err(serde::de::Error::duplicate_field("rightSortExprs")); + GeneratedField::ColumnSpecificOptions => { + if column_specific_options__.is_some() { + return Err(serde::de::Error::duplicate_field("columnSpecificOptions")); } - right_sort_exprs__ = Some(map_.next_value()?); + column_specific_options__ = Some(map_.next_value()?); } } } - Ok(SymmetricHashJoinExecNode { - left: left__, - right: right__, - on: on__.unwrap_or_default(), - join_type: join_type__.unwrap_or_default(), - partition_mode: partition_mode__.unwrap_or_default(), - null_equals_null: null_equals_null__.unwrap_or_default(), - filter: filter__, - left_sort_exprs: left_sort_exprs__.unwrap_or_default(), - right_sort_exprs: right_sort_exprs__.unwrap_or_default(), + Ok(TableParquetOptions { + global: global__, + column_specific_options: column_specific_options__.unwrap_or_default(), }) } } - deserializer.deserialize_struct("datafusion.SymmetricHashJoinExecNode", FIELDS, GeneratedVisitor) + deserializer.deserialize_struct("datafusion.TableParquetOptions", FIELDS, GeneratedVisitor) } } impl serde::Serialize for TimeUnit { @@ -27836,218 +28935,3 @@ impl<'de> serde::Deserialize<'de> for WindowNode { deserializer.deserialize_struct("datafusion.WindowNode", FIELDS, GeneratedVisitor) } } -impl serde::Serialize for WriterProperties { - #[allow(deprecated)] - fn serialize(&self, serializer: S) -> std::result::Result - where - S: serde::Serializer, - { - use serde::ser::SerializeStruct; - let mut len = 0; - if self.data_page_size_limit != 0 { - len += 1; - } - if self.dictionary_page_size_limit != 0 { - len += 1; - } - if self.data_page_row_count_limit != 0 { - len += 1; - } - if self.write_batch_size != 0 { - len += 1; - } - if self.max_row_group_size != 0 { - len += 1; - } - if !self.writer_version.is_empty() { - len += 1; - } - if !self.created_by.is_empty() { - len += 1; - } - let mut struct_ser = serializer.serialize_struct("datafusion.WriterProperties", len)?; - if self.data_page_size_limit != 0 { - #[allow(clippy::needless_borrow)] - struct_ser.serialize_field("dataPageSizeLimit", ToString::to_string(&self.data_page_size_limit).as_str())?; - } - if self.dictionary_page_size_limit != 0 { - #[allow(clippy::needless_borrow)] - struct_ser.serialize_field("dictionaryPageSizeLimit", ToString::to_string(&self.dictionary_page_size_limit).as_str())?; - } - if self.data_page_row_count_limit != 0 { - #[allow(clippy::needless_borrow)] - struct_ser.serialize_field("dataPageRowCountLimit", ToString::to_string(&self.data_page_row_count_limit).as_str())?; - } - if self.write_batch_size != 0 { - #[allow(clippy::needless_borrow)] - struct_ser.serialize_field("writeBatchSize", ToString::to_string(&self.write_batch_size).as_str())?; - } - if self.max_row_group_size != 0 { - #[allow(clippy::needless_borrow)] - struct_ser.serialize_field("maxRowGroupSize", ToString::to_string(&self.max_row_group_size).as_str())?; - } - if 
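The new TableParquetOptions impls above follow the usual pbjson conventions: defaulted fields are skipped on serialize, and both the snake_case and camelCase spellings of column_specific_options deserialize to the same field. A small sketch, assuming the generated type is exposed as datafusion_proto::protobuf::TableParquetOptions and using serde_json only for illustration:

use datafusion_proto::protobuf::TableParquetOptions;

fn main() -> Result<(), serde_json::Error> {
    // With `global` unset and no per-column overrides, every field is skipped,
    // so the serialized form is an empty JSON object.
    let empty = TableParquetOptions { global: None, column_specific_options: vec![] };
    assert_eq!(serde_json::to_string(&empty)?, "{}");

    // Both key spellings map to GeneratedField::ColumnSpecificOptions.
    let parsed: TableParquetOptions =
        serde_json::from_str(r#"{"columnSpecificOptions": []}"#)?;
    assert!(parsed.column_specific_options.is_empty());
    Ok(())
}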
!self.writer_version.is_empty() { - struct_ser.serialize_field("writerVersion", &self.writer_version)?; - } - if !self.created_by.is_empty() { - struct_ser.serialize_field("createdBy", &self.created_by)?; - } - struct_ser.end() - } -} -impl<'de> serde::Deserialize<'de> for WriterProperties { - #[allow(deprecated)] - fn deserialize(deserializer: D) -> std::result::Result - where - D: serde::Deserializer<'de>, - { - const FIELDS: &[&str] = &[ - "data_page_size_limit", - "dataPageSizeLimit", - "dictionary_page_size_limit", - "dictionaryPageSizeLimit", - "data_page_row_count_limit", - "dataPageRowCountLimit", - "write_batch_size", - "writeBatchSize", - "max_row_group_size", - "maxRowGroupSize", - "writer_version", - "writerVersion", - "created_by", - "createdBy", - ]; - - #[allow(clippy::enum_variant_names)] - enum GeneratedField { - DataPageSizeLimit, - DictionaryPageSizeLimit, - DataPageRowCountLimit, - WriteBatchSize, - MaxRowGroupSize, - WriterVersion, - CreatedBy, - } - impl<'de> serde::Deserialize<'de> for GeneratedField { - fn deserialize(deserializer: D) -> std::result::Result - where - D: serde::Deserializer<'de>, - { - struct GeneratedVisitor; - - impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { - type Value = GeneratedField; - - fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(formatter, "expected one of: {:?}", &FIELDS) - } - - #[allow(unused_variables)] - fn visit_str(self, value: &str) -> std::result::Result - where - E: serde::de::Error, - { - match value { - "dataPageSizeLimit" | "data_page_size_limit" => Ok(GeneratedField::DataPageSizeLimit), - "dictionaryPageSizeLimit" | "dictionary_page_size_limit" => Ok(GeneratedField::DictionaryPageSizeLimit), - "dataPageRowCountLimit" | "data_page_row_count_limit" => Ok(GeneratedField::DataPageRowCountLimit), - "writeBatchSize" | "write_batch_size" => Ok(GeneratedField::WriteBatchSize), - "maxRowGroupSize" | "max_row_group_size" => Ok(GeneratedField::MaxRowGroupSize), - "writerVersion" | "writer_version" => Ok(GeneratedField::WriterVersion), - "createdBy" | "created_by" => Ok(GeneratedField::CreatedBy), - _ => Err(serde::de::Error::unknown_field(value, FIELDS)), - } - } - } - deserializer.deserialize_identifier(GeneratedVisitor) - } - } - struct GeneratedVisitor; - impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { - type Value = WriterProperties; - - fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - formatter.write_str("struct datafusion.WriterProperties") - } - - fn visit_map(self, mut map_: V) -> std::result::Result - where - V: serde::de::MapAccess<'de>, - { - let mut data_page_size_limit__ = None; - let mut dictionary_page_size_limit__ = None; - let mut data_page_row_count_limit__ = None; - let mut write_batch_size__ = None; - let mut max_row_group_size__ = None; - let mut writer_version__ = None; - let mut created_by__ = None; - while let Some(k) = map_.next_key()? 
{ - match k { - GeneratedField::DataPageSizeLimit => { - if data_page_size_limit__.is_some() { - return Err(serde::de::Error::duplicate_field("dataPageSizeLimit")); - } - data_page_size_limit__ = - Some(map_.next_value::<::pbjson::private::NumberDeserialize<_>>()?.0) - ; - } - GeneratedField::DictionaryPageSizeLimit => { - if dictionary_page_size_limit__.is_some() { - return Err(serde::de::Error::duplicate_field("dictionaryPageSizeLimit")); - } - dictionary_page_size_limit__ = - Some(map_.next_value::<::pbjson::private::NumberDeserialize<_>>()?.0) - ; - } - GeneratedField::DataPageRowCountLimit => { - if data_page_row_count_limit__.is_some() { - return Err(serde::de::Error::duplicate_field("dataPageRowCountLimit")); - } - data_page_row_count_limit__ = - Some(map_.next_value::<::pbjson::private::NumberDeserialize<_>>()?.0) - ; - } - GeneratedField::WriteBatchSize => { - if write_batch_size__.is_some() { - return Err(serde::de::Error::duplicate_field("writeBatchSize")); - } - write_batch_size__ = - Some(map_.next_value::<::pbjson::private::NumberDeserialize<_>>()?.0) - ; - } - GeneratedField::MaxRowGroupSize => { - if max_row_group_size__.is_some() { - return Err(serde::de::Error::duplicate_field("maxRowGroupSize")); - } - max_row_group_size__ = - Some(map_.next_value::<::pbjson::private::NumberDeserialize<_>>()?.0) - ; - } - GeneratedField::WriterVersion => { - if writer_version__.is_some() { - return Err(serde::de::Error::duplicate_field("writerVersion")); - } - writer_version__ = Some(map_.next_value()?); - } - GeneratedField::CreatedBy => { - if created_by__.is_some() { - return Err(serde::de::Error::duplicate_field("createdBy")); - } - created_by__ = Some(map_.next_value()?); - } - } - } - Ok(WriterProperties { - data_page_size_limit: data_page_size_limit__.unwrap_or_default(), - dictionary_page_size_limit: dictionary_page_size_limit__.unwrap_or_default(), - data_page_row_count_limit: data_page_row_count_limit__.unwrap_or_default(), - write_batch_size: write_batch_size__.unwrap_or_default(), - max_row_group_size: max_row_group_size__.unwrap_or_default(), - writer_version: writer_version__.unwrap_or_default(), - created_by: created_by__.unwrap_or_default(), - }) - } - } - deserializer.deserialize_struct("datafusion.WriterProperties", FIELDS, GeneratedVisitor) - } -} diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index d4f911585bb9..c557fb48b191 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -122,27 +122,15 @@ pub struct ProjectionColumns { #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct CsvFormat { - #[prost(bool, tag = "1")] - pub has_header: bool, - #[prost(string, tag = "2")] - pub delimiter: ::prost::alloc::string::String, - #[prost(string, tag = "3")] - pub quote: ::prost::alloc::string::String, - #[prost(oneof = "csv_format::OptionalEscape", tags = "4")] - pub optional_escape: ::core::option::Option, -} -/// Nested message and enum types in `CsvFormat`. 
-pub mod csv_format { - #[allow(clippy::derive_partial_eq_without_eq)] - #[derive(Clone, PartialEq, ::prost::Oneof)] - pub enum OptionalEscape { - #[prost(string, tag = "4")] - Escape(::prost::alloc::string::String), - } + #[prost(message, optional, tag = "5")] + pub options: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] -pub struct ParquetFormat {} +pub struct ParquetFormat { + #[prost(message, optional, tag = "2")] + pub options: ::core::option::Option, +} #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct AvroFormat {} @@ -509,38 +497,34 @@ pub struct CopyToNode { pub input: ::core::option::Option<::prost::alloc::boxed::Box>, #[prost(string, tag = "2")] pub output_url: ::prost::alloc::string::String, - #[prost(string, tag = "6")] - pub file_type: ::prost::alloc::string::String, #[prost(string, repeated, tag = "7")] pub partition_by: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, - #[prost(oneof = "copy_to_node::CopyOptions", tags = "4, 5")] - pub copy_options: ::core::option::Option, + #[prost(oneof = "copy_to_node::FormatOptions", tags = "8, 9, 10, 11, 12")] + pub format_options: ::core::option::Option, } /// Nested message and enum types in `CopyToNode`. pub mod copy_to_node { #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Oneof)] - pub enum CopyOptions { - #[prost(message, tag = "4")] - SqlOptions(super::SqlOptions), - #[prost(message, tag = "5")] - WriterOptions(super::FileTypeWriterOptions), + pub enum FormatOptions { + #[prost(message, tag = "8")] + Csv(super::CsvOptions), + #[prost(message, tag = "9")] + Json(super::JsonOptions), + #[prost(message, tag = "10")] + Parquet(super::TableParquetOptions), + #[prost(message, tag = "11")] + Avro(super::AvroOptions), + #[prost(message, tag = "12")] + Arrow(super::ArrowOptions), } } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] -pub struct SqlOptions { - #[prost(message, repeated, tag = "1")] - pub option: ::prost::alloc::vec::Vec, -} +pub struct AvroOptions {} #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] -pub struct SqlOption { - #[prost(string, tag = "1")] - pub key: ::prost::alloc::string::String, - #[prost(string, tag = "2")] - pub value: ::prost::alloc::string::String, -} +pub struct ArrowOptions {} #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct UnionNode { @@ -1647,39 +1631,12 @@ pub struct PartitionColumn { } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] -pub struct FileTypeWriterOptions { - #[prost(oneof = "file_type_writer_options::FileType", tags = "1, 2, 3, 4")] - pub file_type: ::core::option::Option, -} -/// Nested message and enum types in `FileTypeWriterOptions`. 
-pub mod file_type_writer_options { - #[allow(clippy::derive_partial_eq_without_eq)] - #[derive(Clone, PartialEq, ::prost::Oneof)] - pub enum FileType { - #[prost(message, tag = "1")] - JsonOptions(super::JsonWriterOptions), - #[prost(message, tag = "2")] - ParquetOptions(super::ParquetWriterOptions), - #[prost(message, tag = "3")] - CsvOptions(super::CsvWriterOptions), - #[prost(message, tag = "4")] - ArrowOptions(super::ArrowWriterOptions), - } -} -#[allow(clippy::derive_partial_eq_without_eq)] -#[derive(Clone, PartialEq, ::prost::Message)] pub struct JsonWriterOptions { #[prost(enumeration = "CompressionTypeVariant", tag = "1")] pub compression: i32, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] -pub struct ParquetWriterOptions { - #[prost(message, optional, tag = "1")] - pub writer_properties: ::core::option::Option, -} -#[allow(clippy::derive_partial_eq_without_eq)] -#[derive(Clone, PartialEq, ::prost::Message)] pub struct CsvWriterOptions { /// Compression type #[prost(enumeration = "CompressionTypeVariant", tag = "1")] @@ -1706,26 +1663,57 @@ pub struct CsvWriterOptions { #[prost(string, tag = "8")] pub null_value: ::prost::alloc::string::String, } +/// Options controlling CSV format #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] -pub struct ArrowWriterOptions {} +pub struct CsvOptions { + /// Indicates if the CSV has a header row + #[prost(bool, tag = "1")] + pub has_header: bool, + /// Delimiter character as a byte + #[prost(bytes = "vec", tag = "2")] + pub delimiter: ::prost::alloc::vec::Vec, + /// Quote character as a byte + #[prost(bytes = "vec", tag = "3")] + pub quote: ::prost::alloc::vec::Vec, + /// Optional escape character as a byte + #[prost(bytes = "vec", tag = "4")] + pub escape: ::prost::alloc::vec::Vec, + /// Compression type + #[prost(enumeration = "CompressionTypeVariant", tag = "5")] + pub compression: i32, + /// Max records for schema inference + #[prost(uint64, tag = "6")] + pub schema_infer_max_rec: u64, + /// Optional date format + #[prost(string, tag = "7")] + pub date_format: ::prost::alloc::string::String, + /// Optional datetime format + #[prost(string, tag = "8")] + pub datetime_format: ::prost::alloc::string::String, + /// Optional timestamp format + #[prost(string, tag = "9")] + pub timestamp_format: ::prost::alloc::string::String, + /// Optional timestamp with timezone format + #[prost(string, tag = "10")] + pub timestamp_tz_format: ::prost::alloc::string::String, + /// Optional time format + #[prost(string, tag = "11")] + pub time_format: ::prost::alloc::string::String, + /// Optional representation of null value + #[prost(string, tag = "12")] + pub null_value: ::prost::alloc::string::String, +} +/// Options controlling CSV format #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] -pub struct WriterProperties { - #[prost(uint64, tag = "1")] - pub data_page_size_limit: u64, +pub struct JsonOptions { + /// Compression type + #[prost(enumeration = "CompressionTypeVariant", tag = "1")] + pub compression: i32, + /// Max records for schema inference #[prost(uint64, tag = "2")] - pub dictionary_page_size_limit: u64, - #[prost(uint64, tag = "3")] - pub data_page_row_count_limit: u64, - #[prost(uint64, tag = "4")] - pub write_batch_size: u64, - #[prost(uint64, tag = "5")] - pub max_row_group_size: u64, - #[prost(string, tag = "6")] - pub writer_version: ::prost::alloc::string::String, - #[prost(string, tag = "7")] - pub 
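With FileTypeWriterOptions removed, CSV and JSON settings now travel as the CsvOptions/JsonOptions messages above and plug directly into the CopyToNode format_options oneof. A sketch of building the CSV variant, assuming the generated module path datafusion_proto::protobuf; note that delimiter, quote and escape are now byte vectors rather than single-character strings:

use datafusion_proto::protobuf::{copy_to_node, CsvOptions};

fn csv_format_options() -> copy_to_node::FormatOptions {
    copy_to_node::FormatOptions::Csv(CsvOptions {
        has_header: true,
        delimiter: vec![b'|'],   // a raw byte, not a one-character string
        quote: vec![b'"'],
        // remaining formats/compression fall back to the prost defaults
        ..Default::default()
    })
}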
created_by: ::prost::alloc::string::String, + pub schema_infer_max_rec: u64, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -1742,14 +1730,14 @@ pub struct FileSinkConfig { pub table_partition_cols: ::prost::alloc::vec::Vec, #[prost(bool, tag = "8")] pub overwrite: bool, - #[prost(message, optional, tag = "9")] - pub file_type_writer_options: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct JsonSink { #[prost(message, optional, tag = "1")] pub config: ::core::option::Option, + #[prost(message, optional, tag = "2")] + pub writer_options: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -1768,6 +1756,8 @@ pub struct JsonSinkExecNode { pub struct CsvSink { #[prost(message, optional, tag = "1")] pub config: ::core::option::Option, + #[prost(message, optional, tag = "2")] + pub writer_options: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -1783,9 +1773,241 @@ pub struct CsvSinkExecNode { } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] +pub struct TableParquetOptions { + #[prost(message, optional, tag = "1")] + pub global: ::core::option::Option, + #[prost(message, repeated, tag = "2")] + pub column_specific_options: ::prost::alloc::vec::Vec, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ColumnSpecificOptions { + #[prost(string, tag = "1")] + pub column_name: ::prost::alloc::string::String, + #[prost(message, optional, tag = "2")] + pub options: ::core::option::Option, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ColumnOptions { + #[prost(oneof = "column_options::BloomFilterEnabledOpt", tags = "1")] + pub bloom_filter_enabled_opt: ::core::option::Option< + column_options::BloomFilterEnabledOpt, + >, + #[prost(oneof = "column_options::EncodingOpt", tags = "2")] + pub encoding_opt: ::core::option::Option, + #[prost(oneof = "column_options::DictionaryEnabledOpt", tags = "3")] + pub dictionary_enabled_opt: ::core::option::Option< + column_options::DictionaryEnabledOpt, + >, + #[prost(oneof = "column_options::CompressionOpt", tags = "4")] + pub compression_opt: ::core::option::Option, + #[prost(oneof = "column_options::StatisticsEnabledOpt", tags = "5")] + pub statistics_enabled_opt: ::core::option::Option< + column_options::StatisticsEnabledOpt, + >, + #[prost(oneof = "column_options::BloomFilterFppOpt", tags = "6")] + pub bloom_filter_fpp_opt: ::core::option::Option, + #[prost(oneof = "column_options::BloomFilterNdvOpt", tags = "7")] + pub bloom_filter_ndv_opt: ::core::option::Option, + #[prost(oneof = "column_options::MaxStatisticsSizeOpt", tags = "8")] + pub max_statistics_size_opt: ::core::option::Option< + column_options::MaxStatisticsSizeOpt, + >, +} +/// Nested message and enum types in `ColumnOptions`. 
+pub mod column_options { + #[allow(clippy::derive_partial_eq_without_eq)] + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum BloomFilterEnabledOpt { + #[prost(bool, tag = "1")] + BloomFilterEnabled(bool), + } + #[allow(clippy::derive_partial_eq_without_eq)] + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum EncodingOpt { + #[prost(string, tag = "2")] + Encoding(::prost::alloc::string::String), + } + #[allow(clippy::derive_partial_eq_without_eq)] + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum DictionaryEnabledOpt { + #[prost(bool, tag = "3")] + DictionaryEnabled(bool), + } + #[allow(clippy::derive_partial_eq_without_eq)] + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum CompressionOpt { + #[prost(string, tag = "4")] + Compression(::prost::alloc::string::String), + } + #[allow(clippy::derive_partial_eq_without_eq)] + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum StatisticsEnabledOpt { + #[prost(string, tag = "5")] + StatisticsEnabled(::prost::alloc::string::String), + } + #[allow(clippy::derive_partial_eq_without_eq)] + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum BloomFilterFppOpt { + #[prost(double, tag = "6")] + BloomFilterFpp(f64), + } + #[allow(clippy::derive_partial_eq_without_eq)] + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum BloomFilterNdvOpt { + #[prost(uint64, tag = "7")] + BloomFilterNdv(u64), + } + #[allow(clippy::derive_partial_eq_without_eq)] + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum MaxStatisticsSizeOpt { + #[prost(uint32, tag = "8")] + MaxStatisticsSize(u32), + } +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ParquetOptions { + /// Regular fields + /// + /// default = true + #[prost(bool, tag = "1")] + pub enable_page_index: bool, + /// default = true + #[prost(bool, tag = "2")] + pub pruning: bool, + /// default = true + #[prost(bool, tag = "3")] + pub skip_metadata: bool, + /// default = false + #[prost(bool, tag = "5")] + pub pushdown_filters: bool, + /// default = false + #[prost(bool, tag = "6")] + pub reorder_filters: bool, + /// default = 1024 * 1024 + #[prost(uint64, tag = "7")] + pub data_pagesize_limit: u64, + /// default = 1024 + #[prost(uint64, tag = "8")] + pub write_batch_size: u64, + /// default = "1.0" + #[prost(string, tag = "9")] + pub writer_version: ::prost::alloc::string::String, + /// default = false + #[prost(bool, tag = "20")] + pub bloom_filter_enabled: bool, + /// default = true + #[prost(bool, tag = "23")] + pub allow_single_file_parallelism: bool, + /// default = 1 + #[prost(uint64, tag = "24")] + pub maximum_parallel_row_group_writers: u64, + /// default = 2 + #[prost(uint64, tag = "25")] + pub maximum_buffered_record_batches_per_stream: u64, + #[prost(uint64, tag = "12")] + pub dictionary_page_size_limit: u64, + #[prost(uint64, tag = "18")] + pub data_page_row_count_limit: u64, + #[prost(uint64, tag = "15")] + pub max_row_group_size: u64, + #[prost(string, tag = "16")] + pub created_by: ::prost::alloc::string::String, + #[prost(oneof = "parquet_options::MetadataSizeHintOpt", tags = "4")] + pub metadata_size_hint_opt: ::core::option::Option< + parquet_options::MetadataSizeHintOpt, + >, + #[prost(oneof = "parquet_options::CompressionOpt", tags = "10")] + pub compression_opt: ::core::option::Option, + #[prost(oneof = "parquet_options::DictionaryEnabledOpt", tags = "11")] + pub dictionary_enabled_opt: ::core::option::Option< + parquet_options::DictionaryEnabledOpt, + >, + #[prost(oneof = 
"parquet_options::StatisticsEnabledOpt", tags = "13")] + pub statistics_enabled_opt: ::core::option::Option< + parquet_options::StatisticsEnabledOpt, + >, + #[prost(oneof = "parquet_options::MaxStatisticsSizeOpt", tags = "14")] + pub max_statistics_size_opt: ::core::option::Option< + parquet_options::MaxStatisticsSizeOpt, + >, + #[prost(oneof = "parquet_options::ColumnIndexTruncateLengthOpt", tags = "17")] + pub column_index_truncate_length_opt: ::core::option::Option< + parquet_options::ColumnIndexTruncateLengthOpt, + >, + #[prost(oneof = "parquet_options::EncodingOpt", tags = "19")] + pub encoding_opt: ::core::option::Option, + #[prost(oneof = "parquet_options::BloomFilterFppOpt", tags = "21")] + pub bloom_filter_fpp_opt: ::core::option::Option, + #[prost(oneof = "parquet_options::BloomFilterNdvOpt", tags = "22")] + pub bloom_filter_ndv_opt: ::core::option::Option, +} +/// Nested message and enum types in `ParquetOptions`. +pub mod parquet_options { + #[allow(clippy::derive_partial_eq_without_eq)] + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum MetadataSizeHintOpt { + #[prost(uint64, tag = "4")] + MetadataSizeHint(u64), + } + #[allow(clippy::derive_partial_eq_without_eq)] + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum CompressionOpt { + #[prost(string, tag = "10")] + Compression(::prost::alloc::string::String), + } + #[allow(clippy::derive_partial_eq_without_eq)] + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum DictionaryEnabledOpt { + #[prost(bool, tag = "11")] + DictionaryEnabled(bool), + } + #[allow(clippy::derive_partial_eq_without_eq)] + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum StatisticsEnabledOpt { + #[prost(string, tag = "13")] + StatisticsEnabled(::prost::alloc::string::String), + } + #[allow(clippy::derive_partial_eq_without_eq)] + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum MaxStatisticsSizeOpt { + #[prost(uint64, tag = "14")] + MaxStatisticsSize(u64), + } + #[allow(clippy::derive_partial_eq_without_eq)] + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum ColumnIndexTruncateLengthOpt { + #[prost(uint64, tag = "17")] + ColumnIndexTruncateLength(u64), + } + #[allow(clippy::derive_partial_eq_without_eq)] + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum EncodingOpt { + #[prost(string, tag = "19")] + Encoding(::prost::alloc::string::String), + } + #[allow(clippy::derive_partial_eq_without_eq)] + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum BloomFilterFppOpt { + #[prost(double, tag = "21")] + BloomFilterFpp(f64), + } + #[allow(clippy::derive_partial_eq_without_eq)] + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum BloomFilterNdvOpt { + #[prost(uint64, tag = "22")] + BloomFilterNdv(u64), + } +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] pub struct ParquetSink { #[prost(message, optional, tag = "1")] pub config: ::core::option::Option, + #[prost(message, optional, tag = "2")] + pub parquet_options: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -2639,9 +2861,9 @@ pub enum ScalarFunction { Signum = 15, Sin = 16, Sqrt = 17, - Tan = 18, + /// Tan = 18; Trunc = 19, - Array = 20, + /// 20 was Array /// RegexpMatch = 21; BitLength = 22, Btrim = 23, @@ -2680,19 +2902,19 @@ pub enum ScalarFunction { /// 56 was ToTimestampMillis /// 57 was ToTimestampMicros /// 58 was ToTimestampSeconds - /// 59 was Now + Now = 59, Translate = 60, Trim = 61, Upper = 62, Coalesce = 63, Power = 64, - 
StructFun = 65, - /// 66 was FromUnixtime + /// 65 was StructFun + FromUnixtime = 66, Atan2 = 67, /// 68 was DateBin - ArrowTypeof = 69, - /// 70 was CurrentDate - /// 71 was CurrentTime + /// 69 was ArrowTypeof + CurrentDate = 70, + CurrentTime = 71, Uuid = 72, Cbrt = 73, Acosh = 74, @@ -2700,22 +2922,22 @@ pub enum ScalarFunction { Atanh = 76, Sinh = 77, Cosh = 78, - Tanh = 79, + /// Tanh = 79; Pi = 80, Degrees = 81, Radians = 82, Factorial = 83, Lcm = 84, Gcd = 85, - ArrayAppend = 86, - ArrayConcat = 87, + /// 86 was ArrayAppend + /// 87 was ArrayConcat /// 88 was ArrayDims - ArrayRepeat = 89, + /// 89 was ArrayRepeat /// 90 was ArrayLength /// 91 was ArrayNdims ArrayPosition = 92, ArrayPositions = 93, - ArrayPrepend = 94, + /// 94 was ArrayPrepend ArrayRemove = 95, ArrayReplace = 96, /// 97 was ArrayToString @@ -2736,7 +2958,7 @@ pub enum ScalarFunction { Iszero = 114, /// 115 was ArrayEmpty ArrayPopBack = 116, - StringToArray = 117, + /// 117 was StringToArray /// 118 was ToTimestampNanos ArrayIntersect = 119, ArrayUnion = 120, @@ -2747,8 +2969,8 @@ pub enum ScalarFunction { Levenshtein = 125, SubstrIndex = 126, FindInSet = 127, - ArraySort = 128, - ArrayDistinct = 129, + /// / 128 was ArraySort + /// / 129 was ArrayDistinct ArrayResize = 130, EndsWith = 131, /// / 132 was InStr @@ -2783,9 +3005,7 @@ impl ScalarFunction { ScalarFunction::Signum => "Signum", ScalarFunction::Sin => "Sin", ScalarFunction::Sqrt => "Sqrt", - ScalarFunction::Tan => "Tan", ScalarFunction::Trunc => "Trunc", - ScalarFunction::Array => "Array", ScalarFunction::BitLength => "BitLength", ScalarFunction::Btrim => "Btrim", ScalarFunction::CharacterLength => "CharacterLength", @@ -2815,14 +3035,16 @@ impl ScalarFunction { ScalarFunction::Strpos => "Strpos", ScalarFunction::Substr => "Substr", ScalarFunction::ToHex => "ToHex", + ScalarFunction::Now => "Now", ScalarFunction::Translate => "Translate", ScalarFunction::Trim => "Trim", ScalarFunction::Upper => "Upper", ScalarFunction::Coalesce => "Coalesce", ScalarFunction::Power => "Power", - ScalarFunction::StructFun => "StructFun", + ScalarFunction::FromUnixtime => "FromUnixtime", ScalarFunction::Atan2 => "Atan2", - ScalarFunction::ArrowTypeof => "ArrowTypeof", + ScalarFunction::CurrentDate => "CurrentDate", + ScalarFunction::CurrentTime => "CurrentTime", ScalarFunction::Uuid => "Uuid", ScalarFunction::Cbrt => "Cbrt", ScalarFunction::Acosh => "Acosh", @@ -2830,19 +3052,14 @@ impl ScalarFunction { ScalarFunction::Atanh => "Atanh", ScalarFunction::Sinh => "Sinh", ScalarFunction::Cosh => "Cosh", - ScalarFunction::Tanh => "Tanh", ScalarFunction::Pi => "Pi", ScalarFunction::Degrees => "Degrees", ScalarFunction::Radians => "Radians", ScalarFunction::Factorial => "Factorial", ScalarFunction::Lcm => "Lcm", ScalarFunction::Gcd => "Gcd", - ScalarFunction::ArrayAppend => "ArrayAppend", - ScalarFunction::ArrayConcat => "ArrayConcat", - ScalarFunction::ArrayRepeat => "ArrayRepeat", ScalarFunction::ArrayPosition => "ArrayPosition", ScalarFunction::ArrayPositions => "ArrayPositions", - ScalarFunction::ArrayPrepend => "ArrayPrepend", ScalarFunction::ArrayRemove => "ArrayRemove", ScalarFunction::ArrayReplace => "ArrayReplace", ScalarFunction::ArrayElement => "ArrayElement", @@ -2855,7 +3072,6 @@ impl ScalarFunction { ScalarFunction::Nanvl => "Nanvl", ScalarFunction::Iszero => "Iszero", ScalarFunction::ArrayPopBack => "ArrayPopBack", - ScalarFunction::StringToArray => "StringToArray", ScalarFunction::ArrayIntersect => "ArrayIntersect", ScalarFunction::ArrayUnion => "ArrayUnion", 
ScalarFunction::OverLay => "OverLay", @@ -2864,8 +3080,6 @@ impl ScalarFunction { ScalarFunction::Levenshtein => "Levenshtein", ScalarFunction::SubstrIndex => "SubstrIndex", ScalarFunction::FindInSet => "FindInSet", - ScalarFunction::ArraySort => "ArraySort", - ScalarFunction::ArrayDistinct => "ArrayDistinct", ScalarFunction::ArrayResize => "ArrayResize", ScalarFunction::EndsWith => "EndsWith", ScalarFunction::MakeDate => "MakeDate", @@ -2892,9 +3106,7 @@ impl ScalarFunction { "Signum" => Some(Self::Signum), "Sin" => Some(Self::Sin), "Sqrt" => Some(Self::Sqrt), - "Tan" => Some(Self::Tan), "Trunc" => Some(Self::Trunc), - "Array" => Some(Self::Array), "BitLength" => Some(Self::BitLength), "Btrim" => Some(Self::Btrim), "CharacterLength" => Some(Self::CharacterLength), @@ -2924,14 +3136,16 @@ impl ScalarFunction { "Strpos" => Some(Self::Strpos), "Substr" => Some(Self::Substr), "ToHex" => Some(Self::ToHex), + "Now" => Some(Self::Now), "Translate" => Some(Self::Translate), "Trim" => Some(Self::Trim), "Upper" => Some(Self::Upper), "Coalesce" => Some(Self::Coalesce), "Power" => Some(Self::Power), - "StructFun" => Some(Self::StructFun), + "FromUnixtime" => Some(Self::FromUnixtime), "Atan2" => Some(Self::Atan2), - "ArrowTypeof" => Some(Self::ArrowTypeof), + "CurrentDate" => Some(Self::CurrentDate), + "CurrentTime" => Some(Self::CurrentTime), "Uuid" => Some(Self::Uuid), "Cbrt" => Some(Self::Cbrt), "Acosh" => Some(Self::Acosh), @@ -2939,19 +3153,14 @@ impl ScalarFunction { "Atanh" => Some(Self::Atanh), "Sinh" => Some(Self::Sinh), "Cosh" => Some(Self::Cosh), - "Tanh" => Some(Self::Tanh), "Pi" => Some(Self::Pi), "Degrees" => Some(Self::Degrees), "Radians" => Some(Self::Radians), "Factorial" => Some(Self::Factorial), "Lcm" => Some(Self::Lcm), "Gcd" => Some(Self::Gcd), - "ArrayAppend" => Some(Self::ArrayAppend), - "ArrayConcat" => Some(Self::ArrayConcat), - "ArrayRepeat" => Some(Self::ArrayRepeat), "ArrayPosition" => Some(Self::ArrayPosition), "ArrayPositions" => Some(Self::ArrayPositions), - "ArrayPrepend" => Some(Self::ArrayPrepend), "ArrayRemove" => Some(Self::ArrayRemove), "ArrayReplace" => Some(Self::ArrayReplace), "ArrayElement" => Some(Self::ArrayElement), @@ -2964,7 +3173,6 @@ impl ScalarFunction { "Nanvl" => Some(Self::Nanvl), "Iszero" => Some(Self::Iszero), "ArrayPopBack" => Some(Self::ArrayPopBack), - "StringToArray" => Some(Self::StringToArray), "ArrayIntersect" => Some(Self::ArrayIntersect), "ArrayUnion" => Some(Self::ArrayUnion), "OverLay" => Some(Self::OverLay), @@ -2973,8 +3181,6 @@ impl ScalarFunction { "Levenshtein" => Some(Self::Levenshtein), "SubstrIndex" => Some(Self::SubstrIndex), "FindInSet" => Some(Self::FindInSet), - "ArraySort" => Some(Self::ArraySort), - "ArrayDistinct" => Some(Self::ArrayDistinct), "ArrayResize" => Some(Self::ArrayResize), "EndsWith" => Some(Self::EndsWith), "MakeDate" => Some(Self::MakeDate), diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index 1af661ad8e5f..4b9bd45fd55b 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -47,11 +47,10 @@ use datafusion_common::{ use datafusion_expr::expr::Unnest; use datafusion_expr::window_frame::{check_window_frame, regularize_window_order_by}; use datafusion_expr::{ - acosh, array, array_append, array_concat, array_distinct, array_element, - array_except, array_intersect, array_pop_back, array_pop_front, array_position, - array_positions, array_prepend, array_remove, array_remove_all, 
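Net effect on the prost ScalarFunction enum: Now, FromUnixtime, CurrentDate and CurrentTime are (re)introduced, while Tan, Tanh, Array, the Array* constructors, StringToArray, StructFun and ArrowTypeof are dropped and their tags kept only as comments. A quick check via the prost-generated from_str_name helper (crate path assumed to be datafusion_proto::protobuf):

use datafusion_proto::protobuf::ScalarFunction;

fn main() {
    // Newly (re)added variants resolve by name...
    assert_eq!(ScalarFunction::from_str_name("Now"), Some(ScalarFunction::Now));
    assert_eq!(
        ScalarFunction::from_str_name("CurrentDate"),
        Some(ScalarFunction::CurrentDate)
    );
    // ...while removed ones no longer map to anything.
    assert_eq!(ScalarFunction::from_str_name("Tan"), None);
    assert_eq!(ScalarFunction::from_str_name("ArrayAppend"), None);
}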
array_remove_n, - array_repeat, array_replace, array_replace_all, array_replace_n, array_resize, - array_slice, array_sort, array_union, arrow_typeof, ascii, asinh, atan, atan2, atanh, + acosh, array_element, array_except, array_intersect, array_pop_back, array_pop_front, + array_position, array_positions, array_remove, array_remove_all, array_remove_n, + array_replace, array_replace_all, array_replace_n, array_resize, array_slice, + array_union, ascii, asinh, atan, atan2, atanh, bit_length, btrim, cbrt, ceil, character_length, chr, coalesce, concat_expr, concat_ws_expr, cos, cosh, cot, degrees, digest, ends_with, exp, expr::{self, InList, Sort, WindowFunction}, @@ -60,11 +59,10 @@ use datafusion_expr::{ logical_plan::{PlanType, StringifiedPlan}, lower, lpad, ltrim, md5, nanvl, octet_length, overlay, pi, power, radians, random, repeat, replace, reverse, right, round, rpad, rtrim, sha224, sha256, sha384, sha512, - signum, sin, sinh, split_part, sqrt, starts_with, string_to_array, strpos, - struct_fun, substr, substr_index, substring, tan, tanh, to_hex, translate, trim, - trunc, upper, uuid, AggregateFunction, Between, BinaryExpr, BuiltInWindowFunction, - BuiltinScalarFunction, Case, Cast, Expr, GetFieldAccess, GetIndexedField, - GroupingSet, + signum, sin, sinh, split_part, sqrt, starts_with, strpos, substr, + substr_index, substring, to_hex, translate, trim, trunc, upper, uuid, + AggregateFunction, Between, BinaryExpr, BuiltInWindowFunction, BuiltinScalarFunction, + Case, Cast, Expr, GetFieldAccess, GetIndexedField, GroupingSet, GroupingSet::GroupingSets, JoinConstraint, JoinType, Like, Operator, TryCast, WindowFrame, WindowFrameBound, WindowFrameUnits, @@ -447,12 +445,10 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::Cbrt => Self::Cbrt, ScalarFunction::Sin => Self::Sin, ScalarFunction::Cos => Self::Cos, - ScalarFunction::Tan => Self::Tan, ScalarFunction::Cot => Self::Cot, ScalarFunction::Atan => Self::Atan, ScalarFunction::Sinh => Self::Sinh, ScalarFunction::Cosh => Self::Cosh, - ScalarFunction::Tanh => Self::Tanh, ScalarFunction::Asinh => Self::Asinh, ScalarFunction::Acosh => Self::Acosh, ScalarFunction::Atanh => Self::Atanh, @@ -476,18 +472,12 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::Trim => Self::Trim, ScalarFunction::Ltrim => Self::Ltrim, ScalarFunction::Rtrim => Self::Rtrim, - ScalarFunction::ArrayAppend => Self::ArrayAppend, - ScalarFunction::ArraySort => Self::ArraySort, - ScalarFunction::ArrayConcat => Self::ArrayConcat, ScalarFunction::ArrayExcept => Self::ArrayExcept, - ScalarFunction::ArrayDistinct => Self::ArrayDistinct, ScalarFunction::ArrayElement => Self::ArrayElement, ScalarFunction::ArrayPopFront => Self::ArrayPopFront, ScalarFunction::ArrayPopBack => Self::ArrayPopBack, ScalarFunction::ArrayPosition => Self::ArrayPosition, ScalarFunction::ArrayPositions => Self::ArrayPositions, - ScalarFunction::ArrayPrepend => Self::ArrayPrepend, - ScalarFunction::ArrayRepeat => Self::ArrayRepeat, ScalarFunction::ArrayRemove => Self::ArrayRemove, ScalarFunction::ArrayRemoveN => Self::ArrayRemoveN, ScalarFunction::ArrayRemoveAll => Self::ArrayRemoveAll, @@ -499,7 +489,6 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::ArrayIntersect => Self::ArrayIntersect, ScalarFunction::ArrayUnion => Self::ArrayUnion, ScalarFunction::ArrayResize => Self::ArrayResize, - ScalarFunction::Array => Self::MakeArray, ScalarFunction::Md5 => Self::MD5, ScalarFunction::Sha224 => Self::SHA224, 
ScalarFunction::Sha256 => Self::SHA256, @@ -525,7 +514,6 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::Right => Self::Right, ScalarFunction::Rpad => Self::Rpad, ScalarFunction::SplitPart => Self::SplitPart, - ScalarFunction::StringToArray => Self::StringToArray, ScalarFunction::StartsWith => Self::StartsWith, ScalarFunction::Strpos => Self::Strpos, ScalarFunction::Substr => Self::Substr, @@ -537,11 +525,9 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::Coalesce => Self::Coalesce, ScalarFunction::Pi => Self::Pi, ScalarFunction::Power => Self::Power, - ScalarFunction::StructFun => Self::Struct, - ScalarFunction::Atan2 => Self::Atan2, + ScalarFunction::Atan2 => Self::Atan2, ScalarFunction::Nanvl => Self::Nanvl, ScalarFunction::Iszero => Self::Iszero, - ScalarFunction::ArrowTypeof => Self::ArrowTypeof, ScalarFunction::OverLay => Self::OverLay, ScalarFunction::Levenshtein => Self::Levenshtein, ScalarFunction::SubstrIndex => Self::SubstrIndex, @@ -1404,37 +1390,12 @@ pub fn parse_expr( ScalarFunction::Acosh => { Ok(acosh(parse_expr(&args[0], registry, codec)?)) } - ScalarFunction::Array => Ok(array( - args.to_owned() - .iter() - .map(|expr| parse_expr(expr, registry, codec)) - .collect::, _>>()?, - )), - ScalarFunction::ArrayAppend => Ok(array_append( - parse_expr(&args[0], registry, codec)?, - parse_expr(&args[1], registry, codec)?, - )), - ScalarFunction::ArraySort => Ok(array_sort( - parse_expr(&args[0], registry, codec)?, - parse_expr(&args[1], registry, codec)?, - parse_expr(&args[2], registry, codec)?, - )), ScalarFunction::ArrayPopFront => { Ok(array_pop_front(parse_expr(&args[0], registry, codec)?)) } ScalarFunction::ArrayPopBack => { Ok(array_pop_back(parse_expr(&args[0], registry, codec)?)) } - ScalarFunction::ArrayPrepend => Ok(array_prepend( - parse_expr(&args[0], registry, codec)?, - parse_expr(&args[1], registry, codec)?, - )), - ScalarFunction::ArrayConcat => Ok(array_concat( - args.to_owned() - .iter() - .map(|expr| parse_expr(expr, registry, codec)) - .collect::, _>>()?, - )), ScalarFunction::ArrayExcept => Ok(array_except( parse_expr(&args[0], registry, codec)?, parse_expr(&args[1], registry, codec)?, @@ -1452,10 +1413,6 @@ pub fn parse_expr( parse_expr(&args[0], registry, codec)?, parse_expr(&args[1], registry, codec)?, )), - ScalarFunction::ArrayRepeat => Ok(array_repeat( - parse_expr(&args[0], registry, codec)?, - parse_expr(&args[1], registry, codec)?, - )), ScalarFunction::ArrayRemove => Ok(array_remove( parse_expr(&args[0], registry, codec)?, parse_expr(&args[1], registry, codec)?, @@ -1494,9 +1451,6 @@ pub fn parse_expr( parse_expr(&args[2], registry, codec)?, parse_expr(&args[3], registry, codec)?, )), - ScalarFunction::ArrayDistinct => { - Ok(array_distinct(parse_expr(&args[0], registry, codec)?)) - } ScalarFunction::ArrayElement => Ok(array_element( parse_expr(&args[0], registry, codec)?, parse_expr(&args[1], registry, codec)?, @@ -1514,11 +1468,9 @@ pub fn parse_expr( ScalarFunction::Cbrt => Ok(cbrt(parse_expr(&args[0], registry, codec)?)), ScalarFunction::Sin => Ok(sin(parse_expr(&args[0], registry, codec)?)), ScalarFunction::Cos => Ok(cos(parse_expr(&args[0], registry, codec)?)), - ScalarFunction::Tan => Ok(tan(parse_expr(&args[0], registry, codec)?)), ScalarFunction::Atan => Ok(atan(parse_expr(&args[0], registry, codec)?)), ScalarFunction::Sinh => Ok(sinh(parse_expr(&args[0], registry, codec)?)), ScalarFunction::Cosh => Ok(cosh(parse_expr(&args[0], registry, codec)?)), - ScalarFunction::Tanh 
=> Ok(tanh(parse_expr(&args[0], registry, codec)?)), ScalarFunction::Atanh => { Ok(atanh(parse_expr(&args[0], registry, codec)?)) } @@ -1753,14 +1705,6 @@ pub fn parse_expr( ScalarFunction::Iszero => { Ok(iszero(parse_expr(&args[0], registry, codec)?)) } - ScalarFunction::ArrowTypeof => { - Ok(arrow_typeof(parse_expr(&args[0], registry, codec)?)) - } - ScalarFunction::StringToArray => Ok(string_to_array( - parse_expr(&args[0], registry, codec)?, - parse_expr(&args[1], registry, codec)?, - parse_expr(&args[2], registry, codec)?, - )), ScalarFunction::OverLay => Ok(overlay( args.to_owned() .iter() @@ -1776,9 +1720,6 @@ pub fn parse_expr( parse_expr(&args[0], registry, codec)?, parse_expr(&args[1], registry, codec)?, )), - ScalarFunction::StructFun => { - Ok(struct_fun(parse_expr(&args[0], registry, codec)?)) - } } } ExprType::ScalarUdfExpr(protobuf::ScalarUdfExprNode { diff --git a/datafusion/proto/src/logical_plan/mod.rs b/datafusion/proto/src/logical_plan/mod.rs index 7acad1844d48..9b3b677e3c0a 100644 --- a/datafusion/proto/src/logical_plan/mod.rs +++ b/datafusion/proto/src/logical_plan/mod.rs @@ -15,20 +15,13 @@ // specific language governing permissions and limitations // under the License. -use arrow::csv::WriterBuilder; -use datafusion_common::file_options::arrow_writer::ArrowWriterOptions; -use datafusion_expr::ScalarUDF; use std::collections::HashMap; use std::fmt::Debug; -use std::str::FromStr; use std::sync::Arc; -use crate::common::{byte_to_string, proto_error, str_to_byte}; +use crate::common::proto_error; use crate::protobuf::logical_plan_node::LogicalPlanType::CustomScan; -use crate::protobuf::{ - copy_to_node, file_type_writer_options, CustomTableScanNode, - LogicalExprNodeCollection, SqlOption, -}; +use crate::protobuf::{CustomTableScanNode, LogicalExprNodeCollection}; use crate::{ convert_required, protobuf::{ @@ -37,6 +30,7 @@ use crate::{ }, }; +use arrow::csv::WriterBuilder; use arrow::datatypes::{DataType, Schema, SchemaRef}; #[cfg(feature = "parquet")] use datafusion::datasource::file_format::parquet::ParquetFormat; @@ -51,9 +45,8 @@ use datafusion::{ prelude::SessionContext, }; use datafusion_common::{ - context, file_options::StatementOptions, internal_err, not_impl_err, - parsers::CompressionTypeVariant, plan_datafusion_err, DataFusionError, FileType, - FileTypeWriterOptions, OwnedTableReference, Result, + context, internal_err, not_impl_err, parsers::CompressionTypeVariant, + plan_datafusion_err, DataFusionError, OwnedTableReference, Result, }; use datafusion_expr::{ dml, @@ -63,13 +56,9 @@ use datafusion_expr::{ EmptyRelation, Extension, Join, JoinConstraint, Limit, Prepare, Projection, Repartition, Sort, SubqueryAlias, TableScan, Values, Window, }, - DistinctOn, DropView, Expr, LogicalPlan, LogicalPlanBuilder, + DistinctOn, DropView, Expr, LogicalPlan, LogicalPlanBuilder, ScalarUDF, }; -use datafusion::parquet::file::properties::{WriterProperties, WriterVersion}; -use datafusion_common::file_options::csv_writer::CsvWriterOptions; -use datafusion_common::file_options::parquet_writer::ParquetWriterOptions; -use datafusion_expr::dml::CopyOptions; use prost::bytes::BufMut; use prost::Message; @@ -361,21 +350,19 @@ impl AsLogicalPlan for LogicalPlanNode { )) })? 
{ #[cfg(feature = "parquet")] - &FileFormatType::Parquet(protobuf::ParquetFormat {}) => { - Arc::new(ParquetFormat::default()) + FileFormatType::Parquet(protobuf::ParquetFormat {options}) => { + let mut parquet = ParquetFormat::default(); + if let Some(options) = options { + parquet = parquet.with_options(options.try_into()?) + } + Arc::new(parquet) } FileFormatType::Csv(protobuf::CsvFormat { - has_header, - delimiter, - quote, - optional_escape + options }) => { - let mut csv = CsvFormat::default() - .with_has_header(*has_header) - .with_delimiter(str_to_byte(delimiter, "delimiter")?) - .with_quote(str_to_byte(quote, "quote")?); - if let Some(protobuf::csv_format::OptionalEscape::Escape(escape)) = optional_escape { - csv = csv.with_quote(str_to_byte(escape, "escape")?); + let mut csv = CsvFormat::default(); + if let Some(options) = options { + csv = csv.with_options(options.try_into()?) } Arc::new(csv)}, FileFormatType::Avro(..) => Arc::new(AvroFormat), @@ -864,80 +851,13 @@ impl AsLogicalPlan for LogicalPlanNode { let input: LogicalPlan = into_logical_plan!(copy.input, ctx, extension_codec)?; - let copy_options = match ©.copy_options { - Some(copy_to_node::CopyOptions::SqlOptions(opt)) => { - let options = opt - .option - .iter() - .map(|o| (o.key.clone(), o.value.clone())) - .collect(); - CopyOptions::SQLOptions(StatementOptions::from(&options)) - } - Some(copy_to_node::CopyOptions::WriterOptions(opt)) => { - match &opt.file_type { - Some(ft) => match ft { - file_type_writer_options::FileType::ArrowOptions(_) => { - CopyOptions::WriterOptions(Box::new( - FileTypeWriterOptions::Arrow( - ArrowWriterOptions::new(), - ), - )) - } - file_type_writer_options::FileType::CsvOptions( - writer_options, - ) => { - let writer_builder = - csv_writer_options_from_proto(writer_options)?; - CopyOptions::WriterOptions(Box::new( - FileTypeWriterOptions::CSV( - CsvWriterOptions::new( - writer_builder, - CompressionTypeVariant::UNCOMPRESSED, - ), - ), - )) - } - file_type_writer_options::FileType::ParquetOptions( - writer_options, - ) => { - let writer_properties = - match &writer_options.writer_properties { - Some(serialized_writer_options) => { - writer_properties_from_proto( - serialized_writer_options, - )? 
- } - _ => WriterProperties::default(), - }; - CopyOptions::WriterOptions(Box::new( - FileTypeWriterOptions::Parquet( - ParquetWriterOptions::new(writer_properties), - ), - )) - } - _ => { - return Err(proto_error( - "WriterOptions unsupported file_type", - )) - } - }, - None => { - return Err(proto_error( - "WriterOptions missing file_type", - )) - } - } - } - None => return Err(proto_error("CopyTo missing CopyOptions")), - }; - Ok(datafusion_expr::LogicalPlan::Copy( datafusion_expr::dml::CopyTo { input: Arc::new(input), output_url: copy.output_url.clone(), - file_format: FileType::from_str(©.file_type)?, partition_by: copy.partition_by.clone(), - copy_options, + format_options: convert_required!(copy.format_options)?, + options: Default::default(), }, )) } @@ -1008,30 +928,20 @@ impl AsLogicalPlan for LogicalPlanNode { let mut maybe_some_type = None; #[cfg(feature = "parquet")] - if any.is::() { + if let Some(parquet) = any.downcast_ref::() { + let options = parquet.options(); maybe_some_type = - Some(FileFormatType::Parquet(protobuf::ParquetFormat {})) + Some(FileFormatType::Parquet(protobuf::ParquetFormat { + options: Some(options.try_into()?), + })); }; if let Some(csv) = any.downcast_ref::() { + let options = csv.options(); maybe_some_type = Some(FileFormatType::Csv(protobuf::CsvFormat { - delimiter: byte_to_string( - csv.delimiter(), - "delimiter", - )?, - has_header: csv.has_header(), - quote: byte_to_string(csv.quote(), "quote")?, - optional_escape: if let Some(escape) = csv.escape() { - Some( - protobuf::csv_format::OptionalEscape::Escape( - byte_to_string(escape, "escape")?, - ), - ) - } else { - None - }, - })) + options: Some(options.try_into()?), + })); } if any.is::() { @@ -1672,92 +1582,21 @@ impl AsLogicalPlan for LogicalPlanNode { LogicalPlan::Copy(dml::CopyTo { input, output_url, - file_format, - copy_options, + format_options, partition_by, + .. 
}) => { let input = protobuf::LogicalPlanNode::try_from_logical_plan( input, extension_codec, )?; - let copy_options_proto: Option = - match copy_options { - CopyOptions::SQLOptions(opt) => { - let options: Vec = opt - .clone() - .into_inner() - .iter() - .map(|(k, v)| SqlOption { - key: k.to_string(), - value: v.to_string(), - }) - .collect(); - Some(copy_to_node::CopyOptions::SqlOptions( - protobuf::SqlOptions { option: options }, - )) - } - CopyOptions::WriterOptions(opt) => { - match opt.as_ref() { - FileTypeWriterOptions::Arrow(_) => { - let arrow_writer_options = - file_type_writer_options::FileType::ArrowOptions( - protobuf::ArrowWriterOptions {}, - ); - Some(copy_to_node::CopyOptions::WriterOptions( - protobuf::FileTypeWriterOptions { - file_type: Some(arrow_writer_options), - }, - )) - } - FileTypeWriterOptions::CSV(csv_opts) => { - let csv_options = &csv_opts.writer_options; - let csv_writer_options = csv_writer_options_to_proto( - csv_options, - &csv_opts.compression, - ); - let csv_options = - file_type_writer_options::FileType::CsvOptions( - csv_writer_options, - ); - Some(copy_to_node::CopyOptions::WriterOptions( - protobuf::FileTypeWriterOptions { - file_type: Some(csv_options), - }, - )) - } - FileTypeWriterOptions::Parquet(parquet_opts) => { - let parquet_writer_options = - protobuf::ParquetWriterOptions { - writer_properties: Some( - writer_properties_to_proto( - &parquet_opts.writer_options, - ), - ), - }; - let parquet_options = file_type_writer_options::FileType::ParquetOptions(parquet_writer_options); - Some(copy_to_node::CopyOptions::WriterOptions( - protobuf::FileTypeWriterOptions { - file_type: Some(parquet_options), - }, - )) - } - _ => { - return Err(proto_error( - "Unsupported FileTypeWriterOptions in CopyTo", - )) - } - } - } - }; - Ok(protobuf::LogicalPlanNode { logical_plan_type: Some(LogicalPlanType::CopyTo(Box::new( protobuf::CopyToNode { input: Some(Box::new(input)), output_url: output_url.to_string(), - file_type: file_format.to_string(), - copy_options: copy_options_proto, + format_options: Some(format_options.try_into()?), partition_by: partition_by.clone(), }, ))), @@ -1813,33 +1652,3 @@ pub(crate) fn csv_writer_options_from_proto( .with_time_format(writer_options.time_format.clone()) .with_null(writer_options.null_value.clone())) } - -pub(crate) fn writer_properties_to_proto( - props: &WriterProperties, -) -> protobuf::WriterProperties { - protobuf::WriterProperties { - data_page_size_limit: props.data_page_size_limit() as u64, - dictionary_page_size_limit: props.dictionary_page_size_limit() as u64, - data_page_row_count_limit: props.data_page_row_count_limit() as u64, - write_batch_size: props.write_batch_size() as u64, - max_row_group_size: props.max_row_group_size() as u64, - writer_version: format!("{:?}", props.writer_version()), - created_by: props.created_by().to_string(), - } -} - -pub(crate) fn writer_properties_from_proto( - props: &protobuf::WriterProperties, -) -> Result { - let writer_version = - WriterVersion::from_str(&props.writer_version).map_err(proto_error)?; - Ok(WriterProperties::builder() - .set_created_by(props.created_by.clone()) - .set_writer_version(writer_version) - .set_dictionary_page_size_limit(props.dictionary_page_size_limit as usize) - .set_data_page_row_count_limit(props.data_page_row_count_limit as usize) - .set_data_page_size_limit(props.data_page_size_limit as usize) - .set_write_batch_size(props.write_batch_size as usize) - .set_max_row_group_size(props.max_row_group_size as usize) - .build()) -} diff --git 
a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index 7024a9fab3f9..65b4c8ba0445 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -1426,11 +1426,9 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { BuiltinScalarFunction::Cbrt => Self::Cbrt, BuiltinScalarFunction::Sin => Self::Sin, BuiltinScalarFunction::Cos => Self::Cos, - BuiltinScalarFunction::Tan => Self::Tan, BuiltinScalarFunction::Cot => Self::Cot, BuiltinScalarFunction::Sinh => Self::Sinh, BuiltinScalarFunction::Cosh => Self::Cosh, - BuiltinScalarFunction::Tanh => Self::Tanh, BuiltinScalarFunction::Atan => Self::Atan, BuiltinScalarFunction::Asinh => Self::Asinh, BuiltinScalarFunction::Acosh => Self::Acosh, @@ -1456,18 +1454,12 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { BuiltinScalarFunction::Ltrim => Self::Ltrim, BuiltinScalarFunction::Rtrim => Self::Rtrim, BuiltinScalarFunction::ToChar => Self::ToChar, - BuiltinScalarFunction::ArrayAppend => Self::ArrayAppend, - BuiltinScalarFunction::ArraySort => Self::ArraySort, - BuiltinScalarFunction::ArrayConcat => Self::ArrayConcat, BuiltinScalarFunction::ArrayExcept => Self::ArrayExcept, - BuiltinScalarFunction::ArrayDistinct => Self::ArrayDistinct, BuiltinScalarFunction::ArrayElement => Self::ArrayElement, BuiltinScalarFunction::ArrayPopFront => Self::ArrayPopFront, BuiltinScalarFunction::ArrayPopBack => Self::ArrayPopBack, BuiltinScalarFunction::ArrayPosition => Self::ArrayPosition, BuiltinScalarFunction::ArrayPositions => Self::ArrayPositions, - BuiltinScalarFunction::ArrayPrepend => Self::ArrayPrepend, - BuiltinScalarFunction::ArrayRepeat => Self::ArrayRepeat, BuiltinScalarFunction::ArrayResize => Self::ArrayResize, BuiltinScalarFunction::ArrayRemove => Self::ArrayRemove, BuiltinScalarFunction::ArrayRemoveN => Self::ArrayRemoveN, @@ -1479,7 +1471,6 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { BuiltinScalarFunction::ArraySlice => Self::ArraySlice, BuiltinScalarFunction::ArrayIntersect => Self::ArrayIntersect, BuiltinScalarFunction::ArrayUnion => Self::ArrayUnion, - BuiltinScalarFunction::MakeArray => Self::Array, BuiltinScalarFunction::MD5 => Self::Md5, BuiltinScalarFunction::SHA224 => Self::Sha224, BuiltinScalarFunction::SHA256 => Self::Sha256, @@ -1506,7 +1497,6 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { BuiltinScalarFunction::Right => Self::Right, BuiltinScalarFunction::Rpad => Self::Rpad, BuiltinScalarFunction::SplitPart => Self::SplitPart, - BuiltinScalarFunction::StringToArray => Self::StringToArray, BuiltinScalarFunction::StartsWith => Self::StartsWith, BuiltinScalarFunction::Strpos => Self::Strpos, BuiltinScalarFunction::Substr => Self::Substr, @@ -1516,11 +1506,9 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { BuiltinScalarFunction::Coalesce => Self::Coalesce, BuiltinScalarFunction::Pi => Self::Pi, BuiltinScalarFunction::Power => Self::Power, - BuiltinScalarFunction::Struct => Self::StructFun, BuiltinScalarFunction::Atan2 => Self::Atan2, BuiltinScalarFunction::Nanvl => Self::Nanvl, BuiltinScalarFunction::Iszero => Self::Iszero, - BuiltinScalarFunction::ArrowTypeof => Self::ArrowTypeof, BuiltinScalarFunction::OverLay => Self::OverLay, BuiltinScalarFunction::Levenshtein => Self::Levenshtein, BuiltinScalarFunction::SubstrIndex => Self::SubstrIndex, diff --git a/datafusion/proto/src/physical_plan/from_proto.rs 
b/datafusion/proto/src/physical_plan/from_proto.rs index d3b41f114fba..16f0e94cad83 100644 --- a/datafusion/proto/src/physical_plan/from_proto.rs +++ b/datafusion/proto/src/physical_plan/from_proto.rs @@ -17,9 +17,16 @@ //! Serde code to convert from protocol buffers to Rust data structures. +use std::collections::HashMap; use std::convert::{TryFrom, TryInto}; use std::sync::Arc; +use crate::common::proto_error; +use crate::convert_required; +use crate::logical_plan::{self, csv_writer_options_from_proto}; +use crate::protobuf::physical_expr_node::ExprType; +use crate::protobuf::{self, copy_to_node}; + use arrow::compute::SortOptions; use datafusion::arrow::datatypes::Schema; use datafusion::datasource::file_format::csv::CsvSink; @@ -34,31 +41,24 @@ use datafusion::execution::FunctionRegistry; use datafusion::logical_expr::WindowFunctionDefinition; use datafusion::physical_expr::{PhysicalSortExpr, ScalarFunctionExpr}; use datafusion::physical_plan::expressions::{ - in_list, BinaryExpr, CaseExpr, CastExpr, Column, IsNotNullExpr, IsNullExpr, LikeExpr, - Literal, NegativeExpr, NotExpr, TryCastExpr, + in_list, BinaryExpr, CaseExpr, CastExpr, Column, GetFieldAccessExpr, + GetIndexedFieldExpr, IsNotNullExpr, IsNullExpr, LikeExpr, Literal, NegativeExpr, + NotExpr, TryCastExpr, }; -use datafusion::physical_plan::expressions::{GetFieldAccessExpr, GetIndexedFieldExpr}; use datafusion::physical_plan::windows::create_window_expr; use datafusion::physical_plan::{ functions, ColumnStatistics, Partitioning, PhysicalExpr, Statistics, WindowExpr, }; -use datafusion_common::file_options::arrow_writer::ArrowWriterOptions; +use datafusion_common::config::{ + ColumnOptions, CsvOptions, FormatOptions, JsonOptions, ParquetOptions, + TableParquetOptions, +}; use datafusion_common::file_options::csv_writer::CsvWriterOptions; use datafusion_common::file_options::json_writer::JsonWriterOptions; -use datafusion_common::file_options::parquet_writer::ParquetWriterOptions; use datafusion_common::parsers::CompressionTypeVariant; use datafusion_common::stats::Precision; -use datafusion_common::{ - not_impl_err, DataFusionError, FileTypeWriterOptions, JoinSide, Result, ScalarValue, -}; - -use crate::common::proto_error; -use crate::convert_required; -use crate::logical_plan; -use crate::protobuf; -use crate::protobuf::physical_expr_node::ExprType; +use datafusion_common::{not_impl_err, DataFusionError, JoinSide, Result, ScalarValue}; -use crate::logical_plan::{csv_writer_options_from_proto, writer_properties_from_proto}; use chrono::{TimeZone, Utc}; use object_store::path::Path; use object_store::ObjectMeta; @@ -735,7 +735,10 @@ impl TryFrom<&protobuf::JsonSink> for JsonSink { type Error = DataFusionError; fn try_from(value: &protobuf::JsonSink) -> Result { - Ok(Self::new(convert_required!(value.config)?)) + Ok(Self::new( + convert_required!(value.config)?, + convert_required!(value.writer_options)?, + )) } } @@ -744,7 +747,10 @@ impl TryFrom<&protobuf::ParquetSink> for ParquetSink { type Error = DataFusionError; fn try_from(value: &protobuf::ParquetSink) -> Result { - Ok(Self::new(convert_required!(value.config)?)) + Ok(Self::new( + convert_required!(value.config)?, + convert_required!(value.parquet_options)?, + )) } } @@ -752,7 +758,10 @@ impl TryFrom<&protobuf::CsvSink> for CsvSink { type Error = DataFusionError; fn try_from(value: &protobuf::CsvSink) -> Result { - Ok(Self::new(convert_required!(value.config)?)) + Ok(Self::new( + convert_required!(value.config)?, + convert_required!(value.writer_options)?, + )) } } @@ 
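// Illustrative sketch, not part of this diff: exercising the new writer-option
// conversions added in this file and in to_proto.rs. The function name and the
// sample delimiter/compression values are ours; the paths follow the imports above.
fn csv_writer_options_proto_roundtrip() -> datafusion_common::Result<()> {
    use arrow::csv::WriterBuilder;
    use datafusion_common::file_options::csv_writer::CsvWriterOptions;
    use datafusion_common::parsers::CompressionTypeVariant;

    // Build logical CSV writer options, convert to the protobuf message and back.
    let opts = CsvWriterOptions::new(
        WriterBuilder::new().with_delimiter(b'|'),
        CompressionTypeVariant::GZIP,
    );
    let proto: protobuf::CsvWriterOptions = (&opts).try_into()?;
    let back: CsvWriterOptions = (&proto).try_into()?;
    assert_eq!(back.compression, CompressionTypeVariant::GZIP);
    Ok(())
}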
-785,7 +794,6 @@ impl TryFrom<&protobuf::FileSinkConfig> for FileSinkConfig { output_schema: Arc::new(convert_required!(conf.output_schema)?), table_partition_cols, overwrite: conf.overwrite, - file_type_writer_options: convert_required!(conf.file_type_writer_options)?, }) } } @@ -814,34 +822,223 @@ impl From for protobuf::CompressionTypeVariant { } } -impl TryFrom<&protobuf::FileTypeWriterOptions> for FileTypeWriterOptions { +impl TryFrom<&protobuf::CsvWriterOptions> for CsvWriterOptions { + type Error = DataFusionError; + + fn try_from(opts: &protobuf::CsvWriterOptions) -> Result { + let write_options = csv_writer_options_from_proto(opts)?; + let compression: CompressionTypeVariant = opts.compression().into(); + Ok(CsvWriterOptions::new(write_options, compression)) + } +} + +impl TryFrom<&protobuf::JsonWriterOptions> for JsonWriterOptions { + type Error = DataFusionError; + + fn try_from(opts: &protobuf::JsonWriterOptions) -> Result { + let compression: CompressionTypeVariant = opts.compression().into(); + Ok(JsonWriterOptions::new(compression)) + } +} + +impl TryFrom<&protobuf::CsvOptions> for CsvOptions { + type Error = DataFusionError; + + fn try_from(proto_opts: &protobuf::CsvOptions) -> Result { + Ok(CsvOptions { + has_header: proto_opts.has_header, + delimiter: proto_opts.delimiter[0], + quote: proto_opts.quote[0], + escape: proto_opts.escape.first().copied(), + compression: proto_opts.compression().into(), + schema_infer_max_rec: proto_opts.schema_infer_max_rec as usize, + date_format: (!proto_opts.date_format.is_empty()) + .then(|| proto_opts.date_format.clone()), + datetime_format: (!proto_opts.datetime_format.is_empty()) + .then(|| proto_opts.datetime_format.clone()), + timestamp_format: (!proto_opts.timestamp_format.is_empty()) + .then(|| proto_opts.timestamp_format.clone()), + timestamp_tz_format: (!proto_opts.timestamp_tz_format.is_empty()) + .then(|| proto_opts.timestamp_tz_format.clone()), + time_format: (!proto_opts.time_format.is_empty()) + .then(|| proto_opts.time_format.clone()), + null_value: (!proto_opts.null_value.is_empty()) + .then(|| proto_opts.null_value.clone()), + }) + } +} + +impl TryFrom<&protobuf::ParquetOptions> for ParquetOptions { type Error = DataFusionError; - fn try_from(value: &protobuf::FileTypeWriterOptions) -> Result { - let file_type = value - .file_type - .as_ref() - .ok_or_else(|| proto_error("Missing required file_type field in protobuf"))?; + fn try_from(value: &protobuf::ParquetOptions) -> Result { + Ok(ParquetOptions { + enable_page_index: value.enable_page_index, + pruning: value.pruning, + skip_metadata: value.skip_metadata, + metadata_size_hint: value + .metadata_size_hint_opt.clone() + .map(|opt| match opt { + protobuf::parquet_options::MetadataSizeHintOpt::MetadataSizeHint(v) => Some(v as usize), + }) + .unwrap_or(None), + pushdown_filters: value.pushdown_filters, + reorder_filters: value.reorder_filters, + data_pagesize_limit: value.data_pagesize_limit as usize, + write_batch_size: value.write_batch_size as usize, + writer_version: value.writer_version.clone(), + compression: value.compression_opt.clone().map(|opt| match opt { + protobuf::parquet_options::CompressionOpt::Compression(v) => Some(v), + }).unwrap_or(None), + dictionary_enabled: value.dictionary_enabled_opt.as_ref().map(|protobuf::parquet_options::DictionaryEnabledOpt::DictionaryEnabled(v)| *v), + // Continuing from where we left off in the TryFrom implementation + dictionary_page_size_limit: value.dictionary_page_size_limit as usize, + statistics_enabled: value + 
.statistics_enabled_opt.clone() + .map(|opt| match opt { + protobuf::parquet_options::StatisticsEnabledOpt::StatisticsEnabled(v) => Some(v), + }) + .unwrap_or(None), + max_statistics_size: value + .max_statistics_size_opt.as_ref() + .map(|opt| match opt { + protobuf::parquet_options::MaxStatisticsSizeOpt::MaxStatisticsSize(v) => Some(*v as usize), + }) + .unwrap_or(None), + max_row_group_size: value.max_row_group_size as usize, + created_by: value.created_by.clone(), + column_index_truncate_length: value + .column_index_truncate_length_opt.as_ref() + .map(|opt| match opt { + protobuf::parquet_options::ColumnIndexTruncateLengthOpt::ColumnIndexTruncateLength(v) => Some(*v as usize), + }) + .unwrap_or(None), + data_page_row_count_limit: value.data_page_row_count_limit as usize, + encoding: value + .encoding_opt.clone() + .map(|opt| match opt { + protobuf::parquet_options::EncodingOpt::Encoding(v) => Some(v), + }) + .unwrap_or(None), + bloom_filter_enabled: value.bloom_filter_enabled, + bloom_filter_fpp: value.clone() + .bloom_filter_fpp_opt + .map(|opt| match opt { + protobuf::parquet_options::BloomFilterFppOpt::BloomFilterFpp(v) => Some(v), + }) + .unwrap_or(None), + bloom_filter_ndv: value.clone() + .bloom_filter_ndv_opt + .map(|opt| match opt { + protobuf::parquet_options::BloomFilterNdvOpt::BloomFilterNdv(v) => Some(v), + }) + .unwrap_or(None), + allow_single_file_parallelism: value.allow_single_file_parallelism, + maximum_parallel_row_group_writers: value.maximum_parallel_row_group_writers as usize, + maximum_buffered_record_batches_per_stream: value.maximum_buffered_record_batches_per_stream as usize, + + }) + } +} + +impl TryFrom<&protobuf::ColumnOptions> for ColumnOptions { + type Error = DataFusionError; + fn try_from(value: &protobuf::ColumnOptions) -> Result { + Ok(ColumnOptions { + compression: value.compression_opt.clone().map(|opt| match opt { + protobuf::column_options::CompressionOpt::Compression(v) => Some(v), + }).unwrap_or(None), + dictionary_enabled: value.dictionary_enabled_opt.as_ref().map(|protobuf::column_options::DictionaryEnabledOpt::DictionaryEnabled(v)| *v), + statistics_enabled: value + .statistics_enabled_opt.clone() + .map(|opt| match opt { + protobuf::column_options::StatisticsEnabledOpt::StatisticsEnabled(v) => Some(v), + }) + .unwrap_or(None), + max_statistics_size: value + .max_statistics_size_opt.clone() + .map(|opt| match opt { + protobuf::column_options::MaxStatisticsSizeOpt::MaxStatisticsSize(v) => Some(v as usize), + }) + .unwrap_or(None), + encoding: value + .encoding_opt.clone() + .map(|opt| match opt { + protobuf::column_options::EncodingOpt::Encoding(v) => Some(v), + }) + .unwrap_or(None), + bloom_filter_enabled: value.bloom_filter_enabled_opt.clone().map(|opt| match opt { + protobuf::column_options::BloomFilterEnabledOpt::BloomFilterEnabled(v) => Some(v), + }) + .unwrap_or(None), + bloom_filter_fpp: value + .bloom_filter_fpp_opt.clone() + .map(|opt| match opt { + protobuf::column_options::BloomFilterFppOpt::BloomFilterFpp(v) => Some(v), + }) + .unwrap_or(None), + bloom_filter_ndv: value + .bloom_filter_ndv_opt.clone() + .map(|opt| match opt { + protobuf::column_options::BloomFilterNdvOpt::BloomFilterNdv(v) => Some(v), + }) + .unwrap_or(None), + }) + } +} - match file_type { - protobuf::file_type_writer_options::FileType::ArrowOptions(_) => { - Ok(Self::Arrow(ArrowWriterOptions::new())) +impl TryFrom<&protobuf::TableParquetOptions> for TableParquetOptions { + type Error = DataFusionError; + fn try_from(value: &protobuf::TableParquetOptions) -> 
Result { + let mut column_specific_options: HashMap = HashMap::new(); + for protobuf::ColumnSpecificOptions { + column_name, + options: maybe_options, + } in &value.column_specific_options + { + if let Some(options) = maybe_options { + column_specific_options.insert(column_name.clone(), options.try_into()?); } + } + Ok(TableParquetOptions { + global: value + .global + .as_ref() + .map(|v| v.try_into()) + .unwrap() + .unwrap(), + column_specific_options, + }) + } +} + +impl TryFrom<&protobuf::JsonOptions> for JsonOptions { + type Error = DataFusionError; + + fn try_from(proto_opts: &protobuf::JsonOptions) -> Result { + let compression: protobuf::CompressionTypeVariant = proto_opts.compression(); + Ok(JsonOptions { + compression: compression.into(), + schema_infer_max_rec: proto_opts.schema_infer_max_rec as usize, + }) + } +} - protobuf::file_type_writer_options::FileType::JsonOptions(opts) => { - let compression: CompressionTypeVariant = opts.compression().into(); - Ok(Self::JSON(JsonWriterOptions::new(compression))) +impl TryFrom<©_to_node::FormatOptions> for FormatOptions { + type Error = DataFusionError; + fn try_from(value: ©_to_node::FormatOptions) -> Result { + Ok(match value { + copy_to_node::FormatOptions::Csv(options) => { + FormatOptions::CSV(options.try_into()?) } - protobuf::file_type_writer_options::FileType::CsvOptions(opts) => { - let write_options = csv_writer_options_from_proto(opts)?; - let compression: CompressionTypeVariant = opts.compression().into(); - Ok(Self::CSV(CsvWriterOptions::new(write_options, compression))) + copy_to_node::FormatOptions::Json(options) => { + FormatOptions::JSON(options.try_into()?) } - protobuf::file_type_writer_options::FileType::ParquetOptions(opt) => { - let props = opt.writer_properties.clone().unwrap_or_default(); - let writer_properties = writer_properties_from_proto(&props)?; - Ok(Self::Parquet(ParquetWriterOptions::new(writer_properties))) + copy_to_node::FormatOptions::Parquet(options) => { + FormatOptions::PARQUET(options.try_into()?) 
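// Illustrative sketch, not part of this diff: together with the matching TryFrom
// impls in to_proto.rs, the conversions above let COPY TO format options survive a
// protobuf round trip. The function name and sample delimiter are ours.
fn copy_format_options_proto_roundtrip() -> datafusion_common::Result<()> {
    use datafusion_common::config::{CsvOptions, FormatOptions};

    let mut csv = CsvOptions::default();
    csv.delimiter = b';';
    let logical = FormatOptions::CSV(csv);

    // FormatOptions -> copy_to_node::FormatOptions -> FormatOptions
    let proto: copy_to_node::FormatOptions = (&logical).try_into()?;
    let back: FormatOptions = (&proto).try_into()?;
    assert_eq!(back, logical);
    Ok(())
}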
} - } + copy_to_node::FormatOptions::Avro(_) => FormatOptions::AVRO, + copy_to_node::FormatOptions::Arrow(_) => FormatOptions::ARROW, + }) } } diff --git a/datafusion/proto/src/physical_plan/mod.rs b/datafusion/proto/src/physical_plan/mod.rs index 9622b8ab51d8..004948da938f 100644 --- a/datafusion/proto/src/physical_plan/mod.rs +++ b/datafusion/proto/src/physical_plan/mod.rs @@ -19,6 +19,22 @@ use std::convert::TryInto; use std::fmt::Debug; use std::sync::Arc; +use self::from_proto::parse_physical_window_expr; + +use crate::common::{byte_to_string, proto_error, str_to_byte}; +use crate::convert_required; +use crate::physical_plan::from_proto::{ + parse_physical_expr, parse_physical_sort_expr, parse_physical_sort_exprs, + parse_protobuf_file_scan_config, +}; +use crate::protobuf::physical_aggregate_expr_node::AggregateFunction; +use crate::protobuf::physical_expr_node::ExprType; +use crate::protobuf::physical_plan_node::PhysicalPlanType; +use crate::protobuf::repartition_exec_node::PartitionMethod; +use crate::protobuf::{ + self, window_agg_exec_node, PhysicalPlanNode, PhysicalSortExprNodeCollection, +}; + use datafusion::arrow::compute::SortOptions; use datafusion::arrow::datatypes::SchemaRef; use datafusion::datasource::file_format::csv::CsvSink; @@ -61,26 +77,10 @@ use datafusion::physical_plan::{ }; use datafusion_common::{internal_err, not_impl_err, DataFusionError, Result}; use datafusion_expr::ScalarUDF; + use prost::bytes::BufMut; use prost::Message; -use crate::common::str_to_byte; -use crate::common::{byte_to_string, proto_error}; -use crate::convert_required; -use crate::physical_plan::from_proto::{ - parse_physical_expr, parse_physical_sort_expr, parse_physical_sort_exprs, - parse_protobuf_file_scan_config, -}; -use crate::protobuf::physical_aggregate_expr_node::AggregateFunction; -use crate::protobuf::physical_expr_node::ExprType; -use crate::protobuf::physical_plan_node::PhysicalPlanType; -use crate::protobuf::repartition_exec_node::PartitionMethod; -use crate::protobuf::{ - self, window_agg_exec_node, PhysicalPlanNode, PhysicalSortExprNodeCollection, -}; - -use self::from_proto::parse_physical_window_expr; - pub mod from_proto; pub mod to_proto; @@ -211,7 +211,12 @@ impl AsExecutionPlan for PhysicalPlanNode { ) }) .transpose()?; - Ok(Arc::new(ParquetExec::new(base_config, predicate, None))) + Ok(Arc::new(ParquetExec::new( + base_config, + predicate, + None, + Default::default(), + ))) } PhysicalPlanType::AvroScan(scan) => { Ok(Arc::new(AvroExec::new(parse_protobuf_file_scan_config( diff --git a/datafusion/proto/src/physical_plan/to_proto.rs b/datafusion/proto/src/physical_plan/to_proto.rs index da4e87b7a853..bdb6cc668708 100644 --- a/datafusion/proto/src/physical_plan/to_proto.rs +++ b/datafusion/proto/src/physical_plan/to_proto.rs @@ -22,16 +22,15 @@ use std::{ sync::Arc, }; -use crate::protobuf::{self, physical_window_expr_node, scalar_value::Value}; +use crate::logical_plan::csv_writer_options_to_proto; use crate::protobuf::{ - physical_aggregate_expr_node, PhysicalSortExprNode, PhysicalSortExprNodeCollection, - ScalarValue, + self, copy_to_node, physical_aggregate_expr_node, physical_window_expr_node, + scalar_value::Value, ArrowOptions, AvroOptions, PhysicalSortExprNode, + PhysicalSortExprNodeCollection, ScalarValue, }; #[cfg(feature = "parquet")] use datafusion::datasource::file_format::parquet::ParquetSink; - -use crate::logical_plan::{csv_writer_options_to_proto, writer_properties_to_proto}; use datafusion::datasource::{ file_format::csv::CsvSink, 
file_format::json::JsonSink, @@ -58,16 +57,16 @@ use datafusion::physical_plan::windows::{BuiltInWindowExpr, PlainAggregateWindow use datafusion::physical_plan::{ AggregateExpr, ColumnStatistics, PhysicalExpr, Statistics, WindowExpr, }; +use datafusion_common::config::{ + ColumnOptions, CsvOptions, FormatOptions, JsonOptions, ParquetOptions, + TableParquetOptions, +}; use datafusion_common::{ - file_options::{ - arrow_writer::ArrowWriterOptions, avro_writer::AvroWriterOptions, - csv_writer::CsvWriterOptions, json_writer::JsonWriterOptions, - parquet_writer::ParquetWriterOptions, - }, + file_options::{csv_writer::CsvWriterOptions, json_writer::JsonWriterOptions}, internal_err, not_impl_err, parsers::CompressionTypeVariant, stats::Precision, - DataFusionError, FileTypeWriterOptions, JoinSide, Result, + DataFusionError, JoinSide, Result, }; impl TryFrom> for protobuf::PhysicalExprNode { @@ -821,6 +820,7 @@ impl TryFrom<&JsonSink> for protobuf::JsonSink { fn try_from(value: &JsonSink) -> Result { Ok(Self { config: Some(value.config().try_into()?), + writer_options: Some(value.writer_options().try_into()?), }) } } @@ -831,6 +831,7 @@ impl TryFrom<&CsvSink> for protobuf::CsvSink { fn try_from(value: &CsvSink) -> Result { Ok(Self { config: Some(value.config().try_into()?), + writer_options: Some(value.writer_options().try_into()?), }) } } @@ -842,6 +843,7 @@ impl TryFrom<&ParquetSink> for protobuf::ParquetSink { fn try_from(value: &ParquetSink) -> Result { Ok(Self { config: Some(value.config().try_into()?), + parquet_options: Some(value.parquet_options().try_into()?), }) } } @@ -870,7 +872,6 @@ impl TryFrom<&FileSinkConfig> for protobuf::FileSinkConfig { }) }) .collect::>>()?; - let file_type_writer_options = &conf.file_type_writer_options; Ok(Self { object_store_url: conf.object_store_url.to_string(), file_groups, @@ -878,7 +879,6 @@ impl TryFrom<&FileSinkConfig> for protobuf::FileSinkConfig { output_schema: Some(conf.output_schema.as_ref().try_into()?), table_partition_cols, overwrite: conf.overwrite, - file_type_writer_options: Some(file_type_writer_options.try_into()?), }) } } @@ -895,44 +895,169 @@ impl From<&CompressionTypeVariant> for protobuf::CompressionTypeVariant { } } -impl TryFrom<&FileTypeWriterOptions> for protobuf::FileTypeWriterOptions { +impl TryFrom<&CsvWriterOptions> for protobuf::CsvWriterOptions { type Error = DataFusionError; - fn try_from(opts: &FileTypeWriterOptions) -> Result { - let file_type = match opts { - #[cfg(feature = "parquet")] - FileTypeWriterOptions::Parquet(ParquetWriterOptions { writer_options }) => { - protobuf::file_type_writer_options::FileType::ParquetOptions( - protobuf::ParquetWriterOptions { - writer_properties: Some(writer_properties_to_proto( - writer_options, - )), - }, - ) - } - FileTypeWriterOptions::CSV(CsvWriterOptions { - writer_options, - compression, - }) => protobuf::file_type_writer_options::FileType::CsvOptions( - csv_writer_options_to_proto(writer_options, compression), - ), - FileTypeWriterOptions::JSON(JsonWriterOptions { compression }) => { - let compression: protobuf::CompressionTypeVariant = compression.into(); - protobuf::file_type_writer_options::FileType::JsonOptions( - protobuf::JsonWriterOptions { - compression: compression.into(), - }, + fn try_from(opts: &CsvWriterOptions) -> Result { + Ok(csv_writer_options_to_proto( + &opts.writer_options, + &opts.compression, + )) + } +} + +impl TryFrom<&JsonWriterOptions> for protobuf::JsonWriterOptions { + type Error = DataFusionError; + + fn try_from(opts: &JsonWriterOptions) -> Result 
{ + let compression: protobuf::CompressionTypeVariant = opts.compression.into(); + Ok(protobuf::JsonWriterOptions { + compression: compression.into(), + }) + } +} + +impl TryFrom<&ParquetOptions> for protobuf::ParquetOptions { + type Error = DataFusionError; + + fn try_from(value: &ParquetOptions) -> Result { + Ok(protobuf::ParquetOptions { + enable_page_index: value.enable_page_index, + pruning: value.pruning, + skip_metadata: value.skip_metadata, + metadata_size_hint_opt: value.metadata_size_hint.map(|v| protobuf::parquet_options::MetadataSizeHintOpt::MetadataSizeHint(v as u64)), + pushdown_filters: value.pushdown_filters, + reorder_filters: value.reorder_filters, + data_pagesize_limit: value.data_pagesize_limit as u64, + write_batch_size: value.write_batch_size as u64, + writer_version: value.writer_version.clone(), + compression_opt: value.compression.clone().map(protobuf::parquet_options::CompressionOpt::Compression), + dictionary_enabled_opt: value.dictionary_enabled.map(protobuf::parquet_options::DictionaryEnabledOpt::DictionaryEnabled), + dictionary_page_size_limit: value.dictionary_page_size_limit as u64, + statistics_enabled_opt: value.statistics_enabled.clone().map(protobuf::parquet_options::StatisticsEnabledOpt::StatisticsEnabled), + max_statistics_size_opt: value.max_statistics_size.map(|v| protobuf::parquet_options::MaxStatisticsSizeOpt::MaxStatisticsSize(v as u64)), + max_row_group_size: value.max_row_group_size as u64, + created_by: value.created_by.clone(), + column_index_truncate_length_opt: value.column_index_truncate_length.map(|v| protobuf::parquet_options::ColumnIndexTruncateLengthOpt::ColumnIndexTruncateLength(v as u64)), + data_page_row_count_limit: value.data_page_row_count_limit as u64, + encoding_opt: value.encoding.clone().map(protobuf::parquet_options::EncodingOpt::Encoding), + bloom_filter_enabled: value.bloom_filter_enabled, + bloom_filter_fpp_opt: value.bloom_filter_fpp.map(protobuf::parquet_options::BloomFilterFppOpt::BloomFilterFpp), + bloom_filter_ndv_opt: value.bloom_filter_ndv.map(protobuf::parquet_options::BloomFilterNdvOpt::BloomFilterNdv), + allow_single_file_parallelism: value.allow_single_file_parallelism, + maximum_parallel_row_group_writers: value.maximum_parallel_row_group_writers as u64, + maximum_buffered_record_batches_per_stream: value.maximum_buffered_record_batches_per_stream as u64, + }) + } +} + +impl TryFrom<&ColumnOptions> for protobuf::ColumnOptions { + type Error = DataFusionError; + + fn try_from(value: &ColumnOptions) -> Result { + Ok(protobuf::ColumnOptions { + compression_opt: value + .compression + .clone() + .map(protobuf::column_options::CompressionOpt::Compression), + dictionary_enabled_opt: value + .dictionary_enabled + .map(protobuf::column_options::DictionaryEnabledOpt::DictionaryEnabled), + statistics_enabled_opt: value + .statistics_enabled + .clone() + .map(protobuf::column_options::StatisticsEnabledOpt::StatisticsEnabled), + max_statistics_size_opt: value.max_statistics_size.map(|v| { + protobuf::column_options::MaxStatisticsSizeOpt::MaxStatisticsSize( + v as u32, ) + }), + encoding_opt: value + .encoding + .clone() + .map(protobuf::column_options::EncodingOpt::Encoding), + bloom_filter_enabled_opt: value + .bloom_filter_enabled + .map(protobuf::column_options::BloomFilterEnabledOpt::BloomFilterEnabled), + bloom_filter_fpp_opt: value + .bloom_filter_fpp + .map(protobuf::column_options::BloomFilterFppOpt::BloomFilterFpp), + bloom_filter_ndv_opt: value + .bloom_filter_ndv + 
.map(protobuf::column_options::BloomFilterNdvOpt::BloomFilterNdv), + }) + } +} + +impl TryFrom<&TableParquetOptions> for protobuf::TableParquetOptions { + type Error = DataFusionError; + fn try_from(value: &TableParquetOptions) -> Result { + let column_specific_options = value + .column_specific_options + .iter() + .map(|(k, v)| { + Ok(protobuf::ColumnSpecificOptions { + column_name: k.into(), + options: Some(v.try_into()?), + }) + }) + .collect::>>()?; + Ok(protobuf::TableParquetOptions { + global: Some((&value.global).try_into()?), + column_specific_options, + }) + } +} + +impl TryFrom<&CsvOptions> for protobuf::CsvOptions { + type Error = DataFusionError; // Define or use an appropriate error type + + fn try_from(opts: &CsvOptions) -> Result { + let compression: protobuf::CompressionTypeVariant = opts.compression.into(); + Ok(protobuf::CsvOptions { + has_header: opts.has_header, + delimiter: vec![opts.delimiter], + quote: vec![opts.quote], + escape: opts.escape.map_or_else(Vec::new, |e| vec![e]), + compression: compression.into(), + schema_infer_max_rec: opts.schema_infer_max_rec as u64, + date_format: opts.date_format.clone().unwrap_or_default(), + datetime_format: opts.datetime_format.clone().unwrap_or_default(), + timestamp_format: opts.timestamp_format.clone().unwrap_or_default(), + timestamp_tz_format: opts.timestamp_tz_format.clone().unwrap_or_default(), + time_format: opts.time_format.clone().unwrap_or_default(), + null_value: opts.null_value.clone().unwrap_or_default(), + }) + } +} + +impl TryFrom<&JsonOptions> for protobuf::JsonOptions { + type Error = DataFusionError; + + fn try_from(opts: &JsonOptions) -> Result { + let compression: protobuf::CompressionTypeVariant = opts.compression.into(); + Ok(protobuf::JsonOptions { + compression: compression.into(), + schema_infer_max_rec: opts.schema_infer_max_rec as u64, + }) + } +} + +impl TryFrom<&FormatOptions> for copy_to_node::FormatOptions { + type Error = DataFusionError; + fn try_from(value: &FormatOptions) -> std::result::Result { + Ok(match value { + FormatOptions::CSV(options) => { + copy_to_node::FormatOptions::Csv(options.try_into()?) } - FileTypeWriterOptions::Avro(AvroWriterOptions {}) => { - return not_impl_err!("Avro file sink protobuf serialization") + FormatOptions::JSON(options) => { + copy_to_node::FormatOptions::Json(options.try_into()?) } - FileTypeWriterOptions::Arrow(ArrowWriterOptions {}) => { - return not_impl_err!("Arrow file sink protobuf serialization") + FormatOptions::PARQUET(options) => { + copy_to_node::FormatOptions::Parquet(options.try_into()?) 
} - }; - Ok(Self { - file_type: Some(file_type), + FormatOptions::AVRO => copy_to_node::FormatOptions::Avro(AvroOptions {}), + FormatOptions::ARROW => copy_to_node::FormatOptions::Arrow(ArrowOptions {}), }) } } diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index fb9f2967553f..2c8cf07e9eff 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -21,33 +21,23 @@ use std::fmt::{self, Debug, Formatter}; use std::sync::Arc; use arrow::array::{ArrayRef, FixedSizeListArray}; -use arrow::csv::WriterBuilder; use arrow::datatypes::{ DataType, Field, Fields, Int32Type, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit, Schema, SchemaRef, TimeUnit, UnionFields, UnionMode, }; - -use datafusion_common::file_options::arrow_writer::ArrowWriterOptions; -use datafusion_expr::{ScalarUDF, ScalarUDFImpl}; -use datafusion_proto::logical_plan::to_proto::serialize_expr; -use prost::Message; - use datafusion::datasource::provider::TableProviderFactory; use datafusion::datasource::TableProvider; use datafusion::execution::context::SessionState; use datafusion::execution::runtime_env::{RuntimeConfig, RuntimeEnv}; -use datafusion::parquet::file::properties::{WriterProperties, WriterVersion}; use datafusion::prelude::*; use datafusion::test_util::{TestTableFactory, TestTableProvider}; -use datafusion_common::file_options::csv_writer::CsvWriterOptions; -use datafusion_common::file_options::parquet_writer::ParquetWriterOptions; -use datafusion_common::file_options::StatementOptions; -use datafusion_common::parsers::CompressionTypeVariant; +use datafusion_common::config::{FormatOptions, TableOptions}; use datafusion_common::scalar::ScalarStructBuilder; -use datafusion_common::{internal_err, not_impl_err, plan_err, FileTypeWriterOptions}; -use datafusion_common::{DFField, DFSchema, DFSchemaRef, DataFusionError, ScalarValue}; -use datafusion_common::{FileType, Result}; -use datafusion_expr::dml::{CopyOptions, CopyTo}; +use datafusion_common::{ + internal_err, not_impl_err, plan_err, DFField, DFSchema, DFSchemaRef, + DataFusionError, Result, ScalarValue, +}; +use datafusion_expr::dml::CopyTo; use datafusion_expr::expr::{ self, Between, BinaryExpr, Case, Cast, GroupingSet, InList, Like, ScalarFunction, Sort, Unnest, @@ -57,17 +47,21 @@ use datafusion_expr::{ col, create_udaf, lit, Accumulator, AggregateFunction, BuiltinScalarFunction::{Sqrt, Substr}, ColumnarValue, Expr, ExprSchemable, LogicalPlan, Operator, PartitionEvaluator, - Signature, TryCast, Volatility, WindowFrame, WindowFrameBound, WindowFrameUnits, - WindowFunctionDefinition, WindowUDF, WindowUDFImpl, + ScalarUDF, ScalarUDFImpl, Signature, TryCast, Volatility, WindowFrame, + WindowFrameBound, WindowFrameUnits, WindowFunctionDefinition, WindowUDF, + WindowUDFImpl, }; use datafusion_proto::bytes::{ logical_plan_from_bytes, logical_plan_from_bytes_with_extension_codec, logical_plan_to_bytes, logical_plan_to_bytes_with_extension_codec, }; +use datafusion_proto::logical_plan::to_proto::serialize_expr; use datafusion_proto::logical_plan::LogicalExtensionCodec; use datafusion_proto::logical_plan::{from_proto, DefaultLogicalExtensionCodec}; use datafusion_proto::protobuf; +use prost::Message; + #[cfg(feature = "json")] fn roundtrip_json_test(proto: &protobuf::LogicalExprNode) { let string = serde_json::to_string(proto).unwrap(); @@ -321,15 +315,16 @@ async fn roundtrip_logical_plan_copy_to_sql_options() -> 
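// Illustrative sketch, not part of this diff: it mirrors the test that follows.
// String-keyed settings on TableOptions land in the typed per-format options that
// CopyTo now carries; "csv.delimiter" is the config key used in that test.
fn table_options_into_format_options() -> datafusion_common::Result<()> {
    use datafusion_common::config::{ConfigOptions, FormatOptions, TableOptions};

    let mut table_options =
        TableOptions::default_from_session_config(&ConfigOptions::default());
    table_options.set("csv.delimiter", ";")?;
    assert_eq!(table_options.csv.delimiter, b';');

    // This is the value a COPY plan stores in its `format_options` field.
    let _format_options = FormatOptions::CSV(table_options.csv.clone());
    Ok(())
}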
Result<()> { let input = create_csv_scan(&ctx).await?; - let mut options = HashMap::new(); - options.insert("foo".to_string(), "bar".to_string()); + let mut table_options = + TableOptions::default_from_session_config(ctx.state().config_options()); + table_options.set("csv.delimiter", ";")?; let plan = LogicalPlan::Copy(CopyTo { input: Arc::new(input), output_url: "test.csv".to_string(), - file_format: FileType::CSV, partition_by: vec!["a".to_string(), "b".to_string(), "c".to_string()], - copy_options: CopyOptions::SQLOptions(StatementOptions::from(&options)), + format_options: FormatOptions::CSV(table_options.csv.clone()), + options: Default::default(), }); let bytes = logical_plan_to_bytes(&plan)?; @@ -345,24 +340,25 @@ async fn roundtrip_logical_plan_copy_to_writer_options() -> Result<()> { let input = create_csv_scan(&ctx).await?; - let writer_properties = WriterProperties::builder() - .set_bloom_filter_enabled(true) - .set_created_by("DataFusion Test".to_string()) - .set_writer_version(WriterVersion::PARQUET_2_0) - .set_write_batch_size(111) - .set_data_page_size_limit(222) - .set_data_page_row_count_limit(333) - .set_dictionary_page_size_limit(444) - .set_max_row_group_size(555) - .build(); + let table_options = + TableOptions::default_from_session_config(ctx.state().config_options()); + let mut parquet_format = table_options.parquet; + + parquet_format.global.bloom_filter_enabled = true; + parquet_format.global.created_by = "DataFusion Test".to_string(); + parquet_format.global.writer_version = "PARQUET_2_0".to_string(); + parquet_format.global.write_batch_size = 111; + parquet_format.global.data_pagesize_limit = 222; + parquet_format.global.data_page_row_count_limit = 333; + parquet_format.global.dictionary_page_size_limit = 444; + parquet_format.global.max_row_group_size = 555; + let plan = LogicalPlan::Copy(CopyTo { input: Arc::new(input), output_url: "test.parquet".to_string(), - file_format: FileType::PARQUET, + format_options: FormatOptions::PARQUET(parquet_format.clone()), partition_by: vec!["a".to_string(), "b".to_string(), "c".to_string()], - copy_options: CopyOptions::WriterOptions(Box::new( - FileTypeWriterOptions::Parquet(ParquetWriterOptions::new(writer_properties)), - )), + options: Default::default(), }); let bytes = logical_plan_to_bytes(&plan)?; @@ -372,27 +368,11 @@ async fn roundtrip_logical_plan_copy_to_writer_options() -> Result<()> { match logical_round_trip { LogicalPlan::Copy(copy_to) => { assert_eq!("test.parquet", copy_to.output_url); - assert_eq!(FileType::PARQUET, copy_to.file_format); assert_eq!(vec!["a", "b", "c"], copy_to.partition_by); - match ©_to.copy_options { - CopyOptions::WriterOptions(y) => match y.as_ref() { - FileTypeWriterOptions::Parquet(p) => { - let props = &p.writer_options; - assert_eq!("DataFusion Test", props.created_by()); - assert_eq!( - "PARQUET_2_0", - format!("{:?}", props.writer_version()) - ); - assert_eq!(111, props.write_batch_size()); - assert_eq!(222, props.data_page_size_limit()); - assert_eq!(333, props.data_page_row_count_limit()); - assert_eq!(444, props.dictionary_page_size_limit()); - assert_eq!(555, props.max_row_group_size()); - } - _ => panic!(), - }, - _ => panic!(), - } + assert_eq!( + copy_to.format_options, + FormatOptions::PARQUET(parquet_format) + ); } _ => panic!(), } @@ -408,11 +388,9 @@ async fn roundtrip_logical_plan_copy_to_arrow() -> Result<()> { let plan = LogicalPlan::Copy(CopyTo { input: Arc::new(input), output_url: "test.arrow".to_string(), - file_format: FileType::ARROW, partition_by: 
vec!["a".to_string(), "b".to_string(), "c".to_string()], - copy_options: CopyOptions::WriterOptions(Box::new(FileTypeWriterOptions::Arrow( - ArrowWriterOptions::new(), - ))), + format_options: FormatOptions::ARROW, + options: Default::default(), }); let bytes = logical_plan_to_bytes(&plan)?; @@ -422,15 +400,8 @@ async fn roundtrip_logical_plan_copy_to_arrow() -> Result<()> { match logical_round_trip { LogicalPlan::Copy(copy_to) => { assert_eq!("test.arrow", copy_to.output_url); - assert_eq!(FileType::ARROW, copy_to.file_format); + assert_eq!(FormatOptions::ARROW, copy_to.format_options); assert_eq!(vec!["a", "b", "c"], copy_to.partition_by); - match ©_to.copy_options { - CopyOptions::WriterOptions(y) => match y.as_ref() { - FileTypeWriterOptions::Arrow(_) => {} - _ => panic!(), - }, - _ => panic!(), - } } _ => panic!(), } @@ -444,25 +415,23 @@ async fn roundtrip_logical_plan_copy_to_csv() -> Result<()> { let input = create_csv_scan(&ctx).await?; - let writer_properties = WriterBuilder::new() - .with_delimiter(b'*') - .with_date_format("dd/MM/yyyy".to_string()) - .with_datetime_format("dd/MM/yyyy HH:mm:ss".to_string()) - .with_timestamp_format("HH:mm:ss.SSSSSS".to_string()) - .with_time_format("HH:mm:ss".to_string()) - .with_null("NIL".to_string()); + let table_options = + TableOptions::default_from_session_config(ctx.state().config_options()); + let mut csv_format = table_options.csv; + + csv_format.delimiter = b'*'; + csv_format.date_format = Some("dd/MM/yyyy".to_string()); + csv_format.datetime_format = Some("dd/MM/yyyy HH:mm:ss".to_string()); + csv_format.timestamp_format = Some("HH:mm:ss.SSSSSS".to_string()); + csv_format.time_format = Some("HH:mm:ss".to_string()); + csv_format.null_value = Some("NIL".to_string()); let plan = LogicalPlan::Copy(CopyTo { input: Arc::new(input), output_url: "test.csv".to_string(), - file_format: FileType::CSV, partition_by: vec!["a".to_string(), "b".to_string(), "c".to_string()], - copy_options: CopyOptions::WriterOptions(Box::new(FileTypeWriterOptions::CSV( - CsvWriterOptions::new( - writer_properties, - CompressionTypeVariant::UNCOMPRESSED, - ), - ))), + format_options: FormatOptions::CSV(csv_format.clone()), + options: Default::default(), }); let bytes = logical_plan_to_bytes(&plan)?; @@ -472,26 +441,8 @@ async fn roundtrip_logical_plan_copy_to_csv() -> Result<()> { match logical_round_trip { LogicalPlan::Copy(copy_to) => { assert_eq!("test.csv", copy_to.output_url); - assert_eq!(FileType::CSV, copy_to.file_format); + assert_eq!(FormatOptions::CSV(csv_format), copy_to.format_options); assert_eq!(vec!["a", "b", "c"], copy_to.partition_by); - match ©_to.copy_options { - CopyOptions::WriterOptions(y) => match y.as_ref() { - FileTypeWriterOptions::CSV(p) => { - let props = &p.writer_options; - assert_eq!(b'*', props.delimiter()); - assert_eq!("dd/MM/yyyy", props.date_format().unwrap()); - assert_eq!( - "dd/MM/yyyy HH:mm:ss", - props.datetime_format().unwrap() - ); - assert_eq!("HH:mm:ss.SSSSSS", props.timestamp_format().unwrap()); - assert_eq!("HH:mm:ss", props.time_format().unwrap()); - assert_eq!("NIL", props.null()); - } - _ => panic!(), - }, - _ => panic!(), - } } _ => panic!(), } @@ -582,24 +533,39 @@ async fn roundtrip_expr_api() -> Result<()> { let expr_list = vec![ encode(col("a").cast_to(&DataType::Utf8, &schema)?, lit("hex")), decode(lit("1234"), lit("hex")), - array_to_string(array(vec![lit(1), lit(2), lit(3)]), lit(",")), - array_dims(array(vec![lit(1), lit(2), lit(3)])), - array_ndims(array(vec![lit(1), lit(2), lit(3)])), - 
cardinality(array(vec![lit(1), lit(2), lit(3)])), + array_to_string(make_array(vec![lit(1), lit(2), lit(3)]), lit(",")), + array_dims(make_array(vec![lit(1), lit(2), lit(3)])), + array_ndims(make_array(vec![lit(1), lit(2), lit(3)])), + cardinality(make_array(vec![lit(1), lit(2), lit(3)])), + string_to_array(lit("abc#def#ghl"), lit("#"), lit(",")), range(lit(1), lit(10), lit(2)), gen_series(lit(1), lit(10), lit(2)), - array_has(array(vec![lit(1), lit(2), lit(3)]), lit(1)), + array_append(make_array(vec![lit(1), lit(2), lit(3)]), lit(4)), + array_prepend(lit(1), make_array(vec![lit(2), lit(3), lit(4)])), + array_concat(vec![ + make_array(vec![lit(1), lit(2)]), + make_array(vec![lit(3), lit(4)]), + ]), + make_array(vec![lit(1), lit(2), lit(3)]), + array_has(make_array(vec![lit(1), lit(2), lit(3)]), lit(1)), array_has_all( - array(vec![lit(1), lit(2), lit(3)]), - array(vec![lit(1), lit(2)]), + make_array(vec![lit(1), lit(2), lit(3)]), + make_array(vec![lit(1), lit(2)]), ), array_has_any( - array(vec![lit(1), lit(2), lit(3)]), - array(vec![lit(1), lit(4)]), + make_array(vec![lit(1), lit(2), lit(3)]), + make_array(vec![lit(1), lit(4)]), + ), + array_empty(make_array(vec![lit(1), lit(2), lit(3)])), + array_length(make_array(vec![lit(1), lit(2), lit(3)])), + array_repeat(lit(1), lit(3)), + flatten(make_array(vec![lit(1), lit(2), lit(3)])), + array_sort( + make_array(vec![lit(3), lit(4), lit(1), lit(2)]), + lit("desc"), + lit("NULLS LAST"), ), - array_empty(array(vec![lit(1), lit(2), lit(3)])), - array_length(array(vec![lit(1), lit(2), lit(3)])), - flatten(array(vec![lit(1), lit(2), lit(3)])), + array_distinct(make_array(vec![lit(1), lit(3), lit(3), lit(2), lit(2)])), ]; // ensure expressions created with the expr api can be round tripped diff --git a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs index a3c0b3eccd3c..3441a9f7fa11 100644 --- a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs @@ -15,6 +15,10 @@ // specific language governing permissions and limitations // under the License. 
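// Illustrative sketch, not part of this diff: the scans and sinks below now take
// datafusion_common::config::TableParquetOptions instead of WriterProperties. A
// tweaked option set round-trips through protobuf::TableParquetOptions; the function
// name and sample values are ours.
fn table_parquet_options_proto_roundtrip() -> datafusion_common::Result<()> {
    use datafusion_common::config::TableParquetOptions;
    use datafusion_proto::protobuf;

    let mut opts = TableParquetOptions::default();
    opts.global.max_row_group_size = 8192;
    opts.global.created_by = "roundtrip example".to_string();

    let proto: protobuf::TableParquetOptions = (&opts).try_into()?;
    let back: TableParquetOptions = (&proto).try_into()?;
    assert_eq!(back.global.max_row_group_size, 8192);
    assert_eq!(back.global.created_by, "roundtrip example");
    Ok(())
}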
+use std::ops::Deref; +use std::sync::Arc; +use std::vec; + use arrow::csv::WriterBuilder; use datafusion::arrow::array::ArrayRef; use datafusion::arrow::compute::kernels::sort::SortOptions; @@ -32,7 +36,6 @@ use datafusion::execution::context::ExecutionProps; use datafusion::logical_expr::{ create_udf, BuiltinScalarFunction, JoinType, Operator, Volatility, }; -use datafusion::parquet::file::properties::WriterProperties; use datafusion::physical_expr::expressions::NthValueAgg; use datafusion::physical_expr::window::SlidingAggregateWindowExpr; use datafusion::physical_expr::{PhysicalSortRequirement, ScalarFunctionExpr}; @@ -66,21 +69,18 @@ use datafusion::physical_plan::{ }; use datafusion::prelude::SessionContext; use datafusion::scalar::ScalarValue; +use datafusion_common::config::TableParquetOptions; use datafusion_common::file_options::csv_writer::CsvWriterOptions; use datafusion_common::file_options::json_writer::JsonWriterOptions; -use datafusion_common::file_options::parquet_writer::ParquetWriterOptions; use datafusion_common::parsers::CompressionTypeVariant; use datafusion_common::stats::Precision; -use datafusion_common::{FileTypeWriterOptions, Result}; +use datafusion_common::Result; use datafusion_expr::{ Accumulator, AccumulatorFactoryFunction, AggregateUDF, ColumnarValue, Signature, SimpleAggregateUDF, WindowFrame, WindowFrameBound, }; use datafusion_proto::physical_plan::{AsExecutionPlan, DefaultPhysicalExtensionCodec}; use datafusion_proto::protobuf; -use std::ops::Deref; -use std::sync::Arc; -use std::vec; /// Perform a serde roundtrip and assert that the string representation of the before and after plans /// are identical. Note that this often isn't sufficient to guarantee that no information is @@ -271,6 +271,7 @@ fn roundtrip_window() -> Result<()> { "FIRST_VALUE(a) PARTITION BY [b] ORDER BY [a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", col("a", &schema)?, DataType::Int64, + false, )), &[col("b", &schema)?], &[PhysicalSortExpr { @@ -560,6 +561,7 @@ fn roundtrip_parquet_exec_with_pruning_predicate() -> Result<()> { scan_config, Some(predicate), None, + Default::default(), ))) } @@ -586,7 +588,12 @@ async fn roundtrip_parquet_exec_with_table_partition_cols() -> Result<()> { output_ordering: vec![], }; - roundtrip_test(Arc::new(ParquetExec::new(scan_config, None, None))) + roundtrip_test(Arc::new(ParquetExec::new( + scan_config, + None, + None, + Default::default(), + ))) } #[test] @@ -764,11 +771,11 @@ fn roundtrip_json_sink() -> Result<()> { output_schema: schema.clone(), table_partition_cols: vec![("plan_type".to_string(), DataType::Utf8)], overwrite: true, - file_type_writer_options: FileTypeWriterOptions::JSON(JsonWriterOptions::new( - CompressionTypeVariant::UNCOMPRESSED, - )), }; - let data_sink = Arc::new(JsonSink::new(file_sink_config)); + let data_sink = Arc::new(JsonSink::new( + file_sink_config, + JsonWriterOptions::new(CompressionTypeVariant::UNCOMPRESSED), + )); let sort_order = vec![PhysicalSortRequirement::new( Arc::new(Column::new("plan_type", 0)), Some(SortOptions { @@ -799,12 +806,11 @@ fn roundtrip_csv_sink() -> Result<()> { output_schema: schema.clone(), table_partition_cols: vec![("plan_type".to_string(), DataType::Utf8)], overwrite: true, - file_type_writer_options: FileTypeWriterOptions::CSV(CsvWriterOptions::new( - WriterBuilder::default(), - CompressionTypeVariant::ZSTD, - )), }; - let data_sink = Arc::new(CsvSink::new(file_sink_config)); + let data_sink = Arc::new(CsvSink::new( + file_sink_config, + 
CsvWriterOptions::new(WriterBuilder::default(), CompressionTypeVariant::ZSTD), + )); let sort_order = vec![PhysicalSortRequirement::new( Arc::new(Column::new("plan_type", 0)), Some(SortOptions { @@ -832,12 +838,7 @@ fn roundtrip_csv_sink() -> Result<()> { .unwrap(); assert_eq!( CompressionTypeVariant::ZSTD, - csv_sink - .config() - .file_type_writer_options - .try_into_csv() - .unwrap() - .compression + csv_sink.writer_options().compression ); Ok(()) @@ -857,11 +858,11 @@ fn roundtrip_parquet_sink() -> Result<()> { output_schema: schema.clone(), table_partition_cols: vec![("plan_type".to_string(), DataType::Utf8)], overwrite: true, - file_type_writer_options: FileTypeWriterOptions::Parquet( - ParquetWriterOptions::new(WriterProperties::default()), - ), }; - let data_sink = Arc::new(ParquetSink::new(file_sink_config)); + let data_sink = Arc::new(ParquetSink::new( + file_sink_config, + TableParquetOptions::default(), + )); let sort_order = vec![PhysicalSortRequirement::new( Arc::new(Column::new("plan_type", 0)), Some(SortOptions { diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index e838a4cafb2a..d45a195cb653 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -271,8 +271,14 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { ), SQLExpr::Cast { - expr, data_type, .. + expr, + data_type, + format, } => { + if let Some(format) = format { + return not_impl_err!("CAST with format is not supported: {format}"); + } + let dt = self.convert_data_type(&data_type)?; let expr = self.sql_expr_to_logical_expr(*expr, schema, planner_context)?; @@ -295,15 +301,23 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } SQLExpr::TryCast { - expr, data_type, .. - } => Ok(Expr::TryCast(TryCast::new( - Box::new(self.sql_expr_to_logical_expr( - *expr, - schema, - planner_context, - )?), - self.convert_data_type(&data_type)?, - ))), + expr, + data_type, + format, + } => { + if let Some(format) = format { + return not_impl_err!("CAST with format is not supported: {format}"); + } + + Ok(Expr::TryCast(TryCast::new( + Box::new(self.sql_expr_to_logical_expr( + *expr, + schema, + planner_context, + )?), + self.convert_data_type(&data_type)?, + ))) + } SQLExpr::TypedString { data_type, value } => Ok(Expr::Cast(Cast::new( Box::new(lit(value)), @@ -478,7 +492,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { trim_where, trim_what, trim_characters, - .. } => self.sql_trim_to_expr( *expr, trim_where, @@ -583,8 +596,14 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { self.sql_expr_to_logical_expr(value, input_schema, planner_context) }) .collect::>>()?; - Ok(Expr::ScalarFunction(ScalarFunction::new( - BuiltinScalarFunction::Struct, + let struct_func = self + .context_provider + .get_function_meta("struct") + .ok_or_else(|| { + internal_datafusion_err!("Unable to find expected 'struct' function") + })?; + Ok(Expr::ScalarFunction(ScalarFunction::new_udf( + struct_func, args, ))) } @@ -801,7 +820,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { distinct, order_by, null_treatment, - .. + filter: None, // filter is passed in }) => Ok(Expr::AggregateFunction(expr::AggregateFunction::new( fun, args, @@ -814,7 +833,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { order_by, null_treatment, ))), - _ => plan_err!( + Expr::AggregateFunction(..) 
=> { + internal_err!("Expected null filter clause in aggregate function") + } + _ => internal_err!( "AggregateExpressionWithFilter expression was not an AggregateFunction" ), } diff --git a/datafusion/sql/src/expr/value.rs b/datafusion/sql/src/expr/value.rs index 15524b9ffab1..8d19b32b8e40 100644 --- a/datafusion/sql/src/expr/value.rs +++ b/datafusion/sql/src/expr/value.rs @@ -22,9 +22,7 @@ use arrow_schema::DataType; use datafusion_common::{ not_impl_err, plan_err, DFSchema, DataFusionError, Result, ScalarValue, }; -use datafusion_expr::expr::ScalarFunction; -use datafusion_expr::expr::{BinaryExpr, Placeholder}; -use datafusion_expr::BuiltinScalarFunction; +use datafusion_expr::expr::{BinaryExpr, Placeholder, ScalarFunction}; use datafusion_expr::{lit, Expr, Operator}; use log::debug; use sqlparser::ast::{BinaryOperator, Expr as SQLExpr, Interval, Value}; @@ -143,10 +141,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { }) .collect::>>()?; - Ok(Expr::ScalarFunction(ScalarFunction::new( - BuiltinScalarFunction::MakeArray, - values, - ))) + if let Some(udf) = self.context_provider.get_function_meta("make_array") { + Ok(Expr::ScalarFunction(ScalarFunction::new_udf(udf, values))) + } else { + not_impl_err!( + "array_expression featrue is disable, So should implement make_array UDF by yourself" + ) + } } /// Convert a SQL interval expression to a DataFusion logical plan diff --git a/datafusion/sql/src/lib.rs b/datafusion/sql/src/lib.rs index d805f61397e9..da66ee197adb 100644 --- a/datafusion/sql/src/lib.rs +++ b/datafusion/sql/src/lib.rs @@ -36,6 +36,7 @@ mod relation; mod select; mod set_expr; mod statement; +pub mod unparser; pub mod utils; mod values; diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 35063a6cfa06..412c3b753ed5 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -16,6 +16,8 @@ // under the License. 
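// Illustrative sketch, not part of this diff: the helpers added at the bottom of
// statement.rs resolve the COPY target's file type and partition columns from the
// lowercased option map built by the planner. Function name and sample values are ours.
fn copy_option_helpers_example() -> datafusion_common::Result<()> {
    use std::collections::HashMap;

    let mut options: HashMap<String, String> = HashMap::new();
    options.insert("partition_by".to_string(), "a, b".to_string());

    // No explicit "format" option, so the target's extension decides the file type.
    let file_type = try_infer_file_type(&mut options, "s3://bucket/out.parquet")?;
    assert_eq!(file_type, datafusion_common::FileType::PARQUET);

    // "partition_by" is consumed from the map and split on commas.
    let partition_by = take_partition_by(&mut options);
    assert_eq!(partition_by, vec!["a".to_string(), "b".to_string()]);
    Ok(())
}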
use std::collections::{BTreeMap, HashMap, HashSet}; +use std::path::Path; +use std::str::FromStr; use std::sync::Arc; use crate::parser::{ @@ -28,15 +30,14 @@ use crate::planner::{ use crate::utils::normalize_ident; use arrow_schema::DataType; -use datafusion_common::file_options::StatementOptions; use datafusion_common::parsers::CompressionTypeVariant; use datafusion_common::{ exec_err, not_impl_err, plan_datafusion_err, plan_err, schema_err, unqualified_field_not_found, Column, Constraints, DFField, DFSchema, DFSchemaRef, - DataFusionError, OwnedTableReference, Result, ScalarValue, SchemaError, + DataFusionError, FileType, OwnedTableReference, Result, ScalarValue, SchemaError, SchemaReference, TableReference, ToDFSchema, }; -use datafusion_expr::dml::{CopyOptions, CopyTo}; +use datafusion_expr::dml::CopyTo; use datafusion_expr::expr_rewriter::normalize_col_with_schemas_and_ambiguity_check; use datafusion_expr::logical_plan::builder::project; use datafusion_expr::logical_plan::DdlStatement; @@ -829,25 +830,37 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } }; - // TODO, parse options as Vec<(String, String)> to avoid this conversion - let options = statement - .options - .iter() - .map(|(s, v)| (s.to_owned(), v.to_string())) - .collect::>(); - - let mut statement_options = StatementOptions::new(options); - let file_format = statement_options.try_infer_file_type(&statement.target)?; - let partition_by = statement_options.take_partition_by(); + let mut options = HashMap::new(); + for (key, value) in statement.options { + let value_string = match value { + Value::SingleQuotedString(s) => s.to_string(), + Value::DollarQuotedString(s) => s.to_string(), + Value::UnQuotedString(s) => s.to_string(), + Value::Number(_, _) | Value::Boolean(_) => value.to_string(), + Value::DoubleQuotedString(_) + | Value::EscapedStringLiteral(_) + | Value::NationalStringLiteral(_) + | Value::SingleQuotedByteStringLiteral(_) + | Value::DoubleQuotedByteStringLiteral(_) + | Value::RawStringLiteral(_) + | Value::HexStringLiteral(_) + | Value::Null + | Value::Placeholder(_) => { + return plan_err!("Unsupported Value in COPY statement {}", value); + } + }; + options.insert(key.to_lowercase(), value_string.to_lowercase()); + } - let copy_options = CopyOptions::SQLOptions(statement_options); + let file_type = try_infer_file_type(&mut options, &statement.target)?; + let partition_by = take_partition_by(&mut options); Ok(LogicalPlan::Copy(CopyTo { input: Arc::new(input), output_url: statement.target, - file_format, + format_options: file_type.into(), partition_by, - copy_options, + options, })) } @@ -1456,3 +1469,82 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { .is_ok() } } + +/// Infers the file type for a given target based on provided options or file extension. +/// +/// This function tries to determine the file type based on the 'format' option present +/// in the provided options hashmap. If 'format' is not explicitly set, the function attempts +/// to infer the file type from the file extension of the target. It returns an error if neither +/// the format option is set nor the file extension can be determined or parsed. +/// +/// # Arguments +/// +/// * `options` - A mutable reference to a HashMap containing options where the file format +/// might be specified under the 'format' key. +/// * `target` - A string slice representing the path to the file for which the file type needs to be inferred. 
+/// +/// # Returns +/// +/// Returns `Result` which is Ok if the file type could be successfully inferred, +/// otherwise returns an error in case of failure to determine or parse the file format or extension. +/// +/// # Errors +/// +/// This function returns an error in two cases: +/// - If the 'format' option is not set and the file extension cannot be retrieved from `target`. +/// - If the file extension is found but cannot be converted into a valid string. +/// +pub fn try_infer_file_type( + options: &mut HashMap, + target: &str, +) -> Result { + let explicit_format = options.remove("format"); + let format = match explicit_format { + Some(s) => FileType::from_str(&s), + None => { + // try to infer file format from file extension + let extension: &str = &Path::new(target) + .extension() + .ok_or(DataFusionError::Configuration( + "Format not explicitly set and unable to get file extension!" + .to_string(), + ))? + .to_str() + .ok_or(DataFusionError::Configuration( + "Format not explicitly set and failed to parse file extension!" + .to_string(), + ))? + .to_lowercase(); + + FileType::from_str(extension) + } + }?; + + Ok(format) +} + +/// Extracts and parses the 'partition_by' option from a provided options hashmap. +/// +/// This function looks for a 'partition_by' key in the options hashmap. If found, +/// it splits the value by commas, trims each resulting string, and replaces double +/// single quotes with a single quote. It returns a vector of partition column names. +/// +/// # Arguments +/// +/// * `options` - A mutable reference to a HashMap containing options where 'partition_by' +/// might be specified. +/// +/// # Returns +/// +/// Returns a `Vec` containing partition column names. If the 'partition_by' option +/// is not present, returns an empty vector. +pub fn take_partition_by(options: &mut HashMap) -> Vec { + let partition_by = options.remove("partition_by"); + match partition_by { + Some(part_cols) => part_cols + .split(',') + .map(|s| s.trim().replace("''", "'")) + .collect::>(), + None => vec![], + } +} diff --git a/datafusion/sql/src/unparser/dialect.rs b/datafusion/sql/src/unparser/dialect.rs new file mode 100644 index 000000000000..3af33ad0afda --- /dev/null +++ b/datafusion/sql/src/unparser/dialect.rs @@ -0,0 +1,73 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +/// Dialect is used to capture dialect specific syntax. 
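// Illustrative sketch, not part of this diff: a downstream crate can control the
// unparser's identifier quoting by implementing the trait defined below (or by using
// the CustomDialect further down in this file). The dialect name here is a made-up example.
struct BacktickDialect;

impl Dialect for BacktickDialect {
    fn identifier_quote_style(&self) -> Option<char> {
        Some('`')
    }
}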
+/// Note: this trait will eventually be replaced by the Dialect in the SQLparser package +/// +/// See +pub trait Dialect { + fn identifier_quote_style(&self) -> Option<char>; +} +pub struct DefaultDialect {} + +impl Dialect for DefaultDialect { + fn identifier_quote_style(&self) -> Option<char> { + None + } +} + +pub struct PostgreSqlDialect {} + +impl Dialect for PostgreSqlDialect { + fn identifier_quote_style(&self) -> Option<char> { + Some('"') + } +} + +pub struct MySqlDialect {} + +impl Dialect for MySqlDialect { + fn identifier_quote_style(&self) -> Option<char> { + Some('`') + } +} + +pub struct SqliteDialect {} + +impl Dialect for SqliteDialect { + fn identifier_quote_style(&self) -> Option<char> { + Some('`') + } +} + +pub struct CustomDialect { + identifier_quote_style: Option<char>, +} + +impl CustomDialect { + pub fn new(identifier_quote_style: Option<char>) -> Self { + Self { + identifier_quote_style, + } + } +} + +impl Dialect for CustomDialect { + fn identifier_quote_style(&self) -> Option<char> { + self.identifier_quote_style + } +} diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs new file mode 100644 index 000000000000..bb14c8a70739 --- /dev/null +++ b/datafusion/sql/src/unparser/expr.rs @@ -0,0 +1,355 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_common::{not_impl_err, Column, Result, ScalarValue}; +use datafusion_expr::{ + expr::{Alias, InList, ScalarFunction, WindowFunction}, + Between, BinaryExpr, Case, Cast, Expr, Like, Operator, +}; +use sqlparser::ast; + +use super::Unparser; + +/// Convert a DataFusion [`Expr`] to `sqlparser::ast::Expr` +/// +/// This function is the opposite of `SqlToRel::sql_to_expr` and can +/// be used to, among other things, convert `Expr`s to strings. +/// +/// # Example +/// ``` +/// use datafusion_expr::{col, lit}; +/// use datafusion_sql::unparser::expr_to_sql; +/// let expr = col("a").gt(lit(4)); +/// let sql = expr_to_sql(&expr).unwrap(); +/// +/// assert_eq!(format!("{}", sql), "a > 4") +/// ``` +pub fn expr_to_sql(expr: &Expr) -> Result<ast::Expr> { + let unparser = Unparser::default(); + unparser.expr_to_sql(expr) +} + +impl Unparser<'_> { + pub fn expr_to_sql(&self, expr: &Expr) -> Result<ast::Expr> { + match expr { + Expr::InList(InList { + expr, + list: _, + negated: _, + }) => { + not_impl_err!("Unsupported expression: {expr:?}") + } + Expr::ScalarFunction(ScalarFunction { ..
}) => { + not_impl_err!("Unsupported expression: {expr:?}") + } + Expr::Between(Between { + expr, + negated: _, + low: _, + high: _, + }) => { + not_impl_err!("Unsupported expression: {expr:?}") + } + Expr::Column(col) => self.col_to_sql(col), + Expr::BinaryExpr(BinaryExpr { left, op, right }) => { + let l = self.expr_to_sql(left.as_ref())?; + let r = self.expr_to_sql(right.as_ref())?; + let op = self.op_to_sql(op)?; + + Ok(self.binary_op_to_sql(l, r, op)) + } + Expr::Case(Case { + expr, + when_then_expr: _, + else_expr: _, + }) => { + not_impl_err!("Unsupported expression: {expr:?}") + } + Expr::Cast(Cast { expr, data_type: _ }) => { + not_impl_err!("Unsupported expression: {expr:?}") + } + Expr::Literal(value) => Ok(ast::Expr::Value(self.scalar_to_sql(value)?)), + Expr::Alias(Alias { expr, name: _, .. }) => self.expr_to_sql(expr), + Expr::WindowFunction(WindowFunction { + fun: _, + args: _, + partition_by: _, + order_by: _, + window_frame: _, + null_treatment: _, + }) => { + not_impl_err!("Unsupported expression: {expr:?}") + } + Expr::Like(Like { + negated: _, + expr, + pattern: _, + escape_char: _, + case_insensitive: _, + }) => { + not_impl_err!("Unsupported expression: {expr:?}") + } + _ => not_impl_err!("Unsupported expression: {expr:?}"), + } + } + + fn col_to_sql(&self, col: &Column) -> Result { + if let Some(table_ref) = &col.relation { + let mut id = table_ref.to_vec(); + id.push(col.name.to_string()); + return Ok(ast::Expr::CompoundIdentifier( + id.iter().map(|i| self.new_ident(i.to_string())).collect(), + )); + } + Ok(ast::Expr::Identifier(self.new_ident(col.name.to_string()))) + } + + fn new_ident(&self, str: String) -> ast::Ident { + ast::Ident { + value: str, + quote_style: self.dialect.identifier_quote_style(), + } + } + + fn binary_op_to_sql( + &self, + lhs: ast::Expr, + rhs: ast::Expr, + op: ast::BinaryOperator, + ) -> ast::Expr { + ast::Expr::BinaryOp { + left: Box::new(lhs), + op, + right: Box::new(rhs), + } + } + + fn op_to_sql(&self, op: &Operator) -> Result { + match op { + Operator::Eq => Ok(ast::BinaryOperator::Eq), + Operator::NotEq => Ok(ast::BinaryOperator::NotEq), + Operator::Lt => Ok(ast::BinaryOperator::Lt), + Operator::LtEq => Ok(ast::BinaryOperator::LtEq), + Operator::Gt => Ok(ast::BinaryOperator::Gt), + Operator::GtEq => Ok(ast::BinaryOperator::GtEq), + Operator::Plus => Ok(ast::BinaryOperator::Plus), + Operator::Minus => Ok(ast::BinaryOperator::Minus), + Operator::Multiply => Ok(ast::BinaryOperator::Multiply), + Operator::Divide => Ok(ast::BinaryOperator::Divide), + Operator::Modulo => Ok(ast::BinaryOperator::Modulo), + Operator::And => Ok(ast::BinaryOperator::And), + Operator::Or => Ok(ast::BinaryOperator::Or), + Operator::IsDistinctFrom => not_impl_err!("unsupported operation: {op:?}"), + Operator::IsNotDistinctFrom => not_impl_err!("unsupported operation: {op:?}"), + Operator::RegexMatch => Ok(ast::BinaryOperator::PGRegexMatch), + Operator::RegexIMatch => Ok(ast::BinaryOperator::PGRegexIMatch), + Operator::RegexNotMatch => Ok(ast::BinaryOperator::PGRegexNotMatch), + Operator::RegexNotIMatch => Ok(ast::BinaryOperator::PGRegexNotIMatch), + Operator::ILikeMatch => Ok(ast::BinaryOperator::PGILikeMatch), + Operator::NotLikeMatch => Ok(ast::BinaryOperator::PGNotLikeMatch), + Operator::LikeMatch => Ok(ast::BinaryOperator::PGLikeMatch), + Operator::NotILikeMatch => Ok(ast::BinaryOperator::PGNotILikeMatch), + Operator::BitwiseAnd => Ok(ast::BinaryOperator::BitwiseAnd), + Operator::BitwiseOr => Ok(ast::BinaryOperator::BitwiseOr), + Operator::BitwiseXor => 
Ok(ast::BinaryOperator::BitwiseXor), + Operator::BitwiseShiftRight => Ok(ast::BinaryOperator::PGBitwiseShiftRight), + Operator::BitwiseShiftLeft => Ok(ast::BinaryOperator::PGBitwiseShiftLeft), + Operator::StringConcat => Ok(ast::BinaryOperator::StringConcat), + Operator::AtArrow => not_impl_err!("unsupported operation: {op:?}"), + Operator::ArrowAt => not_impl_err!("unsupported operation: {op:?}"), + } + } + + fn scalar_to_sql(&self, v: &ScalarValue) -> Result { + match v { + ScalarValue::Null => Ok(ast::Value::Null), + ScalarValue::Boolean(Some(b)) => Ok(ast::Value::Boolean(b.to_owned())), + ScalarValue::Boolean(None) => Ok(ast::Value::Null), + ScalarValue::Float32(Some(f)) => Ok(ast::Value::Number(f.to_string(), false)), + ScalarValue::Float32(None) => Ok(ast::Value::Null), + ScalarValue::Float64(Some(f)) => Ok(ast::Value::Number(f.to_string(), false)), + ScalarValue::Float64(None) => Ok(ast::Value::Null), + ScalarValue::Decimal128(Some(_), ..) => { + not_impl_err!("Unsupported scalar: {v:?}") + } + ScalarValue::Decimal128(None, ..) => Ok(ast::Value::Null), + ScalarValue::Decimal256(Some(_), ..) => { + not_impl_err!("Unsupported scalar: {v:?}") + } + ScalarValue::Decimal256(None, ..) => Ok(ast::Value::Null), + ScalarValue::Int8(Some(i)) => Ok(ast::Value::Number(i.to_string(), false)), + ScalarValue::Int8(None) => Ok(ast::Value::Null), + ScalarValue::Int16(Some(i)) => Ok(ast::Value::Number(i.to_string(), false)), + ScalarValue::Int16(None) => Ok(ast::Value::Null), + ScalarValue::Int32(Some(i)) => Ok(ast::Value::Number(i.to_string(), false)), + ScalarValue::Int32(None) => Ok(ast::Value::Null), + ScalarValue::Int64(Some(i)) => Ok(ast::Value::Number(i.to_string(), false)), + ScalarValue::Int64(None) => Ok(ast::Value::Null), + ScalarValue::UInt8(Some(ui)) => Ok(ast::Value::Number(ui.to_string(), false)), + ScalarValue::UInt8(None) => Ok(ast::Value::Null), + ScalarValue::UInt16(Some(ui)) => { + Ok(ast::Value::Number(ui.to_string(), false)) + } + ScalarValue::UInt16(None) => Ok(ast::Value::Null), + ScalarValue::UInt32(Some(ui)) => { + Ok(ast::Value::Number(ui.to_string(), false)) + } + ScalarValue::UInt32(None) => Ok(ast::Value::Null), + ScalarValue::UInt64(Some(ui)) => { + Ok(ast::Value::Number(ui.to_string(), false)) + } + ScalarValue::UInt64(None) => Ok(ast::Value::Null), + ScalarValue::Utf8(Some(str)) => { + Ok(ast::Value::SingleQuotedString(str.to_string())) + } + ScalarValue::Utf8(None) => Ok(ast::Value::Null), + ScalarValue::LargeUtf8(Some(str)) => { + Ok(ast::Value::SingleQuotedString(str.to_string())) + } + ScalarValue::LargeUtf8(None) => Ok(ast::Value::Null), + ScalarValue::Binary(Some(_)) => not_impl_err!("Unsupported scalar: {v:?}"), + ScalarValue::Binary(None) => Ok(ast::Value::Null), + ScalarValue::FixedSizeBinary(..) 
=> { + not_impl_err!("Unsupported scalar: {v:?}") + } + ScalarValue::LargeBinary(Some(_)) => { + not_impl_err!("Unsupported scalar: {v:?}") + } + ScalarValue::LargeBinary(None) => Ok(ast::Value::Null), + ScalarValue::FixedSizeList(_a) => not_impl_err!("Unsupported scalar: {v:?}"), + ScalarValue::List(_a) => not_impl_err!("Unsupported scalar: {v:?}"), + ScalarValue::LargeList(_a) => not_impl_err!("Unsupported scalar: {v:?}"), + ScalarValue::Date32(Some(_d)) => not_impl_err!("Unsupported scalar: {v:?}"), + ScalarValue::Date32(None) => Ok(ast::Value::Null), + ScalarValue::Date64(Some(_d)) => not_impl_err!("Unsupported scalar: {v:?}"), + ScalarValue::Date64(None) => Ok(ast::Value::Null), + ScalarValue::Time32Second(Some(_t)) => { + not_impl_err!("Unsupported scalar: {v:?}") + } + ScalarValue::Time32Second(None) => Ok(ast::Value::Null), + ScalarValue::Time32Millisecond(Some(_t)) => { + not_impl_err!("Unsupported scalar: {v:?}") + } + ScalarValue::Time32Millisecond(None) => Ok(ast::Value::Null), + ScalarValue::Time64Microsecond(Some(_t)) => { + not_impl_err!("Unsupported scalar: {v:?}") + } + ScalarValue::Time64Microsecond(None) => Ok(ast::Value::Null), + ScalarValue::Time64Nanosecond(Some(_t)) => { + not_impl_err!("Unsupported scalar: {v:?}") + } + ScalarValue::Time64Nanosecond(None) => Ok(ast::Value::Null), + ScalarValue::TimestampSecond(Some(_ts), _) => { + not_impl_err!("Unsupported scalar: {v:?}") + } + ScalarValue::TimestampSecond(None, _) => Ok(ast::Value::Null), + ScalarValue::TimestampMillisecond(Some(_ts), _) => { + not_impl_err!("Unsupported scalar: {v:?}") + } + ScalarValue::TimestampMillisecond(None, _) => Ok(ast::Value::Null), + ScalarValue::TimestampMicrosecond(Some(_ts), _) => { + not_impl_err!("Unsupported scalar: {v:?}") + } + ScalarValue::TimestampMicrosecond(None, _) => Ok(ast::Value::Null), + ScalarValue::TimestampNanosecond(Some(_ts), _) => { + not_impl_err!("Unsupported scalar: {v:?}") + } + ScalarValue::TimestampNanosecond(None, _) => Ok(ast::Value::Null), + ScalarValue::IntervalYearMonth(Some(_i)) => { + not_impl_err!("Unsupported scalar: {v:?}") + } + ScalarValue::IntervalYearMonth(None) => Ok(ast::Value::Null), + ScalarValue::IntervalDayTime(Some(_i)) => { + not_impl_err!("Unsupported scalar: {v:?}") + } + ScalarValue::IntervalDayTime(None) => Ok(ast::Value::Null), + ScalarValue::IntervalMonthDayNano(Some(_i)) => { + not_impl_err!("Unsupported scalar: {v:?}") + } + ScalarValue::IntervalMonthDayNano(None) => Ok(ast::Value::Null), + ScalarValue::DurationSecond(Some(_d)) => { + not_impl_err!("Unsupported scalar: {v:?}") + } + ScalarValue::DurationSecond(None) => Ok(ast::Value::Null), + ScalarValue::DurationMillisecond(Some(_d)) => { + not_impl_err!("Unsupported scalar: {v:?}") + } + ScalarValue::DurationMillisecond(None) => Ok(ast::Value::Null), + ScalarValue::DurationMicrosecond(Some(_d)) => { + not_impl_err!("Unsupported scalar: {v:?}") + } + ScalarValue::DurationMicrosecond(None) => Ok(ast::Value::Null), + ScalarValue::DurationNanosecond(Some(_d)) => { + not_impl_err!("Unsupported scalar: {v:?}") + } + ScalarValue::DurationNanosecond(None) => Ok(ast::Value::Null), + ScalarValue::Struct(_) => not_impl_err!("Unsupported scalar: {v:?}"), + ScalarValue::Dictionary(..) 
=> not_impl_err!("Unsupported scalar: {v:?}"), + } + } +} + +#[cfg(test)] +mod tests { + use datafusion_common::TableReference; + use datafusion_expr::{col, lit}; + + use crate::unparser::dialect::CustomDialect; + + use super::*; + + #[test] + fn expr_to_sql_ok() -> Result<()> { + let tests: Vec<(Expr, &str)> = vec![ + (col("a").gt(lit(4)), r#"a > 4"#), + ( + Expr::Column(Column { + relation: Some(TableReference::partial("a", "b")), + name: "c".to_string(), + }) + .gt(lit(4)), + r#"a.b.c > 4"#, + ), + ]; + + for (expr, expected) in tests { + let ast = expr_to_sql(&expr)?; + + let actual = format!("{}", ast); + + assert_eq!(actual, expected); + } + + Ok(()) + } + + #[test] + fn custom_dialect() -> Result<()> { + let dialect = CustomDialect::new(Some('\'')); + let unparser = Unparser::new(&dialect); + + let expr = col("a").gt(lit(4)); + let ast = unparser.expr_to_sql(&expr)?; + + let actual = format!("{}", ast); + + let expected = r#"'a' > 4"#; + assert_eq!(actual, expected); + + Ok(()) + } +} diff --git a/datafusion/common/src/file_options/parse_utils.rs b/datafusion/sql/src/unparser/mod.rs similarity index 58% rename from datafusion/common/src/file_options/parse_utils.rs rename to datafusion/sql/src/unparser/mod.rs index 38cf5eb489f7..77a9de0975ed 100644 --- a/datafusion/common/src/file_options/parse_utils.rs +++ b/datafusion/sql/src/unparser/mod.rs @@ -15,17 +15,27 @@ // specific language governing permissions and limitations // under the License. -//! Functions for parsing arbitrary passed strings to valid file_option settings -use crate::{DataFusionError, Result}; +mod expr; -/// Converts a String option to a bool, or returns an error if not a valid bool string. -pub(crate) fn parse_boolean_string(option: &str, value: String) -> Result { - match value.to_lowercase().as_str() { - "true" => Ok(true), - "false" => Ok(false), - _ => Err(DataFusionError::Configuration(format!( - "Unsupported value {value} for option {option}! \ - Valid values are true or false!" - ))), +pub use expr::expr_to_sql; + +use self::dialect::{DefaultDialect, Dialect}; +pub mod dialect; + +pub struct Unparser<'a> { + dialect: &'a dyn Dialect, +} + +impl<'a> Unparser<'a> { + pub fn new(dialect: &'a dyn Dialect) -> Self { + Self { dialect } + } +} + +impl<'a> Default for Unparser<'a> { + fn default() -> Self { + Self { + dialect: &DefaultDialect {}, + } } } diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index 266f04580f11..19bcf6024b50 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -182,7 +182,7 @@ CREATE TABLE array_agg_distinct_list_table AS VALUES ('b', [0,1]) ; -# Apply array_sort to have determinisitic result, higher dimension nested array also works but not for array sort, +# Apply array_sort to have deterministic result, higher dimension nested array also works but not for array sort, # so they are covered in `datafusion/physical-expr/src/aggregate/array_agg_distinct.rs` query ?? 
select array_sort(c1), array_sort(c2) from ( @@ -1359,7 +1359,7 @@ NULL 4 29 1.260869565217 123 -117 23 NULL 5 -194 -13.857142857143 118 -101 14 NULL NULL 781 7.81 125 -117 100 -# TODO: array_agg_distinct output is non-determinisitic -- rewrite with array_sort(list_sort) +# TODO: array_agg_distinct output is non-deterministic -- rewrite with array_sort(list_sort) # unnest is also not available, so manually unnesting via CROSS JOIN # additional count(1) forces array_agg_distinct instead of array_agg over aggregated by c2 data # @@ -2255,9 +2255,10 @@ select median(a) from (select 1 as a where 1=0); ---- NULL -query error DataFusion error: Execution error: aggregate function needs at least one non-null element +query I select approx_median(a) from (select 1 as a where 1=0); - +---- +NULL # aggregate_decimal_sum query RT diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 434fe8c959e6..b729e5c10f3d 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -5598,6 +5598,39 @@ select ---- [] [9223372036854775807] [] [9223372036854775806] [] [-9223372036854775807] [] [-9223372036854775808] +# Test range(start, stop, step) with NULL values +query ? +select range(start, stop, step) from + (values (1), (NULL)) as start_values(start), + (values (10), (NULL)) as stop_values(stop), + (values (3), (NULL)) as step_values(step) +where start is null or stop is null or step is null +---- +NULL +NULL +NULL +NULL +NULL +NULL +NULL + +# Test range(start, stop) with NULL values +query ? +select range(start, stop) from + (values (1), (NULL)) as start_values(start), + (values (10), (NULL)) as stop_values(stop) +where start is null or stop is null +---- +NULL +NULL +NULL + +# Test range(stop) with NULL value +query ? +select range(NULL) +---- +NULL + ## should throw error query error select range(DATE '1992-09-01', NULL, INTERVAL '1' YEAR); @@ -5678,6 +5711,39 @@ select ---- [9223372036854775807] [9223372036854775807] [-9223372036854775808] [-9223372036854775808] +# Test generate_series(start, stop, step) with NULL values +query ? +select generate_series(start, stop, step) from + (values (1), (NULL)) as start_values(start), + (values (10), (NULL)) as stop_values(stop), + (values (3), (NULL)) as step_values(step) +where start is null or stop is null or step is null +---- +NULL +NULL +NULL +NULL +NULL +NULL +NULL + +# Test generate_series(start, stop) with NULL values +query ? +select generate_series(start, stop) from + (values (1), (NULL)) as start_values(start), + (values (10), (NULL)) as stop_values(stop) +where start is null or stop is null +---- +NULL +NULL +NULL + +# Test generate_series(stop) with NULL value +query ? 
+select generate_series(NULL) +---- +NULL + ## array_except diff --git a/datafusion/sqllogictest/test_files/copy.slt b/datafusion/sqllogictest/test_files/copy.slt index 463b51c940d1..df23a993ebce 100644 --- a/datafusion/sqllogictest/test_files/copy.slt +++ b/datafusion/sqllogictest/test_files/copy.slt @@ -21,13 +21,13 @@ create table source_table(col1 integer, col2 varchar) as values (1, 'Foo'), (2, # Copy to directory as multiple files query IT -COPY source_table TO 'test_files/scratch/copy/table/' (format parquet, compression 'zstd(10)'); +COPY source_table TO 'test_files/scratch/copy/table/' (format parquet, 'parquet.compression' 'zstd(10)'); ---- 2 # Copy to directory as partitioned files query IT -COPY source_table TO 'test_files/scratch/copy/partitioned_table1/' (format parquet, compression 'zstd(10)', partition_by 'col2'); +COPY source_table TO 'test_files/scratch/copy/partitioned_table1/' (format parquet, 'parquet.compression' 'zstd(10)', partition_by 'col2'); ---- 2 @@ -55,7 +55,7 @@ select * from validate_partitioned_parquet_bar order by col1; # Copy to directory as partitioned files query ITT COPY (values (1, 'a', 'x'), (2, 'b', 'y'), (3, 'c', 'z')) TO 'test_files/scratch/copy/partitioned_table2/' -(format parquet, compression 'zstd(10)', partition_by 'column2, column3'); +(format parquet, partition_by 'column2, column3', 'parquet.compression' 'zstd(10)'); ---- 3 @@ -83,7 +83,7 @@ select * from validate_partitioned_parquet_a_x order by column1; # Copy to directory as partitioned files query TTT COPY (values ('1', 'a', 'x'), ('2', 'b', 'y'), ('3', 'c', 'z')) TO 'test_files/scratch/copy/partitioned_table3/' -(format parquet, compression 'zstd(10)', partition_by 'column1, column3'); +(format parquet, 'parquet.compression' 'zstd(10)', partition_by 'column1, column3'); ---- 3 @@ -139,10 +139,10 @@ LOCATION 'test_files/scratch/copy/escape_quote/' PARTITIONED BY ("'test2'", "'te #select * from validate_partitioned_escape_quote; query TT -EXPLAIN COPY source_table TO 'test_files/scratch/copy/table/' (format parquet, compression 'zstd(10)'); +EXPLAIN COPY source_table TO 'test_files/scratch/copy/table/' (format parquet, 'parquet.compression' 'zstd(10)'); ---- logical_plan -CopyTo: format=parquet output_url=test_files/scratch/copy/table/ options: (compression 'zstd(10)') +CopyTo: format=parquet output_url=test_files/scratch/copy/table/ options: (parquet.compression zstd(10)) --TableScan: source_table projection=[col1, col2] physical_plan FileSinkExec: sink=ParquetSink(file_groups=[]) @@ -152,10 +152,15 @@ FileSinkExec: sink=ParquetSink(file_groups=[]) query error DataFusion error: Invalid or Unsupported Configuration: Format not explicitly set and unable to get file extension! 
EXPLAIN COPY source_table to 'test_files/scratch/copy/table/' -query error DataFusion error: SQL error: ParserError\("Expected end of statement, found: query"\) -EXPLAIN COPY source_table to 'test_files/scratch/copy/table/' (format parquet) query TT -EXPLAIN COPY source_table to 'test_files/scratch/copy/table/' (format parquet, per_thread_output true) +EXPLAIN COPY source_table to 'test_files/scratch/copy/table/' (format parquet) +---- +logical_plan +CopyTo: format=parquet output_url=test_files/scratch/copy/table/ options: () +--TableScan: source_table projection=[col1, col2] +physical_plan +FileSinkExec: sink=ParquetSink(file_groups=[]) +--MemoryExec: partitions=1, partition_sizes=[1] # Copy more files to directory via query query IT @@ -251,30 +256,30 @@ query IT COPY source_table TO 'test_files/scratch/copy/table_with_options/' (format parquet, -compression snappy, -'compression::col1' 'zstd(5)', -'compression::col2' snappy, -max_row_group_size 12345, -data_pagesize_limit 1234, -write_batch_size 1234, -writer_version 2.0, -dictionary_page_size_limit 123, -created_by 'DF copy.slt', -column_index_truncate_length 123, -data_page_row_count_limit 1234, -bloom_filter_enabled true, -'bloom_filter_enabled::col1' false, -'bloom_filter_fpp::col2' 0.456, -'bloom_filter_ndv::col2' 456, -encoding plain, -'encoding::col1' DELTA_BINARY_PACKED, -'dictionary_enabled::col2' true, -dictionary_enabled false, -statistics_enabled page, -'statistics_enabled::col2' none, -max_statistics_size 123, -bloom_filter_fpp 0.001, -bloom_filter_ndv 100 +'parquet.compression' snappy, +'parquet.compression::col1' 'zstd(5)', +'parquet.compression::col2' snappy, +'parquet.max_row_group_size' 12345, +'parquet.data_pagesize_limit' 1234, +'parquet.write_batch_size' 1234, +'parquet.writer_version' 2.0, +'parquet.dictionary_page_size_limit' 123, +'parquet.created_by' 'DF copy.slt', +'parquet.column_index_truncate_length' 123, +'parquet.data_page_row_count_limit' 1234, +'parquet.bloom_filter_enabled' true, +'parquet.bloom_filter_enabled::col1' false, +'parquet.bloom_filter_fpp::col2' 0.456, +'parquet.bloom_filter_ndv::col2' 456, +'parquet.encoding' plain, +'parquet.encoding::col1' DELTA_BINARY_PACKED, +'parquet.dictionary_enabled::col2' true, +'parquet.dictionary_enabled' false, +'parquet.statistics_enabled' page, +'parquet.statistics_enabled::col2' none, +'parquet.max_statistics_size' 123, +'parquet.bloom_filter_fpp' 0.001, +'parquet.bloom_filter_ndv' 100 ) ---- 2 @@ -307,7 +312,7 @@ select * from validate_parquet_single; # copy from table to folder of compressed json files query IT -COPY source_table to 'test_files/scratch/copy/table_json_gz' (format json, compression 'gzip'); +COPY source_table to 'test_files/scratch/copy/table_json_gz' (format json, 'json.compression' gzip); ---- 2 @@ -323,7 +328,7 @@ select * from validate_json_gz; # copy from table to folder of compressed csv files query IT -COPY source_table to 'test_files/scratch/copy/table_csv' (format csv, header false, compression 'gzip'); +COPY source_table to 'test_files/scratch/copy/table_csv' (format csv, 'csv.has_header' false, 'csv.compression' gzip); ---- 2 @@ -390,11 +395,11 @@ query IT COPY source_table to 'test_files/scratch/copy/table_csv_with_options' (format csv, -header false, -compression 'uncompressed', -datetime_format '%FT%H:%M:%S.%9f', -delimiter ';', -null_value 'NULLVAL'); +'csv.has_header' false, +'csv.compression' uncompressed, +'csv.datetime_format' '%FT%H:%M:%S.%9f', +'csv.delimiter' ';', +'csv.null_value' 'NULLVAL'); ---- 2 @@ -469,8 +474,8 @@ 
select * from validate_arrow; # Error cases: # Copy from table with options -query error DataFusion error: Invalid or Unsupported Configuration: Found unsupported option row_group_size with value 55 for JSON format! -COPY source_table to 'test_files/scratch/copy/table.json' (row_group_size 55); +query error DataFusion error: Invalid or Unsupported Configuration: Config value "row_group_size" not found on JsonOptions +COPY source_table to 'test_files/scratch/copy/table.json' ('json.row_group_size' 55); # Incomplete statement query error DataFusion error: SQL error: ParserError\("Expected \), found: EOF"\) diff --git a/datafusion/sqllogictest/test_files/create_external_table.slt b/datafusion/sqllogictest/test_files/create_external_table.slt index c08d5a55c366..3b85dd9e986f 100644 --- a/datafusion/sqllogictest/test_files/create_external_table.slt +++ b/datafusion/sqllogictest/test_files/create_external_table.slt @@ -99,3 +99,10 @@ CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH LOCATION 'foo.csv'; # Unrecognized random clause statement error DataFusion error: SQL error: ParserError\("Unexpected token FOOBAR"\) CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV FOOBAR BARBAR BARFOO LOCATION 'foo.csv'; + +# Conflicting options +statement error DataFusion error: Invalid or Unsupported Configuration: Key "parquet.column_index_truncate_length" is not applicable for CSV format +CREATE EXTERNAL TABLE csv_table (column1 int) +STORED AS CSV +LOCATION 'foo.csv' +OPTIONS ('csv.delimiter' ';', 'parquet.column_index_truncate_length' '123') diff --git a/datafusion/sqllogictest/test_files/csv_files.slt b/datafusion/sqllogictest/test_files/csv_files.slt index 0e8171b5a870..7b299c0cf143 100644 --- a/datafusion/sqllogictest/test_files/csv_files.slt +++ b/datafusion/sqllogictest/test_files/csv_files.slt @@ -23,7 +23,7 @@ c2 VARCHAR ) STORED AS CSV WITH HEADER ROW DELIMITER ',' -OPTIONS ('quote' '~') +OPTIONS ('csv.quote' '~') LOCATION '../core/tests/data/quote.csv'; statement ok @@ -33,7 +33,7 @@ c2 VARCHAR ) STORED AS CSV WITH HEADER ROW DELIMITER ',' -OPTIONS ('escape' '\"') +OPTIONS ('csv.escape' '\') LOCATION '../core/tests/data/escape.csv'; query TT @@ -64,6 +64,31 @@ id7 value"7 id8 value"8 id9 value"9 +statement ok +CREATE EXTERNAL TABLE csv_with_escape_2 ( +c1 VARCHAR, +c2 VARCHAR +) STORED AS CSV +WITH HEADER ROW +DELIMITER ',' +OPTIONS ('csv.escape' '"') +LOCATION '../core/tests/data/escape.csv'; + +# TODO: Validate this with better data. 
+query TT +select * from csv_with_escape_2; +---- +id0 value\0" +id1 value\1" +id2 value\2" +id3 value\3" +id4 value\4" +id5 value\5" +id6 value\6" +id7 value\7" +id8 value\8" +id9 value\9" + # Read partitioned csv statement ok diff --git a/datafusion/sqllogictest/test_files/repartition_scan.slt b/datafusion/sqllogictest/test_files/repartition_scan.slt index 9b4b449340b0..15fe670a454c 100644 --- a/datafusion/sqllogictest/test_files/repartition_scan.slt +++ b/datafusion/sqllogictest/test_files/repartition_scan.slt @@ -158,7 +158,7 @@ DROP TABLE parquet_table_with_order; # create a single csv file statement ok COPY (VALUES (1), (2), (3), (4), (5)) TO 'test_files/scratch/repartition_scan/csv_table/1.csv' -(FORMAT csv, HEADER true); +(FORMAT csv, 'csv.has_header' true); statement ok CREATE EXTERNAL TABLE csv_table(column1 int) diff --git a/datafusion/sqllogictest/test_files/set_variable.slt b/datafusion/sqllogictest/test_files/set_variable.slt index 440fb2c6ef2b..fccd144a37fb 100644 --- a/datafusion/sqllogictest/test_files/set_variable.slt +++ b/datafusion/sqllogictest/test_files/set_variable.slt @@ -65,7 +65,7 @@ SHOW datafusion.execution.batch_size datafusion.execution.batch_size 1 # set variable unknown variable -statement error DataFusion error: External error: could not find config namespace for key "aabbcc" +statement error DataFusion error: Invalid or Unsupported Configuration: could not find config namespace for key "aabbcc" SET aabbcc to '1' # set bool variable diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index a474da85b3d4..39c105a4dcce 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -4299,11 +4299,283 @@ LIMIT 5; 14 94 # Tests schema and data are in sync for mixed nulls and not nulls values for builtin window function -query T rowsort -select lag(a) over () as x1 +query T +select lag(a) over (order by a ASC NULLS FIRST) as x1 from (select 2 id, 'b' a union all select 1 id, null a union all select 3 id, null); ---- NULL NULL -b +NULL + +# Test for ignore nulls in FIRST_VALUE +statement ok +CREATE TABLE t AS VALUES (null::bigint), (3), (4); + +query I +SELECT FIRST_VALUE(column1) OVER() FROM t; +---- +NULL +NULL +NULL + +query I +SELECT FIRST_VALUE(column1) RESPECT NULLS OVER() FROM t; +---- +NULL +NULL +NULL + +query I +SELECT FIRST_VALUE(column1) IGNORE NULLS OVER() FROM t; +---- +3 +3 +3 + +statement ok +DROP TABLE t; + +# Test for ignore nulls with ORDER BY in FIRST_VALUE +statement ok +CREATE TABLE t AS VALUES (3, 4), (4, 3), (null::bigint, 1), (null::bigint, 2), (5, 5), (6, 6); + +query II +SELECT column1, column2 FROM t ORDER BY column2; +---- +NULL 1 +NULL 2 +4 3 +3 4 +5 5 +6 6 + +query II +SELECT FIRST_VALUE(column1) OVER(ORDER BY column2), column2 FROM t; +---- +NULL 1 +NULL 2 +NULL 3 +NULL 4 +NULL 5 +NULL 6 + +query II +SELECT FIRST_VALUE(column1) RESPECT NULLS OVER(ORDER BY column2), column2 FROM t; +---- +NULL 1 +NULL 2 +NULL 3 +NULL 4 +NULL 5 +NULL 6 + +query II +SELECT FIRST_VALUE(column1) IGNORE NULLS OVER(ORDER BY column2), column2 FROM t; +---- +NULL 1 +NULL 2 +4 3 +4 4 +4 5 +4 6 + +query II +SELECT FIRST_VALUE(column1)OVER(ORDER BY column2 RANGE BETWEEN 1 PRECEDING AND 1 FOLLOWING), column2 FROM t; +---- +NULL 1 +NULL 2 +NULL 3 +4 4 +3 5 +5 6 + +query II +SELECT FIRST_VALUE(column1) IGNORE NULLS OVER(ORDER BY column2 RANGE BETWEEN 1 PRECEDING AND 1 FOLLOWING), column2 FROM t; +---- +NULL 1 +4 2 +4 3 +4 4 +3 5 +5 6 + +statement ok +DROP 
TABLE t; + +# Test for ignore nulls with ORDER BY in FIRST_VALUE with all NULL values +statement ok +CREATE TABLE t AS VALUES (null::bigint, 4), (null::bigint, 3), (null::bigint, 1), (null::bigint, 2); + +query II +SELECT FIRST_VALUE(column1) OVER(ORDER BY column2), column2 FROM t; +---- +NULL 1 +NULL 2 +NULL 3 +NULL 4 + +query II +SELECT FIRST_VALUE(column1) RESPECT NULLS OVER(ORDER BY column2), column2 FROM t; +---- +NULL 1 +NULL 2 +NULL 3 +NULL 4 + +query II +SELECT FIRST_VALUE(column1) IGNORE NULLS OVER(ORDER BY column2), column2 FROM t; +---- +NULL 1 +NULL 2 +NULL 3 +NULL 4 + +statement ok +DROP TABLE t; + +# Test for ignore nulls in LAST_VALUE +statement ok +CREATE TABLE t AS VALUES (1), (3), (null::bigint); + +query I +SELECT LAST_VALUE(column1) OVER() FROM t; +---- +NULL +NULL +NULL + +query I +SELECT LAST_VALUE(column1) RESPECT NULLS OVER() FROM t; +---- +NULL +NULL +NULL + +query I +SELECT LAST_VALUE(column1) IGNORE NULLS OVER() FROM t; +---- +3 +3 +3 + +statement ok +DROP TABLE t; + +# Test for ignore nulls with ORDER BY in LAST_VALUE +statement ok +CREATE TABLE t AS VALUES (3, 4), (4, 3), (null::bigint, 1), (null::bigint, 2), (5, 5), (6, 6); + +query II +SELECT column1, column2 FROM t ORDER BY column2 DESC NULLS LAST; +---- +6 6 +5 5 +3 4 +4 3 +NULL 2 +NULL 1 + +query II +SELECT LAST_VALUE(column1) OVER(ORDER BY column2 DESC NULLS LAST), column2 FROM t; +---- +6 6 +5 5 +3 4 +4 3 +NULL 2 +NULL 1 + +query II +SELECT LAST_VALUE(column1) IGNORE NULLS OVER(ORDER BY column2 DESC NULLS LAST), column2 FROM t; +---- +6 6 +5 5 +3 4 +4 3 +4 2 +4 1 + +query II +SELECT LAST_VALUE(column1) OVER(ORDER BY column2 DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), column2 FROM t; +---- +NULL 6 +NULL 5 +NULL 4 +NULL 3 +NULL 2 +NULL 1 + +query II +SELECT LAST_VALUE(column1) RESPECT NULLS OVER(ORDER BY column2 DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), column2 FROM t; +---- +NULL 6 +NULL 5 +NULL 4 +NULL 3 +NULL 2 +NULL 1 + +query II +SELECT LAST_VALUE(column1) IGNORE NULLS OVER(ORDER BY column2 DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), column2 FROM t; +---- +4 6 +4 5 +4 4 +4 3 +4 2 +4 1 + +query II +SELECT LAST_VALUE(column1) OVER(ORDER BY column2 DESC NULLS LAST RANGE BETWEEN 1 PRECEDING AND 1 FOLLOWING), column2 FROM t; +---- +5 6 +3 5 +4 4 +NULL 3 +NULL 2 +NULL 1 + +query II +SELECT LAST_VALUE(column1) IGNORE NULLS OVER(ORDER BY column2 DESC NULLS LAST RANGE BETWEEN 1 PRECEDING AND 1 FOLLOWING), column2 FROM t; +---- +5 6 +3 5 +4 4 +4 3 +4 2 +NULL 1 + +statement ok +DROP TABLE t; + +# Test for ignore nulls with ORDER BY in LAST_VALUE with all NULLs +statement ok +CREATE TABLE t AS VALUES (null::bigint, 4), (null::bigint, 3), (null::bigint, 1), (null::bigint, 2); + +query II +SELECT LAST_VALUE(column1) OVER(ORDER BY column2 DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), column2 FROM t; +---- +NULL 4 +NULL 3 +NULL 2 +NULL 1 + +query II +SELECT LAST_VALUE(column1) RESPECT NULLS OVER(ORDER BY column2 DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), column2 FROM t; +---- +NULL 4 +NULL 3 +NULL 2 +NULL 1 + +query II +SELECT LAST_VALUE(column1) IGNORE NULLS OVER(ORDER BY column2 DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), column2 FROM t; +---- +NULL 4 +NULL 3 +NULL 2 +NULL 1 + +statement ok +DROP TABLE t; diff --git a/datafusion/substrait/src/physical_plan/consumer.rs b/datafusion/substrait/src/physical_plan/consumer.rs index 
3098dc386e6a..11ddb91ad391 100644 --- a/datafusion/substrait/src/physical_plan/consumer.rs +++ b/datafusion/substrait/src/physical_plan/consumer.rs @@ -125,8 +125,12 @@ pub async fn from_substrait_rel( } } - Ok(Arc::new(ParquetExec::new(base_config, None, None)) - as Arc) + Ok(Arc::new(ParquetExec::new( + base_config, + None, + None, + Default::default(), + )) as Arc) } _ => not_impl_err!( "Only LocalFile reads are supported when parsing physical" diff --git a/datafusion/substrait/tests/cases/roundtrip_physical_plan.rs b/datafusion/substrait/tests/cases/roundtrip_physical_plan.rs index e5af3f94cc05..70887e393491 100644 --- a/datafusion/substrait/tests/cases/roundtrip_physical_plan.rs +++ b/datafusion/substrait/tests/cases/roundtrip_physical_plan.rs @@ -50,8 +50,12 @@ async fn parquet_exec() -> Result<()> { table_partition_cols: vec![], output_ordering: vec![], }; - let parquet_exec: Arc = - Arc::new(ParquetExec::new(scan_config, None, None)); + let parquet_exec: Arc = Arc::new(ParquetExec::new( + scan_config, + None, + None, + Default::default(), + )); let mut extension_info: ( Vec, diff --git a/docs/source/user-guide/expressions.md b/docs/source/user-guide/expressions.md index dcb599b9b3b2..05eb063c3dc9 100644 --- a/docs/source/user-guide/expressions.md +++ b/docs/source/user-guide/expressions.md @@ -207,42 +207,44 @@ select log(-1), log(0), sqrt(-1); ## Array Expressions -| Syntax | Description | -| -------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| array_append(array, element) | Appends an element to the end of an array. `array_append([1, 2, 3], 4) -> [1, 2, 3, 4]` | -| array_concat(array[, ..., array_n]) | Concatenates arrays. `array_concat([1, 2, 3], [4, 5, 6]) -> [1, 2, 3, 4, 5, 6]` | -| array_has(array, element) | Returns true if the array contains the element `array_has([1,2,3], 1) -> true` | -| array_has_all(array, sub-array) | Returns true if all elements of sub-array exist in array `array_has_all([1,2,3], [1,3]) -> true` | -| array_has_any(array, sub-array) | Returns true if any elements exist in both arrays `array_has_any([1,2,3], [1,4]) -> true` | -| array_dims(array) | Returns an array of the array's dimensions. `array_dims([[1, 2, 3], [4, 5, 6]]) -> [2, 3]` | -| array_distinct(array) | Returns distinct values from the array after removing duplicates. `array_distinct([1, 3, 2, 3, 1, 2, 4]) -> [1, 2, 3, 4]` | -| array_element(array, index) | Extracts the element with the index n from the array `array_element([1, 2, 3, 4], 3) -> 3` | -| flatten(array) | Converts an array of arrays to a flat array `flatten([[1], [2, 3], [4, 5, 6]]) -> [1, 2, 3, 4, 5, 6]` | -| array_length(array, dimension) | Returns the length of the array dimension. `array_length([1, 2, 3, 4, 5]) -> 5` | -| array_ndims(array) | Returns the number of dimensions of the array. `array_ndims([[1, 2, 3], [4, 5, 6]]) -> 2` | -| array_pop_front(array) | Returns the array without the first element. `array_pop_front([1, 2, 3]) -> [2, 3]` | -| array_pop_back(array) | Returns the array without the last element. `array_pop_back([1, 2, 3]) -> [1, 2]` | -| array_position(array, element) | Searches for an element in the array, returns first occurrence. `array_position([1, 2, 2, 3, 4], 2) -> 2` | -| array_positions(array, element) | Searches for an element in the array, returns all occurrences. 
`array_positions([1, 2, 2, 3, 4], 2) -> [2, 3]` | -| array_prepend(array, element) | Prepends an element to the beginning of an array. `array_prepend(1, [2, 3, 4]) -> [1, 2, 3, 4]` | -| array_repeat(element, count) | Returns an array containing element `count` times. `array_repeat(1, 3) -> [1, 1, 1]` | -| array_remove(array, element) | Removes the first element from the array equal to the given value. `array_remove([1, 2, 2, 3, 2, 1, 4], 2) -> [1, 2, 3, 2, 1, 4]` | -| array_remove_n(array, element, max) | Removes the first `max` elements from the array equal to the given value. `array_remove_n([1, 2, 2, 3, 2, 1, 4], 2, 2) -> [1, 3, 2, 1, 4]` | -| array_remove_all(array, element) | Removes all elements from the array equal to the given value. `array_remove_all([1, 2, 2, 3, 2, 1, 4], 2) -> [1, 3, 1, 4]` | -| array_replace(array, from, to) | Replaces the first occurrence of the specified element with another specified element. `array_replace([1, 2, 2, 3, 2, 1, 4], 2, 5) -> [1, 5, 2, 3, 2, 1, 4]` | -| array_replace_n(array, from, to, max) | Replaces the first `max` occurrences of the specified element with another specified element. `array_replace_n([1, 2, 2, 3, 2, 1, 4], 2, 5, 2) -> [1, 5, 5, 3, 2, 1, 4]` | -| array_replace_all(array, from, to) | Replaces all occurrences of the specified element with another specified element. `array_replace_all([1, 2, 2, 3, 2, 1, 4], 2, 5) -> [1, 5, 5, 3, 5, 1, 4]` | -| array_slice(array, begin,end) | Returns a slice of the array. `array_slice([1, 2, 3, 4, 5, 6, 7, 8], 3, 6) -> [3, 4, 5, 6]` | -| array_slice(array, begin, end, stride) | Returns a slice of the array with added stride feature. `array_slice([1, 2, 3, 4, 5, 6, 7, 8], 3, 6, 2) -> [3, 5, 6]` | -| array_to_string(array, delimiter) | Converts each element to its text representation. `array_to_string([1, 2, 3, 4], ',') -> 1,2,3,4` | -| array_intersect(array1, array2) | Returns an array of the elements in the intersection of array1 and array2. `array_intersect([1, 2, 3, 4], [5, 6, 3, 4]) -> [3, 4]` | -| array_union(array1, array2) | Returns an array of the elements in the union of array1 and array2 without duplicates. `array_union([1, 2, 3, 4], [5, 6, 3, 4]) -> [1, 2, 3, 4, 5, 6]` | -| array_except(array1, array2) | Returns an array of the elements that appear in the first array but not in the second. `array_except([1, 2, 3, 4], [5, 6, 3, 4]) -> [3, 4]` | -| array_resize(array, size, value) | Resizes the list to contain size elements. Initializes new elements with value or empty if value is not set. `array_resize([1, 2, 3], 5, 0) -> [1, 2, 3, 4, 5, 6]` | -| cardinality(array) | Returns the total number of elements in the array. `cardinality([[1, 2, 3], [4, 5, 6]]) -> 6` | -| make_array(value1, [value2 [, ...]]) | Returns an Arrow array using the specified input expressions. `make_array(1, 2, 3) -> [1, 2, 3]` | -| range(start [, stop, step]) | Returns an Arrow array between start and stop with step. `SELECT range(2, 10, 3) -> [2, 5, 8]` | -| trim_array(array, n) | Deprecated | +| Syntax | Description | +| ---------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| array_append(array, element) | Appends an element to the end of an array. `array_append([1, 2, 3], 4) -> [1, 2, 3, 4]` | +| array_concat(array[, ..., array_n]) | Concatenates arrays. 
`array_concat([1, 2, 3], [4, 5, 6]) -> [1, 2, 3, 4, 5, 6]` | +| array_has(array, element) | Returns true if the array contains the element `array_has([1,2,3], 1) -> true` | +| array_has_all(array, sub-array) | Returns true if all elements of sub-array exist in array `array_has_all([1,2,3], [1,3]) -> true` | +| array_has_any(array, sub-array) | Returns true if any elements exist in both arrays `array_has_any([1,2,3], [1,4]) -> true` | +| array_dims(array) | Returns an array of the array's dimensions. `array_dims([[1, 2, 3], [4, 5, 6]]) -> [2, 3]` | +| array_distinct(array) | Returns distinct values from the array after removing duplicates. `array_distinct([1, 3, 2, 3, 1, 2, 4]) -> [1, 2, 3, 4]` | +| array_element(array, index) | Extracts the element with the index n from the array `array_element([1, 2, 3, 4], 3) -> 3` | +| flatten(array) | Converts an array of arrays to a flat array `flatten([[1], [2, 3], [4, 5, 6]]) -> [1, 2, 3, 4, 5, 6]` | +| array_length(array, dimension) | Returns the length of the array dimension. `array_length([1, 2, 3, 4, 5]) -> 5` | +| array_ndims(array) | Returns the number of dimensions of the array. `array_ndims([[1, 2, 3], [4, 5, 6]]) -> 2` | +| array_pop_front(array) | Returns the array without the first element. `array_pop_front([1, 2, 3]) -> [2, 3]` | +| array_pop_back(array) | Returns the array without the last element. `array_pop_back([1, 2, 3]) -> [1, 2]` | +| array_position(array, element) | Searches for an element in the array, returns first occurrence. `array_position([1, 2, 2, 3, 4], 2) -> 2` | +| array_positions(array, element) | Searches for an element in the array, returns all occurrences. `array_positions([1, 2, 2, 3, 4], 2) -> [2, 3]` | +| array_prepend(array, element) | Prepends an element to the beginning of an array. `array_prepend(1, [2, 3, 4]) -> [1, 2, 3, 4]` | +| array_repeat(element, count) | Returns an array containing element `count` times. `array_repeat(1, 3) -> [1, 1, 1]` | +| array_remove(array, element) | Removes the first element from the array equal to the given value. `array_remove([1, 2, 2, 3, 2, 1, 4], 2) -> [1, 2, 3, 2, 1, 4]` | +| array_remove_n(array, element, max) | Removes the first `max` elements from the array equal to the given value. `array_remove_n([1, 2, 2, 3, 2, 1, 4], 2, 2) -> [1, 3, 2, 1, 4]` | +| array_remove_all(array, element) | Removes all elements from the array equal to the given value. `array_remove_all([1, 2, 2, 3, 2, 1, 4], 2) -> [1, 3, 1, 4]` | +| array_replace(array, from, to) | Replaces the first occurrence of the specified element with another specified element. `array_replace([1, 2, 2, 3, 2, 1, 4], 2, 5) -> [1, 5, 2, 3, 2, 1, 4]` | +| array_replace_n(array, from, to, max) | Replaces the first `max` occurrences of the specified element with another specified element. `array_replace_n([1, 2, 2, 3, 2, 1, 4], 2, 5, 2) -> [1, 5, 5, 3, 2, 1, 4]` | +| array_replace_all(array, from, to) | Replaces all occurrences of the specified element with another specified element. `array_replace_all([1, 2, 2, 3, 2, 1, 4], 2, 5) -> [1, 5, 5, 3, 5, 1, 4]` | +| array_slice(array, begin,end) | Returns a slice of the array. `array_slice([1, 2, 3, 4, 5, 6, 7, 8], 3, 6) -> [3, 4, 5, 6]` | +| array_slice(array, begin, end, stride) | Returns a slice of the array with added stride feature. `array_slice([1, 2, 3, 4, 5, 6, 7, 8], 3, 6, 2) -> [3, 5, 6]` | +| array_to_string(array, delimiter) | Converts each element to its text representation. 
`array_to_string([1, 2, 3, 4], ',') -> 1,2,3,4` | +| array_intersect(array1, array2) | Returns an array of the elements in the intersection of array1 and array2. `array_intersect([1, 2, 3, 4], [5, 6, 3, 4]) -> [3, 4]` | +| array_union(array1, array2) | Returns an array of the elements in the union of array1 and array2 without duplicates. `array_union([1, 2, 3, 4], [5, 6, 3, 4]) -> [1, 2, 3, 4, 5, 6]` | +| array_except(array1, array2) | Returns an array of the elements that appear in the first array but not in the second. `array_except([1, 2, 3, 4], [5, 6, 3, 4]) -> [3, 4]` | +| array_resize(array, size, value) | Resizes the list to contain size elements. Initializes new elements with value or empty if value is not set. `array_resize([1, 2, 3], 5, 0) -> [1, 2, 3, 4, 5, 6]` | +| array_sort(array, desc, null_first) | Returns sorted array. `array_sort([3, 1, 2, 5, 4]) -> [1, 2, 3, 4, 5]` | +| cardinality(array) | Returns the total number of elements in the array. `cardinality([[1, 2, 3], [4, 5, 6]]) -> 6` | +| make_array(value1, [value2 [, ...]]) | Returns an Arrow array using the specified input expressions. `make_array(1, 2, 3) -> [1, 2, 3]` | +| range(start [, stop, step]) | Returns an Arrow array between start and stop with step. `SELECT range(2, 10, 3) -> [2, 5, 8]` | +| string_to_array(array, delimiter, null_string) | Splits a `string` based on a `delimiter` and returns an array of parts. Any parts matching the optional `null_string` will be replaced with `NULL`. `string_to_array('abc#def#ghi', '#', ' ') -> ['abc', 'def', 'ghi']` | +| trim_array(array, n) | Deprecated | ## Regular Expressions diff --git a/docs/source/user-guide/sql/data_types.md b/docs/source/user-guide/sql/data_types.md index 9f99d7bcb8ca..bfbd3433f1cf 100644 --- a/docs/source/user-guide/sql/data_types.md +++ b/docs/source/user-guide/sql/data_types.md @@ -30,11 +30,11 @@ the `arrow_typeof` function. 
For example: ```sql select arrow_typeof(interval '1 month'); -+-------------------------------------+ -| arrowtypeof(IntervalYearMonth("1")) | -+-------------------------------------+ -| Interval(YearMonth) | -+-------------------------------------+ ++---------------------------------------------------------------------+ +| arrow_typeof(IntervalMonthDayNano("79228162514264337593543950336")) | ++---------------------------------------------------------------------+ +| Interval(MonthDayNano) | ++---------------------------------------------------------------------+ ``` You can cast a SQL expression to a specific Arrow type using the `arrow_cast` function diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index b0385b492365..420de5f3fdba 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -1949,9 +1949,10 @@ from_unixtime(expression) - [array_concat](#array_concat) - [array_contains](#array_contains) - [array_dims](#array_dims) +- [array_distinct](#array_distinct) - [array_has](#array_has) - [array_has_all](#array_has_all) -- [array_has_any](#array_has_any)] +- [array_has_any](#array_has_any) - [array_element](#array_element) - [array_except](#array_except) - [array_extract](#array_extract) @@ -1987,6 +1988,7 @@ from_unixtime(expression) - [list_cat](#list_cat) - [list_concat](#list_concat) - [list_dims](#list_dims) +- [list_distinct](#list_distinct) - [list_element](#list_element) - [list_extract](#list_extract) - [list_has](#list_has) @@ -2204,6 +2206,34 @@ array_dims(array) - list_dims +### `array_distinct` + +Returns distinct values from the array after removing duplicates. + +``` +array_distinct(array) +``` + +#### Arguments + +- **array**: Array expression. + Can be a constant, column, or function, and any combination of array operators. + +#### Example + +``` +❯ select array_distinct([1, 3, 2, 3, 1, 2, 4]); ++---------------------------------+ +| array_distinct(List([1,2,3,4])) | ++---------------------------------+ +| [1, 2, 3, 4] | ++---------------------------------+ +``` + +#### Aliases + +- list_distinct + ### `array_element` Extracts the element with the index n from the array. @@ -3113,6 +3143,7 @@ _Alias of [make_array](#make_array)._ ### `string_to_array` Splits a string in to an array of substrings based on a delimiter. Any substrings matching the optional `null_str` argument are replaced with NULL. +`SELECT string_to_array('abc##def', '##')` or `SELECT string_to_array('abc def', ' ', 'def')` ``` starts_with(str, delimiter[, null_str])