diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 9921c446f85d..5c051a7dee82 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -153,6 +153,10 @@ macro_rules! config_namespace { config_namespace! { /// Options related to catalog and directory scanning + /// + /// See also: [`SessionConfig`] + /// + /// [`SessionConfig`]: https://docs.rs/datafusion/latest/datafusion/prelude/struct.SessionConfig.html pub struct CatalogOptions { /// Whether the default catalog and schema should be created automatically. pub create_default_catalog_and_schema: bool, default = true @@ -180,6 +184,10 @@ config_namespace! { config_namespace! { /// Options related to SQL parser + /// + /// See also: [`SessionConfig`] + /// + /// [`SessionConfig`]: https://docs.rs/datafusion/latest/datafusion/prelude/struct.SessionConfig.html pub struct SqlParserOptions { /// When set to true, SQL parser will parse float as decimal type pub parse_float_as_decimal: bool, default = false @@ -196,6 +204,10 @@ config_namespace! { config_namespace! { /// Options related to query execution + /// + /// See also: [`SessionConfig`] + /// + /// [`SessionConfig`]: https://docs.rs/datafusion/latest/datafusion/prelude/struct.SessionConfig.html pub struct ExecutionOptions { /// Default batch size while creating new batches, it's especially useful for /// buffer-in-memory batches since creating tiny batches would result in too much @@ -283,6 +295,10 @@ config_namespace! { config_namespace! { /// Options related to parquet files + /// + /// See also: [`SessionConfig`] + /// + /// [`SessionConfig`]: https://docs.rs/datafusion/latest/datafusion/prelude/struct.SessionConfig.html pub struct ParquetOptions { /// If true, reads the Parquet data page level metadata (the /// Page Index), if present, to reduce the I/O and number of @@ -306,7 +322,7 @@ config_namespace! 
{ pub metadata_size_hint: Option, default = None /// If true, filter expressions are be applied during the parquet decoding operation to - /// reduce the number of rows decoded + /// reduce the number of rows decoded. This optimization is sometimes called "late materialization". pub pushdown_filters: bool, default = false /// If true, filter expressions evaluated during the parquet decoding operation @@ -416,6 +432,10 @@ config_namespace! { config_namespace! { /// Options related to aggregate execution + /// + /// See also: [`SessionConfig`] + /// + /// [`SessionConfig`]: https://docs.rs/datafusion/latest/datafusion/prelude/struct.SessionConfig.html pub struct AggregateOptions { /// Specifies the threshold for using `ScalarValue`s to update /// accumulators during high-cardinality aggregations for each input batch. @@ -433,6 +453,10 @@ config_namespace! { config_namespace! { /// Options related to query optimization + /// + /// See also: [`SessionConfig`] + /// + /// [`SessionConfig`]: https://docs.rs/datafusion/latest/datafusion/prelude/struct.SessionConfig.html pub struct OptimizerOptions { /// When set to true, the optimizer will push a limit operation into /// grouped aggregations which have no aggregate expressions, as a soft limit, @@ -541,6 +565,10 @@ config_namespace! { config_namespace! 
{ /// Options controlling explain output + /// + /// See also: [`SessionConfig`] + /// + /// [`SessionConfig`]: https://docs.rs/datafusion/latest/datafusion/prelude/struct.SessionConfig.html pub struct ExplainOptions { /// When set to true, the explain statement will only print logical plans pub logical_plan_only: bool, default = false diff --git a/datafusion/execution/src/config.rs b/datafusion/execution/src/config.rs index 8556335b395a..5c7550bec18b 100644 --- a/datafusion/execution/src/config.rs +++ b/datafusion/execution/src/config.rs @@ -24,7 +24,66 @@ use std::{ use datafusion_common::{config::ConfigOptions, Result, ScalarValue}; -/// Configuration options for Execution context +/// Configuration options for [`SessionContext`]. +/// +/// Can be passed to `SessionContext::with_config` to customize the configuration of DataFusion. +/// +/// Options can be set using namespaced keys with `.` as the separator, where the +/// namespace determines which configuration struct the value is routed to. All +/// built-in options are under the `datafusion` namespace. +/// +/// For example, the key `datafusion.execution.batch_size` will set [ExecutionOptions::batch_size][datafusion_common::config::ExecutionOptions::batch_size], +/// because [ConfigOptions::execution] is [ExecutionOptions][datafusion_common::config::ExecutionOptions]. Similarly, the key +/// `datafusion.execution.parquet.pushdown_filters` will set [ParquetOptions::pushdown_filters][datafusion_common::config::ParquetOptions::pushdown_filters], +/// since [ExecutionOptions::parquet][datafusion_common::config::ExecutionOptions::parquet] is [ParquetOptions][datafusion_common::config::ParquetOptions]. +/// +/// Some options have convenience methods. For example [SessionConfig::with_batch_size] is +/// shorthand for setting `datafusion.execution.batch_size`. 
+/// +/// ``` +/// use datafusion_execution::config::SessionConfig; +/// use datafusion_common::ScalarValue; +/// +/// let config = SessionConfig::new() +/// .set("datafusion.execution.batch_size", ScalarValue::UInt64(Some(1234))) +/// .set_bool("datafusion.execution.parquet.pushdown_filters", true); +/// +/// assert_eq!(config.batch_size(), 1234); +/// assert_eq!(config.options().execution.batch_size, 1234); +/// assert_eq!(config.options().execution.parquet.pushdown_filters, true); +/// ``` +/// +/// You can also directly mutate the options via [SessionConfig::options_mut]. +/// So the following is equivalent to the above: +/// +/// ``` +/// # use datafusion_execution::config::SessionConfig; +/// # use datafusion_common::ScalarValue; +/// # +/// let mut config = SessionConfig::new(); +/// config.options_mut().execution.batch_size = 1234; +/// config.options_mut().execution.parquet.pushdown_filters = true; +/// # +/// # assert_eq!(config.batch_size(), 1234); +/// # assert_eq!(config.options().execution.batch_size, 1234); +/// # assert_eq!(config.options().execution.parquet.pushdown_filters, true); +/// ``` +/// +/// ## Built-in options +/// +/// | Namespace | Config struct | +/// | --------- | ------------- | +/// | `datafusion.catalog` | [CatalogOptions][datafusion_common::config::CatalogOptions] | +/// | `datafusion.execution` | [ExecutionOptions][datafusion_common::config::ExecutionOptions] | +/// | `datafusion.optimizer` | [OptimizerOptions][datafusion_common::config::OptimizerOptions] | +/// | `datafusion.sql_parser` | [SqlParserOptions][datafusion_common::config::SqlParserOptions] | +/// | `datafusion.explain` | [ExplainOptions][datafusion_common::config::ExplainOptions] | +/// +/// ## Custom configuration +/// +/// Configuration options can be extended. See [SessionConfig::with_extension] for details. 
+/// +/// [`SessionContext`]: https://docs.rs/datafusion/latest/datafusion/execution/context/struct.SessionContext.html #[derive(Clone, Debug)] pub struct SessionConfig { /// Configuration options @@ -62,6 +121,35 @@ impl SessionConfig { Ok(ConfigOptions::from_string_hash_map(settings)?.into()) } + /// Return a handle to the configuration options. + /// + /// Can be used to read the current configuration. + /// + /// ``` + /// use datafusion_execution::config::SessionConfig; + /// + /// let config = SessionConfig::new(); + /// assert!(config.options().execution.batch_size > 0); + /// ``` + pub fn options(&self) -> &ConfigOptions { + &self.options + } + + /// Return a mutable handle to the configuration options. + /// + /// Can be used to set configuration options. + /// + /// ``` + /// use datafusion_execution::config::SessionConfig; + /// + /// let mut config = SessionConfig::new(); + /// config.options_mut().execution.batch_size = 1024; + /// assert_eq!(config.options().execution.batch_size, 1024); + /// ``` + pub fn options_mut(&mut self) -> &mut ConfigOptions { + &mut self.options + } + /// Set a configuration option pub fn set(mut self, key: &str, value: ScalarValue) -> Self { self.options.set(key, &value.to_string()).unwrap(); @@ -346,16 +434,6 @@ impl SessionConfig { &mut self.options } - /// Return a handle to the configuration options. - pub fn options(&self) -> &ConfigOptions { - &self.options - } - - /// Return a mutable handle to the configuration options. - pub fn options_mut(&mut self) -> &mut ConfigOptions { - &mut self.options - } - /// Add extensions. /// /// Extensions can be used to attach extra data to the session config -- e.g. tracing information or caches.