Skip to content

Commit

Permalink
docs: document SessionConfig
Browse files Browse the repository at this point in the history
  • Loading branch information
wjones127 committed Jan 11, 2024
1 parent e862539 commit d1dbe78
Show file tree
Hide file tree
Showing 2 changed files with 118 additions and 12 deletions.
30 changes: 29 additions & 1 deletion datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,10 @@ macro_rules! config_namespace {

config_namespace! {
/// Options related to catalog and directory scanning
///
/// See also: [`SessionConfig`]
///
/// [`SessionConfig`]: https://docs.rs/datafusion/latest/datafusion/prelude/struct.SessionConfig.html
pub struct CatalogOptions {
/// Whether the default catalog and schema should be created automatically.
pub create_default_catalog_and_schema: bool, default = true
Expand Down Expand Up @@ -180,6 +184,10 @@ config_namespace! {

config_namespace! {
/// Options related to SQL parser
///
/// See also: [`SessionConfig`]
///
/// [`SessionConfig`]: https://docs.rs/datafusion/latest/datafusion/prelude/struct.SessionConfig.html
pub struct SqlParserOptions {
/// When set to true, SQL parser will parse float as decimal type
pub parse_float_as_decimal: bool, default = false
Expand All @@ -196,6 +204,10 @@ config_namespace! {

config_namespace! {
/// Options related to query execution
///
/// See also: [`SessionConfig`]
///
/// [`SessionConfig`]: https://docs.rs/datafusion/latest/datafusion/prelude/struct.SessionConfig.html
pub struct ExecutionOptions {
/// Default batch size while creating new batches, it's especially useful for
/// buffer-in-memory batches since creating tiny batches would result in too much
Expand Down Expand Up @@ -283,6 +295,10 @@ config_namespace! {

config_namespace! {
/// Options related to parquet files
///
/// See also: [`SessionConfig`]
///
/// [`SessionConfig`]: https://docs.rs/datafusion/latest/datafusion/prelude/struct.SessionConfig.html
pub struct ParquetOptions {
/// If true, reads the Parquet data page level metadata (the
/// Page Index), if present, to reduce the I/O and number of
Expand All @@ -306,7 +322,7 @@ config_namespace! {
pub metadata_size_hint: Option<usize>, default = None

/// If true, filter expressions are applied during the parquet decoding operation to
/// reduce the number of rows decoded
/// reduce the number of rows decoded. This optimization is sometimes called "late materialization".
pub pushdown_filters: bool, default = false

/// If true, filter expressions evaluated during the parquet decoding operation
Expand Down Expand Up @@ -416,6 +432,10 @@ config_namespace! {

config_namespace! {
/// Options related to aggregate execution
///
/// See also: [`SessionConfig`]
///
/// [`SessionConfig`]: https://docs.rs/datafusion/latest/datafusion/prelude/struct.SessionConfig.html
pub struct AggregateOptions {
/// Specifies the threshold for using `ScalarValue`s to update
/// accumulators during high-cardinality aggregations for each input batch.
Expand All @@ -433,6 +453,10 @@ config_namespace! {

config_namespace! {
/// Options related to query optimization
///
/// See also: [`SessionConfig`]
///
/// [`SessionConfig`]: https://docs.rs/datafusion/latest/datafusion/prelude/struct.SessionConfig.html
pub struct OptimizerOptions {
/// When set to true, the optimizer will push a limit operation into
/// grouped aggregations which have no aggregate expressions, as a soft limit,
Expand Down Expand Up @@ -541,6 +565,10 @@ config_namespace! {

config_namespace! {
/// Options controlling explain output
///
/// See also: [`SessionConfig`]
///
/// [`SessionConfig`]: https://docs.rs/datafusion/latest/datafusion/prelude/struct.SessionConfig.html
pub struct ExplainOptions {
/// When set to true, the explain statement will only print logical plans
pub logical_plan_only: bool, default = false
Expand Down
100 changes: 89 additions & 11 deletions datafusion/execution/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,66 @@ use std::{

use datafusion_common::{config::ConfigOptions, Result, ScalarValue};

/// Configuration options for Execution context
/// Configuration options for [`SessionContext`].
///
/// Can be passed to `SessionContext::with_config` to customize the configuration of DataFusion.
///
/// Options can be set using namespaced keys with `.` as the separator, where the
/// namespace determines which configuration struct the value is routed to. All
/// built-in options are under the `datafusion` namespace.
///
/// For example, the key `datafusion.execution.batch_size` will set [ExecutionOptions::batch_size][datafusion_common::config::ExecutionOptions::batch_size],
/// because [ConfigOptions::execution] is [ExecutionOptions][datafusion_common::config::ExecutionOptions]. Similarly, the key
/// `datafusion.execution.parquet.pushdown_filters` will set [ParquetOptions::pushdown_filters][datafusion_common::config::ParquetOptions::pushdown_filters],
/// since [ExecutionOptions::parquet][datafusion_common::config::ExecutionOptions::parquet] is [ParquetOptions][datafusion_common::config::ParquetOptions].
///
/// Some options have convenience methods. For example [SessionConfig::with_batch_size] is
/// shorthand for setting `datafusion.execution.batch_size`.
///
/// ```
/// use datafusion_execution::config::SessionConfig;
/// use datafusion_common::ScalarValue;
///
/// let config = SessionConfig::new()
/// .set("datafusion.execution.batch_size", ScalarValue::UInt64(Some(1234)))
/// .set_bool("datafusion.execution.parquet.pushdown_filters", true);
///
/// assert_eq!(config.batch_size(), 1234);
/// assert_eq!(config.options().execution.batch_size, 1234);
/// assert_eq!(config.options().execution.parquet.pushdown_filters, true);
/// ```
///
/// You can also directly mutate the options via [SessionConfig::options_mut].
/// So the following is equivalent to the above:
///
/// ```
/// # use datafusion_execution::config::SessionConfig;
/// # use datafusion_common::ScalarValue;
/// #
/// let mut config = SessionConfig::new();
/// config.options_mut().execution.batch_size = 1234;
/// config.options_mut().execution.parquet.pushdown_filters = true;
/// #
/// # assert_eq!(config.batch_size(), 1234);
/// # assert_eq!(config.options().execution.batch_size, 1234);
/// # assert_eq!(config.options().execution.parquet.pushdown_filters, true);
/// ```
///
/// ## Built-in options
///
/// | Namespace | Config struct |
/// | --------- | ------------- |
/// | `datafusion.catalog` | [CatalogOptions][datafusion_common::config::CatalogOptions] |
/// | `datafusion.execution` | [ExecutionOptions][datafusion_common::config::ExecutionOptions] |
/// | `datafusion.optimizer` | [OptimizerOptions][datafusion_common::config::OptimizerOptions] |
/// | `datafusion.sql_parser` | [SqlParserOptions][datafusion_common::config::SqlParserOptions] |
/// | `datafusion.explain` | [ExplainOptions][datafusion_common::config::ExplainOptions] |
///
/// ## Custom configuration
///
/// Configuration options can be extended. See [SessionConfig::with_extension] for details.
///
/// [`SessionContext`]: https://docs.rs/datafusion/latest/datafusion/execution/context/struct.SessionContext.html
#[derive(Clone, Debug)]
pub struct SessionConfig {
/// Configuration options
Expand Down Expand Up @@ -62,6 +121,35 @@ impl SessionConfig {
Ok(ConfigOptions::from_string_hash_map(settings)?.into())
}

/// Return a handle to the configuration options.
///
/// Can be used to read the current configuration.
///
/// ```
/// use datafusion_execution::config::SessionConfig;
///
/// let config = SessionConfig::new();
/// assert!(config.options().execution.batch_size > 0);
/// ```
pub fn options(&self) -> &ConfigOptions {
&self.options
}

/// Return a mutable handle to the configuration options.
///
/// Can be used to set configuration options.
///
/// ```
/// use datafusion_execution::config::SessionConfig;
///
/// let mut config = SessionConfig::new();
/// config.options_mut().execution.batch_size = 1024;
/// assert_eq!(config.options().execution.batch_size, 1024);
/// ```
pub fn options_mut(&mut self) -> &mut ConfigOptions {
&mut self.options
}

/// Set a configuration option
pub fn set(mut self, key: &str, value: ScalarValue) -> Self {
self.options.set(key, &value.to_string()).unwrap();
Expand Down Expand Up @@ -346,16 +434,6 @@ impl SessionConfig {
&mut self.options
}

/// Return a handle to the configuration options.
pub fn options(&self) -> &ConfigOptions {
&self.options
}

/// Return a mutable handle to the configuration options.
pub fn options_mut(&mut self) -> &mut ConfigOptions {
&mut self.options
}

/// Add extensions.
///
/// Extensions can be used to attach extra data to the session config -- e.g. tracing information or caches.
Expand Down

0 comments on commit d1dbe78

Please sign in to comment.