Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move default catalog and schema onto ConfigOptions (#3887) #4805

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions datafusion/core/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,12 @@ config_namespace! {
/// concurrency. Defaults to the number of cpu cores on the system.
pub create_default_catalog_and_schema: bool, default = true

/// The default catalog name - this impacts what SQL queries use if not specified
pub default_catalog: String, default = "datafusion".to_string()

/// The default schema name - this impacts what SQL queries use if not specified
pub default_schema: String, default = "public".to_string()

/// Should DataFusion provide access to `information_schema`
/// virtual tables for displaying schema information
pub information_schema: bool, default = false
Expand Down
55 changes: 29 additions & 26 deletions datafusion/core/src/execution/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,6 @@ use super::options::{
AvroReadOptions, CsvReadOptions, NdJsonReadOptions, ParquetReadOptions,
};

/// The default catalog name - this impacts what SQL queries use if not specified
const DEFAULT_CATALOG: &str = "datafusion";
/// The default schema name - this impacts what SQL queries use if not specified
const DEFAULT_SCHEMA: &str = "public";

/// SessionContext is the main interface for executing queries with DataFusion. It stands for
/// the connection between user and DataFusion/Ballista cluster.
/// The context provides the following functionality
Expand Down Expand Up @@ -380,18 +375,32 @@ impl SessionContext {
// so for now, we default to default catalog
let tokens: Vec<&str> = schema_name.split('.').collect();
let (catalog, schema_name) = match tokens.len() {
1 => Ok((DEFAULT_CATALOG, schema_name.as_str())),
2 => Ok((tokens[0], tokens[1])),
_ => Err(DataFusionError::Execution(format!(
"Unable to parse catalog from {schema_name}"
))),
}?;
let catalog = self.catalog(catalog).ok_or_else(|| {
1 => {
let state = self.state.read();
let name = &state.config.options.catalog.default_catalog;
let catalog =
state.catalog_list.catalog(name).ok_or_else(|| {
DataFusionError::Execution(format!(
"Missing '{DEFAULT_CATALOG}' catalog"
"Missing default catalog '{name}'"
))
})?;

(catalog, tokens[0])
}
2 => {
let name = &tokens[0];
let catalog = self.catalog(name).ok_or_else(|| {
DataFusionError::Execution(format!(
"Missing catalog '{name}'"
))
})?;
(catalog, tokens[1])
}
_ => {
return Err(DataFusionError::Execution(format!(
"Unable to parse catalog from {schema_name}"
)))
}
};
let schema = catalog.schema(schema_name);

match (if_not_exists, schema) {
Expand Down Expand Up @@ -1097,11 +1106,6 @@ impl Hasher for IdHasher {
/// Configuration options for session context
#[derive(Clone)]
pub struct SessionConfig {
/// Default catalog name for table resolution
default_catalog: String,
/// Default schema name for table resolution (not in ConfigOptions
/// due to `resolve_table_ref` which passes back references)
default_schema: String,
/// Configuration options
options: ConfigOptions,
/// Opaque extensions.
Expand All @@ -1111,8 +1115,6 @@ pub struct SessionConfig {
impl Default for SessionConfig {
fn default() -> Self {
Self {
default_catalog: DEFAULT_CATALOG.to_owned(),
default_schema: DEFAULT_SCHEMA.to_owned(),
options: ConfigOptions::new(),
// Assume no extensions by default.
extensions: HashMap::with_capacity_and_hasher(
Expand Down Expand Up @@ -1218,8 +1220,8 @@ impl SessionConfig {
catalog: impl Into<String>,
schema: impl Into<String>,
) -> Self {
self.default_catalog = catalog.into();
self.default_schema = schema.into();
self.options.catalog.default_catalog = catalog.into();
self.options.catalog.default_schema = schema.into();
self
}

Expand Down Expand Up @@ -1434,15 +1436,15 @@ impl SessionState {

default_catalog
.register_schema(
&config.default_schema,
&config.config_options().catalog.default_schema,
Arc::new(MemorySchemaProvider::new()),
)
.expect("memory catalog provider can register schema");

Self::register_default_schema(&config, &runtime, &default_catalog);

catalog_list.register_catalog(
config.default_catalog.clone(),
config.config_options().catalog.default_catalog.clone(),
Arc::new(default_catalog),
);
}
Expand Down Expand Up @@ -1564,9 +1566,10 @@ impl SessionState {
&'a self,
table_ref: impl Into<TableReference<'a>>,
) -> ResolvedTableReference<'a> {
let catalog = &self.config_options().catalog;
table_ref
.into()
.resolve(&self.config.default_catalog, &self.config.default_schema)
.resolve(&catalog.default_catalog, &catalog.default_schema)
}

fn schema_for_ref<'a>(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,8 @@ query R
SHOW ALL
----
datafusion.catalog.create_default_catalog_and_schema true
datafusion.catalog.default_catalog datafusion
datafusion.catalog.default_schema public
datafusion.catalog.format NULL
datafusion.catalog.has_header false
datafusion.catalog.information_schema true
Expand Down
4 changes: 3 additions & 1 deletion docs/source/user-guide/configs.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,10 @@ If the value in the environment variable cannot be cast to the type of the confi
Environment variables are read during `SessionConfig` initialisation so they must be set beforehand and will not affect running sessions.

| key | default | description |
| --------------------------------------------------------- | ------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| --------------------------------------------------------- | ---------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| datafusion.catalog.create_default_catalog_and_schema | true | Whether the default catalog and schema should be created automatically. |
| datafusion.catalog.default_catalog | datafusion | The default catalog name - this impacts what SQL queries use if not specified |
| datafusion.catalog.default_schema | public | The default schema name - this impacts what SQL queries use if not specified |
| datafusion.catalog.information_schema | false | Should DataFusion provide access to `information_schema` virtual tables for displaying schema information |
| datafusion.catalog.location | NULL | Location scanned to load tables for `default` schema |
| datafusion.catalog.format | NULL | Type of `TableProvider` to use when loading `default` schema |
Expand Down