From 17619d6af9ce49458d7cdc1fe314910792179ac7 Mon Sep 17 00:00:00 2001 From: emcake <3726783+emcake@users.noreply.github.com> Date: Tue, 16 Jan 2024 10:33:21 +0000 Subject: [PATCH 1/4] reinstate checks for copy-if-not-exists --- crates/deltalake-aws/src/lib.rs | 7 +++++++ crates/deltalake-aws/src/storage.rs | 15 +++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/crates/deltalake-aws/src/lib.rs b/crates/deltalake-aws/src/lib.rs index dd33854456..dc598cf084 100644 --- a/crates/deltalake-aws/src/lib.rs +++ b/crates/deltalake-aws/src/lib.rs @@ -43,6 +43,13 @@ impl LogStoreFactory for S3LogStoreFactory { let store = url_prefix_handler(store, Path::parse(location.path())?)?; let s3_options = S3StorageOptions::from_map(&options.0); + if s3_options.copy_if_not_exists.is_some() { + debug!("S3LogStoreFactory has been asked to create a LogStore where the underlying store has copy-if-not-exists enabled - no locking provider required"); + return Ok(deltalake_core::logstore::default_logstore( + store, location, options, + )); + } + if s3_options.locking_provider.as_deref() != Some("dynamodb") { debug!("S3LogStoreFactory has been asked to create a LogStore without the dynamodb locking provider"); return Ok(deltalake_core::logstore::default_logstore( diff --git a/crates/deltalake-aws/src/storage.rs b/crates/deltalake-aws/src/storage.rs index b71d17bb64..4d914a073e 100644 --- a/crates/deltalake-aws/src/storage.rs +++ b/crates/deltalake-aws/src/storage.rs @@ -57,6 +57,10 @@ impl ObjectStoreFactory for S3ObjectStoreFactory { )?; let options = S3StorageOptions::from_map(&options.0); + if options.copy_if_not_exists.is_some() { + // If the copy-if-not-exists env var is set, we don't need to instantiate a locking client or check for allow-unsafe-rename. + return Ok((Arc::new(*store), prefix)); + } let store = S3StorageBackend::try_new( store.into(), Some("dynamodb") == options.locking_provider.as_deref() || options.allow_unsafe_rename, @@ -87,6 +91,7 @@ pub struct S3StorageOptions { pub sts_pool_idle_timeout: Duration, pub s3_get_internal_server_error_retries: usize, pub allow_unsafe_rename: bool, + pub copy_if_not_exists: Option, pub extra_opts: HashMap, } @@ -141,6 +146,8 @@ impl S3StorageOptions { .map(|val| str_is_truthy(&val)) .unwrap_or(false); + let copy_if_not_exists = str_option(options, s3_constants::AWS_COPY_IF_NOT_EXISTS); + Self { endpoint_url, region, @@ -158,6 +165,7 @@ impl S3StorageOptions { s3_get_internal_server_error_retries, allow_unsafe_rename, extra_opts, + copy_if_not_exists, } } @@ -366,6 +374,10 @@ pub mod s3_constants { /// Only safe if there is one writer to a given table. pub const AWS_S3_ALLOW_UNSAFE_RENAME: &str = "AWS_S3_ALLOW_UNSAFE_RENAME"; + /// If set, specifies how to enable copy-if-not-exists on the S3 endpoint. Allows + /// bypassing the use of a lock client like DynamoDB. + pub const AWS_COPY_IF_NOT_EXISTS: &str = "AWS_COPY_IF_NOT_EXISTS"; + /// The list of option keys owned by the S3 module. /// Option keys not contained in this list will be added to the `extra_opts` /// field of [crate::storage::s3::S3StorageOptions]. @@ -442,6 +454,7 @@ mod tests { s3_get_internal_server_error_retries: 10, extra_opts: HashMap::new(), allow_unsafe_rename: false, + copy_if_not_exists: None }, options ); @@ -511,6 +524,7 @@ mod tests { s3_constants::AWS_S3_ADDRESSING_STYLE.to_string() => "virtual".to_string() }, allow_unsafe_rename: false, + copy_if_not_exists: None }, options ); @@ -563,6 +577,7 @@ mod tests { s3_get_internal_server_error_retries: 3, extra_opts: hashmap! {}, allow_unsafe_rename: false, + copy_if_not_exists: None }, options ); From 1451cfd4fcf1ef31a1ac74933ab34158eaa916ef Mon Sep 17 00:00:00 2001 From: emcake <3726783+emcake@users.noreply.github.com> Date: Tue, 16 Jan 2024 11:00:16 +0000 Subject: [PATCH 2/4] fix --- crates/deltalake-aws/src/storage.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/deltalake-aws/src/storage.rs b/crates/deltalake-aws/src/storage.rs index 4d914a073e..397db5d36d 100644 --- a/crates/deltalake-aws/src/storage.rs +++ b/crates/deltalake-aws/src/storage.rs @@ -59,7 +59,7 @@ impl ObjectStoreFactory for S3ObjectStoreFactory { let options = S3StorageOptions::from_map(&options.0); if options.copy_if_not_exists.is_some() { // If the copy-if-not-exists env var is set, we don't need to instantiate a locking client or check for allow-unsafe-rename. - return Ok((Arc::new(*store), prefix)); + return Ok((Arc::from(store), prefix)); } let store = S3StorageBackend::try_new( store.into(), From 1d22683d7dbcaf5350e92a65e523ca6f032c67cd Mon Sep 17 00:00:00 2001 From: emcake <3726783+emcake@users.noreply.github.com> Date: Tue, 16 Jan 2024 12:57:22 +0000 Subject: [PATCH 3/4] check via the config key rather than S3StorageOptions --- crates/deltalake-aws/src/lib.rs | 9 +++++++-- crates/deltalake-aws/src/storage.rs | 16 +++++++--------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/crates/deltalake-aws/src/lib.rs b/crates/deltalake-aws/src/lib.rs index dc598cf084..2630f80512 100644 --- a/crates/deltalake-aws/src/lib.rs +++ b/crates/deltalake-aws/src/lib.rs @@ -5,6 +5,7 @@ pub mod logstore; pub mod storage; use lazy_static::lazy_static; +use object_store::aws::AmazonS3ConfigKey; use regex::Regex; use std::{ collections::HashMap, @@ -41,15 +42,19 @@ impl LogStoreFactory for S3LogStoreFactory { options: &StorageOptions, ) -> DeltaResult> { let store = url_prefix_handler(store, Path::parse(location.path())?)?; - let s3_options = S3StorageOptions::from_map(&options.0); - if s3_options.copy_if_not_exists.is_some() { + if options + .0 + .contains_key(AmazonS3ConfigKey::CopyIfNotExists.as_ref()) + { debug!("S3LogStoreFactory has been asked to create a LogStore where the underlying store has copy-if-not-exists enabled - no locking provider required"); return Ok(deltalake_core::logstore::default_logstore( store, location, options, )); } + let s3_options = S3StorageOptions::from_map(&options.0); + if s3_options.locking_provider.as_deref() != Some("dynamodb") { debug!("S3LogStoreFactory has been asked to create a LogStore without the dynamodb locking provider"); return Ok(deltalake_core::logstore::default_logstore( diff --git a/crates/deltalake-aws/src/storage.rs b/crates/deltalake-aws/src/storage.rs index 397db5d36d..f0da3dea80 100644 --- a/crates/deltalake-aws/src/storage.rs +++ b/crates/deltalake-aws/src/storage.rs @@ -56,11 +56,16 @@ impl ObjectStoreFactory for S3ObjectStoreFactory { }), )?; - let options = S3StorageOptions::from_map(&options.0); - if options.copy_if_not_exists.is_some() { + if options + .0 + .contains_key(AmazonS3ConfigKey::CopyIfNotExists.as_ref()) + { // If the copy-if-not-exists env var is set, we don't need to instantiate a locking client or check for allow-unsafe-rename. return Ok((Arc::from(store), prefix)); } + + let options = S3StorageOptions::from_map(&options.0); + let store = S3StorageBackend::try_new( store.into(), Some("dynamodb") == options.locking_provider.as_deref() || options.allow_unsafe_rename, @@ -91,7 +96,6 @@ pub struct S3StorageOptions { pub sts_pool_idle_timeout: Duration, pub s3_get_internal_server_error_retries: usize, pub allow_unsafe_rename: bool, - pub copy_if_not_exists: Option, pub extra_opts: HashMap, } @@ -146,8 +150,6 @@ impl S3StorageOptions { .map(|val| str_is_truthy(&val)) .unwrap_or(false); - let copy_if_not_exists = str_option(options, s3_constants::AWS_COPY_IF_NOT_EXISTS); - Self { endpoint_url, region, @@ -165,7 +167,6 @@ impl S3StorageOptions { s3_get_internal_server_error_retries, allow_unsafe_rename, extra_opts, - copy_if_not_exists, } } @@ -454,7 +455,6 @@ mod tests { s3_get_internal_server_error_retries: 10, extra_opts: HashMap::new(), allow_unsafe_rename: false, - copy_if_not_exists: None }, options ); @@ -524,7 +524,6 @@ mod tests { s3_constants::AWS_S3_ADDRESSING_STYLE.to_string() => "virtual".to_string() }, allow_unsafe_rename: false, - copy_if_not_exists: None }, options ); @@ -577,7 +576,6 @@ mod tests { s3_get_internal_server_error_retries: 3, extra_opts: hashmap! {}, allow_unsafe_rename: false, - copy_if_not_exists: None }, options ); From 3578ef59dfa4097b7311e33e359a530ee0a0b04f Mon Sep 17 00:00:00 2001 From: emcake <3726783+emcake@users.noreply.github.com> Date: Tue, 16 Jan 2024 12:58:18 +0000 Subject: [PATCH 4/4] remove extra config key --- crates/deltalake-aws/src/storage.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/crates/deltalake-aws/src/storage.rs b/crates/deltalake-aws/src/storage.rs index f0da3dea80..87d488b54f 100644 --- a/crates/deltalake-aws/src/storage.rs +++ b/crates/deltalake-aws/src/storage.rs @@ -375,10 +375,6 @@ pub mod s3_constants { /// Only safe if there is one writer to a given table. pub const AWS_S3_ALLOW_UNSAFE_RENAME: &str = "AWS_S3_ALLOW_UNSAFE_RENAME"; - /// If set, specifies how to enable copy-if-not-exists on the S3 endpoint. Allows - /// bypassing the use of a lock client like DynamoDB. - pub const AWS_COPY_IF_NOT_EXISTS: &str = "AWS_COPY_IF_NOT_EXISTS"; - /// The list of option keys owned by the S3 module. /// Option keys not contained in this list will be added to the `extra_opts` /// field of [crate::storage::s3::S3StorageOptions].