diff --git a/core/dbio/iop/duckdb.go b/core/dbio/iop/duckdb.go index d4ee48ef..4af29ce7 100644 --- a/core/dbio/iop/duckdb.go +++ b/core/dbio/iop/duckdb.go @@ -133,8 +133,8 @@ func (duck *DuckDb) PrepareFsSecretAndURI(uri string) string { "ENDPOINT": "ENDPOINT", } - if strings.Contains(fsProps["endpoint"], "r2.cloudflarestorage.com") { - accountID := strings.Split(fsProps["endpoint"], ".")[0] + if strings.Contains(fsProps["ENDPOINT"], "r2.cloudflarestorage.com") { + accountID := strings.Split(fsProps["ENDPOINT"], ".")[0] secretProps = append(secretProps, "ACCOUNT_ID "+accountID) secretProps = append(secretProps, "TYPE R2") scopeScheme = "r2" @@ -143,6 +143,16 @@ func (duck *DuckDb) PrepareFsSecretAndURI(uri string) string { secretProps = append(secretProps, "TYPE S3") } + // clean up endpoint + if endpoint := fsProps["ENDPOINT"]; strings.HasPrefix(endpoint, "http") { + fsProps["ENDPOINT"] = strings.TrimPrefix(endpoint, "https://") + fsProps["ENDPOINT"] = strings.TrimPrefix(endpoint, "http://") + } + + // add default provider chain (https://duckdb.org/docs/extensions/httpfs/s3api.html#credential_chain-provider) + secretSQL := dbio.TypeDbDuckDb.GetTemplateValue("core.default_s3_secret") + duck.secrets = append(duck.secrets, secretSQL) + case dbio.TypeFileGoogle: secretKeyMap = map[string]string{ "ACCESS_KEY_ID": "KEY_ID", diff --git a/core/dbio/templates/duckdb.yaml b/core/dbio/templates/duckdb.yaml index a2af753e..4bbdeac8 100755 --- a/core/dbio/templates/duckdb.yaml +++ b/core/dbio/templates/duckdb.yaml @@ -33,6 +33,12 @@ core: write_partition_columns {write_partition_columns}, partition_by ( {partition_columns} ) ) + default_s3_secret: | + create or replace secret s3_default ( + type s3, + provider credential_chain, + chain 'env;config' + ) metadata: @@ -174,7 +180,7 @@ function: iceberg_scanner: iceberg_scan('{uri}', allow_moved_paths = true) delta_scanner: delta_scan('{uri}') - parquet_scanner: read_parquet([{uris}]) + parquet_scanner: read_parquet([{uris}], filename = true) # csv_scanner: read_csv('{uri}', delim='{delimiter}', header={header}, columns={columns}, max_line_size=134217728, parallel=true, quote='{quote}', escape='{escape}', nullstr='{null_if}') csv_scanner: read_csv([{uris}], delim='{delimiter}', header={header}, max_line_size=134217728, parallel=true, quote='{quote}', escape='{escape}', nullstr='{null_if}')