diff --git a/Makefile b/Makefile index a8ea67db..903eb020 100644 --- a/Makefile +++ b/Makefile @@ -6,3 +6,9 @@ EXT_CONFIG=${PROJ_DIR}extension_config.cmake # Include the Makefile from extension-ci-tools include extension-ci-tools/makefiles/duckdb_extension.Makefile + +#### Override the included format target because we have different source tree layout +format: + find spatial/src -iname *.hpp -o -iname *.cpp | xargs clang-format --sort-includes=0 -style=file -i + find spatial/include -iname *.hpp -o -iname *.cpp | xargs clang-format --sort-includes=0 -style=file -i + cmake-format -i CMakeLists.txt \ No newline at end of file diff --git a/spatial/include/spatial/gdal/file_handler.hpp b/spatial/include/spatial/gdal/file_handler.hpp index cd3fd9bc..2143ae93 100644 --- a/spatial/include/spatial/gdal/file_handler.hpp +++ b/spatial/include/spatial/gdal/file_handler.hpp @@ -10,6 +10,7 @@ namespace gdal { class DuckDBFileSystemHandler; class GDALClientContextState : public ClientContextState { + ClientContext &context; string client_prefix; DuckDBFileSystemHandler *fs_handler; diff --git a/spatial/src/spatial/core/io/osm/st_read_osm.cpp b/spatial/src/spatial/core/io/osm/st_read_osm.cpp index 5aa49ddb..751ed337 100644 --- a/spatial/src/spatial/core/io/osm/st_read_osm.cpp +++ b/spatial/src/spatial/core/io/osm/st_read_osm.cpp @@ -84,11 +84,6 @@ static unique_ptr Bind(ClientContext &context, TableFunctionBindIn names.push_back("ref_types"); // Create bind data - auto &config = DBConfig::GetConfig(context); - if (!config.options.enable_external_access) { - throw PermissionException("Scanning OSM files is disabled through configuration"); - } - auto file_name = StringValue::Get(input.inputs[0]); auto result = make_uniq(file_name); return std::move(result); diff --git a/spatial/src/spatial/gdal/file_handler.cpp b/spatial/src/spatial/gdal/file_handler.cpp index bde8661b..6f04630b 100644 --- a/spatial/src/spatial/gdal/file_handler.cpp +++ b/spatial/src/spatial/gdal/file_handler.cpp @@ -24,7 +24,8 @@ class DuckDBFileHandle : public VSIVirtualHandle { bool is_eof; public: - explicit DuckDBFileHandle(unique_ptr file_handle_p) : file_handle(std::move(file_handle_p)), is_eof(false) { + explicit DuckDBFileHandle(unique_ptr file_handle_p) + : file_handle(std::move(file_handle_p)), is_eof(false) { } vsi_l_offset Tell() override { @@ -216,16 +217,24 @@ class DuckDBFileSystemHandler : public VSIFilesystemHandler { return nullptr; } - // Fall back to GDAL instead - auto handler = VSIFileManager::GetHandler(file_name); - if (handler) { - return handler->Open(file_name, access); - } else { + // Fall back to GDAL instead (if external access is enabled) + if (!context.db->config.options.enable_external_access) { + if (bSetError) { + VSIError(VSIE_FileError, "Failed to open file %s with GDAL: External access is disabled", + file_name); + } + return nullptr; + } + + const auto handler = VSIFileManager::GetHandler(file_name); + if (!handler) { if (bSetError) { VSIError(VSIE_FileError, "Failed to open file %s: %s", file_name, ex.what()); } return nullptr; } + + return handler->Open(file_name, access); } } @@ -377,7 +386,7 @@ class DuckDBFileSystemHandler : public VSIFilesystemHandler { // use their own attached file systems. This is necessary because GDAL is // not otherwise aware of the connection context. // -GDALClientContextState::GDALClientContextState(ClientContext &context) { +GDALClientContextState::GDALClientContextState(ClientContext &context) : context(context) { // Create a new random prefix for this client client_prefix = StringUtil::Format("/vsiduckdb-%s/", UUID::ToString(UUID::GenerateRandomUUID())); @@ -387,6 +396,8 @@ GDALClientContextState::GDALClientContextState(ClientContext &context) { // Register the file handler VSIFileManager::InstallHandler(client_prefix, fs_handler); + + // Also pass a reference to the client context } GDALClientContextState::~GDALClientContextState() { @@ -404,6 +415,9 @@ void GDALClientContextState::QueryEnd() { string GDALClientContextState::GetPrefix(const string &value) const { // If the user explicitly asked for a VSI prefix, we don't add our own if (StringUtil::StartsWith(value, "/vsi")) { + if (!context.db->config.options.enable_external_access) { + throw PermissionException("Cannot open file '%s' with VSI prefix: External access is disabled", value); + } return value; } return client_prefix + value; diff --git a/spatial/src/spatial/gdal/functions/st_read.cpp b/spatial/src/spatial/gdal/functions/st_read.cpp index 600fad8c..2394b435 100644 --- a/spatial/src/spatial/gdal/functions/st_read.cpp +++ b/spatial/src/spatial/gdal/functions/st_read.cpp @@ -61,54 +61,6 @@ static void TryApplySpatialFilter(OGRLayer *layer, SpatialFilter *spatial_filter } } -// TODO: Verify that this actually corresponds to the same sql subset expected by OGR SQL -static string FilterToGdal(const TableFilter &filter, const string &column_name) { - - switch (filter.filter_type) { - case TableFilterType::CONSTANT_COMPARISON: { - auto &constant_filter = filter.Cast(); - return KeywordHelper::WriteOptionallyQuoted(column_name) + - ExpressionTypeToOperator(constant_filter.comparison_type) + constant_filter.constant.ToSQLString(); - } - case TableFilterType::CONJUNCTION_AND: { - auto &and_filter = filter.Cast(); - vector filters; - for (const auto &child_filter : and_filter.child_filters) { - filters.push_back(FilterToGdal(*child_filter, column_name)); - } - return StringUtil::Join(filters, " AND "); - } - case TableFilterType::CONJUNCTION_OR: { - auto &or_filter = filter.Cast(); - vector filters; - for (const auto &child_filter : or_filter.child_filters) { - filters.push_back(FilterToGdal(*child_filter, column_name)); - } - return StringUtil::Join(filters, " OR "); - } - case TableFilterType::IS_NOT_NULL: { - return KeywordHelper::WriteOptionallyQuoted(column_name) + " IS NOT NULL"; - } - case TableFilterType::IS_NULL: { - return KeywordHelper::WriteOptionallyQuoted(column_name) + " IS NULL"; - } - default: - throw NotImplementedException("FilterToGdal: filter type not implemented"); - } -} - -static string FilterToGdal(const TableFilterSet &set, const vector &column_ids, - const vector &column_names) { - - vector filters; - for (auto &input_filter : set.filters) { - auto col_idx = column_ids[input_filter.first]; - auto &col_name = column_names[col_idx]; - filters.push_back(FilterToGdal(*input_filter.second, col_name)); - } - return StringUtil::Join(filters, " AND "); -} - struct GdalScanFunctionData : public TableFunctionData { int layer_idx; bool sequential_layer_scan = false; @@ -153,11 +105,6 @@ struct GdalScanGlobalState : ArrowScanGlobalState { unique_ptr GdalTableFunction::Bind(ClientContext &context, TableFunctionBindInput &input, vector &return_types, vector &names) { - auto &config = DBConfig::GetConfig(context); - if (!config.options.enable_external_access) { - throw PermissionException("Scanning GDAL files is disabled through configuration"); - } - auto result = make_uniq(); // First scan for "options" parameter @@ -470,13 +417,6 @@ unique_ptr GdalTableFunction::InitGlobal(ClientContext TryApplySpatialFilter(layer, data.spatial_filter.get()); // TODO: Apply projection pushdown - // Apply predicate pushdown - // We simply create a string out of the predicates and pass it to GDAL. - if (input.filters) { - auto filter_clause = FilterToGdal(*input.filters, input.column_ids, data.all_names); - layer->SetAttributeFilter(filter_clause.c_str()); - } - // Create arrow stream from layer gstate.stream = make_uniq(); @@ -678,7 +618,6 @@ void GdalTableFunction::Register(DatabaseInstance &db) { scan.get_partition_data = ArrowTableFunction::ArrowGetPartitionData; scan.projection_pushdown = true; - scan.filter_pushdown = true; scan.named_parameters["open_options"] = LogicalType::LIST(LogicalType::VARCHAR); scan.named_parameters["allowed_drivers"] = LogicalType::LIST(LogicalType::VARCHAR); diff --git a/test/sql/gdal/gdal_vsi.test b/test/sql/gdal/gdal_vsi.test index 00bbdf94..c80a9b44 100644 --- a/test/sql/gdal/gdal_vsi.test +++ b/test/sql/gdal/gdal_vsi.test @@ -5,3 +5,14 @@ query I SELECT COUNT(*) FROM st_read('/vsigzip/__WORKING_DIRECTORY__/test/data/amsterdam_roads_50.geojson.gz'); ---- 50 + + +# Disable external access +statement ok +set enable_external_access = false; + +# Test read via VSI +statement error +SELECT COUNT(*) FROM st_read('/vsigzip/__WORKING_DIRECTORY__/test/data/amsterdam_roads_50.geojson.gz'); +---- +with VSI prefix: External access is disabled \ No newline at end of file