Skip to content

Commit

Permalink
only check external access flag explicitly when accessing raw gdal vs…
Browse files Browse the repository at this point in the history
…i handlers
  • Loading branch information
Maxxen committed Dec 5, 2024
1 parent cd02956 commit 4cda975
Show file tree
Hide file tree
Showing 6 changed files with 39 additions and 73 deletions.
6 changes: 6 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,9 @@ EXT_CONFIG=${PROJ_DIR}extension_config.cmake

# Include the Makefile from extension-ci-tools
include extension-ci-tools/makefiles/duckdb_extension.Makefile

#### Override the included format target because we have different source tree layout
format:
find spatial/src -iname *.hpp -o -iname *.cpp | xargs clang-format --sort-includes=0 -style=file -i
find spatial/include -iname *.hpp -o -iname *.cpp | xargs clang-format --sort-includes=0 -style=file -i
cmake-format -i CMakeLists.txt
1 change: 1 addition & 0 deletions spatial/include/spatial/gdal/file_handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ namespace gdal {
class DuckDBFileSystemHandler;

class GDALClientContextState : public ClientContextState {
ClientContext &context;
string client_prefix;
DuckDBFileSystemHandler *fs_handler;

Expand Down
5 changes: 0 additions & 5 deletions spatial/src/spatial/core/io/osm/st_read_osm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,6 @@ static unique_ptr<FunctionData> Bind(ClientContext &context, TableFunctionBindIn
names.push_back("ref_types");

// Create bind data
auto &config = DBConfig::GetConfig(context);
if (!config.options.enable_external_access) {
throw PermissionException("Scanning OSM files is disabled through configuration");
}

auto file_name = StringValue::Get(input.inputs[0]);
auto result = make_uniq<BindData>(file_name);
return std::move(result);
Expand Down
28 changes: 21 additions & 7 deletions spatial/src/spatial/gdal/file_handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ class DuckDBFileHandle : public VSIVirtualHandle {
bool is_eof;

public:
explicit DuckDBFileHandle(unique_ptr<FileHandle> file_handle_p) : file_handle(std::move(file_handle_p)), is_eof(false) {
explicit DuckDBFileHandle(unique_ptr<FileHandle> file_handle_p)
: file_handle(std::move(file_handle_p)), is_eof(false) {
}

vsi_l_offset Tell() override {
Expand Down Expand Up @@ -216,16 +217,24 @@ class DuckDBFileSystemHandler : public VSIFilesystemHandler {
return nullptr;
}

// Fall back to GDAL instead
auto handler = VSIFileManager::GetHandler(file_name);
if (handler) {
return handler->Open(file_name, access);
} else {
// Fall back to GDAL instead (if external access is enabled)
if (!context.db->config.options.enable_external_access) {
if (bSetError) {
VSIError(VSIE_FileError, "Failed to open file %s with GDAL: External access is disabled",
file_name);
}
return nullptr;
}

const auto handler = VSIFileManager::GetHandler(file_name);
if (!handler) {
if (bSetError) {
VSIError(VSIE_FileError, "Failed to open file %s: %s", file_name, ex.what());
}
return nullptr;
}

return handler->Open(file_name, access);
}
}

Expand Down Expand Up @@ -377,7 +386,7 @@ class DuckDBFileSystemHandler : public VSIFilesystemHandler {
// use their own attached file systems. This is necessary because GDAL is
// not otherwise aware of the connection context.
//
GDALClientContextState::GDALClientContextState(ClientContext &context) {
GDALClientContextState::GDALClientContextState(ClientContext &context) : context(context) {

// Create a new random prefix for this client
client_prefix = StringUtil::Format("/vsiduckdb-%s/", UUID::ToString(UUID::GenerateRandomUUID()));
Expand All @@ -387,6 +396,8 @@ GDALClientContextState::GDALClientContextState(ClientContext &context) {

// Register the file handler
VSIFileManager::InstallHandler(client_prefix, fs_handler);

// Also pass a reference to the client context
}

GDALClientContextState::~GDALClientContextState() {
Expand All @@ -404,6 +415,9 @@ void GDALClientContextState::QueryEnd() {
string GDALClientContextState::GetPrefix(const string &value) const {
// If the user explicitly asked for a VSI prefix, we don't add our own
if (StringUtil::StartsWith(value, "/vsi")) {
if (!context.db->config.options.enable_external_access) {
throw PermissionException("Cannot open file '%s' with VSI prefix: External access is disabled", value);
}
return value;
}
return client_prefix + value;
Expand Down
61 changes: 0 additions & 61 deletions spatial/src/spatial/gdal/functions/st_read.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,54 +61,6 @@ static void TryApplySpatialFilter(OGRLayer *layer, SpatialFilter *spatial_filter
}
}

// TODO: Verify that this actually corresponds to the same sql subset expected by OGR SQL
static string FilterToGdal(const TableFilter &filter, const string &column_name) {

switch (filter.filter_type) {
case TableFilterType::CONSTANT_COMPARISON: {
auto &constant_filter = filter.Cast<ConstantFilter>();
return KeywordHelper::WriteOptionallyQuoted(column_name) +
ExpressionTypeToOperator(constant_filter.comparison_type) + constant_filter.constant.ToSQLString();
}
case TableFilterType::CONJUNCTION_AND: {
auto &and_filter = filter.Cast<ConjunctionAndFilter>();
vector<string> filters;
for (const auto &child_filter : and_filter.child_filters) {
filters.push_back(FilterToGdal(*child_filter, column_name));
}
return StringUtil::Join(filters, " AND ");
}
case TableFilterType::CONJUNCTION_OR: {
auto &or_filter = filter.Cast<ConjunctionOrFilter>();
vector<string> filters;
for (const auto &child_filter : or_filter.child_filters) {
filters.push_back(FilterToGdal(*child_filter, column_name));
}
return StringUtil::Join(filters, " OR ");
}
case TableFilterType::IS_NOT_NULL: {
return KeywordHelper::WriteOptionallyQuoted(column_name) + " IS NOT NULL";
}
case TableFilterType::IS_NULL: {
return KeywordHelper::WriteOptionallyQuoted(column_name) + " IS NULL";
}
default:
throw NotImplementedException("FilterToGdal: filter type not implemented");
}
}

static string FilterToGdal(const TableFilterSet &set, const vector<idx_t> &column_ids,
const vector<string> &column_names) {

vector<string> filters;
for (auto &input_filter : set.filters) {
auto col_idx = column_ids[input_filter.first];
auto &col_name = column_names[col_idx];
filters.push_back(FilterToGdal(*input_filter.second, col_name));
}
return StringUtil::Join(filters, " AND ");
}

struct GdalScanFunctionData : public TableFunctionData {
int layer_idx;
bool sequential_layer_scan = false;
Expand Down Expand Up @@ -153,11 +105,6 @@ struct GdalScanGlobalState : ArrowScanGlobalState {
unique_ptr<FunctionData> GdalTableFunction::Bind(ClientContext &context, TableFunctionBindInput &input,
vector<LogicalType> &return_types, vector<string> &names) {

auto &config = DBConfig::GetConfig(context);
if (!config.options.enable_external_access) {
throw PermissionException("Scanning GDAL files is disabled through configuration");
}

auto result = make_uniq<GdalScanFunctionData>();

// First scan for "options" parameter
Expand Down Expand Up @@ -470,13 +417,6 @@ unique_ptr<GlobalTableFunctionState> GdalTableFunction::InitGlobal(ClientContext
TryApplySpatialFilter(layer, data.spatial_filter.get());
// TODO: Apply projection pushdown

// Apply predicate pushdown
// We simply create a string out of the predicates and pass it to GDAL.
if (input.filters) {
auto filter_clause = FilterToGdal(*input.filters, input.column_ids, data.all_names);
layer->SetAttributeFilter(filter_clause.c_str());
}

// Create arrow stream from layer

gstate.stream = make_uniq<ArrowArrayStreamWrapper>();
Expand Down Expand Up @@ -678,7 +618,6 @@ void GdalTableFunction::Register(DatabaseInstance &db) {
scan.get_partition_data = ArrowTableFunction::ArrowGetPartitionData;

scan.projection_pushdown = true;
scan.filter_pushdown = true;

scan.named_parameters["open_options"] = LogicalType::LIST(LogicalType::VARCHAR);
scan.named_parameters["allowed_drivers"] = LogicalType::LIST(LogicalType::VARCHAR);
Expand Down
11 changes: 11 additions & 0 deletions test/sql/gdal/gdal_vsi.test
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,14 @@ query I
SELECT COUNT(*) FROM st_read('/vsigzip/__WORKING_DIRECTORY__/test/data/amsterdam_roads_50.geojson.gz');
----
50


# Disable external access
statement ok
set enable_external_access = false;

# Test read via VSI
statement error
SELECT COUNT(*) FROM st_read('/vsigzip/__WORKING_DIRECTORY__/test/data/amsterdam_roads_50.geojson.gz');
----
with VSI prefix: External access is disabled

0 comments on commit 4cda975

Please sign in to comment.