Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Only check external access flag on raw GDAL vis handlers, disable filter pushdown in st_read #464

Merged
merged 1 commit into from
Dec 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,9 @@ EXT_CONFIG=${PROJ_DIR}extension_config.cmake

# Include the Makefile from extension-ci-tools
include extension-ci-tools/makefiles/duckdb_extension.Makefile

#### Override the included format target because we have different source tree layout
format:
find spatial/src -iname *.hpp -o -iname *.cpp | xargs clang-format --sort-includes=0 -style=file -i
find spatial/include -iname *.hpp -o -iname *.cpp | xargs clang-format --sort-includes=0 -style=file -i
cmake-format -i CMakeLists.txt
1 change: 1 addition & 0 deletions spatial/include/spatial/gdal/file_handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ namespace gdal {
class DuckDBFileSystemHandler;

class GDALClientContextState : public ClientContextState {
ClientContext &context;
string client_prefix;
DuckDBFileSystemHandler *fs_handler;

Expand Down
5 changes: 0 additions & 5 deletions spatial/src/spatial/core/io/osm/st_read_osm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,6 @@ static unique_ptr<FunctionData> Bind(ClientContext &context, TableFunctionBindIn
names.push_back("ref_types");

// Create bind data
auto &config = DBConfig::GetConfig(context);
if (!config.options.enable_external_access) {
throw PermissionException("Scanning OSM files is disabled through configuration");
}

auto file_name = StringValue::Get(input.inputs[0]);
auto result = make_uniq<BindData>(file_name);
return std::move(result);
Expand Down
28 changes: 21 additions & 7 deletions spatial/src/spatial/gdal/file_handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ class DuckDBFileHandle : public VSIVirtualHandle {
bool is_eof;

public:
explicit DuckDBFileHandle(unique_ptr<FileHandle> file_handle_p) : file_handle(std::move(file_handle_p)), is_eof(false) {
explicit DuckDBFileHandle(unique_ptr<FileHandle> file_handle_p)
: file_handle(std::move(file_handle_p)), is_eof(false) {
}

vsi_l_offset Tell() override {
Expand Down Expand Up @@ -216,16 +217,24 @@ class DuckDBFileSystemHandler : public VSIFilesystemHandler {
return nullptr;
}

// Fall back to GDAL instead
auto handler = VSIFileManager::GetHandler(file_name);
if (handler) {
return handler->Open(file_name, access);
} else {
// Fall back to GDAL instead (if external access is enabled)
if (!context.db->config.options.enable_external_access) {
if (bSetError) {
VSIError(VSIE_FileError, "Failed to open file %s with GDAL: External access is disabled",
file_name);
}
return nullptr;
}

const auto handler = VSIFileManager::GetHandler(file_name);
if (!handler) {
if (bSetError) {
VSIError(VSIE_FileError, "Failed to open file %s: %s", file_name, ex.what());
}
return nullptr;
}

return handler->Open(file_name, access);
}
}

Expand Down Expand Up @@ -377,7 +386,7 @@ class DuckDBFileSystemHandler : public VSIFilesystemHandler {
// use their own attached file systems. This is necessary because GDAL is
// not otherwise aware of the connection context.
//
GDALClientContextState::GDALClientContextState(ClientContext &context) {
GDALClientContextState::GDALClientContextState(ClientContext &context) : context(context) {

// Create a new random prefix for this client
client_prefix = StringUtil::Format("/vsiduckdb-%s/", UUID::ToString(UUID::GenerateRandomUUID()));
Expand All @@ -387,6 +396,8 @@ GDALClientContextState::GDALClientContextState(ClientContext &context) {

// Register the file handler
VSIFileManager::InstallHandler(client_prefix, fs_handler);

// Also pass a reference to the client context
}

GDALClientContextState::~GDALClientContextState() {
Expand All @@ -404,6 +415,9 @@ void GDALClientContextState::QueryEnd() {
string GDALClientContextState::GetPrefix(const string &value) const {
// If the user explicitly asked for a VSI prefix, we don't add our own
if (StringUtil::StartsWith(value, "/vsi")) {
if (!context.db->config.options.enable_external_access) {
throw PermissionException("Cannot open file '%s' with VSI prefix: External access is disabled", value);
}
return value;
}
return client_prefix + value;
Expand Down
61 changes: 0 additions & 61 deletions spatial/src/spatial/gdal/functions/st_read.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,54 +61,6 @@ static void TryApplySpatialFilter(OGRLayer *layer, SpatialFilter *spatial_filter
}
}

// TODO: Verify that this actually corresponds to the same sql subset expected by OGR SQL
static string FilterToGdal(const TableFilter &filter, const string &column_name) {

switch (filter.filter_type) {
case TableFilterType::CONSTANT_COMPARISON: {
auto &constant_filter = filter.Cast<ConstantFilter>();
return KeywordHelper::WriteOptionallyQuoted(column_name) +
ExpressionTypeToOperator(constant_filter.comparison_type) + constant_filter.constant.ToSQLString();
}
case TableFilterType::CONJUNCTION_AND: {
auto &and_filter = filter.Cast<ConjunctionAndFilter>();
vector<string> filters;
for (const auto &child_filter : and_filter.child_filters) {
filters.push_back(FilterToGdal(*child_filter, column_name));
}
return StringUtil::Join(filters, " AND ");
}
case TableFilterType::CONJUNCTION_OR: {
auto &or_filter = filter.Cast<ConjunctionOrFilter>();
vector<string> filters;
for (const auto &child_filter : or_filter.child_filters) {
filters.push_back(FilterToGdal(*child_filter, column_name));
}
return StringUtil::Join(filters, " OR ");
}
case TableFilterType::IS_NOT_NULL: {
return KeywordHelper::WriteOptionallyQuoted(column_name) + " IS NOT NULL";
}
case TableFilterType::IS_NULL: {
return KeywordHelper::WriteOptionallyQuoted(column_name) + " IS NULL";
}
default:
throw NotImplementedException("FilterToGdal: filter type not implemented");
}
}

static string FilterToGdal(const TableFilterSet &set, const vector<idx_t> &column_ids,
const vector<string> &column_names) {

vector<string> filters;
for (auto &input_filter : set.filters) {
auto col_idx = column_ids[input_filter.first];
auto &col_name = column_names[col_idx];
filters.push_back(FilterToGdal(*input_filter.second, col_name));
}
return StringUtil::Join(filters, " AND ");
}

struct GdalScanFunctionData : public TableFunctionData {
int layer_idx;
bool sequential_layer_scan = false;
Expand Down Expand Up @@ -153,11 +105,6 @@ struct GdalScanGlobalState : ArrowScanGlobalState {
unique_ptr<FunctionData> GdalTableFunction::Bind(ClientContext &context, TableFunctionBindInput &input,
vector<LogicalType> &return_types, vector<string> &names) {

auto &config = DBConfig::GetConfig(context);
if (!config.options.enable_external_access) {
throw PermissionException("Scanning GDAL files is disabled through configuration");
}

auto result = make_uniq<GdalScanFunctionData>();

// First scan for "options" parameter
Expand Down Expand Up @@ -470,13 +417,6 @@ unique_ptr<GlobalTableFunctionState> GdalTableFunction::InitGlobal(ClientContext
TryApplySpatialFilter(layer, data.spatial_filter.get());
// TODO: Apply projection pushdown

// Apply predicate pushdown
// We simply create a string out of the predicates and pass it to GDAL.
if (input.filters) {
auto filter_clause = FilterToGdal(*input.filters, input.column_ids, data.all_names);
layer->SetAttributeFilter(filter_clause.c_str());
}

// Create arrow stream from layer

gstate.stream = make_uniq<ArrowArrayStreamWrapper>();
Expand Down Expand Up @@ -678,7 +618,6 @@ void GdalTableFunction::Register(DatabaseInstance &db) {
scan.get_partition_data = ArrowTableFunction::ArrowGetPartitionData;

scan.projection_pushdown = true;
scan.filter_pushdown = true;

scan.named_parameters["open_options"] = LogicalType::LIST(LogicalType::VARCHAR);
scan.named_parameters["allowed_drivers"] = LogicalType::LIST(LogicalType::VARCHAR);
Expand Down
11 changes: 11 additions & 0 deletions test/sql/gdal/gdal_vsi.test
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,14 @@ query I
SELECT COUNT(*) FROM st_read('/vsigzip/__WORKING_DIRECTORY__/test/data/amsterdam_roads_50.geojson.gz');
----
50


# Disable external access
statement ok
set enable_external_access = false;

# Test read via VSI
statement error
SELECT COUNT(*) FROM st_read('/vsigzip/__WORKING_DIRECTORY__/test/data/amsterdam_roads_50.geojson.gz');
----
with VSI prefix: External access is disabled
Loading