diff --git a/spatial/src/spatial/gdal/file_handler.cpp b/spatial/src/spatial/gdal/file_handler.cpp index 3fe1abf5..9f3a711a 100644 --- a/spatial/src/spatial/gdal/file_handler.cpp +++ b/spatial/src/spatial/gdal/file_handler.cpp @@ -21,6 +21,7 @@ namespace gdal { class DuckDBFileHandle : public VSIVirtualHandle { private: unique_ptr file_handle; + bool is_eof = false; /* set when a read comes up short at end of file, cleared by Seek(); initialised so Eof() is well-defined before either happens */ public: explicit DuckDBFileHandle(unique_ptr file_handle_p) : file_handle(std::move(file_handle_p)) { @@ -30,6 +31,8 @@ class DuckDBFileHandle : public VSIVirtualHandle { return static_cast(file_handle->SeekPosition()); } int Seek(vsi_l_offset nOffset, int nWhence) override { + is_eof = false; + if (nWhence == SEEK_SET && nOffset == 0) { // Use the reset function instead to allow compressed file handles to rewind // even if they don't support seeking @@ -66,11 +69,22 @@ class DuckDBFileHandle : public VSIVirtualHandle { } } catch (...) { } + + if(remaining_bytes != 0) { + if(file_handle->SeekPosition() == file_handle->GetFileSize()) { + // Is at EOF! + is_eof = true; + } + // else, error! + // unfortunately, this version of GDAL can't distinguish between errors and reading fewer bytes + // that is available in 3.9.2, but we're stuck on 3.8.5 for now. + } + return nCount - (remaining_bytes / nSize); } int Eof() override { - return file_handle->SeekPosition() == file_handle->GetFileSize() ? TRUE : FALSE; + return is_eof ? 
TRUE : FALSE; } size_t Write(const void *pBuffer, size_t nSize, size_t nCount) override { @@ -122,6 +136,8 @@ class DuckDBFileSystemHandler : public VSIFilesystemHandler { return pszFilename + client_prefix.size(); } + string AddPrefix(const string &value) { return client_prefix + value; } + VSIVirtualHandle *Open(const char *prefixed_file_name, const char *access, bool bSetError, CSLConstList /* papszOptions */) override { auto file_name = StripPrefix(prefixed_file_name); @@ -310,7 +326,8 @@ class DuckDBFileSystemHandler : public VSIFilesystemHandler { if (files_count >= max_files) { return; } - files.AddString(file_name.c_str()); + const auto tmp = AddPrefix(file_name); + files.AddString(tmp.c_str()); files_count++; }); return files.StealList(); @@ -321,9 +338,14 @@ class DuckDBFileSystemHandler : public VSIFilesystemHandler { auto &fs = FileSystem::GetFileSystem(context); CPLStringList files; - auto file_vector = fs.Glob(file_name); + + auto file_name_without_ext = fs.JoinPath(StringUtil::GetFilePath(file_name), StringUtil::GetFileStem(file_name)); + auto file_glob = file_name_without_ext + ".*"; + + auto file_vector = fs.Glob(file_glob); for (auto &file : file_vector) { - files.AddString(file.c_str()); + auto tmp = AddPrefix(file); + files.AddString(tmp.c_str()); } return files.StealList(); } diff --git a/spatial/src/spatial/gdal/functions/st_read.cpp b/spatial/src/spatial/gdal/functions/st_read.cpp index 177548c3..b730baa4 100644 --- a/spatial/src/spatial/gdal/functions/st_read.cpp +++ b/spatial/src/spatial/gdal/functions/st_read.cpp @@ -176,15 +176,16 @@ unique_ptr GdalTableFunction::Bind(ClientContext &context, TableFu } } + // Now we can open the dataset + auto &ctx_state = GDALClientContextState::GetOrCreate(context); + auto siblings_params = input.named_parameters.find("sibling_files"); if (siblings_params != input.named_parameters.end()) { for (auto &param : ListValue::GetChildren(siblings_params->second)) { - 
result->dataset_sibling_files.AddString(StringValue::Get(param).c_str()); + result->dataset_sibling_files.AddString(ctx_state.GetPrefix(StringValue::Get(param)).c_str()); } } - // Now we can open the dataset - auto &ctx_state = GDALClientContextState::GetOrCreate(context); result->raw_file_name = input.inputs[0].GetValue(); result->prefixed_file_name = ctx_state.GetPrefix(result->raw_file_name); diff --git a/spatial/src/spatial/geographiclib/functions/st_distance_within_spheroid.cpp b/spatial/src/spatial/geographiclib/functions/st_distance_within_spheroid.cpp index 210685fd..e01648ff 100644 --- a/spatial/src/spatial/geographiclib/functions/st_distance_within_spheroid.cpp +++ b/spatial/src/spatial/geographiclib/functions/st_distance_within_spheroid.cpp @@ -59,7 +59,7 @@ void GeographicLibFunctions::RegisterDistanceWithin(DatabaseInstance &db) { ScalarFunctionSet set("ST_DWithin_Spheroid"); set.AddFunction( ScalarFunction({spatial::core::GeoTypes::POINT_2D(), spatial::core::GeoTypes::POINT_2D(), LogicalType::DOUBLE}, - LogicalType::DOUBLE, GeodesicPoint2DFunction)); + LogicalType::BOOLEAN, GeodesicPoint2DFunction)); ExtensionUtil::RegisterFunction(db, set); DocUtil::AddDocumentation(db, "ST_DWithin_Spheroid", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); diff --git a/test/data/nyc_export/geo_export_42c9a823-5465-4f85-80b3-b294002094f2.cpg b/test/data/nyc_export/geo_export_42c9a823-5465-4f85-80b3-b294002094f2.cpg new file mode 100644 index 00000000..3ad133c0 --- /dev/null +++ b/test/data/nyc_export/geo_export_42c9a823-5465-4f85-80b3-b294002094f2.cpg @@ -0,0 +1 @@ +UTF-8 \ No newline at end of file diff --git a/test/data/nyc_export/geo_export_42c9a823-5465-4f85-80b3-b294002094f2.dbf b/test/data/nyc_export/geo_export_42c9a823-5465-4f85-80b3-b294002094f2.dbf new file mode 100644 index 00000000..943389b0 Binary files /dev/null and b/test/data/nyc_export/geo_export_42c9a823-5465-4f85-80b3-b294002094f2.dbf differ diff --git 
a/test/data/nyc_export/geo_export_42c9a823-5465-4f85-80b3-b294002094f2.prj b/test/data/nyc_export/geo_export_42c9a823-5465-4f85-80b3-b294002094f2.prj new file mode 100644 index 00000000..5307c8f5 --- /dev/null +++ b/test/data/nyc_export/geo_export_42c9a823-5465-4f85-80b3-b294002094f2.prj @@ -0,0 +1 @@ +GEOGCS["WGS84(DD)", DATUM["WGS84", SPHEROID["WGS84", 6378137.0, 298.257223563]], PRIMEM["Greenwich", 0.0], UNIT["degree", 0.017453292519943295], AXIS["Geodetic longitude", EAST], AXIS["Geodetic latitude", NORTH], AUTHORITY["EPSG","4326"]] \ No newline at end of file diff --git a/test/data/nyc_export/geo_export_42c9a823-5465-4f85-80b3-b294002094f2.shp b/test/data/nyc_export/geo_export_42c9a823-5465-4f85-80b3-b294002094f2.shp new file mode 100644 index 00000000..646a949d Binary files /dev/null and b/test/data/nyc_export/geo_export_42c9a823-5465-4f85-80b3-b294002094f2.shp differ diff --git a/test/data/nyc_export/geo_export_42c9a823-5465-4f85-80b3-b294002094f2.shx b/test/data/nyc_export/geo_export_42c9a823-5465-4f85-80b3-b294002094f2.shx new file mode 100644 index 00000000..330702bc Binary files /dev/null and b/test/data/nyc_export/geo_export_42c9a823-5465-4f85-80b3-b294002094f2.shx differ diff --git a/test/sql/gdal/gdal_shapefile.test b/test/sql/gdal/gdal_shapefile.test new file mode 100644 index 00000000..b89e00cf --- /dev/null +++ b/test/sql/gdal/gdal_shapefile.test @@ -0,0 +1,7 @@ +require spatial + +# This used to fail because our GDAL filesystem wrapper was too aggressive in marking EOF +query I +SELECT COUNT(*) FROM st_read('__WORKING_DIRECTORY__/test/data/nyc_export/geo_export_42c9a823-5465-4f85-80b3-b294002094f2.shp'); +---- +5