diff --git a/cpp/examples/arrow/filesystem_definition_example.cc b/cpp/examples/arrow/filesystem_definition_example.cc index 612569a641f91..efe1bd10470c0 100644 --- a/cpp/examples/arrow/filesystem_definition_example.cc +++ b/cpp/examples/arrow/filesystem_definition_example.cc @@ -16,10 +16,7 @@ // under the License. #include -<<<<<<< HEAD #include -======= ->>>>>>> e447cfaac (GH-38309: [C++] build filesystems as separate modules) #include #include #include diff --git a/cpp/src/arrow/filesystem/CMakeLists.txt b/cpp/src/arrow/filesystem/CMakeLists.txt index deac04af72f5a..0a31a64b7a3a4 100644 --- a/cpp/src/arrow/filesystem/CMakeLists.txt +++ b/cpp/src/arrow/filesystem/CMakeLists.txt @@ -28,7 +28,9 @@ add_arrow_test(filesystem-test EXTRA_LABELS filesystem DEFINITIONS - ARROW_FILESYSTEM_EXAMPLE_LIBPATH="$") + ARROW_FILESYSTEM_EXAMPLE_LIBPATH="$" + EXTRA_DEPENDENCIES + arrow_filesystem_example) if(ARROW_BUILD_BENCHMARKS) add_arrow_benchmark(localfs_benchmark diff --git a/cpp/src/arrow/filesystem/filesystem.cc b/cpp/src/arrow/filesystem/filesystem.cc index b321a13685eca..651646b124cfc 100644 --- a/cpp/src/arrow/filesystem/filesystem.cc +++ b/cpp/src/arrow/filesystem/filesystem.cc @@ -270,6 +270,11 @@ Result FileSystem::PathFromUri(const std::string& uri_string) const return Status::NotImplemented("PathFromUri is not yet supported on this filesystem"); } +Result FileSystem::MakeUri(std::string path) const { + return Status::NotImplemented("MakeUri is not yet supported for ", type_name(), + " filesystems"); +} + ////////////////////////////////////////////////////////////////////////// // SubTreeFileSystem implementation @@ -853,14 +858,6 @@ Result> FileSystemFromUriReal(const Uri& uri, } } - if (scheme == "file") { - std::string path; - ARROW_ASSIGN_OR_RAISE(auto options, LocalFileSystemOptions::FromUri(uri, &path)); - if (out_path != nullptr) { - *out_path = path; - } - return std::make_shared(options, io_context); - } if (scheme == "abfs" || scheme == "abfss") { #ifdef ARROW_AZURE ARROW_ASSIGN_OR_RAISE(auto options, AzureOptions::FromUri(uri, out_path)); diff --git a/cpp/src/arrow/filesystem/filesystem.h b/cpp/src/arrow/filesystem/filesystem.h index 272e42256a388..1452d7cf0ab16 100644 --- a/cpp/src/arrow/filesystem/filesystem.h +++ b/cpp/src/arrow/filesystem/filesystem.h @@ -197,6 +197,11 @@ class ARROW_EXPORT FileSystem /// \return The path inside the filesystem that is indicated by the URI. virtual Result PathFromUri(const std::string& uri_string) const; + /// \brief Make a URI from which FileSystemFromUri produces an equivalent filesystem + /// \param path The path component to use in the resulting URI + /// \return A URI string, or an error if an equivalent URI cannot be produced + virtual Result MakeUri(std::string path) const; + virtual bool Equals(const FileSystem& other) const = 0; virtual bool Equals(const std::shared_ptr& other) const { diff --git a/cpp/src/arrow/filesystem/localfs.cc b/cpp/src/arrow/filesystem/localfs.cc index fbb33fd00868b..10aa67cb339b9 100644 --- a/cpp/src/arrow/filesystem/localfs.cc +++ b/cpp/src/arrow/filesystem/localfs.cc @@ -39,6 +39,7 @@ #include "arrow/io/type_fwd.h" #include "arrow/util/async_generator.h" #include "arrow/util/io_util.h" +#include "arrow/util/string.h" #include "arrow/util/uri.h" #include "arrow/util/windows_fixup.h" @@ -246,8 +247,20 @@ Result LocalFileSystemOptions::FromUri( std::string(internal::RemoveTrailingSlash(uri.path(), /*preserve_root=*/true)); } - // TODO handle use_mmap option - return LocalFileSystemOptions(); + LocalFileSystemOptions options; + ARROW_ASSIGN_OR_RAISE(auto params, uri.query_items()); + for (const auto& [key, value] : params) { + if (key == "use_mmap") { + if (value.empty()) { + options.use_mmap = true; + continue; + } else { + ARROW_ASSIGN_OR_RAISE(options.use_mmap, ::arrow::internal::ParseBoolean(value)); + } + break; + } + } + return options; } LocalFileSystem::LocalFileSystem(const io::IOContext& io_context) @@ -273,6 +286,11 @@ Result LocalFileSystem::PathFromUri(const std::string& uri_string) authority_handling); } +Result LocalFileSystem::MakeUri(std::string path) const { + ARROW_ASSIGN_OR_RAISE(path, DoNormalizePath(std::move(path))); + return "file://" + path + (options_.use_mmap ? "?use_mmap" : ""); +} + bool LocalFileSystem::Equals(const FileSystem& other) const { if (other.type_name() != type_name()) { return false; @@ -686,4 +704,19 @@ Result> LocalFileSystem::OpenAppendStream( return OpenOutputStreamGeneric(path, truncate, append); } +static Result> LocalFileSystemFactory( + const arrow::util::Uri& uri, const io::IOContext& io_context, std::string* out_path) { + std::string path; + ARROW_ASSIGN_OR_RAISE(auto options, LocalFileSystemOptions::FromUri(uri, &path)); + if (out_path != nullptr) { + *out_path = std::move(path); + } + return std::make_shared(options, io_context); +} + +FileSystemRegistrar kLocalFileSystemModule[]{ + {"file", LocalFileSystemFactory}, + {"local", LocalFileSystemFactory}, +}; + } // namespace arrow::fs diff --git a/cpp/src/arrow/filesystem/localfs.h b/cpp/src/arrow/filesystem/localfs.h index 45a3da317f663..d72e8f7d74d51 100644 --- a/cpp/src/arrow/filesystem/localfs.h +++ b/cpp/src/arrow/filesystem/localfs.h @@ -83,6 +83,7 @@ class ARROW_EXPORT LocalFileSystem : public FileSystem { Result NormalizePath(std::string path) override; Result PathFromUri(const std::string& uri_string) const override; + Result MakeUri(std::string path) const override; bool Equals(const FileSystem& other) const override; diff --git a/cpp/src/arrow/filesystem/localfs_test.cc b/cpp/src/arrow/filesystem/localfs_test.cc index 0beb20eecf38b..fb927466b7f55 100644 --- a/cpp/src/arrow/filesystem/localfs_test.cc +++ b/cpp/src/arrow/filesystem/localfs_test.cc @@ -15,8 +15,6 @@ // specific language governing permissions and limitations // under the License. -#include -#include #include #include #include @@ -186,86 +184,6 @@ TEST(FileSystemFromUri, LinkedRegisteredFactoryNameCollision) { //////////////////////////////////////////////////////////////////////////// // Misc tests -Result> SlowFileSystemFactory(const Uri& uri, - const io::IOContext& io_context, - std::string* out_path) { - auto local_uri = "file" + uri.ToString().substr(uri.scheme().size()); - ARROW_ASSIGN_OR_RAISE(auto base_fs, FileSystemFromUri(local_uri, io_context, out_path)); - double average_latency = 1; - int32_t seed = 0xDEADBEEF; - ARROW_ASSIGN_OR_RAISE(auto params, uri.query_items()); - for (const auto& [key, value] : params) { - if (key == "average_latency") { - average_latency = std::stod(value); - } - if (key == "seed") { - seed = std::stoi(value, nullptr, /*base=*/16); - } - } - return std::make_shared(base_fs, average_latency, seed); -} -FileSystemRegistrar kSlowFileSystemModule{ - "slowfile", - SlowFileSystemFactory, -}; - -TEST(FileSystemFromUri, LinkedRegisteredFactory) { - // Since the registrar's definition is in this translation unit (which is linked to the - // unit test executable), its factory will be registered be loaded automatically before - // main() is entered. - std::string path; - ASSERT_OK_AND_ASSIGN(auto fs, FileSystemFromUri("slowfile:///hey/yo", &path)); - EXPECT_EQ(path, "/hey/yo"); - EXPECT_EQ(fs->type_name(), "slow"); -} - -TEST(FileSystemFromUri, LoadedRegisteredFactory) { - // Since the registrar's definition is in libarrow_filesystem_example.so, - // its factory will be registered only after the library is dynamically loaded. - std::string path; - EXPECT_THAT(FileSystemFromUri("example:///hey/yo", &path), Raises(StatusCode::Invalid)); - - EXPECT_THAT(LoadFileSystemFactories(ARROW_FILESYSTEM_EXAMPLE_LIBPATH), Ok()); - - ASSERT_OK_AND_ASSIGN(auto fs, FileSystemFromUri("example:///hey/yo", &path)); - EXPECT_EQ(path, "/hey/yo"); - EXPECT_EQ(fs->type_name(), "local"); -} - -TEST(FileSystemFromUri, RuntimeRegisteredFactory) { - std::string path; - EXPECT_THAT(FileSystemFromUri("slowfile2:///hey/yo", &path), - Raises(StatusCode::Invalid)); - - EXPECT_THAT(RegisterFileSystemFactory("slowfile2", SlowFileSystemFactory), Ok()); - - ASSERT_OK_AND_ASSIGN(auto fs, FileSystemFromUri("slowfile2:///hey/yo", &path)); - EXPECT_EQ(path, "/hey/yo"); - EXPECT_EQ(fs->type_name(), "slow"); - - EXPECT_THAT( - RegisterFileSystemFactory("slowfile2", SlowFileSystemFactory), - Raises(StatusCode::KeyError, - testing::HasSubstr("Attempted to register factory for scheme 'slowfile2' " - "but that scheme is already registered"))); -} - -FileSystemRegistrar kSegfaultFileSystemModule[]{ - {"segfault", nullptr}, - {"segfault", nullptr}, - {"segfault", nullptr}, -}; -TEST(FileSystemFromUri, LinkedRegisteredFactoryNameCollision) { - // Since multiple registrars are defined in this translation unit which all - // register factories for the 'segfault' scheme, using that scheme in FileSystemFromUri - // is invalidated and raises KeyError. - std::string path; - EXPECT_THAT(FileSystemFromUri("segfault:///hey/yo", &path), - Raises(StatusCode::KeyError)); - // other schemes are not affected by the collision - EXPECT_THAT(FileSystemFromUri("slowfile:///hey/yo", &path), Ok()); -} - TEST(DetectAbsolutePath, Basics) { ASSERT_TRUE(DetectAbsolutePath("/")); ASSERT_TRUE(DetectAbsolutePath("/foo")); @@ -389,6 +307,7 @@ class TestLocalFS : public LocalFSTestMixin { std::string path; ASSERT_OK_AND_ASSIGN(fs_, fs_from_uri(uri, &path)); ASSERT_EQ(fs_->type_name(), "local"); + local_fs_ = ::arrow::internal::checked_pointer_cast(fs_); ASSERT_EQ(path, expected_path); ASSERT_OK_AND_ASSIGN(path, fs_->PathFromUri(uri)); ASSERT_EQ(path, expected_path); @@ -500,8 +419,15 @@ TYPED_TEST(TestLocalFS, FileSystemFromUriFile) { // Variations this->TestLocalUri("file:/foo/bar", "/foo/bar"); + ASSERT_FALSE(this->local_fs_->options().use_mmap); this->TestLocalUri("file:///foo/bar", "/foo/bar"); this->TestLocalUri("file:///some%20path/%25percent", "/some path/%percent"); + + this->TestLocalUri("file:///_?use_mmap", "/_"); + ASSERT_TRUE(this->local_fs_->options().use_mmap); + ASSERT_OK_AND_ASSIGN(auto uri, this->fs_->MakeUri("/_")); + EXPECT_EQ(uri, "file:///_?use_mmap"); + #ifdef _WIN32 this->TestLocalUri("file:/C:/foo/bar", "C:/foo/bar"); this->TestLocalUri("file:///C:/foo/bar", "C:/foo/bar"); diff --git a/cpp/src/arrow/testing/examplefs.cc b/cpp/src/arrow/testing/examplefs.cc index d4186e62df675..d3e7e3b03f6d7 100644 --- a/cpp/src/arrow/testing/examplefs.cc +++ b/cpp/src/arrow/testing/examplefs.cc @@ -16,18 +16,12 @@ // under the License. #include "arrow/filesystem/filesystem.h" -<<<<<<< HEAD #include "arrow/filesystem/filesystem_library.h" #include "arrow/result.h" #include "arrow/util/uri.h" #include -======= -#include "arrow/result.h" -#include "arrow/util/uri.h" - ->>>>>>> e447cfaac (GH-38309: [C++] build filesystems as separate modules) namespace arrow::fs { FileSystemRegistrar kExampleFileSystemModule{ @@ -35,10 +29,7 @@ FileSystemRegistrar kExampleFileSystemModule{ [](const Uri& uri, const io::IOContext& io_context, std::string* out_path) -> Result> { constexpr std::string_view kScheme = "example"; -<<<<<<< HEAD EXPECT_EQ(uri.scheme(), kScheme); -======= ->>>>>>> e447cfaac (GH-38309: [C++] build filesystems as separate modules) auto local_uri = "file" + uri.ToString().substr(kScheme.size()); return FileSystemFromUri(local_uri, io_context, out_path); }, diff --git a/python/pyarrow/_fs.pxd b/python/pyarrow/_fs.pxd index 4504b78b837ea..0df75530bbd6e 100644 --- a/python/pyarrow/_fs.pxd +++ b/python/pyarrow/_fs.pxd @@ -67,9 +67,6 @@ cdef class FileSystem(_Weakrefable): cdef class LocalFileSystem(FileSystem): - cdef: - CLocalFileSystem* localfs - cdef init(self, const shared_ptr[CFileSystem]& wrapped) diff --git a/python/pyarrow/_fs.pyx b/python/pyarrow/_fs.pyx index 86cf39e993c1b..0cf9ad774ffcd 100644 --- a/python/pyarrow/_fs.pyx +++ b/python/pyarrow/_fs.pyx @@ -18,7 +18,6 @@ # cython: language_level = 3 from cpython.datetime cimport datetime, PyDateTime_DateTime -from cython cimport binding from pyarrow.includes.common cimport * from pyarrow.includes.libarrow_python cimport PyDateTime_to_TimePoint @@ -421,6 +420,11 @@ cdef class FileSystem(_Weakrefable): "the subclasses instead: LocalFileSystem or " "SubTreeFileSystem") + @staticmethod + def _from_uri(uri): + fs, _path = FileSystem.from_uri(uri) + return fs + @staticmethod def from_uri(uri): """ @@ -1097,30 +1101,17 @@ cdef class LocalFileSystem(FileSystem): def __init__(self, *, use_mmap=False): cdef: - CLocalFileSystemOptions opts - shared_ptr[CLocalFileSystem] fs - - opts = CLocalFileSystemOptions.Defaults() - opts.use_mmap = use_mmap + shared_ptr[CFileSystem] fs + c_string c_uri - fs = make_shared[CLocalFileSystem](opts) + c_uri = tobytes(f"file:///_?use_mmap={int(use_mmap)}") + with nogil: + fs = GetResultValue(CFileSystemFromUri(c_uri)) self.init( fs) - cdef init(self, const shared_ptr[CFileSystem]& c_fs): - FileSystem.init(self, c_fs) - self.localfs = c_fs.get() - - @staticmethod - @binding(True) # Required for cython < 3 - def _reconstruct(kwargs): - # __reduce__ doesn't allow passing named arguments directly to the - # reconstructor, hence this wrapper. - return LocalFileSystem(**kwargs) - def __reduce__(self): - cdef CLocalFileSystemOptions opts = self.localfs.options() - return LocalFileSystem._reconstruct, (dict( - use_mmap=opts.use_mmap),) + uri = frombytes(GetResultValue(self.fs.MakeUri(b"/_"))) + return FileSystem._from_uri, (uri,) cdef class SubTreeFileSystem(FileSystem): diff --git a/python/pyarrow/includes/libarrow_fs.pxd b/python/pyarrow/includes/libarrow_fs.pxd index 328b426a498db..f1f2985f65394 100644 --- a/python/pyarrow/includes/libarrow_fs.pxd +++ b/python/pyarrow/includes/libarrow_fs.pxd @@ -61,6 +61,7 @@ cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil: shared_ptr[CFileSystem] shared_from_this() c_string type_name() const CResult[c_string] NormalizePath(c_string path) + CResult[c_string] MakeUri(c_string path) CResult[CFileInfo] GetFileInfo(const c_string& path) CResult[vector[CFileInfo]] GetFileInfo( const vector[c_string]& paths) @@ -84,6 +85,8 @@ cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil: c_bool Equals(const CFileSystem& other) c_bool Equals(shared_ptr[CFileSystem] other) + CResult[shared_ptr[CFileSystem]] CFileSystemFromUri \ + "arrow::fs::FileSystemFromUri"(const c_string& uri) CResult[shared_ptr[CFileSystem]] CFileSystemFromUri \ "arrow::fs::FileSystemFromUri"(const c_string& uri, c_string* out_path) CResult[shared_ptr[CFileSystem]] CFileSystemFromUriOrPath \ @@ -98,19 +101,6 @@ cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil: CStatus CFileSystemsInitialize "arrow::fs::Initialize" \ (const CFileSystemGlobalOptions& options) - cdef cppclass CLocalFileSystemOptions "arrow::fs::LocalFileSystemOptions": - c_bool use_mmap - - @staticmethod - CLocalFileSystemOptions Defaults() - - c_bool Equals(const CLocalFileSystemOptions& other) - - cdef cppclass CLocalFileSystem "arrow::fs::LocalFileSystem"(CFileSystem): - CLocalFileSystem() - CLocalFileSystem(CLocalFileSystemOptions) - CLocalFileSystemOptions options() - cdef cppclass CSubTreeFileSystem \ "arrow::fs::SubTreeFileSystem"(CFileSystem): CSubTreeFileSystem(const c_string& base_path, diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index f4ff3ef894532..a043747faf232 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -1348,10 +1348,6 @@ fs___FileSystem__type_name <- function(file_system) { .Call(`_arrow_fs___FileSystem__type_name`, file_system) } -fs___LocalFileSystem__create <- function() { - .Call(`_arrow_fs___LocalFileSystem__create`) -} - fs___SubTreeFileSystem__create <- function(base_path, base_fs) { .Call(`_arrow_fs___SubTreeFileSystem__create`, base_path, base_fs) } diff --git a/r/R/filesystem.R b/r/R/filesystem.R index 0e4484d1b583d..0b0fb6a6a4788 100644 --- a/r/R/filesystem.R +++ b/r/R/filesystem.R @@ -390,7 +390,7 @@ are_urls <- function(x) if (!is.character(x)) FALSE else grepl("://", x) #' @export LocalFileSystem <- R6Class("LocalFileSystem", inherit = FileSystem) LocalFileSystem$create <- function() { - fs___LocalFileSystem__create() + FileSystem$from_uri("file:///_")$fs } #' @usage NULL diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 75e0f27b4002e..8ccc345cc4572 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -3464,13 +3464,6 @@ BEGIN_CPP11 END_CPP11 } // filesystem.cpp -std::shared_ptr fs___LocalFileSystem__create(); -extern "C" SEXP _arrow_fs___LocalFileSystem__create(){ -BEGIN_CPP11 - return cpp11::as_sexp(fs___LocalFileSystem__create()); -END_CPP11 -} -// filesystem.cpp std::shared_ptr fs___SubTreeFileSystem__create(const std::string& base_path, const std::shared_ptr& base_fs); extern "C" SEXP _arrow_fs___SubTreeFileSystem__create(SEXP base_path_sexp, SEXP base_fs_sexp){ BEGIN_CPP11 @@ -6005,7 +5998,6 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_fs___FileSystem__OpenOutputStream", (DL_FUNC) &_arrow_fs___FileSystem__OpenOutputStream, 2}, { "_arrow_fs___FileSystem__OpenAppendStream", (DL_FUNC) &_arrow_fs___FileSystem__OpenAppendStream, 2}, { "_arrow_fs___FileSystem__type_name", (DL_FUNC) &_arrow_fs___FileSystem__type_name, 1}, - { "_arrow_fs___LocalFileSystem__create", (DL_FUNC) &_arrow_fs___LocalFileSystem__create, 0}, { "_arrow_fs___SubTreeFileSystem__create", (DL_FUNC) &_arrow_fs___SubTreeFileSystem__create, 2}, { "_arrow_fs___SubTreeFileSystem__base_fs", (DL_FUNC) &_arrow_fs___SubTreeFileSystem__base_fs, 1}, { "_arrow_fs___SubTreeFileSystem__base_path", (DL_FUNC) &_arrow_fs___SubTreeFileSystem__base_path, 1}, diff --git a/r/src/filesystem.cpp b/r/src/filesystem.cpp index 23bcb81e8faae..2274a3d7ff7a2 100644 --- a/r/src/filesystem.cpp +++ b/r/src/filesystem.cpp @@ -238,13 +238,6 @@ std::string fs___FileSystem__type_name( return file_system->type_name(); } -// [[arrow::export]] -std::shared_ptr fs___LocalFileSystem__create() { - // Affects OpenInputFile/OpenInputStream - auto io_context = MainRThread::GetInstance().CancellableIOContext(); - return std::make_shared(io_context); -} - // [[arrow::export]] std::shared_ptr fs___SubTreeFileSystem__create( const std::string& base_path, const std::shared_ptr& base_fs) { @@ -268,9 +261,10 @@ cpp11::writable::list fs___FileSystemFromUri(const std::string& path) { using cpp11::literals::operator"" _nm; std::string out_path; - return cpp11::writable::list( - {"fs"_nm = cpp11::to_r6(ValueOrStop(fs::FileSystemFromUri(path, &out_path))), - "path"_nm = out_path}); + auto io_context = MainRThread::GetInstance().CancellableIOContext(); + return cpp11::writable::list({"fs"_nm = cpp11::to_r6(ValueOrStop( + fs::FileSystemFromUri(path, io_context, &out_path))), + "path"_nm = out_path}); } // [[arrow::export]]