Skip to content

Commit

Permalink
apacheGH-38309: [C++] build filesystems as separate modules
Browse files Browse the repository at this point in the history
  • Loading branch information
bkietz committed Mar 1, 2024
1 parent 30e6d72 commit f016aa8
Show file tree
Hide file tree
Showing 49 changed files with 985 additions and 271 deletions.
15 changes: 15 additions & 0 deletions cpp/examples/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -195,3 +195,18 @@ if(ARROW_GANDIVA)
endif()
add_arrow_example(gandiva_example EXTRA_LINK_LIBS ${GANDIVA_EXAMPLE_LINK_LIBS})
endif()

# Filesystem plugin examples: a user-defined filesystem built as a loadable
# module, plus an example program that is told where that module lives.
# Both are only configured when ARROW_FILESYSTEM is enabled.
if(ARROW_FILESYSTEM)
# MODULE libraries are plugins meant to be loaded at runtime rather than
# linked into other targets.
add_library(filesystem_definition_example MODULE filesystem_definition_example.cc)

# Link the module against whichever flavor of the Arrow library is being built.
if(ARROW_BUILD_SHARED)
target_link_libraries(filesystem_definition_example arrow_shared)
else()
target_link_libraries(filesystem_definition_example arrow_static)
endif()

add_arrow_example(filesystem_usage_example)
# NOTE(review): the target is referenced with hyphens although the example was
# declared with underscores; presumably add_arrow_example() derives the target
# name that way - confirm against its definition.
# LIBPATH expands to the full path of the built module file, as a string literal.
target_compile_definitions(filesystem-usage-example
PUBLIC LIBPATH="$<TARGET_FILE:filesystem_definition_example>"
)
endif()
150 changes: 150 additions & 0 deletions cpp/examples/arrow/filesystem_definition_example.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include <arrow/filesystem/filesystem.h>
#include <arrow/io/memory.h>
#include <arrow/result.h>
#include <arrow/util/uri.h>

// Demonstrate registering a user-defined Arrow FileSystem outside
// of the Arrow source tree.

using arrow::Result;
using arrow::Status;
namespace io = arrow::io;
namespace fs = arrow::fs;

class ExampleFileSystem : public fs::FileSystem {
public:
explicit ExampleFileSystem(const io::IOContext& io_context)
: fs::FileSystem{io_context} {}

// This is a mock filesystem whose root directory contains a single file.
// All operations which would mutate will simply raise an error.
static constexpr std::string_view kPath = "example_file";
static constexpr std::string_view kContents = "hello world";
static fs::FileInfo info() {
fs::FileInfo info;
info.set_path(std::string{kPath});
info.set_type(fs::FileType::File);
info.set_size(kContents.size());
return info;
}

static Status NotFound(std::string_view path) {
return Status::IOError("Path does not exist '", path, "'");
}

static Status NoMutation() {
return Status::IOError("operations which would mutate are not permitted");
}

Result<std::string> PathFromUri(const std::string& uri_string) const override {
ARROW_ASSIGN_OR_RAISE(auto uri, arrow::util::Uri::FromString(uri_string));
return uri.path();
}

std::string type_name() const override { return "example"; }

bool Equals(const FileSystem& other) const override {
return type_name() == other.type_name();
}

/// \cond FALSE
using FileSystem::CreateDir;
using FileSystem::DeleteDirContents;
using FileSystem::GetFileInfo;
using FileSystem::OpenAppendStream;
using FileSystem::OpenOutputStream;
/// \endcond

Result<fs::FileInfo> GetFileInfo(const std::string& path) override {
if (path == kPath) {
return info();
}
return NotFound(path);
}

Result<std::vector<fs::FileInfo>> GetFileInfo(const fs::FileSelector& select) override {
if (select.base_dir == "/" || select.base_dir == "") {
return std::vector<fs::FileInfo>{info()};
}
if (select.allow_not_found) {
return std::vector<fs::FileInfo>{};
}
return NotFound(select.base_dir);
}

Status CreateDir(const std::string& path, bool recursive) override {
return NoMutation();
}

Status DeleteDir(const std::string& path) override { return NoMutation(); }

Status DeleteDirContents(const std::string& path, bool missing_dir_ok) override {
return NoMutation();
}

Status DeleteRootDirContents() override { return NoMutation(); }

Status DeleteFile(const std::string& path) override { return NoMutation(); }

Status Move(const std::string& src, const std::string& dest) override {
return NoMutation();
}

Status CopyFile(const std::string& src, const std::string& dest) override {
return NoMutation();
}

Result<std::shared_ptr<io::InputStream>> OpenInputStream(
const std::string& path) override {
return OpenInputFile(path);
}

Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
const std::string& path) override {
if (path == kPath) {
return io::BufferReader::FromString(std::string{kContents});
}
return NotFound(path);
}

Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
const std::string& path,
const std::shared_ptr<const arrow::KeyValueMetadata>& metadata) override {
return NoMutation();
}

Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
const std::string& path,
const std::shared_ptr<const arrow::KeyValueMetadata>& metadata) override {
return NoMutation();
}
};

// Associates the "example" scheme name with a factory that builds an
// ExampleFileSystem. When the caller supplies `out_path`, the factory also
// reports the path component of the URI through it.
fs::FileSystemRegistrar kExampleFileSystemModule{
    "example",
    [](const arrow::util::Uri& uri, const io::IOContext& io_context,
       std::string* out_path) -> Result<std::shared_ptr<fs::FileSystem>> {
      auto example_filesystem = std::make_shared<ExampleFileSystem>(io_context);
      // `out_path` is optional; only resolve the path when it was requested.
      if (out_path != nullptr) {
        ARROW_ASSIGN_OR_RAISE(*out_path,
                              example_filesystem->PathFromUri(uri.ToString()));
      }
      return example_filesystem;
    },
};
55 changes: 55 additions & 0 deletions cpp/examples/arrow/filesystem_usage_example.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include <iostream>

#include <arrow/filesystem/filesystem.h>
#include <arrow/result.h>

namespace fs = arrow::fs;

// Demonstrate dynamically loading a user-defined Arrow FileSystem

// Loads the filesystem factories from the module at LIBPATH, resolves an
// "example" URI into a filesystem plus path, and prints the entries found
// in that filesystem's root directory.
arrow::Status Execute() {
  ARROW_RETURN_NOT_OK(fs::LoadFileSystemFactories(LIBPATH));

  const std::string uri = "example:///example_file";
  std::cout << "Uri: " << uri << std::endl;

  std::string path;
  ARROW_ASSIGN_OR_RAISE(auto filesystem, fs::FileSystemFromUri(uri, &path));
  std::cout << "Path: " << path << std::endl;

  // Select the root directory of the loaded filesystem.
  fs::FileSelector root_selector;
  root_selector.base_dir = "/";
  ARROW_ASSIGN_OR_RAISE(auto entries, filesystem->GetFileInfo(root_selector));

  std::cout << "Root directory contains:" << std::endl;
  for (const auto& entry : entries) {
    std::cout << "- " << entry << std::endl;
  }
  return arrow::Status::OK();
}

int main() {
auto status = Execute();
if (!status.ok()) {
std::cerr << "Error occurred : " << status.message() << std::endl;
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}
4 changes: 2 additions & 2 deletions cpp/src/arrow/dataset/partition.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ namespace dataset {
namespace {
/// Apply UriUnescape, then ensure the results are valid UTF-8.
Result<std::string> SafeUriUnescape(std::string_view encoded) {
auto decoded = ::arrow::internal::UriUnescape(encoded);
auto decoded = ::arrow::util::UriUnescape(encoded);
if (!util::ValidateUTF8(decoded)) {
return Status::Invalid("Partition segment was not valid UTF-8 after URL decoding: ",
encoded);
Expand Down Expand Up @@ -755,7 +755,7 @@ Result<PartitionPathFormat> HivePartitioning::FormatValues(
// field_index <-> path nesting relation
segments[i] = name + "=" + hive_options_.null_fallback;
} else {
segments[i] = name + "=" + arrow::internal::UriEscape(values[i]->ToString());
segments[i] = name + "=" + arrow::util::UriEscape(values[i]->ToString());
}
}

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/dataset/partition_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -935,7 +935,7 @@ TEST_F(TestPartitioning, WriteHiveWithSlashesInValues) {
"experiment/A/f.csv", "experiment/B/f.csv", "experiment/C/k.csv",
"experiment/M/i.csv"};
for (auto partition : unique_partitions) {
encoded_paths.push_back("part=" + arrow::internal::UriEscape(partition));
encoded_paths.push_back("part=" + arrow::util::UriEscape(partition));
}

ASSERT_EQ(all_dirs.size(), encoded_paths.size());
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/engine/substrait/relation_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ namespace arrow {
using internal::checked_cast;
using internal::StartsWith;
using internal::ToChars;
using internal::UriFromAbsolutePath;
using util::UriFromAbsolutePath;

namespace engine {

Expand Down Expand Up @@ -463,7 +463,7 @@ Result<DeclarationInfo> FromProto(const substrait::Rel& rel, const ExtensionSet&
}

// Extract and parse the read relation's source URI
::arrow::internal::Uri item_uri;
::arrow::util::Uri item_uri;
switch (item.path_type_case()) {
case substrait::ReadRel::LocalFiles::FileOrFiles::kUriPath:
RETURN_NOT_OK(item_uri.Parse(item.uri_path()));
Expand Down
8 changes: 8 additions & 0 deletions cpp/src/arrow/filesystem/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ add_arrow_test(filesystem-test
EXTRA_LABELS
filesystem)

# Test-only filesystem plugin: built as a runtime-loadable MODULE from
# examplefs.cc, and only when tests are enabled.
if(ARROW_BUILD_TESTS)
add_library(arrow_filesystem_example MODULE examplefs.cc)
# Link against the Arrow library flavor selected by ARROW_TEST_LINKAGE.
target_link_libraries(arrow_filesystem_example arrow_${ARROW_TEST_LINKAGE})
# Give the filesystem test the full path of the built module file as a
# string literal.
target_compile_definitions(arrow-filesystem-test
PUBLIC ARROW_FILESYSTEM_EXAMPLE_LIBPATH="$<TARGET_FILE:arrow_filesystem_example>"
)
endif()

if(ARROW_BUILD_BENCHMARKS)
add_arrow_benchmark(localfs_benchmark
PREFIX
Expand Down
16 changes: 12 additions & 4 deletions cpp/src/arrow/filesystem/azurefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -196,15 +196,23 @@ class ARROW_EXPORT AzureFileSystem : public FileSystem {

bool Equals(const FileSystem& other) const override;

/// \cond FALSE
using FileSystem::CreateDir;
using FileSystem::DeleteDirContents;
using FileSystem::GetFileInfo;
using FileSystem::OpenAppendStream;
using FileSystem::OpenOutputStream;
/// \endcond

Result<FileInfo> GetFileInfo(const std::string& path) override;

Result<FileInfoVector> GetFileInfo(const FileSelector& select) override;

Status CreateDir(const std::string& path, bool recursive = true) override;
Status CreateDir(const std::string& path, bool recursive) override;

Status DeleteDir(const std::string& path) override;

Status DeleteDirContents(const std::string& path, bool missing_dir_ok = false) override;
Status DeleteDirContents(const std::string& path, bool missing_dir_ok) override;

Status DeleteRootDirContents() override;

Expand Down Expand Up @@ -246,11 +254,11 @@ class ARROW_EXPORT AzureFileSystem : public FileSystem {

Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
const std::string& path,
const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
const std::shared_ptr<const KeyValueMetadata>& metadata) override;

Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
const std::string& path,
const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
const std::shared_ptr<const KeyValueMetadata>& metadata) override;
};

} // namespace arrow::fs
34 changes: 34 additions & 0 deletions cpp/src/arrow/filesystem/examplefs.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "arrow/filesystem/filesystem.h"
#include "arrow/result.h"
#include "arrow/util/uri.h"

namespace arrow::fs {

// Associates the "example" scheme name with a factory that rewrites the URI
// to use the "file" scheme and delegates to FileSystemFromUri().
FileSystemRegistrar kExampleFileSystemModule{
    "example",
    [](const Uri& uri, const io::IOContext& io_context,
       std::string* out_path) -> Result<std::shared_ptr<FileSystem>> {
      constexpr std::string_view kExampleScheme = "example";
      // Drop the leading scheme characters and put "file" in their place.
      std::string file_uri = "file";
      file_uri += uri.ToString().substr(kExampleScheme.size());
      return FileSystemFromUri(file_uri, io_context, out_path);
    },
};

} // namespace arrow::fs
Loading

0 comments on commit f016aa8

Please sign in to comment.