Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARROW-6629: [Doc] [C++] Add filesystem docs #5487

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ci/travis_script_python.sh
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ if [ "$ARROW_TRAVIS_PYTHON_DOCS" == "1" ]; then
doxygen
popd
cd ../docs
sphinx-build -q -b html -d _build/doctrees -W source _build/html
sphinx-build -q -b html -d _build/doctrees -W --keep-going source _build/html
fi

popd # $ARROW_PYTHON_DIR
Expand Down
29 changes: 17 additions & 12 deletions cpp/src/arrow/filesystem/filesystem.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,16 @@ using TimePoint =

/// \brief EXPERIMENTAL: FileSystem entry type
enum class ARROW_EXPORT FileType : int8_t {
// Target does not exist
/// Entry does not exist
NonExistent,
// Target exists but its type is unknown (could be a special file such
// as a Unix socket or character device, or Windows NUL / CON / ...)
/// Entry exists but its type is unknown
///
/// This can designate a special file such as a Unix socket or character
/// device, or Windows NUL / CON / ...
Unknown,
// Target is a regular file
/// Entry is a regular file
File,
// Target is a directory
/// Entry is a directory
Directory
};

Expand All @@ -84,26 +86,27 @@ struct ARROW_EXPORT FileStats {
FileStats(const FileStats&) = default;
FileStats& operator=(const FileStats&) = default;

// The file type.
/// The file type
FileType type() const { return type_; }
void set_type(FileType type) { type_ = type; }

// The full file path in the filesystem.
/// The full file path in the filesystem
std::string path() const { return path_; }
void set_path(const std::string& path) { path_ = path; }

// The file base name (component after the last directory separator).
/// The file base name (component after the last directory separator)
std::string base_name() const;

// The size in bytes, if available. Only regular files are guaranteed
// to have a size.
/// The size in bytes, if available
///
/// Only regular files are guaranteed to have a size.
int64_t size() const { return size_; }
void set_size(int64_t size) { size_ = size; }

// The file extension
/// The file extension (excluding the dot)
std::string extension() const;

// The time of last modification, if available.
/// The time of last modification, if available
TimePoint mtime() const { return mtime_; }
void set_mtime(TimePoint mtime) { mtime_ = mtime; }

Expand Down Expand Up @@ -228,7 +231,9 @@ class ARROW_EXPORT SubTreeFileSystem : public FileSystem {
std::shared_ptr<FileSystem> base_fs);
~SubTreeFileSystem() override;

/// \cond FALSE
using FileSystem::GetTargetStats;
/// \endcond
Status GetTargetStats(const std::string& path, FileStats* out) override;
Status GetTargetStats(const Selector& select, std::vector<FileStats>* out) override;

Expand Down
8 changes: 6 additions & 2 deletions cpp/src/arrow/filesystem/localfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,18 @@ namespace fs {
/// \brief EXPERIMENTAL: a FileSystem implementation accessing files
/// on the local machine.
///
/// Details such as symlinks are abstracted away (symlinks are always followed,
/// except when deleting an entry).
/// This class handles only `/`-separated paths. If desired, conversion
/// from Windows backslash-separated paths should be done by the caller.
/// Details such as symlinks are abstracted away (symlinks are always
/// followed, except when deleting an entry).
class ARROW_EXPORT LocalFileSystem : public FileSystem {
public:
LocalFileSystem();
~LocalFileSystem() override;

/// \cond FALSE
using FileSystem::GetTargetStats;
/// \endcond
Status GetTargetStats(const std::string& path, FileStats* out) override;
Status GetTargetStats(const Selector& select, std::vector<FileStats>* out) override;

Expand Down
5 changes: 4 additions & 1 deletion cpp/src/arrow/filesystem/s3fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ namespace fs {

extern ARROW_EXPORT const char* kS3DefaultRegion;

/// Options for the S3 FileSystem implementation.
/// Options for the S3FileSystem implementation.
struct ARROW_EXPORT S3Options {
/// AWS region to connect to (default "us-east-1")
std::string region = kS3DefaultRegion;
Expand Down Expand Up @@ -79,7 +79,9 @@ class ARROW_EXPORT S3FileSystem : public FileSystem {
public:
~S3FileSystem() override;

/// \cond FALSE
using FileSystem::GetTargetStats;
/// \endcond
Status GetTargetStats(const std::string& path, FileStats* out) override;
Status GetTargetStats(const Selector& select, std::vector<FileStats>* out) override;

Expand Down Expand Up @@ -120,6 +122,7 @@ class ARROW_EXPORT S3FileSystem : public FileSystem {
Status OpenAppendStream(const std::string& path,
std::shared_ptr<io::OutputStream>* out) override;

/// Create an S3FileSystem instance from the given options.
static Status Make(const S3Options& options, std::shared_ptr<S3FileSystem>* out);

protected:
Expand Down
23 changes: 15 additions & 8 deletions cpp/src/arrow/io/file.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class Status;

namespace io {

/// \brief An operating system file open in write-only mode.
class ARROW_EXPORT FileOutputStream : public OutputStream {
public:
~FileOutputStream() override;
Expand Down Expand Up @@ -95,8 +96,9 @@ class ARROW_EXPORT FileOutputStream : public OutputStream {

// Write bytes to the stream. Thread-safe
Status Write(const void* data, int64_t nbytes) override;

/// \cond FALSE
using Writable::Write;
/// \endcond

int file_descriptor() const;

Expand All @@ -107,7 +109,11 @@ class ARROW_EXPORT FileOutputStream : public OutputStream {
std::unique_ptr<FileOutputStreamImpl> impl_;
};

// Operating system file
/// \brief An operating system file open in read-only mode.
///
/// Reads through this implementation are unbuffered. If many small reads
/// need to be issued, it is recommended to use a buffering layer for good
/// performance.
class ARROW_EXPORT ReadableFile
: public internal::RandomAccessFileConcurrencyWrapper<ReadableFile> {
public:
Expand Down Expand Up @@ -173,12 +179,13 @@ class ARROW_EXPORT ReadableFile
std::unique_ptr<ReadableFileImpl> impl_;
};

// A file interface that uses memory-mapped files for memory interactions,
// supporting zero copy reads. The same class is used for both reading and
// writing.
//
// If opening a file in a writable mode, it is not truncated first as with
// FileOutputStream
/// \brief A file interface that uses memory-mapped files for memory interactions
///
/// This implementation supports zero-copy reads. The same class is used
/// for both reading and writing.
///
/// If opening a file in a writable mode, it is not truncated first as with
/// FileOutputStream.
class ARROW_EXPORT MemoryMappedFile : public ReadWriteFileInterface {
public:
~MemoryMappedFile() override;
Expand Down
12 changes: 9 additions & 3 deletions cpp/src/arrow/io/memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class Status;

namespace io {

// \brief An output stream that writes to a resizable buffer
/// \brief An output stream that writes to a resizable buffer
class ARROW_EXPORT BufferOutputStream : public OutputStream {
public:
explicit BufferOutputStream(const std::shared_ptr<ResizableBuffer>& buffer);
Expand All @@ -61,7 +61,9 @@ class ARROW_EXPORT BufferOutputStream : public OutputStream {
Status Tell(int64_t* position) const override;
Status Write(const void* data, int64_t nbytes) override;

/// \cond FALSE
using OutputStream::Write;
/// \endcond

/// Close the stream and return the buffer
Status Finish(std::shared_ptr<Buffer>* result);
Expand All @@ -88,7 +90,11 @@ class ARROW_EXPORT BufferOutputStream : public OutputStream {
uint8_t* mutable_data_;
};

// \brief A helper class to tracks the size of allocations
/// \brief A helper class to track the size of allocations
///
/// Writes to this stream do not copy or retain any data; they just bump
/// a size counter that can later be used to know exactly how many bytes
/// need to be allocated for actual writing.
class ARROW_EXPORT MockOutputStream : public OutputStream {
public:
MockOutputStream() : extent_bytes_written_(0), is_open_(true) {}
Expand All @@ -106,7 +112,7 @@ class ARROW_EXPORT MockOutputStream : public OutputStream {
bool is_open_;
};

/// \brief Enables random writes into a fixed-size mutable buffer
/// \brief An output stream that writes into a fixed-size mutable buffer
class ARROW_EXPORT FixedSizeBufferWriter : public WritableFile {
public:
/// Input buffer must be mutable, will abort if not
Expand Down
25 changes: 19 additions & 6 deletions cpp/src/arrow/sparse_tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,12 @@ namespace arrow {

struct SparseTensorFormat {
/// EXPERIMENTAL: The index format type of SparseTensor
enum type { COO, CSR };
enum type {
/// Coordinate list (COO) format.
COO,
/// Compressed sparse row (CSR) format.
CSR
};
};

/// \brief EXPERIMENTAL: The base class for the index of a sparse tensor
Expand Down Expand Up @@ -83,10 +88,15 @@ class ARROW_EXPORT SparseCOOIndex : public internal::SparseIndexBase<SparseCOOIn
public:
static constexpr SparseTensorFormat::type format_id = SparseTensorFormat::COO;

// Constructor with a column-major NumericTensor
/// \brief Construct SparseCOOIndex from column-major NumericTensor
explicit SparseCOOIndex(const std::shared_ptr<Tensor>& coords);

/// \brief Return a tensor that has the coordinates of the non-zero values
///
/// The returned tensor is an Nx3 tensor where N is the number of non-zero
/// values. Each 3-element column has the form `{row, column, index}`,
/// indicating that the value for the logical element at `{row, column}`
/// should be found at the given physical index.
const std::shared_ptr<Tensor>& indices() const { return coords_; }

/// \brief Return a string representation of the sparse index
Expand Down Expand Up @@ -120,7 +130,7 @@ class ARROW_EXPORT SparseCSRIndex : public internal::SparseIndexBase<SparseCSRIn
public:
static constexpr SparseTensorFormat::type format_id = SparseTensorFormat::CSR;

// Constructor with two index vectors
/// \brief Construct SparseCSRIndex from two index vectors
explicit SparseCSRIndex(const std::shared_ptr<Tensor>& indptr,
const std::shared_ptr<Tensor>& indices);

Expand Down Expand Up @@ -231,20 +241,23 @@ class SparseTensorImpl : public SparseTensor {
public:
virtual ~SparseTensorImpl() = default;

// Constructor with all attributes
/// \brief Construct a sparse tensor from physical data buffer and logical index
SparseTensorImpl(const std::shared_ptr<SparseIndexType>& sparse_index,
const std::shared_ptr<DataType>& type,
const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape,
const std::vector<std::string>& dim_names)
: SparseTensor(type, data, shape, sparse_index, dim_names) {}

// Constructor for empty sparse tensor
/// \brief Construct an empty sparse tensor
SparseTensorImpl(const std::shared_ptr<DataType>& type,
const std::vector<int64_t>& shape,
const std::vector<std::string>& dim_names = {})
: SparseTensorImpl(NULLPTR, type, NULLPTR, shape, dim_names) {}

// Constructor with a dense tensor
/// \brief Construct a sparse tensor from a dense tensor
///
/// The dense tensor is re-encoded as a sparse index and a physical
/// data buffer for the non-zero values.
SparseTensorImpl(const Tensor& tensor,
const std::shared_ptr<DataType>& index_value_type)
: SparseTensorImpl(NULLPTR, tensor.type(), NULLPTR, tensor.shape(),
Expand Down
5 changes: 5 additions & 0 deletions cpp/src/arrow/util/compression.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ namespace arrow {
class Status;

struct Compression {
/// \brief Compression algorithm
enum type { UNCOMPRESSED, SNAPPY, GZIP, BROTLI, ZSTD, LZ4, LZO, BZ2 };
};

Expand Down Expand Up @@ -96,6 +97,7 @@ class ARROW_EXPORT Decompressor {
// XXX add methods for buffer size heuristics?
};

/// \brief Compression codec
class ARROW_EXPORT Codec {
public:
virtual ~Codec();
Expand All @@ -107,7 +109,10 @@ class ARROW_EXPORT Codec {
/// \brief Return a string name for compression type
static std::string GetCodecAsString(Compression::type t);

/// \brief Create a codec for the given compression algorithm
static Status Create(Compression::type codec, std::unique_ptr<Codec>* out);

/// \brief Create a codec for the given compression algorithm and level
static Status Create(Compression::type codec, int compression_level,
std::unique_ptr<Codec>* out);

Expand Down
3 changes: 3 additions & 0 deletions docs/source/cpp/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ API Reference
api/array
api/builder
api/table
api/tensor
api/utilities
api/io
api/cuda
api/flight
api/filesystem
46 changes: 46 additions & 0 deletions docs/source/cpp/api/filesystem.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
.. Licensed to the Apache Software Foundation (ASF) under one
.. or more contributor license agreements. See the NOTICE file
.. distributed with this work for additional information
.. regarding copyright ownership. The ASF licenses this file
.. to you under the Apache License, Version 2.0 (the
.. "License"); you may not use this file except in compliance
.. with the License. You may obtain a copy of the License at

.. http://www.apache.org/licenses/LICENSE-2.0

.. Unless required by applicable law or agreed to in writing,
.. software distributed under the License is distributed on an
.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
.. KIND, either express or implied. See the License for the
.. specific language governing permissions and limitations
.. under the License.

===========
Filesystems
===========

Interface
=========

.. doxygenenum:: arrow::fs::FileType

.. doxygenstruct:: arrow::fs::FileStats
:members:

.. doxygenclass:: arrow::fs::FileSystem
:members:

Concrete implementations
========================

.. doxygenclass:: arrow::fs::SubTreeFileSystem
:members:

.. doxygenclass:: arrow::fs::LocalFileSystem
:members:

.. doxygenstruct:: arrow::fs::S3Options
:members:

.. doxygenclass:: arrow::fs::S3FileSystem
:members:
8 changes: 4 additions & 4 deletions docs/source/cpp/api/flight.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@
Arrow Flight RPC
================

.. warning:: Flight is currently unstable. APIs are subject to change,
though we don't expect drastic changes.
.. note:: Flight is currently unstable. APIs are subject to change,
though we don't expect drastic changes.

.. warning:: Flight is currently only available when built from source
appropriately.
.. note:: Flight is currently only available when built from source
appropriately.

Common Types
============
Expand Down
Loading