Skip to content

Commit

Permalink
Remove not-currently-needed enum, add comment about an example partit…
Browse files Browse the repository at this point in the history
…ion structure
  • Loading branch information
wesm committed Jun 12, 2019
1 parent 68712f8 commit 2f6440a
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 9 deletions.
9 changes: 2 additions & 7 deletions cpp/src/arrow/dataset/dataset.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#pragma once

#include <memory>
#include <string>
#include <vector>

#include "arrow/dataset/type_fwd.h"
Expand Down Expand Up @@ -54,15 +55,9 @@ struct DataSelector {
/// DataFragments
class ARROW_DS_EXPORT DataSource {
public:
enum Type {
SIMPLE, // Flat collection
PARTITIONED, // Partitioned collection
GENERIC // All others
};

virtual ~DataSource() = default;

virtual Type type() const = 0;
virtual std::string type() const = 0;

virtual std::unique_ptr<DataFragmentIterator> GetFragments(
const DataSelector& selector) = 0;
Expand Down
30 changes: 28 additions & 2 deletions cpp/src/arrow/dataset/partition.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,35 @@ class ARROW_DS_EXPORT HivePartitionScheme : public PartitionScheme {
// ----------------------------------------------------------------------
//

// Partitioned datasets come in different forms. Here is an example of
// a Hive-style partitioned dataset:
//
// dataset_root/
//   key1=$k1_v1/
//     key2=$k2_v1/
//       0.parquet
//       1.parquet
//       2.parquet
//       3.parquet
//     key2=$k2_v2/
//       0.parquet
//       1.parquet
//   key1=$k1_v2/
//     key2=$k2_v1/
//       0.parquet
//       1.parquet
//     key2=$k2_v2/
//       0.parquet
//       1.parquet
//       2.parquet
//
// In this case, the dataset has 11 fragments (11 files) to be
// scanned, or potentially more if it is configured to split Parquet
// files at the row group level.

class ARROW_DS_EXPORT Partition : public DataSource {
public:
DataSource::Type type() const override;
std::string type() const override;

/// \brief The key for this partition source, may be nullptr,
/// e.g. for the top-level partitioned source container
Expand All @@ -129,7 +155,7 @@ class ARROW_DS_EXPORT Partition : public DataSource {
const Selector& selector) = 0;
};

/// \brief Container for a dataset partition, which consists of a
/// \brief Simple implementation of Partition, which consists of a
/// partition identifier, subpartitions, and some data fragments
class ARROW_DS_EXPORT SimplePartition : public Partition {
public:
Expand Down

0 comments on commit 2f6440a

Please sign in to comment.