Skip to content

Commit

Permalink
tsuba: add an interface to read number of rows
Browse files Browse the repository at this point in the history
Call should be cheap because it just looks at the metadata.

Signed-off-by: Tyler Hunt <[email protected]>
  • Loading branch information
Tyler Hunt authored and tylershunt committed Apr 16, 2021
1 parent e5cdbf5 commit 44ef570
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 5 deletions.
4 changes: 4 additions & 0 deletions libtsuba/include/tsuba/ParquetReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ class KATANA_EXPORT ParquetReader {
/// \param uri an identifier for a parquet file
/// \return the number of columns on success; an error result otherwise
katana::Result<int32_t> NumColumns(const katana::Uri& uri);

/// Get the number of rows for the table stored in a parquet file.
/// The count is taken from the file's metadata.
/// \param uri an identifier for a parquet file
/// \return the row count reported by the file metadata, or the error
///   produced while creating the file reader
katana::Result<int64_t> NumRows(const katana::Uri& uri);

private:
/// Private constructor: stores \p slice and \p make_cannonical in the
/// corresponding members and does no other work.
/// NOTE(review): presumably instances are created through a public factory
/// declared elsewhere in this class -- confirm.
ParquetReader(std::optional<Slice> slice, bool make_cannonical)
: slice_(slice), make_cannonical_{make_cannonical} {}
Expand Down
16 changes: 11 additions & 5 deletions libtsuba/src/ParquetReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -272,13 +272,19 @@ tsuba::ParquetReader::NumColumns(const katana::Uri& uri) {
}
std::unique_ptr<parquet::arrow::FileReader> reader(
std::move(reader_res.value()));
return reader->parquet_reader()->metadata()->num_columns();
}

std::shared_ptr<arrow::Schema> schema;
auto status = reader->GetSchema(&schema);
if (!status.ok()) {
return KATANA_ERROR(ErrorCode::ArrowError, "reading schema: {}", status);
// Report how many rows the parquet file identified by `uri` contains.
//
// The value comes from the file metadata exposed by the underlying parquet
// reader. If the file reader cannot be created, that error is returned
// unchanged.
Result<int64_t>
tsuba::ParquetReader::NumRows(const katana::Uri& uri) {
  // NOTE(review): the two zero arguments are passed exactly as in
  // NumColumns; presumably they describe a preload range -- confirm against
  // MakeFileReader.
  auto reader_res = MakeFileReader(uri, 0, 0);
  if (!reader_res) {
    return reader_res.error();
  }
  std::unique_ptr<parquet::arrow::FileReader> reader(
      std::move(reader_res.value()));

  // Row count is read from the metadata; no schema lookup is involved here.
  return reader->parquet_reader()->metadata()->num_rows();
}

Result<std::shared_ptr<arrow::Table>>
Expand Down

0 comments on commit 44ef570

Please sign in to comment.