From 456c5d26031203e9efa134fc76f41aba4a5c4199 Mon Sep 17 00:00:00 2001
From: Jeffrey Wong
Date: Wed, 19 Dec 2018 19:42:58 +0000
Subject: [PATCH] read parquet files

---
 r/R/read_parquet.R          | 10 ++++++++++
 r/src/parquetfilereader.cpp | 38 +++++++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+)
 create mode 100644 r/R/read_parquet.R
 create mode 100644 r/src/parquetfilereader.cpp

diff --git a/r/R/read_parquet.R b/r/R/read_parquet.R
new file mode 100644
index 0000000000000..b6f6fb59f16e0
--- /dev/null
+++ b/r/R/read_parquet.R
@@ -0,0 +1,10 @@
+#' Read one or more parquet files from disk and row-bind the results
+#' @param files a vector of filenames; each is passed to `read_parquet_file()`
+#' @return the per-file `as_tibble()` results combined with `rbind`
+#' @export
+read_parquet = function(files) {
+  tables = lapply(files, function(f) {
+    return (as_tibble(shared_ptr(`arrow::Table`, read_parquet_file(f))))
+  })
+  do.call('rbind', tables)  # stack the per-file results row-wise
+}
diff --git a/r/src/parquetfilereader.cpp b/r/src/parquetfilereader.cpp
new file mode 100644
index 0000000000000..05b1252eb6358
--- /dev/null
+++ b/r/src/parquetfilereader.cpp
@@ -0,0 +1,38 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <memory>
+#include <string>
+
+#include <arrow/api.h>
+#include <arrow/io/api.h>
+#include <parquet/arrow/reader.h>
+#include <parquet/exception.h>
+
+// Reads the Parquet file `filename` into an arrow::Table using the default
+// memory pool; a non-OK Status from any step is thrown as an exception.
+// [[Rcpp::export]]
+std::shared_ptr<arrow::Table> read_parquet_file(std::string filename) {
+  arrow::MemoryPool* pool = arrow::default_memory_pool();
+  std::shared_ptr<arrow::io::ReadableFile> infile;
+  PARQUET_THROW_NOT_OK(arrow::io::ReadableFile::Open(filename, pool, &infile));
+  std::unique_ptr<parquet::arrow::FileReader> reader;
+  PARQUET_THROW_NOT_OK(parquet::arrow::OpenFile(infile, pool, &reader));
+  std::shared_ptr<arrow::Table> table;
+  PARQUET_THROW_NOT_OK(reader->ReadTable(&table));
+  return table;
+}