Skip to content

Commit

Permalink
Revert "Fix dataset-serialize benchmark by setting pre_buffer=False (#152)"
Browse files (browse the repository at this point in the history)

This reverts commit d412d2f.
  • Loading branch information
austin3dickey committed Nov 21, 2023
1 parent e3fd86d commit 642e215
Showing 1 changed file with 4 additions and 16 deletions.
20 changes: 4 additions & 16 deletions benchmarks/dataset_serialize_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ def run(self, source, case=None, **kwargs):

yield self.benchmark(
f=self._get_benchmark_function(
case, source.name, source_ds, source.format_str, dirpath
case, source.name, source_ds, dirpath
),
extra_tags=tags,
options=kwargs,
Expand All @@ -220,12 +220,7 @@ def run(self, source, case=None, **kwargs):
self._report_dirsize_and_wipe(OUTPUT_DIR_PREFIX)

def _get_benchmark_function(
self,
case,
source_name: str,
source_ds: ds.Dataset,
source_fmt: str,
dirpath: str,
self, case, source_name: str, source_ds: ds.Dataset, dirpath: str
):
(selectivity, serialization_format) = case

Expand All @@ -239,13 +234,6 @@ def _get_benchmark_function(
except KeyError:
pass

# Need this or arrow#38438 will cause a segfault. TODO: remove once fixed.
data_read_kwargs = {}
if source_fmt == "parquet":
data_read_kwargs["fragment_scan_options"] = ds.ParquetFragmentScanOptions(
pre_buffer=False
)

if n_rows_only:
# Pragmatic method for reading only a subset of the data set. A
# different method for sub selection of rows could use a
Expand All @@ -254,10 +242,10 @@ def _get_benchmark_function(
# Note that `head()` returns a `Table` object, i.e. loads data
# into memory.
log.info("read %s rows of dataset %s into memory", n_rows_only, source_name)
data = source_ds.head(n_rows_only, **data_read_kwargs)
data = source_ds.head(n_rows_only)
else:
log.info("read complete dataset %s into memory", source_name)
data = source_ds.to_table(**data_read_kwargs)
data = source_ds.to_table()

log.info("read source dataset into memory in %.4f s", time.monotonic() - t0)

Expand Down

0 comments on commit 642e215

Please sign in to comment.