Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-37799: [C++] Compute: CommonTemporal support time32 and time64 casting #37949

Merged
merged 4 commits into from
Oct 2, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 39 additions & 9 deletions cpp/src/arrow/compute/kernels/codegen_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,8 @@ TypeHolder CommonTemporal(const TypeHolder* begin, size_t count) {
bool saw_date32 = false;
bool saw_date64 = false;
bool saw_duration = false;
bool saw_time32 = false;
bool saw_time64 = false;
mapleFU marked this conversation as resolved.
Show resolved Hide resolved
const TypeHolder* end = begin + count;
for (auto it = begin; it != end; it++) {
auto id = it->type->id();
Expand All @@ -271,6 +273,18 @@ TypeHolder CommonTemporal(const TypeHolder* begin, size_t count) {
finest_unit = std::max(finest_unit, ty.unit());
continue;
}
case Type::TIME32: {
const auto& type = checked_cast<const Time32Type&>(*it->type);
finest_unit = std::max(finest_unit, type.unit());
saw_time32 = true;
continue;
}
case Type::TIME64: {
const auto& type = checked_cast<const Time64Type&>(*it->type);
finest_unit = std::max(finest_unit, type.unit());
saw_time64 = true;
continue;
}
case Type::DURATION: {
const auto& ty = checked_cast<const DurationType&>(*it->type);
finest_unit = std::max(finest_unit, ty.unit());
Expand All @@ -282,15 +296,31 @@ TypeHolder CommonTemporal(const TypeHolder* begin, size_t count) {
}
}

if (timezone) {
// At least one timestamp seen
return timestamp(finest_unit, *timezone);
} else if (saw_date64) {
return date64();
} else if (saw_date32) {
return date32();
} else if (saw_duration) {
return duration(finest_unit);
bool has_saw_time_since_midnight = saw_time32 || saw_time64;
bool has_saw_time_or_date = timezone || saw_date64 || saw_date32 || saw_duration;
mapleFU marked this conversation as resolved.
Show resolved Hide resolved

if (has_saw_time_since_midnight && has_saw_time_or_date) {
bkietz marked this conversation as resolved.
Show resolved Hide resolved
// Cannot find common type
return TypeHolder(nullptr);
}
if (has_saw_time_or_date) {
if (timezone) {
// At least one timestamp seen
return timestamp(finest_unit, *timezone);
} else if (saw_date64) {
return date64();
} else if (saw_date32) {
return date32();
} else if (saw_duration) {
return duration(finest_unit);
}
}
if (has_saw_time_since_midnight) {
if (saw_time64) {
return time64(finest_unit);
} else if (saw_time32) {
return time32(finest_unit);
}
}
return TypeHolder(nullptr);
}
Expand Down
6 changes: 6 additions & 0 deletions cpp/src/arrow/compute/kernels/codegen_internal_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,12 @@ TEST(TestDispatchBest, CommonTemporal) {
args = {timestamp(TimeUnit::SECOND, "America/Phoenix"),
timestamp(TimeUnit::SECOND, "UTC")};
ASSERT_EQ(CommonTemporal(args.data(), args.size()), nullptr);

bkietz marked this conversation as resolved.
Show resolved Hide resolved
args = {time32(TimeUnit::SECOND), time32(TimeUnit::MILLI)};
AssertTypeEqual(*time32(TimeUnit::MILLI), *CommonTemporal(args.data(), args.size()));

args = {time32(TimeUnit::SECOND), time64(TimeUnit::NANO)};
AssertTypeEqual(*time64(TimeUnit::NANO), *CommonTemporal(args.data(), args.size()));
}

TEST(TestDispatchBest, CommonTemporalResolution) {
Expand Down
27 changes: 27 additions & 0 deletions cpp/src/arrow/dataset/file_parquet_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -730,6 +730,33 @@ TEST_P(TestParquetFileFormatScan, PredicatePushdownRowGroupFragmentsUsingDuratio
CountRowGroupsInFragment(fragment, {0}, expr);
}

TEST_P(TestParquetFileFormatScan,
PredicatePushdownRowGroupFragmentsUsingTimestampColumn) {
// GH-37799: Parquet arrow will change TimeUnit::SECOND to TimeUnit::MILLI
// because parquet LogicalType don't support SECOND.
mapleFU marked this conversation as resolved.
Show resolved Hide resolved
for (auto time_unit : {TimeUnit::MILLI, TimeUnit::SECOND}) {
auto table = TableFromJSON(schema({field("t", time32(time_unit))}),
{
R"([{"t": 1}])",
R"([{"t": 2}, {"t": 3}])",
});
TableBatchReader table_reader(*table);
SCOPED_TRACE(
"TestParquetFileFormatScan."
"PredicatePushdownRowGroupFragmentsUsingTimestampColumn");
mapleFU marked this conversation as resolved.
Show resolved Hide resolved
ASSERT_OK_AND_ASSIGN(
auto buffer,
ParquetFormatHelper::Write(
&table_reader, ArrowWriterProperties::Builder().store_schema()->build()));
mapleFU marked this conversation as resolved.
Show resolved Hide resolved
auto source = std::make_shared<FileSource>(buffer);
SetSchema({field("t", time32(time_unit))});
ASSERT_OK_AND_ASSIGN(auto fragment, format_->MakeFragment(*source));

auto expr = equal(field_ref("t"), literal(::arrow::Time32Scalar(1, time_unit)));
CountRowGroupsInFragment(fragment, {0}, expr);
}
}

// Tests projection with nested/indexed FieldRefs.
// https://github.com/apache/arrow/issues/35579
TEST_P(TestParquetFileFormatScan, ProjectWithNonNamedFieldRefs) {
Expand Down