Skip to content

Commit

Permalink
Always prune columns if schema is available
Browse files (browse the repository at this point in the history)
Signed-off-by: Nghia Truong <[email protected]>
  • Loading branch information
ttnghia committed Oct 28, 2024
1 parent 6e978cc commit a28a32a
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 24 deletions.
17 changes: 0 additions & 17 deletions java/src/main/java/ai/rapids/cudf/Table.java
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,6 @@ private static native long readJSON(int[] numChildren, String[] columnNames,
boolean allowLeadingZeros,
boolean allowNonNumericNumbers,
boolean allowUnquotedControl,
boolean pruneColumns,
boolean experimental,
byte lineDelimiter) throws CudfException;

Expand All @@ -275,7 +274,6 @@ private static native long readJSONFromDataSource(int[] numChildren, String[] co
boolean allowLeadingZeros,
boolean allowNonNumericNumbers,
boolean allowUnquotedControl,
boolean pruneColumns,
boolean experimental,
byte lineDelimiter,
long dsHandle) throws CudfException;
Expand Down Expand Up @@ -1100,10 +1098,6 @@ public static Table readJSON(Schema schema, JSONOptions opts, byte[] buffer) {
* @return the file parsed as a table on the GPU.
*/
public static Table readJSON(Schema schema, JSONOptions opts, File path) {
// only prune the schema if one is provided
boolean cudfPruneSchema = schema.getColumnNames() != null &&
schema.getColumnNames().length != 0 &&
opts.shouldCudfPruneSchema();
try (TableWithMeta twm = new TableWithMeta(
readJSON(schema.getFlattenedNumChildren(), schema.getFlattenedColumnNames(),
schema.getFlattenedTypeIds(), schema.getFlattenedTypeScales(),
Expand All @@ -1118,7 +1112,6 @@ public static Table readJSON(Schema schema, JSONOptions opts, File path) {
opts.leadingZerosAllowed(),
opts.nonNumericNumbersAllowed(),
opts.unquotedControlChars(),
cudfPruneSchema,
opts.experimental(),
opts.getLineDelimiter()))) {

Expand Down Expand Up @@ -1278,10 +1271,6 @@ public static Table readJSON(Schema schema, JSONOptions opts, HostMemoryBuffer b
assert len > 0;
assert len <= buffer.length - offset;
assert offset >= 0 && offset < buffer.length;
// only prune the schema if one is provided
boolean cudfPruneSchema = schema.getColumnNames() != null &&
schema.getColumnNames().length != 0 &&
opts.shouldCudfPruneSchema();
try (TableWithMeta twm = new TableWithMeta(readJSON(
schema.getFlattenedNumChildren(), schema.getFlattenedColumnNames(),
schema.getFlattenedTypeIds(), schema.getFlattenedTypeScales(), null,
Expand All @@ -1297,7 +1286,6 @@ public static Table readJSON(Schema schema, JSONOptions opts, HostMemoryBuffer b
opts.leadingZerosAllowed(),
opts.nonNumericNumbersAllowed(),
opts.unquotedControlChars(),
cudfPruneSchema,
opts.experimental(),
opts.getLineDelimiter()))) {
return twm.releaseTable();
Expand Down Expand Up @@ -1330,10 +1318,6 @@ public static Table readJSON(Schema schema, JSONOptions opts, DataSource ds) {
@SuppressWarnings("unused")
public static Table readJSON(Schema schema, JSONOptions opts, DataSource ds, int emptyRowCount) {
long dsHandle = DataSourceHelper.createWrapperDataSource(ds);
// only prune the schema if one is provided
boolean cudfPruneSchema = schema.getColumnNames() != null &&
schema.getColumnNames().length != 0 &&
opts.shouldCudfPruneSchema();
try (TableWithMeta twm = new TableWithMeta(readJSONFromDataSource(schema.getFlattenedNumChildren(),
schema.getFlattenedColumnNames(), schema.getFlattenedTypeIds(), schema.getFlattenedTypeScales(),
opts.isDayFirst(),
Expand All @@ -1347,7 +1331,6 @@ public static Table readJSON(Schema schema, JSONOptions opts, DataSource ds, int
opts.leadingZerosAllowed(),
opts.nonNumericNumbersAllowed(),
opts.unquotedControlChars(),
cudfPruneSchema,
opts.experimental(),
opts.getLineDelimiter(),
dsHandle))) {
Expand Down
14 changes: 7 additions & 7 deletions java/src/main/native/src/TableJni.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1826,7 +1826,6 @@ Java_ai_rapids_cudf_Table_readJSONFromDataSource(JNIEnv* env,
jboolean allow_leading_zeros,
jboolean allow_nonnumeric_numbers,
jboolean allow_unquoted_control,
jboolean prune_columns,
jboolean experimental,
jbyte line_delimiter,
jlong ds_handle)
Expand Down Expand Up @@ -1855,6 +1854,7 @@ Java_ai_rapids_cudf_Table_readJSONFromDataSource(JNIEnv* env,
cudf::io::json_recovery_mode_t recovery_mode =
recover_with_null ? cudf::io::json_recovery_mode_t::RECOVER_WITH_NULL
: cudf::io::json_recovery_mode_t::FAIL;

cudf::io::json_reader_options_builder opts =
cudf::io::json_reader_options::builder(source)
.dayfirst(static_cast<bool>(day_first))
Expand All @@ -1866,7 +1866,6 @@ Java_ai_rapids_cudf_Table_readJSONFromDataSource(JNIEnv* env,
.delimiter(static_cast<char>(line_delimiter))
.strict_validation(strict_validation)
.keep_quotes(keep_quotes)
.prune_columns(prune_columns)
.experimental(experimental);
if (strict_validation) {
opts.numeric_leading_zeros(allow_leading_zeros)
Expand Down Expand Up @@ -1896,10 +1895,11 @@ Java_ai_rapids_cudf_Table_readJSONFromDataSource(JNIEnv* env,
name, cudf::jni::read_schema_element(at, n_children, n_col_names, n_types, n_scales)});
name_order.push_back(name);
}

auto const prune_columns = data_types.size() != 0;
cudf::io::schema_element structs{
cudf::data_type{cudf::type_id::STRUCT}, std::move(data_types), {std::move(name_order)}};
opts.dtypes(structs);
opts.prune_columns(prune_columns).dtypes(structs);

} else {
// should infer the types
}
Expand Down Expand Up @@ -1932,7 +1932,6 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_readJSON(JNIEnv* env,
jboolean allow_leading_zeros,
jboolean allow_nonnumeric_numbers,
jboolean allow_unquoted_control,
jboolean prune_columns,
jboolean experimental,
jbyte line_delimiter)
{
Expand Down Expand Up @@ -1975,6 +1974,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_readJSON(JNIEnv* env,
cudf::io::json_recovery_mode_t recovery_mode =
recover_with_null ? cudf::io::json_recovery_mode_t::RECOVER_WITH_NULL
: cudf::io::json_recovery_mode_t::FAIL;

cudf::io::json_reader_options_builder opts =
cudf::io::json_reader_options::builder(source)
.dayfirst(static_cast<bool>(day_first))
Expand All @@ -1986,7 +1986,6 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_readJSON(JNIEnv* env,
.delimiter(static_cast<char>(line_delimiter))
.strict_validation(strict_validation)
.keep_quotes(keep_quotes)
.prune_columns(prune_columns)
.experimental(experimental);
if (strict_validation) {
opts.numeric_leading_zeros(allow_leading_zeros)
Expand Down Expand Up @@ -2017,9 +2016,10 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_readJSON(JNIEnv* env,
name, cudf::jni::read_schema_element(at, n_children, n_col_names, n_types, n_scales)});
name_order.emplace_back(std::move(name));
}
auto const prune_columns = data_types.size() != 0;
cudf::io::schema_element structs{
cudf::data_type{cudf::type_id::STRUCT}, std::move(data_types), {std::move(name_order)}};
opts.dtypes(structs);
opts.prune_columns(prune_columns).dtypes(structs);
} else {
// should infer the types
}
Expand Down

0 comments on commit a28a32a

Please sign in to comment.