From 4d9a9e0bebe3c76c6eb3df3c96a6eef915790af7 Mon Sep 17 00:00:00 2001
From: Nghia Truong <nghiat@nvidia.com>
Date: Thu, 24 Oct 2024 11:41:59 -0700
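Subject: [PATCH 1/4] Remove `gatherJSONColumns`

Remove the Java-side `gatherJSONColumns` helpers and the public `readJSON`
overloads that take `emptyRowCount`; `readJSON` now returns
`twm.releaseTable()` directly. On the JNI side, pass the column names in
schema order to `cudf::io::schema_element` alongside the name-to-element map.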

---
 java/src/main/java/ai/rapids/cudf/Table.java | 279 +------------------
 java/src/main/native/src/TableJni.cpp        |  24 +-
 2 files changed, 23 insertions(+), 280 deletions(-)

diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java
index dbee53640aa..6bc3082d1d3 100644
--- a/java/src/main/java/ai/rapids/cudf/Table.java
+++ b/java/src/main/java/ai/rapids/cudf/Table.java
@@ -1092,224 +1092,6 @@ public static Table readJSON(Schema schema, JSONOptions opts, byte[] buffer) {
     return readJSON(schema, opts, buffer, 0, buffer.length);
   }
 
-  private static class DidViewChange {
-    ColumnVector changeWasNeeded = null;
-    boolean noChangeNeeded = false;
-
-    public static DidViewChange yes(ColumnVector cv) {
-      DidViewChange ret = new DidViewChange();
-      ret.changeWasNeeded = cv;
-      return ret;
-    }
-
-    public static DidViewChange no() {
-      DidViewChange ret = new DidViewChange();
-      ret.noChangeNeeded = true;
-      return ret;
-    }
-  }
-
-  private static DidViewChange gatherJSONColumns(Schema schema, TableWithMeta.NestedChildren children,
-                                                 ColumnView cv) {
-    // We need to do this recursively to be sure it all matches as expected.
-    // If we run into problems where the data types don't match, we are not
-    // going to fix up the data types. We are only going to reorder the columns.
-    if (schema.getType() == DType.STRUCT) {
-      if (cv.getType() != DType.STRUCT) {
-        // The types don't match so just return the input unchanged...
-        return DidViewChange.no();
-      } else {
-        String[] foundNames;
-        if (children == null) {
-          foundNames = new String[0];
-        } else {
-          foundNames = children.getNames();
-        }
-        HashMap<String, Integer> indices = new HashMap<>();
-        for (int i = 0; i < foundNames.length; i++) {
-          indices.put(foundNames[i], i);
-        }
-        // We might need to rearrange the columns to match what we want.
-        DType[] types = schema.getChildTypes();
-        String[] neededNames = schema.getColumnNames();
-        ColumnView[] columns = new ColumnView[neededNames.length];
-        try {
-          boolean somethingChanged = false;
-          if (columns.length != foundNames.length) {
-            somethingChanged = true;
-          }
-          for (int i = 0; i < columns.length; i++) {
-            String neededColumnName = neededNames[i];
-            Integer index = indices.get(neededColumnName);
-            Schema childSchema = schema.getChild(i);
-            if (index != null) {
-              if (childSchema.isStructOrHasStructDescendant()) {
-                ColumnView child = cv.getChildColumnView(index);
-                boolean shouldCloseChild = true;
-                try {
-                  if (index != i) {
-                    somethingChanged = true;
-                  }
-                  DidViewChange childResult = gatherJSONColumns(schema.getChild(i),
-                      children.getChild(index), child);
-                  if (childResult.noChangeNeeded) {
-                    shouldCloseChild = false;
-                    columns[i] = child;
-                  } else {
-                    somethingChanged = true;
-                    columns[i] = childResult.changeWasNeeded;
-                  }
-                } finally {
-                  if (shouldCloseChild) {
-                    child.close();
-                  }
-                }
-              } else {
-                if (index != i) {
-                  somethingChanged = true;
-                }
-                columns[i] = cv.getChildColumnView(index);
-              }
-            } else {
-              somethingChanged = true;
-              if (types[i] == DType.LIST) {
-                try (Scalar s = Scalar.listFromNull(childSchema.getChild(0).asHostDataType())) {
-                  columns[i] = ColumnVector.fromScalar(s, (int) cv.getRowCount());
-                }
-              } else if (types[i] == DType.STRUCT) {
-                int numStructChildren = childSchema.getNumChildren();
-                HostColumnVector.DataType[] structChildren = new HostColumnVector.DataType[numStructChildren];
-                for (int structChildIndex = 0; structChildIndex < numStructChildren; structChildIndex++) {
-                  structChildren[structChildIndex] = childSchema.getChild(structChildIndex).asHostDataType();
-                }
-                try (Scalar s = Scalar.structFromNull(structChildren)) {
-                  columns[i] = ColumnVector.fromScalar(s, (int) cv.getRowCount());
-                }
-              } else {
-                try (Scalar s = Scalar.fromNull(types[i])) {
-                  columns[i] = ColumnVector.fromScalar(s, (int) cv.getRowCount());
-                }
-              }
-            }
-          }
-          if (somethingChanged) {
-            try (ColumnView ret = new ColumnView(cv.type, cv.rows, Optional.of(cv.nullCount),
-                cv.getValid(), null, columns)) {
-              return DidViewChange.yes(ret.copyToColumnVector());
-            }
-          } else {
-            return DidViewChange.no();
-          }
-        } finally {
-          for (ColumnView c: columns) {
-            if (c != null) {
-              c.close();
-            }
-          }
-        }
-      }
-    } else if (schema.getType() == DType.LIST && cv.getType() == DType.LIST) {
-      if (schema.isStructOrHasStructDescendant()) {
-        String [] childNames = children.getNames();
-        if (childNames.length == 2 &&
-            "offsets".equals(childNames[0]) &&
-            "element".equals(childNames[1])) {
-          try (ColumnView child = cv.getChildColumnView(0)){
-            DidViewChange listResult = gatherJSONColumns(schema.getChild(0),
-                children.getChild(1), child);
-            if (listResult.noChangeNeeded) {
-              return DidViewChange.no();
-            } else {
-              try (ColumnView listView = new ColumnView(cv.type, cv.rows,
-                  Optional.of(cv.nullCount), cv.getValid(), cv.getOffsets(),
-                  new ColumnView[]{listResult.changeWasNeeded})) {
-                return DidViewChange.yes(listView.copyToColumnVector());
-              } finally {
-                listResult.changeWasNeeded.close();
-              }
-            }
-          }
-        }
-      }
-      // Nothing to change so just return the input, but we need to inc a ref count to really
-      // make it work, so for now we are going to turn it into a ColumnVector.
-      return DidViewChange.no();
-    } else {
-      // Nothing to change so just return the input, but we need to inc a ref count to really
-      // make it work, so for now we are going to turn it into a ColumnVector.
-      return DidViewChange.no();
-    }
-  }
-
-  private static Table gatherJSONColumns(Schema schema, TableWithMeta twm, int emptyRowCount) {
-    String[] neededColumns = schema.getColumnNames();
-    if (neededColumns == null || neededColumns.length == 0) {
-      return twm.releaseTable();
-    } else {
-      String[] foundNames = twm.getColumnNames();
-      HashMap<String, Integer> indices = new HashMap<>();
-      for (int i = 0; i < foundNames.length; i++) {
-        indices.put(foundNames[i], i);
-      }
-      // We might need to rearrange the columns to match what we want.
-      DType[] types = schema.getChildTypes();
-      ColumnVector[] columns = new ColumnVector[neededColumns.length];
-      try (Table tbl = twm.releaseTable()) {
-        int rowCount = tbl == null ? emptyRowCount : (int)tbl.getRowCount();
-        if (rowCount < 0) {
-          throw new IllegalStateException(
-              "No empty row count provided and the table read has no row count or columns");
-        }
-        for (int i = 0; i < columns.length; i++) {
-          String neededColumnName = neededColumns[i];
-          Integer index = indices.get(neededColumnName);
-          if (index != null) {
-            if (schema.getChild(i).isStructOrHasStructDescendant()) {
-              DidViewChange gathered = gatherJSONColumns(schema.getChild(i), twm.getChild(index),
-                  tbl.getColumn(index));
-              if (gathered.noChangeNeeded) {
-                columns[i] = tbl.getColumn(index).incRefCount();
-              } else {
-                columns[i] = gathered.changeWasNeeded;
-              }
-            } else {
-              columns[i] = tbl.getColumn(index).incRefCount();
-            }
-          } else {
-            if (types[i] == DType.LIST) {
-              Schema listSchema = schema.getChild(i);
-              Schema elementSchema = listSchema.getChild(0);
-              try (Scalar s = Scalar.listFromNull(elementSchema.asHostDataType())) {
-                columns[i] = ColumnVector.fromScalar(s, rowCount);
-              }
-            } else if (types[i] == DType.STRUCT) {
-              Schema structSchema = schema.getChild(i);
-              int numStructChildren = structSchema.getNumChildren();
-              DataType[] structChildrenTypes = new DataType[numStructChildren];
-              for (int j = 0; j < numStructChildren; j++) {
-                structChildrenTypes[j] = structSchema.getChild(j).asHostDataType();
-              }
-              try (Scalar s = Scalar.structFromNull(structChildrenTypes)) {
-                columns[i] = ColumnVector.fromScalar(s, rowCount);
-              }
-            } else {
-              try (Scalar s = Scalar.fromNull(types[i])) {
-                columns[i] = ColumnVector.fromScalar(s, rowCount);
-              }
-            }
-          }
-        }
-        return new Table(columns);
-      } finally {
-        for (ColumnVector c: columns) {
-          if (c != null) {
-            c.close();
-          }
-        }
-      }
-    }
-  }
-
   /**
    * Read a JSON file.
    * @param schema the schema of the file.  You may use Schema.INFERRED to infer the schema.
@@ -1339,8 +1121,7 @@ public static Table readJSON(Schema schema, JSONOptions opts, File path) {
                     cudfPruneSchema,
                     opts.experimental(),
                     opts.getLineDelimiter()))) {
-
-      return gatherJSONColumns(schema, twm, -1);
+      return twm.releaseTable();
     }
   }
 
@@ -1356,23 +1137,6 @@ public static Table readJSON(Schema schema, JSONOptions opts, File path) {
    */
   public static Table readJSON(Schema schema, JSONOptions opts, byte[] buffer, long offset,
                                long len, HostMemoryAllocator hostMemoryAllocator) {
-    return readJSON(schema, opts, buffer, offset, len, hostMemoryAllocator, -1);
-  }
-
-  /**
-   * Read JSON formatted data.
-   * @param schema the schema of the data. You may use Schema.INFERRED to infer the schema.
-   * @param opts various JSON parsing options.
-   * @param buffer raw UTF8 formatted bytes.
-   * @param offset the starting offset into buffer.
-   * @param len the number of bytes to parse.
-   * @param hostMemoryAllocator allocator for host memory buffers
-   * @param emptyRowCount the number of rows to return if no columns were read.
-   * @return the data parsed as a table on the GPU.
-   */
-  public static Table readJSON(Schema schema, JSONOptions opts, byte[] buffer, long offset,
-                               long len, HostMemoryAllocator hostMemoryAllocator,
-                               int emptyRowCount) {
     if (len <= 0) {
       len = buffer.length - offset;
     }
@@ -1381,16 +1145,10 @@ public static Table readJSON(Schema schema, JSONOptions opts, byte[] buffer, lon
     assert offset >= 0 && offset < buffer.length;
     try (HostMemoryBuffer newBuf = hostMemoryAllocator.allocate(len)) {
       newBuf.setBytes(0, buffer, offset, len);
-      return readJSON(schema, opts, newBuf, 0, len, emptyRowCount);
+      return readJSON(schema, opts, newBuf, 0, len);
     }
   }
 
-  public static Table readJSON(Schema schema, JSONOptions opts, byte[] buffer, long offset,
-                               long len, int emptyRowCount) {
-    return readJSON(schema, opts, buffer, offset, len, DefaultHostMemoryAllocator.get(),
-        emptyRowCount);
-  }
-
   public static Table readJSON(Schema schema, JSONOptions opts, byte[] buffer, long offset,
                                long len) {
     return readJSON(schema, opts, buffer, offset, len, DefaultHostMemoryAllocator.get());
@@ -1464,22 +1222,7 @@ public static TableWithMeta readAndInferJSON(JSONOptions opts, DataSource ds) {
    * @return the data parsed as a table on the GPU.
    */
   public static Table readJSON(Schema schema, JSONOptions opts, HostMemoryBuffer buffer,
-                              long offset, long len) {
-    return readJSON(schema, opts, buffer, offset, len, -1);
-  }
-
-  /**
-   * Read JSON formatted data.
-   * @param schema the schema of the data. You may use Schema.INFERRED to infer the schema.
-   * @param opts various JSON parsing options.
-   * @param buffer raw UTF8 formatted bytes.
-   * @param offset the starting offset into buffer.
-   * @param len the number of bytes to parse.
-   * @param emptyRowCount the number of rows to use if no columns were found.
-   * @return the data parsed as a table on the GPU.
-   */
-  public static Table readJSON(Schema schema, JSONOptions opts, HostMemoryBuffer buffer,
-                               long offset, long len, int emptyRowCount) {
+                               long offset, long len) {
     if (len <= 0) {
       len = buffer.length - offset;
     }
@@ -1508,7 +1251,7 @@ public static Table readJSON(Schema schema, JSONOptions opts, HostMemoryBuffer b
             cudfPruneSchema,
             opts.experimental(),
             opts.getLineDelimiter()))) {
-      return gatherJSONColumns(schema, twm, emptyRowCount);
+      return twm.releaseTable();
     }
   }
 
@@ -1520,18 +1263,6 @@ public static Table readJSON(Schema schema, JSONOptions opts, HostMemoryBuffer b
    * @return the data parsed as a table on the GPU.
    */
   public static Table readJSON(Schema schema, JSONOptions opts, DataSource ds) {
-    return readJSON(schema, opts, ds, -1);
-  }
-
-  /**
-   * Read JSON formatted data.
-   * @param schema the schema of the data. You may use Schema.INFERRED to infer the schema.
-   * @param opts various JSON parsing options.
-   * @param ds the DataSource to read from.
-   * @param emptyRowCount the number of rows to return if no columns were read.
-   * @return the data parsed as a table on the GPU.
-   */
-  public static Table readJSON(Schema schema, JSONOptions opts, DataSource ds, int emptyRowCount) {
     long dsHandle = DataSourceHelper.createWrapperDataSource(ds);
     // only prune the schema if one is provided
     boolean cudfPruneSchema = schema.getColumnNames() != null &&
@@ -1554,7 +1285,7 @@ public static Table readJSON(Schema schema, JSONOptions opts, DataSource ds, int
         opts.experimental(),
         opts.getLineDelimiter(),
         dsHandle))) {
-      return gatherJSONColumns(schema, twm, emptyRowCount);
+      return twm.releaseTable();
     } finally {
       DataSourceHelper.destroyWrapperDataSource(dsHandle);
     }
diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp
index 0a667978ca3..566ac0b972d 100644
--- a/java/src/main/native/src/TableJni.cpp
+++ b/java/src/main/native/src/TableJni.cpp
@@ -1037,21 +1037,23 @@ cudf::io::schema_element read_schema_element(int& index,
   if (d_type.id() == cudf::type_id::STRUCT || d_type.id() == cudf::type_id::LIST) {
     std::map<std::string, cudf::io::schema_element> child_elems;
     int num_children = children[index];
+    std::vector<std::string> child_names(num_children);
     // go to the next entry, so recursion can parse it.
     index++;
     for (int i = 0; i < num_children; i++) {
-      auto const name = std::string{names.get(index).get()};
+      auto name = std::string{names.get(index).get()};
       child_elems.insert(
         std::pair{name, cudf::jni::read_schema_element(index, children, names, types, scales)});
+      child_names[i] = std::move(name);
     }
-    return cudf::io::schema_element{d_type, std::move(child_elems)};
+    return cudf::io::schema_element{d_type, std::move(child_elems), {std::move(child_names)}};
   } else {
     if (children[index] != 0) {
       throw std::invalid_argument("found children for a type that should have none");
     }
     // go to the next entry before returning...
     index++;
-    return cudf::io::schema_element{d_type, {}};
+    return cudf::io::schema_element{d_type, {}, std::nullopt};
   }
 }
 
@@ -1886,13 +1888,18 @@ Java_ai_rapids_cudf_Table_readJSONFromDataSource(JNIEnv* env,
       }
 
       std::map<std::string, cudf::io::schema_element> data_types;
+      std::vector<std::string> name_order;
       int at = 0;
       while (at < n_types.size()) {
         auto const name = std::string{n_col_names.get(at).get()};
         data_types.insert(std::pair{
           name, cudf::jni::read_schema_element(at, n_children, n_col_names, n_types, n_scales)});
+        name_order.push_back(name);
       }
-      opts.dtypes(data_types);
+
+      cudf::io::schema_element structs{
+        cudf::data_type{cudf::type_id::STRUCT}, std::move(data_types), {std::move(name_order)}};
+      opts.dtypes(structs);
     } else {
       // should infer the types
     }
@@ -2001,13 +2008,18 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_readJSON(JNIEnv* env,
       }
 
       std::map<std::string, cudf::io::schema_element> data_types;
+      std::vector<std::string> name_order;
+      name_order.reserve(n_types.size());
       int at = 0;
       while (at < n_types.size()) {
-        auto const name = std::string{n_col_names.get(at).get()};
+        auto name = std::string{n_col_names.get(at).get()};
         data_types.insert(std::pair{
           name, cudf::jni::read_schema_element(at, n_children, n_col_names, n_types, n_scales)});
+        name_order.emplace_back(std::move(name));
       }
-      opts.dtypes(data_types);
+      cudf::io::schema_element structs{
+        cudf::data_type{cudf::type_id::STRUCT}, std::move(data_types), {std::move(name_order)}};
+      opts.dtypes(structs);
     } else {
       // should infer the types
     }

From 764a7a25cd22ca0adfa788794d010fb4093a0a81 Mon Sep 17 00:00:00 2001
From: Nghia Truong <nghiat@nvidia.com>
Date: Fri, 25 Oct 2024 08:56:13 -0700
Subject: [PATCH 2/4] Revert "Auxiliary commit to revert individual files from
 4d9a9e0bebe3c76c6eb3df3c96a6eef915790af7"

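This reverts commit a82fdb699a13008b878deaab18ae85a440cf05af.

It restores java/src/main/java/ai/rapids/cudf/Table.java to its content
before PATCH 1/4 (blob dbee53640aa), bringing back `gatherJSONColumns` and
the `emptyRowCount` overloads of `readJSON`. The TableJni.cpp changes from
PATCH 1/4 are not touched by this patch.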
---
 java/src/main/java/ai/rapids/cudf/Table.java | 279 ++++++++++++++++++-
 1 file changed, 274 insertions(+), 5 deletions(-)

diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java
index 6bc3082d1d3..dbee53640aa 100644
--- a/java/src/main/java/ai/rapids/cudf/Table.java
+++ b/java/src/main/java/ai/rapids/cudf/Table.java
@@ -1092,6 +1092,224 @@ public static Table readJSON(Schema schema, JSONOptions opts, byte[] buffer) {
     return readJSON(schema, opts, buffer, 0, buffer.length);
   }
 
+  private static class DidViewChange {
+    ColumnVector changeWasNeeded = null;
+    boolean noChangeNeeded = false;
+
+    public static DidViewChange yes(ColumnVector cv) {
+      DidViewChange ret = new DidViewChange();
+      ret.changeWasNeeded = cv;
+      return ret;
+    }
+
+    public static DidViewChange no() {
+      DidViewChange ret = new DidViewChange();
+      ret.noChangeNeeded = true;
+      return ret;
+    }
+  }
+
+  private static DidViewChange gatherJSONColumns(Schema schema, TableWithMeta.NestedChildren children,
+                                                 ColumnView cv) {
+    // We need to do this recursively to be sure it all matches as expected.
+    // If we run into problems where the data types don't match, we are not
+    // going to fix up the data types. We are only going to reorder the columns.
+    if (schema.getType() == DType.STRUCT) {
+      if (cv.getType() != DType.STRUCT) {
+        // The types don't match so just return the input unchanged...
+        return DidViewChange.no();
+      } else {
+        String[] foundNames;
+        if (children == null) {
+          foundNames = new String[0];
+        } else {
+          foundNames = children.getNames();
+        }
+        HashMap<String, Integer> indices = new HashMap<>();
+        for (int i = 0; i < foundNames.length; i++) {
+          indices.put(foundNames[i], i);
+        }
+        // We might need to rearrange the columns to match what we want.
+        DType[] types = schema.getChildTypes();
+        String[] neededNames = schema.getColumnNames();
+        ColumnView[] columns = new ColumnView[neededNames.length];
+        try {
+          boolean somethingChanged = false;
+          if (columns.length != foundNames.length) {
+            somethingChanged = true;
+          }
+          for (int i = 0; i < columns.length; i++) {
+            String neededColumnName = neededNames[i];
+            Integer index = indices.get(neededColumnName);
+            Schema childSchema = schema.getChild(i);
+            if (index != null) {
+              if (childSchema.isStructOrHasStructDescendant()) {
+                ColumnView child = cv.getChildColumnView(index);
+                boolean shouldCloseChild = true;
+                try {
+                  if (index != i) {
+                    somethingChanged = true;
+                  }
+                  DidViewChange childResult = gatherJSONColumns(schema.getChild(i),
+                      children.getChild(index), child);
+                  if (childResult.noChangeNeeded) {
+                    shouldCloseChild = false;
+                    columns[i] = child;
+                  } else {
+                    somethingChanged = true;
+                    columns[i] = childResult.changeWasNeeded;
+                  }
+                } finally {
+                  if (shouldCloseChild) {
+                    child.close();
+                  }
+                }
+              } else {
+                if (index != i) {
+                  somethingChanged = true;
+                }
+                columns[i] = cv.getChildColumnView(index);
+              }
+            } else {
+              somethingChanged = true;
+              if (types[i] == DType.LIST) {
+                try (Scalar s = Scalar.listFromNull(childSchema.getChild(0).asHostDataType())) {
+                  columns[i] = ColumnVector.fromScalar(s, (int) cv.getRowCount());
+                }
+              } else if (types[i] == DType.STRUCT) {
+                int numStructChildren = childSchema.getNumChildren();
+                HostColumnVector.DataType[] structChildren = new HostColumnVector.DataType[numStructChildren];
+                for (int structChildIndex = 0; structChildIndex < numStructChildren; structChildIndex++) {
+                  structChildren[structChildIndex] = childSchema.getChild(structChildIndex).asHostDataType();
+                }
+                try (Scalar s = Scalar.structFromNull(structChildren)) {
+                  columns[i] = ColumnVector.fromScalar(s, (int) cv.getRowCount());
+                }
+              } else {
+                try (Scalar s = Scalar.fromNull(types[i])) {
+                  columns[i] = ColumnVector.fromScalar(s, (int) cv.getRowCount());
+                }
+              }
+            }
+          }
+          if (somethingChanged) {
+            try (ColumnView ret = new ColumnView(cv.type, cv.rows, Optional.of(cv.nullCount),
+                cv.getValid(), null, columns)) {
+              return DidViewChange.yes(ret.copyToColumnVector());
+            }
+          } else {
+            return DidViewChange.no();
+          }
+        } finally {
+          for (ColumnView c: columns) {
+            if (c != null) {
+              c.close();
+            }
+          }
+        }
+      }
+    } else if (schema.getType() == DType.LIST && cv.getType() == DType.LIST) {
+      if (schema.isStructOrHasStructDescendant()) {
+        String [] childNames = children.getNames();
+        if (childNames.length == 2 &&
+            "offsets".equals(childNames[0]) &&
+            "element".equals(childNames[1])) {
+          try (ColumnView child = cv.getChildColumnView(0)){
+            DidViewChange listResult = gatherJSONColumns(schema.getChild(0),
+                children.getChild(1), child);
+            if (listResult.noChangeNeeded) {
+              return DidViewChange.no();
+            } else {
+              try (ColumnView listView = new ColumnView(cv.type, cv.rows,
+                  Optional.of(cv.nullCount), cv.getValid(), cv.getOffsets(),
+                  new ColumnView[]{listResult.changeWasNeeded})) {
+                return DidViewChange.yes(listView.copyToColumnVector());
+              } finally {
+                listResult.changeWasNeeded.close();
+              }
+            }
+          }
+        }
+      }
+      // Nothing to change so just return the input, but we need to inc a ref count to really
+      // make it work, so for now we are going to turn it into a ColumnVector.
+      return DidViewChange.no();
+    } else {
+      // Nothing to change so just return the input, but we need to inc a ref count to really
+      // make it work, so for now we are going to turn it into a ColumnVector.
+      return DidViewChange.no();
+    }
+  }
+
+  private static Table gatherJSONColumns(Schema schema, TableWithMeta twm, int emptyRowCount) {
+    String[] neededColumns = schema.getColumnNames();
+    if (neededColumns == null || neededColumns.length == 0) {
+      return twm.releaseTable();
+    } else {
+      String[] foundNames = twm.getColumnNames();
+      HashMap<String, Integer> indices = new HashMap<>();
+      for (int i = 0; i < foundNames.length; i++) {
+        indices.put(foundNames[i], i);
+      }
+      // We might need to rearrange the columns to match what we want.
+      DType[] types = schema.getChildTypes();
+      ColumnVector[] columns = new ColumnVector[neededColumns.length];
+      try (Table tbl = twm.releaseTable()) {
+        int rowCount = tbl == null ? emptyRowCount : (int)tbl.getRowCount();
+        if (rowCount < 0) {
+          throw new IllegalStateException(
+              "No empty row count provided and the table read has no row count or columns");
+        }
+        for (int i = 0; i < columns.length; i++) {
+          String neededColumnName = neededColumns[i];
+          Integer index = indices.get(neededColumnName);
+          if (index != null) {
+            if (schema.getChild(i).isStructOrHasStructDescendant()) {
+              DidViewChange gathered = gatherJSONColumns(schema.getChild(i), twm.getChild(index),
+                  tbl.getColumn(index));
+              if (gathered.noChangeNeeded) {
+                columns[i] = tbl.getColumn(index).incRefCount();
+              } else {
+                columns[i] = gathered.changeWasNeeded;
+              }
+            } else {
+              columns[i] = tbl.getColumn(index).incRefCount();
+            }
+          } else {
+            if (types[i] == DType.LIST) {
+              Schema listSchema = schema.getChild(i);
+              Schema elementSchema = listSchema.getChild(0);
+              try (Scalar s = Scalar.listFromNull(elementSchema.asHostDataType())) {
+                columns[i] = ColumnVector.fromScalar(s, rowCount);
+              }
+            } else if (types[i] == DType.STRUCT) {
+              Schema structSchema = schema.getChild(i);
+              int numStructChildren = structSchema.getNumChildren();
+              DataType[] structChildrenTypes = new DataType[numStructChildren];
+              for (int j = 0; j < numStructChildren; j++) {
+                structChildrenTypes[j] = structSchema.getChild(j).asHostDataType();
+              }
+              try (Scalar s = Scalar.structFromNull(structChildrenTypes)) {
+                columns[i] = ColumnVector.fromScalar(s, rowCount);
+              }
+            } else {
+              try (Scalar s = Scalar.fromNull(types[i])) {
+                columns[i] = ColumnVector.fromScalar(s, rowCount);
+              }
+            }
+          }
+        }
+        return new Table(columns);
+      } finally {
+        for (ColumnVector c: columns) {
+          if (c != null) {
+            c.close();
+          }
+        }
+      }
+    }
+  }
+
   /**
    * Read a JSON file.
    * @param schema the schema of the file.  You may use Schema.INFERRED to infer the schema.
@@ -1121,7 +1339,8 @@ public static Table readJSON(Schema schema, JSONOptions opts, File path) {
                     cudfPruneSchema,
                     opts.experimental(),
                     opts.getLineDelimiter()))) {
-      return twm.releaseTable();
+
+      return gatherJSONColumns(schema, twm, -1);
     }
   }
 
@@ -1137,6 +1356,23 @@ public static Table readJSON(Schema schema, JSONOptions opts, File path) {
    */
   public static Table readJSON(Schema schema, JSONOptions opts, byte[] buffer, long offset,
                                long len, HostMemoryAllocator hostMemoryAllocator) {
+    return readJSON(schema, opts, buffer, offset, len, hostMemoryAllocator, -1);
+  }
+
+  /**
+   * Read JSON formatted data.
+   * @param schema the schema of the data. You may use Schema.INFERRED to infer the schema.
+   * @param opts various JSON parsing options.
+   * @param buffer raw UTF8 formatted bytes.
+   * @param offset the starting offset into buffer.
+   * @param len the number of bytes to parse.
+   * @param hostMemoryAllocator allocator for host memory buffers
+   * @param emptyRowCount the number of rows to return if no columns were read.
+   * @return the data parsed as a table on the GPU.
+   */
+  public static Table readJSON(Schema schema, JSONOptions opts, byte[] buffer, long offset,
+                               long len, HostMemoryAllocator hostMemoryAllocator,
+                               int emptyRowCount) {
     if (len <= 0) {
       len = buffer.length - offset;
     }
@@ -1145,10 +1381,16 @@ public static Table readJSON(Schema schema, JSONOptions opts, byte[] buffer, lon
     assert offset >= 0 && offset < buffer.length;
     try (HostMemoryBuffer newBuf = hostMemoryAllocator.allocate(len)) {
       newBuf.setBytes(0, buffer, offset, len);
-      return readJSON(schema, opts, newBuf, 0, len);
+      return readJSON(schema, opts, newBuf, 0, len, emptyRowCount);
     }
   }
 
+  public static Table readJSON(Schema schema, JSONOptions opts, byte[] buffer, long offset,
+                               long len, int emptyRowCount) {
+    return readJSON(schema, opts, buffer, offset, len, DefaultHostMemoryAllocator.get(),
+        emptyRowCount);
+  }
+
   public static Table readJSON(Schema schema, JSONOptions opts, byte[] buffer, long offset,
                                long len) {
     return readJSON(schema, opts, buffer, offset, len, DefaultHostMemoryAllocator.get());
@@ -1222,7 +1464,22 @@ public static TableWithMeta readAndInferJSON(JSONOptions opts, DataSource ds) {
    * @return the data parsed as a table on the GPU.
    */
   public static Table readJSON(Schema schema, JSONOptions opts, HostMemoryBuffer buffer,
-                               long offset, long len) {
+                              long offset, long len) {
+    return readJSON(schema, opts, buffer, offset, len, -1);
+  }
+
+  /**
+   * Read JSON formatted data.
+   * @param schema the schema of the data. You may use Schema.INFERRED to infer the schema.
+   * @param opts various JSON parsing options.
+   * @param buffer raw UTF8 formatted bytes.
+   * @param offset the starting offset into buffer.
+   * @param len the number of bytes to parse.
+   * @param emptyRowCount the number of rows to use if no columns were found.
+   * @return the data parsed as a table on the GPU.
+   */
+  public static Table readJSON(Schema schema, JSONOptions opts, HostMemoryBuffer buffer,
+                               long offset, long len, int emptyRowCount) {
     if (len <= 0) {
       len = buffer.length - offset;
     }
@@ -1251,7 +1508,7 @@ public static Table readJSON(Schema schema, JSONOptions opts, HostMemoryBuffer b
             cudfPruneSchema,
             opts.experimental(),
             opts.getLineDelimiter()))) {
-      return twm.releaseTable();
+      return gatherJSONColumns(schema, twm, emptyRowCount);
     }
   }
 
@@ -1263,6 +1520,18 @@ public static Table readJSON(Schema schema, JSONOptions opts, HostMemoryBuffer b
    * @return the data parsed as a table on the GPU.
    */
   public static Table readJSON(Schema schema, JSONOptions opts, DataSource ds) {
+    return readJSON(schema, opts, ds, -1);
+  }
+
+  /**
+   * Read JSON formatted data.
+   * @param schema the schema of the data. You may use Schema.INFERRED to infer the schema.
+   * @param opts various JSON parsing options.
+   * @param ds the DataSource to read from.
+   * @param emptyRowCount the number of rows to return if no columns were read.
+   * @return the data parsed as a table on the GPU.
+   */
+  public static Table readJSON(Schema schema, JSONOptions opts, DataSource ds, int emptyRowCount) {
     long dsHandle = DataSourceHelper.createWrapperDataSource(ds);
     // only prune the schema if one is provided
     boolean cudfPruneSchema = schema.getColumnNames() != null &&
@@ -1285,7 +1554,7 @@ public static Table readJSON(Schema schema, JSONOptions opts, DataSource ds) {
         opts.experimental(),
         opts.getLineDelimiter(),
         dsHandle))) {
-      return twm.releaseTable();
+      return gatherJSONColumns(schema, twm, emptyRowCount);
     } finally {
       DataSourceHelper.destroyWrapperDataSource(dsHandle);
     }

From 6e978cc3057de3a4973262824614aa049033028e Mon Sep 17 00:00:00 2001
From: Nghia Truong <nghiat@nvidia.com>
Date: Fri, 25 Oct 2024 10:37:19 -0700
Subject: [PATCH 3/4] Deprecate Java methods

Signed-off-by: Nghia Truong <nghiat@nvidia.com>
---
 java/src/main/java/ai/rapids/cudf/Table.java | 245 ++-----------------
 1 file changed, 21 insertions(+), 224 deletions(-)

diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java
index dbee53640aa..ac531c3c763 100644
--- a/java/src/main/java/ai/rapids/cudf/Table.java
+++ b/java/src/main/java/ai/rapids/cudf/Table.java
@@ -1092,224 +1092,6 @@ public static Table readJSON(Schema schema, JSONOptions opts, byte[] buffer) {
     return readJSON(schema, opts, buffer, 0, buffer.length);
   }
 
-  private static class DidViewChange {
-    ColumnVector changeWasNeeded = null;
-    boolean noChangeNeeded = false;
-
-    public static DidViewChange yes(ColumnVector cv) {
-      DidViewChange ret = new DidViewChange();
-      ret.changeWasNeeded = cv;
-      return ret;
-    }
-
-    public static DidViewChange no() {
-      DidViewChange ret = new DidViewChange();
-      ret.noChangeNeeded = true;
-      return ret;
-    }
-  }
-
-  private static DidViewChange gatherJSONColumns(Schema schema, TableWithMeta.NestedChildren children,
-                                                 ColumnView cv) {
-    // We need to do this recursively to be sure it all matches as expected.
-    // If we run into problems where the data types don't match, we are not
-    // going to fix up the data types. We are only going to reorder the columns.
-    if (schema.getType() == DType.STRUCT) {
-      if (cv.getType() != DType.STRUCT) {
-        // The types don't match so just return the input unchanged...
-        return DidViewChange.no();
-      } else {
-        String[] foundNames;
-        if (children == null) {
-          foundNames = new String[0];
-        } else {
-          foundNames = children.getNames();
-        }
-        HashMap<String, Integer> indices = new HashMap<>();
-        for (int i = 0; i < foundNames.length; i++) {
-          indices.put(foundNames[i], i);
-        }
-        // We might need to rearrange the columns to match what we want.
-        DType[] types = schema.getChildTypes();
-        String[] neededNames = schema.getColumnNames();
-        ColumnView[] columns = new ColumnView[neededNames.length];
-        try {
-          boolean somethingChanged = false;
-          if (columns.length != foundNames.length) {
-            somethingChanged = true;
-          }
-          for (int i = 0; i < columns.length; i++) {
-            String neededColumnName = neededNames[i];
-            Integer index = indices.get(neededColumnName);
-            Schema childSchema = schema.getChild(i);
-            if (index != null) {
-              if (childSchema.isStructOrHasStructDescendant()) {
-                ColumnView child = cv.getChildColumnView(index);
-                boolean shouldCloseChild = true;
-                try {
-                  if (index != i) {
-                    somethingChanged = true;
-                  }
-                  DidViewChange childResult = gatherJSONColumns(schema.getChild(i),
-                      children.getChild(index), child);
-                  if (childResult.noChangeNeeded) {
-                    shouldCloseChild = false;
-                    columns[i] = child;
-                  } else {
-                    somethingChanged = true;
-                    columns[i] = childResult.changeWasNeeded;
-                  }
-                } finally {
-                  if (shouldCloseChild) {
-                    child.close();
-                  }
-                }
-              } else {
-                if (index != i) {
-                  somethingChanged = true;
-                }
-                columns[i] = cv.getChildColumnView(index);
-              }
-            } else {
-              somethingChanged = true;
-              if (types[i] == DType.LIST) {
-                try (Scalar s = Scalar.listFromNull(childSchema.getChild(0).asHostDataType())) {
-                  columns[i] = ColumnVector.fromScalar(s, (int) cv.getRowCount());
-                }
-              } else if (types[i] == DType.STRUCT) {
-                int numStructChildren = childSchema.getNumChildren();
-                HostColumnVector.DataType[] structChildren = new HostColumnVector.DataType[numStructChildren];
-                for (int structChildIndex = 0; structChildIndex < numStructChildren; structChildIndex++) {
-                  structChildren[structChildIndex] = childSchema.getChild(structChildIndex).asHostDataType();
-                }
-                try (Scalar s = Scalar.structFromNull(structChildren)) {
-                  columns[i] = ColumnVector.fromScalar(s, (int) cv.getRowCount());
-                }
-              } else {
-                try (Scalar s = Scalar.fromNull(types[i])) {
-                  columns[i] = ColumnVector.fromScalar(s, (int) cv.getRowCount());
-                }
-              }
-            }
-          }
-          if (somethingChanged) {
-            try (ColumnView ret = new ColumnView(cv.type, cv.rows, Optional.of(cv.nullCount),
-                cv.getValid(), null, columns)) {
-              return DidViewChange.yes(ret.copyToColumnVector());
-            }
-          } else {
-            return DidViewChange.no();
-          }
-        } finally {
-          for (ColumnView c: columns) {
-            if (c != null) {
-              c.close();
-            }
-          }
-        }
-      }
-    } else if (schema.getType() == DType.LIST && cv.getType() == DType.LIST) {
-      if (schema.isStructOrHasStructDescendant()) {
-        String [] childNames = children.getNames();
-        if (childNames.length == 2 &&
-            "offsets".equals(childNames[0]) &&
-            "element".equals(childNames[1])) {
-          try (ColumnView child = cv.getChildColumnView(0)){
-            DidViewChange listResult = gatherJSONColumns(schema.getChild(0),
-                children.getChild(1), child);
-            if (listResult.noChangeNeeded) {
-              return DidViewChange.no();
-            } else {
-              try (ColumnView listView = new ColumnView(cv.type, cv.rows,
-                  Optional.of(cv.nullCount), cv.getValid(), cv.getOffsets(),
-                  new ColumnView[]{listResult.changeWasNeeded})) {
-                return DidViewChange.yes(listView.copyToColumnVector());
-              } finally {
-                listResult.changeWasNeeded.close();
-              }
-            }
-          }
-        }
-      }
-      // Nothing to change so just return the input, but we need to inc a ref count to really
-      // make it work, so for now we are going to turn it into a ColumnVector.
-      return DidViewChange.no();
-    } else {
-      // Nothing to change so just return the input, but we need to inc a ref count to really
-      // make it work, so for now we are going to turn it into a ColumnVector.
-      return DidViewChange.no();
-    }
-  }
-
-  private static Table gatherJSONColumns(Schema schema, TableWithMeta twm, int emptyRowCount) {
-    String[] neededColumns = schema.getColumnNames();
-    if (neededColumns == null || neededColumns.length == 0) {
-      return twm.releaseTable();
-    } else {
-      String[] foundNames = twm.getColumnNames();
-      HashMap<String, Integer> indices = new HashMap<>();
-      for (int i = 0; i < foundNames.length; i++) {
-        indices.put(foundNames[i], i);
-      }
-      // We might need to rearrange the columns to match what we want.
-      DType[] types = schema.getChildTypes();
-      ColumnVector[] columns = new ColumnVector[neededColumns.length];
-      try (Table tbl = twm.releaseTable()) {
-        int rowCount = tbl == null ? emptyRowCount : (int)tbl.getRowCount();
-        if (rowCount < 0) {
-          throw new IllegalStateException(
-              "No empty row count provided and the table read has no row count or columns");
-        }
-        for (int i = 0; i < columns.length; i++) {
-          String neededColumnName = neededColumns[i];
-          Integer index = indices.get(neededColumnName);
-          if (index != null) {
-            if (schema.getChild(i).isStructOrHasStructDescendant()) {
-              DidViewChange gathered = gatherJSONColumns(schema.getChild(i), twm.getChild(index),
-                  tbl.getColumn(index));
-              if (gathered.noChangeNeeded) {
-                columns[i] = tbl.getColumn(index).incRefCount();
-              } else {
-                columns[i] = gathered.changeWasNeeded;
-              }
-            } else {
-              columns[i] = tbl.getColumn(index).incRefCount();
-            }
-          } else {
-            if (types[i] == DType.LIST) {
-              Schema listSchema = schema.getChild(i);
-              Schema elementSchema = listSchema.getChild(0);
-              try (Scalar s = Scalar.listFromNull(elementSchema.asHostDataType())) {
-                columns[i] = ColumnVector.fromScalar(s, rowCount);
-              }
-            } else if (types[i] == DType.STRUCT) {
-              Schema structSchema = schema.getChild(i);
-              int numStructChildren = structSchema.getNumChildren();
-              DataType[] structChildrenTypes = new DataType[numStructChildren];
-              for (int j = 0; j < numStructChildren; j++) {
-                structChildrenTypes[j] = structSchema.getChild(j).asHostDataType();
-              }
-              try (Scalar s = Scalar.structFromNull(structChildrenTypes)) {
-                columns[i] = ColumnVector.fromScalar(s, rowCount);
-              }
-            } else {
-              try (Scalar s = Scalar.fromNull(types[i])) {
-                columns[i] = ColumnVector.fromScalar(s, rowCount);
-              }
-            }
-          }
-        }
-        return new Table(columns);
-      } finally {
-        for (ColumnVector c: columns) {
-          if (c != null) {
-            c.close();
-          }
-        }
-      }
-    }
-  }
-
   /**
    * Read a JSON file.
    * @param schema the schema of the file.  You may use Schema.INFERRED to infer the schema.
@@ -1340,7 +1122,7 @@ public static Table readJSON(Schema schema, JSONOptions opts, File path) {
                     opts.experimental(),
                     opts.getLineDelimiter()))) {
 
-      return gatherJSONColumns(schema, twm, -1);
+      return twm.releaseTable();
     }
   }
 
@@ -1361,6 +1143,10 @@ public static Table readJSON(Schema schema, JSONOptions opts, byte[] buffer, lon
 
   /**
    * Read JSON formatted data.
+   *
+   * @deprecated The emptyRowCount parameter is ignored. Use the overload without
+   * emptyRowCount instead.
+   *
    * @param schema the schema of the data. You may use Schema.INFERRED to infer the schema.
    * @param opts various JSON parsing options.
    * @param buffer raw UTF8 formatted bytes.
@@ -1370,6 +1156,7 @@ public static Table readJSON(Schema schema, JSONOptions opts, byte[] buffer, lon
    * @param emptyRowCount the number of rows to return if no columns were read.
    * @return the data parsed as a table on the GPU.
    */
+  @SuppressWarnings("unused")
   public static Table readJSON(Schema schema, JSONOptions opts, byte[] buffer, long offset,
                                long len, HostMemoryAllocator hostMemoryAllocator,
                                int emptyRowCount) {
@@ -1381,14 +1168,14 @@ public static Table readJSON(Schema schema, JSONOptions opts, byte[] buffer, lon
     assert offset >= 0 && offset < buffer.length;
     try (HostMemoryBuffer newBuf = hostMemoryAllocator.allocate(len)) {
       newBuf.setBytes(0, buffer, offset, len);
-      return readJSON(schema, opts, newBuf, 0, len, emptyRowCount);
+      return readJSON(schema, opts, newBuf, 0, len);
     }
   }
 
+  @SuppressWarnings("unused")
   public static Table readJSON(Schema schema, JSONOptions opts, byte[] buffer, long offset,
                                long len, int emptyRowCount) {
-    return readJSON(schema, opts, buffer, offset, len, DefaultHostMemoryAllocator.get(),
-        emptyRowCount);
+    return readJSON(schema, opts, buffer, offset, len, DefaultHostMemoryAllocator.get());
   }
 
   public static Table readJSON(Schema schema, JSONOptions opts, byte[] buffer, long offset,
@@ -1470,6 +1257,10 @@ public static Table readJSON(Schema schema, JSONOptions opts, HostMemoryBuffer b
 
   /**
    * Read JSON formatted data.
+   *
+   * @deprecated The emptyRowCount parameter is ignored. Use the overload without
+   * emptyRowCount instead.
+   *
    * @param schema the schema of the data. You may use Schema.INFERRED to infer the schema.
    * @param opts various JSON parsing options.
    * @param buffer raw UTF8 formatted bytes.
@@ -1478,6 +1269,7 @@ public static Table readJSON(Schema schema, JSONOptions opts, HostMemoryBuffer b
    * @param emptyRowCount the number of rows to use if no columns were found.
    * @return the data parsed as a table on the GPU.
    */
+  @SuppressWarnings("unused")
   public static Table readJSON(Schema schema, JSONOptions opts, HostMemoryBuffer buffer,
                                long offset, long len, int emptyRowCount) {
     if (len <= 0) {
@@ -1508,7 +1300,7 @@ public static Table readJSON(Schema schema, JSONOptions opts, HostMemoryBuffer b
             cudfPruneSchema,
             opts.experimental(),
             opts.getLineDelimiter()))) {
-      return gatherJSONColumns(schema, twm, emptyRowCount);
+      return twm.releaseTable();
     }
   }
 
@@ -1525,12 +1317,17 @@ public static Table readJSON(Schema schema, JSONOptions opts, DataSource ds) {
 
   /**
    * Read JSON formatted data.
+   *
+   * @deprecated The emptyRowCount parameter is ignored. Use the overload without
+   * emptyRowCount instead.
+   *
    * @param schema the schema of the data. You may use Schema.INFERRED to infer the schema.
    * @param opts various JSON parsing options.
    * @param ds the DataSource to read from.
    * @param emptyRowCount the number of rows to return if no columns were read.
    * @return the data parsed as a table on the GPU.
    */
+  @SuppressWarnings("unused")
   public static Table readJSON(Schema schema, JSONOptions opts, DataSource ds, int emptyRowCount) {
     long dsHandle = DataSourceHelper.createWrapperDataSource(ds);
     // only prune the schema if one is provided
@@ -1554,7 +1351,7 @@ public static Table readJSON(Schema schema, JSONOptions opts, DataSource ds, int
         opts.experimental(),
         opts.getLineDelimiter(),
         dsHandle))) {
-      return gatherJSONColumns(schema, twm, emptyRowCount);
+      return twm.releaseTable();
     } finally {
       DataSourceHelper.destroyWrapperDataSource(dsHandle);
     }

From a28a32a5bcd78922cd94de4bc940270fd9353f4f Mon Sep 17 00:00:00 2001
From: Nghia Truong <nghiat@nvidia.com>
Date: Mon, 28 Oct 2024 10:53:09 -0700
Subject: [PATCH 4/4] Always prune columns if schema is available

Signed-off-by: Nghia Truong <nghiat@nvidia.com>
---
 java/src/main/java/ai/rapids/cudf/Table.java | 17 -----------------
 java/src/main/native/src/TableJni.cpp        | 14 +++++++-------
 2 files changed, 7 insertions(+), 24 deletions(-)

diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java
index ac531c3c763..b01ce31b1f3 100644
--- a/java/src/main/java/ai/rapids/cudf/Table.java
+++ b/java/src/main/java/ai/rapids/cudf/Table.java
@@ -259,7 +259,6 @@ private static native long readJSON(int[] numChildren, String[] columnNames,
                                         boolean allowLeadingZeros,
                                         boolean allowNonNumericNumbers,
                                         boolean allowUnquotedControl,
-                                        boolean pruneColumns,
                                         boolean experimental,
                                         byte lineDelimiter) throws CudfException;
 
@@ -275,7 +274,6 @@ private static native long readJSONFromDataSource(int[] numChildren, String[] co
                                       boolean allowLeadingZeros,
                                       boolean allowNonNumericNumbers,
                                       boolean allowUnquotedControl,
-                                      boolean pruneColumns,
                                       boolean experimental,
                                       byte lineDelimiter,
                                       long dsHandle) throws CudfException;
@@ -1100,10 +1098,6 @@ public static Table readJSON(Schema schema, JSONOptions opts, byte[] buffer) {
    * @return the file parsed as a table on the GPU.
    */
   public static Table readJSON(Schema schema, JSONOptions opts, File path) {
-    // only prune the schema if one is provided
-    boolean cudfPruneSchema = schema.getColumnNames() != null &&
-        schema.getColumnNames().length != 0 &&
-        opts.shouldCudfPruneSchema();
     try (TableWithMeta twm = new TableWithMeta(
             readJSON(schema.getFlattenedNumChildren(), schema.getFlattenedColumnNames(),
                     schema.getFlattenedTypeIds(), schema.getFlattenedTypeScales(),
@@ -1118,7 +1112,6 @@ public static Table readJSON(Schema schema, JSONOptions opts, File path) {
                     opts.leadingZerosAllowed(),
                     opts.nonNumericNumbersAllowed(),
                     opts.unquotedControlChars(),
-                    cudfPruneSchema,
                     opts.experimental(),
                     opts.getLineDelimiter()))) {
 
@@ -1278,10 +1271,6 @@ public static Table readJSON(Schema schema, JSONOptions opts, HostMemoryBuffer b
     assert len > 0;
     assert len <= buffer.length - offset;
     assert offset >= 0 && offset < buffer.length;
-    // only prune the schema if one is provided
-    boolean cudfPruneSchema = schema.getColumnNames() != null &&
-        schema.getColumnNames().length != 0 &&
-        opts.shouldCudfPruneSchema();
     try (TableWithMeta twm = new TableWithMeta(readJSON(
             schema.getFlattenedNumChildren(), schema.getFlattenedColumnNames(),
             schema.getFlattenedTypeIds(), schema.getFlattenedTypeScales(), null,
@@ -1297,7 +1286,6 @@ public static Table readJSON(Schema schema, JSONOptions opts, HostMemoryBuffer b
             opts.leadingZerosAllowed(),
             opts.nonNumericNumbersAllowed(),
             opts.unquotedControlChars(),
-            cudfPruneSchema,
             opts.experimental(),
             opts.getLineDelimiter()))) {
       return twm.releaseTable();
@@ -1330,10 +1318,6 @@ public static Table readJSON(Schema schema, JSONOptions opts, DataSource ds) {
   @SuppressWarnings("unused")
   public static Table readJSON(Schema schema, JSONOptions opts, DataSource ds, int emptyRowCount) {
     long dsHandle = DataSourceHelper.createWrapperDataSource(ds);
-    // only prune the schema if one is provided
-    boolean cudfPruneSchema = schema.getColumnNames() != null &&
-        schema.getColumnNames().length != 0 &&
-        opts.shouldCudfPruneSchema();
     try (TableWithMeta twm = new TableWithMeta(readJSONFromDataSource(schema.getFlattenedNumChildren(),
         schema.getFlattenedColumnNames(), schema.getFlattenedTypeIds(), schema.getFlattenedTypeScales(),
         opts.isDayFirst(),
@@ -1347,7 +1331,6 @@ public static Table readJSON(Schema schema, JSONOptions opts, DataSource ds, int
         opts.leadingZerosAllowed(),
         opts.nonNumericNumbersAllowed(),
         opts.unquotedControlChars(),
-        cudfPruneSchema,
         opts.experimental(),
         opts.getLineDelimiter(),
         dsHandle))) {
diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp
index 566ac0b972d..1f8b1ea207d 100644
--- a/java/src/main/native/src/TableJni.cpp
+++ b/java/src/main/native/src/TableJni.cpp
@@ -1826,7 +1826,6 @@ Java_ai_rapids_cudf_Table_readJSONFromDataSource(JNIEnv* env,
                                                  jboolean allow_leading_zeros,
                                                  jboolean allow_nonnumeric_numbers,
                                                  jboolean allow_unquoted_control,
-                                                 jboolean prune_columns,
                                                  jboolean experimental,
                                                  jbyte line_delimiter,
                                                  jlong ds_handle)
@@ -1855,6 +1854,7 @@ Java_ai_rapids_cudf_Table_readJSONFromDataSource(JNIEnv* env,
     cudf::io::json_recovery_mode_t recovery_mode =
       recover_with_null ? cudf::io::json_recovery_mode_t::RECOVER_WITH_NULL
                         : cudf::io::json_recovery_mode_t::FAIL;
+
     cudf::io::json_reader_options_builder opts =
       cudf::io::json_reader_options::builder(source)
         .dayfirst(static_cast<bool>(day_first))
@@ -1866,7 +1866,6 @@ Java_ai_rapids_cudf_Table_readJSONFromDataSource(JNIEnv* env,
         .delimiter(static_cast<char>(line_delimiter))
         .strict_validation(strict_validation)
         .keep_quotes(keep_quotes)
-        .prune_columns(prune_columns)
         .experimental(experimental);
     if (strict_validation) {
       opts.numeric_leading_zeros(allow_leading_zeros)
@@ -1896,10 +1895,11 @@ Java_ai_rapids_cudf_Table_readJSONFromDataSource(JNIEnv* env,
           name, cudf::jni::read_schema_element(at, n_children, n_col_names, n_types, n_scales)});
         name_order.push_back(name);
       }
-
+      auto const prune_columns = data_types.size() != 0;
       cudf::io::schema_element structs{
         cudf::data_type{cudf::type_id::STRUCT}, std::move(data_types), {std::move(name_order)}};
-      opts.dtypes(structs);
+      opts.prune_columns(prune_columns).dtypes(structs);
+
     } else {
       // should infer the types
     }
@@ -1932,7 +1932,6 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_readJSON(JNIEnv* env,
                                                            jboolean allow_leading_zeros,
                                                            jboolean allow_nonnumeric_numbers,
                                                            jboolean allow_unquoted_control,
-                                                           jboolean prune_columns,
                                                            jboolean experimental,
                                                            jbyte line_delimiter)
 {
@@ -1975,6 +1974,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_readJSON(JNIEnv* env,
     cudf::io::json_recovery_mode_t recovery_mode =
       recover_with_null ? cudf::io::json_recovery_mode_t::RECOVER_WITH_NULL
                         : cudf::io::json_recovery_mode_t::FAIL;
+
     cudf::io::json_reader_options_builder opts =
       cudf::io::json_reader_options::builder(source)
         .dayfirst(static_cast<bool>(day_first))
@@ -1986,7 +1986,6 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_readJSON(JNIEnv* env,
         .delimiter(static_cast<char>(line_delimiter))
         .strict_validation(strict_validation)
         .keep_quotes(keep_quotes)
-        .prune_columns(prune_columns)
         .experimental(experimental);
     if (strict_validation) {
       opts.numeric_leading_zeros(allow_leading_zeros)
@@ -2017,9 +2016,10 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_readJSON(JNIEnv* env,
           name, cudf::jni::read_schema_element(at, n_children, n_col_names, n_types, n_scales)});
         name_order.emplace_back(std::move(name));
       }
+      auto const prune_columns = data_types.size() != 0;
       cudf::io::schema_element structs{
         cudf::data_type{cudf::type_id::STRUCT}, std::move(data_types), {std::move(name_order)}};
-      opts.dtypes(structs);
+      opts.prune_columns(prune_columns).dtypes(structs);
     } else {
       // should infer the types
     }