diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 310bc99b279..61d584f26be 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -394,6 +394,8 @@ add_library( src/io/functions.cpp src/io/json/byte_range_info.cu src/io/json/json_column.cu + src/io/json/column_tree_construction.cu + src/io/json/device_column_tree_extraction.cu src/io/json/json_normalization.cu src/io/json/json_tree.cu src/io/json/nested_json_gpu.cu diff --git a/cpp/src/io/json/column_tree_construction.cu b/cpp/src/io/json/column_tree_construction.cu new file mode 100644 index 00000000000..e3c3e367361 --- /dev/null +++ b/cpp/src/io/json/column_tree_construction.cu @@ -0,0 +1,553 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "json_utils.hpp" +#include "nested_json.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace cudf::io::json { + +template +void print(device_span d_vec, std::string name, rmm::cuda_stream_view stream) +{ + stream.synchronize(); + auto h_vec = cudf::detail::make_std_vector_async(d_vec, stream); + stream.synchronize(); + std::cout << name << " = "; + for (auto e : h_vec) { + std::cout << e << " "; + } + std::cout << std::endl; +} + +namespace experimental::detail { + +using row_offset_t = size_type; + +struct level_ordering { + device_span node_levels; + device_span col_ids; + __device__ bool operator()(NodeIndexT lhs_node_id, NodeIndexT rhs_node_id) const + { + return (node_levels[lhs_node_id] < node_levels[rhs_node_id]) || + (node_levels[lhs_node_id] == node_levels[rhs_node_id] && + col_ids[lhs_node_id] < col_ids[rhs_node_id]); + } +}; + +struct parent_nodeids_to_colids { + device_span col_ids; + device_span rev_mapped_col_ids; + __device__ auto operator()(NodeIndexT parent_node_id) -> NodeIndexT + { + return parent_node_id == parent_node_sentinel ? parent_node_sentinel + : rev_mapped_col_ids[col_ids[parent_node_id]]; + } +}; + +/** + * @brief Reduces node tree representation to column tree CSR representation. 
+ * + * @param tree Node tree representation of JSON string + * @param original_col_ids Column ids of nodes + * @param sorted_col_ids Sorted column ids of nodes + * @param ordered_node_ids Node ids of nodes sorted by column ids + * @param row_offsets Row offsets of nodes + * @param is_array_of_arrays Whether the tree is an array of arrays + * @param row_array_parent_col_id Column id of row array, if is_array_of_arrays is true + * @param stream CUDA stream used for device memory operations and kernel launches + * @return A tuple of column tree representation of JSON string, column ids of columns, and + * max row offsets of columns + */ +std::tuple reduce_to_column_tree( + tree_meta_t& tree, + device_span col_ids, + device_span row_offsets, + bool is_array_of_arrays, + NodeIndexT const row_array_parent_col_id, + rmm::cuda_stream_view stream) +{ + CUDF_FUNC_RANGE(); + + rmm::device_uvector level_ordered_col_ids(col_ids.size(), stream); + rmm::device_uvector level_ordered_node_ids(col_ids.size(), stream); + thrust::copy( + rmm::exec_policy_nosync(stream), col_ids.begin(), col_ids.end(), level_ordered_col_ids.begin()); + thrust::sequence( + rmm::exec_policy_nosync(stream), level_ordered_node_ids.begin(), level_ordered_node_ids.end()); + + // Reorder nodes and column ids in level-wise fashion + thrust::stable_sort_by_key(rmm::exec_policy_nosync(stream), + level_ordered_node_ids.begin(), + level_ordered_node_ids.end(), + level_ordered_col_ids.begin(), + level_ordering{tree.node_levels, col_ids}); + + // 1. 
get the number of columns in tree, mapping between node tree col ids and csr col ids, and + // the node id of first row in each column + auto const num_columns = thrust::unique_count( + rmm::exec_policy_nosync(stream), level_ordered_col_ids.begin(), level_ordered_col_ids.end()); + rmm::device_uvector level_ordered_unique_node_ids(num_columns, stream); + rmm::device_uvector mapped_col_ids(num_columns, stream); + rmm::device_uvector rev_mapped_col_ids(num_columns, stream); + thrust::unique_by_key_copy(rmm::exec_policy_nosync(stream), + level_ordered_col_ids.begin(), + level_ordered_node_ids.end(), + level_ordered_node_ids.begin(), + mapped_col_ids.begin(), + level_ordered_unique_node_ids.begin()); + auto* dev_num_levels_ptr = thrust::max_element( + rmm::exec_policy_nosync(stream), tree.node_levels.begin(), tree.node_levels.end()); + rmm::device_scalar num_levels(stream); + CUDF_CUDA_TRY(cudaMemcpyAsync( + num_levels.data(), dev_num_levels_ptr, sizeof(NodeIndexT), cudaMemcpyDeviceToDevice, stream)); + + rmm::device_uvector mapped_col_ids_copy(num_columns, stream); + thrust::copy(rmm::exec_policy_nosync(stream), + mapped_col_ids.begin(), + mapped_col_ids.end(), + mapped_col_ids_copy.begin()); + thrust::sequence( + rmm::exec_policy_nosync(stream), rev_mapped_col_ids.begin(), rev_mapped_col_ids.end()); + thrust::sort_by_key(rmm::exec_policy_nosync(stream), + mapped_col_ids_copy.begin(), + mapped_col_ids_copy.end(), + rev_mapped_col_ids.begin()); + + // 2. maximum number of rows per column: computed with reduce_by_key {col_id}, {row_offset}, max. + // 3. 
category for each column node by aggregating all nodes in node tree corresponding to same + // column: + // reduce_by_key {col_id}, {node_categories} - custom opp (*+v=*, v+v=v, *+#=E) + rmm::device_uvector max_row_offsets(num_columns, stream); + rmm::device_uvector column_categories(num_columns, stream); + auto ordered_row_offsets = + thrust::make_permutation_iterator(row_offsets.begin(), level_ordered_node_ids.begin()); + auto ordered_node_categories = + thrust::make_permutation_iterator(tree.node_categories.begin(), level_ordered_node_ids.begin()); + thrust::reduce_by_key( + rmm::exec_policy_nosync(stream), + level_ordered_col_ids.begin(), + level_ordered_col_ids.end(), + thrust::make_zip_iterator(ordered_row_offsets, ordered_node_categories), + thrust::make_discard_iterator(), + thrust::make_zip_iterator(max_row_offsets.begin(), column_categories.begin()), + thrust::equal_to(), + [] __device__(auto a, auto b) { + auto row_offset_a = thrust::get<0>(a); + auto row_offset_b = thrust::get<0>(b); + auto type_a = thrust::get<1>(a); + auto type_b = thrust::get<1>(b); + + NodeT ctg; + auto is_a_leaf = (type_a == NC_VAL || type_a == NC_STR); + auto is_b_leaf = (type_b == NC_VAL || type_b == NC_STR); + // (v+v=v, *+*=*, *+v=*, *+#=E, NESTED+VAL=NESTED) + // *+*=*, v+v=v + if (type_a == type_b) { + ctg = type_a; + } else if (is_a_leaf) { + // *+v=*, N+V=N + // STRUCT/LIST + STR/VAL = STRUCT/LIST, STR/VAL + FN = ERR, STR/VAL + STR = STR + ctg = (type_b == NC_FN ? NC_ERR : (is_b_leaf ? NC_STR : type_b)); + } else if (is_b_leaf) { + ctg = (type_a == NC_FN ? NC_ERR : (is_a_leaf ? NC_STR : type_a)); + } else + ctg = NC_ERR; + + thrust::maximum row_offset_op; + return thrust::make_pair(row_offset_op(row_offset_a, row_offset_b), ctg); + }); + + // 4. 
construct parent_col_ids using permutation iterator + rmm::device_uvector parent_col_ids(num_columns, stream); + thrust::transform_output_iterator parent_col_ids_it( + parent_col_ids.begin(), parent_nodeids_to_colids{col_ids, rev_mapped_col_ids}); + thrust::copy_n(rmm::exec_policy_nosync(stream), + thrust::make_permutation_iterator(tree.parent_node_ids.begin(), + level_ordered_unique_node_ids.begin()), + num_columns, + parent_col_ids_it); + + /* + 5. CSR construction: + a. Sort column levels and get their ordering + b. For each column node coln iterated according to sorted_column_levels; do + i. Find nodes that have coln as the parent node -> set adj_coln + ii. row idx[coln] = size of adj_coln + 1 + iii. col idx[coln] = adj_coln U {parent_col_id[coln]} + */ + + rmm::device_uvector rowidx(num_columns + 1, stream); + thrust::fill(rmm::exec_policy_nosync(stream), rowidx.begin(), rowidx.end(), 0); + // Note that the first element of csr_parent_col_ids is -1 (parent_node_sentinel) + // children adjacency + auto num_non_leaf_columns = thrust::unique_count( + rmm::exec_policy_nosync(stream), parent_col_ids.begin() + 1, parent_col_ids.end()); + thrust::reduce_by_key(rmm::exec_policy_nosync(stream), + parent_col_ids.begin() + 1, + parent_col_ids.end(), + thrust::make_constant_iterator(1), + thrust::make_discard_iterator(), + rowidx.begin() + 1, + thrust::equal_to()); + thrust::transform_inclusive_scan( + rmm::exec_policy_nosync(stream), + thrust::make_zip_iterator(thrust::make_counting_iterator(1), rowidx.begin() + 1), + thrust::make_zip_iterator(thrust::make_counting_iterator(1) + num_columns, rowidx.end()), + rowidx.begin() + 1, + cuda::proclaim_return_type([] __device__(auto a) { + auto n = thrust::get<0>(a); + auto idx = thrust::get<1>(a); + return n == 1 ? 
idx : idx + 1; + }), + thrust::plus{}); + + rmm::device_uvector colidx((num_columns - 1) * 2, stream); + // Skip the parent of root node + thrust::scatter(rmm::exec_policy_nosync(stream), + parent_col_ids.begin() + 1, + parent_col_ids.end(), + rowidx.begin() + 1, + colidx.begin()); + // excluding root node, construct scatter map + rmm::device_uvector map(num_columns - 1, stream); + thrust::inclusive_scan_by_key(rmm::exec_policy_nosync(stream), + parent_col_ids.begin() + 1, + parent_col_ids.end(), + thrust::make_constant_iterator(1), + map.begin()); + thrust::for_each_n(rmm::exec_policy_nosync(stream), + thrust::make_counting_iterator(1), + num_columns - 1, + [rowidx = rowidx.begin(), + map = map.begin(), + parent_col_ids = parent_col_ids.begin()] __device__(auto i) { + auto parent_col_id = parent_col_ids[i]; + if (parent_col_id == 0) + map[i - 1]--; + else + map[i - 1] += rowidx[parent_col_id]; + }); + thrust::scatter(rmm::exec_policy_nosync(stream), + thrust::make_counting_iterator(1), + thrust::make_counting_iterator(1) + num_columns - 1, + map.begin(), + colidx.begin()); + + // Mixed types in List children go to different columns, + // so all immediate children of list column should have same max_row_offsets. + // create list's children max_row_offsets array + // gather the max_row_offsets from children row offset array. 
+ { + auto max_row_offsets_it = + thrust::make_permutation_iterator(max_row_offsets.begin(), colidx.begin()); + rmm::device_uvector max_children_max_row_offsets(num_columns, stream); + size_t temp_storage_bytes = 0; + cub::DeviceSegmentedReduce::Max(nullptr, + temp_storage_bytes, + max_row_offsets_it, + max_children_max_row_offsets.begin(), + num_columns, + rowidx.begin(), + rowidx.begin() + 1, + stream.value()); + rmm::device_buffer d_temp_storage(temp_storage_bytes, stream); + cub::DeviceSegmentedReduce::Max(d_temp_storage.data(), + temp_storage_bytes, + max_row_offsets_it, + max_children_max_row_offsets.begin(), + num_columns, + rowidx.begin(), + rowidx.begin() + 1, + stream.value()); + + rmm::device_uvector list_ancestors(num_columns, stream); + thrust::for_each_n( + rmm::exec_policy_nosync(stream), + thrust::make_counting_iterator(0), + num_columns, + [rowidx = rowidx.begin(), + colidx = colidx.begin(), + column_categories = column_categories.begin(), + dev_num_levels_ptr, + list_ancestors = list_ancestors.begin()] __device__(NodeIndexT node) { + auto num_levels = *dev_num_levels_ptr; + list_ancestors[node] = node; + for (int level = 0; level <= num_levels; level++) { + if (list_ancestors[node] > 0) list_ancestors[node] = colidx[rowidx[list_ancestors[node]]]; + if (list_ancestors[node] == 0 || column_categories[list_ancestors[node]] == NC_LIST) + break; + } + }); + + thrust::gather_if(rmm::exec_policy_nosync(stream), + list_ancestors.begin(), + list_ancestors.end(), + list_ancestors.begin(), + max_children_max_row_offsets.begin(), + max_row_offsets.begin(), + [] __device__(auto ancestor) { return ancestor != -1; }); + } + + return std::tuple{csr{std::move(rowidx), std::move(colidx)}, + column_tree_properties{std::move(num_levels), + std::move(column_categories), + std::move(max_row_offsets), + std::move(mapped_col_ids)}}; +} + +} // namespace experimental::detail + +namespace detail { +/** + * @brief Reduces node tree representation to column tree 
representation.
 *
 * @param tree Node tree representation of JSON string
 * @param original_col_ids Column ids of nodes
 * @param sorted_col_ids Sorted column ids of nodes
 * @param ordered_node_ids Node ids of nodes sorted by column ids
 * @param row_offsets Row offsets of nodes
 * @param is_array_of_arrays Whether the tree is an array of arrays
 * @param row_array_parent_col_id Column id of row array, if is_array_of_arrays is true
 * @param stream CUDA stream used for device memory operations and kernel launches
 * @return A tuple of column tree representation of JSON string, column ids of columns, and
 * max row offsets of columns
 */
std::tuple<tree_meta_t, rmm::device_uvector<NodeIndexT>, rmm::device_uvector<size_type>>
reduce_to_column_tree(tree_meta_t& tree,
                      device_span<NodeIndexT const> original_col_ids,
                      device_span<NodeIndexT const> sorted_col_ids,
                      device_span<NodeIndexT const> ordered_node_ids,
                      device_span<size_type const> row_offsets,
                      bool is_array_of_arrays,
                      NodeIndexT const row_array_parent_col_id,
                      rmm::cuda_stream_view stream)
{
  CUDF_FUNC_RANGE();
  // 1. column count for allocation
  auto const num_columns =
    thrust::unique_count(rmm::exec_policy(stream), sorted_col_ids.begin(), sorted_col_ids.end());

  // 2. reduce_by_key {col_id}, {row_offset}, max.
  rmm::device_uvector<NodeIndexT> unique_col_ids(num_columns, stream);
  rmm::device_uvector<size_type> max_row_offsets(num_columns, stream);
  auto ordered_row_offsets =
    thrust::make_permutation_iterator(row_offsets.begin(), ordered_node_ids.begin());
  thrust::reduce_by_key(rmm::exec_policy(stream),
                        sorted_col_ids.begin(),
                        sorted_col_ids.end(),
                        ordered_row_offsets,
                        unique_col_ids.begin(),
                        max_row_offsets.begin(),
                        thrust::equal_to<NodeIndexT>(),
                        thrust::maximum<size_type>());

  // 3. reduce_by_key {col_id}, {node_categories} - custom op (*+v=*, v+v=v, *+#=E)
  rmm::device_uvector<NodeT> column_categories(num_columns, stream);
  thrust::reduce_by_key(
    rmm::exec_policy(stream),
    sorted_col_ids.begin(),
    sorted_col_ids.end(),
    thrust::make_permutation_iterator(tree.node_categories.begin(), ordered_node_ids.begin()),
    unique_col_ids.begin(),
    column_categories.begin(),
    thrust::equal_to<NodeIndexT>(),
    [] __device__(NodeT type_a, NodeT type_b) -> NodeT {
      auto is_a_leaf = (type_a == NC_VAL || type_a == NC_STR);
      auto is_b_leaf = (type_b == NC_VAL || type_b == NC_STR);
      // (v+v=v, *+*=*, *+v=*, *+#=E, NESTED+VAL=NESTED)
      if (type_a == type_b) {
        return type_a;
      } else if (is_a_leaf) {
        // *+v=*, N+V=N
        // STRUCT/LIST + STR/VAL = STRUCT/LIST, STR/VAL + FN = ERR, STR/VAL + STR = STR
        return type_b == NC_FN ? NC_ERR : (is_b_leaf ? NC_STR : type_b);
      } else if (is_b_leaf) {
        return type_a == NC_FN ? NC_ERR : (is_a_leaf ? NC_STR : type_a);
      }
      // *+#=E
      return NC_ERR;
    });

  // 4. unique_copy parent_node_ids, ranges
  rmm::device_uvector<TreeDepthT> column_levels(0, stream);  // not required
  rmm::device_uvector<NodeIndexT> parent_col_ids(num_columns, stream);
  rmm::device_uvector<SymbolOffsetT> col_range_begin(num_columns, stream);  // Field names
  rmm::device_uvector<SymbolOffsetT> col_range_end(num_columns, stream);
  rmm::device_uvector<NodeIndexT> unique_node_ids(num_columns, stream);
  thrust::unique_by_key_copy(rmm::exec_policy(stream),
                             sorted_col_ids.begin(),
                             sorted_col_ids.end(),
                             ordered_node_ids.begin(),
                             thrust::make_discard_iterator(),
                             unique_node_ids.begin());
  thrust::copy_n(
    rmm::exec_policy(stream),
    thrust::make_zip_iterator(
      thrust::make_permutation_iterator(tree.parent_node_ids.begin(), unique_node_ids.begin()),
      thrust::make_permutation_iterator(tree.node_range_begin.begin(), unique_node_ids.begin()),
      thrust::make_permutation_iterator(tree.node_range_end.begin(), unique_node_ids.begin())),
    unique_node_ids.size(),
    thrust::make_zip_iterator(
      parent_col_ids.begin(), col_range_begin.begin(), col_range_end.begin()));

  // convert parent_node_ids to parent_col_ids
  thrust::transform(
    rmm::exec_policy(stream),
    parent_col_ids.begin(),
    parent_col_ids.end(),
    parent_col_ids.begin(),
    [col_ids = original_col_ids.begin()] __device__(auto parent_node_id) -> size_type {
      return parent_node_id == parent_node_sentinel ? parent_node_sentinel
                                                    : col_ids[parent_node_id];
    });

  // condition is true if parent is not a list, or sentinel/root
  // Special case to return true if parent is a list and is_array_of_arrays is true
  // BUG FIX: the lambda previously contained a second, unreachable `return` statement
  // (dead code left over from a rewrite); only the live expression is kept.
  auto is_non_list_parent = [column_categories = column_categories.begin(),
                             is_array_of_arrays,
                             row_array_parent_col_id] __device__(auto parent_col_id) -> bool {
    return !(parent_col_id == parent_node_sentinel ||
             (column_categories[parent_col_id] == NC_LIST &&
              (!is_array_of_arrays || parent_col_id != row_array_parent_col_id)));
  };

  // Mixed types in List children go to different columns,
  // so all immediate children of list column should have same max_row_offsets.
  // create list's children max_row_offsets array. (initialize to zero)
  // atomicMax on children max_row_offsets array.
  // gather the max_row_offsets from children row offset array.
  {
    rmm::device_uvector<size_type> list_parents_children_max_row_offsets(num_columns, stream);
    thrust::fill(rmm::exec_policy(stream),
                 list_parents_children_max_row_offsets.begin(),
                 list_parents_children_max_row_offsets.end(),
                 0);
    thrust::for_each(rmm::exec_policy(stream),
                     unique_col_ids.begin(),
                     unique_col_ids.end(),
                     [column_categories = column_categories.begin(),
                      parent_col_ids    = parent_col_ids.begin(),
                      max_row_offsets   = max_row_offsets.begin(),
                      list_parents_children_max_row_offsets =
                        list_parents_children_max_row_offsets.begin()] __device__(auto col_id) {
                       auto parent_col_id = parent_col_ids[col_id];
                       if (parent_col_id != parent_node_sentinel and
                           column_categories[parent_col_id] == node_t::NC_LIST) {
                         cuda::atomic_ref<size_type, cuda::thread_scope_device> ref{
                           *(list_parents_children_max_row_offsets + parent_col_id)};
                         ref.fetch_max(max_row_offsets[col_id], cuda::std::memory_order_relaxed);
                       }
                     });
    thrust::gather_if(
      rmm::exec_policy(stream),
      parent_col_ids.begin(),
      parent_col_ids.end(),
      parent_col_ids.begin(),
      list_parents_children_max_row_offsets.begin(),
      max_row_offsets.begin(),
      [column_categories = column_categories.begin()] __device__(size_type parent_col_id) {
        return parent_col_id != parent_node_sentinel and
               column_categories[parent_col_id] == node_t::NC_LIST;
      });
  }

  // copy lists' max_row_offsets to children.
  // all structs should have same size.
  thrust::transform_if(
    rmm::exec_policy(stream),
    unique_col_ids.begin(),
    unique_col_ids.end(),
    max_row_offsets.begin(),
    [column_categories = column_categories.begin(),
     is_non_list_parent,
     parent_col_ids  = parent_col_ids.begin(),
     max_row_offsets = max_row_offsets.begin()] __device__(size_type col_id) {
      auto parent_col_id = parent_col_ids[col_id];
      // walk up through non-list ancestors; the nearest list (or root) fixes the size
      while (is_non_list_parent(parent_col_id)) {
        col_id        = parent_col_id;
        parent_col_id = parent_col_ids[parent_col_id];
      }
      return max_row_offsets[col_id];
    },
    [column_categories = column_categories.begin(),
     is_non_list_parent,
     parent_col_ids = parent_col_ids.begin()] __device__(size_type col_id) {
      auto parent_col_id = parent_col_ids[col_id];
      // condition is true if parent is not a list, or sentinel/root
      return is_non_list_parent(parent_col_id);
    });

  // For Struct and List (to avoid copying entire strings when mixed type as string is enabled)
  thrust::transform_if(
    rmm::exec_policy(stream),
    col_range_begin.begin(),
    col_range_begin.end(),
    column_categories.begin(),
    col_range_end.begin(),
    [] __device__(auto i) { return i + 1; },
    [] __device__(NodeT type) { return type == NC_STRUCT || type == NC_LIST; });

  return std::tuple{tree_meta_t{std::move(column_categories),
                                std::move(parent_col_ids),
                                std::move(column_levels),
                                std::move(col_range_begin),
                                std::move(col_range_end)},
                    std::move(unique_col_ids),
                    std::move(max_row_offsets)};
}

}  // namespace detail
}  // namespace cudf::io::json
diff --git a/cpp/src/io/json/device_column_tree_extraction.cu b/cpp/src/io/json/device_column_tree_extraction.cu
new file mode 100644
index 00000000000..b15d327233c
--- /dev/null
+++ b/cpp/src/io/json/device_column_tree_extraction.cu
@@ -0,0 +1,1046 @@
/*
 * Copyright (c) 2024, NVIDIA CORPORATION.
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "io/utilities/parsing_utils.cuh" +#include "io/utilities/string_parsing.hpp" +#include "json_utils.hpp" +#include "nested_json.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace cudf::io::json { + +namespace experimental::detail { + +using row_offset_t = size_type; + +rmm::device_uvector extract_device_column_subtree( + const csr& adjacency, + const column_tree_properties& props, + cudf::io::json_reader_options reader_options, + rmm::cuda_stream_view stream) +{ + // What are the cases in which estimation works? 
+ CUDF_EXPECTS(reader_options.is_enabled_mixed_types_as_string() == false, + "mixed type as string has not yet been implemented"); + CUDF_EXPECTS(reader_options.is_enabled_prune_columns() == false, + "column pruning has not yet been implemented"); + + auto& rowidx = adjacency.rowidx; + auto& colidx = adjacency.colidx; + auto& categories = props.categories; + auto& max_row_offsets = props.max_row_offsets; + auto& num_levels = props.num_levels; + + // Traversing the column tree and annotating the device column subtree + auto num_columns = rowidx.size() - 1; + rmm::device_uvector subtree_nrows(max_row_offsets, stream); + + // 1. removing NC_ERR nodes and their descendants i.e. + // removing the entire subtree rooted at the nodes with category NC_ERR + { + rmm::device_uvector err_ancestors(num_columns, stream); + thrust::for_each_n( + rmm::exec_policy_nosync(stream), + thrust::make_counting_iterator(0), + num_columns, + [rowidx = rowidx.begin(), + colidx = colidx.begin(), + num_levels_ptr = num_levels.data(), + categories = categories.begin(), + err_ancestors = err_ancestors.begin()] __device__(NodeIndexT node) { + auto num_levels = *num_levels_ptr; + err_ancestors[node] = node; + for (int level = 0; level <= num_levels; level++) { + if (err_ancestors[node] == -1 || categories[err_ancestors[node]] == NC_ERR) break; + if (err_ancestors[node] > 0) + err_ancestors[node] = colidx[rowidx[err_ancestors[node]]]; + else + err_ancestors[node] = -1; + } + }); + thrust::gather_if(rmm::exec_policy_nosync(stream), + err_ancestors.begin(), + err_ancestors.end(), + err_ancestors.begin(), + thrust::make_constant_iterator(0), + subtree_nrows.begin(), + [] __device__(auto ancestor) { return ancestor != -1; }); + } + + // 2. Let's do some validation of the column tree based on its properties. + // We will be using these properties to filter nodes later on. 
+ // =========================================================================== + // (i) Every node v is of type string, val, field name, list or struct. + // (ii) String and val cannot have any children i.e. they can only be leaf nodes + // (iii) If v is a field name, it can have struct, list, string and val as children. + // (iv) If v is a struct, it can have a field name as child + // (v) If v is a list, it can have string, val, list or struct as child + // (vi) There can only be at most one string and one val child for a given node, but many struct, + // list and field name children. + // (vii) When mixed type support is disabled - + // (a) A mix of lists and structs in the same column is not supported i.e a field name and + // list node cannot have both list and struct as children + // (b) If there is a mix of str/val + // and list/struct in the same column, then str/val is discarded + + // Validation of (vii)(a) + { + if (!reader_options.is_enabled_mixed_types_as_string()) { + auto num_field_and_list_nodes = + thrust::count_if(rmm::exec_policy_nosync(stream), + categories.begin(), + categories.end(), + [] __device__(auto const ctg) { return ctg == NC_FN || ctg == NC_LIST; }); + rmm::device_uvector field_and_list_nodes(num_field_and_list_nodes, stream); + thrust::partition_copy(rmm::exec_policy_nosync(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(0) + num_columns, + field_and_list_nodes.begin(), + thrust::make_discard_iterator(), + [categories = categories.begin()] __device__(NodeIndexT node) { + return categories[node] == NC_LIST || categories[node] == NC_FN; + }); + bool is_valid_tree = thrust::all_of( + rmm::exec_policy_nosync(stream), + field_and_list_nodes.begin(), + field_and_list_nodes.end(), + [rowidx = rowidx.begin(), + colidx = colidx.begin(), + categories = categories.begin()] __device__(NodeIndexT node) { + NodeIndexT first_child_pos = rowidx[node] + 1; + NodeIndexT last_child_pos = rowidx[node + 1] - 1; + bool 
has_struct_child = false; + bool has_list_child = false; + for (NodeIndexT child_pos = first_child_pos; child_pos <= last_child_pos; child_pos++) { + if (categories[colidx[child_pos]] == NC_STRUCT) has_struct_child = true; + if (categories[colidx[child_pos]] == NC_LIST) has_list_child = true; + } + return !has_struct_child && !has_list_child; + }); + + CUDF_EXPECTS(is_valid_tree, + "Property 7a is not satisfied i.e. mix of LIST and STRUCT in same column is not " + "supported when mixed type support is disabled"); + } + } + + // Validation of (vii)(b) i.e. ignore_vals in previous implementation + // We need to identify leaf nodes that have non-leaf sibling nodes + // i.e. we need to ignore leaf nodes at level above the last level + // idea: leaf nodes have adjacency 1. So if there is an adjacency 1 inbetween non-one + // adjacencies, then found the leaf node. Corner case: consider the last set of consecutive + // ones. If the leftmost of those ones (say node u) has a non-leaf sibling + // (can be found by looking at the adjacencies of the siblings + // (which are in turn found from the colidx of the parent u), then this leaf node should be + // ignored, otherwise all good. + { + if (!reader_options.is_enabled_mixed_types_as_string()) { + // TODO: use cub segmented reduce here! 
+ rmm::device_uvector num_adjacent_nodes( + num_columns + 1, + stream); // since adjacent_difference requires that the output have the same length as + // input + thrust::adjacent_difference( + rmm::exec_policy_nosync(stream), rowidx.begin(), rowidx.end(), num_adjacent_nodes.begin()); + auto num_leaf_nodes = thrust::count_if(rmm::exec_policy_nosync(stream), + num_adjacent_nodes.begin() + 1, + num_adjacent_nodes.end(), + [] __device__(auto const adj) { return adj == 1; }); + rmm::device_uvector leaf_nodes(num_leaf_nodes, stream); + thrust::copy_if(rmm::exec_policy_nosync(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(0) + num_columns, + leaf_nodes.begin(), + [num_adjacent_nodes = num_adjacent_nodes.begin()] __device__(size_t node) { + return num_adjacent_nodes[node] == 1; + }); + + auto rev_node_it = + thrust::make_reverse_iterator(thrust::make_counting_iterator(0) + num_columns); + auto rev_leaf_nodes_it = thrust::make_reverse_iterator(leaf_nodes.begin()); + // the node number that could be the leftmost leaf node is given by u = + // *(is_leftmost_leaf.second + // - 1) + auto is_leftmost_leaf = thrust::mismatch( + rmm::exec_policy_nosync(stream), rev_node_it, rev_node_it + num_columns, rev_leaf_nodes_it); + NodeIndexT leftmost_leaf_node = leaf_nodes.element( + num_leaf_nodes - thrust::distance(rev_leaf_nodes_it, is_leftmost_leaf.second - 1) - 1, + stream); + + // upper_bound search for u in rowidx for parent node v. Now check if any of the other child + // nodes of v is non-leaf i.e check if u is the first child of v. If yes, then + // leafmost_leaf_node is the leftmost leaf node. 
Otherwise, discard all children of v after + // and including u + auto parent_it = thrust::upper_bound( + rmm::exec_policy_nosync(stream), rowidx.begin(), rowidx.end(), leftmost_leaf_node); + NodeIndexT parent = thrust::distance(rowidx.begin(), parent_it - 1); + NodeIndexT parent_adj_start = rowidx.element(parent, stream); + NodeIndexT parent_adj_end = rowidx.element(parent + 1, stream); + auto childnum_it = thrust::lower_bound(rmm::exec_policy_nosync(stream), + colidx.begin() + parent_adj_start, + colidx.begin() + parent_adj_end, + leftmost_leaf_node); + + auto retained_leaf_nodes_it = + leaf_nodes.begin() + num_leaf_nodes - + thrust::distance(rev_leaf_nodes_it, is_leftmost_leaf.second - 1) - 1; + if (childnum_it != colidx.begin() + parent_adj_start + 1) { + // discarding from u to last child of parent + retained_leaf_nodes_it += thrust::distance(childnum_it, colidx.begin() + parent_adj_end); + } + // now, all nodes from leaf_nodes.begin() to retained_leaf_nodes_it need to be discarded i.e. + // they are part of ignore_vals + thrust::scatter(rmm::exec_policy_nosync(stream), + thrust::make_constant_iterator(0), + thrust::make_constant_iterator(0) + + thrust::distance(leaf_nodes.begin(), retained_leaf_nodes_it), + leaf_nodes.begin(), + subtree_nrows.begin()); + } + } + + // (Optional?) 
TODO: Validation of the remaining column tree properties
+
+  return subtree_nrows;
+}
+
+// NOTE(review): this view is an extracted diff — template arguments on rmm::device_uvector /
+// device_span and similar appear stripped by the extraction; restore them from the original
+// patch before compiling.
+/**
+ * @brief Allocates device memory for the properties (string offsets/lengths, list child
+ * offsets, validity) of the retained column subtree.
+ *
+ * Columns with subtree_nrows == 0 are excluded. String/value columns are partitioned first,
+ * then list columns; buffer sizes are the sums of the corresponding max row offsets (plus
+ * per-column slack). The validity mask is created ALL_NULL and set later.
+ *
+ * @param subtree_nrows Per-column row counts; zero marks a discarded column
+ * @param props Column tree properties (categories, max row offsets)
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ */
+device_column_subtree_properties allocate_device_column_subtree_properties(
+  device_span subtree_nrows,
+  const column_tree_properties& props,
+  rmm::cuda_stream_view stream)
+{
+  auto num_columns     = subtree_nrows.size();
+  auto& categories     = props.categories;
+  auto& max_row_offsets = props.max_row_offsets;
+
+  // count retained columns (nonzero row count)
+  auto num_subtree_nodes = thrust::count_if(rmm::exec_policy_nosync(stream),
+                                            subtree_nrows.begin(),
+                                            subtree_nrows.end(),
+                                            [] __device__(auto mro) { return mro != 0; });
+  // For the subtree, we allocate memory for device column subtree properties
+  rmm::device_uvector subtree_properties_map(num_subtree_nodes, stream);
+  thrust::copy_if(rmm::exec_policy_nosync(stream),
+                  thrust::make_counting_iterator(0),
+                  thrust::make_counting_iterator(0) + num_columns,
+                  subtree_nrows.begin(),
+                  subtree_properties_map.begin(),
+                  [] __device__(auto mro) { return mro != 0; });
+  // TODO: three way partitioning in cub::If
+  // partition string/value columns to the front of the map
+  auto str_partitioning_idx_it =
+    thrust::partition(rmm::exec_policy(stream),
+                      subtree_properties_map.begin(),
+                      subtree_properties_map.end(),
+                      [categories = categories.begin()] __device__(NodeIndexT node) {
+                        return categories[node] == NC_STR || categories[node] == NC_VAL;
+                      });
+  auto str_val_end = thrust::distance(subtree_properties_map.begin(), str_partitioning_idx_it);
+  auto max_row_offsets_it =
+    thrust::make_permutation_iterator(max_row_offsets.begin(), subtree_properties_map.begin());
+  // total string storage = sum(max_row_offsets) over str/val columns + one extra per column
+  size_type string_offsets_size =
+    thrust::reduce(rmm::exec_policy(stream), max_row_offsets_it, max_row_offsets_it + str_val_end) +
+    str_val_end;
+  rmm::device_uvector string_offsets(string_offsets_size, stream);
+  rmm::device_uvector string_lengths(string_offsets_size, stream);
+
+  // partition list columns next (after the str/val prefix)
+  auto list_partitioning_idx_it =
+    thrust::partition(rmm::exec_policy(stream),
+                      str_partitioning_idx_it,
+                      subtree_properties_map.end(),
+                      [categories = categories.begin()] __device__(NodeIndexT node) {
+                        return categories[node] == NC_LIST;
+                      });
+  auto list_end = thrust::distance(subtree_properties_map.begin(), list_partitioning_idx_it);
+  max_row_offsets_it =
+    thrust::make_permutation_iterator(max_row_offsets.begin(), subtree_properties_map.begin()) +
+    str_val_end;
+  // list offsets need two extra entries per column (size + terminator)
+  size_type child_offsets_size =
+    thrust::reduce(
+      rmm::exec_policy(stream), max_row_offsets_it, max_row_offsets_it + (list_end - str_val_end)) +
+    2 * (list_end - str_val_end);
+  rmm::device_uvector child_offsets(child_offsets_size, stream);
+
+  // one shared validity buffer sized by the total number of retained rows, initially ALL_NULL
+  auto validity_buffer_size =
+    thrust::reduce(rmm::exec_policy(stream), subtree_nrows.begin(), subtree_nrows.end());
+  auto validity = cudf::detail::create_null_mask(validity_buffer_size,
+                                                 cudf::mask_state::ALL_NULL,
+                                                 stream,
+                                                 rmm::mr::get_current_device_resource());
+
+  return device_column_subtree_properties{std::move(string_offsets),
+                                          std::move(string_lengths),
+                                          std::move(child_offsets),
+                                          std::move(validity)};
+}
+
+/**
+ * @brief Fills the previously allocated subtree properties: scatters string ranges,
+ * prefix-sums row offsets across retained columns, and sets validity bits for non-list nodes.
+ *
+ * @param d_props Pre-allocated output buffers (see allocate_device_column_subtree_properties)
+ * @param subtree_nrows Per-column row counts; zero marks a discarded column
+ * @param tree Node tree representation of the JSON input
+ * @param original_col_ids Column id of each node
+ * @param row_offsets Row offset of each node; modified in place by the scan below
+ * @param c_props Column tree properties (categories, mapped ids)
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ */
+void initialize_device_column_subtree_properties(device_column_subtree_properties& d_props,
+                                                 device_span subtree_nrows,
+                                                 tree_meta_t& tree,
+                                                 device_span original_col_ids,
+                                                 device_span row_offsets,
+                                                 column_tree_properties& c_props,
+                                                 rmm::cuda_stream_view stream)
+{
+  auto num_nodes   = tree.node_levels.size();
+  auto num_columns = c_props.categories.size();
+  // now we actually do the annotation
+  // relabel original_col_ids with the positions of the csr_unique_col_ids with same element. How do
+  // we accomplish this? one idea is to sort the row offsets by node level. Just the way we did this
+  // for the csr_column_ids sort original_col_ids, extract subtree based on the annotation above,
+  // and then initialize.
+  auto [sorted_node_levels, sorted_node_levels_order] =
+    cudf::io::json::detail::stable_sorted_key_order(tree.node_levels, stream);
+  // iterators viewing node data in level order
+  auto row_offsets_it =
+    thrust::make_permutation_iterator(row_offsets.begin(), sorted_node_levels_order.begin());
+  auto node_range_begin_it = thrust::make_permutation_iterator(tree.node_range_begin.begin(),
+                                                               sorted_node_levels_order.begin());
+  auto node_range_end_it = thrust::make_permutation_iterator(tree.node_range_end.begin(),
+                                                             sorted_node_levels_order.begin());
+  auto node_range_lengths_it = thrust::make_transform_iterator(
+    thrust::make_zip_iterator(node_range_begin_it, node_range_end_it),
+    cuda::proclaim_return_type([] __device__(auto range_it) {
+      return thrust::get<1>(range_it) - thrust::get<0>(range_it);
+    }));
+
+  auto node_col_ids_it =
+    thrust::make_permutation_iterator(original_col_ids.begin(), sorted_node_levels_order.begin());
+  auto node_categories_it = thrust::make_permutation_iterator(tree.node_categories.begin(),
+                                                              sorted_node_levels_order.begin());
+
+  // reorder subtree_nrows by mapped column id so it can be indexed by original col id
+  rmm::device_uvector sorted_subtree_nrows(num_columns, stream);
+  thrust::copy(rmm::exec_policy_nosync(stream),
+               subtree_nrows.begin(),
+               subtree_nrows.end(),
+               sorted_subtree_nrows.begin());
+  thrust::sort_by_key(rmm::exec_policy_nosync(stream),
+                      c_props.mapped_ids.begin(),
+                      c_props.mapped_ids.end(),
+                      sorted_subtree_nrows.begin());
+
+  // gather (offset, length) pairs for retained string/value nodes
+  thrust::copy_if(
+    rmm::exec_policy_nosync(stream),
+    thrust::make_zip_iterator(node_range_begin_it, node_range_lengths_it),
+    thrust::make_zip_iterator(node_range_begin_it + num_nodes, node_range_lengths_it + num_nodes),
+    thrust::make_counting_iterator(0),
+    thrust::make_zip_iterator(d_props.string_offsets.begin(), d_props.string_lengths.begin()),
+    [sorted_subtree_nrows = sorted_subtree_nrows.begin(),
+     node_col_ids_it,
+     node_categories_it] __device__(NodeIndexT node) {
+      return sorted_subtree_nrows[node_col_ids_it[node]] &&
+             (node_categories_it[node] == NC_STR || node_categories_it[node] == NC_VAL);
+    });
+
+  // row_offsets need to be prefix summed across columns for validity initialization
+  // TODO: replace replace_if with a transform input iterator and pass that to inclusive scan
+  // NOTE(review): this scan mutates row_offsets in place through the permutation iterator —
+  // confirm callers do not reuse the original per-column row offsets afterwards.
+  thrust::replace_if(
+    rmm::exec_policy_nosync(stream),
+    row_offsets_it,
+    row_offsets_it + num_nodes,
+    thrust::make_counting_iterator(0),
+    [sorted_subtree_nrows = sorted_subtree_nrows.begin(), node_col_ids_it] __device__(
+      NodeIndexT node) { return sorted_subtree_nrows[node_col_ids_it[node]] == 0; },
+    0);
+  thrust::inclusive_scan(
+    rmm::exec_policy_nosync(stream), row_offsets_it, row_offsets_it + num_nodes, row_offsets_it);
+  // set validity bits for retained non-list nodes at their scanned positions
+  thrust::for_each_n(
+    rmm::exec_policy_nosync(stream),
+    thrust::make_counting_iterator(0),
+    num_nodes,
+    [sorted_subtree_nrows = sorted_subtree_nrows.begin(),
+     node_col_ids_it,
+     node_categories_it,
+     row_offsets_it,
+     validity = static_cast(d_props.validity.data())] __device__(NodeIndexT node) {
+      if (sorted_subtree_nrows[node_col_ids_it[node]] && node_categories_it[node] != NC_LIST)
+        cudf::set_bit(validity, row_offsets_it[node]);
+    });
+
+  // scatter list offsets
+}
+
+}  // namespace experimental::detail
+
+namespace detail {
+/**
+ * @brief Checks if all strings in each string column in the tree are nulls.
+ * For non-string columns, it's set as true. If any of rows in a string column is false, it's set as
+ * false.
+ *
+ * @param input Input JSON string device data
+ * @param d_column_tree column tree representation of JSON string
+ * @param tree Node tree representation of the JSON string
+ * @param col_ids Column ids of the nodes in the tree
+ * @param options Parsing options specifying the parsing behaviour
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @return Array of bytes where each byte indicate if it is all nulls string column.
+ */
+// NOTE(review): extracted-diff view — stripped template arguments (rmm::device_uvector<bool>,
+// device_span<char const>, etc.) must be restored from the original patch.
+rmm::device_uvector is_all_nulls_each_column(device_span input,
+                                             tree_meta_t const& d_column_tree,
+                                             tree_meta_t const& tree,
+                                             device_span col_ids,
+                                             cudf::io::json_reader_options const& options,
+                                             rmm::cuda_stream_view stream)
+{
+  auto const num_nodes = col_ids.size();
+  auto const num_cols  = d_column_tree.node_categories.size();
+  // start optimistic: every column is all-null until a non-null literal is seen
+  rmm::device_uvector is_all_nulls(num_cols, stream);
+  thrust::fill(rmm::exec_policy(stream), is_all_nulls.begin(), is_all_nulls.end(), true);
+
+  auto parse_opt = parsing_options(options, stream);
+  thrust::for_each_n(
+    rmm::exec_policy(stream),
+    thrust::counting_iterator(0),
+    num_nodes,
+    [options           = parse_opt.view(),
+     data              = input.data(),
+     column_categories = d_column_tree.node_categories.begin(),
+     col_ids           = col_ids.begin(),
+     range_begin       = tree.node_range_begin.begin(),
+     range_end         = tree.node_range_end.begin(),
+     is_all_nulls      = is_all_nulls.begin()] __device__(size_type i) {
+      auto const node_category = column_categories[col_ids[i]];
+      if (node_category == NC_STR or node_category == NC_VAL) {
+        // compare the node's raw text against the null-literal trie (e.g. "null")
+        auto const is_null_literal = serialized_trie_contains(
+          options.trie_na,
+          {data + range_begin[i], static_cast(range_end[i] - range_begin[i])});
+        // benign race: concurrent writers all store 'false'
+        if (!is_null_literal) is_all_nulls[col_ids[i]] = false;
+      }
+    });
+  return is_all_nulls;
+}
+
+/**
+ * @brief Get the column indices for the values column for array of arrays rows
+ *
+ * @param row_array_children_level The level of the row array's children
+ * @param d_tree The tree metadata
+ * @param col_ids The column ids
+ * @param num_columns The number of columns
+ * @param stream The stream to use
+ * @return The value columns' indices
+ */
+rmm::device_uvector get_values_column_indices(TreeDepthT const row_array_children_level,
+                                              tree_meta_t const& d_tree,
+                                              device_span col_ids,
+                                              size_type const num_columns,
+                                              rmm::cuda_stream_view stream)
+{
+  CUDF_FUNC_RANGE();
+  // nodes at the requested level and their positional indices within their parent array
+  auto [level2_nodes, level2_indices] = get_array_children_indices(
+    row_array_children_level, d_tree.node_levels, d_tree.parent_node_ids, stream);
+  auto col_id_location = thrust::make_permutation_iterator(col_ids.begin(), level2_nodes.begin());
+  rmm::device_uvector values_column_indices(num_columns, stream);
+  // scatter each child's positional index to its column id slot
+  thrust::scatter(rmm::exec_policy(stream),
+                  level2_indices.begin(),
+                  level2_indices.end(),
+                  col_id_location,
+                  values_column_indices.begin());
+  return values_column_indices;
+}
+
+/**
+ * @brief Copies strings specified by pair of begin, end offsets to host vector of strings.
+ *
+ * @param input String device buffer
+ * @param node_range_begin Begin offset of the strings
+ * @param node_range_end End offset of the strings
+ * @param stream CUDA stream
+ * @return Vector of strings
+ */
+std::vector copy_strings_to_host_sync(
+  device_span input,
+  device_span node_range_begin,
+  device_span node_range_end,
+  rmm::cuda_stream_view stream)
+{
+  CUDF_FUNC_RANGE();
+  auto const num_strings = node_range_begin.size();
+  rmm::device_uvector string_offsets(num_strings, stream);
+  rmm::device_uvector string_lengths(num_strings, stream);
+  // convert (begin, end) pairs into (offset, length) pairs
+  auto d_offset_pairs = thrust::make_zip_iterator(node_range_begin.begin(), node_range_end.begin());
+  thrust::transform(rmm::exec_policy(stream),
+                    d_offset_pairs,
+                    d_offset_pairs + num_strings,
+                    thrust::make_zip_iterator(string_offsets.begin(), string_lengths.begin()),
+                    [] __device__(auto const& offsets) {
+                      // Note: first character for non-field columns
+                      return thrust::make_tuple(
+                        static_cast(thrust::get<0>(offsets)),
+                        static_cast(thrust::get<1>(offsets) - thrust::get<0>(offsets)));
+                    });
+
+  cudf::io::parse_options_view options_view{};
+  options_view.quotechar  = '\0';  // no quotes
+  options_view.keepquotes = true;
+  auto d_offset_length_it =
+    thrust::make_zip_iterator(string_offsets.begin(), string_lengths.begin());
+  // parse the referenced byte ranges into a STRING column on device
+  auto d_column_names = parse_data(input.data(),
+                                   d_offset_length_it,
+                                   num_strings,
+                                   data_type{type_id::STRING},
+                                   rmm::device_buffer{},
+                                   0,
+                                   options_view,
+                                   stream,
+                                   rmm::mr::get_current_device_resource());
+  // copy chars + offsets to host and rebuild std::string values
+  auto to_host = [stream](auto const& col) {
+    if (col.is_empty()) return std::vector{};
+    auto const scv     = cudf::strings_column_view(col);
+    auto const h_chars = cudf::detail::make_std_vector_async(
+      cudf::device_span(scv.chars_begin(stream), scv.chars_size(stream)), stream);
+    auto const h_offsets = cudf::detail::make_std_vector_async(
+      cudf::device_span(scv.offsets().data() + scv.offset(),
+                        scv.size() + 1),
+      stream);
+    stream.synchronize();
+
+    // build std::string vector from chars and offsets
+    std::vector host_data;
+    host_data.reserve(col.size());
+    std::transform(
+      std::begin(h_offsets),
+      std::end(h_offsets) - 1,
+      std::begin(h_offsets) + 1,
+      std::back_inserter(host_data),
+      [&](auto start, auto end) { return std::string(h_chars.data() + start, end - start); });
+    return host_data;
+  };
+  return to_host(d_column_names->view());
+}
+
+/**
+ * @brief Constructs `d_json_column` from node tree representation
+ * Newly constructed columns are insert into `root`'s children.
+ * `root` must be a list type.
+ *
+ * @param input Input JSON string device data
+ * @param tree Node tree representation of the JSON string
+ * @param col_ids Column ids of the nodes in the tree
+ * @param row_offsets Row offsets of the nodes in the tree
+ * @param root Root node of the `d_json_column` tree
+ * @param is_array_of_arrays Whether the tree is an array of arrays
+ * @param options Parsing options specifying the parsing behaviour
+ * options affecting behaviour are
+ *   is_enabled_lines: Whether the input is a line-delimited JSON
+ *   is_enabled_mixed_types_as_string: Whether to enable reading mixed types as string
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the device memory
+ * of child_offets and validity members of `d_json_column`
+ */
+// NOTE(review): extracted-diff view — template arguments (device_span<char const>,
+// std::unordered_map<NodeIndexT, std::reference_wrapper<...>>, etc.) appear stripped by the
+// extraction; restore from the original patch before compiling.
+void make_device_json_column(device_span input,
+                             tree_meta_t& tree,
+                             device_span col_ids,
+                             device_span row_offsets,
+                             device_json_column& root,
+                             bool is_array_of_arrays,
+                             cudf::io::json_reader_options const& options,
+                             rmm::cuda_stream_view stream,
+                             rmm::device_async_resource_ref mr)
+{
+  CUDF_FUNC_RANGE();
+
+  bool const is_enabled_lines = options.is_enabled_lines();
+  bool const is_enabled_mixed_types_as_string = options.is_enabled_mixed_types_as_string();
+  auto const num_nodes = col_ids.size();
+  rmm::device_uvector sorted_col_ids(col_ids.size(), stream);  // make a copy
+  thrust::copy(rmm::exec_policy(stream), col_ids.begin(), col_ids.end(), sorted_col_ids.begin());
+
+  // sort by {col_id} on {node_ids} stable
+  rmm::device_uvector node_ids(col_ids.size(), stream);
+  thrust::sequence(rmm::exec_policy(stream), node_ids.begin(), node_ids.end());
+  thrust::stable_sort_by_key(
+    rmm::exec_policy(stream), sorted_col_ids.begin(), sorted_col_ids.end(), node_ids.begin());
+
+  // column id of the row array: node 0 for line-delimited input, node 1 otherwise
+  NodeIndexT const row_array_parent_col_id = [&]() {
+    NodeIndexT value = parent_node_sentinel;
+    if (!col_ids.empty()) {
+      auto const list_node_index = is_enabled_lines ? 0 : 1;
+      CUDF_CUDA_TRY(cudaMemcpyAsync(&value,
+                                    col_ids.data() + list_node_index,
+                                    sizeof(NodeIndexT),
+                                    cudaMemcpyDefault,
+                                    stream.value()));
+      stream.synchronize();
+    }
+    return value;
+  }();
+
+  // 1. gather column information.
+  auto [d_column_tree, d_unique_col_ids, d_max_row_offsets] =
+    reduce_to_column_tree(tree,
+                          col_ids,
+                          sorted_col_ids,
+                          node_ids,
+                          row_offsets,
+                          is_array_of_arrays,
+                          row_array_parent_col_id,
+                          stream);
+  auto num_columns    = d_unique_col_ids.size();
+  auto unique_col_ids = cudf::detail::make_std_vector_async(d_unique_col_ids, stream);
+  auto column_categories =
+    cudf::detail::make_std_vector_async(d_column_tree.node_categories, stream);
+  auto column_parent_ids =
+    cudf::detail::make_std_vector_async(d_column_tree.parent_node_ids, stream);
+  auto column_range_beg =
+    cudf::detail::make_std_vector_async(d_column_tree.node_range_begin, stream);
+  auto max_row_offsets = cudf::detail::make_std_vector_async(d_max_row_offsets, stream);
+  std::vector column_names = copy_strings_to_host_sync(
+    input, d_column_tree.node_range_begin, d_column_tree.node_range_end, stream);
+  stream.synchronize();
+  // array of arrays column names
+  if (is_array_of_arrays) {
+    TreeDepthT const row_array_children_level = is_enabled_lines ? 1 : 2;
+    auto values_column_indices =
+      get_values_column_indices(row_array_children_level, tree, col_ids, num_columns, stream);
+    auto h_values_column_indices =
+      cudf::detail::make_std_vector_async(values_column_indices, stream);
+    stream.synchronize();
+    // name value columns by their positional index within the row array
+    std::transform(unique_col_ids.begin(),
+                   unique_col_ids.end(),
+                   column_names.begin(),
+                   column_names.begin(),
+                   [&h_values_column_indices, &column_parent_ids, row_array_parent_col_id](
+                     auto col_id, auto name) mutable {
+                     return column_parent_ids[col_id] == row_array_parent_col_id
+                              ? std::to_string(h_values_column_indices[col_id])
+                              : name;
+                   });
+  }
+
+  // map node category -> output json column type
+  auto to_json_col_type = [](auto category) {
+    switch (category) {
+      case NC_STRUCT: return json_col_t::StructColumn;
+      case NC_LIST: return json_col_t::ListColumn;
+      case NC_STR: [[fallthrough]];
+      case NC_VAL: return json_col_t::StringColumn;
+      default: return json_col_t::Unknown;
+    }
+  };
+  auto init_to_zero = [stream](auto& v) {
+    thrust::uninitialized_fill(rmm::exec_policy_nosync(stream), v.begin(), v.end(), 0);
+  };
+
+  // size and zero-init a column's buffers according to its category
+  auto initialize_json_columns = [&](auto i, auto& col) {
+    if (column_categories[i] == NC_ERR || column_categories[i] == NC_FN) {
+      return;
+    } else if (column_categories[i] == NC_VAL || column_categories[i] == NC_STR) {
+      col.string_offsets.resize(max_row_offsets[i] + 1, stream);
+      col.string_lengths.resize(max_row_offsets[i] + 1, stream);
+      init_to_zero(col.string_offsets);
+      init_to_zero(col.string_lengths);
+    } else if (column_categories[i] == NC_LIST) {
+      col.child_offsets.resize(max_row_offsets[i] + 2, stream);
+      init_to_zero(col.child_offsets);
+    }
+    col.num_rows = max_row_offsets[i] + 1;
+    col.validity =
+      cudf::detail::create_null_mask(col.num_rows, cudf::mask_state::ALL_NULL, stream, mr);
+    col.type = to_json_col_type(column_categories[i]);
+  };
+
+  // convert an existing nested column into a string column (mixed-type handling)
+  auto reinitialize_as_string = [&](auto i, auto& col) {
+    col.string_offsets.resize(max_row_offsets[i] + 1, stream);
+    col.string_lengths.resize(max_row_offsets[i] + 1, stream);
+    init_to_zero(col.string_offsets);
+    init_to_zero(col.string_lengths);
+    col.num_rows = max_row_offsets[i] + 1;
+    col.validity =
+      cudf::detail::create_null_mask(col.num_rows, cudf::mask_state::ALL_NULL, stream, mr);
+    col.type = json_col_t::StringColumn;
+    // destroy references of all child columns after this step, by calling remove_child_columns
+  };
+
+  path_from_tree tree_path{column_categories,
+                           column_parent_ids,
+                           column_names,
+                           is_array_of_arrays,
+                           row_array_parent_col_id};
+
+  // 2. generate nested columns tree and its device_memory
+  // reorder unique_col_ids w.r.t. column_range_begin for order of column to be in field order.
+  auto h_range_col_id_it =
+    thrust::make_zip_iterator(column_range_beg.begin(), unique_col_ids.begin());
+  std::sort(h_range_col_id_it, h_range_col_id_it + num_columns, [](auto const& a, auto const& b) {
+    return thrust::get<0>(a) < thrust::get<0>(b);
+  });
+
+  std::vector is_str_column_all_nulls{};
+  if (is_enabled_mixed_types_as_string) {
+    is_str_column_all_nulls = cudf::detail::make_std_vector_sync(
+      is_all_nulls_each_column(input, d_column_tree, tree, col_ids, options, stream), stream);
+  }
+
+  // use hash map because we may skip field name's col_ids
+  std::unordered_map> columns;
+  // map{parent_col_id, child_col_name}> = child_col_id, used for null value column tracking
+  std::map, NodeIndexT> mapped_columns;
+  // find column_ids which are values, but should be ignored in validity
+  auto ignore_vals = cudf::detail::make_host_vector(num_columns, stream);
+  std::vector is_mixed_type_column(num_columns, 0);
+  std::vector is_pruned(num_columns, 0);
+  columns.try_emplace(parent_node_sentinel, std::ref(root));
+
+  // recursively detach a column's children and drop their bookkeeping entries
+  std::function remove_child_columns =
+    [&](NodeIndexT this_col_id, device_json_column& col) {
+      for (auto col_name : col.column_order) {
+        auto child_id                = mapped_columns[{this_col_id, col_name}];
+        is_mixed_type_column[child_id] = 1;
+        remove_child_columns(child_id, col.child_columns.at(col_name));
+        mapped_columns.erase({this_col_id, col_name});
+        columns.erase(child_id);
+      }
+      col.child_columns.clear();  // their references are deleted above.
+      col.column_order.clear();
+    };
+
+  // resolve a column's display name and its effective (non-field-name) parent column id
+  auto name_and_parent_index = [&is_array_of_arrays,
+                                &row_array_parent_col_id,
+                                &column_parent_ids,
+                                &column_categories,
+                                &column_names](auto this_col_id) {
+    std::string name   = "";
+    auto parent_col_id = column_parent_ids[this_col_id];
+    if (parent_col_id == parent_node_sentinel || column_categories[parent_col_id] == NC_LIST) {
+      if (is_array_of_arrays && parent_col_id == row_array_parent_col_id) {
+        name = column_names[this_col_id];
+      } else {
+        name = list_child_name;
+      }
+    } else if (column_categories[parent_col_id] == NC_FN) {
+      auto field_name_col_id = parent_col_id;
+      parent_col_id          = column_parent_ids[parent_col_id];
+      name                   = column_names[field_name_col_id];
+    } else {
+      CUDF_FAIL("Unexpected parent column category");
+    }
+    return std::pair{name, parent_col_id};
+  };
+
+  // Prune columns that are not required to be parsed.
+  if (options.is_enabled_prune_columns()) {
+    for (auto const this_col_id : unique_col_ids) {
+      if (column_categories[this_col_id] == NC_ERR || column_categories[this_col_id] == NC_FN) {
+        continue;
+      }
+      // Struct, List, String, Value
+      auto [name, parent_col_id] = name_and_parent_index(this_col_id);
+      // get path of this column, and get its dtype if present in options
+      auto const nt                 = tree_path.get_path(this_col_id);
+      std::optional const user_dtype = get_path_data_type(nt, options);
+      if (!user_dtype.has_value() and parent_col_id != parent_node_sentinel) {
+        is_pruned[this_col_id] = 1;
+        continue;
+      } else {
+        // make sure all its parents are not pruned.
+        while (parent_col_id != parent_node_sentinel and is_pruned[parent_col_id] == 1) {
+          is_pruned[parent_col_id] = 0;
+          parent_col_id            = column_parent_ids[parent_col_id];
+        }
+      }
+    }
+  }
+
+  // Build the column tree, also, handles mixed types.
+  for (auto const this_col_id : unique_col_ids) {
+    if (column_categories[this_col_id] == NC_ERR || column_categories[this_col_id] == NC_FN) {
+      continue;
+    }
+    // Struct, List, String, Value
+    auto [name, parent_col_id] = name_and_parent_index(this_col_id);
+
+    // if parent is mixed type column or this column is pruned, ignore this column.
+    if (parent_col_id != parent_node_sentinel &&
+        (is_mixed_type_column[parent_col_id] || is_pruned[this_col_id])) {
+      ignore_vals[this_col_id] = 1;
+      if (is_mixed_type_column[parent_col_id]) { is_mixed_type_column[this_col_id] = 1; }
+      continue;
+    }
+
+    // If the child is already found,
+    // replace if this column is a nested column and the existing was a value column
+    // ignore this column if this column is a value column and the existing was a nested column
+    auto it = columns.find(parent_col_id);
+    CUDF_EXPECTS(it != columns.end(), "Parent column not found");
+    auto& parent_col = it->second.get();
+    bool replaced    = false;
+    if (mapped_columns.count({parent_col_id, name}) > 0) {
+      auto const old_col_id = mapped_columns[{parent_col_id, name}];
+      // If mixed type as string is enabled, make both of them strings and merge them.
+      // All child columns will be ignored when parsing.
+      if (is_enabled_mixed_types_as_string) {
+        bool const is_mixed_type = [&]() {
+          // If new or old is STR and they are all not null, make it mixed type, else ignore.
+          if (column_categories[this_col_id] == NC_VAL ||
+              column_categories[this_col_id] == NC_STR) {
+            if (is_str_column_all_nulls[this_col_id]) return false;
+          }
+          if (column_categories[old_col_id] == NC_VAL || column_categories[old_col_id] == NC_STR) {
+            if (is_str_column_all_nulls[old_col_id]) return false;
+          }
+          return true;
+        }();
+        if (is_mixed_type) {
+          is_mixed_type_column[this_col_id] = 1;
+          is_mixed_type_column[old_col_id]  = 1;
+          // if old col type (not cat) is list or struct, replace with string.
+          auto& col = columns.at(old_col_id).get();
+          if (col.type == json_col_t::ListColumn or col.type == json_col_t::StructColumn) {
+            reinitialize_as_string(old_col_id, col);
+            remove_child_columns(old_col_id, col);
+            // all its children (which are already inserted) are ignored later.
+          }
+          col.forced_as_string_column = true;
+          columns.try_emplace(this_col_id, columns.at(old_col_id));
+          continue;
+        }
+      }
+
+      if (column_categories[this_col_id] == NC_VAL || column_categories[this_col_id] == NC_STR) {
+        ignore_vals[this_col_id] = 1;
+        continue;
+      }
+      if (column_categories[old_col_id] == NC_VAL || column_categories[old_col_id] == NC_STR) {
+        // remap
+        ignore_vals[old_col_id] = 1;
+        mapped_columns.erase({parent_col_id, name});
+        columns.erase(old_col_id);
+        parent_col.child_columns.erase(name);
+        replaced = true;  // to skip duplicate name in column_order
+      } else {
+        // If this is a nested column but we're trying to insert either (a) a list node into a
+        // struct column or (b) a struct node into a list column, we fail
+        CUDF_EXPECTS(not((column_categories[old_col_id] == NC_LIST and
+                          column_categories[this_col_id] == NC_STRUCT) or
+                         (column_categories[old_col_id] == NC_STRUCT and
+                          column_categories[this_col_id] == NC_LIST)),
+                     "A mix of lists and structs within the same column is not supported");
+      }
+    }
+
+    if (is_enabled_mixed_types_as_string) {
+      // get path of this column, check if it is a struct forced as string, and enforce it
+      auto const nt                 = tree_path.get_path(this_col_id);
+      std::optional const user_dtype = get_path_data_type(nt, options);
+      if (column_categories[this_col_id] == NC_STRUCT and user_dtype.has_value() and
+          user_dtype.value().id() == type_id::STRING) {
+        is_mixed_type_column[this_col_id] = 1;
+        column_categories[this_col_id]    = NC_STR;
+      }
+    }
+
+    CUDF_EXPECTS(parent_col.child_columns.count(name) == 0, "duplicate column name: " + name);
+    // move into parent
+    device_json_column col(stream, mr);
+    initialize_json_columns(this_col_id, col);
+    auto inserted = parent_col.child_columns.try_emplace(name, std::move(col)).second;
+    CUDF_EXPECTS(inserted, "child column insertion failed, duplicate column name in the parent");
+    if (not replaced) parent_col.column_order.push_back(name);
+    columns.try_emplace(this_col_id, std::ref(parent_col.child_columns.at(name)));
+    mapped_columns.try_emplace(std::make_pair(parent_col_id, name), this_col_id);
+  }
+
+  if (is_enabled_mixed_types_as_string) {
+    // ignore all children of mixed type columns
+    for (auto const this_col_id : unique_col_ids) {
+      auto parent_col_id = column_parent_ids[this_col_id];
+      if (parent_col_id != parent_node_sentinel and is_mixed_type_column[parent_col_id] == 1) {
+        is_mixed_type_column[this_col_id] = 1;
+        ignore_vals[this_col_id]          = 1;
+        columns.erase(this_col_id);
+      }
+      // Convert only mixed type columns as string (so to copy), but not its children
+      if (parent_col_id != parent_node_sentinel and is_mixed_type_column[parent_col_id] == 0 and
+          is_mixed_type_column[this_col_id] == 1)
+        column_categories[this_col_id] = NC_STR;
+    }
+    // NOTE(review): return code of cudaMemcpyAsync is ignored here — consider CUDF_CUDA_TRY
+    // for consistency with the copy above.
+    cudaMemcpyAsync(d_column_tree.node_categories.begin(),
+                    column_categories.data(),
+                    column_categories.size() * sizeof(column_categories[0]),
+                    cudaMemcpyDefault,
+                    stream.value());
+  }
+
+  // restore unique_col_ids order
+  std::sort(h_range_col_id_it, h_range_col_id_it + num_columns, [](auto const& a, auto const& b) {
+    return thrust::get<1>(a) < thrust::get<1>(b);
+  });
+  // move columns data to device.
+  auto columns_data = cudf::detail::make_host_vector(num_columns, stream);
+  for (auto& [col_id, col_ref] : columns) {
+    if (col_id == parent_node_sentinel) continue;
+    auto& col             = col_ref.get();
+    columns_data[col_id] = json_column_data{col.string_offsets.data(),
+                                            col.string_lengths.data(),
+                                            col.child_offsets.data(),
+                                            static_cast(col.validity.data())};
+  }
+
+  auto d_ignore_vals = cudf::detail::make_device_uvector_async(
+    ignore_vals, stream, rmm::mr::get_current_device_resource());
+  auto d_columns_data = cudf::detail::make_device_uvector_async(
+    columns_data, stream, rmm::mr::get_current_device_resource());
+
+  // 3. scatter string offsets to respective columns, set validity bits
+  thrust::for_each_n(
+    rmm::exec_policy(stream),
+    thrust::counting_iterator(0),
+    num_nodes,
+    [column_categories = d_column_tree.node_categories.begin(),
+     col_ids           = col_ids.begin(),
+     row_offsets       = row_offsets.begin(),
+     range_begin       = tree.node_range_begin.begin(),
+     range_end         = tree.node_range_end.begin(),
+     d_ignore_vals     = d_ignore_vals.begin(),
+     d_columns_data    = d_columns_data.begin()] __device__(size_type i) {
+      if (d_ignore_vals[col_ids[i]]) return;
+      auto const node_category = column_categories[col_ids[i]];
+      switch (node_category) {
+        case NC_STRUCT: set_bit(d_columns_data[col_ids[i]].validity, row_offsets[i]); break;
+        case NC_LIST: set_bit(d_columns_data[col_ids[i]].validity, row_offsets[i]); break;
+        case NC_STR: [[fallthrough]];
+        case NC_VAL:
+          if (d_ignore_vals[col_ids[i]]) break;
+          set_bit(d_columns_data[col_ids[i]].validity, row_offsets[i]);
+          d_columns_data[col_ids[i]].string_offsets[row_offsets[i]] = range_begin[i];
+          d_columns_data[col_ids[i]].string_lengths[row_offsets[i]] = range_end[i] - range_begin[i];
+          break;
+        default: break;
+      }
+    });
+
+  // 4. scatter List offset
+  // copy_if only node's whose parent is list, (node_id, parent_col_id)
+  // stable_sort by parent_col_id of {node_id}.
+  // For all unique parent_node_id of (i==0, i-1!=i), write start offset.
+  // (i==last, i+1!=i), write end offset.
+  // unique_copy_by_key {parent_node_id} {row_offset} to
+  // col[parent_col_id].child_offsets[row_offset[parent_node_id]]
+
+  auto& parent_col_ids = sorted_col_ids;  // reuse sorted_col_ids
+  auto parent_col_id = thrust::make_transform_iterator(
+    thrust::make_counting_iterator(0),
+    cuda::proclaim_return_type(
+      [col_ids         = col_ids.begin(),
+       parent_node_ids = tree.parent_node_ids.begin()] __device__(size_type node_id) {
+        return parent_node_ids[node_id] == parent_node_sentinel ? parent_node_sentinel
+                                                                : col_ids[parent_node_ids[node_id]];
+      }));
+  auto const list_children_end = thrust::copy_if(
+    rmm::exec_policy(stream),
+    thrust::make_zip_iterator(thrust::make_counting_iterator(0), parent_col_id),
+    thrust::make_zip_iterator(thrust::make_counting_iterator(0), parent_col_id) +
+      num_nodes,
+    thrust::make_counting_iterator(0),
+    thrust::make_zip_iterator(node_ids.begin(), parent_col_ids.begin()),
+    [d_ignore_vals     = d_ignore_vals.begin(),
+     parent_node_ids   = tree.parent_node_ids.begin(),
+     column_categories = d_column_tree.node_categories.begin(),
+     col_ids           = col_ids.begin()] __device__(size_type node_id) {
+      auto parent_node_id = parent_node_ids[node_id];
+      return parent_node_id != parent_node_sentinel and
+             column_categories[col_ids[parent_node_id]] == NC_LIST and
+             (!d_ignore_vals[col_ids[parent_node_id]]);
+    });
+
+  auto const num_list_children =
+    list_children_end - thrust::make_zip_iterator(node_ids.begin(), parent_col_ids.begin());
+  thrust::stable_sort_by_key(rmm::exec_policy(stream),
+                             parent_col_ids.begin(),
+                             parent_col_ids.begin() + num_list_children,
+                             node_ids.begin());
+  // write [begin, end) offsets at segment boundaries of each parent list node
+  thrust::for_each_n(
+    rmm::exec_policy(stream),
+    thrust::make_counting_iterator(0),
+    num_list_children,
+    [node_ids        = node_ids.begin(),
+     parent_node_ids = tree.parent_node_ids.begin(),
+     parent_col_ids  = parent_col_ids.begin(),
+     row_offsets     = row_offsets.begin(),
+     d_columns_data  = d_columns_data.begin(),
+     num_list_children] __device__(size_type i) {
+      auto const node_id        = node_ids[i];
+      auto const parent_node_id = parent_node_ids[node_id];
+      // scatter to list_offset
+      if (i == 0 or parent_node_ids[node_ids[i - 1]] != parent_node_id) {
+        d_columns_data[parent_col_ids[i]].child_offsets[row_offsets[parent_node_id]] =
+          row_offsets[node_id];
+      }
+      // last value of list child_offset is its size.
+      if (i == num_list_children - 1 or parent_node_ids[node_ids[i + 1]] != parent_node_id) {
+        d_columns_data[parent_col_ids[i]].child_offsets[row_offsets[parent_node_id] + 1] =
+          row_offsets[node_id] + 1;
+      }
+    });
+
+  // 5. scan on offsets.
+  for (auto& [id, col_ref] : columns) {
+    auto& col = col_ref.get();
+    if (col.type == json_col_t::StringColumn) {
+      thrust::inclusive_scan(rmm::exec_policy_nosync(stream),
+                             col.string_offsets.begin(),
+                             col.string_offsets.end(),
+                             col.string_offsets.begin(),
+                             thrust::maximum{});
+    } else if (col.type == json_col_t::ListColumn) {
+      thrust::inclusive_scan(rmm::exec_policy_nosync(stream),
+                             col.child_offsets.begin(),
+                             col.child_offsets.end(),
+                             col.child_offsets.begin(),
+                             thrust::maximum{});
+    }
+  }
+  stream.synchronize();
+}
+
+}  // namespace detail
+}  // namespace cudf::io::json
diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu
index 17fa7abdffe..f1bf43b5e85 100644
--- a/cpp/src/io/json/json_column.cu
+++ b/cpp/src/io/json/json_column.cu
@@ -16,6 +16,7 @@
 #include "io/utilities/parsing_utils.cuh"
 #include "io/utilities/string_parsing.hpp"
+#include "json_utils.hpp"
 #include "nested_json.hpp"
 #include
@@ -97,848 +98,6 @@ void print_tree(host_span input,
 printf(" (JSON)\n");
 }
-/**
- * @brief Reduces node tree representation to column tree representation.
- * - * @param tree Node tree representation of JSON string - * @param original_col_ids Column ids of nodes - * @param sorted_col_ids Sorted column ids of nodes - * @param ordered_node_ids Node ids of nodes sorted by column ids - * @param row_offsets Row offsets of nodes - * @param is_array_of_arrays Whether the tree is an array of arrays - * @param row_array_parent_col_id Column id of row array, if is_array_of_arrays is true - * @param stream CUDA stream used for device memory operations and kernel launches - * @return A tuple of column tree representation of JSON string, column ids of columns, and - * max row offsets of columns - */ -std::tuple, rmm::device_uvector> -reduce_to_column_tree(tree_meta_t& tree, - device_span original_col_ids, - device_span sorted_col_ids, - device_span ordered_node_ids, - device_span row_offsets, - bool is_array_of_arrays, - NodeIndexT const row_array_parent_col_id, - rmm::cuda_stream_view stream) -{ - CUDF_FUNC_RANGE(); - // 1. column count for allocation - auto const num_columns = - thrust::unique_count(rmm::exec_policy(stream), sorted_col_ids.begin(), sorted_col_ids.end()); - - // 2. reduce_by_key {col_id}, {row_offset}, max. - rmm::device_uvector unique_col_ids(num_columns, stream); - rmm::device_uvector max_row_offsets(num_columns, stream); - auto ordered_row_offsets = - thrust::make_permutation_iterator(row_offsets.begin(), ordered_node_ids.begin()); - thrust::reduce_by_key(rmm::exec_policy(stream), - sorted_col_ids.begin(), - sorted_col_ids.end(), - ordered_row_offsets, - unique_col_ids.begin(), - max_row_offsets.begin(), - thrust::equal_to(), - thrust::maximum()); - - // 3. 
reduce_by_key {col_id}, {node_categories} - custom opp (*+v=*, v+v=v, *+#=E) - rmm::device_uvector column_categories(num_columns, stream); - thrust::reduce_by_key( - rmm::exec_policy(stream), - sorted_col_ids.begin(), - sorted_col_ids.end(), - thrust::make_permutation_iterator(tree.node_categories.begin(), ordered_node_ids.begin()), - unique_col_ids.begin(), - column_categories.begin(), - thrust::equal_to(), - [] __device__(NodeT type_a, NodeT type_b) -> NodeT { - auto is_a_leaf = (type_a == NC_VAL || type_a == NC_STR); - auto is_b_leaf = (type_b == NC_VAL || type_b == NC_STR); - // (v+v=v, *+*=*, *+v=*, *+#=E, NESTED+VAL=NESTED) - // *+*=*, v+v=v - if (type_a == type_b) { - return type_a; - } else if (is_a_leaf) { - // *+v=*, N+V=N - // STRUCT/LIST + STR/VAL = STRUCT/LIST, STR/VAL + FN = ERR, STR/VAL + STR = STR - return type_b == NC_FN ? NC_ERR : (is_b_leaf ? NC_STR : type_b); - } else if (is_b_leaf) { - return type_a == NC_FN ? NC_ERR : (is_a_leaf ? NC_STR : type_a); - } - // *+#=E - return NC_ERR; - }); - - // 4. 
unique_copy parent_node_ids, ranges - rmm::device_uvector column_levels(0, stream); // not required - rmm::device_uvector parent_col_ids(num_columns, stream); - rmm::device_uvector col_range_begin(num_columns, stream); // Field names - rmm::device_uvector col_range_end(num_columns, stream); - rmm::device_uvector unique_node_ids(num_columns, stream); - thrust::unique_by_key_copy(rmm::exec_policy(stream), - sorted_col_ids.begin(), - sorted_col_ids.end(), - ordered_node_ids.begin(), - thrust::make_discard_iterator(), - unique_node_ids.begin()); - thrust::copy_n( - rmm::exec_policy(stream), - thrust::make_zip_iterator( - thrust::make_permutation_iterator(tree.parent_node_ids.begin(), unique_node_ids.begin()), - thrust::make_permutation_iterator(tree.node_range_begin.begin(), unique_node_ids.begin()), - thrust::make_permutation_iterator(tree.node_range_end.begin(), unique_node_ids.begin())), - unique_node_ids.size(), - thrust::make_zip_iterator( - parent_col_ids.begin(), col_range_begin.begin(), col_range_end.begin())); - - // convert parent_node_ids to parent_col_ids - thrust::transform( - rmm::exec_policy(stream), - parent_col_ids.begin(), - parent_col_ids.end(), - parent_col_ids.begin(), - [col_ids = original_col_ids.begin()] __device__(auto parent_node_id) -> size_type { - return parent_node_id == parent_node_sentinel ? 
parent_node_sentinel - : col_ids[parent_node_id]; - }); - - // condition is true if parent is not a list, or sentinel/root - // Special case to return true if parent is a list and is_array_of_arrays is true - auto is_non_list_parent = [column_categories = column_categories.begin(), - is_array_of_arrays, - row_array_parent_col_id] __device__(auto parent_col_id) -> bool { - return !(parent_col_id == parent_node_sentinel || - column_categories[parent_col_id] == NC_LIST && - (!is_array_of_arrays || parent_col_id != row_array_parent_col_id)); - }; - // Mixed types in List children go to different columns, - // so all immediate children of list column should have same max_row_offsets. - // create list's children max_row_offsets array. (initialize to zero) - // atomicMax on children max_row_offsets array. - // gather the max_row_offsets from children row offset array. - { - rmm::device_uvector list_parents_children_max_row_offsets(num_columns, stream); - thrust::fill(rmm::exec_policy(stream), - list_parents_children_max_row_offsets.begin(), - list_parents_children_max_row_offsets.end(), - 0); - thrust::for_each(rmm::exec_policy(stream), - unique_col_ids.begin(), - unique_col_ids.end(), - [column_categories = column_categories.begin(), - parent_col_ids = parent_col_ids.begin(), - max_row_offsets = max_row_offsets.begin(), - list_parents_children_max_row_offsets = - list_parents_children_max_row_offsets.begin()] __device__(auto col_id) { - auto parent_col_id = parent_col_ids[col_id]; - if (parent_col_id != parent_node_sentinel and - column_categories[parent_col_id] == node_t::NC_LIST) { - cuda::atomic_ref ref{ - *(list_parents_children_max_row_offsets + parent_col_id)}; - ref.fetch_max(max_row_offsets[col_id], cuda::std::memory_order_relaxed); - } - }); - thrust::gather_if( - rmm::exec_policy(stream), - parent_col_ids.begin(), - parent_col_ids.end(), - parent_col_ids.begin(), - list_parents_children_max_row_offsets.begin(), - max_row_offsets.begin(), - [column_categories = 
column_categories.begin()] __device__(size_type parent_col_id) { - return parent_col_id != parent_node_sentinel and - column_categories[parent_col_id] == node_t::NC_LIST; - }); - } - - // copy lists' max_row_offsets to children. - // all structs should have same size. - thrust::transform_if( - rmm::exec_policy(stream), - unique_col_ids.begin(), - unique_col_ids.end(), - max_row_offsets.begin(), - [column_categories = column_categories.begin(), - is_non_list_parent, - parent_col_ids = parent_col_ids.begin(), - max_row_offsets = max_row_offsets.begin()] __device__(size_type col_id) { - auto parent_col_id = parent_col_ids[col_id]; - // condition is true if parent is not a list, or sentinel/root - while (is_non_list_parent(parent_col_id)) { - col_id = parent_col_id; - parent_col_id = parent_col_ids[parent_col_id]; - } - return max_row_offsets[col_id]; - }, - [column_categories = column_categories.begin(), - is_non_list_parent, - parent_col_ids = parent_col_ids.begin()] __device__(size_type col_id) { - auto parent_col_id = parent_col_ids[col_id]; - // condition is true if parent is not a list, or sentinel/root - return is_non_list_parent(parent_col_id); - }); - - // For Struct and List (to avoid copying entire strings when mixed type as string is enabled) - thrust::transform_if( - rmm::exec_policy(stream), - col_range_begin.begin(), - col_range_begin.end(), - column_categories.begin(), - col_range_end.begin(), - [] __device__(auto i) { return i + 1; }, - [] __device__(NodeT type) { return type == NC_STRUCT || type == NC_LIST; }); - - return std::tuple{tree_meta_t{std::move(column_categories), - std::move(parent_col_ids), - std::move(column_levels), - std::move(col_range_begin), - std::move(col_range_end)}, - std::move(unique_col_ids), - std::move(max_row_offsets)}; -} - -/** - * @brief Get the column indices for the values column for array of arrays rows - * - * @param row_array_children_level The level of the row array's children - * @param d_tree The tree metadata - * 
@param col_ids The column ids - * @param num_columns The number of columns - * @param stream The stream to use - * @return The value columns' indices - */ -rmm::device_uvector get_values_column_indices(TreeDepthT const row_array_children_level, - tree_meta_t const& d_tree, - device_span col_ids, - size_type const num_columns, - rmm::cuda_stream_view stream) -{ - CUDF_FUNC_RANGE(); - auto [level2_nodes, level2_indices] = get_array_children_indices( - row_array_children_level, d_tree.node_levels, d_tree.parent_node_ids, stream); - auto col_id_location = thrust::make_permutation_iterator(col_ids.begin(), level2_nodes.begin()); - rmm::device_uvector values_column_indices(num_columns, stream); - thrust::scatter(rmm::exec_policy(stream), - level2_indices.begin(), - level2_indices.end(), - col_id_location, - values_column_indices.begin()); - return values_column_indices; -} - -/** - * @brief Copies strings specified by pair of begin, end offsets to host vector of strings. - * - * @param input String device buffer - * @param node_range_begin Begin offset of the strings - * @param node_range_end End offset of the strings - * @param stream CUDA stream - * @return Vector of strings - */ -std::vector copy_strings_to_host_sync( - device_span input, - device_span node_range_begin, - device_span node_range_end, - rmm::cuda_stream_view stream) -{ - CUDF_FUNC_RANGE(); - auto const num_strings = node_range_begin.size(); - rmm::device_uvector string_offsets(num_strings, stream); - rmm::device_uvector string_lengths(num_strings, stream); - auto d_offset_pairs = thrust::make_zip_iterator(node_range_begin.begin(), node_range_end.begin()); - thrust::transform(rmm::exec_policy(stream), - d_offset_pairs, - d_offset_pairs + num_strings, - thrust::make_zip_iterator(string_offsets.begin(), string_lengths.begin()), - [] __device__(auto const& offsets) { - // Note: first character for non-field columns - return thrust::make_tuple( - static_cast(thrust::get<0>(offsets)), - 
static_cast(thrust::get<1>(offsets) - thrust::get<0>(offsets))); - }); - - cudf::io::parse_options_view options_view{}; - options_view.quotechar = '\0'; // no quotes - options_view.keepquotes = true; - auto d_offset_length_it = - thrust::make_zip_iterator(string_offsets.begin(), string_lengths.begin()); - auto d_column_names = parse_data(input.data(), - d_offset_length_it, - num_strings, - data_type{type_id::STRING}, - rmm::device_buffer{}, - 0, - options_view, - stream, - rmm::mr::get_current_device_resource()); - auto to_host = [stream](auto const& col) { - if (col.is_empty()) return std::vector{}; - auto const scv = cudf::strings_column_view(col); - auto const h_chars = cudf::detail::make_std_vector_async( - cudf::device_span(scv.chars_begin(stream), scv.chars_size(stream)), stream); - auto const h_offsets = cudf::detail::make_std_vector_async( - cudf::device_span(scv.offsets().data() + scv.offset(), - scv.size() + 1), - stream); - stream.synchronize(); - - // build std::string vector from chars and offsets - std::vector host_data; - host_data.reserve(col.size()); - std::transform( - std::begin(h_offsets), - std::end(h_offsets) - 1, - std::begin(h_offsets) + 1, - std::back_inserter(host_data), - [&](auto start, auto end) { return std::string(h_chars.data() + start, end - start); }); - return host_data; - }; - return to_host(d_column_names->view()); -} - -/** - * @brief Checks if all strings in each string column in the tree are nulls. - * For non-string columns, it's set as true. If any of rows in a string column is false, it's set as - * false. 
- * - * @param input Input JSON string device data - * @param d_column_tree column tree representation of JSON string - * @param tree Node tree representation of the JSON string - * @param col_ids Column ids of the nodes in the tree - * @param options Parsing options specifying the parsing behaviour - * @param stream CUDA stream used for device memory operations and kernel launches - * @return Array of bytes where each byte indicate if it is all nulls string column. - */ -rmm::device_uvector is_all_nulls_each_column(device_span input, - tree_meta_t const& d_column_tree, - tree_meta_t const& tree, - device_span col_ids, - cudf::io::json_reader_options const& options, - rmm::cuda_stream_view stream) -{ - auto const num_nodes = col_ids.size(); - auto const num_cols = d_column_tree.node_categories.size(); - rmm::device_uvector is_all_nulls(num_cols, stream); - thrust::fill(rmm::exec_policy(stream), is_all_nulls.begin(), is_all_nulls.end(), true); - - auto parse_opt = parsing_options(options, stream); - thrust::for_each_n( - rmm::exec_policy(stream), - thrust::counting_iterator(0), - num_nodes, - [options = parse_opt.view(), - data = input.data(), - column_categories = d_column_tree.node_categories.begin(), - col_ids = col_ids.begin(), - range_begin = tree.node_range_begin.begin(), - range_end = tree.node_range_end.begin(), - is_all_nulls = is_all_nulls.begin()] __device__(size_type i) { - auto const node_category = column_categories[col_ids[i]]; - if (node_category == NC_STR or node_category == NC_VAL) { - auto const is_null_literal = serialized_trie_contains( - options.trie_na, - {data + range_begin[i], static_cast(range_end[i] - range_begin[i])}); - if (!is_null_literal) is_all_nulls[col_ids[i]] = false; - } - }); - return is_all_nulls; -} - -/** - * @brief Holds member data pointers of `d_json_column` - * - */ -struct json_column_data { - using row_offset_t = json_column::row_offset_t; - row_offset_t* string_offsets; - row_offset_t* string_lengths; - row_offset_t* 
child_offsets; - bitmask_type* validity; -}; - -/** - * @brief Constructs `d_json_column` from node tree representation - * Newly constructed columns are insert into `root`'s children. - * `root` must be a list type. - * - * @param input Input JSON string device data - * @param tree Node tree representation of the JSON string - * @param col_ids Column ids of the nodes in the tree - * @param row_offsets Row offsets of the nodes in the tree - * @param root Root node of the `d_json_column` tree - * @param is_array_of_arrays Whether the tree is an array of arrays - * @param options Parsing options specifying the parsing behaviour - * options affecting behaviour are - * is_enabled_lines: Whether the input is a line-delimited JSON - * is_enabled_mixed_types_as_string: Whether to enable reading mixed types as string - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the device memory - * of child_offets and validity members of `d_json_column` - */ -void make_device_json_column(device_span input, - tree_meta_t& tree, - device_span col_ids, - device_span row_offsets, - device_json_column& root, - bool is_array_of_arrays, - cudf::io::json_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - CUDF_FUNC_RANGE(); - - bool const is_enabled_lines = options.is_enabled_lines(); - bool const is_enabled_mixed_types_as_string = options.is_enabled_mixed_types_as_string(); - auto const num_nodes = col_ids.size(); - rmm::device_uvector sorted_col_ids(col_ids.size(), stream); // make a copy - thrust::copy(rmm::exec_policy(stream), col_ids.begin(), col_ids.end(), sorted_col_ids.begin()); - - // sort by {col_id} on {node_ids} stable - rmm::device_uvector node_ids(col_ids.size(), stream); - thrust::sequence(rmm::exec_policy(stream), node_ids.begin(), node_ids.end()); - thrust::stable_sort_by_key( - rmm::exec_policy(stream), sorted_col_ids.begin(), 
sorted_col_ids.end(), node_ids.begin()); - - NodeIndexT const row_array_parent_col_id = [&]() { - NodeIndexT value = parent_node_sentinel; - if (!col_ids.empty()) { - auto const list_node_index = is_enabled_lines ? 0 : 1; - CUDF_CUDA_TRY(cudaMemcpyAsync(&value, - col_ids.data() + list_node_index, - sizeof(NodeIndexT), - cudaMemcpyDefault, - stream.value())); - stream.synchronize(); - } - return value; - }(); - - // 1. gather column information. - auto [d_column_tree, d_unique_col_ids, d_max_row_offsets] = - reduce_to_column_tree(tree, - col_ids, - sorted_col_ids, - node_ids, - row_offsets, - is_array_of_arrays, - row_array_parent_col_id, - stream); - auto num_columns = d_unique_col_ids.size(); - auto unique_col_ids = cudf::detail::make_std_vector_async(d_unique_col_ids, stream); - auto column_categories = - cudf::detail::make_std_vector_async(d_column_tree.node_categories, stream); - auto column_parent_ids = - cudf::detail::make_std_vector_async(d_column_tree.parent_node_ids, stream); - auto column_range_beg = - cudf::detail::make_std_vector_async(d_column_tree.node_range_begin, stream); - auto max_row_offsets = cudf::detail::make_std_vector_async(d_max_row_offsets, stream); - std::vector column_names = copy_strings_to_host_sync( - input, d_column_tree.node_range_begin, d_column_tree.node_range_end, stream); - stream.synchronize(); - // array of arrays column names - if (is_array_of_arrays) { - TreeDepthT const row_array_children_level = is_enabled_lines ? 
1 : 2; - auto values_column_indices = - get_values_column_indices(row_array_children_level, tree, col_ids, num_columns, stream); - auto h_values_column_indices = - cudf::detail::make_std_vector_async(values_column_indices, stream); - stream.synchronize(); - std::transform(unique_col_ids.begin(), - unique_col_ids.end(), - column_names.begin(), - column_names.begin(), - [&h_values_column_indices, &column_parent_ids, row_array_parent_col_id]( - auto col_id, auto name) mutable { - return column_parent_ids[col_id] == row_array_parent_col_id - ? std::to_string(h_values_column_indices[col_id]) - : name; - }); - } - - auto to_json_col_type = [](auto category) { - switch (category) { - case NC_STRUCT: return json_col_t::StructColumn; - case NC_LIST: return json_col_t::ListColumn; - case NC_STR: [[fallthrough]]; - case NC_VAL: return json_col_t::StringColumn; - default: return json_col_t::Unknown; - } - }; - auto init_to_zero = [stream](auto& v) { - thrust::uninitialized_fill(rmm::exec_policy_nosync(stream), v.begin(), v.end(), 0); - }; - - auto initialize_json_columns = [&](auto i, auto& col) { - if (column_categories[i] == NC_ERR || column_categories[i] == NC_FN) { - return; - } else if (column_categories[i] == NC_VAL || column_categories[i] == NC_STR) { - col.string_offsets.resize(max_row_offsets[i] + 1, stream); - col.string_lengths.resize(max_row_offsets[i] + 1, stream); - init_to_zero(col.string_offsets); - init_to_zero(col.string_lengths); - } else if (column_categories[i] == NC_LIST) { - col.child_offsets.resize(max_row_offsets[i] + 2, stream); - init_to_zero(col.child_offsets); - } - col.num_rows = max_row_offsets[i] + 1; - col.validity = - cudf::detail::create_null_mask(col.num_rows, cudf::mask_state::ALL_NULL, stream, mr); - col.type = to_json_col_type(column_categories[i]); - }; - - auto reinitialize_as_string = [&](auto i, auto& col) { - col.string_offsets.resize(max_row_offsets[i] + 1, stream); - col.string_lengths.resize(max_row_offsets[i] + 1, stream); - 
init_to_zero(col.string_offsets); - init_to_zero(col.string_lengths); - col.num_rows = max_row_offsets[i] + 1; - col.validity = - cudf::detail::create_null_mask(col.num_rows, cudf::mask_state::ALL_NULL, stream, mr); - col.type = json_col_t::StringColumn; - // destroy references of all child columns after this step, by calling remove_child_columns - }; - - path_from_tree tree_path{column_categories, - column_parent_ids, - column_names, - is_array_of_arrays, - row_array_parent_col_id}; - - // 2. generate nested columns tree and its device_memory - // reorder unique_col_ids w.r.t. column_range_begin for order of column to be in field order. - auto h_range_col_id_it = - thrust::make_zip_iterator(column_range_beg.begin(), unique_col_ids.begin()); - std::sort(h_range_col_id_it, h_range_col_id_it + num_columns, [](auto const& a, auto const& b) { - return thrust::get<0>(a) < thrust::get<0>(b); - }); - - std::vector is_str_column_all_nulls{}; - if (is_enabled_mixed_types_as_string) { - is_str_column_all_nulls = cudf::detail::make_std_vector_sync( - is_all_nulls_each_column(input, d_column_tree, tree, col_ids, options, stream), stream); - } - - // use hash map because we may skip field name's col_ids - std::unordered_map> columns; - // map{parent_col_id, child_col_name}> = child_col_id, used for null value column tracking - std::map, NodeIndexT> mapped_columns; - // find column_ids which are values, but should be ignored in validity - auto ignore_vals = cudf::detail::make_host_vector(num_columns, stream); - std::vector is_mixed_type_column(num_columns, 0); - std::vector is_pruned(num_columns, 0); - columns.try_emplace(parent_node_sentinel, std::ref(root)); - - std::function remove_child_columns = - [&](NodeIndexT this_col_id, device_json_column& col) { - for (auto col_name : col.column_order) { - auto child_id = mapped_columns[{this_col_id, col_name}]; - is_mixed_type_column[child_id] = 1; - remove_child_columns(child_id, col.child_columns.at(col_name)); - 
mapped_columns.erase({this_col_id, col_name}); - columns.erase(child_id); - } - col.child_columns.clear(); // their references are deleted above. - col.column_order.clear(); - }; - - auto name_and_parent_index = [&is_array_of_arrays, - &row_array_parent_col_id, - &column_parent_ids, - &column_categories, - &column_names](auto this_col_id) { - std::string name = ""; - auto parent_col_id = column_parent_ids[this_col_id]; - if (parent_col_id == parent_node_sentinel || column_categories[parent_col_id] == NC_LIST) { - if (is_array_of_arrays && parent_col_id == row_array_parent_col_id) { - name = column_names[this_col_id]; - } else { - name = list_child_name; - } - } else if (column_categories[parent_col_id] == NC_FN) { - auto field_name_col_id = parent_col_id; - parent_col_id = column_parent_ids[parent_col_id]; - name = column_names[field_name_col_id]; - } else { - CUDF_FAIL("Unexpected parent column category"); - } - return std::pair{name, parent_col_id}; - }; - - // Prune columns that are not required to be parsed. - if (options.is_enabled_prune_columns()) { - for (auto const this_col_id : unique_col_ids) { - if (column_categories[this_col_id] == NC_ERR || column_categories[this_col_id] == NC_FN) { - continue; - } - // Struct, List, String, Value - auto [name, parent_col_id] = name_and_parent_index(this_col_id); - // get path of this column, and get its dtype if present in options - auto const nt = tree_path.get_path(this_col_id); - std::optional const user_dtype = get_path_data_type(nt, options); - if (!user_dtype.has_value() and parent_col_id != parent_node_sentinel) { - is_pruned[this_col_id] = 1; - continue; - } else { - // make sure all its parents are not pruned. - while (parent_col_id != parent_node_sentinel and is_pruned[parent_col_id] == 1) { - is_pruned[parent_col_id] = 0; - parent_col_id = column_parent_ids[parent_col_id]; - } - } - } - } - - // Build the column tree, also, handles mixed types. 
- for (auto const this_col_id : unique_col_ids) { - if (column_categories[this_col_id] == NC_ERR || column_categories[this_col_id] == NC_FN) { - continue; - } - // Struct, List, String, Value - auto [name, parent_col_id] = name_and_parent_index(this_col_id); - - // if parent is mixed type column or this column is pruned, ignore this column. - if (parent_col_id != parent_node_sentinel && - (is_mixed_type_column[parent_col_id] || is_pruned[this_col_id])) { - ignore_vals[this_col_id] = 1; - if (is_mixed_type_column[parent_col_id]) { is_mixed_type_column[this_col_id] = 1; } - continue; - } - - // If the child is already found, - // replace if this column is a nested column and the existing was a value column - // ignore this column if this column is a value column and the existing was a nested column - auto it = columns.find(parent_col_id); - CUDF_EXPECTS(it != columns.end(), "Parent column not found"); - auto& parent_col = it->second.get(); - bool replaced = false; - if (mapped_columns.count({parent_col_id, name}) > 0) { - auto const old_col_id = mapped_columns[{parent_col_id, name}]; - // If mixed type as string is enabled, make both of them strings and merge them. - // All child columns will be ignored when parsing. - if (is_enabled_mixed_types_as_string) { - bool const is_mixed_type = [&]() { - // If new or old is STR and they are all not null, make it mixed type, else ignore. - if (column_categories[this_col_id] == NC_VAL || - column_categories[this_col_id] == NC_STR) { - if (is_str_column_all_nulls[this_col_id]) return false; - } - if (column_categories[old_col_id] == NC_VAL || column_categories[old_col_id] == NC_STR) { - if (is_str_column_all_nulls[old_col_id]) return false; - } - return true; - }(); - if (is_mixed_type) { - is_mixed_type_column[this_col_id] = 1; - is_mixed_type_column[old_col_id] = 1; - // if old col type (not cat) is list or struct, replace with string. 
- auto& col = columns.at(old_col_id).get(); - if (col.type == json_col_t::ListColumn or col.type == json_col_t::StructColumn) { - reinitialize_as_string(old_col_id, col); - remove_child_columns(old_col_id, col); - // all its children (which are already inserted) are ignored later. - } - col.forced_as_string_column = true; - columns.try_emplace(this_col_id, columns.at(old_col_id)); - continue; - } - } - - if (column_categories[this_col_id] == NC_VAL || column_categories[this_col_id] == NC_STR) { - ignore_vals[this_col_id] = 1; - continue; - } - if (column_categories[old_col_id] == NC_VAL || column_categories[old_col_id] == NC_STR) { - // remap - ignore_vals[old_col_id] = 1; - mapped_columns.erase({parent_col_id, name}); - columns.erase(old_col_id); - parent_col.child_columns.erase(name); - replaced = true; // to skip duplicate name in column_order - } else { - // If this is a nested column but we're trying to insert either (a) a list node into a - // struct column or (b) a struct node into a list column, we fail - CUDF_EXPECTS(not((column_categories[old_col_id] == NC_LIST and - column_categories[this_col_id] == NC_STRUCT) or - (column_categories[old_col_id] == NC_STRUCT and - column_categories[this_col_id] == NC_LIST)), - "A mix of lists and structs within the same column is not supported"); - } - } - - if (is_enabled_mixed_types_as_string) { - // get path of this column, check if it is a struct forced as string, and enforce it - auto const nt = tree_path.get_path(this_col_id); - std::optional const user_dtype = get_path_data_type(nt, options); - if (column_categories[this_col_id] == NC_STRUCT and user_dtype.has_value() and - user_dtype.value().id() == type_id::STRING) { - is_mixed_type_column[this_col_id] = 1; - column_categories[this_col_id] = NC_STR; - } - } - - CUDF_EXPECTS(parent_col.child_columns.count(name) == 0, "duplicate column name: " + name); - // move into parent - device_json_column col(stream, mr); - initialize_json_columns(this_col_id, col); - auto 
inserted = parent_col.child_columns.try_emplace(name, std::move(col)).second; - CUDF_EXPECTS(inserted, "child column insertion failed, duplicate column name in the parent"); - if (not replaced) parent_col.column_order.push_back(name); - columns.try_emplace(this_col_id, std::ref(parent_col.child_columns.at(name))); - mapped_columns.try_emplace(std::make_pair(parent_col_id, name), this_col_id); - } - - if (is_enabled_mixed_types_as_string) { - // ignore all children of mixed type columns - for (auto const this_col_id : unique_col_ids) { - auto parent_col_id = column_parent_ids[this_col_id]; - if (parent_col_id != parent_node_sentinel and is_mixed_type_column[parent_col_id] == 1) { - is_mixed_type_column[this_col_id] = 1; - ignore_vals[this_col_id] = 1; - columns.erase(this_col_id); - } - // Convert only mixed type columns as string (so to copy), but not its children - if (parent_col_id != parent_node_sentinel and is_mixed_type_column[parent_col_id] == 0 and - is_mixed_type_column[this_col_id] == 1) - column_categories[this_col_id] = NC_STR; - } - cudaMemcpyAsync(d_column_tree.node_categories.begin(), - column_categories.data(), - column_categories.size() * sizeof(column_categories[0]), - cudaMemcpyDefault, - stream.value()); - } - - // restore unique_col_ids order - std::sort(h_range_col_id_it, h_range_col_id_it + num_columns, [](auto const& a, auto const& b) { - return thrust::get<1>(a) < thrust::get<1>(b); - }); - // move columns data to device. 
- auto columns_data = cudf::detail::make_host_vector(num_columns, stream); - for (auto& [col_id, col_ref] : columns) { - if (col_id == parent_node_sentinel) continue; - auto& col = col_ref.get(); - columns_data[col_id] = json_column_data{col.string_offsets.data(), - col.string_lengths.data(), - col.child_offsets.data(), - static_cast(col.validity.data())}; - } - - auto d_ignore_vals = cudf::detail::make_device_uvector_async( - ignore_vals, stream, rmm::mr::get_current_device_resource()); - auto d_columns_data = cudf::detail::make_device_uvector_async( - columns_data, stream, rmm::mr::get_current_device_resource()); - - // 3. scatter string offsets to respective columns, set validity bits - thrust::for_each_n( - rmm::exec_policy(stream), - thrust::counting_iterator(0), - num_nodes, - [column_categories = d_column_tree.node_categories.begin(), - col_ids = col_ids.begin(), - row_offsets = row_offsets.begin(), - range_begin = tree.node_range_begin.begin(), - range_end = tree.node_range_end.begin(), - d_ignore_vals = d_ignore_vals.begin(), - d_columns_data = d_columns_data.begin()] __device__(size_type i) { - if (d_ignore_vals[col_ids[i]]) return; - auto const node_category = column_categories[col_ids[i]]; - switch (node_category) { - case NC_STRUCT: set_bit(d_columns_data[col_ids[i]].validity, row_offsets[i]); break; - case NC_LIST: set_bit(d_columns_data[col_ids[i]].validity, row_offsets[i]); break; - case NC_STR: [[fallthrough]]; - case NC_VAL: - if (d_ignore_vals[col_ids[i]]) break; - set_bit(d_columns_data[col_ids[i]].validity, row_offsets[i]); - d_columns_data[col_ids[i]].string_offsets[row_offsets[i]] = range_begin[i]; - d_columns_data[col_ids[i]].string_lengths[row_offsets[i]] = range_end[i] - range_begin[i]; - break; - default: break; - } - }); - - // 4. scatter List offset - // copy_if only node's whose parent is list, (node_id, parent_col_id) - // stable_sort by parent_col_id of {node_id}. 
- // For all unique parent_node_id of (i==0, i-1!=i), write start offset. - // (i==last, i+1!=i), write end offset. - // unique_copy_by_key {parent_node_id} {row_offset} to - // col[parent_col_id].child_offsets[row_offset[parent_node_id]] - - auto& parent_col_ids = sorted_col_ids; // reuse sorted_col_ids - auto parent_col_id = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), - cuda::proclaim_return_type( - [col_ids = col_ids.begin(), - parent_node_ids = tree.parent_node_ids.begin()] __device__(size_type node_id) { - return parent_node_ids[node_id] == parent_node_sentinel ? parent_node_sentinel - : col_ids[parent_node_ids[node_id]]; - })); - auto const list_children_end = thrust::copy_if( - rmm::exec_policy(stream), - thrust::make_zip_iterator(thrust::make_counting_iterator(0), parent_col_id), - thrust::make_zip_iterator(thrust::make_counting_iterator(0), parent_col_id) + - num_nodes, - thrust::make_counting_iterator(0), - thrust::make_zip_iterator(node_ids.begin(), parent_col_ids.begin()), - [d_ignore_vals = d_ignore_vals.begin(), - parent_node_ids = tree.parent_node_ids.begin(), - column_categories = d_column_tree.node_categories.begin(), - col_ids = col_ids.begin()] __device__(size_type node_id) { - auto parent_node_id = parent_node_ids[node_id]; - return parent_node_id != parent_node_sentinel and - column_categories[col_ids[parent_node_id]] == NC_LIST and - (!d_ignore_vals[col_ids[parent_node_id]]); - }); - - auto const num_list_children = - list_children_end - thrust::make_zip_iterator(node_ids.begin(), parent_col_ids.begin()); - thrust::stable_sort_by_key(rmm::exec_policy(stream), - parent_col_ids.begin(), - parent_col_ids.begin() + num_list_children, - node_ids.begin()); - thrust::for_each_n( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - num_list_children, - [node_ids = node_ids.begin(), - parent_node_ids = tree.parent_node_ids.begin(), - parent_col_ids = parent_col_ids.begin(), - row_offsets = row_offsets.begin(), - 
d_columns_data = d_columns_data.begin(), - num_list_children] __device__(size_type i) { - auto const node_id = node_ids[i]; - auto const parent_node_id = parent_node_ids[node_id]; - // scatter to list_offset - if (i == 0 or parent_node_ids[node_ids[i - 1]] != parent_node_id) { - d_columns_data[parent_col_ids[i]].child_offsets[row_offsets[parent_node_id]] = - row_offsets[node_id]; - } - // last value of list child_offset is its size. - if (i == num_list_children - 1 or parent_node_ids[node_ids[i + 1]] != parent_node_id) { - d_columns_data[parent_col_ids[i]].child_offsets[row_offsets[parent_node_id] + 1] = - row_offsets[node_id] + 1; - } - }); - - // 5. scan on offsets. - for (auto& [id, col_ref] : columns) { - auto& col = col_ref.get(); - if (col.type == json_col_t::StringColumn) { - thrust::inclusive_scan(rmm::exec_policy_nosync(stream), - col.string_offsets.begin(), - col.string_offsets.end(), - col.string_offsets.begin(), - thrust::maximum{}); - } else if (col.type == json_col_t::ListColumn) { - thrust::inclusive_scan(rmm::exec_policy_nosync(stream), - col.child_offsets.begin(), - col.child_offsets.end(), - col.child_offsets.begin(), - thrust::maximum{}); - } - } - stream.synchronize(); -} - std::pair, std::vector> device_json_column_to_cudf_column( device_json_column& json_col, device_span d_input, diff --git a/cpp/src/io/json/json_column_csr.cu b/cpp/src/io/json/json_column_csr.cu new file mode 100644 index 00000000000..509dd84846e --- /dev/null +++ b/cpp/src/io/json/json_column_csr.cu @@ -0,0 +1,747 @@ +/* + * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "json_utils.hpp" +#include "nested_json.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace cudf::io::json::experimental::detail { + +struct device_column_subtree { + using row_offset_t = size_type; + rmm::device_uvector subtree_nrows; + rmm::device_uvector string_offsets; + rmm::device_uvector string_lengths; + // Row offsets + rmm::device_uvector child_offsets; + // Validity bitmap + rmm::device_buffer validity; +}; + +device_column_subtree allocation_for_device_column_subtree_annotation( + rmm::device_uvector const& rowidx, + rmm::device_uvector const& colidx, + rmm::device_uvector const& categories, + rmm::device_uvector const& max_row_offsets, + cudf::io::json_reader_options reader_options, + rmm::cuda_stream_view stream) +{ + // What are the cases in which estimation works? + CUDF_EXPECTS(reader_options.is_enabled_mixed_types_as_string() == false, + "mixed type as string has not yet been implemented"); + CUDF_EXPECTS(reader_options.is_enabled_prune_columns() == false, + "column pruning has not yet been implemented"); + // traverse the column tree + auto num_columns = rowidx.size() - 1; + + // 1. TODO: removing NC_ERR nodes and their descendants i.e. 
+ // removing the entire subtree rooted at the nodes with category NC_ERR + // for now, we just assert that there are indeed no error nodes + auto num_err_nodes = thrust::count_if( + rmm::exec_policy(stream), categories.begin(), categories.end(), [] __device__(auto const ctg) { + return ctg == NC_ERR; + }); + CUDF_EXPECTS(num_err_nodes == 0, "oops, there are some error nodes in the column tree!"); + + // 2. Let's do some validation of the column tree based on its properties. + // We will be using these properties to filter nodes later on. + // =========================================================================== + // (i) Every node v is of type string, val, field name, list or struct. + // (ii) String and val cannot have any children i.e. they can only be leaf nodes + // (iii) If v is a field name, it can have struct, list, string and val as children. + // (iv) If v is a struct, it can have a field name as child + // (v) If v is a list, it can have string, val, list or struct as child + // (vi) There can only be at most one string and one val child for a given node, but many struct, + // list and field name children. 
(vii) When mixed type support is disabled - + // (a) A mix of lists and structs in the same column is not supported i.e a field name and + // list node cannot have both list and struct as children (b) If there is a mix of str/val + // and list/struct in the same column, then str/val is discarded + + // Validation of (vii)(a) + auto num_field_and_list_nodes = thrust::count_if( + rmm::exec_policy(stream), categories.begin(), categories.end(), [] __device__(auto const ctg) { + return ctg == NC_FN || ctg == NC_LIST; + }); + rmm::device_uvector field_and_list_nodes(num_field_and_list_nodes, stream); + thrust::partition_copy(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(0) + num_columns, + field_and_list_nodes.begin(), + thrust::make_discard_iterator(), + [categories = categories.begin()] __device__(NodeIndexT node) { + return categories[node] == NC_LIST || categories[node] == NC_FN; + }); + bool is_valid_tree = thrust::all_of( + rmm::exec_policy(stream), + field_and_list_nodes.begin(), + field_and_list_nodes.end(), + [rowidx = rowidx.begin(), colidx = colidx.begin(), categories = categories.begin()] __device__( + NodeIndexT node) { + NodeIndexT first_child_pos = rowidx[node] + 1; + NodeIndexT last_child_pos = rowidx[node + 1] - 1; + bool has_struct_child = false; + bool has_list_child = false; + for (NodeIndexT child_pos = first_child_pos; child_pos <= last_child_pos; child_pos++) { + if (categories[colidx[child_pos]] == NC_STRUCT) has_struct_child = true; + if (categories[colidx[child_pos]] == NC_LIST) has_list_child = true; + } + return !has_struct_child && !has_list_child; + }); + + CUDF_EXPECTS(is_valid_tree, + "Invalidating property 7a i.e. mix of LIST and STRUCT in same column is not " + "supported when mixed type support is disabled"); + + // Validation of (vii)(b) i.e. ignore_vals in previous implementation + // We need to identify leaf nodes that have non-leaf sibling nodes + // i.e. 
we need to ignore leaf nodes at level above the last level + // idea: leaf nodes have adjacency 1. So if there is an adjacency 1 in between non-one + // adjacencies, then we have found a leaf node. Corner case: consider the last set of consecutive + // ones. If the leftmost of those ones (say node u) has a non-leaf sibling + // (can be found by looking at the adjacencies of the siblings + // (which are in turn found from the colidx of the parent u), then this leaf node should be + // ignored, otherwise all good. + rmm::device_uvector adjacency( + num_columns + 1, + stream); // since adjacent_difference requires that the output have the same length as input + thrust::adjacent_difference( + rmm::exec_policy(stream), rowidx.begin(), rowidx.end(), adjacency.begin()); + auto num_leaf_nodes = thrust::count_if(rmm::exec_policy(stream), + adjacency.begin() + 1, + adjacency.end(), + [] __device__(auto const adj) { return adj == 1; }); + rmm::device_uvector leaf_nodes(num_leaf_nodes, stream); + thrust::copy_if( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(0) + num_columns, + leaf_nodes.begin(), + [adjacency = adjacency.begin()] __device__(size_t node) { return adjacency[node] == 1; }); + + auto rev_node_it = thrust::make_reverse_iterator(thrust::make_counting_iterator(0) + num_columns); + auto rev_leaf_nodes_it = thrust::make_reverse_iterator(leaf_nodes.begin()); + auto is_leftmost_leaf = thrust::mismatch( + rmm::exec_policy(stream), rev_node_it, rev_node_it + num_columns, rev_leaf_nodes_it); + // the node number that could be the leftmost leaf node is given by u = *(is_leftmost_leaf.second + // - 1) + NodeIndexT leftmost_leaf_node = leaf_nodes.element( + num_leaf_nodes - thrust::distance(rev_leaf_nodes_it, is_leftmost_leaf.second - 1) - 1, stream); + + // upper_bound search for u in rowidx for parent node v. Now check if any of the other child nodes + // of v is non-leaf i.e. check if u is the first child of v. 
If yes, then leftmost_leaf_node is + the leftmost leaf node. Otherwise, discard all children of v after and including u + + auto parent_it = + thrust::upper_bound(rmm::exec_policy(stream), rowidx.begin(), rowidx.end(), leftmost_leaf_node); + NodeIndexT parent = thrust::distance(rowidx.begin(), parent_it - 1); + NodeIndexT parent_adj_start = rowidx.element(parent, stream); + NodeIndexT parent_adj_end = rowidx.element(parent + 1, stream); + auto childnum_it = thrust::lower_bound(rmm::exec_policy(stream), + colidx.begin() + parent_adj_start, + colidx.begin() + parent_adj_end, + leftmost_leaf_node); + + auto retained_leaf_nodes_it = leaf_nodes.begin() + num_leaf_nodes - + thrust::distance(rev_leaf_nodes_it, is_leftmost_leaf.second - 1) - + 1; + if (childnum_it != colidx.begin() + parent_adj_start + 1) { + // discarding from u to last child of parent + retained_leaf_nodes_it += thrust::distance(childnum_it, colidx.begin() + parent_adj_end); + } + // now, all nodes from leaf_nodes.begin() to retained_leaf_nodes_it need to be discarded i.e. they + // are part of ignore_vals + + // (Optional?) 
TODO: Validation of the remaining column tree properties + + // Now we annotate the extracted subtree + using row_offset_t = size_type; + auto num_subtree_nodes = thrust::distance(retained_leaf_nodes_it, leaf_nodes.end()); + rmm::device_uvector subtree_nrows(max_row_offsets, stream); + thrust::scatter(rmm::exec_policy(stream), + thrust::make_constant_iterator(-1), + thrust::make_constant_iterator(-1) + num_columns - num_subtree_nodes, + leaf_nodes.begin(), + subtree_nrows.begin()); + thrust::transform(rmm::exec_policy(stream), + thrust::make_constant_iterator(1), + thrust::make_constant_iterator(1) + num_columns, + subtree_nrows.begin(), + subtree_nrows.begin(), + thrust::plus()); + + // For the subtree, we allocate memory for device column subtree properties + rmm::device_uvector subtree_properties_map(num_columns, stream); + thrust::sequence( + rmm::exec_policy(stream), subtree_properties_map.begin(), subtree_properties_map.end(), 0); + auto partitioning_idx_it = thrust::partition(rmm::exec_policy(stream), + subtree_properties_map.begin(), + subtree_properties_map.end(), + subtree_nrows.begin(), + thrust::identity()); + auto str_partitioning_idx_it = + thrust::partition(rmm::exec_policy(stream), + subtree_properties_map.begin(), + partitioning_idx_it, + [categories = categories.begin()] __device__(NodeIndexT node) { + return categories[node] == NC_STR || categories[node] == NC_VAL; + }); + auto str_val_end = thrust::distance(subtree_properties_map.begin(), str_partitioning_idx_it); + auto max_row_offsets_perm_it = + thrust::make_permutation_iterator(max_row_offsets.begin(), subtree_properties_map.begin()); + size_type string_offsets_size = + thrust::reduce( + rmm::exec_policy(stream), max_row_offsets_perm_it, max_row_offsets_perm_it + str_val_end) + + str_val_end; + rmm::device_uvector string_offsets(string_offsets_size, stream); + rmm::device_uvector string_lengths(string_offsets_size, stream); + + auto list_partitioning_idx_it = + 
thrust::partition(rmm::exec_policy(stream), + str_partitioning_idx_it, + partitioning_idx_it, + [categories = categories.begin()] __device__(NodeIndexT node) { + return categories[node] == NC_LIST; + }); + auto list_end = thrust::distance(subtree_properties_map.begin(), list_partitioning_idx_it); + max_row_offsets_perm_it = + thrust::make_permutation_iterator(max_row_offsets.begin(), subtree_properties_map.begin()) + + str_val_end; + size_type child_offsets_size = + thrust::reduce(rmm::exec_policy(stream), + max_row_offsets_perm_it, + max_row_offsets_perm_it + (list_end - str_val_end)) + + 2 * (list_end - str_val_end); + rmm::device_uvector child_offsets(child_offsets_size, stream); + + auto validity_buffer_size = + thrust::reduce(rmm::exec_policy(stream), subtree_nrows.begin(), subtree_nrows.end()); + auto validity = cudf::detail::create_null_mask(validity_buffer_size, + cudf::mask_state::ALL_NULL, + stream, + rmm::mr::get_current_device_resource()); + + return device_column_subtree{std::move(subtree_nrows), + std::move(string_offsets), + std::move(string_lengths), + std::move(child_offsets), + std::move(validity)}; +} + +/** + * @brief Reduces node tree representation to column tree CSR representation. 
+ * + * @param tree Node tree representation of JSON string + * @param original_col_ids Column ids of nodes + * @param sorted_col_ids Sorted column ids of nodes + * @param ordered_node_ids Node ids of nodes sorted by column ids + * @param row_offsets Row offsets of nodes + * @param is_array_of_arrays Whether the tree is an array of arrays + * @param row_array_parent_col_id Column id of row array, if is_array_of_arrays is true + * @param stream CUDA stream used for device memory operations and kernel launches + * @return A tuple of column tree representation of JSON string, column ids of columns, and + * max row offsets of columns + */ +std::tuple> reduce_to_column_tree_csr( + tree_meta_t& tree, + device_span original_col_ids, + device_span sorted_col_ids, + device_span ordered_node_ids, + device_span row_offsets, + bool is_array_of_arrays, + NodeIndexT const row_array_parent_col_id, + cudf::io::json_reader_options const& reader_options, + rmm::cuda_stream_view stream) +{ + CUDF_FUNC_RANGE(); + // 1. column count for allocation + auto const num_columns = + thrust::unique_count(rmm::exec_policy(stream), sorted_col_ids.begin(), sorted_col_ids.end()); + + rmm::device_uvector unique_node_ids(num_columns, stream); + rmm::device_uvector csr_unique_node_ids(num_columns, stream); + rmm::device_uvector column_levels(num_columns, stream); + thrust::unique_by_key_copy(rmm::exec_policy(stream), + sorted_col_ids.begin(), + sorted_col_ids.end(), + ordered_node_ids.begin(), + thrust::make_discard_iterator(), + unique_node_ids.begin()); + thrust::copy_n( + rmm::exec_policy(stream), + thrust::make_permutation_iterator(tree.node_levels.begin(), unique_node_ids.begin()), + unique_node_ids.size(), + column_levels.begin()); + auto [sorted_column_levels, sorted_column_levels_order] = + cudf::io::json::detail::stable_sorted_key_order(column_levels, stream); + + // 2. reduce_by_key {col_id}, {row_offset}, max. 
+ rmm::device_uvector unique_col_ids(num_columns, stream); + rmm::device_uvector max_row_offsets(num_columns, stream); + rmm::device_uvector csr_unique_col_ids(num_columns, stream); + rmm::device_uvector csr_max_row_offsets(num_columns, stream); + auto ordered_row_offsets = + thrust::make_permutation_iterator(row_offsets.begin(), ordered_node_ids.begin()); + thrust::reduce_by_key(rmm::exec_policy(stream), + sorted_col_ids.begin(), + sorted_col_ids.end(), + ordered_row_offsets, + unique_col_ids.begin(), + max_row_offsets.begin(), + thrust::equal_to(), + thrust::maximum()); + + // 3. reduce_by_key {col_id}, {node_categories} - custom opp (*+v=*, v+v=v, *+#=E) + rmm::device_uvector column_categories(num_columns, stream); + rmm::device_uvector csr_column_categories(num_columns, stream); + thrust::reduce_by_key( + rmm::exec_policy(stream), + sorted_col_ids.begin(), + sorted_col_ids.end(), + thrust::make_permutation_iterator(tree.node_categories.begin(), ordered_node_ids.begin()), + unique_col_ids.begin(), + column_categories.begin(), + thrust::equal_to(), + [] __device__(NodeT type_a, NodeT type_b) -> NodeT { + auto is_a_leaf = (type_a == NC_VAL || type_a == NC_STR); + auto is_b_leaf = (type_b == NC_VAL || type_b == NC_STR); + // (v+v=v, *+*=*, *+v=*, *+#=E, NESTED+VAL=NESTED) + // *+*=*, v+v=v + if (type_a == type_b) { + return type_a; + } else if (is_a_leaf) { + // *+v=*, N+V=N + // STRUCT/LIST + STR/VAL = STRUCT/LIST, STR/VAL + FN = ERR, STR/VAL + STR = STR + return type_b == NC_FN ? NC_ERR : (is_b_leaf ? NC_STR : type_b); + } else if (is_b_leaf) { + return type_a == NC_FN ? NC_ERR : (is_a_leaf ? 
NC_STR : type_a); + } + // *+#=E + return NC_ERR; + }); + + auto csr_permutation_it = thrust::make_zip_iterator( + thrust::make_permutation_iterator(unique_node_ids.begin(), sorted_column_levels_order.begin()), + thrust::make_permutation_iterator(unique_col_ids.begin(), sorted_column_levels_order.begin()), + thrust::make_permutation_iterator(max_row_offsets.begin(), sorted_column_levels_order.begin()), + thrust::make_permutation_iterator(column_categories.begin(), + sorted_column_levels_order.begin())); + thrust::copy(rmm::exec_policy(stream), + csr_permutation_it, + csr_permutation_it + num_columns, + thrust::make_zip_iterator(csr_unique_node_ids.begin(), + csr_unique_col_ids.begin(), + csr_max_row_offsets.begin(), + csr_column_categories.begin())); + + // 4. unique_copy parent_node_ids, ranges + rmm::device_uvector csr_parent_col_ids(num_columns, stream); + rmm::device_uvector csr_col_range_begin(num_columns, stream); // Field names + rmm::device_uvector csr_col_range_end(num_columns, stream); + thrust::copy_n( + rmm::exec_policy(stream), + thrust::make_zip_iterator( + thrust::make_permutation_iterator(tree.parent_node_ids.begin(), csr_unique_node_ids.begin()), + thrust::make_permutation_iterator(tree.node_range_begin.begin(), csr_unique_node_ids.begin()), + thrust::make_permutation_iterator(tree.node_range_end.begin(), csr_unique_node_ids.begin())), + csr_unique_node_ids.size(), + thrust::make_zip_iterator( + csr_parent_col_ids.begin(), csr_col_range_begin.begin(), csr_col_range_end.begin())); + + // convert parent_node_ids to parent_col_ids + thrust::transform( + rmm::exec_policy(stream), + csr_parent_col_ids.begin(), + csr_parent_col_ids.end(), + csr_parent_col_ids.begin(), + [col_ids = original_col_ids.begin()] __device__(auto parent_node_id) -> size_type { + return parent_node_id == parent_node_sentinel ? parent_node_sentinel + : col_ids[parent_node_id]; + }); + + /* + CSR construction: + 1. Sort column levels and get their ordering + 2. 
For each column node coln iterated according to sorted_column_levels; do + a. Find nodes that have coln as the parent node -> set adj_coln + b. row idx[coln] = size of adj_coln + 1 + c. col idx[coln] = adj_coln U {parent_col_id[coln]} + */ + + rmm::device_uvector rowidx(num_columns + 1, stream); + thrust::fill(rmm::exec_policy(stream), rowidx.begin(), rowidx.end(), 0); + + // Note that the first element of csr_parent_col_ids is -1 (parent_node_sentinel) + // children adjacency + auto num_non_leaf_columns = thrust::unique_count( + rmm::exec_policy(stream), csr_parent_col_ids.begin() + 1, csr_parent_col_ids.end()); + thrust::reduce_by_key(rmm::exec_policy(stream), + csr_parent_col_ids.begin() + 1, + csr_parent_col_ids.end(), + thrust::make_constant_iterator(1), + thrust::make_discard_iterator(), + rowidx.begin() + 1, + thrust::equal_to()); + thrust::inclusive_scan( + rmm::exec_policy(stream), rowidx.begin() + 1, rowidx.end(), rowidx.begin() + 1); + // overwrite the csr_parent_col_ids with the col ids in the csr tree + thrust::fill(rmm::exec_policy(stream), csr_parent_col_ids.begin(), csr_parent_col_ids.end(), -1); + thrust::scatter(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(0) + num_non_leaf_columns, + rowidx.begin(), + csr_parent_col_ids.begin() + 1); + thrust::inclusive_scan(rmm::exec_policy(stream), + csr_parent_col_ids.begin(), + csr_parent_col_ids.end(), + csr_parent_col_ids.begin(), + thrust::maximum{}); + // We are discarding the parent of the root node. Add the parent adjacency. 
Since we have already + // performed the scan, we use a counting iterator to add + thrust::transform(rmm::exec_policy(stream), + rowidx.begin() + 2, + rowidx.end(), + thrust::make_counting_iterator(1), + rowidx.begin() + 2, + thrust::plus()); + + rmm::device_uvector colidx((num_columns - 1) * 2, stream); + thrust::fill(rmm::exec_policy(stream), colidx.begin(), colidx.end(), 0); + // Skip the parent of root node + thrust::scatter(rmm::exec_policy(stream), + csr_parent_col_ids.begin() + 1, + csr_parent_col_ids.end(), + rowidx.begin() + 1, + colidx.begin()); + // excluding root node + rmm::device_uvector map(num_columns - 1, stream); + thrust::fill(rmm::exec_policy(stream), map.begin(), map.end(), 1); + thrust::inclusive_scan_by_key(rmm::exec_policy(stream), + csr_parent_col_ids.begin() + 1, + csr_parent_col_ids.end(), + map.begin(), + map.begin()); + thrust::for_each(rmm::exec_policy(stream), + thrust::make_counting_iterator(1), + thrust::make_counting_iterator(1) + num_columns - 1, + [rowidx = rowidx.begin(), + map = map.begin(), + csr_parent_col_ids = csr_parent_col_ids.begin()] __device__(auto i) { + auto csr_parent_col_id = csr_parent_col_ids[i]; + if (csr_parent_col_id == 0) + map[i - 1]--; + else + map[i - 1] += rowidx[csr_parent_col_id]; + }); + thrust::scatter(rmm::exec_policy(stream), + thrust::make_counting_iterator(1), + thrust::make_counting_iterator(1) + num_columns - 1, + map.begin(), + colidx.begin()); + + // condition is true if parent is not a list, or sentinel/root + // Special case to return true if parent is a list and is_array_of_arrays is true + auto is_non_list_parent = [column_categories = column_categories.begin(), + is_array_of_arrays, + row_array_parent_col_id] __device__(auto parent_col_id) -> bool { + return !(parent_col_id == parent_node_sentinel || + column_categories[parent_col_id] == NC_LIST && + (!is_array_of_arrays || parent_col_id != row_array_parent_col_id)); + }; + // Mixed types in List children go to different columns, + // so 
all immediate children of list column should have same max_row_offsets. + // create list's children max_row_offsets array. (initialize to zero) + // atomicMax on children max_row_offsets array. + // gather the max_row_offsets from children row offset array. + { + rmm::device_uvector list_parents_children_max_row_offsets(num_columns, stream); + thrust::fill(rmm::exec_policy(stream), + list_parents_children_max_row_offsets.begin(), + list_parents_children_max_row_offsets.end(), + 0); + thrust::for_each(rmm::exec_policy(stream), + csr_unique_col_ids.begin(), + csr_unique_col_ids.end(), + [csr_column_categories = csr_column_categories.begin(), + csr_parent_col_ids = csr_parent_col_ids.begin(), + csr_max_row_offsets = csr_max_row_offsets.begin(), + list_parents_children_max_row_offsets = + list_parents_children_max_row_offsets.begin()] __device__(auto col_id) { + auto csr_parent_col_id = csr_parent_col_ids[col_id]; + if (csr_parent_col_id != parent_node_sentinel and + csr_column_categories[csr_parent_col_id] == node_t::NC_LIST) { + cuda::atomic_ref ref{ + *(list_parents_children_max_row_offsets + csr_parent_col_id)}; + ref.fetch_max(csr_max_row_offsets[col_id], + cuda::std::memory_order_relaxed); + } + }); + thrust::gather_if( + rmm::exec_policy(stream), + csr_parent_col_ids.begin(), + csr_parent_col_ids.end(), + csr_parent_col_ids.begin(), + list_parents_children_max_row_offsets.begin(), + csr_max_row_offsets.begin(), + [csr_column_categories = csr_column_categories.begin()] __device__(size_type parent_col_id) { + return parent_col_id != parent_node_sentinel and + csr_column_categories[parent_col_id] == node_t::NC_LIST; + }); + } + + // copy lists' max_row_offsets to children. + // all structs should have same size. 
+ thrust::transform_if( + rmm::exec_policy(stream), + csr_unique_col_ids.begin(), + csr_unique_col_ids.end(), + csr_max_row_offsets.begin(), + [csr_column_categories = csr_column_categories.begin(), + is_non_list_parent, + csr_parent_col_ids = csr_parent_col_ids.begin(), + csr_max_row_offsets = csr_max_row_offsets.begin()] __device__(size_type col_id) { + auto parent_col_id = csr_parent_col_ids[col_id]; + // condition is true if parent is not a list, or sentinel/root + while (is_non_list_parent(parent_col_id)) { + col_id = parent_col_id; + parent_col_id = csr_parent_col_ids[parent_col_id]; + } + return csr_max_row_offsets[col_id]; + }, + [csr_column_categories = csr_column_categories.begin(), + is_non_list_parent, + parent_col_ids = csr_parent_col_ids.begin()] __device__(size_type col_id) { + auto parent_col_id = parent_col_ids[col_id]; + // condition is true if parent is not a list, or sentinel/root + return is_non_list_parent(parent_col_id); + }); + + // For Struct and List (to avoid copying entire strings when mixed type as string is enabled) + thrust::transform_if( + rmm::exec_policy(stream), + csr_col_range_begin.begin(), + csr_col_range_begin.end(), + csr_column_categories.begin(), + csr_col_range_end.begin(), + [] __device__(auto i) { return i + 1; }, + [] __device__(NodeT type) { return type == NC_STRUCT || type == NC_LIST; }); + + // this function allocates memory for the annotation + auto device_column_subtree_obj = allocation_for_device_column_subtree_annotation( + rowidx, colidx, csr_column_categories, csr_max_row_offsets, reader_options, stream); + // now we actually do the annotation + // relabel original_col_ids with the positions of the csr_unique_col_ids with same element. How do + // we accomplish this? one idea is to sort the row offsets by node level. 
Just the way we did this + // for the csr_column_ids sort original_col_ids, extract subtree based on the annotation above, + using row_offset_t = size_type; + auto [sorted_node_levels, sorted_node_levels_order] = + cudf::io::json::detail::stable_sorted_key_order(tree.node_levels, stream); + auto num_nodes = original_col_ids.size(); + auto row_offsets_it = + thrust::make_permutation_iterator(row_offsets.begin(), sorted_node_levels_order.begin()); + auto node_range_begin_it = thrust::make_permutation_iterator(tree.node_range_begin.begin(), + sorted_node_levels_order.begin()); + auto node_range_end_it = thrust::make_permutation_iterator(tree.node_range_end.begin(), + sorted_node_levels_order.begin()); + auto node_col_ids_it = + thrust::make_permutation_iterator(original_col_ids.begin(), sorted_node_levels_order.begin()); + auto node_categories_it = thrust::make_permutation_iterator(tree.node_categories.begin(), + sorted_node_levels_order.begin()); + + rmm::device_uvector sorted_subtree_nrows(device_column_subtree_obj.subtree_nrows, + stream); + rmm::device_uvector sorted_csr_unique_col_ids(csr_unique_col_ids, stream); + thrust::sort_by_key(rmm::exec_policy(stream), + sorted_csr_unique_col_ids.begin(), + sorted_csr_unique_col_ids.end(), + sorted_subtree_nrows.begin()); + thrust::copy_if( + rmm::exec_policy(stream), + node_range_begin_it, + node_range_begin_it + num_nodes, + thrust::make_counting_iterator(0), + device_column_subtree_obj.string_offsets.begin(), + [sorted_subtree_nrows = sorted_subtree_nrows.begin(), + node_col_ids_it, + node_categories_it] __device__(NodeIndexT node) { + return sorted_subtree_nrows[node_col_ids_it[node]] && + (node_categories_it[node] == NC_STR || node_categories_it[node] == NC_VAL); + }); + + auto node_range_lengths_it = thrust::make_transform_iterator( + thrust::make_zip_iterator(node_range_begin_it, node_range_end_it), + cuda::proclaim_return_type([] __device__(auto range_it) { + return thrust::get<1>(range_it) - 
thrust::get<0>(range_it); + })); + thrust::copy_if( + rmm::exec_policy(stream), + node_range_lengths_it, + node_range_lengths_it + num_nodes, + thrust::make_counting_iterator(0), + device_column_subtree_obj.string_lengths.begin(), + [sorted_subtree_nrows = sorted_subtree_nrows.begin(), + node_col_ids_it, + node_categories_it] __device__(NodeIndexT node) { + return sorted_subtree_nrows[node_col_ids_it[node]] && + (node_categories_it[node] == NC_STR || node_categories_it[node] == NC_VAL); + }); + + // row_offsets need to be prefix summed across columns! + thrust::replace_if( + rmm::exec_policy(stream), + row_offsets_it, + row_offsets_it + num_nodes, + thrust::make_counting_iterator(0), + [sorted_subtree_nrows = sorted_subtree_nrows.begin(), node_col_ids_it] __device__( + NodeIndexT node) { return sorted_subtree_nrows[node_col_ids_it[node]] > 0; }, + 0); + thrust::inclusive_scan( + rmm::exec_policy(stream), row_offsets_it, row_offsets_it + num_nodes, row_offsets_it); + thrust::for_each_n( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + num_nodes, + [sorted_subtree_nrows = sorted_subtree_nrows.begin(), + node_col_ids_it, + node_categories_it, + row_offsets_it, + validity = static_cast( + device_column_subtree_obj.validity.data())] __device__(NodeIndexT node) { + if (sorted_subtree_nrows[node_col_ids_it[node]] && node_categories_it[node] != NC_LIST) + cudf::set_bit(validity, row_offsets_it[node]); + }); + + // scatter list offsets + + return std::tuple{column_tree_csr{std::move(rowidx), + std::move(colidx), + std::move(csr_unique_col_ids), + std::move(csr_column_categories), + std::move(csr_col_range_begin), + std::move(csr_col_range_end)}, + std::move(csr_max_row_offsets)}; +} + +/** + * @brief Constructs `d_json_column` from node tree representation + * Newly constructed columns are insert into `root`'s children. + * `root` must be a list type. 
+ * + * @param input Input JSON string device data + * @param tree Node tree representation of the JSON string + * @param col_ids Column ids of the nodes in the tree + * @param row_offsets Row offsets of the nodes in the tree + * @param root Root node of the `d_json_column` tree + * @param is_array_of_arrays Whether the tree is an array of arrays + * @param options Parsing options specifying the parsing behaviour + * options affecting behaviour are + * is_enabled_lines: Whether the input is a line-delimited JSON + * is_enabled_mixed_types_as_string: Whether to enable reading mixed types as string + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the device memory + * of child_offsets and validity members of `d_json_column` + */ +void make_device_json_column_csr(device_span input, + tree_meta_t& tree, + device_span col_ids, + device_span row_offsets, + device_json_column& root, + bool is_array_of_arrays, + cudf::io::json_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + bool const is_enabled_lines = options.is_enabled_lines(); + auto const num_nodes = col_ids.size(); + rmm::device_uvector sorted_col_ids(col_ids.size(), stream); // make a copy + thrust::copy(rmm::exec_policy(stream), col_ids.begin(), col_ids.end(), sorted_col_ids.begin()); + + // sort by {col_id} on {node_ids} stable + rmm::device_uvector node_ids(col_ids.size(), stream); + thrust::sequence(rmm::exec_policy(stream), node_ids.begin(), node_ids.end()); + thrust::stable_sort_by_key( + rmm::exec_policy(stream), sorted_col_ids.begin(), sorted_col_ids.end(), node_ids.begin()); + + NodeIndexT const row_array_parent_col_id = [&]() { + NodeIndexT value = parent_node_sentinel; + if (!col_ids.empty()) { + auto const list_node_index = is_enabled_lines ? 
0 : 1; + CUDF_CUDA_TRY(cudaMemcpyAsync(&value, + col_ids.data() + list_node_index, + sizeof(NodeIndexT), + cudaMemcpyDefault, + stream.value())); + stream.synchronize(); + } + return value; + }(); + + // 1. gather column information. + auto [d_column_tree, d_max_row_offsets] = reduce_to_column_tree_csr(tree, + col_ids, + sorted_col_ids, + node_ids, + row_offsets, + is_array_of_arrays, + row_array_parent_col_id, + options, + stream); + + CUDF_EXPECTS(is_array_of_arrays == false, "array of arrays has not yet been implemented"); + CUDF_EXPECTS(options.is_enabled_mixed_types_as_string() == false, + "mixed type as string has not yet been implemented"); + CUDF_EXPECTS(options.is_enabled_prune_columns() == false, + "column pruning has not yet been implemented"); +} + +} // namespace cudf::io::json::experimental::detail diff --git a/cpp/src/io/json/json_tree.cu b/cpp/src/io/json/json_tree.cu index ad807b57766..5e0d2b389ba 100644 --- a/cpp/src/io/json/json_tree.cu +++ b/cpp/src/io/json/json_tree.cu @@ -15,6 +15,7 @@ */ #include "io/utilities/hostdevice_vector.hpp" +#include "json_utils.hpp" #include "nested_json.hpp" #include @@ -33,7 +34,6 @@ #include #include -#include #include #include #include @@ -139,55 +139,6 @@ struct is_nested_end { } }; -/** - * @brief Returns stable sorted keys and its sorted order - * - * Uses cub stable radix sort. The order is internally generated, hence it saves a copy and memory. - * Since the key and order is returned, using double buffer helps to avoid extra copy to user - * provided output iterator. - * - * @tparam IndexType sorted order type - * @tparam KeyType key type - * @param keys keys to sort - * @param stream CUDA stream used for device memory operations and kernel launches. 
- * @return Sorted keys and indices producing that sorted order - */ -template -std::pair, rmm::device_uvector> stable_sorted_key_order( - cudf::device_span keys, rmm::cuda_stream_view stream) -{ - CUDF_FUNC_RANGE(); - - // Determine temporary device storage requirements - rmm::device_uvector keys_buffer1(keys.size(), stream); - rmm::device_uvector keys_buffer2(keys.size(), stream); - rmm::device_uvector order_buffer1(keys.size(), stream); - rmm::device_uvector order_buffer2(keys.size(), stream); - cub::DoubleBuffer order_buffer(order_buffer1.data(), order_buffer2.data()); - cub::DoubleBuffer keys_buffer(keys_buffer1.data(), keys_buffer2.data()); - size_t temp_storage_bytes = 0; - cub::DeviceRadixSort::SortPairs( - nullptr, temp_storage_bytes, keys_buffer, order_buffer, keys.size()); - rmm::device_buffer d_temp_storage(temp_storage_bytes, stream); - - thrust::copy(rmm::exec_policy(stream), keys.begin(), keys.end(), keys_buffer1.begin()); - thrust::sequence(rmm::exec_policy(stream), order_buffer1.begin(), order_buffer1.end()); - - cub::DeviceRadixSort::SortPairs(d_temp_storage.data(), - temp_storage_bytes, - keys_buffer, - order_buffer, - keys.size(), - 0, - sizeof(KeyType) * 8, - stream.value()); - - return std::pair{keys_buffer.Current() == keys_buffer1.data() ? std::move(keys_buffer1) - : std::move(keys_buffer2), - order_buffer.Current() == order_buffer1.data() ? std::move(order_buffer1) - : std::move(order_buffer2)}; -} - /** * @brief Propagate parent node from first sibling to other siblings. * diff --git a/cpp/src/io/json/json_utils.hpp b/cpp/src/io/json/json_utils.hpp new file mode 100644 index 00000000000..8864bde84d8 --- /dev/null +++ b/cpp/src/io/json/json_utils.hpp @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +namespace cudf::io::json::detail { +/** + * @brief Returns stable sorted keys and its sorted order + * + * Uses cub stable radix sort. The order is internally generated, hence it saves a copy and memory. + * Since the key and order is returned, using double buffer helps to avoid extra copy to user + * provided output iterator. + * + * @tparam IndexType sorted order type + * @tparam KeyType key type + * @param keys keys to sort + * @param stream CUDA stream used for device memory operations and kernel launches. 
+ * @return Sorted keys and indices producing that sorted order + */ +template +std::pair, rmm::device_uvector> stable_sorted_key_order( + cudf::device_span keys, rmm::cuda_stream_view stream) +{ + CUDF_FUNC_RANGE(); + + // Determine temporary device storage requirements + rmm::device_uvector keys_buffer1(keys.size(), stream); + rmm::device_uvector keys_buffer2(keys.size(), stream); + rmm::device_uvector order_buffer1(keys.size(), stream); + rmm::device_uvector order_buffer2(keys.size(), stream); + cub::DoubleBuffer order_buffer(order_buffer1.data(), order_buffer2.data()); + cub::DoubleBuffer keys_buffer(keys_buffer1.data(), keys_buffer2.data()); + size_t temp_storage_bytes = 0; + cub::DeviceRadixSort::SortPairs( + nullptr, temp_storage_bytes, keys_buffer, order_buffer, keys.size()); + rmm::device_buffer d_temp_storage(temp_storage_bytes, stream); + + thrust::copy(rmm::exec_policy(stream), keys.begin(), keys.end(), keys_buffer1.begin()); + thrust::sequence(rmm::exec_policy(stream), order_buffer1.begin(), order_buffer1.end()); + + cub::DeviceRadixSort::SortPairs(d_temp_storage.data(), + temp_storage_bytes, + keys_buffer, + order_buffer, + keys.size(), + 0, + sizeof(KeyType) * 8, + stream.value()); + + return std::pair{keys_buffer.Current() == keys_buffer1.data() ? std::move(keys_buffer1) + : std::move(keys_buffer2), + order_buffer.Current() == order_buffer1.data() ? 
std::move(order_buffer1) + : std::move(order_buffer2)}; +} + +} // namespace cudf::io::json::detail diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp index 20c143f66c7..457a336b165 100644 --- a/cpp/src/io/json/nested_json.hpp +++ b/cpp/src/io/json/nested_json.hpp @@ -23,8 +23,13 @@ #include #include +#include +#include +#include #include +#include + #include #include @@ -52,24 +57,6 @@ struct tree_meta_t { */ enum class json_col_t : char { ListColumn, StructColumn, StringColumn, Unknown }; -/** - * @brief Enum class to specify whether we just push onto and pop from the stack or whether we also - * reset to an empty stack on a newline character. - */ -enum class stack_behavior_t : char { - /// Opening brackets and braces, [, {, push onto the stack, closing brackets and braces, ], }, pop - /// from the stack - PushPopWithoutReset, - - /// Opening brackets and braces, [, {, push onto the stack, closing brackets and braces, ], }, pop - /// from the stack. Delimiter characters are passed when the stack context is constructed to - /// reset to an empty stack. - ResetOnDelimiter -}; - -// Default name for a list's child column -constexpr auto list_child_name{"element"}; - /** * @brief Intermediate representation of data from a nested JSON input */ @@ -185,6 +172,120 @@ struct device_json_column { { } }; +/** + * @brief Holds member data pointers of `d_json_column` + * + */ +struct json_column_data { + using row_offset_t = json_column::row_offset_t; + row_offset_t* string_offsets; + row_offset_t* string_lengths; + row_offset_t* child_offsets; + bitmask_type* validity; +}; + +/** + * @brief Enum class to specify whether we just push onto and pop from the stack or whether we also + * reset to an empty stack on a newline character. 
+ */ +enum class stack_behavior_t : char { + /// Opening brackets and braces, [, {, push onto the stack, closing brackets and braces, ], }, pop + /// from the stack + PushPopWithoutReset, + + /// Opening brackets and braces, [, {, push onto the stack, closing brackets and braces, ], }, pop + /// from the stack. Delimiter characters are passed when the stack context is constructed to + /// reset to an empty stack. + ResetOnDelimiter +}; + +// Default name for a list's child column +constexpr auto list_child_name{"element"}; + +namespace experimental { +using row_offset_t = size_type; +/* + * @brief Sparse graph adjacency matrix stored in Compressed Sparse Row (CSR) format. + */ +struct csr { + rmm::device_uvector rowidx; + rmm::device_uvector colidx; +}; + +/* + * @brief Auxiliary column tree properties that are required to construct the device json + * column subtree, but not required for the final cudf column construction. + */ +struct column_tree_properties { + rmm::device_scalar num_levels; + rmm::device_uvector categories; + rmm::device_uvector max_row_offsets; + rmm::device_uvector mapped_ids; +}; + +/* + * @brief Positional and validity information for subgraph extracted from column tree. The offsets + * for each node in the member arrays can be obtained by segmented sums of the max_row_offsets array + * in column_tree_properties. + */ +struct device_column_subtree_properties { + rmm::device_uvector string_offsets; + rmm::device_uvector string_lengths; + // Row offsets + rmm::device_uvector child_offsets; + // Validity bitmap + rmm::device_buffer validity; +}; + +/* + * @brief Unvalidated column tree stored in Compressed Sparse Row (CSR) format. The device json + * column subtree - the subgraph that conforms to column tree properties - is extracted and further + * processed according to the JSON reader options passed. Only the final processed subgraph is + * annotated with information required to construct cuDF columns. 
+ */ +struct column_tree { + // position of nnzs + csr adjacency; + // device_json_column properties + // Indicator array for the device column subtree + // Stores the number of rows in the column if the node is part of device column subtree + // Stores zero otherwise + rmm::device_uvector subtree_nrows; + device_column_subtree_properties d_props; +}; + +namespace detail { +/** + * @brief Reduce node tree into column tree by aggregating each property of column. + * + * @param tree json node tree to reduce (modified in-place, but restored to original state) + * @param col_ids column ids of each node (modified in-place, but restored to original state) + * @param row_offsets row offsets of each node (modified in-place, but restored to original state) + * @param stream The CUDA stream to which kernels are dispatched + * @return A tuple containing the column tree, identifier for each column and the maximum row index + * in each column + */ +CUDF_EXPORT +std::tuple reduce_to_column_tree( + tree_meta_t& tree, + device_span original_col_ids, + device_span row_offsets, + bool is_array_of_arrays, + NodeIndexT const row_array_parent_col_id, + rmm::cuda_stream_view stream); + +void make_device_json_column(device_span input, + tree_meta_t& tree, + device_span col_ids, + device_span row_offsets, + device_json_column& root, + bool is_array_of_arrays, + cudf::io::json_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + +} // namespace detail +} // namespace experimental namespace detail { @@ -285,20 +386,30 @@ get_array_children_indices(TreeDepthT row_array_children_level, device_span node_levels, device_span parent_node_ids, rmm::cuda_stream_view stream); + /** * @brief Reduce node tree into column tree by aggregating each property of column. 
* - * @param tree json node tree to reduce (modified in-place, but restored to original state) - * @param col_ids column ids of each node (modified in-place, but restored to original state) - * @param row_offsets row offsets of each node (modified in-place, but restored to original state) - * @param stream The CUDA stream to which kernels are dispatched - * @return A tuple containing the column tree, identifier for each column and the maximum row index - * in each column + * @param tree Node tree representation of JSON string + * @param original_col_ids Column ids of nodes + * @param sorted_col_ids Sorted column ids of nodes + * @param ordered_node_ids Node ids of nodes sorted by column ids + * @param row_offsets Row offsets of nodes + * @param is_array_of_arrays Whether the tree is an array of arrays + * @param row_array_parent_col_id Column id of row array, if is_array_of_arrays is true + * @param stream CUDA stream used for device memory operations and kernel launches + * @return A tuple of column tree representation of JSON string, column ids of columns, and + * max row offsets of columns */ +CUDF_EXPORT std::tuple, rmm::device_uvector> reduce_to_column_tree(tree_meta_t& tree, - device_span col_ids, + device_span original_col_ids, + device_span sorted_col_ids, + device_span ordered_node_ids, device_span row_offsets, + bool is_array_of_arrays, + NodeIndexT const row_array_parent_col_id, rmm::cuda_stream_view stream); /** @@ -311,6 +422,35 @@ reduce_to_column_tree(tree_meta_t& tree, cudf::io::parse_options parsing_options(cudf::io::json_reader_options const& options, rmm::cuda_stream_view stream); +/** + * @brief Constructs `d_json_column` from node tree representation + * Newly constructed columns are inserted into `root`'s children. + * `root` must be a list type.
+ * + * @param input Input JSON string device data + * @param tree Node tree representation of the JSON string + * @param col_ids Column ids of the nodes in the tree + * @param row_offsets Row offsets of the nodes in the tree + * @param root Root node of the `d_json_column` tree + * @param is_array_of_arrays Whether the tree is an array of arrays + * @param options Parsing options specifying the parsing behaviour + * options affecting behaviour are + * is_enabled_lines: Whether the input is a line-delimited JSON + * is_enabled_mixed_types_as_string: Whether to enable reading mixed types as string + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the device memory + * of child_offsets and validity members of `d_json_column` + */ +void make_device_json_column(device_span input, + tree_meta_t& tree, + device_span col_ids, + device_span row_offsets, + device_json_column& root, + bool is_array_of_arrays, + cudf::io::json_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + /** + * @brief Parses the given JSON string and generates table from the given input.
* diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 4dffcb41ba2..7698c4f2cbf 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -325,6 +325,7 @@ ConfigureTest(ARROW_IO_SOURCE_TEST io/arrow_io_source_test.cpp) ConfigureTest(MULTIBYTE_SPLIT_TEST io/text/multibyte_split_test.cpp) ConfigureTest(JSON_QUOTE_NORMALIZATION io/json/json_quote_normalization_test.cpp) ConfigureTest(JSON_WHITESPACE_NORMALIZATION io/json/json_whitespace_normalization_test.cu) +ConfigureTest(JSON_TREE_CSR io/json/json_tree_csr.cu) ConfigureTest( DATA_CHUNK_SOURCE_TEST io/text/data_chunk_source_test.cpp GPUS 1 diff --git a/cpp/tests/io/json/json_tree_csr.cu b/cpp/tests/io/json/json_tree_csr.cu new file mode 100644 index 00000000000..bc135e041d0 --- /dev/null +++ b/cpp/tests/io/json/json_tree_csr.cu @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "io/json/nested_json.hpp" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include +#include + +namespace cuio_json = cudf::io::json; + +struct h_tree_meta_t { + std::vector node_categories; + std::vector parent_node_ids; + std::vector node_range_begin; + std::vector node_range_end; +}; + +struct h_column_tree { + // position of nnzs + std::vector rowidx; + std::vector colidx; + // node properties + std::vector categories; + std::vector column_ids; +}; + +template +void print(std::string str, std::vector& vec) +{ + std::cout << str << " = "; + for (size_t i = 0; i < vec.size(); i++) + std::cout << vec[i] << " "; + std::cout << std::endl; +} + +bool check_equality(cuio_json::tree_meta_t& d_a, + rmm::device_uvector& d_a_max_row_offsets, + cuio_json::experimental::csr& d_b_csr, + cuio_json::experimental::column_tree_properties& d_b_ctp, + rmm::cuda_stream_view stream) +{ + // convert from tree_meta_t to column_tree_csr + stream.synchronize(); + + h_tree_meta_t a{cudf::detail::make_std_vector_async(d_a.node_categories, stream), + cudf::detail::make_std_vector_async(d_a.parent_node_ids, stream), + cudf::detail::make_std_vector_async(d_a.node_range_begin, stream), + cudf::detail::make_std_vector_async(d_a.node_range_end, stream)}; + + h_column_tree b{cudf::detail::make_std_vector_async(d_b_csr.rowidx, stream), + cudf::detail::make_std_vector_async(d_b_csr.colidx, stream), + cudf::detail::make_std_vector_async(d_b_ctp.categories, stream), + cudf::detail::make_std_vector_async(d_b_ctp.mapped_ids, stream)}; + + auto a_max_row_offsets = cudf::detail::make_std_vector_async(d_a_max_row_offsets, stream); + auto b_max_row_offsets = cudf::detail::make_std_vector_async(d_b_ctp.max_row_offsets, stream); + + stream.synchronize(); + + auto num_nodes = a.parent_node_ids.size(); + if (b.rowidx.size() != num_nodes + 1) { return false; } + + for (auto pos = b.rowidx[0]; pos < 
b.rowidx[1]; pos++) { + auto v = b.colidx[pos]; + if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[0]) { return false; } + } + for (size_t u = 1; u < num_nodes; u++) { + auto v = b.colidx[b.rowidx[u]]; + if (a.parent_node_ids[b.column_ids[u]] != b.column_ids[v]) { return false; } + for (auto pos = b.rowidx[u] + 1; pos < b.rowidx[u + 1]; pos++) { + v = b.colidx[pos]; + if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[u]) { return false; } + } + } + for (size_t u = 0; u < num_nodes; u++) { + if (a.node_categories[b.column_ids[u]] != b.categories[u]) { return false; } + } + for (size_t u = 0; u < num_nodes; u++) { + if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) { return false; } + } + return true; +} + +struct JsonColumnTreeTests : public cudf::test::BaseFixture {}; + +TEST_F(JsonColumnTreeTests, SimpleLines) +{ + auto const stream = cudf::get_default_stream(); + std::string const input = + R"( {} + { "a": { "y" : 6, "z": [] }} + { "a" : { "x" : 8, "y": 9 }, "b" : {"x": 10 , "z": 11 }} )"; // Prepare input & output buffers + cudf::string_scalar d_scalar(input, true, stream); + auto d_input = cudf::device_span{d_scalar.data(), + static_cast(d_scalar.size())}; + + cudf::io::json_reader_options options{}; + options.enable_lines(true); + + // Parse the JSON and get the token stream + auto const [tokens_gpu, token_indices_gpu] = cudf::io::json::detail::get_token_stream( + d_input, options, stream, rmm::mr::get_current_device_resource()); + + // Get the JSON's tree representation + auto gpu_tree = cuio_json::detail::get_tree_representation( + tokens_gpu, token_indices_gpu, false, stream, rmm::mr::get_current_device_resource()); + + auto tup = + cuio_json::detail::records_orient_tree_traversal(d_input, + gpu_tree, + false, + options.is_enabled_lines(), + stream, + rmm::mr::get_current_device_resource()); + auto& gpu_col_id = std::get<0>(tup); + auto& gpu_row_offsets = std::get<1>(tup); + + auto const num_nodes = gpu_col_id.size(); + 
rmm::device_uvector sorted_col_ids(gpu_col_id.size(), stream); // make a copy + thrust::copy( + rmm::exec_policy(stream), gpu_col_id.begin(), gpu_col_id.end(), sorted_col_ids.begin()); + + // sort by {col_id} on {node_ids} stable + rmm::device_uvector node_ids(gpu_col_id.size(), stream); + thrust::sequence(rmm::exec_policy(stream), node_ids.begin(), node_ids.end()); + thrust::stable_sort_by_key( + rmm::exec_policy(stream), sorted_col_ids.begin(), sorted_col_ids.end(), node_ids.begin()); + + cudf::size_type const row_array_parent_col_id = [&]() { + cudf::size_type value = cudf::io::json::parent_node_sentinel; + auto const list_node_index = options.is_enabled_lines() ? 0 : 1; + CUDF_CUDA_TRY(cudaMemcpyAsync(&value, + gpu_col_id.data() + list_node_index, + sizeof(cudf::size_type), + cudaMemcpyDefault, + stream.value())); + stream.synchronize(); + return value; + }(); + + auto [d_column_tree, d_unique_col_ids, d_max_row_offsets] = + cudf::io::json::detail::reduce_to_column_tree(gpu_tree, + gpu_col_id, + sorted_col_ids, + node_ids, + gpu_row_offsets, + false, + row_array_parent_col_id, + stream); + + auto [d_column_tree_csr, d_column_tree_properties] = + cudf::io::json::experimental::detail::reduce_to_column_tree( + gpu_tree, gpu_col_id, gpu_row_offsets, false, row_array_parent_col_id, stream); + + auto iseq = check_equality( + d_column_tree, d_max_row_offsets, d_column_tree_csr, d_column_tree_properties, stream); + // assert equality between csr and meta formats + assert(iseq == true); +}