diff --git a/cpp/benchmarks/t/geometry/PointCloud.cpp b/cpp/benchmarks/t/geometry/PointCloud.cpp
index 39cc5f3368a..814587320a6 100644
--- a/cpp/benchmarks/t/geometry/PointCloud.cpp
+++ b/cpp/benchmarks/t/geometry/PointCloud.cpp
@@ -69,7 +69,7 @@ void LegacyVoxelDownSample(benchmark::State& state, float voxel_size) {
 void VoxelDownSample(benchmark::State& state,
                      const core::Device& device,
                      float voxel_size,
-                     const core::HashBackendType& backend) {
+                     const std::string& reduction) {
     t::geometry::PointCloud pcd;
     // t::io::CreatePointCloudFromFile lacks support of remove_inf_points and
     // remove_nan_points
@@ -77,10 +77,10 @@ void VoxelDownSample(benchmark::State& state,
     pcd = pcd.To(device);
 
     // Warm up.
-    pcd.VoxelDownSample(voxel_size, backend);
+    pcd.VoxelDownSample(voxel_size, reduction);
 
     for (auto _ : state) {
-        pcd.VoxelDownSample(voxel_size, backend);
+        pcd.VoxelDownSample(voxel_size, reduction);
         core::cuda::Synchronize(device);
     }
 }
@@ -387,28 +387,34 @@ BENCHMARK_CAPTURE(ToLegacyPointCloud, CUDA, core::Device("CUDA:0"))
         ->Unit(benchmark::kMillisecond);
 #endif
 
-#define ENUM_VOXELSIZE(DEVICE, BACKEND)                                       \
-    BENCHMARK_CAPTURE(VoxelDownSample, BACKEND##_0_01, DEVICE, 0.01, BACKEND) \
-            ->Unit(benchmark::kMillisecond);                                  \
-    BENCHMARK_CAPTURE(VoxelDownSample, BACKEND##_0_02, DEVICE, 0.08, BACKEND) \
-            ->Unit(benchmark::kMillisecond);                                  \
-    BENCHMARK_CAPTURE(VoxelDownSample, BACKEND##_0_04, DEVICE, 0.04, BACKEND) \
-            ->Unit(benchmark::kMillisecond);                                  \
-    BENCHMARK_CAPTURE(VoxelDownSample, BACKEND##_0_08, DEVICE, 0.08, BACKEND) \
-            ->Unit(benchmark::kMillisecond);                                  \
-    BENCHMARK_CAPTURE(VoxelDownSample, BACKEND##_0_16, DEVICE, 0.16, BACKEND) \
-            ->Unit(benchmark::kMillisecond);                                  \
-    BENCHMARK_CAPTURE(VoxelDownSample, BACKEND##_0_32, DEVICE, 0.32, BACKEND) \
+#define ENUM_VOXELSIZE(DEVICE, REDUCTION)                              \
+    BENCHMARK_CAPTURE(VoxelDownSample, REDUCTION##_0_01, DEVICE, 0.01, \
+                      REDUCTION)                                       \
+            ->Unit(benchmark::kMillisecond);                           \
+    BENCHMARK_CAPTURE(VoxelDownSample, REDUCTION##_0_02, DEVICE, 0.08, \
+                      REDUCTION)                                       \
+            ->Unit(benchmark::kMillisecond);                           \
+    BENCHMARK_CAPTURE(VoxelDownSample, REDUCTION##_0_04, DEVICE, 0.04, \
+                      REDUCTION)                                       \
+            ->Unit(benchmark::kMillisecond);                           \
+    BENCHMARK_CAPTURE(VoxelDownSample, REDUCTION##_0_08, DEVICE, 0.08, \
+                      REDUCTION)                                       \
+            ->Unit(benchmark::kMillisecond);                           \
+    BENCHMARK_CAPTURE(VoxelDownSample, REDUCTION##_0_16, DEVICE, 0.16, \
+                      REDUCTION)                                       \
+            ->Unit(benchmark::kMillisecond);                           \
+    BENCHMARK_CAPTURE(VoxelDownSample, REDUCTION##_0_32, DEVICE, 0.32, \
+                      REDUCTION)                                       \
             ->Unit(benchmark::kMillisecond);
 
+const std::string kReductionMean = "mean";
 #ifdef BUILD_CUDA_MODULE
-#define ENUM_VOXELDOWNSAMPLE_BACKEND()                                  \
-    ENUM_VOXELSIZE(core::Device("CPU:0"), core::HashBackendType::TBB)   \
-    ENUM_VOXELSIZE(core::Device("CUDA:0"), core::HashBackendType::Slab) \
-    ENUM_VOXELSIZE(core::Device("CUDA:0"), core::HashBackendType::StdGPU)
+#define ENUM_VOXELDOWNSAMPLE_REDUCTION()                  \
+    ENUM_VOXELSIZE(core::Device("CPU:0"), kReductionMean) \
+    ENUM_VOXELSIZE(core::Device("CUDA:0"), kReductionMean)
 #else
-#define ENUM_VOXELDOWNSAMPLE_BACKEND() \
-    ENUM_VOXELSIZE(core::Device("CPU:0"), core::HashBackendType::TBB)
+#define ENUM_VOXELDOWNSAMPLE_REDUCTION() \
+    ENUM_VOXELSIZE(core::Device("CPU:0"), kReductionMean)
 #endif
 
 BENCHMARK_CAPTURE(LegacyVoxelDownSample, Legacy_0_01, 0.01)
@@ -423,7 +429,7 @@ BENCHMARK_CAPTURE(LegacyVoxelDownSample, Legacy_0_16, 0.16)
         ->Unit(benchmark::kMillisecond);
 BENCHMARK_CAPTURE(LegacyVoxelDownSample, Legacy_0_32, 0.32)
         ->Unit(benchmark::kMillisecond);
-ENUM_VOXELDOWNSAMPLE_BACKEND()
+ENUM_VOXELDOWNSAMPLE_REDUCTION()
 
 BENCHMARK_CAPTURE(LegacyUniformDownSample, Legacy_2, 2)
         ->Unit(benchmark::kMillisecond);
diff --git a/cpp/open3d/core/CMakeLists.txt b/cpp/open3d/core/CMakeLists.txt
index 43f0802f4d0..d42e645da39 100644
--- a/cpp/open3d/core/CMakeLists.txt
+++ b/cpp/open3d/core/CMakeLists.txt
@@ -47,6 +47,8 @@ target_sources(core PRIVATE
     kernel/BinaryEWCPU.cpp
     kernel/IndexGetSet.cpp
     kernel/IndexGetSetCPU.cpp
+    kernel/IndexReduction.cpp
+    kernel/IndexReductionCPU.cpp
     kernel/Kernel.cpp
     kernel/NonZero.cpp
     kernel/NonZeroCPU.cpp
@@ -90,6 +92,7 @@ if (BUILD_CUDA_MODULE)
         kernel/ArangeCUDA.cu
         kernel/BinaryEWCUDA.cu
         kernel/IndexGetSetCUDA.cu
+        kernel/IndexReductionCUDA.cu
         kernel/NonZeroCUDA.cu
         kernel/ReductionCUDA.cu
         kernel/UnaryEWCUDA.cu
diff --git a/cpp/open3d/core/Tensor.cpp b/cpp/open3d/core/Tensor.cpp
index 69e04acfb94..9ee485bd63a 100644
--- a/cpp/open3d/core/Tensor.cpp
+++ b/cpp/open3d/core/Tensor.cpp
@@ -22,6 +22,7 @@
 #include "open3d/core/TensorFunction.h"
 #include "open3d/core/TensorKey.h"
 #include "open3d/core/kernel/Arange.h"
+#include "open3d/core/kernel/IndexReduction.h"
 #include "open3d/core/kernel/Kernel.h"
 #include "open3d/core/linalg/Det.h"
 #include "open3d/core/linalg/Inverse.h"
@@ -955,6 +956,43 @@ void Tensor::IndexSet(const std::vector<Tensor>& index_tensors,
                      aip.GetIndexedShape(), aip.GetIndexedStrides());
 }
 
+void Tensor::IndexAdd_(int64_t dim, const Tensor& index, const Tensor& src) {
+    if (index.NumDims() != 1) {
+        utility::LogError("IndexAdd_ only supports 1D index tensors.");
+    }
+
+    // Dim check.
+    if (dim < 0) {
+        utility::LogError("IndexAdd_ only supports sum at non-negative dim.");
+    }
+    if (NumDims() <= dim) {
+        utility::LogError("Sum dim {} exceeds tensor dim {}.", dim, NumDims());
+    }
+
+    // shape check
+    if (src.NumDims() != NumDims()) {
+        utility::LogError(
+                "IndexAdd_ only supports src tensor with same dimension as "
+                "this tensor.");
+    }
+    for (int64_t d = 0; d < NumDims(); ++d) {
+        if (d != dim && src.GetShape(d) != GetShape(d)) {
+            utility::LogError(
+                    "IndexAdd_ only supports src tensor with same shape as "
+                    "this "
+                    "tensor except dim {}.",
+                    dim);
+        }
+    }
+
+    // Type check.
+    AssertTensorDtype(index, core::Int64);
+    AssertTensorDtype(*this, src.GetDtype());
+
+    // Apply kernel.
+    kernel::IndexAdd_(dim, index, src, *this);
+}
+
 Tensor Tensor::Permute(const SizeVector& dims) const {
     // Check dimension size
     if (static_cast<int64_t>(dims.size()) != NumDims()) {
diff --git a/cpp/open3d/core/Tensor.h b/cpp/open3d/core/Tensor.h
index 8b4d0280077..38422055a30 100644
--- a/cpp/open3d/core/Tensor.h
+++ b/cpp/open3d/core/Tensor.h
@@ -575,6 +575,16 @@ class Tensor : public IsDevice {
     void IndexSet(const std::vector<Tensor>& index_tensors,
                   const Tensor& src_tensor);
 
+    /// \brief Advanced in-place reduction by index.
+    ///
+    /// See
+    /// https://pytorch.org/docs/stable/generated/torch.Tensor.index_add_.html
+    ///
+    /// self[index[i]] = operator(self[index[i]], src[i]).
+    ///
+    /// Note: Only support 1D index and src tensors now.
+    void IndexAdd_(int64_t dim, const Tensor& index, const Tensor& src);
+
     /// \brief Permute (dimension shuffle) the Tensor, returns a view.
     ///
     /// \param dims The desired ordering of dimensions.
diff --git a/cpp/open3d/core/kernel/IndexReduction.cpp b/cpp/open3d/core/kernel/IndexReduction.cpp
new file mode 100644
index 00000000000..128da56e370
--- /dev/null
+++ b/cpp/open3d/core/kernel/IndexReduction.cpp
@@ -0,0 +1,49 @@
+// ----------------------------------------------------------------------------
+// -                        Open3D: www.open3d.org                            -
+// ----------------------------------------------------------------------------
+// Copyright (c) 2018-2023 www.open3d.org
+// SPDX-License-Identifier: MIT
+// ----------------------------------------------------------------------------
+
+#include "open3d/core/kernel/IndexReduction.h"
+
+#include "open3d/utility/Logging.h"
+
+namespace open3d {
+namespace core {
+namespace kernel {
+
+void IndexAdd_(int64_t dim,
+               const Tensor& index,
+               const Tensor& src,
+               Tensor& dst) {
+    // Permute the reduction dimension to the first.
+    SizeVector permute = {};
+    for (int64_t d = 0; d <= dim; ++d) {
+        if (d == 0) {
+            permute.push_back(dim);
+        } else {
+            permute.push_back(d - 1);
+        }
+    }
+    for (int64_t d = dim + 1; d < src.NumDims(); ++d) {
+        permute.push_back(d);
+    }
+
+    auto src_permute = src.Permute(permute);
+    auto dst_permute = dst.Permute(permute);
+
+    if (dst.IsCPU()) {
+        IndexAddCPU_(dim, index, src_permute, dst_permute);
+    } else if (dst.IsCUDA()) {
+#ifdef BUILD_CUDA_MODULE
+        IndexAddCUDA_(dim, index, src_permute, dst_permute);
+#endif
+    } else {
+        utility::LogError("IndexAdd_: Unimplemented device");
+    }
+}
+
+}  // namespace kernel
+}  // namespace core
+}  // namespace open3d
diff --git a/cpp/open3d/core/kernel/IndexReduction.h b/cpp/open3d/core/kernel/IndexReduction.h
new file mode 100644
index 00000000000..7035b53210b
--- /dev/null
+++ b/cpp/open3d/core/kernel/IndexReduction.h
@@ -0,0 +1,36 @@
+// ----------------------------------------------------------------------------
+// -                        Open3D: www.open3d.org                            -
+// ----------------------------------------------------------------------------
+// Copyright (c) 2018-2023 www.open3d.org
+// SPDX-License-Identifier: MIT
+// ----------------------------------------------------------------------------
+
+#pragma once
+
+#include "open3d/core/Tensor.h"
+#include "open3d/utility/Logging.h"
+
+namespace open3d {
+namespace core {
+namespace kernel {
+
+void IndexAdd_(int64_t dim,
+               const Tensor& index,
+               const Tensor& src,
+               Tensor& dst);
+
+void IndexAddCPU_(int64_t dim,
+                  const Tensor& index,
+                  const Tensor& src,
+                  Tensor& dst);
+
+#ifdef BUILD_CUDA_MODULE
+void IndexAddCUDA_(int64_t dim,
+                   const Tensor& index,
+                   const Tensor& src,
+                   Tensor& dst);
+#endif
+
+}  // namespace kernel
+}  // namespace core
+}  // namespace open3d
diff --git a/cpp/open3d/core/kernel/IndexReductionCPU.cpp b/cpp/open3d/core/kernel/IndexReductionCPU.cpp
new file mode 100644
index 00000000000..4458832ec96
--- /dev/null
+++ b/cpp/open3d/core/kernel/IndexReductionCPU.cpp
@@ -0,0 +1,79 @@
+// ----------------------------------------------------------------------------
+// -                        Open3D: www.open3d.org                            -
+// ----------------------------------------------------------------------------
+// Copyright (c) 2018-2023 www.open3d.org
+// SPDX-License-Identifier: MIT
+// ----------------------------------------------------------------------------
+
+#include "open3d/core/Dispatch.h"
+#include "open3d/core/Indexer.h"
+#include "open3d/core/Tensor.h"
+#include "open3d/utility/Logging.h"
+
+namespace open3d {
+namespace core {
+namespace kernel {
+
+template <typename func_t>
+void LaunchIndexReductionKernel(int64_t dim,
+                                const Device& device,
+                                const Tensor& index,
+                                const Tensor& src,
+                                Tensor& dst,
+                                const func_t& element_kernel) {
+    // index: [N,], src: [N, D], dst: [M, D]
+    // In Indexer, output shape defines the actual master strides.
+    // However, in IndexAdd_, input dominates the iterations.
+    // So put dst (output) at indexer's input, and src (input) at output.
+    Indexer indexer({dst}, src, DtypePolicy::NONE);
+
+    // Index is simply a 1D contiguous tensor, with a different stride
+    // behavior to src. So use raw pointer for simplicity.
+    auto index_ptr = index.GetDataPtr<int64_t>();
+
+    int64_t broadcasting_elems = 1;
+    for (int64_t d = 1; d < src.NumDims(); ++d) {
+        broadcasting_elems *= src.GetShape(d);
+    }
+    auto element_func = [=](int64_t workload_idx) {
+        int reduction_idx = workload_idx / broadcasting_elems;
+        int broadcasting_idx = workload_idx % broadcasting_elems;
+
+        const int64_t idx = index_ptr[reduction_idx];
+        int64_t dst_idx = idx * broadcasting_elems + broadcasting_idx;
+
+        void* src_ptr = indexer.GetOutputPtr(0, workload_idx);
+        void* dst_ptr = indexer.GetInputPtr(0, dst_idx);
+        // Note input and output is switched here to adapt to the indexer
+        element_kernel(src_ptr, dst_ptr);
+    };
+
+    // TODO: check in detail
+    // No OpenMP could be faster, otherwise there would be thousands of atomics.
+    for (int64_t d = 0; d < indexer.NumWorkloads(); ++d) {
+        element_func(d);
+    }
+}
+
+template <typename scalar_t>
+static OPEN3D_HOST_DEVICE void CPUSumKernel(const void* src, void* dst) {
+    scalar_t* dst_s_ptr = static_cast<scalar_t*>(dst);
+    const scalar_t* src_s_ptr = static_cast<const scalar_t*>(src);
+    *dst_s_ptr += *src_s_ptr;
+}
+
+void IndexAddCPU_(int64_t dim,
+                  const Tensor& index,
+                  const Tensor& src,
+                  Tensor& dst) {
+    DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(src.GetDtype(), [&]() {
+        LaunchIndexReductionKernel(dim, src.GetDevice(), index, src, dst,
+                                   [](const void* src, void* dst) {
+                                       CPUSumKernel<scalar_t>(src, dst);
+                                   });
+    });
+}
+
+}  // namespace kernel
+}  // namespace core
+}  // namespace open3d
diff --git a/cpp/open3d/core/kernel/IndexReductionCUDA.cu b/cpp/open3d/core/kernel/IndexReductionCUDA.cu
new file mode 100644
index 00000000000..24c913a46af
--- /dev/null
+++ b/cpp/open3d/core/kernel/IndexReductionCUDA.cu
@@ -0,0 +1,82 @@
+// ----------------------------------------------------------------------------
+// -                        Open3D: www.open3d.org                            -
+// ----------------------------------------------------------------------------
+// Copyright (c) 2018-2023 www.open3d.org
+// SPDX-License-Identifier: MIT
+// ----------------------------------------------------------------------------
+#include <type_traits>
+
+#include "open3d/core/CUDAUtils.h"
+#include "open3d/core/Dispatch.h"
+#include "open3d/core/Indexer.h"
+#include "open3d/core/ParallelFor.h"
+#include "open3d/core/Tensor.h"
+#include "open3d/t/geometry/kernel/GeometryMacros.h"
+
+namespace open3d {
+namespace core {
+namespace kernel {
+
+template <typename func_t>
+void LaunchIndexReductionKernel(int64_t dim,
+                                const Device& device,
+                                const Tensor& index,
+                                const Tensor& src,
+                                Tensor& dst,
+                                const func_t& element_kernel) {
+    OPEN3D_ASSERT_HOST_DEVICE_LAMBDA(func_t);
+
+    // index: [N,], src: [N, D], dst: [M, D]
+    // In Indexer, output shape defines the actual master strides.
+    // However, in IndexAdd_, input dominates the iterations.
+    // So put dst (output) at indexer's input, and src (input) at output.
+    Indexer indexer({dst}, src, DtypePolicy::NONE);
+
+    // Index is simply a 1D contiguous tensor, with a different stride
+    // behavior to src. So use raw pointer for simplicity.
+    auto index_ptr = index.GetDataPtr<int64_t>();
+
+    int64_t broadcasting_elems = 1;
+    for (int64_t d = 1; d < src.NumDims(); ++d) {
+        broadcasting_elems *= src.GetShape(d);
+    }
+    auto element_func = [=] OPEN3D_HOST_DEVICE(int64_t workload_idx) {
+        int reduction_idx = workload_idx / broadcasting_elems;
+        int broadcasting_idx = workload_idx % broadcasting_elems;
+
+        const int64_t idx = index_ptr[reduction_idx];
+        int64_t dst_idx = idx * broadcasting_elems + broadcasting_idx;
+
+        void* src_ptr = indexer.GetOutputPtr(0, workload_idx);
+        void* dst_ptr = indexer.GetInputPtr(0, dst_idx);
+        // Note input and output is switched here to adapt to the indexer.
+        element_kernel(src_ptr, dst_ptr);
+    };
+
+    ParallelFor(device, indexer.NumWorkloads(), element_func);
+    OPEN3D_GET_LAST_CUDA_ERROR("LaunchIndexReductionKernel failed.");
+}
+
+template <typename scalar_t>
+static OPEN3D_HOST_DEVICE void CUDASumKernel(const void* src, void* dst) {
+    scalar_t* dst_s_ptr = static_cast<scalar_t*>(dst);
+    const scalar_t* src_s_ptr = static_cast<const scalar_t*>(src);
+    atomicAdd(dst_s_ptr, *src_s_ptr);
+}
+
+void IndexAddCUDA_(int64_t dim,
+                   const Tensor& index,
+                   const Tensor& src,
+                   Tensor& dst) {
+    DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(src.GetDtype(), [&]() {
+        LaunchIndexReductionKernel(
+                dim, src.GetDevice(), index, src, dst,
+                [] OPEN3D_HOST_DEVICE(const void* src, void* dst) {
+                    CUDASumKernel<scalar_t>(src, dst);
+                });
+    });
+}
+
+}  // namespace kernel
+}  // namespace core
+}  // namespace open3d
diff --git a/cpp/open3d/t/geometry/PointCloud.cpp b/cpp/open3d/t/geometry/PointCloud.cpp
index 6ec721ffa4a..0ff124075dc 100644
--- a/cpp/open3d/t/geometry/PointCloud.cpp
+++ b/cpp/open3d/t/geometry/PointCloud.cpp
@@ -275,30 +275,66 @@ PointCloud PointCloud::SelectByIndex(
     return pcd;
 }
 
-PointCloud PointCloud::VoxelDownSample(
-        double voxel_size, const core::HashBackendType &backend) const {
+PointCloud PointCloud::VoxelDownSample(double voxel_size,
+                                       const std::string &reduction) const {
     if (voxel_size <= 0) {
         utility::LogError("voxel_size must be positive.");
     }
-    core::Tensor points_voxeld = GetPointPositions() / voxel_size;
-    core::Tensor points_voxeli = points_voxeld.Floor().To(core::Int64);
-
-    core::HashSet points_voxeli_hashset(points_voxeli.GetLength(), core::Int64,
-                                        {3}, device_, backend);
-
-    core::Tensor buf_indices, masks;
-    points_voxeli_hashset.Insert(points_voxeli, buf_indices, masks);
+    if (reduction != "mean") {
+        utility::LogError("Reduction can only be 'mean' for VoxelDownSample.");
+    }
 
-    PointCloud pcd_down(GetPointPositions().GetDevice());
+    // Discretize voxels.
+    core::Tensor voxeld = GetPointPositions() / voxel_size;
+    core::Tensor voxeli = voxeld.Floor().To(core::Int64);
+
+    // Map discrete voxels to indices.
+    core::HashSet voxeli_hashset(voxeli.GetLength(), core::Int64, {3}, device_);
+
+    // Index map: (0, original_points) -> (0, unique_points).
+    core::Tensor index_map_point2voxel, masks;
+    voxeli_hashset.Insert(voxeli, index_map_point2voxel, masks);
+
+    // Insert and find are two different passes.
+    // In the insertion pass, -1/false is returned for already existing
+    // downsampled corresponding points.
+    // In the find pass, actual indices are returned corresponding downsampled
+    // points.
+    voxeli_hashset.Find(voxeli, index_map_point2voxel, masks);
+    index_map_point2voxel = index_map_point2voxel.To(core::Int64);
+
+    int64_t num_points = voxeli.GetLength();
+    int64_t num_voxels = voxeli_hashset.Size();
+
+    // Count the number of points in each voxel.
+    auto voxel_num_points =
+            core::Tensor::Zeros({num_voxels}, core::Float32, device_);
+    voxel_num_points.IndexAdd_(
+            /*dim*/ 0, index_map_point2voxel,
+            core::Tensor::Ones({num_points}, core::Float32, device_));
+
+    // Create a new point cloud.
+    PointCloud pcd_down(device_);
     for (auto &kv : point_attr_) {
-        if (kv.first == "positions") {
-            pcd_down.SetPointAttr(kv.first,
-                                  points_voxeli.IndexGet({masks}).To(
-                                          GetPointPositions().GetDtype()) *
-                                          voxel_size);
+        auto point_attr = kv.second;
+
+        std::string attr_string = kv.first;
+        auto attr_dtype = point_attr.GetDtype();
+
+        // Use float to avoid unsupported tensor types.
+        core::SizeVector attr_shape = point_attr.GetShape();
+        attr_shape[0] = num_voxels;
+        auto voxel_attr =
+                core::Tensor::Zeros(attr_shape, core::Float32, device_);
+        if (reduction == "mean") {
+            voxel_attr.IndexAdd_(0, index_map_point2voxel,
+                                 point_attr.To(core::Float32));
+            voxel_attr /= voxel_num_points.View({-1, 1});
+            voxel_attr = voxel_attr.To(attr_dtype);
         } else {
-            pcd_down.SetPointAttr(kv.first, kv.second.IndexGet({masks}));
+            utility::LogError("Unsupported reduction type {}.", reduction);
         }
+        pcd_down.SetPointAttr(attr_string, voxel_attr);
     }
 
     return pcd_down;
diff --git a/cpp/open3d/t/geometry/PointCloud.h b/cpp/open3d/t/geometry/PointCloud.h
index 68827fa489c..d7eb9ccd227 100644
--- a/cpp/open3d/t/geometry/PointCloud.h
+++ b/cpp/open3d/t/geometry/PointCloud.h
@@ -329,9 +329,9 @@ class PointCloud : public Geometry, public DrawableGeometry {
     /// \brief Downsamples a point cloud with a specified voxel size.
     ///
     /// \param voxel_size Voxel size. A positive number.
+    /// \param reduction Reduction type. Currently only support "mean".
     PointCloud VoxelDownSample(double voxel_size,
-                               const core::HashBackendType &backend =
-                                       core::HashBackendType::Default) const;
+                               const std::string &reduction = "mean") const;
 
     /// \brief Downsamples a point cloud by selecting every kth index point and
     /// its attributes.
diff --git a/cpp/pybind/t/geometry/pointcloud.cpp b/cpp/pybind/t/geometry/pointcloud.cpp
index 6ca0d68f2e1..8d10c6dae45 100644
--- a/cpp/pybind/t/geometry/pointcloud.cpp
+++ b/cpp/pybind/t/geometry/pointcloud.cpp
@@ -72,8 +72,8 @@ The attributes of the point cloud have different levels::
     # The shape must be (N, 3). The device of "positions" determines the device
     # of the point cloud.
     pcd.point.positions = o3d.core.Tensor([[0, 0, 0],
-                                              [1, 1, 1],
-                                              [2, 2, 2]], dtype, device)
+                                           [1, 1, 1],
+                                           [2, 2, 2]], dtype, device)
 
     # Common attributes: "normals", "colors".
     # Common attributes are used in built-in point cloud operations. The
@@ -82,11 +82,11 @@ The attributes of the point cloud have different levels::
     # "normals" and "colors" must have shape (N, 3) and must be on the same
     # device as the point cloud.
     pcd.point.normals = o3d.core.Tensor([[0, 0, 1],
-                                            [0, 1, 0],
-                                            [1, 0, 0]], dtype, device)
+                                         [0, 1, 0],
+                                         [1, 0, 0]], dtype, device)
     pcd.point.colors = o3d.core.Tensor([[0.0, 0.0, 0.0],
-                                            [0.1, 0.1, 0.1],
-                                            [0.2, 0.2, 0.2]], dtype, device)
+                                        [0.1, 0.1, 0.1],
+                                        [0.2, 0.2, 0.2]], dtype, device)
 
     # User-defined attributes.
     # You can also attach custom attributes. The value tensor must be on the
@@ -211,12 +211,32 @@ The attributes of the point cloud have different levels::
                    "output point cloud.");
     pointcloud.def(
             "voxel_down_sample",
-            [](const PointCloud& pointcloud, const double voxel_size) {
-                return pointcloud.VoxelDownSample(
-                        voxel_size, core::HashBackendType::Default);
+            [](const PointCloud& pointcloud, const double voxel_size,
+               const std::string& reduction) {
+                return pointcloud.VoxelDownSample(voxel_size, reduction);
             },
-            "Downsamples a point cloud with a specified voxel size.",
-            "voxel_size"_a);
+            "Downsamples a point cloud with a specified voxel size and a "
+            "reduction type.",
+            "voxel_size"_a, "reduction"_a = "mean",
+            R"doc(Downsamples a point cloud with a specified voxel size.
+
+Args:
+    voxel_size (float): The size of the voxel used to downsample the point cloud.
+
+    reduction (str): The approach to pool point properties in a voxel. Can only be "mean" at current.
+
+Return:
+    A downsampled point cloud with point properties reduced in each voxel.
+
+Example:
+
+    We will load the Eagle dataset, downsample it, and show the result::
+
+        eagle = o3d.data.EaglePointCloud()
+        pcd = o3d.t.io.read_point_cloud(eagle.path)
+        pcd_down = pcd.voxel_down_sample(voxel_size=0.05)
+        o3d.visualization.draw([{'name': 'pcd', 'geometry': pcd}, {'name': 'pcd_down', 'geometry': pcd_down}])
+    )doc");
     pointcloud.def("uniform_down_sample", &PointCloud::UniformDownSample,
                    "Downsamples a point cloud by selecting every kth index "
                    "point and its attributes.",
@@ -239,8 +259,8 @@ The attributes of the point cloud have different levels::
 sphere of a given search radius. 
 
 Args:
-    nb_points. Number of neighbor points required within the radius.
-    search_radius. Radius of the sphere.
+    nb_points: Number of neighbor points required within the radius.
+    search_radius: Radius of the sphere.
 
 Return:
     Tuple of filtered point cloud and boolean mask tensor for selected values
@@ -253,8 +273,8 @@ sphere of a given search radius.
 neighbors in average. This function is not recommended to use on GPU. 
 
 Args:
-    nb_neighbors. Number of neighbors around the target point.
-    std_ratio. Standard deviation ratio.
+    nb_neighbors: Number of neighbors around the target point.
+    std_ratio: Standard deviation ratio.
 
 Return:
     Tuple of filtered point cloud and boolean mask tensor for selected values
@@ -269,8 +289,8 @@ neighbors in average. This function is not recommended to use on GPU.
 infinite value. It also removes the corresponding attributes. 
     
 Args:
-    remove_nan. Remove NaN values from the PointCloud.
-    remove_infinite. Remove infinite values from the PointCloud.
+    remove_nan: Remove NaN values from the PointCloud.
+    remove_infinite: Remove infinite values from the PointCloud.
 
 Return:
     Tuple of filtered point cloud and boolean mask tensor for selected values
@@ -325,7 +345,7 @@ infinite value. It also removes the corresponding attributes.
             "with_normals"_a = false,
             "Factory function to create a pointcloud (with only 'points') from "
             "a depth image and a camera model.\n\n Given depth value d at (u, "
-            "v) image coordinate, the corresponding 3d point is:\n z = d / "
+            "v) image coordinate, the corresponding 3d point is:\n\n z = d / "
             "depth_scale\n\n x = (u - cx) * z / fx\n\n y = (v - cy) * z / fy");
     pointcloud.def_static(
             "create_from_rgbd_image", &PointCloud::CreateFromRGBDImage,
@@ -370,14 +390,16 @@ This is a wrapper for a CPU implementation and a copy of the point cloud data
 and resulting visible triangle mesh and indiecs will be made.
 
 Args:
-    camera_location. All points not visible from that location will be removed.
-    radius. The radius of the spherical projection.
+    camera_location: All points not visible from that location will be removed.
+
+    radius: The radius of the spherical projection.
 
 Return:
     Tuple of visible triangle mesh and indices of visible points on the same
     device as the point cloud.
 
 Example:
+
     We use armadillo mesh to compute the visible points from given camera::
 
         # Convert mesh to a point cloud and estimate dimensions.
@@ -406,15 +428,18 @@ with Noise', 1996. This is a wrapper for a CPU implementation and a copy of the
 point cloud data and resulting labels will be made.
 
 Args:
-    eps. Density parameter that is used to find neighbouring points.
-    min_points. Minimum number of points to form a cluster.
-    print_progress (default False). If 'True' the progress is visualized in the console.
+    eps: Density parameter that is used to find neighbouring points.
+
+    min_points: Minimum number of points to form a cluster.
+
+print_progress (default False): If 'True' the progress is visualized in the console.
 
 Return:
     A Tensor list of point labels on the same device as the point cloud, -1
     indicates noise according to the algorithm.
 
 Example:
+
     We use Redwood dataset for demonstration::
 
         import matplotlib.pyplot as plt
@@ -433,23 +458,25 @@ point cloud data and resulting labels will be made.
     pointcloud.def(
             "segment_plane", &PointCloud::SegmentPlane,
             "distance_threshold"_a = 0.01, "ransac_n"_a = 3,
-            "num_iterations"_a = 100, "probability"_a = 0.99999999,
+            "num_iterations"_a = 100, "probability"_a = 0.999,
             R"(Segments a plane in the point cloud using the RANSAC algorithm.
 This is a wrapper for a CPU implementation and a copy of the point cloud data and
 resulting plane model and inlier indiecs will be made.
 
 Args:
-    distance_threshold (default 0.01). Max distance a point can be from the plane
-    model, and still be considered an inlier.
-    ransac_n (default 3). Number of initial points to be considered inliers in each iteration.
-    num_iterations (default 100). Maximum number of iterations.
-    probability (default 0.99999999). Expected probability of finding the optimal plane.
+    distance_threshold (default 0.01): Max distance a point can be from the plane model, and still be considered an inlier.
+
+    ransac_n (default 3): Number of initial points to be considered inliers in each iteration.
+    num_iterations (default 100): Maximum number of iterations.
+
+    probability (default 0.999): Expected probability of finding the optimal plane.
 
 Return:
-    Tuple of the plane model ax + by + cz + d = 0 and the indices of
+    Tuple of the plane model `ax + by + cz + d = 0` and the indices of
     the plane inliers on the same device as the point cloud.
 
 Example:
+
     We use Redwood dataset to compute its plane model and inliers::
 
         sample_pcd_data = o3d.data.PCDPointCloud()
@@ -467,16 +494,11 @@ resulting plane model and inlier indiecs will be made.
             R"doc(Compute the convex hull of a triangle mesh using qhull. This runs on the CPU.
 
 Args:
-    joggle_inputs (default False). Handle precision problems by
-    randomly perturbing the input data. Set to True if perturbing the input
-    iis acceptable but you need convex simplicial output. If False,
-    neighboring facets may be merged in case of precision problems. See
-    `QHull docs <http://www.qhull.org/html/qh-impre.htm#joggle>`__ for more
-    details.
+    joggle_inputs (default False): Handle precision problems by randomly perturbing the input data. Set to True if perturbing the input is acceptable but you need convex simplicial output. If False, neighboring facets may be merged in case of precision problems. See `QHull docs <http://www.qhull.org/html/qh-impre.htm#joggle>`__ for more details.
 
 Return:
     TriangleMesh representing the convexh hull. This contains an
-    extra vertex property "point_indices" that contains the index of the
+    extra vertex property `point_indices` that contains the index of the
     corresponding vertex in the original mesh.
 
 Example:
@@ -495,9 +517,9 @@ The implementation is inspired by the PCL implementation. Reference:
 https://pointclouds.org/documentation/classpcl_1_1_boundary_estimation.html
 
 Args:
-    radius. Neighbor search radius parameter.
-    max_nn (default 30). Maximum number of neighbors to search.
-    angle_threshold (default 90.0). Angle threshold to decide if a point is on the boundary.
+    radius: Neighbor search radius parameter.
+    max_nn (default 30): Maximum number of neighbors to search.
+    angle_threshold (default 90.0): Angle threshold to decide if a point is on the boundary.
 
 Return:
     Tensor of boundary points and its boolean mask tensor.
diff --git a/cpp/tests/core/Tensor.cpp b/cpp/tests/core/Tensor.cpp
index 46ca362735c..ad377274169 100644
--- a/cpp/tests/core/Tensor.cpp
+++ b/cpp/tests/core/Tensor.cpp
@@ -1415,6 +1415,40 @@ TEST_P(TensorPermuteDevicePairs, IndexSetBroadcast) {
                                   0, 0, 0, 0, 20, 20, 20, 0, 0, 0, 0, 0}));
 }
 
+TEST_P(TensorPermuteDevices, IndexAdd_) {
+    core::Device device = GetParam();
+
+    const int tensor_size = 100;
+
+    // Test one: dst_t[np.array([0, 1, 2, 3, 4])] += np.array([1, 1, 1, 1, 1])
+    {
+        core::Tensor index =
+                core::Tensor::Arange(0, tensor_size, 1, core::Int64, device);
+        core::Tensor src =
+                core::Tensor::Zeros({tensor_size}, core::Float32, device);
+        src.IndexAdd_(
+                /*dim=*/0, index,
+                core::Tensor::Ones({tensor_size}, core::Float32, device));
+        EXPECT_TRUE(src.AllClose(
+                core::Tensor::Ones({tensor_size}, core::Float32, device)));
+    }
+
+    // Test two: dst_t[np.array([0, 0, 0, 0, 0])] += np.array([1, 1, 1, 1, 1])
+    {
+        core::Tensor index =
+                core::Tensor::Zeros({tensor_size}, core::Int64, device);
+        core::Tensor src =
+                core::Tensor::Zeros({tensor_size}, core::Float32, device);
+        src.IndexAdd_(
+                /*dim=*/0, index,
+                core::Tensor::Ones({tensor_size}, core::Float32, device));
+        EXPECT_EQ(src[0].Item<float>(), tensor_size);
+        EXPECT_TRUE(src.Slice(0, 1, tensor_size)
+                            .AllClose(core::Tensor::Zeros(
+                                    {tensor_size - 1}, core::Float32, device)));
+    }
+}
+
 TEST_P(TensorPermuteDevices, Permute) {
     core::Device device = GetParam();
 
diff --git a/cpp/tests/t/geometry/PointCloud.cpp b/cpp/tests/t/geometry/PointCloud.cpp
index 1134a33c472..0b4d7365de6 100644
--- a/cpp/tests/t/geometry/PointCloud.cpp
+++ b/cpp/tests/t/geometry/PointCloud.cpp
@@ -901,7 +901,7 @@ TEST_P(PointCloudPermuteDevices, VoxelDownSample) {
                                       device));
     auto pcd_small_down = pcd_small.VoxelDownSample(1);
     EXPECT_TRUE(pcd_small_down.GetPointPositions().AllClose(
-            core::Tensor::Init<float>({{0, 0, 0}}, device)));
+            core::Tensor::Init<float>({{0.375, 0.375, 0.575}}, device)));
 }
 
 TEST_P(PointCloudPermuteDevices, UniformDownSample) {
diff --git a/python/test/t/geometry/test_pointcloud.py b/python/test/t/geometry/test_pointcloud.py
index 87db1ff7dd4..e6c194477ca 100644
--- a/python/test/t/geometry/test_pointcloud.py
+++ b/python/test/t/geometry/test_pointcloud.py
@@ -161,7 +161,7 @@ def test_member_functions(device):
 
     pcd_small_down = pcd.voxel_down_sample(1)
     assert pcd_small_down.point.positions.allclose(
-        o3c.Tensor([[0, 0, 0]], dtype, device))
+        o3c.Tensor([[0.375, 0.375, 0.575]], dtype, device))
 
 
 def test_extrude_rotation():