isl-org · ManuCorrea · Jan 31, 2026 · Jan 31, 2026 · Feb 2, 2026 · Feb 2, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -72,6 +72,8 @@
 -   Download tarballs instead of Git repos for "3rdparty/uvatlas" (PR #7371)
 -   macOS x86_64 not longer supported, only macOS arm64 is supported.
 -   Python 3.13+3.14 support
+-   Fix color artifacts in PointCloud projection due to CUDA race condition [(PR #7424)](https://github.com/isl-org/Open3D/pull/7424)
+
 
 
 ## 0.13

diff --git a/cpp/open3d/t/geometry/kernel/PointCloudCUDA.cu b/cpp/open3d/t/geometry/kernel/PointCloudCUDA.cu
@@ -30,6 +30,18 @@ void ProjectCUDA(
     const float* point_colors_ptr =
             has_colors ? colors.value().get().GetDataPtr<float>() : nullptr;
 
+    // 1. Determine image size
+    const int width = depth.GetShape(1);
+    const int height = depth.GetShape(0);
+
+    // buffer for id and depth
+    core::Tensor index_buffer = core::Tensor::Full(
+            {height, width},
+            0xFFFFFFFFFFFFFFFF,  // The largest possible 64-bit number
+            core::UInt64, depth.GetDevice());
+
+    uint64_t* index_buffer_ptr = index_buffer.GetDataPtr<uint64_t>();
+
     TransformIndexer transform_indexer(intrinsics, extrinsics, 1.0f);
     NDArrayIndexer depth_indexer(depth, 2);
 
@@ -60,13 +72,27 @@ void ProjectCUDA(
                 if (d_old > 0) {
                     atomicMinf(depth_ptr, d_old);
                 }
+
+                int64_t row = static_cast<int64_t>(v);
+                int64_t col = static_cast<int64_t>(u);
+
+                // Get the specific address for this pixel (u, v)
+                uint64_t* pixel_address =
+                        index_buffer_ptr + (row * width + col);
+
+                // Prepare the packed value of id and depth
+                uint32_t d_as_uint = __float_as_uint(d);
+                uint64_t val = (static_cast<uint64_t>(d_as_uint) << 32) |
+                               (uint32_t)workload_idx;
+
+                atomicMin(reinterpret_cast<unsigned long long*>(pixel_address),
+                          static_cast<unsigned long long>(val));
             });
 
     // Pass 2: color map
     if (!has_colors) return;
 
     NDArrayIndexer color_indexer(image_colors.value().get(), 2);
-    float precision_bound = depth_scale * 1e-4;
     core::ParallelFor(
             depth.GetDevice(), n, [=] OPEN3D_DEVICE(int64_t workload_idx) {
                 float x = points_ptr[3 * workload_idx + 0];
@@ -79,17 +105,22 @@ void ProjectCUDA(
 
                 // coordinate in image (in pixel)
                 transform_indexer.Project(xc, yc, zc, &u, &v);
+                u = round(u);
+                v = round(v);
+
                 if (!depth_indexer.InBoundary(u, v) || zc <= 0 ||
                     zc > depth_max) {
                     return;
                 }
 
-                float dmap = *depth_indexer.GetDataPtr<float>(
-                        static_cast<int64_t>(u), static_cast<int64_t>(v));
-                float d = zc * depth_scale;
-                if (d < dmap + precision_bound) {
-                    float* color_ptr = color_indexer.GetDataPtr<float>(
-                            static_cast<int64_t>(u), static_cast<int64_t>(v));
+                int64_t pu = static_cast<int64_t>(u),
+                        pv = static_cast<int64_t>(v);
+                uint64_t final_val = index_buffer_ptr[pv * width + pu];
+                uint32_t winning_idx =
+                        static_cast<uint32_t>(final_val & 0xFFFFFFFF);
+
+                if (winning_idx == (uint32_t)workload_idx) {
+                    float* color_ptr = color_indexer.GetDataPtr<float>(u, v);
                     color_ptr[0] = point_colors_ptr[3 * workload_idx + 0];
                     color_ptr[1] = point_colors_ptr[3 * workload_idx + 1];
                     color_ptr[2] = point_colors_ptr[3 * workload_idx + 2];

diff --git a/cpp/tests/t/geometry/PointCloud.cpp b/cpp/tests/t/geometry/PointCloud.cpp
@@ -822,6 +822,62 @@ TEST_P(PointCloudPermuteDevices, CreateFromRGBDOrDepthImageWithNormals) {
     EXPECT_TRUE(pcd_out.GetPointNormals().AllClose(t_normal_ref));
 }
 
+TEST_P(PointCloudPermuteDevices, ProjectToDepthImage) {
+    core::Device device = GetParam();
+    if (device.IsSYCL()) GTEST_SKIP() << "Not Implemented!";
+
+    const int width = 8, height = 8;
+    float depth_scale = 1.f, depth_max = 10.f;
+    core::Tensor positions = core::Tensor::Init<float>(
+            {{0.f, 0.f, 1.f}, {0.1f, 0.1f, 1.f}}, device);
+    t::geometry::PointCloud pcd(positions);
+    core::Tensor intrinsics = core::Tensor::Init<double>(
+            {{10., 0., 4.}, {0., 10., 4.}, {0., 0., 1.}}, device);
+    core::Tensor extrinsics = core::Tensor::Eye(4, core::Float64, device);
+
+    t::geometry::Image depth_img = pcd.ProjectToDepthImage(
+            width, height, intrinsics, extrinsics, depth_scale, depth_max);
+
+    EXPECT_EQ(depth_img.AsTensor().GetShape(),
+              core::SizeVector({height, width, 1}));
+    std::vector<float> depth_flat = depth_img.AsTensor().ToFlatVector<float>();
+    EXPECT_GT(*std::max_element(depth_flat.begin(), depth_flat.end()), 0.f);
+}
+
+TEST_P(PointCloudPermuteDevices, ProjectToRGBDImage) {
+    using ::testing::ElementsAre;
+
+    core::Device device = GetParam();
+    if (device.IsSYCL()) GTEST_SKIP() << "Not Implemented!";
+
+    const int width = 8, height = 8;
+    float depth_scale = 1.f, depth_max = 10.f;
+    core::Tensor positions = core::Tensor::Init<float>(
+            {{0.f, 0.f, 1.f}, {0.1f, 0.f, 1.f}, {0.f, 0.1f, 1.f}}, device);
+    core::Tensor colors = core::Tensor::Init<float>(
+            {{1.f, 0.f, 0.f}, {0.f, 1.f, 0.f}, {0.f, 0.f, 1.f}}, device);
+    t::geometry::PointCloud pcd(device);
+    pcd.SetPointPositions(positions);
+    pcd.SetPointColors(colors);
+    core::Tensor intrinsics = core::Tensor::Init<double>(
+            {{10., 0., 4.}, {0., 10., 4.}, {0., 0., 1.}}, device);
+    core::Tensor extrinsics = core::Tensor::Eye(4, core::Float64, device);
+
+    t::geometry::RGBDImage rgbd = pcd.ProjectToRGBDImage(
+            width, height, intrinsics, extrinsics, depth_scale, depth_max);
+
+    EXPECT_THAT(rgbd.depth_.AsTensor().GetShape(),
+                ElementsAre(height, width, 1));
+    EXPECT_THAT(rgbd.color_.AsTensor().GetShape(),
+                ElementsAre(height, width, 3));
+    std::vector<float> depth_flat =
+            rgbd.depth_.AsTensor().ToFlatVector<float>();
+    std::vector<float> color_flat =
+            rgbd.color_.AsTensor().ToFlatVector<float>();
+    EXPECT_GT(*std::max_element(depth_flat.begin(), depth_flat.end()), 0.f);
+    EXPECT_GT(*std::max_element(color_flat.begin(), color_flat.end()), 0.f);
+}
+
 TEST_P(PointCloudPermuteDevices, SelectByMask) {
     core::Device device = GetParam();
 

diff --git a/python/test/t/geometry/test_pointcloud.py b/python/test/t/geometry/test_pointcloud.py
@@ -214,3 +214,108 @@ def test_metrics():
         metrics.cpu().numpy(),
         (0.22436734, np.sqrt(3) / 10, 100. / 8, 400. / 8, 700. / 8, 100.),
         rtol=1e-6)
+
+
+@pytest.mark.parametrize("device", list_devices())
+def test_project_to_depth_image(device):
+    """Project point cloud to depth image; check shape and non-empty depth."""
+    dtype = o3c.float32
+    width, height = 8, 8
+    # Points in front of camera (z > 0): (0, 0, 1) and (0.1, 0.1, 1) project
+    # with intrinsics fx=fy=10, cx=cy=4 to pixel (4,4) and (5,5) approx.
+    positions = o3c.Tensor([[0.0, 0.0, 1.0], [0.1, 0.1, 1.0]], dtype, device)
+    pcd = o3d.t.geometry.PointCloud(positions)
+    intrinsics = o3c.Tensor([[10.0, 0, 4.0], [0, 10.0, 4.0], [0, 0, 1.0]],
+                            o3c.float64)
+    extrinsics = o3c.Tensor(np.eye(4), o3c.float64)
+
+    depth_img = pcd.project_to_depth_image(width,
+                                           height,
+                                           intrinsics,
+                                           extrinsics,
+                                           depth_scale=1.0,
+                                           depth_max=10.0)
+
+    depth_tensor = depth_img.as_tensor()
+    assert depth_tensor.shape == (height, width, 1)
+    depth_np = depth_tensor.cpu().numpy()
+    assert depth_np.shape == (height, width, 1)
+    # At least one pixel should have depth (points project into image)
+    assert np.any(depth_np > 0)
+
+
+@pytest.mark.parametrize("device", list_devices())
+def test_project_to_rgbd_image(device):
+    """Project colored point cloud to RGBD image; check shapes and content."""
+    dtype = o3c.float32
+    width, height = 8, 8
+    positions = o3c.Tensor([[0.0, 0.0, 1.0], [0.1, 0.0, 1.0], [0.0, 0.1, 1.0]],
+                           dtype, device)
+    colors = o3c.Tensor([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]],
+                        dtype, device)
+    pcd = o3d.t.geometry.PointCloud(positions)
+    pcd.point.colors = colors
+    pcd = pcd.to(device)
+    intrinsics = o3c.Tensor([[10.0, 0, 4.0], [0, 10.0, 4.0], [0, 0, 1.0]],
+                            o3c.float64)
+    extrinsics = o3c.Tensor(np.eye(4), o3c.float64)
+
+    rgbd = pcd.project_to_rgbd_image(width,
+                                     height,
+                                     intrinsics,
+                                     extrinsics,
+                                     depth_scale=1.0,
+                                     depth_max=10.0)
+
+    assert rgbd.depth.as_tensor().shape == (height, width, 1)
+    assert rgbd.color.as_tensor().shape == (height, width, 3)
+    depth_np = rgbd.depth.as_tensor().cpu().numpy()
+    color_np = rgbd.color.as_tensor().cpu().numpy()
+    assert np.any(depth_np > 0), "depth should have at least one hit"
+    # Where depth > 0, color should not be all zeros (no black-artifact pixels)
+    hit_mask = (depth_np.squeeze(-1) > 0).astype(bool)
+    hit_colors = color_np[hit_mask]
+    assert hit_colors.size > 0
+    assert np.any(hit_colors > 0), "projected pixels should have non-zero color"
+
+
+@pytest.mark.parametrize("device", list_devices())
+def test_project_to_rgbd_image_cpu_cuda_consistent(device):
+    """When both CPU and CUDA are available, RGBD projection should match."""
+    if o3c.cuda.device_count() == 0:
+        pytest.skip("CUDA not available")
+    width, height = 16, 16
+    np.random.seed(42)
+    n = 50
+    positions_np = np.random.randn(n, 3).astype(np.float32) * 0.2
+    positions_np[:, 2] = 1.0 + np.abs(positions_np[:, 2])  # z in [1, ~2]
+    colors_np = np.random.rand(n, 3).astype(np.float32)
+    intrinsics = o3c.Tensor([[20.0, 0, 8.0], [0, 20.0, 8.0], [0, 0, 1.0]],
+                            o3c.float64)
+    extrinsics = o3c.Tensor(np.eye(4), o3c.float64)
+
+    pcd_cpu = o3d.t.geometry.PointCloud(o3c.Tensor(positions_np))
+    pcd_cpu.point.colors = o3c.Tensor(colors_np)
+    rgbd_cpu = pcd_cpu.project_to_rgbd_image(width,
+                                             height,
+                                             intrinsics,
+                                             extrinsics,
+                                             depth_scale=1.0,
+                                             depth_max=5.0)
+
+    pcd_cuda = pcd_cpu.to(o3c.Device("CUDA:0"))
+    rgbd_cuda = pcd_cuda.project_to_rgbd_image(width,
+                                               height,
+                                               intrinsics,
+                                               extrinsics,
+                                               depth_scale=1.0,
+                                               depth_max=5.0)
+
+    np.testing.assert_allclose(rgbd_cpu.depth.as_tensor().cpu().numpy(),
+                               rgbd_cuda.depth.as_tensor().cpu().numpy(),
+                               rtol=1e-5,
+                               atol=1e-5)
+    np.testing.assert_allclose(rgbd_cpu.color.as_tensor().cpu().numpy(),
+                               rgbd_cuda.color.as_tensor().cpu().numpy(),
+                               rtol=1e-5,
+                               atol=1e-5)