From 26b39e09dbe37a73351f3134b50f7c8e26cbca0d Mon Sep 17 00:00:00 2001 From: yo-torre Date: Sat, 31 Jan 2026 22:51:54 +0100 Subject: [PATCH 1/5] Added index buffer to avoid race condition in pass 2 --- .../t/geometry/kernel/PointCloudCUDA.cu | 42 +++++++++++++++---- 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/cpp/open3d/t/geometry/kernel/PointCloudCUDA.cu b/cpp/open3d/t/geometry/kernel/PointCloudCUDA.cu index 42c82ddddc4..acf91caad21 100644 --- a/cpp/open3d/t/geometry/kernel/PointCloudCUDA.cu +++ b/cpp/open3d/t/geometry/kernel/PointCloudCUDA.cu @@ -30,6 +30,20 @@ void ProjectCUDA( const float* point_colors_ptr = has_colors ? colors.value().get().GetDataPtr() : nullptr; + // 1. Determine image size + const int width = depth.GetShape(1); + const int height = depth.GetShape(0); + + // buffer for id and depth + core::Tensor index_buffer = core::Tensor::Full( + {height, width}, + 0xFFFFFFFFFFFFFFFF, // The largest possible 64-bit number + core::UInt64, + depth.GetDevice() + ); + + uint64_t* index_buffer_ptr = index_buffer.GetDataPtr(); + TransformIndexer transform_indexer(intrinsics, extrinsics, 1.0f); NDArrayIndexer depth_indexer(depth, 2); @@ -60,13 +74,25 @@ void ProjectCUDA( if (d_old > 0) { atomicMinf(depth_ptr, d_old); } + + int64_t row = static_cast(v); + int64_t col = static_cast(u); + + // Get the specific address for this pixel (u, v) + uint64_t* pixel_address = index_buffer_ptr + (row * width + col); + + // Prepare the packed value of id and depth + uint32_t d_as_uint = __float_as_uint(d); + uint64_t val = (static_cast(d_as_uint) << 32) | (uint32_t)workload_idx; + + atomicMin(reinterpret_cast(pixel_address), + static_cast(val)); }); // Pass 2: color map if (!has_colors) return; NDArrayIndexer color_indexer(image_colors.value().get(), 2); - float precision_bound = depth_scale * 1e-4; core::ParallelFor( depth.GetDevice(), n, [=] OPEN3D_DEVICE(int64_t workload_idx) { float x = points_ptr[3 * workload_idx + 0]; @@ -79,17 +105,19 @@ void ProjectCUDA( // coordinate in image (in pixel) transform_indexer.Project(xc, yc, zc, &u, &v); + u = round(u); v = round(v); + if (!depth_indexer.InBoundary(u, v) || zc <= 0 || zc > depth_max) { return; } - float dmap = *depth_indexer.GetDataPtr( - static_cast(u), static_cast(v)); - float d = zc * depth_scale; - if (d < dmap + precision_bound) { - float* color_ptr = color_indexer.GetDataPtr( - static_cast(u), static_cast(v)); + int64_t pu = static_cast(u), pv = static_cast(v); + uint64_t final_val = index_buffer_ptr[pv * width + pu]; + uint32_t winning_idx = static_cast(final_val & 0xFFFFFFFF); + + if (winning_idx == (uint32_t)workload_idx) { + float* color_ptr = color_indexer.GetDataPtr(u, v); color_ptr[0] = point_colors_ptr[3 * workload_idx + 0]; color_ptr[1] = point_colors_ptr[3 * workload_idx + 1]; color_ptr[2] = point_colors_ptr[3 * workload_idx + 2]; From 44771177da0c44504fa269cfd7fac0b002ca86ca Mon Sep 17 00:00:00 2001 From: yo-torre Date: Sat, 31 Jan 2026 23:05:36 +0100 Subject: [PATCH 2/5] Applied Open3D code style --- .../t/geometry/kernel/PointCloudCUDA.cu | 25 +++++++++++-------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/cpp/open3d/t/geometry/kernel/PointCloudCUDA.cu b/cpp/open3d/t/geometry/kernel/PointCloudCUDA.cu index acf91caad21..56c95712d06 100644 --- a/cpp/open3d/t/geometry/kernel/PointCloudCUDA.cu +++ b/cpp/open3d/t/geometry/kernel/PointCloudCUDA.cu @@ -36,11 +36,9 @@ void ProjectCUDA( // buffer for id and depth core::Tensor index_buffer = core::Tensor::Full( - {height, width}, - 0xFFFFFFFFFFFFFFFF, // The largest possible 64-bit number - core::UInt64, - depth.GetDevice() - ); + {height, width}, + 0xFFFFFFFFFFFFFFFF, // The largest possible 64-bit number + core::UInt64, depth.GetDevice()); uint64_t* index_buffer_ptr = index_buffer.GetDataPtr(); @@ -79,14 +77,16 @@ void ProjectCUDA( int64_t col = static_cast(u); // Get the specific address for this pixel (u, v) - uint64_t* pixel_address = index_buffer_ptr + (row * width + col); + uint64_t* pixel_address = + index_buffer_ptr + (row * width + col); // Prepare the packed value of id and depth uint32_t d_as_uint = __float_as_uint(d); - uint64_t val = (static_cast(d_as_uint) << 32) | (uint32_t)workload_idx; + uint64_t val = (static_cast(d_as_uint) << 32) | + (uint32_t)workload_idx; atomicMin(reinterpret_cast(pixel_address), - static_cast(val)); + static_cast(val)); }); // Pass 2: color map @@ -105,16 +105,19 @@ void ProjectCUDA( // coordinate in image (in pixel) transform_indexer.Project(xc, yc, zc, &u, &v); - u = round(u); v = round(v); + u = round(u); + v = round(v); if (!depth_indexer.InBoundary(u, v) || zc <= 0 || zc > depth_max) { return; } - int64_t pu = static_cast(u), pv = static_cast(v); + int64_t pu = static_cast(u), + pv = static_cast(v); uint64_t final_val = index_buffer_ptr[pv * width + pu]; - uint32_t winning_idx = static_cast(final_val & 0xFFFFFFFF); + uint32_t winning_idx = + static_cast(final_val & 0xFFFFFFFF); if (winning_idx == (uint32_t)workload_idx) { float* color_ptr = color_indexer.GetDataPtr(u, v); From a36d6482c43c68a39b4a8476b2bd620a241412a5 Mon Sep 17 00:00:00 2001 From: yo-torre Date: Mon, 2 Feb 2026 22:11:42 +0100 Subject: [PATCH 3/5] Added python project_to_depth_image test --- python/test/t/geometry/test_pointcloud.py | 94 +++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/python/test/t/geometry/test_pointcloud.py b/python/test/t/geometry/test_pointcloud.py index 74f5d5f27c0..5beac461443 100644 --- a/python/test/t/geometry/test_pointcloud.py +++ b/python/test/t/geometry/test_pointcloud.py @@ -214,3 +214,97 @@ def test_metrics(): metrics.cpu().numpy(), (0.22436734, np.sqrt(3) / 10, 100. / 8, 400. / 8, 700. / 8, 100.), rtol=1e-6) + + +@pytest.mark.parametrize("device", list_devices()) +def test_project_to_depth_image(device): + """Project point cloud to depth image; check shape and non-empty depth.""" + dtype = o3c.float32 + width, height = 8, 8 + # Points in front of camera (z > 0): (0, 0, 1) and (0.1, 0.1, 1) project + # with intrinsics fx=fy=10, cx=cy=4 to pixel (4,4) and (5,5) approx. + positions = o3c.Tensor([[0.0, 0.0, 1.0], [0.1, 0.1, 1.0]], dtype, device) + pcd = o3d.t.geometry.PointCloud(positions) + intrinsics = o3c.Tensor([[10.0, 0, 4.0], [0, 10.0, 4.0], [0, 0, 1.0]], + o3c.float64) + extrinsics = o3c.Tensor(np.eye(4), o3c.float64) + + depth_img = pcd.project_to_depth_image( + width, height, intrinsics, extrinsics, + depth_scale=1.0, depth_max=10.0) + + depth_tensor = depth_img.as_tensor() + assert depth_tensor.shape == (height, width, 1) + depth_np = depth_tensor.cpu().numpy() + assert depth_np.shape == (height, width, 1) + # At least one pixel should have depth (points project into image) + assert np.any(depth_np > 0) + + +@pytest.mark.parametrize("device", list_devices()) +def test_project_to_rgbd_image(device): + """Project colored point cloud to RGBD image; check shapes and content.""" + dtype = o3c.float32 + width, height = 8, 8 + positions = o3c.Tensor( + [[0.0, 0.0, 1.0], [0.1, 0.0, 1.0], [0.0, 0.1, 1.0]], dtype, device) + colors = o3c.Tensor( + [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]], dtype, device) + pcd = o3d.t.geometry.PointCloud(positions) + pcd.point.colors = colors + pcd = pcd.to(device) + intrinsics = o3c.Tensor([[10.0, 0, 4.0], [0, 10.0, 4.0], [0, 0, 1.0]], + o3c.float64) + extrinsics = o3c.Tensor(np.eye(4), o3c.float64) + + rgbd = pcd.project_to_rgbd_image( + width, height, intrinsics, extrinsics, + depth_scale=1.0, depth_max=10.0) + + assert rgbd.depth.as_tensor().shape == (height, width, 1) + assert rgbd.color.as_tensor().shape == (height, width, 3) + depth_np = rgbd.depth.as_tensor().cpu().numpy() + color_np = rgbd.color.as_tensor().cpu().numpy() + assert np.any(depth_np > 0), "depth should have at least one hit" + # Where depth > 0, color should not be all zeros (no black-artifact pixels) + hit_mask = (depth_np.squeeze(-1) > 0).astype(bool) + hit_colors = color_np[hit_mask] + assert hit_colors.size > 0 + assert np.any(hit_colors > 0), "projected pixels should have non-zero color" + + +@pytest.mark.parametrize("device", list_devices()) +def test_project_to_rgbd_image_cpu_cuda_consistent(device): + """When both CPU and CUDA are available, RGBD projection should match.""" + if o3c.cuda.device_count() == 0: + pytest.skip("CUDA not available") + dtype = o3c.float32 + width, height = 16, 16 + np.random.seed(42) + n = 50 + positions_np = np.random.randn(n, 3).astype(np.float32) * 0.2 + positions_np[:, 2] = 1.0 + np.abs(positions_np[:, 2]) # z in [1, ~2] + colors_np = np.random.rand(n, 3).astype(np.float32) + intrinsics = o3c.Tensor([[20.0, 0, 8.0], [0, 20.0, 8.0], [0, 0, 1.0]], + o3c.float64) + extrinsics = o3c.Tensor(np.eye(4), o3c.float64) + + pcd_cpu = o3d.t.geometry.PointCloud(o3c.Tensor(positions_np)) + pcd_cpu.point.colors = o3c.Tensor(colors_np) + rgbd_cpu = pcd_cpu.project_to_rgbd_image( + width, height, intrinsics, extrinsics, + depth_scale=1.0, depth_max=5.0) + + pcd_cuda = pcd_cpu.to(o3c.Device("CUDA:0")) + rgbd_cuda = pcd_cuda.project_to_rgbd_image( + width, height, intrinsics, extrinsics, + depth_scale=1.0, depth_max=5.0) + + np.testing.assert_allclose( + rgbd_cpu.depth.as_tensor().cpu().numpy(), + rgbd_cuda.depth.as_tensor().cpu().numpy(), + rtol=1e-5, atol=1e-5) + np.testing.assert_allclose( + rgbd_cpu.color.as_tensor().cpu().numpy(), + rgbd_cuda.color.as_tensor().cpu().numpy(), + rtol=1e-5, atol=1e-5) From 5b97c3e8e2f2afa59959da627a73b8bf006f3ae7 Mon Sep 17 00:00:00 2001 From: yo-torre Date: Mon, 2 Feb 2026 22:13:06 +0100 Subject: [PATCH 4/5] Correct python styling --- python/test/t/geometry/test_pointcloud.py | 62 ++++++++++++++--------- 1 file changed, 37 insertions(+), 25 deletions(-) diff --git a/python/test/t/geometry/test_pointcloud.py b/python/test/t/geometry/test_pointcloud.py index 5beac461443..1c5e108e5f9 100644 --- a/python/test/t/geometry/test_pointcloud.py +++ b/python/test/t/geometry/test_pointcloud.py @@ -229,9 +229,12 @@ def test_project_to_depth_image(device): o3c.float64) extrinsics = o3c.Tensor(np.eye(4), o3c.float64) - depth_img = pcd.project_to_depth_image( - width, height, intrinsics, extrinsics, - depth_scale=1.0, depth_max=10.0) + depth_img = pcd.project_to_depth_image(width, + height, + intrinsics, + extrinsics, + depth_scale=1.0, + depth_max=10.0) depth_tensor = depth_img.as_tensor() assert depth_tensor.shape == (height, width, 1) @@ -246,10 +249,10 @@ def test_project_to_rgbd_image(device): """Project colored point cloud to RGBD image; check shapes and content.""" dtype = o3c.float32 width, height = 8, 8 - positions = o3c.Tensor( - [[0.0, 0.0, 1.0], [0.1, 0.0, 1.0], [0.0, 0.1, 1.0]], dtype, device) - colors = o3c.Tensor( - [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]], dtype, device) + positions = o3c.Tensor([[0.0, 0.0, 1.0], [0.1, 0.0, 1.0], [0.0, 0.1, 1.0]], + dtype, device) + colors = o3c.Tensor([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]], + dtype, device) pcd = o3d.t.geometry.PointCloud(positions) pcd.point.colors = colors pcd = pcd.to(device) @@ -257,9 +260,12 @@ def test_project_to_rgbd_image(device): o3c.float64) extrinsics = o3c.Tensor(np.eye(4), o3c.float64) - rgbd = pcd.project_to_rgbd_image( - width, height, intrinsics, extrinsics, - depth_scale=1.0, depth_max=10.0) + rgbd = pcd.project_to_rgbd_image(width, + height, + intrinsics, + extrinsics, + depth_scale=1.0, + depth_max=10.0) assert rgbd.depth.as_tensor().shape == (height, width, 1) assert rgbd.color.as_tensor().shape == (height, width, 3) @@ -291,20 +297,26 @@ def test_project_to_rgbd_image_cpu_cuda_consistent(device): pcd_cpu = o3d.t.geometry.PointCloud(o3c.Tensor(positions_np)) pcd_cpu.point.colors = o3c.Tensor(colors_np) - rgbd_cpu = pcd_cpu.project_to_rgbd_image( - width, height, intrinsics, extrinsics, - depth_scale=1.0, depth_max=5.0) + rgbd_cpu = pcd_cpu.project_to_rgbd_image(width, + height, + intrinsics, + extrinsics, + depth_scale=1.0, + depth_max=5.0) pcd_cuda = pcd_cpu.to(o3c.Device("CUDA:0")) - rgbd_cuda = pcd_cuda.project_to_rgbd_image( - width, height, intrinsics, extrinsics, - depth_scale=1.0, depth_max=5.0) - - np.testing.assert_allclose( - rgbd_cpu.depth.as_tensor().cpu().numpy(), - rgbd_cuda.depth.as_tensor().cpu().numpy(), - rtol=1e-5, atol=1e-5) - np.testing.assert_allclose( - rgbd_cpu.color.as_tensor().cpu().numpy(), - rgbd_cuda.color.as_tensor().cpu().numpy(), - rtol=1e-5, atol=1e-5) + rgbd_cuda = pcd_cuda.project_to_rgbd_image(width, + height, + intrinsics, + extrinsics, + depth_scale=1.0, + depth_max=5.0) + + np.testing.assert_allclose(rgbd_cpu.depth.as_tensor().cpu().numpy(), + rgbd_cuda.depth.as_tensor().cpu().numpy(), + rtol=1e-5, + atol=1e-5) + np.testing.assert_allclose(rgbd_cpu.color.as_tensor().cpu().numpy(), + rgbd_cuda.color.as_tensor().cpu().numpy(), + rtol=1e-5, + atol=1e-5) From d1ee6904ce3dd57d40fa80b84cc5ef04a96f3d26 Mon Sep 17 00:00:00 2001 From: yo-torre Date: Mon, 2 Feb 2026 22:38:29 +0100 Subject: [PATCH 5/5] Added cpp tests and CHANGELOG --- CHANGELOG.md | 2 + cpp/tests/t/geometry/PointCloud.cpp | 56 +++++++++++++++++++++++ python/test/t/geometry/test_pointcloud.py | 1 - 3 files changed, 58 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2af40328b26..697bf954e0a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -72,6 +72,8 @@ - Download tarballs instead of Git repos for "3rdparty/uvatlas" (PR #7371) - macOS x86_64 not longer supported, only macOS arm64 is supported. - Python 3.13+3.14 support +- Fix color artifacts in PointCloud projection due to CUDA race condition [(PR #7424)](https://github.com/isl-org/Open3D/pull/7424) + ## 0.13 diff --git a/cpp/tests/t/geometry/PointCloud.cpp b/cpp/tests/t/geometry/PointCloud.cpp index 38e6c555786..2ef3d2b9ee3 100644 --- a/cpp/tests/t/geometry/PointCloud.cpp +++ b/cpp/tests/t/geometry/PointCloud.cpp @@ -822,6 +822,62 @@ TEST_P(PointCloudPermuteDevices, CreateFromRGBDOrDepthImageWithNormals) { EXPECT_TRUE(pcd_out.GetPointNormals().AllClose(t_normal_ref)); } +TEST_P(PointCloudPermuteDevices, ProjectToDepthImage) { + core::Device device = GetParam(); + if (device.IsSYCL()) GTEST_SKIP() << "Not Implemented!"; + + const int width = 8, height = 8; + float depth_scale = 1.f, depth_max = 10.f; + core::Tensor positions = core::Tensor::Init( + {{0.f, 0.f, 1.f}, {0.1f, 0.1f, 1.f}}, device); + t::geometry::PointCloud pcd(positions); + core::Tensor intrinsics = core::Tensor::Init( + {{10., 0., 4.}, {0., 10., 4.}, {0., 0., 1.}}, device); + core::Tensor extrinsics = core::Tensor::Eye(4, core::Float64, device); + + t::geometry::Image depth_img = pcd.ProjectToDepthImage( + width, height, intrinsics, extrinsics, depth_scale, depth_max); + + EXPECT_EQ(depth_img.AsTensor().GetShape(), + core::SizeVector({height, width, 1})); + std::vector depth_flat = depth_img.AsTensor().ToFlatVector(); + EXPECT_GT(*std::max_element(depth_flat.begin(), depth_flat.end()), 0.f); +} + +TEST_P(PointCloudPermuteDevices, ProjectToRGBDImage) { + using ::testing::ElementsAre; + + core::Device device = GetParam(); + if (device.IsSYCL()) GTEST_SKIP() << "Not Implemented!"; + + const int width = 8, height = 8; + float depth_scale = 1.f, depth_max = 10.f; + core::Tensor positions = core::Tensor::Init( + {{0.f, 0.f, 1.f}, {0.1f, 0.f, 1.f}, {0.f, 0.1f, 1.f}}, device); + core::Tensor colors = core::Tensor::Init( + {{1.f, 0.f, 0.f}, {0.f, 1.f, 0.f}, {0.f, 0.f, 1.f}}, device); + t::geometry::PointCloud pcd(device); + pcd.SetPointPositions(positions); + pcd.SetPointColors(colors); + core::Tensor intrinsics = core::Tensor::Init( + {{10., 0., 4.}, {0., 10., 4.}, {0., 0., 1.}}, device); + core::Tensor extrinsics = core::Tensor::Eye(4, core::Float64, device); + + t::geometry::RGBDImage rgbd = pcd.ProjectToRGBDImage( + width, height, intrinsics, extrinsics, depth_scale, depth_max); + + EXPECT_THAT(rgbd.depth_.AsTensor().GetShape(), + ElementsAre(height, width, 1)); + EXPECT_THAT(rgbd.color_.AsTensor().GetShape(), + ElementsAre(height, width, 3)); + std::vector depth_flat = + rgbd.depth_.AsTensor().ToFlatVector(); + std::vector color_flat = + rgbd.color_.AsTensor().ToFlatVector(); + EXPECT_GT(*std::max_element(depth_flat.begin(), depth_flat.end()), 0.f); + EXPECT_GT(*std::max_element(color_flat.begin(), color_flat.end()), 0.f); +} + TEST_P(PointCloudPermuteDevices, SelectByMask) { core::Device device = GetParam(); diff --git a/python/test/t/geometry/test_pointcloud.py b/python/test/t/geometry/test_pointcloud.py index 1c5e108e5f9..9feccd7307c 100644 --- a/python/test/t/geometry/test_pointcloud.py +++ b/python/test/t/geometry/test_pointcloud.py @@ -284,7 +284,6 @@ def test_project_to_rgbd_image_cpu_cuda_consistent(device): """When both CPU and CUDA are available, RGBD projection should match.""" if o3c.cuda.device_count() == 0: pytest.skip("CUDA not available") - dtype = o3c.float32 width, height = 16, 16 np.random.seed(42) n = 50