diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2af40328b26..697bf954e0a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -72,6 +72,8 @@
 - Download tarballs instead of Git repos for "3rdparty/uvatlas" (PR #7371)
 - macOS x86_64 not longer supported, only macOS arm64 is supported.
 - Python 3.13+3.14 support
+- Fix color artifacts in PointCloud projection due to CUDA race condition [(PR #7424)](https://github.com/isl-org/Open3D/pull/7424)
+
 
 ## 0.13
 
diff --git a/cpp/open3d/t/geometry/kernel/PointCloudCUDA.cu b/cpp/open3d/t/geometry/kernel/PointCloudCUDA.cu
index 42c82ddddc4..56c95712d06 100644
--- a/cpp/open3d/t/geometry/kernel/PointCloudCUDA.cu
+++ b/cpp/open3d/t/geometry/kernel/PointCloudCUDA.cu
@@ -30,6 +30,18 @@ void ProjectCUDA(
     const float* point_colors_ptr =
             has_colors ? colors.value().get().GetDataPtr<float>() : nullptr;
 
+    // 1. Determine image size
+    const int width = depth.GetShape(1);
+    const int height = depth.GetShape(0);
+
+    // buffer for id and depth
+    core::Tensor index_buffer = core::Tensor::Full(
+            {height, width},
+            0xFFFFFFFFFFFFFFFF,  // The largest possible 64-bit number
+            core::UInt64, depth.GetDevice());
+
+    uint64_t* index_buffer_ptr = index_buffer.GetDataPtr<uint64_t>();
+
     TransformIndexer transform_indexer(intrinsics, extrinsics, 1.0f);
     NDArrayIndexer depth_indexer(depth, 2);
 
@@ -60,13 +72,27 @@ void ProjectCUDA(
                 if (d_old > 0) {
                     atomicMinf(depth_ptr, d_old);
                 }
+
+                int64_t row = static_cast<int64_t>(v);
+                int64_t col = static_cast<int64_t>(u);
+
+                // Get the specific address for this pixel (u, v)
+                uint64_t* pixel_address =
+                        index_buffer_ptr + (row * width + col);
+
+                // Prepare the packed value of id and depth
+                uint32_t d_as_uint = __float_as_uint(d);
+                uint64_t val = (static_cast<uint64_t>(d_as_uint) << 32) |
+                               (uint32_t)workload_idx;
+
+                atomicMin(reinterpret_cast<unsigned long long*>(pixel_address),
+                          static_cast<unsigned long long>(val));
             });
 
     // Pass 2: color map
     if (!has_colors) return;
 
     NDArrayIndexer color_indexer(image_colors.value().get(), 2);
-    float precision_bound = depth_scale * 1e-4;
     core::ParallelFor(
             depth.GetDevice(), n, [=] OPEN3D_DEVICE(int64_t workload_idx) {
                 float x = points_ptr[3 * workload_idx + 0];
@@ -79,17 +105,22 @@ void ProjectCUDA(
                 // coordinate in image (in pixel)
                 transform_indexer.Project(xc, yc, zc, &u, &v);
+                u = round(u);
+                v = round(v);
+
                 if (!depth_indexer.InBoundary(u, v) || zc <= 0 ||
                     zc > depth_max) {
                     return;
                 }
 
-                float dmap = *depth_indexer.GetDataPtr<float>(
-                        static_cast<int64_t>(u), static_cast<int64_t>(v));
-                float d = zc * depth_scale;
-                if (d < dmap + precision_bound) {
-                    float* color_ptr = color_indexer.GetDataPtr<float>(
-                            static_cast<int64_t>(u), static_cast<int64_t>(v));
+                int64_t pu = static_cast<int64_t>(u),
+                        pv = static_cast<int64_t>(v);
+                uint64_t final_val = index_buffer_ptr[pv * width + pu];
+                uint32_t winning_idx =
+                        static_cast<uint32_t>(final_val & 0xFFFFFFFF);
+
+                if (winning_idx == (uint32_t)workload_idx) {
+                    float* color_ptr = color_indexer.GetDataPtr<float>(u, v);
                     color_ptr[0] = point_colors_ptr[3 * workload_idx + 0];
                     color_ptr[1] = point_colors_ptr[3 * workload_idx + 1];
                     color_ptr[2] = point_colors_ptr[3 * workload_idx + 2];
diff --git a/cpp/tests/t/geometry/PointCloud.cpp b/cpp/tests/t/geometry/PointCloud.cpp
index 38e6c555786..2ef3d2b9ee3 100644
--- a/cpp/tests/t/geometry/PointCloud.cpp
+++ b/cpp/tests/t/geometry/PointCloud.cpp
@@ -822,6 +822,62 @@ TEST_P(PointCloudPermuteDevices, CreateFromRGBDOrDepthImageWithNormals) {
     EXPECT_TRUE(pcd_out.GetPointNormals().AllClose(t_normal_ref));
 }
 
+TEST_P(PointCloudPermuteDevices, ProjectToDepthImage) {
+    core::Device device = GetParam();
+    if (device.IsSYCL()) GTEST_SKIP() << "Not Implemented!";
+
+    const int width = 8, height = 8;
+    float depth_scale = 1.f, depth_max = 10.f;
+    core::Tensor positions = core::Tensor::Init<float>(
+            {{0.f, 0.f, 1.f}, {0.1f, 0.1f, 1.f}}, device);
+    t::geometry::PointCloud pcd(positions);
+    core::Tensor intrinsics = core::Tensor::Init<double>(
+            {{10., 0., 4.}, {0., 10., 4.}, {0., 0., 1.}}, device);
+    core::Tensor extrinsics = core::Tensor::Eye(4, core::Float64, device);
+
+    t::geometry::Image depth_img = pcd.ProjectToDepthImage(
+            width, height, intrinsics, extrinsics, depth_scale, depth_max);
+
+    EXPECT_EQ(depth_img.AsTensor().GetShape(),
+              core::SizeVector({height, width, 1}));
+    std::vector<float> depth_flat =
+            depth_img.AsTensor().ToFlatVector<float>();
+    EXPECT_GT(*std::max_element(depth_flat.begin(), depth_flat.end()), 0.f);
+}
+
+TEST_P(PointCloudPermuteDevices, ProjectToRGBDImage) {
+    using ::testing::ElementsAre;
+
+    core::Device device = GetParam();
+    if (device.IsSYCL()) GTEST_SKIP() << "Not Implemented!";
+
+    const int width = 8, height = 8;
+    float depth_scale = 1.f, depth_max = 10.f;
+    core::Tensor positions = core::Tensor::Init<float>(
+            {{0.f, 0.f, 1.f}, {0.1f, 0.f, 1.f}, {0.f, 0.1f, 1.f}}, device);
+    core::Tensor colors = core::Tensor::Init<float>(
+            {{1.f, 0.f, 0.f}, {0.f, 1.f, 0.f}, {0.f, 0.f, 1.f}}, device);
+    t::geometry::PointCloud pcd(device);
+    pcd.SetPointPositions(positions);
+    pcd.SetPointColors(colors);
+    core::Tensor intrinsics = core::Tensor::Init<double>(
+            {{10., 0., 4.}, {0., 10., 4.}, {0., 0., 1.}}, device);
+    core::Tensor extrinsics = core::Tensor::Eye(4, core::Float64, device);
+
+    t::geometry::RGBDImage rgbd = pcd.ProjectToRGBDImage(
+            width, height, intrinsics, extrinsics, depth_scale, depth_max);
+
+    EXPECT_THAT(rgbd.depth_.AsTensor().GetShape(),
+                ElementsAre(height, width, 1));
+    EXPECT_THAT(rgbd.color_.AsTensor().GetShape(),
+                ElementsAre(height, width, 3));
+    std::vector<float> depth_flat =
+            rgbd.depth_.AsTensor().ToFlatVector<float>();
+    std::vector<float> color_flat =
+            rgbd.color_.AsTensor().ToFlatVector<float>();
+    EXPECT_GT(*std::max_element(depth_flat.begin(), depth_flat.end()), 0.f);
+    EXPECT_GT(*std::max_element(color_flat.begin(), color_flat.end()), 0.f);
+}
+
 TEST_P(PointCloudPermuteDevices, SelectByMask) {
     core::Device device = GetParam();
diff --git a/python/test/t/geometry/test_pointcloud.py b/python/test/t/geometry/test_pointcloud.py
index 74f5d5f27c0..9feccd7307c 100644
--- a/python/test/t/geometry/test_pointcloud.py
+++ b/python/test/t/geometry/test_pointcloud.py
@@ -214,3 +214,108 @@ def test_metrics():
         metrics.cpu().numpy(),
         (0.22436734, np.sqrt(3) / 10, 100. / 8, 400. / 8, 700. / 8, 100.),
         rtol=1e-6)
+
+
+@pytest.mark.parametrize("device", list_devices())
+def test_project_to_depth_image(device):
+    """Project point cloud to depth image; check shape and non-empty depth."""
+    dtype = o3c.float32
+    width, height = 8, 8
+    # Points in front of camera (z > 0): (0, 0, 1) and (0.1, 0.1, 1) project
+    # with intrinsics fx=fy=10, cx=cy=4 to pixel (4,4) and (5,5) approx.
+    positions = o3c.Tensor([[0.0, 0.0, 1.0], [0.1, 0.1, 1.0]], dtype, device)
+    pcd = o3d.t.geometry.PointCloud(positions)
+    intrinsics = o3c.Tensor([[10.0, 0, 4.0], [0, 10.0, 4.0], [0, 0, 1.0]],
+                            o3c.float64)
+    extrinsics = o3c.Tensor(np.eye(4), o3c.float64)
+
+    depth_img = pcd.project_to_depth_image(width,
+                                           height,
+                                           intrinsics,
+                                           extrinsics,
+                                           depth_scale=1.0,
+                                           depth_max=10.0)
+
+    depth_tensor = depth_img.as_tensor()
+    assert depth_tensor.shape == (height, width, 1)
+    depth_np = depth_tensor.cpu().numpy()
+    assert depth_np.shape == (height, width, 1)
+    # At least one pixel should have depth (points project into image)
+    assert np.any(depth_np > 0)
+
+
+@pytest.mark.parametrize("device", list_devices())
+def test_project_to_rgbd_image(device):
+    """Project colored point cloud to RGBD image; check shapes and content."""
+    dtype = o3c.float32
+    width, height = 8, 8
+    positions = o3c.Tensor([[0.0, 0.0, 1.0], [0.1, 0.0, 1.0], [0.0, 0.1, 1.0]],
+                           dtype, device)
+    colors = o3c.Tensor([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]],
+                        dtype, device)
+    pcd = o3d.t.geometry.PointCloud(positions)
+    pcd.point.colors = colors
+    pcd = pcd.to(device)
+    intrinsics = o3c.Tensor([[10.0, 0, 4.0], [0, 10.0, 4.0], [0, 0, 1.0]],
+                            o3c.float64)
+    extrinsics = o3c.Tensor(np.eye(4), o3c.float64)
+
+    rgbd = pcd.project_to_rgbd_image(width,
+                                     height,
+                                     intrinsics,
+                                     extrinsics,
+                                     depth_scale=1.0,
+                                     depth_max=10.0)
+
+    assert rgbd.depth.as_tensor().shape == (height, width, 1)
+    assert rgbd.color.as_tensor().shape == (height, width, 3)
+    depth_np = rgbd.depth.as_tensor().cpu().numpy()
+    color_np = rgbd.color.as_tensor().cpu().numpy()
+    assert np.any(depth_np > 0), "depth should have at least one hit"
+    # Where depth > 0, color should not be all zeros (no black-artifact pixels)
+    hit_mask = (depth_np.squeeze(-1) > 0).astype(bool)
+    hit_colors = color_np[hit_mask]
+    assert hit_colors.size > 0
+    assert np.any(hit_colors > 0), "projected pixels should have non-zero color"
+
+
+@pytest.mark.parametrize("device", list_devices())
+def test_project_to_rgbd_image_cpu_cuda_consistent(device):
+    """When both CPU and CUDA are available, RGBD projection should match."""
+    if o3c.cuda.device_count() == 0:
+        pytest.skip("CUDA not available")
+    width, height = 16, 16
+    np.random.seed(42)
+    n = 50
+    positions_np = np.random.randn(n, 3).astype(np.float32) * 0.2
+    positions_np[:, 2] = 1.0 + np.abs(positions_np[:, 2])  # z in [1, ~2]
+    colors_np = np.random.rand(n, 3).astype(np.float32)
+    intrinsics = o3c.Tensor([[20.0, 0, 8.0], [0, 20.0, 8.0], [0, 0, 1.0]],
+                            o3c.float64)
+    extrinsics = o3c.Tensor(np.eye(4), o3c.float64)
+
+    pcd_cpu = o3d.t.geometry.PointCloud(o3c.Tensor(positions_np))
+    pcd_cpu.point.colors = o3c.Tensor(colors_np)
+    rgbd_cpu = pcd_cpu.project_to_rgbd_image(width,
+                                             height,
+                                             intrinsics,
+                                             extrinsics,
+                                             depth_scale=1.0,
+                                             depth_max=5.0)
+
+    pcd_cuda = pcd_cpu.to(o3c.Device("CUDA:0"))
+    rgbd_cuda = pcd_cuda.project_to_rgbd_image(width,
+                                               height,
+                                               intrinsics,
+                                               extrinsics,
+                                               depth_scale=1.0,
+                                               depth_max=5.0)
+
+    np.testing.assert_allclose(rgbd_cpu.depth.as_tensor().cpu().numpy(),
+                               rgbd_cuda.depth.as_tensor().cpu().numpy(),
+                               rtol=1e-5,
+                               atol=1e-5)
+    np.testing.assert_allclose(rgbd_cpu.color.as_tensor().cpu().numpy(),
+                               rgbd_cuda.color.as_tensor().cpu().numpy(),
+                               rtol=1e-5,
+                               atol=1e-5)