From d0019ae89597928cdb82ff560920ff554148c083 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 1 Oct 2025 20:55:17 -0700 Subject: [PATCH 1/3] add changes for undirected MIS --- cpp/src/components/mis_impl.cuh | 548 ++++++++++++++----------------- cpp/tests/components/mis_test.cu | 124 +++++-- 2 files changed, 340 insertions(+), 332 deletions(-) diff --git a/cpp/src/components/mis_impl.cuh b/cpp/src/components/mis_impl.cuh index 085b4837873..8390dd30678 100644 --- a/cpp/src/components/mis_impl.cuh +++ b/cpp/src/components/mis_impl.cuh @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023-2025, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -11,325 +10,276 @@ * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and + * See the License for the specific language governin_from_mtxg permissions and * limitations under the License. */ -#pragma once -#include "prims/fill_edge_src_dst_property.cuh" #include "prims/per_v_transform_reduce_incoming_outgoing_e.cuh" -#include "prims/update_edge_src_dst_property.cuh" +#include "prims/per_v_transform_reduce_if_incoming_outgoing_e.cuh" +#include "prims/reduce_op.cuh" +#include "utilities/base_fixture.hpp" +#include "utilities/test_graphs.hpp" #include +#include #include #include #include #include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -namespace cugraph { - -namespace detail { - -template -rmm::device_uvector maximal_independent_set( - raft::handle_t const& handle, - cugraph::graph_view_t const& graph_view, - raft::random::RngState& rng_state) -{ - using GraphViewType = cugraph::graph_view_t; - - vertex_t local_vtx_partitoin_size = graph_view.local_vertex_partition_range_size(); - - rmm::device_uvector remaining_vertices(local_vtx_partitoin_size, handle.get_stream()); - - auto vertex_begin = - thrust::make_counting_iterator(graph_view.local_vertex_partition_range_first()); - auto vertex_end = thrust::make_counting_iterator(graph_view.local_vertex_partition_range_last()); - - auto out_degrees = graph_view.compute_out_degrees(handle); - auto in_degrees = graph_view.compute_in_degrees(handle); - - // Vertices with degree zero are always part of MIS - remaining_vertices.resize( - cuda::std::distance( - remaining_vertices.begin(), - thrust::copy_if(handle.get_thrust_policy(), - vertex_begin, - vertex_end, - thrust::make_zip_iterator(out_degrees.begin(), in_degrees.begin()), - remaining_vertices.begin(), - [] __device__(auto out_deg_and_in_deg) { - return !((cuda::std::get<0>(out_deg_and_in_deg) == 0) && - (cuda::std::get<1>(out_deg_and_in_deg) == 0)); - })), - handle.get_stream()); - - // Set ID of each vertex as its rank - rmm::device_uvector ranks(local_vtx_partitoin_size, handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), vertex_begin, vertex_end, ranks.begin()); - - // Set ranks of zero degree vetices to std::numeric_limits::max() - thrust::transform_if(handle.get_thrust_policy(), - thrust::make_zip_iterator(out_degrees.begin(), in_degrees.begin()), - thrust::make_zip_iterator(out_degrees.end(), in_degrees.end()), - ranks.begin(), - cuda::proclaim_return_type( - [] __device__(auto) { return std::numeric_limits::max(); }), - [] __device__(auto in_out_degree) { - return (cuda::std::get<0>(in_out_degree) == 0) && - (cuda::std::get<1>(in_out_degree) == 0); - }); - - out_degrees.resize(0, handle.get_stream()); - out_degrees.shrink_to_fit(handle.get_stream()); - - in_degrees.resize(0, handle.get_stream()); - in_degrees.shrink_to_fit(handle.get_stream()); - - size_t loop_counter = 0; - while (true) { - loop_counter++; - - // Copy ranks into temporary vector to begin with - - rmm::device_uvector temporary_ranks(local_vtx_partitoin_size, handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), ranks.begin(), ranks.end(), temporary_ranks.begin()); - - // Select a random set of candidate vertices - - vertex_t nr_remaining_vertices_to_check = remaining_vertices.size(); - if (multi_gpu) { - nr_remaining_vertices_to_check = host_scalar_allreduce(handle.get_comms(), - nr_remaining_vertices_to_check, - raft::comms::op_t::SUM, - handle.get_stream()); +#include +#include + +#include + +#include + +#include +#include +#include + +struct MaximalIndependentSet_Usecase { + bool check_correctness{true}; +}; + +template +class Tests_SGMaximalIndependentSet + : public ::testing::TestWithParam> { + public: + Tests_SGMaximalIndependentSet() {} + + static void SetUpTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test(std::tuple const& param) + { + + auto [mis_usecase, input_usecase] = param; + + raft::handle_t handle{}; + HighResTimer hr_timer{}; + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + hr_timer.start("Construct graph"); } - vertex_t nr_candidates = (nr_remaining_vertices_to_check < 1024) - ? nr_remaining_vertices_to_check - : std::min(static_cast((0.50 + 0.25 * loop_counter) * - nr_remaining_vertices_to_check), - nr_remaining_vertices_to_check); - - // FIXME: Can we improve performance here? - // FIXME: if(nr_remaining_vertices_to_check < 1024), may avoid calling select_random_vertices - auto d_sampled_vertices = - cugraph::select_random_vertices(handle, - graph_view, - std::make_optional(raft::device_span{ - remaining_vertices.data(), remaining_vertices.size()}), - rng_state, - nr_candidates, - false, - true); - - rmm::device_uvector non_candidate_vertices( - remaining_vertices.size() - d_sampled_vertices.size(), handle.get_stream()); - - thrust::set_difference(handle.get_thrust_policy(), - remaining_vertices.begin(), - remaining_vertices.end(), - d_sampled_vertices.begin(), - d_sampled_vertices.end(), - non_candidate_vertices.begin()); - - // Set temporary ranks of non-candidate vertices to std::numeric_limits::lowest() - thrust::for_each( - handle.get_thrust_policy(), - non_candidate_vertices.begin(), - non_candidate_vertices.end(), - [temporary_ranks = - raft::device_span(temporary_ranks.data(), temporary_ranks.size()), - v_first = graph_view.local_vertex_partition_range_first()] __device__(auto v) { - // - // if rank of a non-candidate vertex is not std::numeric_limits::max() (i.e. the - // vertex is not already in MIS), set it to std::numeric_limits::lowest() - // - auto v_offset = v - v_first; - if (temporary_ranks[v_offset] < std::numeric_limits::max()) { - temporary_ranks[v_offset] = std::numeric_limits::lowest(); - } - }); + constexpr bool multi_gpu = false; + + bool test_weighted = false; + bool renumber = true; + bool drop_self_loops = true; + bool drop_multi_edges = true; + + auto [sg_graph, sg_edge_weights, sg_renumber_map] = + cugraph::test::construct_graph( + handle, input_usecase, test_weighted, renumber, drop_self_loops, drop_multi_edges); - // Caches for ranks - edge_src_property_t src_rank_cache(handle); - edge_dst_property_t dst_rank_cache(handle); - - // Update rank caches with temporary ranks - if constexpr (multi_gpu) { - src_rank_cache = edge_src_property_t(handle, graph_view); - dst_rank_cache = edge_dst_property_t(handle, graph_view); - update_edge_src_property( - handle, graph_view, temporary_ranks.begin(), src_rank_cache.mutable_view()); - update_edge_dst_property( - handle, graph_view, temporary_ranks.begin(), dst_rank_cache.mutable_view()); + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); } + + auto sg_graph_view = sg_graph.view(); + auto sg_edge_weight_view = + sg_edge_weights ? std::make_optional((*sg_edge_weights).view()) : std::nullopt; + + auto edge_partition = sg_graph_view.local_edge_partition_view(0); + + auto number_of_local_edges = edge_partition.number_of_edges(); + + raft::random::RngState rng_state(0); + auto d_mis = cugraph::maximal_independent_set( + handle, sg_graph_view, rng_state); + + // Test MIS + if (mis_usecase.check_correctness) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + std::vector h_mis(d_mis.size()); + raft::update_host(h_mis.data(), d_mis.data(), d_mis.size(), handle.get_stream()); - // - // Find maximum rank outgoing neighbor for each vertex - // - - rmm::device_uvector max_outgoing_ranks(local_vtx_partitoin_size, handle.get_stream()); - - per_v_transform_reduce_outgoing_e( - handle, - graph_view, - multi_gpu ? src_rank_cache.view() - : make_edge_src_property_view( - graph_view, temporary_ranks.begin(), temporary_ranks.size()), - multi_gpu ? dst_rank_cache.view() - : make_edge_dst_property_view( - graph_view, temporary_ranks.begin(), temporary_ranks.size()), - edge_dummy_property_t{}.view(), - [] __device__(auto src, auto dst, auto src_rank, auto dst_rank, auto wt) { return dst_rank; }, - std::numeric_limits::lowest(), - cugraph::reduce_op::maximum{}, - max_outgoing_ranks.begin()); - - // - // Find maximum rank incoming neighbor for each vertex - // - - rmm::device_uvector max_incoming_ranks(local_vtx_partitoin_size, handle.get_stream()); - - per_v_transform_reduce_incoming_e( - handle, - graph_view, - multi_gpu ? src_rank_cache.view() - : make_edge_src_property_view( - graph_view, temporary_ranks.begin(), temporary_ranks.size()), - multi_gpu ? dst_rank_cache.view() - : make_edge_dst_property_view( - graph_view, temporary_ranks.begin(), temporary_ranks.size()), - edge_dummy_property_t{}.view(), - [] __device__(auto src, auto dst, auto src_rank, auto dst_rank, auto wt) { return src_rank; }, - std::numeric_limits::lowest(), - cugraph::reduce_op::maximum{}, - max_incoming_ranks.begin()); - - temporary_ranks.resize(0, handle.get_stream()); - temporary_ranks.shrink_to_fit(handle.get_stream()); - - // - // Compute max of outgoing and incoming neighbors - // - thrust::transform(handle.get_thrust_policy(), - max_incoming_ranks.begin(), - max_incoming_ranks.end(), - max_outgoing_ranks.begin(), - max_outgoing_ranks.begin(), - thrust::maximum()); - - max_incoming_ranks.resize(0, handle.get_stream()); - max_incoming_ranks.shrink_to_fit(handle.get_stream()); - - // - // If the max neighbor of a vertex is already in MIS (i.e. has rank - // std::numeric_limits::max()), discard it, otherwise, - // include the vertex if it has larger rank than its maximum rank neighbor - // - auto last = thrust::remove_if( - handle.get_thrust_policy(), - d_sampled_vertices.begin(), - d_sampled_vertices.end(), - [max_rank_neighbor_first = max_outgoing_ranks.begin(), - ranks = raft::device_span(ranks.data(), ranks.size()), - v_first = graph_view.local_vertex_partition_range_first()] __device__(auto v) { - auto v_offset = v - v_first; - auto max_neighbor_rank = *(max_rank_neighbor_first + v_offset); - auto rank_of_v = ranks[v_offset]; - - if (max_neighbor_rank >= std::numeric_limits::max()) { - // Maximum rank neighbor is alreay in MIS - // Discard current vertex by setting its rank to - // std::numeric_limits::lowest() - ranks[v_offset] = std::numeric_limits::lowest(); - return true; - } - - if (rank_of_v >= max_neighbor_rank) { - // Include v and set its rank to std::numeric_limits::max() - ranks[v_offset] = std::numeric_limits::max(); - return true; - } - return false; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + auto vertex_first = sg_graph_view.local_vertex_partition_range_first(); + auto vertex_last = sg_graph_view.local_vertex_partition_range_last(); + + std::for_each(h_mis.begin(), h_mis.end(), [vertex_first, vertex_last](vertex_t v) { + ASSERT_TRUE((v >= vertex_first) && (v < vertex_last)); }); - max_outgoing_ranks.resize(0, handle.get_stream()); - max_outgoing_ranks.shrink_to_fit(handle.get_stream()); - - d_sampled_vertices.resize(cuda::std::distance(d_sampled_vertices.begin(), last), - handle.get_stream()); - d_sampled_vertices.shrink_to_fit(handle.get_stream()); - - remaining_vertices.resize(non_candidate_vertices.size() + d_sampled_vertices.size(), - handle.get_stream()); - remaining_vertices.shrink_to_fit(handle.get_stream()); - - // merge non-candidate and remaining candidate vertices - thrust::merge(handle.get_thrust_policy(), - non_candidate_vertices.begin(), - non_candidate_vertices.end(), - d_sampled_vertices.begin(), - d_sampled_vertices.end(), - remaining_vertices.begin()); - - nr_remaining_vertices_to_check = remaining_vertices.size(); - if (multi_gpu) { - nr_remaining_vertices_to_check = host_scalar_allreduce(handle.get_comms(), - nr_remaining_vertices_to_check, - raft::comms::op_t::SUM, - handle.get_stream()); + // If a vertex is included in MIS, then none of its neighbor should be + + vertex_t local_vtx_partitoin_size = sg_graph_view.local_vertex_partition_range_size(); + rmm::device_uvector d_any_outgoing_nbrs_included_mis(local_vtx_partitoin_size, + handle.get_stream()); + + rmm::device_uvector inclusiong_flags(local_vtx_partitoin_size, handle.get_stream()); + + thrust::uninitialized_fill( + handle.get_thrust_policy(), inclusiong_flags.begin(), inclusiong_flags.end(), vertex_t{0}); + + thrust::for_each( + handle.get_thrust_policy(), + d_mis.begin(), + d_mis.end(), + [inclusiong_flags = + raft::device_span(inclusiong_flags.data(), inclusiong_flags.size()), + v_first = sg_graph_view.local_vertex_partition_range_first()] __device__(auto v) { + auto v_offset = v - v_first; + inclusiong_flags[v_offset] = vertex_t{1}; + }); + + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + per_v_transform_reduce_if_outgoing_e( + handle, + sg_graph_view, + cugraph::make_edge_src_property_view( + sg_graph_view, inclusiong_flags.data(), inclusiong_flags.size()), + cugraph::make_edge_dst_property_view( + sg_graph_view, inclusiong_flags.data(), inclusiong_flags.size()), + cugraph::edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, auto src_included, auto dst_included, auto wt) { return vertex_t{1}; }, + vertex_t{0}, + cugraph::reduce_op::any(), + // just use auto, auto remove src_rank and wt # FIXME: address this. + [] __device__(auto src, auto dst, auto src_included, auto dst_included, auto wt) { + // Adjacent vertices are in the MIS + return (src_included == dst_included) && (src_included == 1); + }, + d_any_outgoing_nbrs_included_mis.begin(), + false); + + auto num_invalid_vertices_in_mis = thrust::reduce( + handle.get_thrust_policy(), + d_any_outgoing_nbrs_included_mis.begin(), + d_any_outgoing_nbrs_included_mis.end()); + + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + ASSERT_TRUE(num_invalid_vertices_in_mis == 0); + + auto vertex_begin = + thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_first()); + + auto vertex_end = + thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_last()); + + rmm::device_uvector vertices(local_vtx_partitoin_size, + handle.get_stream()); + + thrust::copy(handle.get_thrust_policy(), vertex_begin, vertex_end, vertices.begin()); + + rmm::device_uvector non_candidate_vertices( + vertices.size() - d_mis.size(), handle.get_stream()); + + thrust::set_difference(handle.get_thrust_policy(), + vertices.begin(), + vertices.end(), + d_mis.begin(), + d_mis.end(), + non_candidate_vertices.begin()); + + cugraph::vertex_frontier_t vertex_frontier( + handle, + 1); + + vertex_frontier.bucket(0).insert(non_candidate_vertices.begin(), non_candidate_vertices.end()); + + d_any_outgoing_nbrs_included_mis.resize(non_candidate_vertices.size(), handle.get_stream()); + + per_v_transform_reduce_if_outgoing_e( + handle, + sg_graph_view, + vertex_frontier.bucket(0), + cugraph::make_edge_src_property_view( + sg_graph_view, inclusiong_flags.data(), inclusiong_flags.size()), + cugraph::make_edge_dst_property_view( + sg_graph_view, inclusiong_flags.data(), inclusiong_flags.size()), + cugraph::edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, auto src_included, auto dst_included, auto wt) { return vertex_t{1}; }, + vertex_t{0}, + cugraph::reduce_op::any(), + // just use auto, auto remove src_rank and wt # FIXME: address this. + [] __device__(auto src, auto dst, auto src_included, auto dst_included, auto wt) { + // Adjacent vertices are in the MIS + return dst_included == 1; + }, + d_any_outgoing_nbrs_included_mis.begin(), + false); + + auto num_invalid_non_candidate_vertices_out_mis = thrust::reduce( + handle.get_thrust_policy(), + d_any_outgoing_nbrs_included_mis.begin(), + d_any_outgoing_nbrs_included_mis.end()); + + // FIXME: Add an error message + ASSERT_TRUE( + num_invalid_non_candidate_vertices_out_mis == d_any_outgoing_nbrs_included_mis.size()); } - - if (nr_remaining_vertices_to_check == 0) { break; } + } +}; + +using Tests_SGMaximalIndependentSet_File = + Tests_SGMaximalIndependentSet; +using Tests_SGMaximalIndependentSet_Rmat = + Tests_SGMaximalIndependentSet; - // Count number of vertices included in MIS - - vertex_t nr_vertices_included_in_mis = thrust::count_if( - handle.get_thrust_policy(), ranks.begin(), ranks.end(), [] __device__(auto v_rank) { - return v_rank >= std::numeric_limits::max(); - }); - - // Build MIS and return - rmm::device_uvector mis(nr_vertices_included_in_mis, handle.get_stream()); - thrust::copy_if( - handle.get_thrust_policy(), - vertex_begin, - vertex_end, - ranks.begin(), - mis.begin(), - [] __device__(auto v_rank) { return v_rank >= std::numeric_limits::max(); }); - - ranks.resize(0, handle.get_stream()); - ranks.shrink_to_fit(handle.get_stream()); - return mis; +TEST_P(Tests_SGMaximalIndependentSet_File, CheckInt32Int32FloatFloat) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam())); +} + +TEST_P(Tests_SGMaximalIndependentSet_File, CheckInt64Int64FloatFloat) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam())); +} + +TEST_P(Tests_SGMaximalIndependentSet_Rmat, CheckInt32Int32FloatFloat) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); } -} // namespace detail -template -rmm::device_uvector maximal_independent_set( - raft::handle_t const& handle, - graph_view_t const& graph_view, - raft::random::RngState& rng_state) +TEST_P(Tests_SGMaximalIndependentSet_Rmat, CheckInt64Int64FloatFloat) { - return detail::maximal_independent_set(handle, graph_view, rng_state); + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); } -} // namespace cugraph +bool constexpr check_correctness = false; +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_SGMaximalIndependentSet_File, + ::testing::Combine(::testing::Values(MaximalIndependentSet_Usecase{check_correctness}, + MaximalIndependentSet_Usecase{check_correctness}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); + +INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_SGMaximalIndependentSet_Rmat, + ::testing::Combine( + ::testing::Values(MaximalIndependentSet_Usecase{check_correctness}), + ::testing::Values(cugraph::test::Rmat_Usecase(3, 4, 0.57, 0.19, 0.19, 0, true, false)))); + +INSTANTIATE_TEST_SUITE_P( + rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with + --gtest_filter to select only the rmat_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one Rmat_Usecase that differ only in scale or edge + factor (to avoid running same benchmarks more than once) */ + Tests_SGMaximalIndependentSet_Rmat, + ::testing::Combine( + ::testing::Values(MaximalIndependentSet_Usecase{check_correctness}, + MaximalIndependentSet_Usecase{check_correctness}), + ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/components/mis_test.cu b/cpp/tests/components/mis_test.cu index 0edbccfc356..8390dd30678 100644 --- a/cpp/tests/components/mis_test.cu +++ b/cpp/tests/components/mis_test.cu @@ -15,6 +15,7 @@ */ #include "prims/per_v_transform_reduce_incoming_outgoing_e.cuh" +#include "prims/per_v_transform_reduce_if_incoming_outgoing_e.cuh" #include "prims/reduce_op.cuh" #include "utilities/base_fixture.hpp" #include "utilities/test_graphs.hpp" @@ -25,6 +26,7 @@ #include #include #include +#include #include #include @@ -51,9 +53,10 @@ class Tests_SGMaximalIndependentSet virtual void SetUp() {} virtual void TearDown() {} - template + template void run_current_test(std::tuple const& param) { + auto [mis_usecase, input_usecase] = param; raft::handle_t handle{}; @@ -66,9 +69,14 @@ class Tests_SGMaximalIndependentSet constexpr bool multi_gpu = false; + bool test_weighted = false; + bool renumber = true; + bool drop_self_loops = true; + bool drop_multi_edges = true; + auto [sg_graph, sg_edge_weights, sg_renumber_map] = cugraph::test::construct_graph( - handle, input_usecase, false, true); + handle, input_usecase, test_weighted, renumber, drop_self_loops, drop_multi_edges); if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); @@ -76,11 +84,15 @@ class Tests_SGMaximalIndependentSet hr_timer.stop(); hr_timer.display_and_clear(std::cout); } - + auto sg_graph_view = sg_graph.view(); auto sg_edge_weight_view = sg_edge_weights ? std::make_optional((*sg_edge_weights).view()) : std::nullopt; + auto edge_partition = sg_graph_view.local_edge_partition_view(0); + + auto number_of_local_edges = edge_partition.number_of_edges(); + raft::random::RngState rng_state(0); auto d_mis = cugraph::maximal_independent_set( handle, sg_graph_view, rng_state); @@ -103,7 +115,7 @@ class Tests_SGMaximalIndependentSet // If a vertex is included in MIS, then none of its neighbor should be vertex_t local_vtx_partitoin_size = sg_graph_view.local_vertex_partition_range_size(); - rmm::device_uvector d_total_outgoing_nbrs_included_mis(local_vtx_partitoin_size, + rmm::device_uvector d_any_outgoing_nbrs_included_mis(local_vtx_partitoin_size, handle.get_stream()); rmm::device_uvector inclusiong_flags(local_vtx_partitoin_size, handle.get_stream()); @@ -124,47 +136,93 @@ class Tests_SGMaximalIndependentSet RAFT_CUDA_TRY(cudaDeviceSynchronize()); - per_v_transform_reduce_outgoing_e( + per_v_transform_reduce_if_outgoing_e( handle, sg_graph_view, cugraph::make_edge_src_property_view( - sg_graph_view, inclusiong_flags.data(), 1), + sg_graph_view, inclusiong_flags.data(), inclusiong_flags.size()), cugraph::make_edge_dst_property_view( - sg_graph_view, inclusiong_flags.data(), 1), + sg_graph_view, inclusiong_flags.data(), inclusiong_flags.size()), cugraph::edge_dummy_property_t{}.view(), - [] __device__(auto src, auto dst, auto src_included, auto dst_included, auto wt) { - return (src == dst) ? 0 : dst_included; - }, + [] __device__(auto src, auto dst, auto src_included, auto dst_included, auto wt) { return vertex_t{1}; }, vertex_t{0}, - cugraph::reduce_op::plus{}, - d_total_outgoing_nbrs_included_mis.begin()); - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - std::vector h_total_outgoing_nbrs_included_mis( - d_total_outgoing_nbrs_included_mis.size()); - raft::update_host(h_total_outgoing_nbrs_included_mis.data(), - d_total_outgoing_nbrs_included_mis.data(), - d_total_outgoing_nbrs_included_mis.size(), - handle.get_stream()); + cugraph::reduce_op::any(), + // just use auto, auto remove src_rank and wt # FIXME: address this. + [] __device__(auto src, auto dst, auto src_included, auto dst_included, auto wt) { + // Adjacent vertices are in the MIS + return (src_included == dst_included) && (src_included == 1); + }, + d_any_outgoing_nbrs_included_mis.begin(), + false); + + auto num_invalid_vertices_in_mis = thrust::reduce( + handle.get_thrust_policy(), + d_any_outgoing_nbrs_included_mis.begin(), + d_any_outgoing_nbrs_included_mis.end()); RAFT_CUDA_TRY(cudaDeviceSynchronize()); - { - auto vertex_first = sg_graph_view.local_vertex_partition_range_first(); - auto vertex_last = sg_graph_view.local_vertex_partition_range_last(); + ASSERT_TRUE(num_invalid_vertices_in_mis == 0); + + auto vertex_begin = + thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_first()); + + auto vertex_end = + thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_last()); + + rmm::device_uvector vertices(local_vtx_partitoin_size, + handle.get_stream()); + + thrust::copy(handle.get_thrust_policy(), vertex_begin, vertex_end, vertices.begin()); + + rmm::device_uvector non_candidate_vertices( + vertices.size() - d_mis.size(), handle.get_stream()); + + thrust::set_difference(handle.get_thrust_policy(), + vertices.begin(), + vertices.end(), + d_mis.begin(), + d_mis.end(), + non_candidate_vertices.begin()); + + cugraph::vertex_frontier_t vertex_frontier( + handle, + 1); + + vertex_frontier.bucket(0).insert(non_candidate_vertices.begin(), non_candidate_vertices.end()); + + d_any_outgoing_nbrs_included_mis.resize(non_candidate_vertices.size(), handle.get_stream()); - std::for_each(h_mis.begin(), - h_mis.end(), - [vertex_first, vertex_last, &h_total_outgoing_nbrs_included_mis](vertex_t v) { - ASSERT_TRUE((v >= vertex_first) && (v < vertex_last)) - << v << " is not within vertex parition range" << std::endl; + per_v_transform_reduce_if_outgoing_e( + handle, + sg_graph_view, + vertex_frontier.bucket(0), + cugraph::make_edge_src_property_view( + sg_graph_view, inclusiong_flags.data(), inclusiong_flags.size()), + cugraph::make_edge_dst_property_view( + sg_graph_view, inclusiong_flags.data(), inclusiong_flags.size()), + cugraph::edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, auto src_included, auto dst_included, auto wt) { return vertex_t{1}; }, + vertex_t{0}, + cugraph::reduce_op::any(), + // just use auto, auto remove src_rank and wt # FIXME: address this. + [] __device__(auto src, auto dst, auto src_included, auto dst_included, auto wt) { + // Adjacent vertices are in the MIS + return dst_included == 1; + }, + d_any_outgoing_nbrs_included_mis.begin(), + false); - ASSERT_TRUE(h_total_outgoing_nbrs_included_mis[v - vertex_first] == 0) - << v << "'s neighbor is included in MIS" << std::endl; - }); - } + auto num_invalid_non_candidate_vertices_out_mis = thrust::reduce( + handle.get_thrust_policy(), + d_any_outgoing_nbrs_included_mis.begin(), + d_any_outgoing_nbrs_included_mis.end()); + + // FIXME: Add an error message + ASSERT_TRUE( + num_invalid_non_candidate_vertices_out_mis == d_any_outgoing_nbrs_included_mis.size()); } + } }; From 75879f47f79b40493eb0b200a1c7607d7502e9fc Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 1 Oct 2025 21:01:44 -0700 Subject: [PATCH 2/3] fix wrong copy --- cpp/src/components/mis_impl.cuh | 509 +++++++++++++++++--------------- 1 file changed, 277 insertions(+), 232 deletions(-) diff --git a/cpp/src/components/mis_impl.cuh b/cpp/src/components/mis_impl.cuh index 8390dd30678..c567e0320d0 100644 --- a/cpp/src/components/mis_impl.cuh +++ b/cpp/src/components/mis_impl.cuh @@ -1,5 +1,6 @@ + /* - * Copyright (c) 2024-2025, NVIDIA CORPORATION. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -10,276 +11,320 @@ * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governin_from_mtxg permissions and + * See the License for the specific language governing permissions and * limitations under the License. */ +#pragma once -#include "prims/per_v_transform_reduce_incoming_outgoing_e.cuh" +#include "prims/fill_edge_src_dst_property.cuh" +#include "prims/per_v_transform_reduce_incoming_outgoing_e.cuh" // FIXME: remove if unused #include "prims/per_v_transform_reduce_if_incoming_outgoing_e.cuh" -#include "prims/reduce_op.cuh" -#include "utilities/base_fixture.hpp" -#include "utilities/test_graphs.hpp" +#include "prims/update_edge_src_dst_property.cuh" #include -#include #include #include #include #include -#include -#include - -#include - -#include - -#include -#include -#include - -struct MaximalIndependentSet_Usecase { - bool check_correctness{true}; -}; - -template -class Tests_SGMaximalIndependentSet - : public ::testing::TestWithParam> { - public: - Tests_SGMaximalIndependentSet() {} - - static void SetUpTestCase() {} - static void TearDownTestCase() {} +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace cugraph { + +namespace detail { + +template +rmm::device_uvector maximal_independent_set( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + raft::random::RngState& rng_state) +{ - virtual void SetUp() {} - virtual void TearDown() {} + using GraphViewType = cugraph::graph_view_t; - template - void run_current_test(std::tuple const& param) - { + vertex_t local_vtx_partition_size = graph_view.local_vertex_partition_range_size(); - auto [mis_usecase, input_usecase] = param; + auto vertex_begin = + thrust::make_counting_iterator(graph_view.local_vertex_partition_range_first()); - raft::handle_t handle{}; - HighResTimer hr_timer{}; + auto vertex_end = thrust::make_counting_iterator(graph_view.local_vertex_partition_range_last()); - if (cugraph::test::g_perf) { - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - hr_timer.start("Construct graph"); - } - - constexpr bool multi_gpu = false; + // Set ID of each vertex as its rank + rmm::device_uvector ranks(local_vtx_partition_size, handle.get_stream()); - bool test_weighted = false; - bool renumber = true; - bool drop_self_loops = true; - bool drop_multi_edges = true; + auto segment_offsets = graph_view.local_vertex_partition_segment_offsets(); - auto [sg_graph, sg_edge_weights, sg_renumber_map] = - cugraph::test::construct_graph( - handle, input_usecase, test_weighted, renumber, drop_self_loops, drop_multi_edges); + vertex_t isolated_v_start = multi_gpu ? segment_offsets->data()[4] : segment_offsets->data()[3]; + // Only the non zero degree vertices are part of the initial 'remaining_vertices' list + rmm::device_uvector remaining_vertices(isolated_v_start, handle.get_stream()); - if (cugraph::test::g_perf) { - RAFT_CUDA_TRY(cudaDeviceSynchronize()); + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(graph_view.local_vertex_partition_range_size()), + [ + isolated_v_start = multi_gpu ? segment_offsets->data()[4] : segment_offsets->data()[3], + ranks = raft::device_span(ranks.data(), ranks.size()), + remaining_vertices = raft::device_span(remaining_vertices.data(), remaining_vertices.size()), + v_first = graph_view.local_vertex_partition_range_first()] __device__(auto idx) { + + + if (idx < isolated_v_start) { + // initializing the ranks array + ranks[idx] = v_first + idx; + // initializing the remaining vertices array + remaining_vertices[idx] = v_first + idx; + } else { + // zero-degree vertices are always part of the MIS + ranks[idx] = std::numeric_limits::max(); + } + + }); + + auto num_buckets = 1; + + vertex_frontier_t vertex_frontier(handle, + num_buckets); + + size_t loop_counter = 0; + vertex_t nr_remaining_vertices_to_check = remaining_vertices.size(); + vertex_t nr_remaining_local_vertices_to_check = remaining_vertices.size(); + edge_dst_property_t dst_rank_cache(handle); + + + while (true) { + loop_counter++; + + auto num_processed_vertices = remaining_vertices.size() - nr_remaining_local_vertices_to_check; + + if constexpr (multi_gpu) { + if (loop_counter == 1) { + // Update the property of all edge endpoints during the + // first iteration + dst_rank_cache = edge_dst_property_t(handle, graph_view); + + update_edge_dst_property( + handle, graph_view, ranks.begin(), dst_rank_cache.mutable_view()); + + } else { + // Only update the property of endpoints that had their ranks modified + rmm::device_uvector processed_ranks( + num_processed_vertices, handle.get_stream()); + + auto pair_idx_processed_vertex_first = thrust::make_zip_iterator( + thrust::make_counting_iterator(0), + remaining_vertices.begin() + nr_remaining_local_vertices_to_check + ); + + thrust::for_each( + handle.get_thrust_policy(), + pair_idx_processed_vertex_first, + pair_idx_processed_vertex_first + num_processed_vertices, + [processed_ranks = + raft::device_span(processed_ranks.data(), processed_ranks.size()), + ranks = + raft::device_span(ranks.data(), ranks.size()), + v_first = graph_view.local_vertex_partition_range_first()] __device__(auto pair_idx_v) { + + auto idx = thrust::get<0>(pair_idx_v); + auto v = thrust::get<1>(pair_idx_v); + auto v_offset = v - v_first; + + processed_ranks[idx] = ranks[v_offset]; + }); - hr_timer.stop(); - hr_timer.display_and_clear(std::cout); + // Only update a subset of the graph edge dst property values + + // FIXME: Since we know that the property being updated are either + // std::numeric_limits::max() or std::numeric_limits::min(), + // explore 'fill_edge_dst_property' which is faster + update_edge_dst_property( + handle, + graph_view, + remaining_vertices.begin() + nr_remaining_local_vertices_to_check, + remaining_vertices.end(), + processed_ranks.begin(), + dst_rank_cache.mutable_view() + ); + + } } - - auto sg_graph_view = sg_graph.view(); - auto sg_edge_weight_view = - sg_edge_weights ? std::make_optional((*sg_edge_weights).view()) : std::nullopt; - auto edge_partition = sg_graph_view.local_edge_partition_view(0); - - auto number_of_local_edges = edge_partition.number_of_edges(); + rmm::device_uvector max_outgoing_ranks( + nr_remaining_local_vertices_to_check, handle.get_stream()); - raft::random::RngState rng_state(0); - auto d_mis = cugraph::maximal_independent_set( - handle, sg_graph_view, rng_state); + remaining_vertices.resize(nr_remaining_local_vertices_to_check, + handle.get_stream()); + remaining_vertices.shrink_to_fit(handle.get_stream()); - // Test MIS - if (mis_usecase.check_correctness) { - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - std::vector h_mis(d_mis.size()); - raft::update_host(h_mis.data(), d_mis.data(), d_mis.size(), handle.get_stream()); + vertex_frontier.bucket(0).clear(); - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - auto vertex_first = sg_graph_view.local_vertex_partition_range_first(); - auto vertex_last = sg_graph_view.local_vertex_partition_range_last(); - - std::for_each(h_mis.begin(), h_mis.end(), [vertex_first, vertex_last](vertex_t v) { - ASSERT_TRUE((v >= vertex_first) && (v < vertex_last)); - }); - - // If a vertex is included in MIS, then none of its neighbor should be - - vertex_t local_vtx_partitoin_size = sg_graph_view.local_vertex_partition_range_size(); - rmm::device_uvector d_any_outgoing_nbrs_included_mis(local_vtx_partitoin_size, - handle.get_stream()); - - rmm::device_uvector inclusiong_flags(local_vtx_partitoin_size, handle.get_stream()); - - thrust::uninitialized_fill( - handle.get_thrust_policy(), inclusiong_flags.begin(), inclusiong_flags.end(), vertex_t{0}); - - thrust::for_each( - handle.get_thrust_policy(), - d_mis.begin(), - d_mis.end(), - [inclusiong_flags = - raft::device_span(inclusiong_flags.data(), inclusiong_flags.size()), - v_first = sg_graph_view.local_vertex_partition_range_first()] __device__(auto v) { - auto v_offset = v - v_first; - inclusiong_flags[v_offset] = vertex_t{1}; - }); - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); + vertex_frontier.bucket(0).insert(remaining_vertices.begin(), remaining_vertices.end()); + if (loop_counter == 1) { + // FIXME: The optimization below is not appropriate for the current + // implementation since the neighbor with the highest priority + // needs to be retrieved to update its rank if possible in the 'rank' + // array. When using this primitive, it will stop once the first higher + // priority neighbor is found which may not be the highest. This + // will lead to more iterations until the highest priority neighbor + // is found if any (this will increase the overall runtime). per_v_transform_reduce_if_outgoing_e( handle, - sg_graph_view, - cugraph::make_edge_src_property_view( - sg_graph_view, inclusiong_flags.data(), inclusiong_flags.size()), - cugraph::make_edge_dst_property_view( - sg_graph_view, inclusiong_flags.data(), inclusiong_flags.size()), - cugraph::edge_dummy_property_t{}.view(), - [] __device__(auto src, auto dst, auto src_included, auto dst_included, auto wt) { return vertex_t{1}; }, - vertex_t{0}, - cugraph::reduce_op::any(), + graph_view, + vertex_frontier.bucket(0), + edge_src_dummy_property_t{}.view(), + multi_gpu ? dst_rank_cache.view() + : make_edge_dst_property_view( + graph_view, ranks.begin(), ranks.size()), + edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, auto src_rank, auto dst_rank, auto wt) { return dst_rank; }, + std::numeric_limits::lowest(), + reduce_op::any(), // just use auto, auto remove src_rank and wt # FIXME: address this. - [] __device__(auto src, auto dst, auto src_included, auto dst_included, auto wt) { - // Adjacent vertices are in the MIS - return (src_included == dst_included) && (src_included == 1); - }, - d_any_outgoing_nbrs_included_mis.begin(), - false); - - auto num_invalid_vertices_in_mis = thrust::reduce( - handle.get_thrust_policy(), - d_any_outgoing_nbrs_included_mis.begin(), - d_any_outgoing_nbrs_included_mis.end()); - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - ASSERT_TRUE(num_invalid_vertices_in_mis == 0); - - auto vertex_begin = - thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_first()); - - auto vertex_end = - thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_last()); - - rmm::device_uvector vertices(local_vtx_partitoin_size, - handle.get_stream()); - - thrust::copy(handle.get_thrust_policy(), vertex_begin, vertex_end, vertices.begin()); - - rmm::device_uvector non_candidate_vertices( - vertices.size() - d_mis.size(), handle.get_stream()); - - thrust::set_difference(handle.get_thrust_policy(), - vertices.begin(), - vertices.end(), - d_mis.begin(), - d_mis.end(), - non_candidate_vertices.begin()); - - cugraph::vertex_frontier_t vertex_frontier( + [] __device__(auto src, auto dst, auto src_rank, auto dst_rank, auto wt) { return src < dst_rank; }, + max_outgoing_ranks.begin(), + false); // FIXME: Set expensive check to False + } else { + per_v_transform_reduce_outgoing_e( handle, - 1); - - vertex_frontier.bucket(0).insert(non_candidate_vertices.begin(), non_candidate_vertices.end()); - - d_any_outgoing_nbrs_included_mis.resize(non_candidate_vertices.size(), handle.get_stream()); - - per_v_transform_reduce_if_outgoing_e( - handle, - sg_graph_view, + graph_view, vertex_frontier.bucket(0), - cugraph::make_edge_src_property_view( - sg_graph_view, inclusiong_flags.data(), inclusiong_flags.size()), - cugraph::make_edge_dst_property_view( - sg_graph_view, inclusiong_flags.data(), inclusiong_flags.size()), - cugraph::edge_dummy_property_t{}.view(), - [] __device__(auto src, auto dst, auto src_included, auto dst_included, auto wt) { return vertex_t{1}; }, - vertex_t{0}, - cugraph::reduce_op::any(), - // just use auto, auto remove src_rank and wt # FIXME: address this. - [] __device__(auto src, auto dst, auto src_included, auto dst_included, auto wt) { - // Adjacent vertices are in the MIS - return dst_included == 1; - }, - d_any_outgoing_nbrs_included_mis.begin(), - false); - - auto num_invalid_non_candidate_vertices_out_mis = thrust::reduce( - handle.get_thrust_policy(), - d_any_outgoing_nbrs_included_mis.begin(), - d_any_outgoing_nbrs_included_mis.end()); + edge_src_dummy_property_t{}.view(), + + multi_gpu ? dst_rank_cache.view() + : make_edge_dst_property_view( + graph_view, ranks.begin(), ranks.size()), - // FIXME: Add an error message - ASSERT_TRUE( - num_invalid_non_candidate_vertices_out_mis == d_any_outgoing_nbrs_included_mis.size()); + edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, auto src_rank, auto dst_rank, auto wt) { return dst_rank; }, + std::numeric_limits::lowest(), + cugraph::reduce_op::maximum{}, + max_outgoing_ranks.begin(), + false); } + + auto pair_idx_vertex_first = thrust::make_zip_iterator( // FIXME: rename this. + thrust::make_counting_iterator(0), + remaining_vertices.begin() + ); - } -}; -using Tests_SGMaximalIndependentSet_File = - Tests_SGMaximalIndependentSet; -using Tests_SGMaximalIndependentSet_Rmat = - Tests_SGMaximalIndependentSet; + // + // If the max neighbor of a vertex is already in MIS (i.e. has rank + // std::numeric_limits::max()), discard it, otherwise, + // include the vertex if it has larger rank than its maximum rank neighbor + // + + // Use thrust::stable_partition to keep track of vertices that only needs to have + // their property updated + auto last = thrust::stable_partition( + handle.get_thrust_policy(), + pair_idx_vertex_first, // FIXME: Same here, no need to pass an iterator, just follow the pseudo code + pair_idx_vertex_first + remaining_vertices.size(), + [max_rank_neighbor_first = max_outgoing_ranks.begin(), + ranks = raft::device_span(ranks.data(), ranks.size()), + v_first = graph_view.local_vertex_partition_range_first()] __device__(auto pair_vidx_v_priority) { + + auto vidx = thrust::get<0>(pair_vidx_v_priority); + auto v = thrust::get<1>(pair_vidx_v_priority); + auto v_offset = v - v_first; + auto max_neighbor_rank = *(max_rank_neighbor_first + vidx); + auto rank_of_v = ranks[v_offset]; + + if (max_neighbor_rank >= std::numeric_limits::max()) { + ranks[v_offset] = std::numeric_limits::lowest(); + return false; + } + + if (rank_of_v >= max_neighbor_rank) { + ranks[v_offset] = std::numeric_limits::max(); + return false; + } + return true; + }); -TEST_P(Tests_SGMaximalIndependentSet_File, CheckInt32Int32FloatFloat) -{ - run_current_test( - override_File_Usecase_with_cmd_line_arguments(GetParam())); -} + nr_remaining_local_vertices_to_check = cuda::std::distance(pair_idx_vertex_first, last), + handle.get_stream(); + + max_outgoing_ranks.resize(0, handle.get_stream()); + max_outgoing_ranks.shrink_to_fit(handle.get_stream()); + + if (multi_gpu) { + // FIXME: rename to 'nr_remaining_vertices_to_check' to + // 'nr_remaining_global_vertices_to_check' + nr_remaining_vertices_to_check = host_scalar_allreduce(handle.get_comms(), + nr_remaining_local_vertices_to_check, + raft::comms::op_t::SUM, + handle.get_stream()); + } else { + nr_remaining_vertices_to_check = nr_remaining_local_vertices_to_check; + } -TEST_P(Tests_SGMaximalIndependentSet_File, CheckInt64Int64FloatFloat) -{ - run_current_test( - override_File_Usecase_with_cmd_line_arguments(GetParam())); -} + if (nr_remaining_vertices_to_check == 0) { break; } -TEST_P(Tests_SGMaximalIndependentSet_Rmat, CheckInt32Int32FloatFloat) -{ - run_current_test( - override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); + } + + vertex_t nr_vertices_included_in_mis = thrust::count_if( + handle.get_thrust_policy(), ranks.begin(), ranks.end(), [] __device__(auto v_rank) { + return v_rank >= std::numeric_limits::max(); + }); + + // Build MIS and return + + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + auto start_build_mis = high_resolution_clock::now(); + + rmm::device_uvector mis(nr_vertices_included_in_mis, handle.get_stream()); + thrust::copy_if( + handle.get_thrust_policy(), + vertex_begin, + vertex_end, + ranks.begin(), + mis.begin(), + [] __device__(auto v_rank) { return v_rank >= std::numeric_limits::max(); }); + + ranks.resize(0, handle.get_stream()); + ranks.shrink_to_fit(handle.get_stream()); + + if (multi_gpu) { + // FIXME: rename to 'nr_remaining_vertices_to_check' to + // 'nr_remaining_global_vertices_to_check' + mis_size = host_scalar_allreduce(handle.get_comms(), + mis_size, + raft::comms::op_t::SUM, + handle.get_stream()); + + mis_size_perc = host_scalar_allreduce(handle.get_comms(), + mis_size_perc, + raft::comms::op_t::SUM, + handle.get_stream()); + } + + return mis; } +} // namespace detail -TEST_P(Tests_SGMaximalIndependentSet_Rmat, CheckInt64Int64FloatFloat) +template +rmm::device_uvector maximal_independent_set( + raft::handle_t const& handle, + graph_view_t const& graph_view, + raft::random::RngState& rng_state) { - run_current_test( - override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); + return detail::maximal_independent_set(handle, graph_view, rng_state); } -bool constexpr check_correctness = false; -INSTANTIATE_TEST_SUITE_P( - file_test, - Tests_SGMaximalIndependentSet_File, - ::testing::Combine(::testing::Values(MaximalIndependentSet_Usecase{check_correctness}, - MaximalIndependentSet_Usecase{check_correctness}), - ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); - -INSTANTIATE_TEST_SUITE_P( - rmat_small_test, - Tests_SGMaximalIndependentSet_Rmat, - ::testing::Combine( - ::testing::Values(MaximalIndependentSet_Usecase{check_correctness}), - ::testing::Values(cugraph::test::Rmat_Usecase(3, 4, 0.57, 0.19, 0.19, 0, true, false)))); - -INSTANTIATE_TEST_SUITE_P( - rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with - --gtest_filter to select only the rmat_benchmark_test with a specific - vertex & edge type combination) by command line arguments and do not - include more than one Rmat_Usecase that differ only in scale or edge - factor (to avoid running same benchmarks more than once) */ - Tests_SGMaximalIndependentSet_Rmat, - ::testing::Combine( - ::testing::Values(MaximalIndependentSet_Usecase{check_correctness}, - MaximalIndependentSet_Usecase{check_correctness}), - ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); - -CUGRAPH_TEST_PROGRAM_MAIN() +} // namespace cugraph From e2277b5f5d391f1e175f74bd9fff2ef3aacfdb58 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 1 Oct 2025 21:04:11 -0700 Subject: [PATCH 3/3] remove unused statement --- cpp/src/components/mis_impl.cuh | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/cpp/src/components/mis_impl.cuh b/cpp/src/components/mis_impl.cuh index c567e0320d0..25fc1961642 100644 --- a/cpp/src/components/mis_impl.cuh +++ b/cpp/src/components/mis_impl.cuh @@ -300,20 +300,6 @@ rmm::device_uvector maximal_independent_set( ranks.resize(0, handle.get_stream()); ranks.shrink_to_fit(handle.get_stream()); - if (multi_gpu) { - // FIXME: rename to 'nr_remaining_vertices_to_check' to - // 'nr_remaining_global_vertices_to_check' - mis_size = host_scalar_allreduce(handle.get_comms(), - mis_size, - raft::comms::op_t::SUM, - handle.get_stream()); - - mis_size_perc = host_scalar_allreduce(handle.get_comms(), - mis_size_perc, - raft::comms::op_t::SUM, - handle.get_stream()); - } - return mis; } } // namespace detail