diff --git a/DESCRIPTION b/DESCRIPTION index 5f97e1d0..804fafc6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -8,12 +8,16 @@ Authors@R: c( person("Robin", "Lovelace", role = "ctb"), person("Andrew", "Smith", role = "ctb"), person("Malcolm", "Morgan", role = "ctb"), - person("Andrea", "Gilardi", role="ctb", comment = c(ORCID = "0000-0002-9424-7439")), - person("Eduardo", "Leoni", role="ctb", comment = c(ORCID = "0000-0003-0955-5232")), + person("Andrea", "Gilardi", role = "ctb", + comment = c(ORCID = "0000-0002-9424-7439")), + person("Eduardo", "Leoni", role = "ctb", + comment = c(ORCID = "0000-0003-0955-5232")), person("Shane", "Saunders", role = "cph", comment = "Original author of included code for priority heaps"), person("Stanislaw", "Adaszewski", role = "cph", - comment = "author of include concaveman-cpp code") + comment = "author of include concaveman-cpp code"), + person("Harry", "Roberts", , "ts22hr@leeds.ac.uk", role = "ctb", + comment = c(ORCID = "0009-0005-5443-0684")) ) Description: Distances on dual-weighted directed graphs using priority-queue shortest paths (Padgham (2019) ). @@ -63,5 +67,5 @@ Encoding: UTF-8 LazyData: true NeedsCompilation: yes Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 SystemRequirements: GNU make diff --git a/NEWS.md b/NEWS.md index 8b9ef46c..a7f8d884 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,8 @@ - Add `pairwise` parameter to `dodgr_times()`; thanks to @leoniedu (#314) - Fix bug with categorical distances that neglected edges through compound junctions (#305) - Fix bug in duplicating bi-directional edges in weighted sc-class graphs +- Added support for strong components in `dodgr_components()` (#322) +- Added Harry Roberts as a contributor --- diff --git a/R/RcppExports.R b/R/RcppExports.R index 42319336..990ca21d 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -213,7 +213,7 @@ rcpp_merge_cols <- function (graph) { #' in \code{sample_one_vertex} #' #' @param edge_map edge_map -#' @return std::vector of 2 elements: [0] with value of largest connected +#' @return std::vector of 2 elements: [0] with value of largest connected #' component; [1] with random index to one edge that is part of that component. #' @noRd NULL @@ -263,7 +263,35 @@ NULL #' identify_graph_components #' #' Identify initial graph components for each **vertex** -#' Identification for edges is subsequently perrformed with +#' Identification for edges is subsequently performed with +#' \code{rcpp_get_component_vector}. +#' +#' @param v unordered_map +#' @param com component map from each vertex to component numbers +#' @noRd +NULL + +#' strong_connect +#' +#' Helper function for applying Tarjan's algroithm to identify +#' strong components recursively +#' +#' @param vt id of vertex currently being evaluated +#' @param v unordered_map +#' @param com component map from each vertex to component numbers +#' @param index_map map from each vertex to index number +#' @param lowlink_map map from each vertex to lowlink number +#' @param on_stack set of all vertices currently on stack +#' @param s stack used to execute algorithm +#' @param index used to execute algoritm +#' @param compnum the current component number +#' @noRd +NULL + +#' identify_graph_strong_components +#' +#' Identify initial graph strong components for each **vertex** +#' Identification for edges is subsequently performed with #' \code{rcpp_get_component_vector}. #' #' @param v unordered_map @@ -278,11 +306,14 @@ NULL #' @param graph graph to be processed; stripped down and standardised to five #' columns #' +#' @param strong A Boolean flag to indicate whether components should be strong, +#' i.e. its vertices connected bidirectionally. Defaults to FALSE. +#' #' @return Two vectors: one of edge IDs and one of corresponding component #' numbers #' @noRd -rcpp_get_component_vector <- function (graph) { - .Call (`_dodgr_rcpp_get_component_vector`, graph) +rcpp_get_component_vector <- function (graph, strong = FALSE) { + .Call (`_dodgr_rcpp_get_component_vector`, graph, strong) } #' rcpp_unique_rownames @@ -484,4 +515,4 @@ rcpp_sf_as_network <- function (sf_lines, pr) { #' @noRd rcpp_route_times <- function (graph, left_side, turn_penalty) { .Call (`_dodgr_rcpp_route_times`, graph, left_side, turn_penalty) -} +} \ No newline at end of file diff --git a/R/graph-functions.R b/R/graph-functions.R index 7dc37ca2..f8d2d4ef 100644 --- a/R/graph-functions.R +++ b/R/graph-functions.R @@ -310,6 +310,8 @@ dodgr_vertices_internal <- function (graph) { #' column to `data.frame`. #' #' @param graph A `data.frame` of edges +#' @param strong A Boolean flag to indicate whether components should be strong, +#' i.e. its vertices connected bidirectionally. Defaults to FALSE. #' @return Equivalent graph with additional `component` column, #' sequentially numbered from 1 = largest component. #' @family modification @@ -317,7 +319,7 @@ dodgr_vertices_internal <- function (graph) { #' @examples #' graph <- weight_streetnet (hampi) #' graph <- dodgr_components (graph) -dodgr_components <- function (graph) { +dodgr_components <- function (graph, strong = FALSE) { graph <- tbl_to_df (graph) @@ -329,15 +331,17 @@ dodgr_components <- function (graph) { if (is.na (gr_cols$edge_id)) { graph2$edge_id <- seq_len (nrow (graph2)) } - cns <- rcpp_get_component_vector (graph2) - + cns <- rcpp_get_component_vector (graph2,strong) + indx <- match (graph2$edge_id, cns$edge_id) component <- cns$edge_component [indx] + + # Handle where component numbers contain gaps using frequencies # Then re-number in order to decreasing component size: - graph$component <- match ( - component, - order (table (component), decreasing = TRUE) - ) + freqs <- table (component) + sorted_components <- names (freqs) [order (freqs, decreasing = TRUE)] + graph$component <- match (as.character (component), sorted_components) + } return (graph) diff --git a/codemeta.json b/codemeta.json index b94d6f3f..cee5b4aa 100644 --- a/codemeta.json +++ b/codemeta.json @@ -4,20 +4,17 @@ "identifier": "dodgr", "description": "Distances on dual-weighted directed graphs using priority-queue shortest paths (Padgham (2019) ). Weighted directed graphs have weights from A to B which may differ from those from B to A. Dual-weighted directed graphs have two sets of such weights. A canonical example is a street network to be used for routing in which routes are calculated by weighting distances according to the type of way and mode of transport, yet lengths of routes must be calculated from direct distances.", "name": "dodgr: Distances on Directed Graphs", - "relatedLink": [ - "https://UrbanAnalyst.github.io/dodgr/", - "https://CRAN.R-project.org/package=dodgr" - ], + "relatedLink": ["https://UrbanAnalyst.github.io/dodgr/", "https://CRAN.R-project.org/package=dodgr"], "codeRepository": "https://github.com/UrbanAnalyst/dodgr", "issueTracker": "https://github.com/UrbanAnalyst/dodgr/issues", "license": "https://spdx.org/licenses/GPL-3.0", - "version": "0.4.3.015", + "version": "0.4.3.15", "programmingLanguage": { "@type": "ComputerLanguage", "name": "R", "url": "https://r-project.org" }, - "runtimePlatform": "R version 4.5.1 (2025-06-13)", + "runtimePlatform": "R version 4.5.1 (2025-06-13 ucrt)", "provider": { "@id": "https://cran.r-project.org", "@type": "Organization", @@ -69,6 +66,13 @@ "givenName": "Eduardo", "familyName": "Leoni", "@id": "https://orcid.org/0000-0003-0955-5232" + }, + { + "@type": "Person", + "givenName": "Harry", + "familyName": "Roberts", + "email": "ts22hr@leeds.ac.uk", + "@id": "https://orcid.org/0009-0005-5443-0684" } ], "copyrightHolder": [ @@ -362,7 +366,7 @@ }, "SystemRequirements": "GNU make" }, - "fileSize": "37019.613KB", + "fileSize": "72585.397KB", "citation": [ { "@type": "ScholarlyArticle", @@ -381,10 +385,7 @@ "@type": "PublicationIssue", "datePublished": "2019", "isPartOf": { - "@type": [ - "PublicationVolume", - "Periodical" - ], + "@type": ["PublicationVolume", "Periodical"], "name": "Transport Findings" } } diff --git a/man/dodgr_components.Rd b/man/dodgr_components.Rd index cdb9b646..11dc1a73 100644 --- a/man/dodgr_components.Rd +++ b/man/dodgr_components.Rd @@ -4,10 +4,13 @@ \alias{dodgr_components} \title{Identify connected components of graph.} \usage{ -dodgr_components(graph) +dodgr_components(graph, strong = FALSE) } \arguments{ \item{graph}{A \code{data.frame} of edges} + +\item{strong}{A Boolean flag to indicate whether components should be strong, +i.e. its vertices connected bidirectionally. Defaults to FALSE.} } \value{ Equivalent graph with additional \code{component} column, diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 733ce7d7..2a9cc090 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -190,13 +190,14 @@ BEGIN_RCPP END_RCPP } // rcpp_get_component_vector -Rcpp::List rcpp_get_component_vector(const Rcpp::DataFrame& graph); -RcppExport SEXP _dodgr_rcpp_get_component_vector(SEXP graphSEXP) { +Rcpp::List rcpp_get_component_vector(const Rcpp::DataFrame& graph, bool strong); +RcppExport SEXP _dodgr_rcpp_get_component_vector(SEXP graphSEXP, SEXP strongSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const Rcpp::DataFrame& >::type graph(graphSEXP); - rcpp_result_gen = Rcpp::wrap(rcpp_get_component_vector(graph)); + Rcpp::traits::input_parameter< bool >::type strong(strongSEXP); + rcpp_result_gen = Rcpp::wrap(rcpp_get_component_vector(graph, strong)); return rcpp_result_gen; END_RCPP } @@ -441,7 +442,7 @@ static const R_CallMethodDef CallEntries[] = { {"_dodgr_rcpp_contract_graph", (DL_FUNC) &_dodgr_rcpp_contract_graph, 2}, {"_dodgr_rcpp_merge_cols", (DL_FUNC) &_dodgr_rcpp_merge_cols, 1}, {"_dodgr_rcpp_sample_graph", (DL_FUNC) &_dodgr_rcpp_sample_graph, 2}, - {"_dodgr_rcpp_get_component_vector", (DL_FUNC) &_dodgr_rcpp_get_component_vector, 1}, + {"_dodgr_rcpp_get_component_vector", (DL_FUNC) &_dodgr_rcpp_get_component_vector, 2}, {"_dodgr_rcpp_unique_rownames", (DL_FUNC) &_dodgr_rcpp_unique_rownames, 3}, {"_dodgr_rcpp_points_index_par", (DL_FUNC) &_dodgr_rcpp_points_index_par, 2}, {"_dodgr_rcpp_points_to_edges_par", (DL_FUNC) &_dodgr_rcpp_points_to_edges_par, 2}, diff --git a/src/dodgr.dll b/src/dodgr.dll new file mode 100644 index 00000000..c9b9ac15 Binary files /dev/null and b/src/dodgr.dll differ diff --git a/src/graph.cpp b/src/graph.cpp index 89b1f075..933d477d 100644 --- a/src/graph.cpp +++ b/src/graph.cpp @@ -120,7 +120,7 @@ bool graph::graph_from_df (const Rcpp::DataFrame &gr, vertex_map_t &vm, //' identify_graph_components //' //' Identify initial graph components for each **vertex** -//' Identification for edges is subsequently perrformed with +//' Identification for edges is subsequently performed with //' \code{rcpp_get_component_vector}. //' //' @param v unordered_map @@ -177,6 +177,112 @@ size_t graph::identify_graph_components (vertex_map_t &v, return static_cast (largest_id); } +//' strong_connect +//' +//' Helper function for applying Tarjan's algroithm to identify +//' strong components recursively +//' +//' @param vt id of vertex currently being evaluated +//' @param v unordered_map +//' @param com component map from each vertex to component numbers +//' @param index_map map from each vertex to index number +//' @param lowlink_map map from each vertex to lowlink number +//' @param on_stack set of all vertices currently on stack +//' @param s stack used to execute algorithm +//' @param index used to execute algoritm +//' @param compnum the current component number +//' @noRd +void graph::strong_connect(vertex_id_t vt, + vertex_map_t &v, + std::unordered_map &com, + std::unordered_map &index_map, + std::unordered_map &lowlink_map, + std::unordered_set &on_stack, + std::stack &s, + size_t &index, + size_t &compnum) +{ + + Rcpp::checkUserInterrupt (); + + index_map [vt] = index; + lowlink_map [vt] = index; + index++; + s.push (vt); + on_stack.insert (vt); + + vertex_t vtx = v.find (vt)->second; + std::unordered_set nbs = vtx.get_out_neighbours (); + + for (auto nvtx : nbs) + { + if (index_map.find (nvtx) == index_map.end ()) + { + strong_connect (nvtx, v, com, index_map, lowlink_map, on_stack, s, index, compnum); + lowlink_map [vt] = std::min (lowlink_map [vt], lowlink_map [nvtx]); + } + else if (on_stack.find (nvtx) != on_stack.end ()) + { + lowlink_map [vt] = std::min (lowlink_map [vt], index_map [nvtx]); + } + } + + if (lowlink_map [vt] == index_map [vt]) + { + vertex_id_t w; + do + { + w = s.top (); + s.pop (); + on_stack.erase (w); + com [w] = compnum; + } while (w != vt); + compnum++; + } +} + +//' identify_graph_strong_components +//' +//' Identify initial graph strong components for each **vertex** +//' Identification for edges is subsequently performed with +//' \code{rcpp_get_component_vector}. +//' +//' @param v unordered_map +//' @param com component map from each vertex to component numbers +//' @noRd +size_t graph::identify_graph_strong_components (vertex_map_t &v, + std::unordered_map &com) +{ + com.clear (); + + size_t index = 0; + size_t compnum = 0; + + std::unordered_map index_map; + std::unordered_map lowlink_map; + std::unordered_set on_stack; + std::stack s; + + for (auto it : v) + { + vertex_id_t vt = it.first; + if (index_map.find (vt) == index_map.end ()) + strong_connect (vt, v, com, index_map, lowlink_map, on_stack, s, index, compnum); + } + + long int largest_id = 0; + if (compnum > 0) + { + std::vector comp_sizes (compnum + 1, 0); + for (auto c: com) + comp_sizes [c.second]++; + auto maxi = std::max_element (comp_sizes.begin (), comp_sizes.end ()); + + largest_id = std::distance (comp_sizes.begin (), maxi); + } + + return static_cast (largest_id); +} //' rcpp_get_component_vector //' @@ -185,11 +291,14 @@ size_t graph::identify_graph_components (vertex_map_t &v, //' @param graph graph to be processed; stripped down and standardised to five //' columns //' +//' @param strong A Boolean flag to indicate whether components should be strong, +//' i.e. its vertices connected bidirectionally. Defaults to FALSE. +//' //' @return Two vectors: one of edge IDs and one of corresponding component //' numbers //' @noRd // [[Rcpp::export]] -Rcpp::List rcpp_get_component_vector (const Rcpp::DataFrame &graph) +Rcpp::List rcpp_get_component_vector (const Rcpp::DataFrame &graph, bool strong = false) { vertex_map_t vertices; edge_map_t edge_map; @@ -199,18 +308,21 @@ Rcpp::List rcpp_get_component_vector (const Rcpp::DataFrame &graph) has_times = false; // rm unused variable warning std::unordered_map components; - size_t largest_component = - graph::identify_graph_components (vertices, components); + size_t largest_component; + + if (strong) + largest_component = graph::identify_graph_strong_components (vertices, components); + else + largest_component = graph::identify_graph_components (vertices, components); + largest_component++; // suppress unused variable warning // Then map component numbers of vertices onto edges std::unordered_map comp_nums; - for (auto ve: vert2edge_map) + for (auto e : edge_map) { - vertex_id_t vi = ve.first; - std::unordered_set edges = ve.second; - for (edge_id_t e: edges) - comp_nums.emplace (e, components.find (vi)->second); + vertex_id_t vi = e.second.get_from_vertex (); + comp_nums.emplace (e.first, components.find(vi)->second); } Rcpp::StringVector edge_id (comp_nums.size ()); diff --git a/src/graph.h b/src/graph.h index b59a4ca0..ab4d0be1 100644 --- a/src/graph.h +++ b/src/graph.h @@ -8,6 +8,7 @@ #include // stoi #include // round #include // isnan +#include const float INFINITE_FLOAT = std::numeric_limits ::max (); const double INFINITE_DOUBLE = std::numeric_limits ::max (); @@ -146,9 +147,23 @@ bool graph_from_df (const Rcpp::DataFrame &gr, vertex_map_t &vm, size_t identify_graph_components (vertex_map_t &v, std::unordered_map &com); +size_t identify_graph_strong_components (vertex_map_t &v, + std::unordered_map &com); + +void strong_connect (vertex_id_t vt, + vertex_map_t &v, + std::unordered_map &com, + std::unordered_map &index_map, + std::unordered_map &lowlink_map, + std::unordered_set &on_stack, + std::stack &s, + size_t &index, + size_t &compnum); + + } // end namespace graph -Rcpp::List rcpp_get_component_vector (const Rcpp::DataFrame &graph); +Rcpp::List rcpp_get_component_vector (const Rcpp::DataFrame &graph, bool strong); Rcpp::DataFrame rcpp_unique_rownames (Rcpp::DataFrame xyfrom, Rcpp::DataFrame xyto, const int precision);