mrc-ide · sangeetabhatia03 · Jul 10, 2024 · Apr 26, 2024 · Apr 29, 2024 · Apr 29, 2024
diff --git a/.gitignore b/.gitignore
@@ -55,3 +55,4 @@ inst/doc
 tests/.DS_Store
 *.DS_Store
 tests/.DS_Store
+tests/.DS_Store
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: epireview
 Title: Tools to update and summarise the latest pathogen data from the Pathogen Epidemiology Review Group (PERG)
-Version: 1.2.11
+Version: 1.2.13
 Authors@R: c(
     person("Rebecca", "Nash", email = "r.nash@imperial.ac.uk", role = "aut",
            comment = c(ORCID = "0000-0002-5213-4364")),

diff --git a/NAMESPACE b/NAMESPACE
@@ -4,6 +4,7 @@ export(append_new_entry_to_table)
 export(article_column_type)
 export(assign_qa_score)
 export(check_column_types)
+export(check_df_for_meta)
 export(check_ulim)
 export(color_palette)
 export(country_palette)
@@ -18,6 +19,7 @@ export(delay_table)
 export(delays_to_days)
 export(filter_cols)
 export(filter_df_for_metamean)
+export(filter_df_for_metaprop)
 export(forest_plot)
 export(forest_plot_delay_int)
 export(forest_plot_doubling_time)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,8 @@
+# epireview 1.2.13
+
+* FEATURE: new function to format parameter dataset so that it can be input into
+meta-analysis of proportions.
+
 # epireview 1.2.12
 
 * FEATURE: Addresses #98 by including article_info as key column returned when parameter specific getters are invoked.

diff --git a/R/filter_df_for_metamean.R b/R/filter_df_for_metamean.R
@@ -14,7 +14,7 @@
 #' parameter_value_type, parameter_uncertainty_singe_type,
 #' parameter_uncertainty_type, parameter_uncertainty_lower_value,
 #' parameter_uncertainty_upper_value. This will typically be the `params`
-#' data.frame from the output of \code{load_epireview}.
+#' data.frame from the output of \code{load_epidata}.
 #'
 #'
 #' @return a parameter dataframe with relevant rows selected and additional columns
@@ -34,92 +34,49 @@
 #' ## o2h_df_filtered could then be used directly in meta analyses as:
 #' ## mtan <- metamean(data = o2h_df_filtered, ...)
 filter_df_for_metamean <- function(df) {
-
-  # must have the correct columns
   cols_needed <- c("parameter_value", "parameter_unit", "population_sample_size",
                    "parameter_value_type", "parameter_uncertainty_singe_type",
                    "parameter_uncertainty_type", "parameter_uncertainty_lower_value",
                    "parameter_uncertainty_upper_value")
-
-  if (!all(cols_needed %in% colnames(df))) {
-    cols_missing <- cols_needed[!cols_needed %in% colnames(df)]
-    cli_abort(
-      paste("df must have columns named: ", paste(cols_needed, collapse = ", "),
-      ". Columns missing: ", paste(cols_missing, collapse = ", "), sep=""),
-      call = NULL
-    )
-  }
-
-  ## Ensure that there is a single parameter type present
-  if(length(unique(df$parameter_type)) != 1) {
-    cli_abort("parameter_type must be the same across all values.", call = NULL)
-  }
-
-  ## First check that there are no rows where a value is present but unit is
-  ## missing, or vice versa
-  if(any(is.na(df$parameter_value) & !is.na(df$parameter_unit))) {
-    cli_inform("parameter_value must be present if parameter_unit is present.
-            Rows with non-NA parameter_value and NA parameter_unit will be
-            removed.")
-    df <- filter(df,!( is.na(.data[["parameter_value"]]) & !is.na(.data[["parameter_unit"]]) ))
-  }
-
-  if(any(!is.na(df$parameter_value) & is.na(df$parameter_unit))) {
-    cli_inform("parameter_unit is missing but parameter_value is present.
-            Rows with non-NA parameter_value and NA parameter_unit will be
-            removed."
-    )
-    df <- filter(df,!( is.na(.data[["parameter_value"]]) & 
-      !is.na(.data[["parameter_unit"]]) ))
-  }
-
-  # values of the parameter must all have the same units
-  if(length(unique(df$parameter_unit[!is.na(df$parameter_unit)])) != 1) {
-    msg1 <- "parameter_unit must be the same across all values."
-    msg2 <- "Consider calling delays_to_days() if you are working with delays."
-    cli_abort(paste(msg1, msg2), call = NULL)
-  }
-
-  df <- df %>% filter(!is.na(.data[["population_sample_size"]])) %>%
-    filter(!is.na(.data[["parameter_value"]])) %>%
-    filter(
-      (.data[["parameter_value_type"]] == 'Mean' & 
-        grepl(x = tolower(.data[["parameter_uncertainty_singe_type"]]), 
-          pattern = 'standard deviation')) |
-      (.data[["parameter_value_type"]] == 'Median' & 
-        grepl(x = tolower(.data[["parameter_uncertainty_type"]]), 
-          pattern = 'iqr')) |
-      (.data[["parameter_value_type"]] == 'Median' & 
-        grepl(x = tolower(.data[["parameter_uncertainty_type"]]), 
-          pattern = 'range'))
-    )
-
-  df <- mutate(
-    df,
-    xbar = ifelse(
-      .data[["parameter_value_type"]] == "Mean", .data[["parameter_value"]], NA),
-    median = ifelse(
-      .data[["parameter_value_type"]] == "Median", .data[["parameter_value"]], NA
-    ),
-    q1 = ifelse(
-      grepl(x = tolower(.data[["parameter_uncertainty_type"]]), "iqr"),
-      .data[["parameter_uncertainty_lower_value"]], NA),
-    q3 = ifelse(grepl(x = tolower(.data[["parameter_uncertainty_type"]]), "iqr"),
-      .data[["parameter_uncertainty_upper_value"]], NA),
-    min = ifelse(
-      grepl(
-        x = tolower(.data[["parameter_uncertainty_type"]]), pattern = "range"
-      ) &
-      !grepl(x = tolower(.data[["parameter_uncertainty_type"]]), "iqr"),
-      .data[["parameter_uncertainty_lower_value"]], NA
-    ),
-    max = ifelse(
-      grepl(x = tolower(.data[["parameter_uncertainty_type"]]), pattern = "range"
-      ) &
-      !grepl(x = tolower(.data[["parameter_uncertainty_type"]]), pattern = "iqr"),
-      .data[["parameter_uncertainty_upper_value"]], NA
-    )
+
+  df <- check_df_for_meta(df, cols_needed)
+
+  df <- df[!is.na(df[["population_sample_size"]]), ]
+  df <- df[!is.na(df[["parameter_value"]]), ]
+  df <- df[df[["parameter_value_type"]] == 'Mean' & 
+              grepl(x = tolower(df[["parameter_uncertainty_singe_type"]]), 
+                    pattern = 'standard deviation') |
+              df[["parameter_value_type"]] == 'Median' & 
+              grepl(x = tolower(df[["parameter_uncertainty_type"]]), 
+                    pattern = 'iqr') |
+              df[["parameter_value_type"]] == 'Median' & 
+              grepl(x = tolower(df[["parameter_uncertainty_type"]]), 
+                    pattern = 'range'), ]
+
+  df$xbar <- ifelse(
+    df[["parameter_value_type"]] == "Mean", df[["parameter_value"]], NA
   )
-
+  df$median <- ifelse(
+    df[["parameter_value_type"]] == "Median", df[["parameter_value"]], NA
+  )
+  df$q1 <- ifelse(
+    grepl(x = tolower(df[["parameter_uncertainty_type"]]), "iqr"),
+    df[["parameter_uncertainty_lower_value"]], NA
+  )
+  df$q3 <- ifelse(
+    grepl(x = tolower(df[["parameter_uncertainty_type"]]), "iqr"),
+    df[["parameter_uncertainty_upper_value"]], NA
+  )
+  df$min <- ifelse(
+    grepl(x = tolower(df[["parameter_uncertainty_type"]]), pattern = "range") &
+    !grepl(x = tolower(df[["parameter_uncertainty_type"]]), "iqr"),
+    df[["parameter_uncertainty_lower_value"]], NA
+  )
+  df$max <- ifelse(
+    grepl(x = tolower(df[["parameter_uncertainty_type"]]), pattern = "range") &
+    !grepl(x = tolower(df[["parameter_uncertainty_type"]]), pattern = "iqr"),
+    df[["parameter_uncertainty_upper_value"]], NA
+  )
+
   df
 }
diff --git a/R/filter_df_for_metaprop.R b/R/filter_df_for_metaprop.R
@@ -0,0 +1,50 @@
+#' Prepare parameter dataframe for meta analysis of proportions
+#' @details
+#' The function checks that the format of df is adequate for conducting a meta
+#' analysis of proportions. It filters the dataframe to only include rows that
+#' meet the required format by (1) removing rows where the denominator is missing,
+#' and (2) removing rows where both the numerator column or parameter value are 
+#' missing. If the numerator column is missing and the parameter value is present,
+#' the numerator is imputed as the parameter value divided by 100 times the
+#' denominator.
+#' 
+#' @param df a parameter dataframe. This must have columns for each of the
+#' following: parameter_value, parameter_unit, plus two columns for the
+#' numerator and the denominator of the proportion of interest.
+#' This dataframe will typically be the `params`
+#' data.frame from the output of \code{\link{load_epidata}}.
+#' @param num_col a string specifying the column name for the column containing
+#' the numerator of the proportion of interest.
+#' @param denom_col a string specifying the column name for the column
+#' containing the denominator of the proportion of interest.
+#'
+#' @return a parameter dataframe with relevant rows selected to enable
+#' meta analysis of proportions.
+#'
+#' @export
+#'
+#' @examples
+#' ## preparing data for meta analyses of CFR for Lassa
+#'
+#' df <- load_epidata("lassa")[["params"]]
+#' cfr_df <- df[df$parameter_type %in% "Severity - case fatality rate (CFR)",]
+#' cfr_filtered <- filter_df_for_metaprop(cfr_df,
+#'   num_col = "cfr_ifr_numerator", denom_col = "cfr_ifr_denominator")
+#' ## cfr_filtered could then be used directly in meta analyses as:
+#' ## mtan <- metaprop(data = cfr_filtered, ...)
+filter_df_for_metaprop <- function(df, num_col, denom_col) {
+  cols_needed <- c("parameter_value", "parameter_unit", num_col, denom_col)
+
+  df <- check_df_for_meta(df, cols_needed)
+
+  df <- df[!is.na(df[[denom_col]]), ]
+  df <- df[!(is.na(df[[num_col]]) & is.na(df[["parameter_value"]])), ]
+
+  df[[num_col]] <- ifelse(
+    is.na(df[[num_col]]) & !is.na(df[["parameter_value"]]),
+    round((df[["parameter_value"]]/100) * df[[denom_col]]),
+    df[[num_col]]
+  )
+
+  df
+}
diff --git a/R/load_epidata.R b/R/load_epidata.R
@@ -12,7 +12,7 @@
 #' are loaded via \code{\link{load_epidata}}.
 #'
 #' @param x data.frame containing a column called "parameter_type",  This will
-#' typically be the `params`data.frame from the output of \code{load_epireview}.
+#' typically be the `params`data.frame from the output of \code{load_epidata}.
 #' @param parameter_type_full optional. User can specify the full value of a
 #' parameter type not already included in the function.
 #' @param parameter_type_short optional. Shorter value of parameter_type_full

diff --git a/R/utils.R b/R/utils.R
@@ -1,3 +1,66 @@
+#' Sanity checks before meta-analysis
+#' @details
+#' The function carries out a series of checks on the parameter dataframe to
+#' ensure that it is in the correct format for conducting a meta-analysis. 
+#' It checks that the input (1) consists of a single parameter type; (2) has the
+#' columns required for the meta-analysis; (3) does not have any row where a 
+#' value is present but the unit is missing, or vice versa. All such rows are 
+#' removed; and (4) has the same unit across all values of the parameter. The 
+#' function will throw an error if either there is more than one value of 
+#' parameter_type, or if the columns needed for the meta-analysis are missing,
+#' or if the parameter_unit is not the same across all values of the parameter.
+#' @inheritParams filter_df_for_metaprop
+#' @param cols_needed a character vector specifying the names of the columns
+#' required for the meta-analysis.
+#' @return a parameter dataframe with offending rows removed.
+#' @export
+check_df_for_meta <- function(df, cols_needed) {
+
+  ## Ensure that there is a single parameter type present
+  if(length(unique(df$parameter_type)) != 1) {
+    cli_abort("parameter_type must be the same across all values.", call = NULL)
+  }
+
+  if (!all(cols_needed %in% colnames(df))) {
+    cols_missing <- cols_needed[!cols_needed %in% colnames(df)]
+    cli_abort(
+      "df must have columns named: {cols_needed}. 
+      Column{?s} missing: {cols_missing}",
+      call = NULL
+    )
+  }
+
+  ## First check that there are no rows where a value is present but unit is
+  ## missing, or vice versa
+  idx <- is.na(df$parameter_value) & !is.na(df$parameter_unit)
+  remove <- sum(idx)
+  if(any(idx)) {
+    cli_inform("parameter_value must be present if parameter_unit is present.
+            {remove} row{?s} with non-NA parameter_value and NA parameter_unit  
+            will be removed.")
+    df <- df[!(is.na(df$parameter_value) & !is.na(df$parameter_unit)), ]
+  }
+
+  idx <- !is.na(df$parameter_value) & is.na(df$parameter_unit)
+  remove <- sum(idx)
+  if(any(idx)) {
+    cli_inform("parameter_unit is missing but parameter_value is present.
+            {remove} row{?s} with non-NA parameter_value and NA parameter_unit 
+            will be removed.")
+    df <- df[!(is.na(df$parameter_value) & !is.na(df$parameter_unit)), ]
+  }
+
+  # values of the parameter must all have the same units
+  if(length(unique(df$parameter_unit[!is.na(df$parameter_unit)])) != 1) {
+    msg1 <- "parameter_unit must be the same across all values."
+    msg2 <- "Consider calling delays_to_days() if you are working with delays."
+    cli_abort(paste(msg1, msg2), call = NULL)
+  }
+
+
+
+  df
+}
 #' Check upper limit of parameter values
 #'
 #' @details

diff --git a/man/check_df_for_meta.Rd b/man/check_df_for_meta.Rd
diff --git a/man/filter_df_for_metamean.Rd b/man/filter_df_for_metamean.Rd
diff --git a/man/filter_df_for_metaprop.Rd b/man/filter_df_for_metaprop.Rd