Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,4 @@ inst/doc
tests/.DS_Store
*.DS_Store
tests/.DS_Store
tests/.DS_Store
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: epireview
Title: Tools to update and summarise the latest pathogen data from the Pathogen Epidemiology Review Group (PERG)
Version: 1.2.11
Version: 1.2.13
Authors@R: c(
person("Rebecca", "Nash", email = "r.nash@imperial.ac.uk", role = "aut",
comment = c(ORCID = "0000-0002-5213-4364")),
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ export(append_new_entry_to_table)
export(article_column_type)
export(assign_qa_score)
export(check_column_types)
export(check_df_for_meta)
export(check_ulim)
export(color_palette)
export(country_palette)
Expand All @@ -18,6 +19,7 @@ export(delay_table)
export(delays_to_days)
export(filter_cols)
export(filter_df_for_metamean)
export(filter_df_for_metaprop)
export(forest_plot)
export(forest_plot_delay_int)
export(forest_plot_doubling_time)
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# epireview 1.2.13

* FEATURE: new function to format parameter dataset so that it can be input into
meta-analysis of proportions.

# epireview 1.2.12

* FEATURE: Addresses #98 by including article_info as key column returned when parameter specific getters are invoked.
Expand Down
123 changes: 40 additions & 83 deletions R/filter_df_for_metamean.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#' parameter_value_type, parameter_uncertainty_singe_type,
#' parameter_uncertainty_type, parameter_uncertainty_lower_value,
#' parameter_uncertainty_upper_value. This will typically be the `params`
#' data.frame from the output of \code{load_epireview}.
#' data.frame from the output of \code{load_epidata}.
#'
#'
#' @return a parameter dataframe with relevant rows selected and additional columns
Expand All @@ -34,92 +34,49 @@
#' ## o2h_df_filtered could then be used directly in meta analyses as:
#' ## mtan <- metamean(data = o2h_df_filtered, ...)
filter_df_for_metamean <- function(df) {

# must have the correct columns
cols_needed <- c("parameter_value", "parameter_unit", "population_sample_size",
"parameter_value_type", "parameter_uncertainty_singe_type",
"parameter_uncertainty_type", "parameter_uncertainty_lower_value",
"parameter_uncertainty_upper_value")

if (!all(cols_needed %in% colnames(df))) {
cols_missing <- cols_needed[!cols_needed %in% colnames(df)]
cli_abort(
paste("df must have columns named: ", paste(cols_needed, collapse = ", "),
". Columns missing: ", paste(cols_missing, collapse = ", "), sep=""),
call = NULL
)
}

## Ensure that there is a single parameter type present
if(length(unique(df$parameter_type)) != 1) {
cli_abort("parameter_type must be the same across all values.", call = NULL)
}

## First check that there are no rows where a value is present but unit is
## missing, or vice versa
if(any(is.na(df$parameter_value) & !is.na(df$parameter_unit))) {
cli_inform("parameter_value must be present if parameter_unit is present.
Rows with non-NA parameter_value and NA parameter_unit will be
removed.")
df <- filter(df,!( is.na(.data[["parameter_value"]]) & !is.na(.data[["parameter_unit"]]) ))
}

if(any(!is.na(df$parameter_value) & is.na(df$parameter_unit))) {
cli_inform("parameter_unit is missing but parameter_value is present.
Rows with non-NA parameter_value and NA parameter_unit will be
removed."
)
df <- filter(df,!( is.na(.data[["parameter_value"]]) &
!is.na(.data[["parameter_unit"]]) ))
}

# values of the parameter must all have the same units
if(length(unique(df$parameter_unit[!is.na(df$parameter_unit)])) != 1) {
msg1 <- "parameter_unit must be the same across all values."
msg2 <- "Consider calling delays_to_days() if you are working with delays."
cli_abort(paste(msg1, msg2), call = NULL)
}

df <- df %>% filter(!is.na(.data[["population_sample_size"]])) %>%
filter(!is.na(.data[["parameter_value"]])) %>%
filter(
(.data[["parameter_value_type"]] == 'Mean' &
grepl(x = tolower(.data[["parameter_uncertainty_singe_type"]]),
pattern = 'standard deviation')) |
(.data[["parameter_value_type"]] == 'Median' &
grepl(x = tolower(.data[["parameter_uncertainty_type"]]),
pattern = 'iqr')) |
(.data[["parameter_value_type"]] == 'Median' &
grepl(x = tolower(.data[["parameter_uncertainty_type"]]),
pattern = 'range'))
)

df <- mutate(
df,
xbar = ifelse(
.data[["parameter_value_type"]] == "Mean", .data[["parameter_value"]], NA),
median = ifelse(
.data[["parameter_value_type"]] == "Median", .data[["parameter_value"]], NA
),
q1 = ifelse(
grepl(x = tolower(.data[["parameter_uncertainty_type"]]), "iqr"),
.data[["parameter_uncertainty_lower_value"]], NA),
q3 = ifelse(grepl(x = tolower(.data[["parameter_uncertainty_type"]]), "iqr"),
.data[["parameter_uncertainty_upper_value"]], NA),
min = ifelse(
grepl(
x = tolower(.data[["parameter_uncertainty_type"]]), pattern = "range"
) &
!grepl(x = tolower(.data[["parameter_uncertainty_type"]]), "iqr"),
.data[["parameter_uncertainty_lower_value"]], NA
),
max = ifelse(
grepl(x = tolower(.data[["parameter_uncertainty_type"]]), pattern = "range"
) &
!grepl(x = tolower(.data[["parameter_uncertainty_type"]]), pattern = "iqr"),
.data[["parameter_uncertainty_upper_value"]], NA
)

df <- check_df_for_meta(df, cols_needed)

df <- df[!is.na(df[["population_sample_size"]]), ]
df <- df[!is.na(df[["parameter_value"]]), ]
df <- df[df[["parameter_value_type"]] == 'Mean' &
grepl(x = tolower(df[["parameter_uncertainty_singe_type"]]),
pattern = 'standard deviation') |
df[["parameter_value_type"]] == 'Median' &
grepl(x = tolower(df[["parameter_uncertainty_type"]]),
pattern = 'iqr') |
df[["parameter_value_type"]] == 'Median' &
grepl(x = tolower(df[["parameter_uncertainty_type"]]),
pattern = 'range'), ]

df$xbar <- ifelse(
df[["parameter_value_type"]] == "Mean", df[["parameter_value"]], NA
)

df$median <- ifelse(
df[["parameter_value_type"]] == "Median", df[["parameter_value"]], NA
)
df$q1 <- ifelse(
grepl(x = tolower(df[["parameter_uncertainty_type"]]), "iqr"),
df[["parameter_uncertainty_lower_value"]], NA
)
df$q3 <- ifelse(
grepl(x = tolower(df[["parameter_uncertainty_type"]]), "iqr"),
df[["parameter_uncertainty_upper_value"]], NA
)
df$min <- ifelse(
grepl(x = tolower(df[["parameter_uncertainty_type"]]), pattern = "range") &
!grepl(x = tolower(df[["parameter_uncertainty_type"]]), "iqr"),
df[["parameter_uncertainty_lower_value"]], NA
)
df$max <- ifelse(
grepl(x = tolower(df[["parameter_uncertainty_type"]]), pattern = "range") &
!grepl(x = tolower(df[["parameter_uncertainty_type"]]), pattern = "iqr"),
df[["parameter_uncertainty_upper_value"]], NA
)

df
}
50 changes: 50 additions & 0 deletions R/filter_df_for_metaprop.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#' Prepare parameter dataframe for meta analysis of proportions
#' @details
#' The function checks that the format of df is adequate for conducting a meta
#' analysis of proportions. It filters the dataframe to only include rows that
#' meet the required format by (1) removing rows where the denominator is missing,
#' and (2) removing rows where both the numerator column or parameter value are
#' missing. If the numerator column is missing and the parameter value is present,
#' the numerator is imputed as the parameter value divided by 100 times the
#' denominator.
#'
#' @param df a parameter dataframe. This must have columns for each of the
#' following: parameter_value, parameter_unit, plus two columns for the
#' numerator and the denominator of the proportion of interest.
#' This dataframe will typically be the `params`
#' data.frame from the output of \code{\link{load_epidata}}.
#' @param num_col a string specifying the column name for the column containing
#' the numerator of the proportion of interest.
#' @param denom_col a string specifying the column name for the column
#' containing the denominator of the proportion of interest.
#'
#' @return a parameter dataframe with relevant rows selected to enable
#' meta analysis of proportions.
#'
#' @export
#'
#' @examples
#' ## preparing data for meta analyses of CFR for Lassa
#'
#' df <- load_epidata("lassa")[["params"]]
#' cfr_df <- df[df$parameter_type %in% "Severity - case fatality rate (CFR)",]
#' cfr_filtered <- filter_df_for_metaprop(cfr_df,
#' num_col = "cfr_ifr_numerator", denom_col = "cfr_ifr_denominator")
#' ## cfr_filtered could then be used directly in meta analyses as:
#' ## mtan <- metaprop(data = cfr_filtered, ...)
filter_df_for_metaprop <- function(df, num_col, denom_col) {
cols_needed <- c("parameter_value", "parameter_unit", num_col, denom_col)

df <- check_df_for_meta(df, cols_needed)

df <- df[!is.na(df[[denom_col]]), ]
df <- df[!(is.na(df[[num_col]]) & is.na(df[["parameter_value"]])), ]

df[[num_col]] <- ifelse(
is.na(df[[num_col]]) & !is.na(df[["parameter_value"]]),
round((df[["parameter_value"]]/100) * df[[denom_col]]),
df[[num_col]]
)

df
}
2 changes: 1 addition & 1 deletion R/load_epidata.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#' are loaded via \code{\link{load_epidata}}.
#'
#' @param x data.frame containing a column called "parameter_type", This will
#' typically be the `params`data.frame from the output of \code{load_epireview}.
#' typically be the `params`data.frame from the output of \code{load_epidata}.
#' @param parameter_type_full optional. User can specify the full value of a
#' parameter type not already included in the function.
#' @param parameter_type_short optional. Shorter value of parameter_type_full
Expand Down
63 changes: 63 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,66 @@
#' Sanity checks before meta-analysis
#' @details
#' The function carries out a series of checks on the parameter dataframe to
#' ensure that it is in the correct format for conducting a meta-analysis.
#' It checks that the input (1) consists of a single parameter type; (2) has the
#' columns required for the meta-analysis; (3) does not have any row where a
#' value is present but the unit is missing, or vice versa. All such rows are
#' removed; and (4) has the same unit across all values of the parameter. The
#' function will throw an error if either there is more than one value of
#' parameter_type, or if the columns needed for the meta-analysis are missing,
#' or if the parameter_unit is not the same across all values of the parameter.
#' @inheritParams filter_df_for_metaprop
#' @param cols_needed a character vector specifying the names of the columns
#' required for the meta-analysis.
#' @return a parameter dataframe with offending rows removed.
#' @export
check_df_for_meta <- function(df, cols_needed) {

## Ensure that there is a single parameter type present
if(length(unique(df$parameter_type)) != 1) {
cli_abort("parameter_type must be the same across all values.", call = NULL)
}

if (!all(cols_needed %in% colnames(df))) {
cols_missing <- cols_needed[!cols_needed %in% colnames(df)]
cli_abort(
"df must have columns named: {cols_needed}.
Column{?s} missing: {cols_missing}",
call = NULL
)
}

## First check that there are no rows where a value is present but unit is
## missing, or vice versa
idx <- is.na(df$parameter_value) & !is.na(df$parameter_unit)
remove <- sum(idx)
if(any(idx)) {
cli_inform("parameter_value must be present if parameter_unit is present.
{remove} row{?s} with non-NA parameter_value and NA parameter_unit
will be removed.")
df <- df[!(is.na(df$parameter_value) & !is.na(df$parameter_unit)), ]
}

idx <- !is.na(df$parameter_value) & is.na(df$parameter_unit)
remove <- sum(idx)
if(any(idx)) {
cli_inform("parameter_unit is missing but parameter_value is present.
{remove} row{?s} with non-NA parameter_value and NA parameter_unit
will be removed.")
df <- df[!(is.na(df$parameter_value) & !is.na(df$parameter_unit)), ]
}

# values of the parameter must all have the same units
if(length(unique(df$parameter_unit[!is.na(df$parameter_unit)])) != 1) {
msg1 <- "parameter_unit must be the same across all values."
msg2 <- "Consider calling delays_to_days() if you are working with delays."
cli_abort(paste(msg1, msg2), call = NULL)
}



df
}
#' Check upper limit of parameter values
#'
#' @details
Expand Down
35 changes: 35 additions & 0 deletions man/check_df_for_meta.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/filter_df_for_metamean.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

47 changes: 47 additions & 0 deletions man/filter_df_for_metaprop.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading