-
Notifications
You must be signed in to change notification settings - Fork 1
Add mgx mtx function #28
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
1e95783
c8e7155
c0e989f
c295340
5ec6988
5592946
f760a1c
cf4d8f1
02d852d
6b89442
eae2b54
8b80444
f467117
4d3923b
2829f0c
8210781
ea327bd
fab1aad
a808d02
9d29f3b
cad4045
c6d5646
3a51ec2
9a43554
8567d12
06b1287
d690c7e
1d4f293
a185c5a
7428a5f
2dec46a
610d251
a35985b
6b44a86
8664c45
af85b45
e45b8cf
478318a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,117 @@ | ||
| --- | ||
| title: "Benchmark mtxDE with paired metagenomics data" | ||
| author: "Kat Terwelp" | ||
| format: html | ||
| editor: visual | ||
| --- | ||
|
|
||
| ## Benchmark mtxDE with paired metagenomics data | ||
|
|
||
| ```{r setup, include=FALSE} | ||
| # Load required libraries | ||
| library(dplyr) | ||
| library(stringr) | ||
| library(ggplot2) | ||
| library(caret) | ||
| ``` | ||
|
|
||
| **Step One: Download Data** | ||
|
|
||
| ```{r} | ||
| # if synth_mgx_mtx data is not already downloaded, download it | ||
| if (!dir.exists("synth_mgx_mtx")){ | ||
| utils::download.file(url = "http://199.94.60.28/MTX_2021/synth_mgx_mtx.tar.gz", | ||
| destfile = "synth_mgx_mtx.tar.gz") | ||
| utils::untar(tarfile = "synth_mgx_mtx.tar.gz", exdir = "synth_mgx_mtx") | ||
| } | ||
| ``` | ||
|
|
||
| **Step Two: Import metadata, feature table, and dna table** | ||
|
|
||
| ```{r} | ||
| # Read in null df for RNA features | ||
| null.df <- data.table::fread("synth_mgx_mtx/group-null-enc.mtx_abunds_groups.tsv", | ||
| data.table=F) | ||
| null.mgx.df <- data.table::fread("synth_mgx_mtx/group-null-enc.mgx_abunds_groups.tsv", | ||
| data.table=F) | ||
|
|
||
| # Create metadata | ||
| metadata <- data.frame(phenotype=t(null.df[1,]), | ||
| rna_depth=t(null.df[2,]), | ||
| dna_depth=t(null.mgx.df[2,])) | ||
| metadata <- metadata[2:nrow(metadata),] | ||
| colnames(metadata) <- c("phenotype", "rna_depth", "dna_depth") | ||
| metadata$SampleID <- rownames(metadata) | ||
| metadata$phenotype <- as.numeric(metadata$phenotype) | ||
|
|
||
| # Filter the df into a feature table where the samples are rows | ||
| null.feature.table <- null.df[3:nrow(null.df),] | ||
| null.feature.table <- as.data.frame(t(null.feature.table)) | ||
| colnames(null.feature.table) <- null.feature.table["#",] | ||
| null.feature.table <- null.feature.table[2:nrow(null.feature.table),] | ||
| null.feature.table <- dplyr::mutate_all(null.feature.table, as.numeric) | ||
|
|
||
|
|
||
| # Filter the dna df into a dna table where the samples are rows | ||
| null.dna.table <- null.mgx.df[3:nrow(null.mgx.df),] | ||
| null.dna.table <- as.data.frame(t(null.dna.table)) | ||
| colnames(null.dna.table) <- null.dna.table["#",] | ||
| null.dna.table <- null.dna.table[2:nrow(null.dna.table),] | ||
| null.dna.table<- dplyr::mutate_all(null.dna.table, as.numeric) | ||
| ``` | ||
|
|
||
| ```{r} | ||
| # make sure there's no NA values | ||
| null.feature.table[is.na(null.feature.table)] <- 0 | ||
| null.dna.table[is.na(null.dna.table)] <- 0 | ||
| # transform into relative abundance | ||
| null.feature.table.rel <- null.feature.table/rowSums(null.feature.table) | ||
| null.dna.table.rel <- null.dna.table/rowSums(null.dna.table) | ||
|
|
||
|
|
||
| ``` | ||
|
|
||
| ```{r} | ||
| # run the null model if not already available | ||
| if (!file.exists("null_res.RDS")){ | ||
| null.res <- run_mtxDE(formula="phenotype", | ||
| feature.table=null.feature.table.rel, | ||
| metadata=metadata, sampleID="SampleID", reg.method = "zibr", | ||
| ncores = 4, | ||
| dna.table = null.dna.table.rel | ||
| ) | ||
|
|
||
| saveRDS(null.res, file = "null_res.RDS") | ||
| } else { | ||
| null.res <- readRDS("null_res.RDS") | ||
| } | ||
| ``` | ||
|
|
||
| ```{r} | ||
| hist(null.res$q) | ||
|
|
||
| null.res <- null.res %>% | ||
| dplyr::mutate(mgx = dplyr::if_else(stringr::str_detect(term, "_mgx$"), TRUE, FALSE), | ||
| significant = if_else(q < 0.05, TRUE, FALSE)) | ||
|
|
||
| null.res %>% | ||
| ggplot2::ggplot(aes(x = mgx)) + | ||
| ggplot2::geom_bar(ggplot2::aes(fill = significant)) | ||
|
|
||
| null.res.filt <- null.res %>% | ||
| dplyr::filter(term != "intercept") %>% | ||
| dplyr::filter(parameter == "beta") %>% | ||
| dplyr::mutate(expected_significance = mgx) | ||
|
|
||
| null.res.filt %>% | ||
| ggplot2::ggplot(aes(x = mgx)) + | ||
| ggplot2::geom_bar(ggplot2::aes(fill = significant)) | ||
|
|
||
| # confusion matrix | ||
| predicted <- factor(null.res.filt$significant) | ||
| expected <- factor(null.res.filt$expected_significance) | ||
|
|
||
| caret::confusionMatrix(data=predicted, reference = expected) | ||
|
|
||
|
|
||
| ``` |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,117 @@ | ||
| --- | ||
| title: "paired_mtxDE_sims" | ||
| author: "John Sterrett" | ||
| date: "2025-04-17" | ||
| output: html_document | ||
| --- | ||
|
|
||
| ```{r setup, include=FALSE} | ||
| knitr::opts_chunk$set(echo = TRUE) | ||
| library(tidyverse) | ||
| library(ggplot2) | ||
| ``` | ||
|
|
||
| # Simulate normal data | ||
| ```{r} | ||
| set.seed(42) | ||
|
|
||
| simulate_norm_run_lm <- function(n=1000, var_explain_by_x=1){ | ||
| x <- rnorm(n=n) | ||
| y_noise <- (1-var_explain_by_x) * rnorm(n=n) | ||
| y <- (var_explain_by_x * x) + y_noise | ||
|
|
||
| df <- data.frame(x=x,y=y) | ||
|
|
||
| return(broom::tidy(lm(y ~ x, df))) | ||
| } | ||
|
|
||
| # set parameters | ||
| N <- 1E5 | ||
| vars_explained <- seq(-2,2,0.1) | ||
| results <- data.frame(matrix(nrow=0, ncol=6)) | ||
| colnames(results) <- c("term", "estimate", "std.error", | ||
| "statistic", "p.value", "true_effect") | ||
|
|
||
| # try simulating with different variance explained | ||
| for (var in vars_explained){ | ||
| new_res <- simulate_norm_run_lm(n=N, var_explain_by_x=var) | ||
| new_res$true_effect <- var | ||
| results <- rbind(results, new_res) | ||
| } | ||
|
|
||
| results %>% | ||
| filter(term=="x") %>% | ||
| ggplot(mapping=aes(x=true_effect, y=estimate)) + | ||
| geom_point() | ||
|
|
||
| ``` | ||
|
|
||
|
|
||
| ```{r} | ||
| x <- rnbinom(n=1000, size=5, mu=100) | ||
| hist(x) | ||
| zero_indexes <- rbinom(n=1000, size=1, prob=0.2) | ||
| x[which(zero_indexes==1)] <- 0 | ||
| hist(x) | ||
|
|
||
| simulate_zero_inflated_rnbinom <- function(n=1000, size=5, mu=100, zero_prob=0.2, var_explain_by_x=1){ | ||
| x <- rnbinom(n=n, size=size, mu=mu) | ||
|
|
||
| y_base <- (1-var_explain_by_x) * rnbinom(n=n, size=size, mu=mu) | ||
| # add effect of x | ||
| y <- (var_explain_by_x * x) + y_base | ||
| # zero inflate our data | ||
| y[which(rbinom(n, size=1, prob=zero_prob)==1)] <- 0 | ||
|
|
||
| df <- data.frame(x=x,y=y) | ||
| return(df) | ||
| } | ||
|
|
||
|
|
||
| simulate_zi_rnbinom_run_lm <- function(n=1000, size=5, mu=100, | ||
| zero_prob=0.2, | ||
| var_explain_by_x=1){ | ||
| df <- simulate_zero_inflated_rnbinom(n, size, mu, zero_prob, var_explain_by_x) | ||
| return(broom::tidy(lm(y ~ x, df))) | ||
| } | ||
|
|
||
| df <- simulate_zero_inflated_rnbinom(var_explain_by_x=0.4) | ||
| ggplot(df, mapping=aes(x=x, y=y)) + | ||
| geom_point() + | ||
| geom_smooth(method="lm") | ||
|
|
||
|
|
||
|
|
||
| # set parameters | ||
| N <- 200 | ||
| vars_explained <- seq(-2,2,0.1) | ||
| results <- data.frame(matrix(nrow=0, ncol=6)) | ||
| colnames(results) <- c("term", "estimate", "std.error", | ||
| "statistic", "p.value", "true_effect") | ||
|
|
||
| # try simulating with different variance explained | ||
| for (var in vars_explained){ | ||
| new_res <- simulate_rnbinom_run_lm(n=N, | ||
| size=1, | ||
| var_explain_by_x=var) | ||
| new_res$true_effect <- var | ||
| results <- rbind(results, new_res) | ||
| } | ||
|
|
||
| results %>% | ||
| filter(term=="x") %>% | ||
| ggplot(mapping=aes(x=true_effect, y=estimate)) + | ||
| geom_point() | ||
|
|
||
| ``` | ||
|
|
||
| # MGX-MTX | ||
| ```{r} | ||
| # simulate MGX ~ neg binomial with zero inflation | ||
|
|
||
| # simulate MTX ~ MGX * noise (multiply to keep zeros zero) | ||
|
|
||
| # simulate MTX ~ MGX * noise * treatment_effect | ||
|
|
||
|
|
||
| ``` |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
specify in docstrings that feature.table is mtx
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
addressed 478318a