-
Notifications
You must be signed in to change notification settings - Fork 19
Description
Hello,
I’m running MultiNicheNet on a mouse scRNA-seq dataset and I’m getting an error when running get_DE_info() only when I try to account for batch (either via the batches argument or by including batch as a covariate).
When I do not include batch, and only gender as covariates, get_DE_info() runs and i have got the prioritisations result
When I do include batch, I get the following error (screenshot):
Data preprocessing / what I did before calling get_DE_info
I start from a full macrophage object and then:
Restrict to F7-only genotypes:
Keep only F7Ctrl and F7LysM.
Drop all other genotypes and unused factor levels.
Restrict to relevant batches: Keep only batches that belong to those F7 samples.
Drop all other batches + unused levels.
Filter cell types:Remove low-abundance clusters (cell types) with too few cells overall.
Additionally remove cell types that don’t have enough cells in at least 2 samples per genotype. from( abundance_info function result)
So every remaining cell type should have:
cells in both F7Ctrl and F7LysM,
and enough cells per group to perform DE.

code
--- Clean column names ---
colData(data)$Sample <- make.names(as.character(colData(data)$Sample))
colData(data)$Genotype <- make.names(as.character(colData(data)$Genotype))
colData(data)$label <- make.names(as.character(colData(data)$label))
colData(data)$batch <- make.names(as.character(colData(data)$batch))
--- Subset to F7Ctrl / F7LysM only ---
sce_F7 <- data[, colData(data)$Genotype %in% c("F7Ctrl", "F7LysM")]
sce_F7$Genotype <- droplevels(factor(sce_F7$Genotype))
sce_F7$batch <- droplevels(factor(sce_F7$batch))
sce_F7$label <- droplevels(factor(sce_F7$label))
--- Remove low-abundance cell types ---
min_cells_cluster <- 50
tab_label_F7 <- table(sce_F7$label)
labels_keep_F7 <- names(tab_label_F7)[tab_label_F7 >= min_cells_cluster]
sce_F7 <- sce_F7[, sce_F7$label %in% labels_keep_F7]
sce_F7$label <- droplevels(sce_F7$label)
6) Parameters -----------------------------------------------------------
min_cells <- 10 # same meaning as before
min_cells_DE <- 5 # more lenient for DE if you want
fraction_cutoff <- 0.05
min_sample_prop <- 0.5
HERE we are allowed to use batch within F7:
batches_F7 <- "batch"
covariates_F7 <- c("Gender") # you can later add "Gender" if it's safe
F7 contrast only
contrasts_oi_F7 <- c("'F7Ctrl-F7LysM'")
7) Abundance info (F7) --------------------------------------------------
table(colData(sce_F7)$Genotype, colData(sce_F7)$batch)
table(colData(sce_F7)$Gender, colData(sce_F7)$Genotype)
abundance_info_F7 <- get_abundance_info(
sce = sce_F7,
sample_id = sample_id,
group_id = group_id,
celltype_id = celltype_id,
min_cells = min_cells,
senders_oi = senders_oi_F7,
receivers_oi = receivers_oi_F7,
batches = batches_F7
)
abundance_df_summarized_F7 <- abundance_info_F7$abundance_data %>%
mutate(keep = as.logical(keep)) %>%
group_by(group_id, celltype_id) %>%
summarise(samples_present = sum(keep), .groups = "drop")
total_nr_conditions_F7 <- SummarizedExperiment::colData(sce_F7)[, group_id] %>%
unique() %>% length()
celltypes_absent_one_condition_F7 <- abundance_df_summarized_F7 %>%
filter(samples_present == 0) %>% pull(celltype_id) %>% unique()
celltypes_present_one_condition_F7 <- abundance_df_summarized_F7 %>%
filter(samples_present >= 2) %>% pull(celltype_id) %>% unique()
condition_specific_celltypes_F7 <- intersect(
celltypes_absent_one_condition_F7,
celltypes_present_one_condition_F7
)
absent_celltypes_F7 <- abundance_df_summarized_F7 %>%
filter(samples_present < 2) %>%
group_by(celltype_id) %>%
count() %>%
filter(n == total_nr_conditions_F7) %>%
pull(celltype_id)
sce_F7 <- sce_F7[, colData(sce_F7)$label %in% absent_celltypes_F7]
sce_F7$label <- droplevels(sce_F7$label)
analyse_condition_specific_celltypes <- FALSE
if (analyse_condition_specific_celltypes) {
senders_oi_F7 <- senders_oi_F7 %>% setdiff(absent_celltypes_F7)
receivers_oi_F7 <- receivers_oi_F7 %>% setdiff(absent_celltypes_F7)
} else {
senders_oi_F7 <- senders_oi_F7 %>%
setdiff(union(absent_celltypes_F7, condition_specific_celltypes_F7))
receivers_oi_F7 <- receivers_oi_F7 %>%
setdiff(union(absent_celltypes_F7, condition_specific_celltypes_F7))
}
sce_F7 <- sce_F7[, SummarizedExperiment::colData(sce_F7)[, celltype_id] %in%
c(senders_oi_F7, receivers_oi_F7)]
8) Gene filtering (F7) --------------------------------------------------
frq_list_F7 <- get_frac_exprs(
sce = sce_F7,
sample_id = sample_id,
celltype_id = celltype_id,
group_id = group_id,
batches = batches_F7,
min_cells = min_cells,
fraction_cutoff = fraction_cutoff,
min_sample_prop = min_sample_prop
)
sum(frq_list_F7$expressed_df$expressed)
length(unique(frq_list_F7$expressed_df$gene[frq_list_F7$expressed_df$expressed]))
genes_oi_F7 <- frq_list_F7$expressed_df %>%
filter(expressed == TRUE) %>% pull(gene) %>% unique()
sce_F7 <- sce_F7[genes_oi_F7, ]
abundance_expression_info_F7 <- process_abundance_expression_info(
sce = sce_F7,
sample_id = sample_id,
group_id = group_id,
celltype_id = celltype_id,
min_cells = min_cells,
senders_oi = senders_oi_F7,
receivers_oi = receivers_oi_F7,
lr_network = lr_network,
batches = batches_F7,
frq_list = frq_list_F7,
abundance_info = abundance_info_F7
)
--- DE step that triggers the error when including batch ---
DE_info_F7 <- get_DE_info(
sce = sce_F7,
sample_id = "Sample",
group_id = "Genotype",
celltype_id = "label",
batches = "batch", # if I include this → error
covariates = c("Gender"), # same error if I set covariates = "batch"
contrasts_oi = "'F7Ctrl-F7LysM'",
min_cells = 10,
expressed_df = frq_list_F7$expressed_df
)