Skip to content

Error in get_DE_info when including batch as batch or covariate after F7-only subsetting #120

@nourmh11

Description

@nourmh11

Hello,

I’m running MultiNicheNet on a mouse scRNA-seq dataset and I’m getting an error when running get_DE_info() only when I try to account for batch (either via the batches argument or by including batch as a covariate).

When I do not include batch, and only gender as covariates, get_DE_info() runs and i have got the prioritisations result
When I do include batch, I get the following error (screenshot):

Image

Data preprocessing / what I did before calling get_DE_info
I start from a full macrophage object and then:
Restrict to F7-only genotypes:
Keep only F7Ctrl and F7LysM.
Drop all other genotypes and unused factor levels.
Restrict to relevant batches: Keep only batches that belong to those F7 samples.
Drop all other batches + unused levels.
Filter cell types:Remove low-abundance clusters (cell types) with too few cells overall.
Additionally remove cell types that don’t have enough cells in at least 2 samples per genotype. from( abundance_info function result)
So every remaining cell type should have:
cells in both F7Ctrl and F7LysM,
and enough cells per group to perform DE.
Image

code

--- Clean column names ---

colData(data)$Sample <- make.names(as.character(colData(data)$Sample))
colData(data)$Genotype <- make.names(as.character(colData(data)$Genotype))
colData(data)$label <- make.names(as.character(colData(data)$label))
colData(data)$batch <- make.names(as.character(colData(data)$batch))

--- Subset to F7Ctrl / F7LysM only ---

sce_F7 <- data[, colData(data)$Genotype %in% c("F7Ctrl", "F7LysM")]
sce_F7$Genotype <- droplevels(factor(sce_F7$Genotype))
sce_F7$batch <- droplevels(factor(sce_F7$batch))
sce_F7$label <- droplevels(factor(sce_F7$label))

--- Remove low-abundance cell types ---

min_cells_cluster <- 50
tab_label_F7 <- table(sce_F7$label)
labels_keep_F7 <- names(tab_label_F7)[tab_label_F7 >= min_cells_cluster]

sce_F7 <- sce_F7[, sce_F7$label %in% labels_keep_F7]
sce_F7$label <- droplevels(sce_F7$label)

6) Parameters -----------------------------------------------------------

min_cells <- 10 # same meaning as before
min_cells_DE <- 5 # more lenient for DE if you want
fraction_cutoff <- 0.05
min_sample_prop <- 0.5

HERE we are allowed to use batch within F7:

batches_F7 <- "batch"
covariates_F7 <- c("Gender") # you can later add "Gender" if it's safe

F7 contrast only

contrasts_oi_F7 <- c("'F7Ctrl-F7LysM'")

7) Abundance info (F7) --------------------------------------------------

table(colData(sce_F7)$Genotype, colData(sce_F7)$batch)
table(colData(sce_F7)$Gender, colData(sce_F7)$Genotype)

abundance_info_F7 <- get_abundance_info(
sce = sce_F7,
sample_id = sample_id,
group_id = group_id,
celltype_id = celltype_id,
min_cells = min_cells,
senders_oi = senders_oi_F7,
receivers_oi = receivers_oi_F7,
batches = batches_F7
)

abundance_df_summarized_F7 <- abundance_info_F7$abundance_data %>%
mutate(keep = as.logical(keep)) %>%
group_by(group_id, celltype_id) %>%
summarise(samples_present = sum(keep), .groups = "drop")

total_nr_conditions_F7 <- SummarizedExperiment::colData(sce_F7)[, group_id] %>%
unique() %>% length()

celltypes_absent_one_condition_F7 <- abundance_df_summarized_F7 %>%
filter(samples_present == 0) %>% pull(celltype_id) %>% unique()

celltypes_present_one_condition_F7 <- abundance_df_summarized_F7 %>%
filter(samples_present >= 2) %>% pull(celltype_id) %>% unique()

condition_specific_celltypes_F7 <- intersect(
celltypes_absent_one_condition_F7,
celltypes_present_one_condition_F7
)

absent_celltypes_F7 <- abundance_df_summarized_F7 %>%
filter(samples_present < 2) %>%
group_by(celltype_id) %>%
count() %>%
filter(n == total_nr_conditions_F7) %>%
pull(celltype_id)
sce_F7 <- sce_F7[, colData(sce_F7)$label %in% absent_celltypes_F7]

sce_F7$label <- droplevels(sce_F7$label)

analyse_condition_specific_celltypes <- FALSE
if (analyse_condition_specific_celltypes) {
senders_oi_F7 <- senders_oi_F7 %>% setdiff(absent_celltypes_F7)
receivers_oi_F7 <- receivers_oi_F7 %>% setdiff(absent_celltypes_F7)
} else {
senders_oi_F7 <- senders_oi_F7 %>%
setdiff(union(absent_celltypes_F7, condition_specific_celltypes_F7))
receivers_oi_F7 <- receivers_oi_F7 %>%
setdiff(union(absent_celltypes_F7, condition_specific_celltypes_F7))
}
sce_F7 <- sce_F7[, SummarizedExperiment::colData(sce_F7)[, celltype_id] %in%
c(senders_oi_F7, receivers_oi_F7)]

8) Gene filtering (F7) --------------------------------------------------

frq_list_F7 <- get_frac_exprs(
sce = sce_F7,
sample_id = sample_id,
celltype_id = celltype_id,
group_id = group_id,
batches = batches_F7,
min_cells = min_cells,
fraction_cutoff = fraction_cutoff,
min_sample_prop = min_sample_prop
)

sum(frq_list_F7$expressed_df$expressed)
length(unique(frq_list_F7$expressed_df$gene[frq_list_F7$expressed_df$expressed]))

genes_oi_F7 <- frq_list_F7$expressed_df %>%
filter(expressed == TRUE) %>% pull(gene) %>% unique()

sce_F7 <- sce_F7[genes_oi_F7, ]

abundance_expression_info_F7 <- process_abundance_expression_info(
sce = sce_F7,
sample_id = sample_id,
group_id = group_id,
celltype_id = celltype_id,
min_cells = min_cells,
senders_oi = senders_oi_F7,
receivers_oi = receivers_oi_F7,
lr_network = lr_network,
batches = batches_F7,
frq_list = frq_list_F7,
abundance_info = abundance_info_F7
)

--- DE step that triggers the error when including batch ---

DE_info_F7 <- get_DE_info(
sce = sce_F7,
sample_id = "Sample",
group_id = "Genotype",
celltype_id = "label",
batches = "batch", # if I include this → error
covariates = c("Gender"), # same error if I set covariates = "batch"
contrasts_oi = "'F7Ctrl-F7LysM'",
min_cells = 10,
expressed_df = frq_list_F7$expressed_df
)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions