Skip to content

Commit d18b82f

Browse files
committed
add seurat option
1 parent b66640a commit d18b82f

File tree

2 files changed

+74
-4
lines changed

2 files changed

+74
-4
lines changed

README.md

Lines changed: 51 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,9 +62,11 @@ If you have a list with marker genes for each cluster (`list(clusterID0=..., clu
6262
cellIdents.c = unlist(lapply(cellIdents.c, as.character))
6363
6464
expvals = getExprData(scdata, cellIdents.c)
65+
66+
modmarkers = markers[[clusterID]]
67+
modmarkers$gene = rownames(modmarkers)
6568
66-
67-
markerdf = as.data.frame(markers[[clusterID]])
69+
markerdf = as.data.frame(modmarkers)
6870
6971
if ((nrow(markerdf) > 0) && (nrow(expvals) > 0))
7072
{
@@ -86,7 +88,36 @@ If you have a list with marker genes for each cluster (`list(clusterID0=..., clu
8688

8789
}
8890

89-
exprdf = getDEXpressionDF(hybridLib.o, mastAdd.o$hybridDEResults)
91+
makeDEResults = function(inobj, assay="SCT", test="wilcox")
92+
{
93+
clusterIDs = as.character(sort(unique(Idents(inobj))))
94+
95+
retList = list()
96+
97+
for (clusterID in clusterIDs)
98+
{
99+
100+
101+
cellIdents = Idents(inobj)
102+
cellIdents.c = names(cellIdents[cellIdents == clusterID])
103+
cellIdents.c = unlist(lapply(cellIdents.c, as.character))
104+
105+
print(paste("Processing cluster", clusterID, "with a total of", length(cellIdents.c), "cells"))
106+
107+
deMarkers = FindMarkers(inobj, assay=assay, ident.1 = cellIdents.c, test.use=test)
108+
109+
110+
retList[[clusterID]] = deMarkers
111+
112+
}
113+
114+
return(retList)
115+
116+
}
117+
118+
119+
deRes = makeDEResults(seurat_obj, assay="RNA", test="MAST")
120+
exprdf = getDEXpressionDF(seurat_obj, deRes, assay="RNA")
90121
write.table(exprdf, "expr_test.tsv", sep="\t", row.names=F, quote = F)
91122

92123

@@ -127,6 +158,23 @@ and you will receive 1 prediction per cluster. In a real life scenario you might
127158
The output format is as follow:
128159
cluster -> cell_type -> score -> accepted_marker_genes -> marker_genes_of_celltype
129160

161+
162+
### Renaming clusters in Seurat
163+
164+
With the `--seurat` flag, this tool will generate string that can easily be pasted into your R session:
165+
166+
new.cluster.ids <- c("Fibroblasts;Connective tissue", "Smooth muscle cells;Smooth muscle", "Fibroblasts;Connective tissue", "Macrophages;Immune system", "Smooth muscle cells;Smooth muscle", "Endothelial cells;Vasculature", "Fibroblasts;Connective tissue", "T memory cells;Immune system", "Fibroblasts;Connective tissue", "Fibroblasts;Connective tissue", "Pericytes;Vasculature", "Smooth muscle cells;Smooth muscle", "B cells;Immune system", "Plasma cells;Immune system", "Macrophages;Immune system", "Macrophages;Immune system", "Fibroblasts;Connective tissue", "Endothelial cells;Vasculature", "Endothelial cells;Vasculature", "Schwann cells;Brain", "Fibroblasts;Connective tissue", "Gamma delta T cells;Immune system", "Mesothelial cells;Epithelium", "Mast cells;Immune system")
167+
168+
orignames = Idents(seurat_obj)
169+
names(new.cluster.ids) <- levels(orignames)
170+
levels(orignames) = new.cluster.ids
171+
172+
seurat_obj$cellnames = orignames
173+
174+
You can visualize the assigned cell types in a UMAP plot with
175+
176+
DimPlot(obj.integrated, group.by="cellnames", reduction = "umap", label=T)
177+
130178
## Method
131179

132180
This prediction tools makes use of the marker genes provided by PanglaoDB [1]. Together with the reported sensitivity and specificity reported by them as well, the provided marker genes per cell-type and tissue are important. The script will download this marker table automatically.

analyseMarkers.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,14 @@
2323

2424
parser.add_argument('-n', '--predictions', default=10, type=int, help="number of predictions per cluster shown")
2525

26+
parser.add_argument('-s', '--seurat', default=False, action="store_true", help="generate seurat output at the end?")
2627

2728
args = parser.parse_args()
2829

30+
if args.seurat:
31+
print("Setting number of predictions to 1", file=sys.stderr)
32+
args.predictions = 1
33+
2934

3035

3136
gene2clusters = defaultdict(set)
@@ -83,6 +88,8 @@
8388
gene2refcluster = defaultdict(set)
8489
nickname2gene = {}
8590

91+
allFirstHits = []
92+
8693
if args.update_panglao or not os.path.isfile("panglao.tsv"):
8794

8895
print("Did not find panglao file. Downloading it now", file=sys.stderr)
@@ -247,5 +254,20 @@
247254
clusterCounter[refcluster] = totalScore * (accGenes/len(clusterid2genes[refcluster])) # len(clusterid2genes[refcluster])
248255
cluster2accGenes[refcluster] = accGenes
249256

250-
for x in clusterCounter.most_common(args.predictions):
257+
for idx, x in enumerate(clusterCounter.most_common(args.predictions)):
251258
print(cluster, ";".join(x[0]), x[1], cluster2accGenes[x[0]], len(clusterid2genes[x[0]]), sep="\t")
259+
260+
if idx == 0:
261+
allFirstHits.append(";".join(x[0]))
262+
263+
if args.seurat:
264+
265+
outstr = "new.cluster.ids <- c({})".format(
266+
",".join(['"{}"'.format(x) for x in allFirstHits])
267+
)
268+
269+
print(outstr)
270+
print("orignames = Idents(seurat_obj)")
271+
print("names(new.cluster.ids) <- levels(orignames)")
272+
print("levels(orignames) = new.cluster.ids")
273+
print("seurat_obj$cellnames = orignames")

0 commit comments

Comments
 (0)