It is best to save the code below in an R file named mhipp2.R and then run it on the server.
# ---------------------------- packages
library(stadia)
# ---------------------------- data
load("./data/mhipp2.RData")
K <- 14
etas <- 0.15
# ---------------------------- model
d <- 35
set.seed(123)
system.time({
## set hyperparameters
hyper <- HyperParameters(mhipp2, dim = d, eta = etas)
## run model
mhipp2_stadia <- stadia(mhipp2, hyper, dim = d, n_cluster = K, min.features = 200, min.spots = 20,
platform = "others", em.maxiter = 30)
})
# ---------------------------- save result
if(!dir.exists('./result')) dir.create('./result')
save(mhipp2_stadia, file = './result/mhipp2_stadia.RData')
First, load the dependent packages and data.
library(dplyr)
library(mclust)
library(ggplot2)
library(patchwork)
library(corrplot)
library(Seurat)
library(stringr)
library(formattable)
library(tibble)
library(purrr)
library(scattermore)
load("./data/mhipp2.RData")
load("./result/mhipp2_stadia.RData")
clusterCol <- c("#ABEAB2", "#CED8DCFF", "#C6C0FFFF", "#BA6338FF", "#99A358",
"#00A087FF","#704A7F", "#EB91A7", "#4DBBD5FF", "#DBA85A",
"#575C6DFF", "#91D1C2FF", "#3C5488FF", "#E64B35FF")
Merge the list of Seurat object mhipp2 and save the results of STADIA mhipp2_stadia in it.
sample_info <- names(mhipp2)
mhipp2.pp <- mapply(function(x, y) {
x$orig.ident<- y; return(x)}, mhipp2, sample_info)
mhipp2.merge <- merge(
x = mhipp2.pp[[1]],y = mhipp2.pp[-1],
add.cell.id = names(mhipp2.pp), project = "mhipp")
assay <- DefaultAssay(mhipp2.merge)
mhipp2.merge <- mhipp2.merge[,mhipp2.merge@meta.data[,paste0('nFeature_', assay)] > 200]
mhipp2.merge$stadiaAnnotation <- mhipp2_stadia$c_vec %>% as.vector %>% as.factor()
dfb <- mhipp2.merge@meta.data %>%
mutate_at("stadiaAnnotation", as.factor)
Visualize all spatial domains identified by STADIA.
names <- list('191204_01', '200115_08')
pb1 <- lapply(names, function(name){
if(name == "191204_01") {
xlim <- c(600, 5600); ylim <- c(800, 5800)
} else if(name == "200115_08") {
xlim <- c(800, 5700); ylim <- c(700, 5600)
}
mhipp2 %>%
`[[`(name) %>%
`@`(meta.data) %>%
ggplot(aes(x = col, y = row, color = nCount_Spatial)) +
geom_scattermore(shape = 20, alpha = 1, pointsize = 3) +
scale_color_gradientn(colors = colorRampPalette(c("azure2", "brown4"))(100)) +
xlim(xlim[1],xlim[2]) + ylim(ylim[1], ylim[2]) +
coord_equal() + theme_void() +
theme(legend.position = "none",
plot.margin = unit(c(0,0,0,0),"mm"))
})
pb2 <- lapply(names, function(name){
if(name == "191204_01") {
xlim <- c(600, 5600); ylim <- c(800, 5800)
} else if(name == "200115_08") {
xlim <- c(800, 5700); ylim <- c(700, 5600)
}
dfb %>%
filter(orig.ident == name) %>%
ggplot(aes(x = col, y = row, color = .data[['stadiaAnnotation']])) +
geom_scattermore(shape = 20, alpha = 1, pointsize = 3) +
scale_color_manual(values = clusterCol) +
xlim(xlim[1], xlim[2]) + ylim(ylim[1], ylim[2]) +
coord_equal() +
theme(axis.ticks.length = unit(0, "mm"),
panel.border = element_blank(),
panel.background = element_blank()) +
NoAxes() + NoLegend()
})
pb1[[1]] | pb2[[1]] | pb1[[2]] | pb2[[2]]
Find marker gense for each spatial domain using function FindAllMarkers.
Idents(mhipp2.merge) <- "stadiaAnnotation"
Markers <- FindAllMarkers(mhipp2.merge,
logfc.threshold = 0.25, min.pct=0.1, only.pos = T)
Markers <- subset(Markers, Markers$p_val_adj < 5e-02)
save(Markers, file = "./data/scc12_markers.RData")
Visualize the shared domains of the slices and their top marker genes.
## markers
top <- Markers %>% group_by(cluster) %>% top_n(1, wt = avg_log2FC) %>%
mutate(cluster = factor(cluster))
dat <- top %>% arrange(desc(cluster)) %>% select(cluster, gene)
genes <- c()
for(i in 1:14){
dati <- dat %>% filter(cluster == i)
genes <- c(genes, dati$gene)
}
genes <- genes[c(8,9,14,1,4,12)]
data.list <- lapply(mhipp2, function(obj){
obj <- obj[,obj@meta.data[,paste0('nFeature_', DefaultAssay(obj))] > 200]
obj <- obj %>% NormalizeData(verbose = F)
exp <- GetAssayData(obj, slot = "data")[genes, ] %>% as.data.frame() %>% t()
coor <- GetTissueCoordinates(obj) %>% select(x, y)
dat <- cbind(exp=exp, coor)
colnames(dat) <- c(paste0(genes,"_", c(6,7,14,3,5,13)), colnames(coor))
return(dat)
})
genes <- paste0(genes,"_", c(6,7,14,3,5,13))
names(data.list) <- names(mhipp2)
mySpatialPlot2 <- function(name, annot, subtype) {
col2use <- clusterCol
col2use[-subtype] <- "gray90"
if(name == "191204_01") {
xlim <- c(600, 5600); ylim <- c(800, 5800)
} else if(name == "200115_08") {
xlim <- c(800, 5700); ylim <- c(700, 5600)
}
plt <- dfb %>%
filter(orig.ident == name) %>%
ggplot(aes(x = col, y = row, color = .data[[annot]])) +
geom_scattermore(shape = 20, alpha = 1, pointsize = 3) +
scale_color_manual(values = col2use) +
xlim(xlim[1], xlim[2]) + ylim(ylim[1], ylim[2]) +
coord_equal() +
theme(axis.ticks.length = unit(0, "mm"),
panel.border = element_blank()) +
NoAxes() + NoLegend()
return(plt)
}
mySpFeature <- function(name, gene, cutoff = NULL){
dat <- data.list[[name]]
probs <- seq(0, 1, length = 100)
cutoffs <- quantile(dat[,gene], probs = probs)
cutoff <- probs[min(which(cutoffs != 0))]
if(name == "191204_01") {
xlim <- c(600, 5600); ylim <- c(800, 5800)
} else if(name == "200115_08") {
xlim <- c(800, 5700); ylim <- c(700, 5600)
}
plt <- ggplot(dat, aes(x = x, y = y, color = .data[[gene]])) +
geom_scattermore(shape = 20, alpha = 1, pointsize = 3) +
scale_color_viridis_c(
rescaler = function(x, to = c(0, 1), from = NULL){
ifelse(x<cutoff, scales::rescale(x, to = to, from = c(min(x, na.rm = TRUE), cutoff)),
1)}) +
xlim(xlim[1], xlim[2]) + ylim(ylim[1], ylim[2]) +
coord_equal() +
theme(axis.ticks.length = unit(0, "mm"),
panel.border = element_blank()) +
NoAxes() + NoLegend()
return(plt)
}
pc1_domain <- lapply(c(8,9,14,1,4), function(i){
mySpatialPlot2("191204_01", "stadiaAnnotation", i)
})
pc2_domain <- lapply(c(8,9,14,1,4), function(i){
mySpatialPlot2("200115_08", "stadiaAnnotation", i)
})
pc1_marker <- lapply(1:5, function(i) { mySpFeature("191204_01", genes[i]) })
pc2_marker <- lapply(1:5, function(i) { mySpFeature("200115_08", genes[i]) })
wrap_plots(list(wrap_plots(lapply(1:5, function(i){ pc1_domain[[i]] + pc2_domain[[i]] }), nrow = 1),
wrap_plots(lapply(1:5, function(i){ pc1_marker[[i]] + pc2_marker[[i]] }), nrow = 1)),
nrow = 2) &
theme(panel.background = element_blank())
Visualize slice-specific domain.
pd1_domain <- mySpatialPlot2("191204_01", "stadiaAnnotation", 12)
pd2_domain <- mySpatialPlot2("200115_08", "stadiaAnnotation", 12)
pd1_marker <- mySpFeature("191204_01", genes[6])
pd2_marker <- mySpFeature("200115_08", genes[6])
(pd1_domain | pd2_domain) / (pd1_marker | pd2_marker) &
theme(panel.background = element_blank())