# ---- Environment setup -------------------------------------------------------
# Frees memory, sets the library search path, attaches the packages used by the
# script and moves into the output directory for this analysis.
gc()

# Install the ArchR package -- copied from elsewhere.
.libPaths(c(
  "/home/data/t040413/R/x86_64-pc-linux-gnu-library/4.2",
  "/home/data/t040413/R/yll/usr/local/lib/R/site-library",
  "/usr/local/lib/R/library",
  "/home/data/refdir/Rlib/"
))
.libPaths()

library(Seurat)
library(ggplot2)
library(dplyr)

getwd()
# recursive = TRUE creates missing parent directories; showWarnings = FALSE
# keeps re-runs quiet when the directory already exists.
dir.create(
  "~/silicosis/spatial/monocle/silicosis_fibroblasts",
  recursive = TRUE,
  showWarnings = FALSE
)
setwd("~/silicosis/spatial/monocle/silicosis_fibroblasts")
print(getwd())

## 1 Load the silicosis data -------
# ---- Load the fibroblast / myofibroblast subset ------------------------------
# The .rds file below was written with save() (see the commented save() call),
# so it is restored with load() and provides the object `subset_data`.
# load("/home/data/t040413/silicosis/data/tabula_scRNAseq/integration_with_sc_silicosis/silicosis_fibro_AM3_mappedbacked.rds")
load("/home/data/t040413/silicosis/fibroblast_myofibroblast2/subset_data_fibroblast_myofibroblast2.rds")
# subset_data=RenameIdents(subset_data,'Specialized fibroblast'='Inmt fibroblast')
# save(subset_data,file ='/home/data/t040413/silicosis/fibroblast_myofibroblast2/subset_data_fibroblast_myofibroblast2.rds' )

DimPlot(subset_data, label = TRUE)

# Store the active identities in the metadata under both column names that are
# used later in the script.
subset_data$cell.type <- Idents(subset_data)
table(subset_data$cell.type)
subset_data@meta.data %>% head()
subset_data$celltype <- subset_data$cell.type
DimPlot(subset_data, label = TRUE, group.by = "celltype")

################################################################
### monocle
#################################################
# ---- Build the inputs for a Monocle CellDataSet ------------------------------
# Produces `expression_matrix` (RNA counts), `cell_metadata` (per-cell
# annotation) and `gene_annotation` (per-gene annotation).
subset_data$cell.type <- Idents(subset_data)
# Idents(subset_data)=subset_data$Idents.subset_data.

### Note: decide whether to use the RNA or the SCT assay
DefaultAssay(subset_data)
DefaultAssay(subset_data) <- "RNA"
table(duplicated(rownames(subset_data)))
table(duplicated(colnames(subset_data)))
table(Idents(subset_data))
DefaultAssay(subset_data)

# Append the active identities to the metadata as an extra column.
new.metadata <- merge(
  subset_data@meta.data,
  data.frame(Idents(subset_data)),
  by = "row.names",
  sort = FALSE
)
head(new.metadata)
rownames(new.metadata) <- new.metadata[, 1] # optional
head(subset_data@meta.data)
new.metadata <- new.metadata[, -1]
head(subset_data@meta.data)

# merge() does not promise to keep the input row order, so restore the original
# cell order explicitly and stop if the cell names do not match.
new.metadata <- new.metadata[rownames(subset_data@meta.data), , drop = FALSE]
stopifnot(identical(rownames(new.metadata), rownames(subset_data@meta.data)))
subset_data@meta.data <- new.metadata
table(subset_data$cell.type, Idents(subset_data))
head(subset_data)

# Coerce the counts straight to a sparse matrix; going through as.matrix()
# first would build a dense copy for no benefit.
expression_matrix <- as(subset_data@assays$RNA@counts, "sparseMatrix")
head(expression_matrix)
identical(colnames(expression_matrix), rownames(new.metadata))

cell_metadata <- new("AnnotatedDataFrame", data = subset_data@meta.data)
head(subset_data@meta.data)
head(cell_metadata)

gene_annotation <- new(
  "AnnotatedDataFrame",
  data = data.frame(
    gene_short_name = row.names(subset_data),
    row.names = row.names(subset_data)
  )
)
head(gene_annotation)
# ---- Create the Monocle CellDataSet ------------------------------------------
# The lines below were wrapped in ''' ... ''' in the original. R has no block
# comment syntax, so they are kept as ordinary comments (optional inspection
# commands).
# head(gene_annotation)
# fData(gene_annotation)
# phenoData(gene_annotation)
# featureData(gene_annotation)
# table(subset_data$cell.type)
# length(subset_data$cell.type)
# table(Idents(subset_data))
# length(Idents(subset_data))

DimPlot(subset_data, group.by = "cell.type", label = TRUE)
DimPlot(subset_data, label = TRUE)

# Load monocle from a local source tree.
devtools::load_all("/home/data/t040413/ipf/diseased_lung_covid20/monocle/")
monocle_cds <- monocle::newCellDataSet(
  expression_matrix,
  phenoData = cell_metadata,
  featureData = gene_annotation,
  lowerDetectionLimit = 0.5,
  expressionFamily = negbinomial.size()
)

################################################################################
##### Normalisation ######
# ---- Normalise, pick ordering genes and reduce dimensions --------------------
cds <- monocle_cds
cds <- estimateSizeFactors(cds)
cds <- estimateDispersions(cds) ## Original note: Removing 110 outliers

# `cell.type` below is a metadata column of subset_data.
library("BiocGenerics") # parallel computation
devtools::load_all("/home/data/t040413/ipf/diseased_lung_covid20/monocle/")
diff_test_res <- differentialGeneTest(cds, fullModelFormulaStr = "~ cell.type")

### Infer the pseudotrajectory #################################################
# step1: select genes for ordering => setOrderingFilter()
ordering_genes <- row.names(subset(diff_test_res, qval < 0.01))
length(ordering_genes) # 6354 (original note)
cds <- setOrderingFilter(cds, ordering_genes)

# step2: dimension reduction => reduceDimension() with DDRTree
cds <- reduceDimension(cds, max_components = 2, method = "DDRTree")
# package.version(pkg = "monocle")
# step3: ordering the cells => orderCells()
# getwd()
# source("./order_cells.R")
# unloadNamespace('monocle')
# devtools::load_all("../monocle_2.26.0 (1).tar/monocle_2.26.0 (1)/monocle/")
devtools::load_all("/home/data/t040413/ipf/diseased_lung_covid20/monocle/")
cds <- orderCells(cds)

# Trajectory before a root state is chosen, coloured by cell type, stim and
# State. Plots are print()ed explicitly so the PDFs are not left empty when the
# script is run through source(), which does not auto-print top-level values.
pdf("1.pseudutime.cell.type.pre.order.pdf")
print(plot_cell_trajectory(cds, color_by = "cell.type"))
dev.off()

pdf("1.pseudutime.stim.pre.order.pdf")
print(plot_cell_trajectory(cds, color_by = "stim"))
dev.off()

pdf("1.pseudutime.State.pre.order.pdf")
print(plot_cell_trajectory(cds, color_by = "State"))
dev.off()
###### split ########
# Faceted trajectory plots. Plots are print()ed explicitly so the PDFs are not
# left empty when the script is run through source().
pdf("2.split.pseudutime.Seurat.cell.type.pdf")
print(plot_cell_trajectory(cds, color_by = "cell.type") + facet_wrap(~cell.type))
dev.off()

pdf("2.split.pseudutime.stim.pdf")
print(plot_cell_trajectory(cds, color_by = "stim") + facet_wrap(~stim))
dev.off()

pdf("4.split.pseudutime.Seurat.State.pdf")
print(plot_cell_trajectory(cds, color_by = "cell.type") + facet_wrap(~State))
dev.off()

pdf("3.split.pseudutime.Seurat.cell.type_State.pdf")
print(plot_cell_trajectory(cds, color_by = "State") + facet_wrap(~cell.type))
dev.off()

# Cross-tabulate State against cell type and against stim, and export both.
# The tables are converted to wide data frames (one row per State) so that the
# row names are meaningful; openxlsx spells the options `colNames`/`rowNames`.
state_by_celltype <- table(pData(cds)$State, pData(cds)$cell.type)
state_by_celltype
openxlsx::write.xlsx(
  as.data.frame.matrix(state_by_celltype),
  "State_cellType_summary.xlsx",
  colNames = TRUE,
  rowNames = TRUE
)

state_by_stim <- table(pData(cds)$State, pData(cds)$stim)
state_by_stim
openxlsx::write.xlsx(
  as.data.frame.matrix(state_by_stim),
  "State_Stim_summary.xlsx",
  colNames = TRUE,
  rowNames = TRUE
)
getwd()
## we set the state 2 as root ######## state 2 with most cells in Endothelial cells
# Which state should be set as the root here??
DimPlot(subset_data, label = TRUE)
table(Idents(subset_data))
DefaultAssay(subset_data)
# DefaultAssay(subset_data)<-"SCT"
DefaultAssay(subset_data) <- "RNA"
DimPlot(subset_data, label = TRUE)
# Only close a device if one is open; dev.off() on the null device is an error.
if (dev.cur() > 1L) {
  dev.off()
}
table(subset_data$cell.type)
getwd()

# Set the root. The result must be assigned back to `cds` (the original
# assigned it to `ds`), otherwise the plot and the saved object below would
# still carry the un-rooted ordering.
cds <- orderCells(cds, root_state = 2)
getwd() # "/home/data/t040413/ipf/fibro_myofibro_recluster/+meso_monocle"

pdf("4.pseudutime.Pseudotime.pdf")
p <- plot_cell_trajectory(cds, color_by = "Pseudotime")
print(p)
dev.off()

# Written with save(), so despite the .rds extension it must be read back with
# load(), not readRDS().
save(cds, file = "./cds_fibroblast_using_RNA_slot.rds")
#######################################################
# ---- Persist the Seurat object and compute cluster markers -------------------
# Written with save(); restore with load(), not readRDS().
save(subset_data, file = "./fibroblast_formonocle.rds")
getwd()
load("./cds_fibroblast_using_RNA_slot.rds")

Idents(subset_data)
# Positive markers only, for every identity class.
Markers_foreachclustercells <- FindAllMarkers(
  subset_data,
  only.pos = TRUE,
  logfc.threshold = 0.5
)
openxlsx::write.xlsx(
  Markers_foreachclustercells,
  file = "./Markers_foreachclustercells.xlsx"
)
getwd()
############# https://cloud.tencent.com/developer/article/1692225
#################################
# Once we have a trajectory, we can use differentialGeneTest() to find genes
# that have an expression pattern that varies according to pseudotime.

# Highly variable genes
disp_table <- dispersionTable(cds)
disp.genes <- subset(
  disp_table,
  mean_expression >= 0.5 & dispersion_empirical >= 1 * dispersion_fit
)
disp.genes <- as.character(disp.genes$gene_id)
disp.genes

diff_test <- differentialGeneTest(
  cds[disp.genes, ],
  # cores = 4,
  fullModelFormulaStr = "~sm.ns(Pseudotime)"
)
sig_gene_names <- row.names(subset(diff_test, qval < 1e-04))
p2 <- plot_pseudotime_heatmap(
  cds[sig_gene_names, ],
  num_clusters = 5,
  show_rownames = TRUE,
  return_heatmap = TRUE
)
ggsave("pseudotime_heatmap2.pdf", plot = p2, width = 5, height = 10)

plot_pseudotime_heatmap(
  cds[c("Cx3cr1", "Spp1"), ],
  # num_clusters = 5,
  # cores = 4,
  show_rownames = TRUE
)

########################### Contents of cds
fData(cds) %>% head()
pData(cds) %>% head()

genes_of_interest <- c("TPM1", "MYH3", "CCNB2", "GAPDH")
subset(fData(cds), gene_short_name %in% genes_of_interest)

############# Expression of the genes of interest
head(subset_data@meta.data)
# Subsetting `cds` by a gene that is not among its features is an error, so
# only the symbols that are actually present are plotted.
# NOTE(review): these are upper-case symbols, while other genes in this script
# are written like "Cx3cr1" -- confirm they match the feature names.
genes_present <- intersect(genes_of_interest, rownames(fData(cds)))
if (length(genes_present) > 0) {
  print(
    plot_genes_jitter(
      cds[genes_present, ],
      grouping = "cell.type",
      color_by = "cell.type",
      plot_trend = TRUE
    ) +
      facet_wrap(~feature_label, scales = "free_y")
  )
} else {
  warning(
    "None of the genes of interest are present in `cds`; jitter plot skipped.",
    call. = FALSE
  )
}

####### Pseudotime heatmap
# ---- Pseudotime heatmap of the top cluster markers ---------------------------
# Uses `Markers_foreachclustercells`, the FindAllMarkers() result computed
# earlier in this script (the original referred to an undefined
# `markers_for_eachcluster`).
sig_gene_names <- Markers_foreachclustercells %>%
  group_by(cluster) %>%
  # Original note: whether or not the column name is quoted makes a big
  # difference.
  top_n(n = 5, wt = avg_log2FC) %>%
  ungroup() %>%
  pull(gene) %>%
  unique() # a gene can be a top marker of several clusters; keep it once
getwd()

p1 <- plot_pseudotime_heatmap(
  cds[sig_gene_names, ],
  num_clusters = 3,
  show_rownames = TRUE,
  return_heatmap = TRUE
)
# ggsave() does not create missing directories.
dir.create("pseudotime", showWarnings = FALSE)
ggsave("pseudotime/pseudotime_heatmap1.png", plot = p1, width = 5, height = 8)
############################
# ---- BEAM analysis -----------------------------------------------------------
devtools::load_all("/home/data/t040413/ipf/diseased_lung_covid20/monocle/")
# Single-cell trajectories usually contain branches, which arise because cells
# follow different expression programmes. When cells make a fate choice, or
# under genetic, chemical or environmental perturbation, they show distinct
# gene expression patterns. BEAM (Branched expression analysis modeling) is a
# statistical method for finding genes regulated in a branch-dependent manner.
disp_table <- dispersionTable(cds)
disp.genes <- subset(
  disp_table,
  mean_expression >= 0.5 & dispersion_empirical >= 1 * dispersion_fit
)
disp.genes <- as.character(disp.genes$gene_id)
mycds_sub <- cds[disp.genes, ]
plot_cell_trajectory(mycds_sub, color_by = "State")

beam_res <- BEAM(
  mycds_sub,
  branch_point = 1,
  # Original note: if this is greater than 1, the next argument is not needed.
  progenitor_method = "duplicate"
) # , cores = 8
beam_res <- beam_res[order(beam_res$qval), ]
beam_res <- beam_res[, c("gene_short_name", "pval", "qval")]
mycds_sub_beam <- mycds_sub[row.names(subset(beam_res, qval < 1e-4)), ]
plot_genes_branched_heatmap(
  mycds_sub_beam,
  branch_point = 1,
  num_clusters = 3,
  show_rownames = TRUE
)

# Repeat BEAM once per progenitor method and write each heatmap to
# "<method>.pdf". `results` holds the sorted BEAM table per method, or NULL
# when that method failed.
# NOTE(review): confirm that "expression" and "cluster" are accepted values of
# `progenitor_method` in the monocle version loaded above. A failure is
# reported as a warning so the remaining methods still run.
methods <- c("duplicate", "expression", "cluster")
results <- lapply(methods, function(method) {
  tryCatch(
    {
      method_res <- BEAM(
        mycds_sub,
        branch_point = 1,
        progenitor_method = method
      )
      method_res <- method_res[order(method_res$qval), ]
      method_res <- method_res[, c("gene_short_name", "pval", "qval")]
      method_cds <- mycds_sub[row.names(subset(method_res, qval < 1e-4)), ]

      pdf(paste0(method, ".pdf"), height = 100, width = 10)
      # Close the device even if plotting fails.
      on.exit(dev.off(), add = TRUE)
      # Draw the heatmap itself; the original printed only a name string.
      plot_genes_branched_heatmap(
        method_cds,
        branch_point = 1,
        num_clusters = 3,
        show_rownames = TRUE
      )
      method_res
    },
    error = function(e) {
      warning(
        "BEAM with progenitor_method = '", method, "' failed: ",
        conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )
})
names(results) <- methods
################################################################################
# https://davetang.org/muse/2017/10/01/getting-started-monocle/
# ---- Plot the top pseudotime-dependent genes ---------------------------------
# `my_pseudotime_de` and `my_cds_subset` are not created anywhere in this
# script. If they are not already defined, fall back to the pseudotime
# differential test (`diff_test`) and the ordered data set (`cds`) built above.
if (!exists("my_pseudotime_de")) {
  my_pseudotime_de <- diff_test
}
if (!exists("my_cds_subset")) {
  my_cds_subset <- cds
}

my_pseudotime_de %>%
  arrange(qval) %>%
  head()

# save the top 6 genes
# The feature data built in this script only has `gene_short_name`, so use
# `id` only when that column exists.
# NOTE(review): presumably the test result carries `gene_short_name` over from
# the feature data -- confirm.
gene_id_column <- if ("id" %in% colnames(my_pseudotime_de)) {
  "id"
} else {
  "gene_short_name"
}
my_pseudotime_gene <- my_pseudotime_de %>%
  arrange(qval) %>%
  head() %>%
  pull(all_of(gene_id_column)) %>%
  as.character()

plot_genes_in_pseudotime(my_cds_subset[my_pseudotime_gene, ])