forked from Coolgenome/TCM
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
148 changed files
with
21,778 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,8 @@ | ||
# TCM | ||
Codes used in pan-T cell analysis | ||
# T Cell Map | ||
Codes used in pan-cancer T cell study | ||
|
||
- "TCellMap.R" is a tool we developed to automatically align and annotate T cells in a scRNA-seq dataset. It uniformly aligns T cells from the query dataset with the T cell maps that we built in our pan-cancer T cell study. | ||
|
||
- The python script "Res_largerT.py" functions as a pipeline for the analysis and visualization of SRT data. | ||
|
||
- The directories titled "fig1/2/3/4/5/6" contain additional scripts used for generating figures in the manuscript. Furthermore, the "data_preprocess" folder hosts scripts designed to preprocess raw data. Those scripts are used for tasks such as batch-correction and dimensional reduction, etc. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,184 @@ | ||
#'-------------------------------------------------------------- | ||
#' filename : SeuratMapping.R | ||
#' Date : 2021-07-21 | ||
#' contributor : Yanshuo Chu | ||
#' function: SeuratMapping | ||
#' R: 4.0.3 | ||
#' seurat: 4 | ||
#'-------------------------------------------------------------- | ||
|
||
print('<==== SeuratMapping.R ====>') | ||
|
||
suppressMessages({ | ||
library(optparse) | ||
library(tidyverse) | ||
library(Seurat) | ||
library(cowplot) | ||
}) | ||
|
||
option_list = list( | ||
make_option(c("-d","--data"), | ||
type = 'character', | ||
help = 'data.rds', | ||
metavar = 'character'), | ||
make_option(c("-o","--out"), | ||
type = 'character', | ||
help = 'out', | ||
metavar = 'character') | ||
); | ||
|
||
opt_parser = OptionParser(option_list = option_list); | ||
opt = parse_args(opt_parser); | ||
|
||
seuratObj <- readRDS(opt$data) | ||
|
||
## NKT_PATH="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/NKTMAIT_V6/nPC_5/UMAP_dist_0.1_nneighbor_35/p1_NKTMAIT_v6_UMAP_dist_0.1_nneighbor_35_CLUSTER_res_0.3/cluster.rds" | ||
## seuratObj <- readRDS(NKT_PATH) | ||
|
||
DEGsT <- read_tsv(file.path(dirname(opt$data), "snn-single-markers.tsv")) | ||
DEGs <- unique(DEGsT %>% pull(gene)) | ||
|
||
Idents(seuratObj) <- seuratObj$batch | ||
sortedBatch <- sort(table(seuratObj$batch), decreasing = T) | ||
totalT <- c() | ||
|
||
############################################################################### | ||
# Here Map to Each Batch, find out best reference # | ||
############################################################################### | ||
# Or based on a score method to map query to reference ######################## | ||
# Compare the score method and together method, check the difference########### | ||
|
||
|
||
for(i in 1:length(sortedBatch)){ | ||
tempRefObj <- subset(seuratObj, idents = names(sortedBatch)[i]) | ||
for(j in 1:length(sortedBatch)){ | ||
|
||
if(i == j){next} | ||
## if(sortedBatch[i] < 500){next} | ||
tryCatch({ | ||
tempQueryObj <- subset(seuratObj, | ||
idents = names(sortedBatch)[j]) | ||
|
||
DefaultAssay(tempRefObj) <- "RNA" | ||
DefaultAssay(tempQueryObj) <- "RNA" | ||
|
||
tempRefObj$reference.cell.type <- tempRefObj$seurat_clusters | ||
|
||
temp.anchors <- FindTransferAnchors(reference = tempRefObj, | ||
query = tempQueryObj, | ||
features = DEGs, | ||
k.filter = NA, | ||
reduction = "pcaproject", | ||
reference.reduction = "pca", | ||
reference.assay = "RNA", | ||
query.assay = "RNA") | ||
|
||
## Error: No features to use in finding transfer anchors. To troubleshoot, try explicitly providing features to the features \ | ||
## 2 parameter and ensure that they are present in both reference and query assays. | ||
## 1 Execution halted | ||
|
||
#' create a new umap model, exactly the same as the existing one ############# | ||
|
||
tempRefObj[["umap.new"]] <- CreateDimReducObject( | ||
embeddings = tempRefObj[["umap"]]@cell.embeddings, key = "UMAPnew_") | ||
umap.new.model <- list() | ||
umap.new.model$n_epochs <- 500 | ||
umap.new.model$alpha <-1 | ||
umap.new.model$method <- "umap" | ||
umap.new.model$negative_sample_rate <- 5 | ||
umap.new.model$gamma <- 1 | ||
umap.new.model$approx_pow <- 0 | ||
umap.new.model$metric$cosine <- list() | ||
umap.new.model$embedding <- tempRefObj[["umap.new"]]@cell.embeddings | ||
ab_param <- uwot:::find_ab_params(spread = 1, min_dist = 0.3) | ||
umap.new.model$a <- ab_param["a"] | ||
umap.new.model$b <- ab_param["b"] | ||
tempRefObj[["umap.new"]]@misc$model <- umap.new.model | ||
|
||
|
||
tempQueryObj <- MapQuery(anchorset = temp.anchors, | ||
reference = tempRefObj, | ||
query = tempQueryObj, | ||
refdata = list(celltype = "reference.cell.type"), | ||
reference.reduction = "pca", | ||
reduction.model = "umap.new") | ||
|
||
correctPosition <- as.numeric( | ||
tempQueryObj$predicted.celltype == tempQueryObj$seurat_clusters) | ||
wrongPosition <- as.numeric( | ||
tempQueryObj$predicted.celltype != tempQueryObj$seurat_clusters) | ||
|
||
tempTibble <- tibble( | ||
QueryDataSet = names(sortedBatch)[j], | ||
RefDataSet = names(sortedBatch)[i], | ||
MatchedCellNum = sum(correctPosition), | ||
QueryDataCellNum = length(correctPosition), | ||
RefDataCellNum = length(Cells(tempRefObj)), | ||
ACC = sum(correctPosition)/length(correctPosition)) | ||
|
||
totalT <- bind_rows(totalT, tempTibble) | ||
|
||
p1 <- DimPlot(tempRefObj, | ||
reduction = "umap", | ||
group.by = "reference.cell.type", | ||
label = TRUE, | ||
label.size = 3, | ||
repel = TRUE) + | ||
NoLegend() + | ||
ggtitle("Reference annotations") | ||
|
||
p2 <- DimPlot(tempQueryObj, | ||
reduction = "ref.umap", | ||
group.by = "predicted.celltype", label = TRUE, | ||
label.size = 3, repel = TRUE) + | ||
NoLegend() + | ||
ggtitle("Query transferred labels") | ||
|
||
p3 <- DimPlot(subset(tempQueryObj, cells = Cells(tempQueryObj)[as.logical(correctPosition)]), | ||
reduction = "umap", | ||
group.by = "predicted.celltype", label = TRUE, | ||
label.size = 3, repel = TRUE) + | ||
NoLegend() + | ||
ggtitle(paste0("Query correct, ACC=", round(sum(correctPosition)/length(correctPosition), 2))) | ||
|
||
p4 <- DimPlot(subset(tempQueryObj, cells = Cells(tempQueryObj)[as.logical(wrongPosition)]), | ||
reduction = "umap", | ||
group.by = "seurat_clusters", label = TRUE, | ||
label.size = 3, repel = TRUE) + | ||
NoLegend() + | ||
ggtitle("Query wrong") | ||
|
||
if(str_detect(opt$data, "CD8")){ | ||
p1 <- p1 + scale_x_reverse() | ||
p2 <- p2 + scale_x_reverse() | ||
p3 <- p3 + scale_x_reverse() | ||
p4 <- p4 + scale_x_reverse() | ||
} | ||
|
||
g <- plot_grid(p1,p2,p3,p4, nrow = 1, axis = "bt", align = 'h') | ||
pdf(file.path(opt$out, paste0("Query-", names(sortedBatch)[j], | ||
"_Ref-", names(sortedBatch)[i], "_umap.pdf")), | ||
width = 12, height=3) | ||
print(g) | ||
dev.off() | ||
}, | ||
error = function(e){print(e)} | ||
) | ||
} | ||
} | ||
|
||
g <- totalT %>% ggplot() + | ||
geom_point(aes(x = RefDataSet, | ||
y = QueryDataSet, | ||
size = ACC, | ||
color = RefDataCellNum, | ||
fill = RefDataCellNum), shape = 22) + | ||
xlab("RefDataSet") + ylab("QueryDataSet") + | ||
theme_classic() + | ||
theme(text = element_text(size = 10), | ||
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) | ||
pdf(file.path(opt$out, "allQuerysACC_point.pdf")) | ||
print(g) | ||
dev.off() | ||
|
||
write_tsv(totalT, file.path(opt$out, paste0('totalT', "_", Sys.Date(), '.tsv'))) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
##!/usr/bin/env bash | ||
|
||
module load python/3.7.3-anaconda | ||
module load R/4.0.3 | ||
|
||
mainscriptFolder=${1} | ||
inData=${2} | ||
currentFolder=${3} | ||
res=${4} | ||
reduction=${5} | ||
npc=${6} | ||
parentJobName=${7} | ||
toRunClustering=${8} | ||
toRunCommonAnalysis=${9} | ||
toRunCallBack=${10} | ||
callBackPath=${11} | ||
|
||
echo "FindClusterJobs parameters: | ||
mainscriptFolder=${mainscriptFolder} | ||
inData=${inData} | ||
currentFolder=${currentFolder} | ||
res=${res} | ||
reduction=${reduction} | ||
npc=${npc} | ||
parentJobName=${parentJobName} | ||
toRunClustering=${toRunClustering} | ||
toRunCommonAnalysis=${toRunCommonAnalysis} | ||
toRunCallBack=${toRunCallBack} | ||
callBackPath=${callBackPath} | ||
" | ||
|
||
runR="Rscript --no-save " | ||
|
||
############################################################################### | ||
#' Run Find Cluster '# | ||
############################################################################### | ||
if [ $toRunClustering = "YES" ]; then | ||
${runR} ${HOME}/configs/public/pipeline/UMAP_CLUSTER_JOBS_EMBEDED/FindCluster.R -d ${inData} -o ${currentFolder}/cluster.rds -r ${reduction} -n ${npc} -e ${res} | ||
fi | ||
|
||
|
||
|
||
############################################################################### | ||
#' Run Common Analysis '# | ||
############################################################################### | ||
srcD=${HOME}/configs/public/src | ||
ResD=${currentFolder} | ||
paramD=${HOME}/configs/public/params | ||
databaseD=${HOME}/configs/public/knowledge/database | ||
|
||
if [ $toRunCommonAnalysis = "YES" ]; then | ||
|
||
if [ ! -d ${ResD}/bubbleplot ]; then | ||
mkdir -p ${ResD}/bubbleplot | ||
fi | ||
|
||
${runR} ${srcD}/bubble-plot.R -d ${ResD}/cluster.rds -o ${ResD}/bubbleplot -m ${databaseD}/TMarkers.txt | ||
${runR} ${srcD}/bubble-plot.R -d ${ResD}/cluster.rds -o ${ResD}/bubbleplot -m ${databaseD}/topLevel.txt | ||
${runR} ${srcD}/bubble-plot.R -d ${ResD}/cluster.rds -o ${ResD}/bubbleplot -m ${databaseD}/general/generalAll.txt | ||
|
||
if [ ! -d ${ResD}/featureplot ]; then | ||
mkdir -p ${ResD}/featureplot | ||
fi | ||
|
||
# ${runR} ${srcD}/feature-plot.R -d ${ResD}/cluster.rds -o ${ResD}/featureplot/topLevel -c ${paramD}/feature-plot-origin.json -m ${databaseD}/topLevel.txt | ||
# ${runR} ${srcD}/feature-plot.R -d ${ResD}/cluster.rds -o ${ResD}/featureplot/tmarkers -c ${paramD}/feature-plot-origin.json -m ${databaseD}/TMarkers.txt | ||
|
||
${runR} ${srcD}/qc-by-cluster.R -d ${ResD}/cluster.rds -o ${ResD}/qc-by-cluster.pdf | ||
${runR} ${srcD}/visualize.R -d ${ResD}/cluster.rds | ||
# ${runR} ${srcD}/visualize_batch.R -d ${ResD}/cluster.rds | ||
# ${runR} ${srcD}/findmarker.r -d ${ResD}/cluster.rds -o ${ResD}/snn-single-markers.tsv | ||
|
||
# ${runR} ${srcD}/snn-marker.R -d ${ResD}/cluster.rds -o ${ResD}/snn-markers.tsv -c ${paramD}/snn-marker.json | ||
# ${runR} ${srcD}/snn-heatmap.R -d ${ResD}/cluster.rds -o ${ResD}/markersHeatmap.pdf -c ${paramD}/snn-heatmap.json -m ${ResD}/snn-markers.tsv -p heatmap | ||
# python ${srcD}/statMarkers.py --markersTop ${ResD}/snn-markers.tsv --markersDatabase ${databaseD}/immuneCellMarkerAllinBox_Yanshuo.txt --out ${ResD}/markers.ys_celltype.tsv | ||
# python ${srcD}/statMarkers.py --markersTop ${ResD}/markers.top.tsv --markersDatabase ${databaseD}/immuneCellMarkerAllinBox_Yanshuo.txt --out ${ResD}/markers.top.ys_celltype.tsv | ||
fi | ||
|
||
############################################################################### | ||
#' Run Callback Script '# | ||
############################################################################### | ||
if [ $toRunCallBack = "YES" ]; then | ||
if [ -f $callBackPath ]; then | ||
$callBackPath ${ResD}/cluster.rds | ||
fi | ||
fi |
Oops, something went wrong.