Skip to content

Commit

Permalink
add figs
Browse files Browse the repository at this point in the history
  • Loading branch information
ychu-tech committed Mar 13, 2023
1 parent 4dbc76d commit 869e7c9
Show file tree
Hide file tree
Showing 148 changed files with 21,778 additions and 3 deletions.
Binary file added .DS_Store
Binary file not shown.
10 changes: 8 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,8 @@
# TCM
Codes used in pan-T cell analysis
# T Cell Map
Codes used in pan-cancer T cell study

- "TCellMap.R" is a tool we developed to automatically align and annotate T cells in a scRNA-seq dataset. It uniformly aligns T cells from the query dataset with the T cell maps that we built in our pan-cancer T cell study.

- The python script "Res_largerT.py" functions as a pipeline for the analysis and visualization of SRT data.

- The directories titled "fig1/2/3/4/5/6" contain additional scripts used for generating figures in the manuscript. Furthermore, the "data_preprocess" folder hosts scripts designed to preprocess raw data. Those scripts are used for tasks such as batch-correction and dimensional reduction, etc.
26 changes: 25 additions & 1 deletion TCellMap.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,18 +39,42 @@ querySeuratObj <- readRDS(opt$queryData)
DefaultAssay(refSeuratObj) <- "RNA"
DefaultAssay(querySeuratObj) <- "RNA"

cellCycleGeneT1 <- read_tsv("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/knowledge/public/database/general/cell-cy\
cle-gene-list.txt")
cellCycleGeneT2 <- read_tsv("/rsrch3/scratch/genomic_med/ychu2/projects/p1review/R3Q7/knowledge/public/database/general/regev_l\
ab_cell_cycle_genes.txt")

HSMarkers <- c("HSPA6", "HSPA1A", "HSPA1B", "DNAJB1", "HSPH1", "HSP90AA1", "HSPE1", "HSPB1", "BAG3", "HSPD1", "DNAJA1", "HSP90A\
B1", "HSPA8", "DNAJB4", "DNAJA4")

DEG_path <- file.path(dirname(opt$referenceData), "snn-single-markers.tsv")
## DEG_path <- file.path(dirname(CD4_ReferenceDataPath), "snn-single-markers.tsv")
DEGs <- read_tsv(DEG_path) %>%
arrange(cluster, desc(avg_logFC)) %>%
filter(avg_logFC > 0) %>%
group_by(cluster) %>%
top_n(100) %>%
pull(gene)

refSeuratObj <- refSeuratObj %>%
NormalizeData(verbose = T) %>%
FindVariableFeatures(selection.method = "vst")
hvgR = VariableFeatures(object = refSeuratObj)
hvgR <- union(hvgR, HSMarkers)
hvgR <- intersect(hvgR, DEGs)
hvgR <- setdiff(hvgR, cellCycleGeneT1$marker)
hvgR <- setdiff(hvgR, cellCycleGeneT2$marker)
refSeuratObj <- refSeuratObj %>%
ScaleData(verbose = T) %>%
RunPCA(verbose = T, features = hvgR)

querySeuratObj <- querySeuratObj %>%
NormalizeData(verbose = T) %>%
FindVariableFeatures(selection.method = "vst")
hvgQ = VariableFeatures(object = querySeuratObj)
hvgR <- union(hvgR, HSMarkers)
hvgR <- intersect(hvgR, DEGs)
hvgR <- setdiff(hvgR, cellCycleGeneT1$marker)
hvgR <- setdiff(hvgR, cellCycleGeneT2$marker)
querySeuratObj <- querySeuratObj %>%
ScaleData(verbose = T) %>%
RunPCA(verbose = T, features = hvgQ)
Expand Down
184 changes: 184 additions & 0 deletions TCellMap2.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
#'--------------------------------------------------------------
#' filename : SeuratMapping.R
#' Date : 2021-07-21
#' contributor : Yanshuo Chu
#' function: SeuratMapping
#' R: 4.0.3
#' seurat: 4
#'--------------------------------------------------------------

print('<==== SeuratMapping.R ====>')

suppressMessages({
library(optparse)
library(tidyverse)
library(Seurat)
library(cowplot)
})

option_list = list(
make_option(c("-d","--data"),
type = 'character',
help = 'data.rds',
metavar = 'character'),
make_option(c("-o","--out"),
type = 'character',
help = 'out',
metavar = 'character')
);

opt_parser = OptionParser(option_list = option_list);
opt = parse_args(opt_parser);

seuratObj <- readRDS(opt$data)

## NKT_PATH="/rsrch3/scratch/genomic_med/ychu2/data/tmp/Tcellproject/analysis/validate/NKTMAIT_V6/nPC_5/UMAP_dist_0.1_nneighbor_35/p1_NKTMAIT_v6_UMAP_dist_0.1_nneighbor_35_CLUSTER_res_0.3/cluster.rds"
## seuratObj <- readRDS(NKT_PATH)

DEGsT <- read_tsv(file.path(dirname(opt$data), "snn-single-markers.tsv"))
DEGs <- unique(DEGsT %>% pull(gene))

Idents(seuratObj) <- seuratObj$batch
sortedBatch <- sort(table(seuratObj$batch), decreasing = T)
totalT <- c()

###############################################################################
# Here Map to Each Batch, find out best reference #
###############################################################################
# Or based on a score method to map query to reference ########################
# Compare the score method and together method, check the difference###########


for(i in 1:length(sortedBatch)){
tempRefObj <- subset(seuratObj, idents = names(sortedBatch)[i])
for(j in 1:length(sortedBatch)){

if(i == j){next}
## if(sortedBatch[i] < 500){next}
tryCatch({
tempQueryObj <- subset(seuratObj,
idents = names(sortedBatch)[j])

DefaultAssay(tempRefObj) <- "RNA"
DefaultAssay(tempQueryObj) <- "RNA"

tempRefObj$reference.cell.type <- tempRefObj$seurat_clusters

temp.anchors <- FindTransferAnchors(reference = tempRefObj,
query = tempQueryObj,
features = DEGs,
k.filter = NA,
reduction = "pcaproject",
reference.reduction = "pca",
reference.assay = "RNA",
query.assay = "RNA")

## Error: No features to use in finding transfer anchors. To troubleshoot, try explicitly providing features to the features \
## 2 parameter and ensure that they are present in both reference and query assays.
## 1 Execution halted

#' create a new umap model, exactly the same as the existing one #############

tempRefObj[["umap.new"]] <- CreateDimReducObject(
embeddings = tempRefObj[["umap"]]@cell.embeddings, key = "UMAPnew_")
umap.new.model <- list()
umap.new.model$n_epochs <- 500
umap.new.model$alpha <-1
umap.new.model$method <- "umap"
umap.new.model$negative_sample_rate <- 5
umap.new.model$gamma <- 1
umap.new.model$approx_pow <- 0
umap.new.model$metric$cosine <- list()
umap.new.model$embedding <- tempRefObj[["umap.new"]]@cell.embeddings
ab_param <- uwot:::find_ab_params(spread = 1, min_dist = 0.3)
umap.new.model$a <- ab_param["a"]
umap.new.model$b <- ab_param["b"]
tempRefObj[["umap.new"]]@misc$model <- umap.new.model


tempQueryObj <- MapQuery(anchorset = temp.anchors,
reference = tempRefObj,
query = tempQueryObj,
refdata = list(celltype = "reference.cell.type"),
reference.reduction = "pca",
reduction.model = "umap.new")

correctPosition <- as.numeric(
tempQueryObj$predicted.celltype == tempQueryObj$seurat_clusters)
wrongPosition <- as.numeric(
tempQueryObj$predicted.celltype != tempQueryObj$seurat_clusters)

tempTibble <- tibble(
QueryDataSet = names(sortedBatch)[j],
RefDataSet = names(sortedBatch)[i],
MatchedCellNum = sum(correctPosition),
QueryDataCellNum = length(correctPosition),
RefDataCellNum = length(Cells(tempRefObj)),
ACC = sum(correctPosition)/length(correctPosition))

totalT <- bind_rows(totalT, tempTibble)

p1 <- DimPlot(tempRefObj,
reduction = "umap",
group.by = "reference.cell.type",
label = TRUE,
label.size = 3,
repel = TRUE) +
NoLegend() +
ggtitle("Reference annotations")

p2 <- DimPlot(tempQueryObj,
reduction = "ref.umap",
group.by = "predicted.celltype", label = TRUE,
label.size = 3, repel = TRUE) +
NoLegend() +
ggtitle("Query transferred labels")

p3 <- DimPlot(subset(tempQueryObj, cells = Cells(tempQueryObj)[as.logical(correctPosition)]),
reduction = "umap",
group.by = "predicted.celltype", label = TRUE,
label.size = 3, repel = TRUE) +
NoLegend() +
ggtitle(paste0("Query correct, ACC=", round(sum(correctPosition)/length(correctPosition), 2)))

p4 <- DimPlot(subset(tempQueryObj, cells = Cells(tempQueryObj)[as.logical(wrongPosition)]),
reduction = "umap",
group.by = "seurat_clusters", label = TRUE,
label.size = 3, repel = TRUE) +
NoLegend() +
ggtitle("Query wrong")

if(str_detect(opt$data, "CD8")){
p1 <- p1 + scale_x_reverse()
p2 <- p2 + scale_x_reverse()
p3 <- p3 + scale_x_reverse()
p4 <- p4 + scale_x_reverse()
}

g <- plot_grid(p1,p2,p3,p4, nrow = 1, axis = "bt", align = 'h')
pdf(file.path(opt$out, paste0("Query-", names(sortedBatch)[j],
"_Ref-", names(sortedBatch)[i], "_umap.pdf")),
width = 12, height=3)
print(g)
dev.off()
},
error = function(e){print(e)}
)
}
}

g <- totalT %>% ggplot() +
geom_point(aes(x = RefDataSet,
y = QueryDataSet,
size = ACC,
color = RefDataCellNum,
fill = RefDataCellNum), shape = 22) +
xlab("RefDataSet") + ylab("QueryDataSet") +
theme_classic() +
theme(text = element_text(size = 10),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
pdf(file.path(opt$out, "allQuerysACC_point.pdf"))
print(g)
dev.off()

write_tsv(totalT, file.path(opt$out, paste0('totalT', "_", Sys.Date(), '.tsv')))
86 changes: 86 additions & 0 deletions data_preprocess/0_run_seurat_pipeline/FindClusterJobs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
##!/usr/bin/env bash

module load python/3.7.3-anaconda
module load R/4.0.3

mainscriptFolder=${1}
inData=${2}
currentFolder=${3}
res=${4}
reduction=${5}
npc=${6}
parentJobName=${7}
toRunClustering=${8}
toRunCommonAnalysis=${9}
toRunCallBack=${10}
callBackPath=${11}

echo "FindClusterJobs parameters:
mainscriptFolder=${mainscriptFolder}
inData=${inData}
currentFolder=${currentFolder}
res=${res}
reduction=${reduction}
npc=${npc}
parentJobName=${parentJobName}
toRunClustering=${toRunClustering}
toRunCommonAnalysis=${toRunCommonAnalysis}
toRunCallBack=${toRunCallBack}
callBackPath=${callBackPath}
"

runR="Rscript --no-save "

###############################################################################
#' Run Find Cluster '#
###############################################################################
if [ $toRunClustering = "YES" ]; then
${runR} ${HOME}/configs/public/pipeline/UMAP_CLUSTER_JOBS_EMBEDED/FindCluster.R -d ${inData} -o ${currentFolder}/cluster.rds -r ${reduction} -n ${npc} -e ${res}
fi



###############################################################################
#' Run Common Analysis '#
###############################################################################
srcD=${HOME}/configs/public/src
ResD=${currentFolder}
paramD=${HOME}/configs/public/params
databaseD=${HOME}/configs/public/knowledge/database

if [ $toRunCommonAnalysis = "YES" ]; then

if [ ! -d ${ResD}/bubbleplot ]; then
mkdir -p ${ResD}/bubbleplot
fi

${runR} ${srcD}/bubble-plot.R -d ${ResD}/cluster.rds -o ${ResD}/bubbleplot -m ${databaseD}/TMarkers.txt
${runR} ${srcD}/bubble-plot.R -d ${ResD}/cluster.rds -o ${ResD}/bubbleplot -m ${databaseD}/topLevel.txt
${runR} ${srcD}/bubble-plot.R -d ${ResD}/cluster.rds -o ${ResD}/bubbleplot -m ${databaseD}/general/generalAll.txt

if [ ! -d ${ResD}/featureplot ]; then
mkdir -p ${ResD}/featureplot
fi

# ${runR} ${srcD}/feature-plot.R -d ${ResD}/cluster.rds -o ${ResD}/featureplot/topLevel -c ${paramD}/feature-plot-origin.json -m ${databaseD}/topLevel.txt
# ${runR} ${srcD}/feature-plot.R -d ${ResD}/cluster.rds -o ${ResD}/featureplot/tmarkers -c ${paramD}/feature-plot-origin.json -m ${databaseD}/TMarkers.txt

${runR} ${srcD}/qc-by-cluster.R -d ${ResD}/cluster.rds -o ${ResD}/qc-by-cluster.pdf
${runR} ${srcD}/visualize.R -d ${ResD}/cluster.rds
# ${runR} ${srcD}/visualize_batch.R -d ${ResD}/cluster.rds
# ${runR} ${srcD}/findmarker.r -d ${ResD}/cluster.rds -o ${ResD}/snn-single-markers.tsv

# ${runR} ${srcD}/snn-marker.R -d ${ResD}/cluster.rds -o ${ResD}/snn-markers.tsv -c ${paramD}/snn-marker.json
# ${runR} ${srcD}/snn-heatmap.R -d ${ResD}/cluster.rds -o ${ResD}/markersHeatmap.pdf -c ${paramD}/snn-heatmap.json -m ${ResD}/snn-markers.tsv -p heatmap
# python ${srcD}/statMarkers.py --markersTop ${ResD}/snn-markers.tsv --markersDatabase ${databaseD}/immuneCellMarkerAllinBox_Yanshuo.txt --out ${ResD}/markers.ys_celltype.tsv
# python ${srcD}/statMarkers.py --markersTop ${ResD}/markers.top.tsv --markersDatabase ${databaseD}/immuneCellMarkerAllinBox_Yanshuo.txt --out ${ResD}/markers.top.ys_celltype.tsv
fi

###############################################################################
#' Run Callback Script '#
###############################################################################
if [ $toRunCallBack = "YES" ]; then
if [ -f $callBackPath ]; then
$callBackPath ${ResD}/cluster.rds
fi
fi
Loading

0 comments on commit 869e7c9

Please sign in to comment.