.Rhistory

# Load the SNP6 / ATAC-TCGA correlation table. The first column of the file
# supplies the row names and the header line supplies the column names.
inputFile <- "C:\\Users\\Ryan\\Dropbox\\Work\\Chang Lab\\Projects\\TCGA_ATAC\\Data\\Analysis\\Genotyping\\SNParray6\\180125_SNP6_ATAC-TCGA_correlation_howchang.txt"
input <- read.table(inputFile, header = TRUE, row.names = 1, sep = "\t")
head(input)
rownames(input)

# Keep only the text before the first "_" of every row name. sub() works
# per element, so row names with differing numbers of "_"-separated fields
# cannot misalign the result (the strsplit/matrix reshape used before could).
tcgaUUID <- sub("_.*$", "", rownames(input))
head(tcgaUUID)
head(rownames(input))
length(tcgaUUID)

# Replace the row names with the shortened identifiers.
rownames(input) <- tcgaUUID
head(input)
# Look up metadata rows for a set of Stanford UUIDs.
#
# Arguments:
#   suUUID         - character vector of UUIDs. Matching against the
#                    `stanfordUUID` column is case-insensitive (both sides
#                    are lower-cased first).
#   suMetaDataFile - path to the metadata CSV; it must contain a
#                    `stanfordUUID` column. Defaults to the location that was
#                    previously hard-coded in this function. (The original
#                    code also kept this alternative commented out:
#                    "/scratch/users/mcorces/TCGA_ATAC/metadata/TCGA_StanfordCentric_metadata_useful.csv")
#
# Returns:
#   A data frame holding the metadata rows for the requested UUIDs, in
#   request order. A UUID that matches no row contributes nothing and a UUID
#   that matches several rows contributes all of them; a warning is raised
#   for each such UUID. Empty input yields a zero-row data frame.
getMetaDataTCGA <- function(suUUID, suMetaDataFile = "C:\\Users\\Ryan\\Dropbox\\Work\\Chang Lab\\Projects\\TCGA_ATAC\\Metadata\\TCGA_StanfordCentric_metadata_useful.csv") {
  suUUID <- tolower(suUUID)

  suMetaData <- read.csv(suMetaDataFile, header = TRUE, row.names = NULL)
  stopifnot("stanfordUUID" %in% colnames(suMetaData))
  suMetaData$stanfordUUID <- tolower(suMetaData$stanfordUUID)

  # Row indices matching each requested UUID (possibly zero or several).
  hits <- lapply(suUUID, function(uuid) which(suMetaData$stanfordUUID == uuid))

  # Report every UUID that did not resolve to exactly one row, but keep going
  # so the caller still receives the rows that were found.
  for (uuid in suUUID[lengths(hits) != 1L]) {
    warning(
      paste("UUID", uuid, "was not found or is not unique in metadata!", sep = " "),
      call. = FALSE
    )
  }

  # Single indexing step instead of growing a table with rbind() in a loop.
  suMetaData[unlist(hits), , drop = FALSE]
}
# First lookup attempt, using the column names as they were read so far.
# NOTE(review): `input` was read without check.names = FALSE at this point,
# so read.table() may have altered the column headers — presumably why the
# table is re-read below; confirm.
temp <- getMetaDataTCGA(colnames(input))
colnames(input)

# Re-read the table with check.names = FALSE so the column headers are kept
# exactly as they appear in the file.
input <- read.table(inputFile, header = TRUE, row.names = 1, check.names = FALSE, sep = "\t")
colnames(input)

# Characters 6 through 41 of each column name are used as the lookup key.
substr(colnames(input), 6, 41)
metadata <- getMetaDataTCGA(substr(colnames(input), 6, 41))
head(metadata)
head(metadata$case_id)

# Plain vector of the case_id column, one entry per returned metadata row.
matchUpUUID <- as.vector(metadata$case_id)
head(matchUpUUID)
# Create an empty table to store, for every column of `input`, the value in
# the matched row and the highest value among all other rows.
matchCorrelations <- as.data.frame(matrix(0, nrow = ncol(input), ncol = 2))
rownames(matchCorrelations) <- colnames(input)
colnames(matchCorrelations) <- c("match", "nextClosest")
head(matchCorrelations)

# matchUpUUID[i] is compared against column i, so the two must line up.
stopifnot(length(matchUpUUID) == ncol(input))

# For each column
for (i in seq_len(ncol(input))) {
  # Identify the row whose name equals the identifier for this column.
  isMatch <- rownames(input) %in% matchUpUUID[i]
  rowIndex <- which(isMatch)

  # NA (rather than an empty value) when no row carries that identifier.
  match <- if (length(rowIndex) == 1L) as.numeric(input[rowIndex, i]) else NA_real_

  # Identify the highest value among the remaining rows; NA if none remain.
  others <- as.numeric(input[!isMatch, i])
  nextClosest <- if (length(others) > 0L) max(others) else NA_real_

  matchCorrelations[i, ] <- c(match, nextClosest)
}
# Inspect the first column's lookup and the current row names.
i <- 1
rowIndex <- which(rownames(input) == matchUpUUID[i])
rowIndex
rownames(input)

# Re-read the table and shorten each row name to the text before its first
# "_" so that row names can be compared with the metadata case_id values.
input <- read.table(inputFile, header = TRUE, row.names = 1, check.names = FALSE, sep = "\t")
rownames(input) <- sub("_.*$", "", rownames(input))
metadata <- getMetaDataTCGA(substr(colnames(input), 6, 41))
matchUpUUID <- as.vector(metadata$case_id)

# matchUpUUID[i] is compared against column i, so the two must line up.
stopifnot(length(matchUpUUID) == ncol(input))

# Create an empty table to store the matched and next highest correlations.
matchCorrelations <- as.data.frame(matrix(0, nrow = ncol(input), ncol = 2))
rownames(matchCorrelations) <- colnames(input)
colnames(matchCorrelations) <- c("match", "nextClosest")

# For each column
for (i in seq_len(ncol(input))) {
  # Identify the row whose name equals the identifier for this column.
  isMatch <- rownames(input) %in% matchUpUUID[i]
  rowIndex <- which(isMatch)

  # NA (rather than an empty value) when no row carries that identifier.
  match <- if (length(rowIndex) == 1L) as.numeric(input[rowIndex, i]) else NA_real_

  # Identify the highest value among the remaining rows; NA if none remain.
  others <- as.numeric(input[!isMatch, i])
  nextClosest <- if (length(others) > 0L) max(others) else NA_real_

  matchCorrelations[i, ] <- c(match, nextClosest)
}
head(matchCorrelations)

# Margin between the matched value and the best non-matching value.
matchCorrelations$match - matchCorrelations$nextClosest
# Split the table read from inputFile into files of at most `chunkSize`
# columns each, written next to the input as <prefix>_<n>subset.txt.
inputFile <- "C:\\Users\\Ryan\\Downloads\\180228_Final_Data_Freeze_birdseedConcat.txt"
atacBirdseed <- read.table(inputFile, header = TRUE, row.names = 1, sep = "\t", as.is = TRUE, check.names = FALSE)
ncol(atacBirdseed)

chunkSize <- 24
numSplits <- ceiling(ncol(atacBirdseed) / chunkSize)

# Strip only a literal trailing ".txt" (escaped dot, anchored at the end).
prefix <- sub("\\.txt$", "", inputFile)

for (x in seq_len(numSplits)) {
  lowerLimit <- ((x - 1) * chunkSize) + 1
  # The last chunk may be shorter than chunkSize.
  upperLimit <- min(x * chunkSize, ncol(atacBirdseed))
  print(paste(x, lowerLimit, upperLimit, sep = " "))

  # drop = FALSE keeps a one-column chunk as a table with its column name.
  subset <- atacBirdseed[, lowerLimit:upperLimit, drop = FALSE]
  write.table(subset, paste0(prefix, "_", x, "subset.txt"), sep = "\t", col.names = NA, quote = FALSE)
}
# Correlate every column of the ATAC birdseed table with the `call` column of
# every TCGA RDS file found in tcgaDirectory.
inputFile <- "C:\\Users\\Ryan\\Dropbox\\Work\\Chang Lab\\Projects\\TCGA_ATAC\\Data\\Analysis\\Genotyping\\SNParray6\\FinalDataFreeze\\180228_Final_Data_Freeze_birdseedConcat_1subset_TEST.txt"
tcgaDirectory <- "C:\\Users\\Ryan\\Dropbox\\Work\\Chang Lab\\Projects\\TCGA_ATAC\\Data\\Analysis\\Genotyping\\SNParray6\\rds\\"
outputFile <- "C:\\Users\\Ryan\\Dropbox\\Work\\Chang Lab\\Projects\\TCGA_ATAC\\Data\\Analysis\\Genotyping\\SNParray6\\FinalDataFreeze\\TEST.txt"
cores <- 1

## Load dependencies (install first if missing, so library() cannot fail)
for (pkg in c("optparse", "foreach", "doParallel")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(optparse)
library(foreach)
library(doParallel)

atacBirdseed <- read.table(inputFile, header = TRUE, row.names = 1, sep = "\t", as.is = TRUE, check.names = FALSE)
head(atacBirdseed)

# list.files() takes a regular expression, not a glob: escape the dots and
# anchor at the end of the file name.
tcgaFiles <- list.files(path = tcgaDirectory, pattern = "\\.birdseed\\.data\\.rds$")
tcgaFiles

# Make an empty table to hold correlations: one row per TCGA file, one column
# per ATAC sample.
correlations <- as.data.frame(matrix(0, nrow = length(tcgaFiles), ncol = ncol(atacBirdseed)))
colnames(correlations) <- colnames(atacBirdseed)
# Take the stuff before the first "." of each file name as the row name.
rownames(correlations) <- sub("\\..*$", "", tcgaFiles)

# Establish the parallel environment for correlations with TCGA RDS files.
# FORK clusters are not available on Windows, so fall back to PSOCK there.
# The cluster is registered with foreach and always stopped afterwards.
clusterType <- if (.Platform$OS.type == "windows") "PSOCK" else "FORK"
c1 <- makeCluster(cores, type = clusterType)
registerDoParallel(c1)

tryCatch(
  {
    # For each ATAC sample (each column in atacBirdseed)
    for (i in seq_len(ncol(atacBirdseed))) {
      print(i)
      # Find the SNP probes that actually have a call (ie not -1).
      atac <- atacBirdseed[which(atacBirdseed[, i] != -1), i, drop = FALSE]
      probes <- rownames(atac)

      # Run the correlations against each TCGA RDS file in parallel and
      # concatenate the results. `.combine = c` also behaves when there is a
      # single file (rbind would hand back a bare scalar with no columns).
      currentCor <- foreach(j = seq_along(tcgaFiles), .combine = c) %dopar% {
        # Read in the TCGA file.
        tcga <- readRDS(paste0(tcgaDirectory, tcgaFiles[j]))
        # Subset the TCGA file to only contain the desired probes.
        tcga <- tcga[probes, , drop = FALSE]
        # NOTE(review): probes absent from the TCGA file become NA rows here,
        # which makes cor() return NA for that file — confirm the intended
        # handling (e.g. restrict to shared probes or set `use =`).
        cor(x = as.integer(atac[, 1]), y = as.integer(tcga$call))
      }
      correlations[, i] <- currentCor
    }
  },
  finally = {
    stopCluster(c1)
    # Reset foreach to the sequential backend so it does not point at the
    # stopped cluster.
    registerDoSEQ()
  }
)
head(correlations)
# Make an empty table to hold correlations.
correlations <- as.data.frame(matrix(0, nrow = length(tcgaFiles), ncol = ncol(atacBirdseed)))
colnames(correlations) <- colnames(atacBirdseed)
# Take the stuff before the first "." of each file name as the row name.
rownames(correlations) <- sub("\\..*$", "", tcgaFiles)

# Step through one ATAC column / TCGA file pair by hand.
i <- 2
print(i)
atac <- atacBirdseed[which(atacBirdseed[, i] != -1), i, drop = FALSE]
head(atac)
probes <- rownames(atac)
probes

j <- 1
tcga <- readRDS(paste0(tcgaDirectory, tcgaFiles[j]))
head(tcga)
tcga <- tcga[probes, ]
head(tcga)
head(probes)

cor(x = as.integer(atac[, 1]), y = as.integer(tcga$call))
as.integer(atac[, 1])
length(as.integer(atac[, 1]))
length(as.integer(tcga$call))
as.integer(tcga$call)

# Locate missing values. `x == NA` is always NA and can never match, so
# is.na() is required here.
which(is.na(tcga), arr.ind = TRUE)
which(is.na(tcga$call))

# Inspect the rows around position 436 in each table.
tcga$call[436]
tcga$call[435]
tcga[435, ]
tcga[436, ]
atacBirdseed[436, ]

# Reload the unsubsetted TCGA file and look up individual probes by name.
temp1 <- readRDS(paste0(tcgaDirectory, tcgaFiles[j]))
rownames(temp1)
which(rownames(temp1) == "SNP_A-8593968")
temp1[293783, ]
atac[436, ]
rownames(atac)[436]
which(rownames(temp1) == "SNP_A-1898440")
# Install Bioconductor packages through the biocLite installer script.
# NOTE(review): confirm biocLite.R is still the supported installer for the
# R / Bioconductor version in use before re-running this.
source("https://bioconductor.org/biocLite.R")

# With no arguments biocLite() handles the installer's default package set.
biocLite()

# Individual packages, in the order they were originally requested.
biocPackages <- c(
  "BSgenome",
  "TFBSTools",
  "InteractionSet",
  "GenomicFeatures",
  "SingleCellExperiment",
  "edgeR",
  "limma"
)
for (biocPackage in biocPackages) {
  biocLite(biocPackage)
}

# stringi comes from CRAN and is installed before Rsamtools.
install.packages("stringi")
biocLite("Rsamtools")
# Install ArchR (and the ArchRx variants that were tried) from GitHub.
#
# SECURITY: GitHub access tokens used to be written out in plain text on
# these lines. The token is now read from the GITHUB_PAT environment
# variable instead. Any token that appeared in this file should be treated
# as exposed and revoked.
githubToken <- Sys.getenv("GITHUB_PAT")
if (!nzchar(githubToken)) {
  stop("Set the GITHUB_PAT environment variable before installing from GitHub.", call. = FALSE)
}

devtools::install_github("jgranja24/ArchR", auth_token = githubToken)
library(ArchR)
library(GenomicRanges)

snp_file <- "K:/Shared drives/Brain_Merged/Analysis/GWAS/191105_ld_buddies_table_stage3.tsv"

# NOTE(review): two spellings of the ArchRx repository were tried and
# library(ArchRx) originally ran before either install — confirm which
# repository (if any) is valid and remove the other.
devtools::install_github("jgranja24/ArchRx", auth_token = githubToken)
devtools::install_github("jgranja24/ArchRX", auth_token = githubToken)
library(ArchRx)

devtools::install_github("jgranja24/ArchR", auth_token = githubToken)
# Build the pkgdown documentation site for the package in GitHub/ArchR/.

# Install the packages used for the build only when they are missing.
# Package names must be quoted strings; a bare name is looked up as an
# object and fails.
for (pkg in c("pkgdown", "knitr", "dplyr")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(pkgdown)
library(knitr)

# Point build_site() at the package directory instead of changing the
# working directory of the whole session with setwd().
pkgdown::build_site(pkg = "GitHub/ArchR/")