From e1b8cb608c002bacdad199c4f6c5c0e7a4083740 Mon Sep 17 00:00:00 2001 From: jgranja24 Date: Fri, 3 Apr 2020 14:47:51 -0700 Subject: [PATCH] Dev 200402 (#72) * bug fix mat column order * force names for getting matrices * force matching cell names * fix * bug fix name order --- .DS_Store | Bin 12292 -> 12292 bytes R/ArrowRead.R | 113 ++++++++++++++++++++++++++++++++++------------ R/VisualizeData.R | 3 +- 3 files changed, 84 insertions(+), 32 deletions(-) diff --git a/.DS_Store b/.DS_Store index 07f36dea23ce47847e40f19e608fbe61a1978369..0415aa6e0acfd4f9150a02376ff6b96a08020f8c 100644 GIT binary patch delta 1349 zcmdUuUrZcT6vofDEO1ArFblXlK+7y_DPWdA1=>PtfpyW^7TN|OfF+e(W~!4ev#_(J z*s5qs(loVC6Ai7J*qEkiYm5n%rlr*fV?vDjAlo!04G)cf(x;NB55`1$XNCtgCcf%C z%)RG+GjqP2-#0TOGb0~gDYQbt8MdXdF&u1eYOy}rRBnA8+!zM+Cd|obJdunOc^2a1 z3EfOLHZH@H<;~8?Wx`{5;yU@+UAy;mi&n%Nwchi$TC?6@aeiQZMb#4xEp6@5Oo8Mp z6pKZ_H4%`fpVw8*Nb84mEhYFoA)(KRjcLv!a*15Vq!1fC64xh==;@a;g_5seL%A#t ztFg4!lT_`uROu5YI7(%?EFZ9yXiDREl`*$dwOlJmG1h15$J`H#VXLVHG|t)O(T?|<8+2* z={#MfdAdfQ(^r(CW%`aD(klH#Khv-9APd>ZK>=h`p&B))MHAZ5fllngZgisuy*Pkj z9K;JyF$M$Un7~oIjyG@uGk6nk;ccA7IedT*v7qA?KE-EP#J5;N2Dh<-AF+yG@VjG^ zJaAXqySA<^_=9iUY>cmExQaL^&i#M^h-lAb5t`_-FR_eQaAN@LX&cB4lGkvQYtHz%0|W~F1e*xt>g(j7p_*V zgc*}O8M&mitiC}Jr7WHX!6}>x=dk*~$wHV?!j09(w z#1xL<6}*bqa1y6*n$tVO>CJL_@8Dfrz(ribWf@m+74x``8(6?ie1UIp2lw#374R$g fVQxkC=g#E?@8|H}@;^KJg1?P^&Rg@~e`WX)2KF^K delta 1378 zcmd6lUu;ul7{=eHblZ2>2Yl;TTMmU5%DOpL`IxG@ntLZUH2B_=9H(V-?LkZ6=3A^eFmL_{wLUU0*WvKtcMYfl4&zzr9Kb8+7D zzE95kJ^6mmP|r}$@na7BW`60|^MZSDup!hC^vw!ogTBG~`fy{=7oH!^W)hxGTH+ee=A}+w=hufnJNJ`&(o2RE)@WJk}RC(wR``3T$KSC8cFd8C%ZBsmt1z zE$1EG#)ee#bz?v`f2`~^KektzE%qv}KN6CAnRMn@5@7Q`Rn$cILG^5whwSuBfRcA1^ zE`=lSQDsGd1qQ05m9C>o*Cr4bF-1uM7nrXsGh5<eG`CY_&1AHm|g3 znyxXwbpu5HNUinRnT#fMMSvJhpp$W}cfTeg9tI>sStVIkPVPF$BBZC3#LKeHRAMfBG-i3(| z@hLvT=lBA{IDzk-_yMPJ0T*!zf8i?r#ZCN&|II(#-lkFM*9t*jF$9Mm9|BJy1cAqf zV9J9K{5jqtU*&Z9s_R~y8(y@;^t!st12Y1-$&*y~wCNs?$X_fQ$+v+T4;PT|l|nnS zMyfWc$)rr?A~uny~?KSZP-+pz|%8(7kSalR@ zvEc9FTW)9B!9A|wa^JPmS^D->eHj_)^cpML&mj!yyu@-}TQ2J>M8nS7%TH-N@l11c E4@bN$bpQYW diff --git a/R/ArrowRead.R b/R/ArrowRead.R index ec247173..9094dcfe 100644 --- a/R/ArrowRead.R +++ b/R/ArrowRead.R @@ -171,23 +171,37 @@ getMatrixFromProject <- function( ArrowFiles <- getArrowFiles(ArchRProj) + cellNames <- ArchRProj$cellNames + + seL <- ArchR:::.safelapply(seq_along(ArrowFiles), function(x){ .messageDiffTime(paste0("Reading ", useMatrix," : ", names(ArrowFiles)[x], "(",x," of ",length(ArrowFiles),")"), tstart, verbose = verbose) - o <- getMatrixFromArrow( - ArrowFile = ArrowFiles[x], - useMatrix = useMatrix, - useSeqnames = useSeqnames, - cellNames = ArchRProj$cellNames, - ArchRProj = ArchRProj, - verbose = verbose, - binarize = binarize - ) + allCells <- .availableCells(ArrowFile = ArrowFiles[x], subGroup = useMatrix) + allCells <- allCells[allCells %in% cellNames] + + if(length(allCells) != 0){ + + o <- getMatrixFromArrow( + ArrowFile = ArrowFiles[x], + useMatrix = useMatrix, + useSeqnames = useSeqnames, + cellNames = allCells, + ArchRProj = ArchRProj, + verbose = verbose, + binarize = binarize + ) + + .messageDiffTime(paste0("Completed ", useMatrix," : ", names(ArrowFiles)[x], "(",x," of ",length(ArrowFiles),")"), tstart, verbose = verbose) - .messageDiffTime(paste0("Completed ", useMatrix," : ", names(ArrowFiles)[x], "(",x," of ",length(ArrowFiles),")"), tstart, verbose = verbose) + o + + }else{ - o + NULL + + } }, threads = threads) @@ -281,6 +295,14 @@ getMatrixFromArrow <- function( featureDF <- featureDF[BiocGenerics::which(featureDF$seqnames %bcin% seqnames), ] .messageDiffTime(paste0("Getting ",useMatrix," from ArrowFile : ", basename(ArrowFile)), tstart) + + if(!is.null(cellNames)){ + allCells <- .availableCells(ArrowFile = ArrowFile, subGroup = useMatrix) + if(!all(cellNames %in% allCells)){ + stop("cellNames must all be within the ArrowFile!!!!") + } + } + mat <- .getMatFromArrow( ArrowFile = ArrowFile, featureDF = featureDF, @@ -437,11 +459,14 @@ getMatrixFromArrow <- function( mat <- mat[rownames(featureDF), , drop = FALSE] rownames(mat) <- NULL + if(!is.null(cellNames)){ + mat <- mat[,cellNames,drop=FALSE] + } + return(mat) } - #################################################################### # Helper read functioning #################################################################### @@ -471,6 +496,16 @@ getMatrixFromArrow <- function( rownames(featureDF) <- paste0("f", seq_len(nrow(featureDF))) cellNames <- unlist(groupList, use.names = FALSE) ### UNIQUE here? doublet check JJJ + allCellsList <- lapply(seq_along(ArrowFiles), function(x){ + allCells <- .availableCells(ArrowFile = ArrowFiles[x], subGroup = useMatrix) + allCells <- allCells[allCells %in% cellNames] + if(length(allCells) != 0){ + allCells + }else{ + NULL + } + }) + mat <- .safelapply(seq_along(seqnames), function(x){ .messageDiffTime(sprintf("Constructing Group Matrix %s of %s", x, length(seqnames)), tstart, verbose = verbose) @@ -484,29 +519,36 @@ getMatrixFromArrow <- function( rownames(matChr) <- rownames(featureDFx) for(y in seq_along(ArrowFiles)){ - - maty <- .getMatFromArrow( - ArrowFile = ArrowFiles[y], - useMatrix = useMatrix, - featureDF = featureDFx, - cellNames = cellNames, - useIndex = useIndex - ) - for(z in seq_along(groupList)){ + allCells <- allCellsList[[y]] + + if(!is.null(allCells)){ + + maty <- .getMatFromArrow( + ArrowFile = ArrowFiles[y], + useMatrix = useMatrix, + featureDF = featureDFx, + cellNames = allCells, + useIndex = useIndex + ) - #Check Cells In Group - cellsGroupz <- groupList[[z]] - idx <- BiocGenerics::which(colnames(maty) %in% cellsGroupz) + for(z in seq_along(groupList)){ + + #Check Cells In Group + cellsGroupz <- groupList[[z]] + idx <- BiocGenerics::which(colnames(maty) %in% cellsGroupz) + + #If In Group RowSums + if(length(idx) > 0){ + matChr[,z] <- matChr[,z] + Matrix::rowSums(maty[,idx,drop=FALSE]) + } - #If In Group RowSums - if(length(idx) > 0){ - matChr[,z] <- matChr[,z] + Matrix::rowSums(maty[,idx,drop=FALSE]) } - } + rm(maty) - rm(maty) + } + if(y %% 20 == 0 | y %% length(ArrowFiles) == 0){ gc() @@ -564,11 +606,22 @@ getMatrixFromArrow <- function( .messageDiffTime(sprintf("Getting Partial Matrix %s of %s", x, length(ArrowFiles)), tstart, verbose = verbose) + allCells <- .availableCells(ArrowFile = ArrowFiles[x], subGroup = useMatrix) + allCells <- allCells[allCells %in% cellNames] + + if(length(allCells) == 0){ + if(doSampleCells){ + return(list(mat = NULL, out = NULL)) + }else{ + return(NULL) + } + } + o <- h5closeAll() matx <- .getMatFromArrow( ArrowFile = ArrowFiles[x], featureDF = featureDF, - cellNames = cellNames, + cellNames = allCells, useMatrix = useMatrix, useIndex = useIndex ) diff --git a/R/VisualizeData.R b/R/VisualizeData.R index 4d4d04a1..621f8b58 100644 --- a/R/VisualizeData.R +++ b/R/VisualizeData.R @@ -553,7 +553,7 @@ plotGroups <- function( if(!inherits(values, "matrix")){ values <- matrix(as.matrix(values), ncol = nCells(ArchRProj)) - colnames(values) <- unlist(cellNamesList) + colnames(values) <- ArchRProj$cellNames } #Values Summary @@ -570,7 +570,6 @@ plotGroups <- function( } - .fixPlotSize <- function( p = NULL, plotWidth = unit(6, "in"),