Skip to content

Commit

Permalink
Merge branch 'develop' into feat/FastSNN
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewwbutler committed Mar 20, 2018
2 parents 71f409a + 91ebd4c commit a67997c
Show file tree
Hide file tree
Showing 12 changed files with 211 additions and 46 deletions.
4 changes: 3 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,9 @@ Imports:
lmtest,
cluster,
fitdistrplus,
png
png,
doSNOW,
foreach
LinkingTo: Rcpp, RcppEigen, RcppProgress
License: GPL-3 | file LICENSE
LazyData: true
Expand Down
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ export(Shuffle)
export(SplitDotPlotGG)
export(SplitObject)
export(StashIdent)
export(SubsetByPredicate)
export(SubsetColumn)
export(SubsetData)
export(SubsetRow)
Expand Down Expand Up @@ -264,6 +265,7 @@ import(Matrix)
import(RColorBrewer)
import(SDMTools)
import(diffusionMap)
import(doSNOW)
import(fpc)
import(ggplot2)
import(gridExtra)
Expand Down Expand Up @@ -312,6 +314,8 @@ importFrom(dplyr,top_n)
importFrom(dplyr,ungroup)
importFrom(dtw,dtw)
importFrom(fitdistrplus,fitdist)
importFrom(foreach,"%dopar%")
importFrom(foreach,foreach)
importFrom(gdata,drop.levels)
importFrom(gdata,interleave)
importFrom(ggplot2,annotation_raster)
Expand Down
48 changes: 28 additions & 20 deletions R/dimensional_reduction.R
Original file line number Diff line number Diff line change
Expand Up @@ -219,10 +219,14 @@ RunICA <- function(
#' @param genes.use If set, run the tSNE on this subset of genes
#' (instead of running on a set of reduced dimensions). Not set (NULL) by default
#' @param seed.use Random seed for the t-SNE
#' @param do.fast If TRUE, uses the Barnes-hut implementation, which runs
#' faster, but is less flexible. TRUE by default.
#' @param do.approx Run FIt-SNE implementation, based on Kluger Lab code on
#' https://github.com/ChristophH/FIt-SNE
#' @param tsne.method Select the method to use to compute the tSNE. Available
#' methods are:
#' \itemize{
#' \item{Rtsne: }{Use the Rtsne package Barnes-Hut implementation of tSNE (default)}
#' \item{tsne: }{standard tsne - not recommended for large datasets}
#' \item{FIt-SNE: }{Use the FFT-accelerated Interpolation-based t-SNE. Based on
#' Kluger Lab code found here: https://github.com/ChristophH/FIt-SNE}
#' }
#' @param add.iter If an existing tSNE has already been computed, uses the
#' current tSNE to seed the algorithm and then adds additional iterations on top
#' of this
Expand Down Expand Up @@ -261,8 +265,7 @@ RunTSNE <- function(
dims.use = 1:5,
genes.use = NULL,
seed.use = 1,
do.fast = TRUE,
do.approx = FALSE,
tsne.method = "Rtsne",
add.iter = 0,
dim.embed = 2,
distance.matrix = NULL,
Expand All @@ -286,24 +289,29 @@ RunTSNE <- function(
genes.use = genes.use))
}
set.seed(seed = seed.use)
if (do.fast) {
if (do.approx & is.null(x = distance.matrix)) {
data.tsne <- fftRtsne(X = as.matrix(x = data.use), dims = dim.embed, rand_seed = seed.use, ...)

if(tsne.method == "Rtsne"){
if (is.null(x = distance.matrix)) {
data.tsne <- Rtsne(X = as.matrix(x = data.use),
dims = dim.embed,
pca = FALSE, ...)
} else {
if (is.null(x = distance.matrix)) {
data.tsne <- Rtsne(X = as.matrix(x = data.use), dims = dim.embed, ...)
} else {
data.tsne <- Rtsne(
X = as.matrix(x = distance.matrix),
dims = dim.embed,
is_distance=TRUE
)
}
data.tsne <- data.tsne$Y
data.tsne <- Rtsne(
X = as.matrix(x = distance.matrix),
dims = dim.embed,
is_distance = TRUE,
...
)
}
} else {
data.tsne <- data.tsne$Y
} else if (tsne.method == "FIt-SNE" & is.null(x = distance.matrix)) {
data.tsne <- fftRtsne(X = as.matrix(x = data.use), dims = dim.embed, rand_seed = seed.use, ...)
} else if (tsne.method == "tsne") {
data.tsne <- tsne(X = data.use, k = dim.embed, ...)
} else {
stop ("Invalid tsne.method: Please select from Rtsne, tsne, or FIt-SNE")
}

if (add.iter > 0) {
data.tsne <- tsne(
X = data.use,
Expand Down
2 changes: 1 addition & 1 deletion R/dimensional_reduction_internal.R
Original file line number Diff line number Diff line change
Expand Up @@ -429,7 +429,7 @@ fftRtsne <- function(
stop('tsne call failed');
}
f <- file(description = result_path, open = "rb")
# initialError <- readBin(f, integer(), n = 1, size = 8) # Not used
initialError <- readBin(f, integer(), n = 1, size = 8)
n <- readBin(con = f, what = integer(), n = 1, size = 4)
d <- readBin(con = f, what = integer(), n = 1, size = 4)
Y <- readBin(con = f, what = numeric(), n = n * d)
Expand Down
46 changes: 44 additions & 2 deletions R/interaction.R
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,44 @@ AddSamples <- function(
return(new.object)
}



#' Return a subset of the Seurat object.
#'
#' Creates a Seurat object containing only a subset of the cells in the
#' original object. Forms a dataframe by fetching the variables in \code{vars.use}, then
#' subsets it using \code{base::subset} with \code{predicate} as the filter.
#' Returns the corresponding subset of the Seurat object.
#'
#' @param object Seurat object
#' @param vars.use Variables to fetch for use in base::subset. Character vector.
#' @param predicate String to be parsed into an R expression and evaluated as an input to base::subset.
#'
#' @export
#'
#' @examples
#' pbmc1 <- SubsetByPredicate(object = pbmc_small,
#' vars.use = c("nUMI", "res.1"),
#' predicate = "nUMI < 200 & res.1=='3'")
#' pbmc1
#'
SubsetByPredicate = function(
object,
vars.use,
predicate
){
if( typeof(vars.use) != "character"){
stop("predicate should be a character vector. It will be parsed in `subset` as an R expression.")
}
if( typeof(predicate) != "character"){
stop("vars.use should be a character vector. These variables will be passed to FetchData.")
}
df <- FetchData(object, vars.use) %>% as.data.frame
cu <- df %>% subset(eval(parse(text=predicate))) %>% rownames
object <- SubsetData(object, cells.use = cu)
return( object )
}

#' Return a subset of the Seurat object
#'
#' Creates a Seurat object containing only a subset of the cells in the
Expand Down Expand Up @@ -725,7 +763,9 @@ WhichCells <- function(
if(accept.low >= accept.high) {
stop("accept.low greater than or equal to accept.high")
}
set.seed(seed = random.seed)
if (!is.na(x = random.seed)) {
set.seed(seed = random.seed)
}
cells.use <- SetIfNull(x = cells.use, default = object@cell.names)
ident <- SetIfNull(x = ident, default = unique(x = object@ident))
bad.remove.idents <- ident.remove[! (ident.remove %in% unique(x = object@ident))]
Expand Down Expand Up @@ -1085,7 +1125,9 @@ AddMetaData <- function(object, metadata, col.name = NULL) {
colnames(x = metadata) <- col.name
}
cols.add <- colnames(x = metadata)
meta.add <- metadata[rownames(x = object@meta.data), cols.add]
#meta.add <- metadata[rownames(x = [email protected]), cols.add]
meta.order <- match(rownames(object@meta.data), rownames(metadata))
meta.add <- metadata[meta.order, ]
if (all(is.null(x = meta.add))) {
stop("Metadata provided doesn't match the cells in this object")
}
Expand Down
11 changes: 9 additions & 2 deletions R/preprocessing.R
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,9 @@ ScaleDataR <- function(
#' differences in numerical precision which could affect downstream calculations.
#' @param check.for.norm Check to see if data has been normalized, if not,
#' output a warning (TRUE by default)
#' @param do.par use parallel processing for regressing out variables faster.
#' If set to TRUE, will use half of the machines available cores (FALSE by default)
#' @param num.cores If do.par = TRUE, specify the number of cores to use.
#'
#' @return Returns a seurat object with object@@scale.data updated with scaled
#' and/or centered data.
Expand Down Expand Up @@ -461,7 +464,9 @@ ScaleData <- function(
display.progress = TRUE,
assay.type = "RNA",
do.cpp = TRUE,
check.for.norm = TRUE
check.for.norm = TRUE,
do.par = FALSE,
num.cores = 1
) {
data.use <- SetIfNull(
x = data.use,
Expand Down Expand Up @@ -495,7 +500,9 @@ ScaleData <- function(
genes.regress = genes.use,
use.umi = use.umi,
model.use = model.use,
display.progress = display.progress
display.progress = display.progress,
do.par = do.par,
num.cores = num.cores
)
if (model.use != "linear") {
use.umi <- TRUE
Expand Down
58 changes: 47 additions & 11 deletions R/preprocessing_internal.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,27 @@
# @param model.use Use a linear model or generalized linear model (poisson, negative binomial) for the regression. Options are 'linear' (default), 'poisson', and 'negbinom'
# @param use.umi Regress on UMI count data. Default is FALSE for linear modeling, but automatically set to TRUE if model.use is 'negbinom' or 'poisson'
# @param display.progress display progress bar for regression procedure.
# @param do.par use parallel processing for regressing out variables faster.
# If set to TRUE, will use half of the machines available cores (FALSE by default)
# @param num.cores If do.par = TRUE, specify the number of cores to use.
#
# @return Returns the residuals from the regression model
#
#' @import Matrix
#' @import doSNOW
#' @importFrom stats as.formula lm residuals glm
#' @importFrom utils txtProgressBar setTxtProgressBar
#' @importFrom foreach foreach %dopar%
#
RegressOutResid <- function(
object,
vars.to.regress,
genes.regress = NULL,
model.use = 'linear',
use.umi = FALSE,
display.progress = TRUE
display.progress = TRUE,
do.par = FALSE,
num.cores = 1
) {
possible.models <- c("linear", "poisson", "negbinom")
if (! model.use %in% possible.models){
Expand Down Expand Up @@ -56,7 +63,38 @@ RegressOutResid <- function(
if (use.umi) {
data.use <- object@raw.data[genes.regress, object@cell.names, drop = FALSE]
}
for (i in 1:max.bin) {

# input checking for parallel options
if(do.par){
if(num.cores == 1){
num.cores <- detectCores() / 2
} else {
if(num.cores > detectCores()){
num.cores <- detectCores() - 1
warning(paste0("num.cores set greater than number of available cores(", detectCores(), "). Setting num.cores to ", num.cores, "."))
}
}
} else {
if(num.cores != 1){
num.cores <- 1
warning("For parallel processing, please set do.par to TRUE.")
}
}
cl<- parallel::makeCluster(num.cores)

# using doSNOW library because it supports progress bar update
registerDoSNOW(cl)

opts <- list()
if(display.progress)
{
# define progress bar function
progress <- function(n) setTxtProgressBar(pb, n)
opts <- list(progress = progress)
time_elapsed <- Sys.time()
}

data.resid <- foreach(i = 1:max.bin, .combine = "rbind", .options.snow = opts) %dopar% {
genes.bin.regress <- rownames(x = data.use)[bin.ind == i]
gene.expr <- as.matrix(x = data.use[genes.bin.regress, , drop = FALSE])
new.data <- do.call(
Expand Down Expand Up @@ -96,19 +134,17 @@ RegressOutResid <- function(
}
)
)
if (i == 1) {
data.resid=new.data
}
if (i > 1) {
data.resid=rbind(data.resid,new.data)
}
if(display.progress) {
setTxtProgressBar(pb, i)
}
new.data
}

if (display.progress) {
time_elapsed <- Sys.time() - time_elapsed
cat(paste("\nTime Elapsed: ",time_elapsed, units(time_elapsed)))
close(pb)
}

stopCluster(cl)

rownames(x = data.resid) <- genes.regress
if (use.umi) {
data.resid <- log1p(
Expand Down
19 changes: 11 additions & 8 deletions man/RunTSNE.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 6 additions & 1 deletion man/ScaleData.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 28 additions & 0 deletions man/SubsetByPredicate.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit a67997c

Please sign in to comment.