Skip to content

Commit

Permalink
bplapply backend for more functions
Browse files Browse the repository at this point in the history
  • Loading branch information
mjz1 committed Jun 21, 2024
1 parent 655165d commit 7da0cdb
Show file tree
Hide file tree
Showing 13 changed files with 49 additions and 39 deletions.
4 changes: 2 additions & 2 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
^docs$
^pkgdown$
^\.github$
^\.vscode$
^example$
^vignettes/example$
^vignettes/results$
^results$
Dockerfile
^docker*
^dev$
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ export(segment_cnv)
export(smooth_counts)
export(summarise_chr_arm)
import(SingleCellExperiment)
importFrom(BiocParallel,bplapply)
importFrom(Matrix,t)
importFrom(S4Vectors,mcols)
importFrom(SummarizedExperiment,"assay<-")
Expand Down
3 changes: 2 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# scatools (development version)

* Replace `perform_gc_cor` parallel backend to use `BiocParallel`
* Added `segment=FALSE` as default
* Replace `add_gc_cor`, `segment_cnv`, and `merge_segments` parallel backend to use `BiocParallel`

# scatools 0.1.1

Expand Down
21 changes: 11 additions & 10 deletions R/cnv_tools.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,23 +39,23 @@ smooth_counts <- function(sce, assay_name, ncores = 1, smooth_name = paste(assay
#' @param assay_name Name of the assay to segment
#' @param new_assay Name of the new assay
#' @param verbose Verbosity
#' @param ncores Number of cores to use
#' @param ... Additional parameters to pass to [DNAcopy::segment]
#'
#' @inheritParams DNAcopy::segment
#' @inheritParams run_scatools
#'
#' @return A `SingleCellExperiment` object
#' @export
#'
segment_cnv <- function(sce, assay_name, new_assay = paste(assay_name, "segment", sep = "_"), alpha = 0.2, nperm = 10, min.width = 2, undo.splits = "none", verbose = 0, ncores = 1, ...) {
segment_cnv <- function(sce, assay_name, new_assay = paste(assay_name, "segment", sep = "_"), alpha = 0.2, nperm = 10, min.width = 2, undo.splits = "none", verbose = 0, bpparam = BiocParallel::SerialParam(), ...) {
chrs <- as.vector(GenomeInfoDb::seqnames(SummarizedExperiment::rowRanges(sce)))
starts <- GenomicRanges::start(SummarizedExperiment::rowRanges(sce))
sample_ids <- colnames(sce)
# Perform segmentation
# TODO: Make this function chromosome arm aware (ie segment within arms rather than chrs)
logger::log_info("Segmenting CNVs")
segmented_counts <- pbmcapply::pbmclapply(1:ncol(sce), mc.cores = ncores, FUN = function(i) {
x <- as.vector(assay(sce, assay_name)[, i])
segmented_counts <- BiocParallel::bplapply(X = 1:ncol(sce), BPPARAM = bpparam, FUN = function(i) {
x <- as.vector(SummarizedExperiment::assay(sce, assay_name)[, i])
obj <- DNAcopy::CNA(genomdat = x, chrom = chrs, maploc = starts, data.type = "logratio", sampleid = sample_ids[i], presorted = T)
res <- withr::with_seed(3, smoothed_CNA_counts <- DNAcopy::segment(obj,
alpha = alpha,
Expand All @@ -80,30 +80,31 @@ segment_cnv <- function(sce, assay_name, new_assay = paste(assay_name, "segment"
segmented_counts <- as.matrix(dplyr::bind_rows(segmented_counts))
rownames(segmented_counts) <- rownames(sce)

assay(sce, new_assay) <- segmented_counts
SummarizedExperiment::assay(sce, new_assay) <- segmented_counts
return(sce)
}


#' Merge segment levels
#'
#' @inheritParams segment_cnv
#' @inheritParams run_scatools
#' @param smooth_assay name of assay with smoothed counts
#' @param segment_assay name of assay with segmented counts
#'
#' @return A `SingleCellExperiment` object
#' @export
#'
merge_segments <- function(sce, smooth_assay, segment_assay, new_assay = "segment_merged", ncores = 1) {
merge_segments <- function(sce, smooth_assay, segment_assay, new_assay = "segment_merged", bpparam = bpparam) {
smooth_counts <- log2(assay(sce, smooth_assay))

segment_df <- as.data.frame(assay(sce, segment_assay))
segment_df <- as.data.frame(SummarizedExperiment::assay(sce, segment_assay))
segment_df[segment_df == 0] <- 1e-4
segment_df <- log2(segment_df)

logger::log_info("Merging segments using {ncores} cores for {ncol(segment_df)} cells")
logger::log_info("Merging segments using {bpparam$workers} cores for {ncol(segment_df)} cells")

seg_ml_list <- pbmcapply::pbmclapply(seq_along(segment_df), mc.cores = ncores, function(i) {
seg_ml_list <- BiocParallel::bplapply(X = seq_along(segment_df), BPPARAM = bpparam, function(i) {
cell_name <- names(segment_df)[i]
seg_means_ml <- mergeLevels(
vecObs = smooth_counts[, i],
Expand Down Expand Up @@ -169,7 +170,7 @@ identify_normal <- function(sce, assay_name, group_by = "clusters", method = c("
n_normal_clusts <- 1
}

sce$seg_sd <- colSds(assay(sce, assay_name), na.rm = TRUE)
sce$seg_sd <- colSds(SummarizedExperiment::assay(sce, assay_name), na.rm = TRUE)


# Take the per cluster medians
Expand Down
19 changes: 9 additions & 10 deletions R/gc_correction.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#' @export
#'
add_gc_cor <- function(sce,
gc = rowData(sce)$gc,
gc = SummarizedExperiment::rowData(sce)$gc,
assay_name = "counts",
method = c("modal", "copykit", "loess"),
verbose = FALSE,
Expand All @@ -27,19 +27,19 @@ add_gc_cor <- function(sce,
gc_slot <- paste0("counts_gc_", method)

# Check if valid bins exists and pass correctly
if ("valid_bins" %in% names(assays(sce))) {
if ("valid_bins" %in% names(SummarizedExperiment::assays(sce))) {
if (verbose) {
logger::log_info("Found valid bins in sce object")
}
valid_mat <- assay(sce, "valid_bins")
valid_mat <- SummarizedExperiment::assay(sce, "valid_bins")
} else {
warning("No valid bins matrix in sce object. Defaulting to all valid.")
valid_mat <- NULL
}

if (method == "modal") {
res <- perform_gc_cor(
mat = assay(sce, assay_name),
mat = SummarizedExperiment::assay(sce, assay_name),
gc = gc,
valid_mat = valid_mat,
method = method,
Expand All @@ -49,16 +49,16 @@ add_gc_cor <- function(sce,
...
)

assay(sce, gc_slot) <- res$counts
SummarizedExperiment::assay(sce, gc_slot) <- res$counts

sce$modal_quantile <- as.integer(gsub("q", "", res$modal_quantiles))
} else {
assay(sce, gc_slot) <- perform_gc_cor(
mat = assay(sce, assay_name),
SummarizedExperiment::assay(sce, gc_slot) <- perform_gc_cor(
mat = SummarizedExperiment::assay(sce, assay_name),
gc = gc,
valid_mat = valid_mat,
method = method,
ncores = ncores,
bpparam = bpparam,
verbose = verbose,
...
)
Expand All @@ -80,7 +80,6 @@ add_gc_cor <- function(sce,
#' @param gc GC corresponding to bins (rows) in the matrix
#' @param valid_mat Matrix of TRUE/FALSE for valid bins. If none provided defaults to all TRUE
#' @param method Specifies the type of GC correction to perform. One of `'modal', 'copykit', or 'loess'`
#' @param ncores Number of cores to use if parallel backend is available
#' @param bpparam BiocParallel params
#' @param verbose Message verbosity (TRUE/FALSE)
#' @param ... Additional arguments to be passed to GC correction methods
Expand All @@ -95,7 +94,7 @@ perform_gc_cor <- function(mat,
bpparam,
verbose = FALSE, ...) {
if (verbose) {
logger::log_info("Performing GC correction on {ncol(mat)} cells")
logger::log_info("Performing GC correction on {ncol(mat)} cells using {bpparam$workers} cores")
logger::log_info("GC correction method: {method}")
}

Expand Down
2 changes: 1 addition & 1 deletion R/load_atac_bins.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ load_atac_bins <- function(bin_dir,
sce <- scuttle::addPerFeatureQCMetrics(sce, subsets = get_f_idx(sce$Sample))

if (!is.null(save_to)) {
.save_to(object = sce, save_to = save_to, verbose = verbose)
save_to(object = sce, save_to = save_to, verbose = verbose)
}

if (verbose) {
Expand Down
16 changes: 11 additions & 5 deletions R/run_scatools.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#' @param outdir Output directory
#' @param rmsb_size Remove small bins below a given size. Useful in cases where small bins are leftover at the ends of chromosomes. Defaults to 10% of the binwidth.
#' @param gc_range GC range for bins to keep. Removes large GC outliers
#' @param segment Logical. Determines whether to run segmentation or not
#' @param min_bin_counts A bin requires at least `min_bin_counts` across `min_bin_prop` proportion of cells to be kept
#' @param min_bin_prop Minimum proportion of cells with at least `min_bin_counts` per bin in order to keep a bin
#' @param min_cell_counts A cell requires at least `min_cell_counts` across `min_cell_prop` proportion of bins to be kept
Expand All @@ -31,6 +32,7 @@ run_scatools <- function(sample_id,
bin_name = prettyMb(getmode(width(bins))),
blacklist = NULL,
outdir = sample_id,
segment = TRUE,
rmsb_size = 0.1*getmode(width(bins)),
gc_range = c(0.3, 0.8),
overwrite = FALSE,
Expand Down Expand Up @@ -110,11 +112,15 @@ run_scatools <- function(sample_id,
smooth_counts(assay_name = "counts_gc_modal", ncores = ncores) %>%
calc_ratios(assay_name = "counts_gc_modal_smoothed") %>%
logNorm(assay_name = "counts_gc_modal_smoothed_ratios", name = "logr_modal") %>%
cluster_seurat(assay_name = "counts_gc_modal_smoothed_ratios", resolution = 0.5, verbose = FALSE) %>%
segment_cnv(assay_name = "counts_gc_modal_smoothed", ncores = ncores) %>%
merge_segments(smooth_assay = "counts_gc_modal_smoothed", segment_assay = "counts_gc_modal_smoothed_segment", ncores = ncores) %>%
identify_normal(assay_name = "segment_merged_logratios", group_by = "clusters", method = "gmm")
save_to(object = sce_processed, save_to = final_out)
cluster_seurat(assay_name = "counts_gc_modal_smoothed_ratios", resolution = 0.5, verbose = FALSE)

if (segment) {
sce_processed <- segment_cnv(assay_name = "counts_gc_modal_smoothed", bpparam = bpparam) %>%
merge_segments(smooth_assay = "counts_gc_modal_smoothed", segment_assay = "counts_gc_modal_smoothed_segment", bpparam = bpparam) %>%
identify_normal(assay_name = "segment_merged_logratios", group_by = "clusters", method = "gmm")
save_to(object = sce_processed, save_to = final_out)
}


# Save anndata
if (save_h5ad == TRUE) {
Expand Down
7 changes: 4 additions & 3 deletions R/scatools-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,10 @@ utils::globalVariables("where")


## usethis namespace: start
#' @importFrom BiocParallel bplapply
#' @importFrom methods as
#' @importFrom methods is
#' @importFrom S4Vectors mcols
#' @importFrom SummarizedExperiment assay
#' @importFrom SummarizedExperiment assay<-
#' @importFrom SummarizedExperiment assays
#' @importFrom stats acf
#' @importFrom stats approx
#' @importFrom stats as.dist
Expand All @@ -34,6 +32,9 @@ utils::globalVariables("where")
#' @importFrom stats start
#' @importFrom stats var
#' @importFrom stats weighted.mean
#' @importFrom SummarizedExperiment assay
#' @importFrom SummarizedExperiment assay<-
#' @importFrom SummarizedExperiment assays
#' @importFrom utils read.table
#' @importFrom utils write.table
## usethis namespace: end
Expand Down
2 changes: 1 addition & 1 deletion man/add_gc_cor.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/merge_segments.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 0 additions & 2 deletions man/perform_gc_cor.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions man/run_scatools.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/segment_cnv.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 7da0cdb

Please sign in to comment.