Skip to content

Commit

Permalink
minor linting/changes
Browse files Browse the repository at this point in the history
  • Loading branch information
mjz1 committed Jul 9, 2024
1 parent bd8df13 commit d2b8e7a
Show file tree
Hide file tree
Showing 8 changed files with 55 additions and 45 deletions.
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,5 @@ Dockerfile
^dev$
^doc$
^Meta$
^vignettes/*_files$
^vignettes/articles$
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ example
/Meta/
.vscode/
docs
results/
28 changes: 13 additions & 15 deletions R/bin_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,10 @@ bin_atac_frags <- function(sample_id,
bpparam = BiocParallel::SerialParam(),
overwrite = FALSE,
return_mat = FALSE) {

stopifnot(class(bins) %in% "GRanges")

if (is.null(bin_name)) {
bin_name = prettyMb(getmode(width(bins)))
bin_name <- prettyMb(getmode(width(bins)))
}

# Compute fragments per bins and combine
Expand All @@ -33,7 +32,6 @@ bin_atac_frags <- function(sample_id,

# Only bin frags if not done already
if (!file.exists(file.path(bin_dir, "matrix.mtx.gz")) | overwrite) {

# Convert to GenomicRanges
logger::log_info("{sample_id} -- Loading fragments...")
fragments <- data.table::fread(fragment_file, header = FALSE)
Expand All @@ -42,13 +40,13 @@ bin_atac_frags <- function(sample_id,

# If no cells provided use all barcodes and warn
if (is.null(cells)) {
cells = unique(fragments$barcode)
cells <- unique(fragments$barcode)
if (length(cells) >= 1e5) {
logger::log_warn("No cell barcodes specified. {length(cells)} unique cells in fragments file..." )
logger::log_warn("No cell barcodes specified. {length(cells)} unique cells in fragments file...")
}
}

fragments <- fragments[fragments$barcode %in% cells & fragments$chr %in% GenomeInfoDb::seqlevelsInUse(bins),]
fragments <- fragments[fragments$barcode %in% cells & fragments$chr %in% GenomeInfoDb::seqlevelsInUse(bins), ]
fragments <- GenomicRanges::makeGRangesFromDataFrame(fragments, keep.extra.columns = T)
fragments <- GenomicRanges::split(fragments, GenomeInfoDb::seqnames(fragments))

Expand Down Expand Up @@ -168,9 +166,9 @@ bin_frags_chr <- function(fragments_chr,
# Name matrix
colnames(mat) <- cells
rownames(mat) <- paste(GenomicRanges::seqnames(bins_chr),
GenomicRanges::start(bins_chr),
GenomicRanges::end(bins_chr),
sep = "_"
GenomicRanges::start(bins_chr),
GenomicRanges::end(bins_chr),
sep = "_"
)

return(mat)
Expand Down Expand Up @@ -265,8 +263,8 @@ get_tiled_bins <- function(bs_genome = NULL,
unlist()
} else {
bins <- GenomicRanges::tileGenome(BSgenome::seqinfo(bs_genome)[select_chrs],
tilewidth = tilewidth,
cut.last.tile.in.chrom = TRUE
tilewidth = tilewidth,
cut.last.tile.in.chrom = TRUE
)
}

Expand All @@ -281,7 +279,7 @@ get_tiled_bins <- function(bs_genome = NULL,
bins <- add_gc_freq(bs_genome, bins)
bins <- sort(bins)
idx <- which(as.vector(GenomeInfoDb::seqnames(bins)) %in% select_chrs)
bins <- bins[idx,]
bins <- bins[idx, ]

return(bins)
}
Expand All @@ -298,9 +296,9 @@ get_cytobands <- function(genome = "hg38") {
cyto <- readr::read_delim(file = cyto_url, col_names = c("CHROM", "start", "end", "cytoband", "unsure"), show_col_types = FALSE) %>%
dplyr::filter(!is.na(cytoband)) %>%
dplyr::mutate(dplyr::across(where(is.character), as.factor),
"start" = start + 1,
"arm" = factor(substr(cytoband, 0, 1)),
"genome" = genome
"start" = start + 1,
"arm" = factor(substr(cytoband, 0, 1)),
"genome" = genome
)
return(cyto)
}
Expand Down
31 changes: 18 additions & 13 deletions R/gc_correction.R
Original file line number Diff line number Diff line change
Expand Up @@ -87,12 +87,13 @@ add_gc_cor <- function(sce,
#' @return Sparse matrix of corrected counts
#'
#' @export
perform_gc_cor <- function(mat,
gc,
valid_mat = NULL,
method = c("modal", "copykit", "loess"),
bpparam,
verbose = FALSE, ...) {
perform_gc_cor <- function(
mat,
gc,
valid_mat = NULL,
method = c("modal", "copykit", "loess"),
bpparam,
verbose = FALSE, ...) {
if (verbose) {
logger::log_info("Performing GC correction on {ncol(mat)} cells using {bpparam$workers} cores")
logger::log_info("GC correction method: {method}")
Expand All @@ -116,14 +117,18 @@ perform_gc_cor <- function(mat,
)

# Parallel apply over the matrix
counts_gc_list <- BiocParallel::bplapply(X = seq_len(ncol(mat)),
BPPARAM = bpparam,
counts_gc_list <- BiocParallel::bplapply(
X = seq_len(ncol(mat)),
BPPARAM = bpparam,
FUN = function(i) {
FUN(gc = gc,
counts = as.vector(mat[, i]),
valid = as.vector(valid_mat[, i]),
bin_ids = rownames(mat), ...)
})
FUN(
gc = gc,
counts = as.vector(mat[, i]),
valid = as.vector(valid_mat[, i]),
bin_ids = rownames(mat), ...
)
}
)

if (verbose) {
logger::log_success("GC correction completed!")
Expand Down
22 changes: 12 additions & 10 deletions R/run_scatools.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ run_scatools <- function(sample_id,
bin_name = prettyMb(getmode(width(bins))),
blacklist = NULL,
outdir = sample_id,
segment = TRUE,
rmsb_size = 0.1*getmode(width(bins)),
segment = FALSE,
rmsb_size = 0.1 * getmode(width(bins)),
gc_range = c(0.3, 0.8),
overwrite = FALSE,
verbose = TRUE,
Expand Down Expand Up @@ -102,23 +102,25 @@ run_scatools <- function(sample_id,
sce_processed <- length_normalize(sce_processed, assay_name = "counts", assay_to = "counts")

sce_processed <- sce_processed %>%
filter_sce(gc_range = gc_range,
min_bin_counts = min_bin_counts,
min_bin_prop = min_bin_prop,
filter_sce(
gc_range = gc_range,
min_bin_counts = min_bin_counts,
min_bin_prop = min_bin_prop,
min_cell_counts = min_cell_counts,
min_cell_prop = min_cell_prop) %>%
min_cell_prop = min_cell_prop
) %>%
add_ideal_mat(verbose = TRUE, ncores = ncores) %>%
add_gc_cor(method = "modal", verbose = TRUE, bpparam = bpparam) %>%
smooth_counts(assay_name = "counts_gc_modal", ncores = ncores) %>%
calc_ratios(assay_name = "counts_gc_modal_smoothed") %>%
logNorm(assay_name = "counts_gc_modal_smoothed_ratios", name = "logr_modal") %>%
cluster_seurat(assay_name = "counts_gc_modal_smoothed_ratios", resolution = 0.5, verbose = FALSE)
cluster_seurat(assay_name = "counts_gc_modal_smoothed_ratios", resolution = 0.5, verbose = FALSE)

if (segment) {
if (segment) {
sce_processed <- segment_cnv(assay_name = "counts_gc_modal_smoothed", bpparam = bpparam) %>%
merge_segments(smooth_assay = "counts_gc_modal_smoothed", segment_assay = "counts_gc_modal_smoothed_segment", bpparam = bpparam) %>%
identify_normal(assay_name = "segment_merged_logratios", group_by = "clusters", method = "gmm")
}
}

save_to(object = sce_processed, save_to = final_out)
logger::log_success("SCATools run completed!")
Expand All @@ -131,7 +133,7 @@ run_scatools <- function(sample_id,
logger::log_info("Writing output to anndata")
# Save raw anndata
zellkonverter::writeH5AD(sce, file = file.path(outdir, glue::glue("{bin_name}_raw.h5ad")), compression = "gzip")
zellkonverter::writeH5AD(sce_processed, file = file.path(outdir, glue::glue("{bin_name}_processed.h5ad")), compression = "gzip")
zellkonverter::writeH5AD(sce_processed, file = file.path(outdir, glue::glue("{bin_name}_processed.h5ad")), compression = "gzip")
}
}

Expand Down
2 changes: 1 addition & 1 deletion _pkgdown.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
url: ~
url: https://mjz1.github.io/scatools/
template:
bootstrap: 5

1 change: 1 addition & 0 deletions vignettes/.gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
*.html
*.R
example/
*_files
13 changes: 7 additions & 6 deletions vignettes/scatools.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ knitr::opts_chunk$set(

# Introduction

We start by loading `scatools` and `ArchR`, as well as the filepath to an example `fragments.bed.gz` file containing fragments from 100 normal mammary cells.
We start by loading `scatools` and a filepath to an example `fragments.bed.gz` file containing fragments from 100 normal mammary cells.

Note this vignette is currently set up with dummy normal data and is purely intended to demonstrate package functionality. There are no CNV changes in the test data.

Expand Down Expand Up @@ -49,14 +49,15 @@ Now we process this data using `scatools`. Helper functions help us to create `G

```{r, eval=FALSE}
# if (!require("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
# install.packages("BiocManager")
# BiocManager::install("BSgenome.Hsapiens.UCSC.hg38")
bins <- get_tiled_bins(bs_genome = BSgenome.Hsapiens.UCSC.hg38,
tilewidth = 1e7,
respect_chr_arms = TRUE
)
bins <- get_tiled_bins(
bs_genome = BSgenome.Hsapiens.UCSC.hg38,
tilewidth = 1e7,
respect_chr_arms = TRUE
)
```

```{r, include=FALSE}
Expand Down

0 comments on commit d2b8e7a

Please sign in to comment.