addressing warnings

mjz1 · Jun 19, 2024 · 1f67389 · 1f67389
1 parent 78f2415
commit 1f67389
Show file tree

Hide file tree

Showing 33 changed files with 305 additions and 153 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -12,6 +12,7 @@ Depends:
     SingleCellExperiment
 Imports: 
     BiocGenerics,
+    BiocParallel,
     circlize,
     ComplexHeatmap,
     data.table,
@@ -55,16 +56,22 @@ Imports:
     sparseMatrixStats,
     stats,
     stringr,
+    SummarizedExperiment,
     tibble,
     tidyr,
     utils,
     vcfR,
     vegan,
     withr
 Suggests: 
+    anndata,
+    BSgenome,
+    BSgenome.Hsapiens.UCSC.hg38,
     dittoSeq,
+    HGC,
     knitr,
-    rmarkdown
+    rmarkdown,
+    zellkonverter
 VignetteBuilder: 
     knitr
 Remotes: 

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,26 +1,22 @@
 # Generated by roxygen2: do not edit by hand
 
-S3method(merge,sparse)
 export("%>%")
 export(EM2)
 export(add_gc_cor)
 export(add_hmmcopy)
 export(bin_atac_frags)
 export(bin_snp_data)
+export(bin_snp_data2)
 export(bind_sublist)
 export(calc_ai)
 export(calc_allelic)
 export(calc_clonal_diversity)
 export(calc_cnv_score)
-export(calc_ratios)
 export(calc_snn_specificity)
 export(cloneCnaHeatmap)
 export(cluster_seurat)
 export(cnaHeatmap)
-export(col_tumor_cells)
-export(combine.func)
 export(correct_atac_bias)
-export(counts_col_fun)
 export(disjoint_bins_map)
 export(do_qc)
 export(filter_sce)
@@ -29,15 +25,11 @@ export(gc_modal_qc_filter)
 export(get_assay_dat)
 export(get_blacklist)
 export(get_chr_arm_bins)
-export(get_counts)
-export(get_counts1)
 export(get_f_idx)
 export(get_gene_copy)
-export(get_label_centers)
 export(get_snp_bidx)
 export(get_snp_counts)
 export(get_tiled_bins)
-export(getmode)
 export(grab_hmm_res)
 export(hmmcopy_singlecell)
 export(identify_normal)
@@ -46,9 +38,7 @@ export(leiden_wrapper)
 export(load_atac_bins)
 export(logNorm)
 export(logr_col_fun)
-export(mergeLevels)
 export(merge_segments)
-export(numbat_to_sce)
 export(overlap_genes)
 export(params_sc_hmm)
 export(perform_gc_cor)
@@ -59,21 +49,16 @@ export(plot_cell_psuedobulk_cna)
 export(plot_clone_comp)
 export(plot_gene_cna)
 export(plot_segs)
-export(prettyMb)
 export(pseudo_groups)
 export(pseudo_join)
 export(pseudobulk_sce)
 export(read_vartrix)
 export(rebin_sce)
-export(run_sc_hmmcopy)
 export(run_scatools)
-export(save_to)
-export(scale_mat)
 export(scale_sub)
 export(segment_cnv)
 export(smooth_counts)
 export(summarise_chr_arm)
-export(vcf_to_df)
 import(SingleCellExperiment)
 importFrom(Matrix,t)
 importFrom(S4Vectors,mcols)

diff --git a/R/aspcf.R b/R/aspcf.R
@@ -98,6 +98,6 @@ atac_ascpf <- function(...) {
   assay(sce_bulk_new, "ratios_jointseg")[, tum_idx] <- tum_logr_seg[rownames(sce_bulk_new), ]
 
 
-  sce_bulk_new <- logNorm(sce_bulk_new, transform = "log2", assay = "ratios_jointseg", name = "logratios_jointseg")
+  sce_bulk_new <- logNorm(sce_bulk_new, transform = "log2", assay_name = "ratios_jointseg", name = "logratios_jointseg")
   # assay(sce_bulk_new, "ratios_jointseg")[, ] <- tum_logr_seg[rownames(sce_bulk_new), ]
 }
diff --git a/R/bin_utils.R b/R/bin_utils.R
@@ -13,7 +13,7 @@ bin_atac_frags <- function(sample_id,
                            fragment_file,
                            cells = NULL,
                            bins,
-                           bin_name = prettyMb(getmode(width(bins))),
+                           bin_name = NULL,
                            blacklist = NULL,
                            outdir,
                            ncores = 1,
@@ -23,6 +23,10 @@ bin_atac_frags <- function(sample_id,
 
   stopifnot(class(bins) %in% "GRanges")
 
+  if (is.null(bin_name)) {
+    bin_name = prettyMb(getmode(width(bins)))
+  }
+
   # Compute fragments per bins and combine
   bin_dir <- normalizePath(file.path(outdir, bin_name))
 
@@ -291,8 +295,6 @@ get_tiled_bins <- function(bs_genome = NULL,
 #'
 #' @return Dataframe of genome cytobands
 #'
-#' @examples
-#' hg38_cyto <- get_cytobands("hg38")
 get_cytobands <- function(genome = "hg38") {
   cyto_url <- paste0("http://hgdownload.cse.ucsc.edu/goldenpath/", genome, "/database/cytoBand.txt.gz")
   cyto <- readr::read_delim(file = cyto_url, col_names = c("CHROM", "start", "end", "cytoband", "unsure"), show_col_types = FALSE) %>%
@@ -329,9 +331,10 @@ add_gc_freq <- function(bs_genome, bins) {
 #'
 #' Given a matrix of bin counts, bin gc and N frequency, and filtering parameters, return a boolean matrix flagging ideal bins
 #'
-#'
 #' @param mat,sce A count matrix or SCE object depending on the function
 #' @param ncores number of cores for parallel evaluation (requires `pbmcapply` package)
+#' @param assay_name Name of assay
+#' @param verbose message verbosity
 #'
 #' @inherit is_ideal_bin
 #' @return Boolean matrices of ideal and valid bins

diff --git a/R/clustering.R b/R/clustering.R
@@ -82,7 +82,7 @@ cluster_seurat <- function(sce,
   } else if (features.pca == "variable") {
     if (is.null(nvar.features)) {
       logger::log_error("Variable features must provide 'nvar.features")
-      break
+      stop()
     }
     srt <- Seurat::FindVariableFeatures(srt)
     # Will use Seurats find variable features

diff --git a/R/cnv_tools.R b/R/cnv_tools.R
@@ -87,8 +87,6 @@ segment_cnv <- function(sce, assay_name, new_assay = paste(assay_name, "segment"
 
 #' Merge segment levels
 #'
-#' Wrapper for [mergeLevels()] to merge segments
-#'
 #' @inheritParams segment_cnv
 #' @param smooth_assay name of assay with smoothed counts
 #' @param segment_assay name of assay with segmented counts
@@ -126,7 +124,7 @@ merge_segments <- function(sce, smooth_assay, segment_assay, new_assay = "segmen
   rownames(seg_ratios) <- rownames(sce)
   assay(sce, paste(new_assay, "ratios", sep = "_")) <- as.matrix(round(seg_ratios, 2))
 
-  sce <- logNorm(sce, assay = paste(new_assay, "ratios", sep = "_"), name = paste(new_assay, "logratios", sep = "_"))
+  sce <- logNorm(sce, assay_name = paste(new_assay, "ratios", sep = "_"), name = paste(new_assay, "logratios", sep = "_"))
 
   logger::log_info("Merged segments in: {new_assay}")
   logger::log_info("Merged segments ratios in: {paste(new_assay, 'ratios', sep = '_')}")
@@ -235,8 +233,6 @@ identify_normal <- function(sce, assay_name, group_by = "clusters", method = c("
   return(sce)
 }
 
-#' @export
-#' @noRd
 calc_ratios <- function(sce, assay_name, fun = c("mean", "median"), new_assay = paste(assay_name, "ratios", sep = "_")) {
   fun <- match.arg(fun)
 
@@ -256,9 +252,7 @@ calc_ratios <- function(sce, assay_name, fun = c("mean", "median"), new_assay =
 
 # This is taken from copykit to avoid additional dependencies
 
-#' @export
 #' @importFrom stats ansari.test wilcox.test
-#' @noRd
 mergeLevels <- function(vecObs, vecPred, pv.thres = 1e-04, ansari.sign = 0.05,
                         thresMin = 0.05, thresMax = 0.5, verbose = 1, scale = TRUE) {
   if (thresMin > thresMax) {
@@ -357,9 +351,6 @@ mergeLevels <- function(vecObs, vecPred, pv.thres = 1e-04, ansari.sign = 0.05,
   ))
 }
 
-
-#' @export
-#' @noRd
 combine.func <- function(diff, vecObs, vecPredNow, mnNow, mn1, mn2, pv.thres = 1e-04,
                          thresAbs = 0) {
   vec1 <- vecObs[which(vecPredNow == mn1)]
@@ -401,7 +392,7 @@ combine.func <- function(diff, vecObs, vecPredNow, mnNow, mn1, mn2, pv.thres = 1
 #' @param scCNA scCNA object.
 #' @param transform String specifying the transformation to apply to the selected
 #' assay.
-#' @param assay String with the name of the assay to pull data from to run the
+#' @param assay_name String with the name of the assay to pull data from to run the
 #' segmentation.
 #' @param name String with the name for the target slot for the resulting
 #' transformed counts.

diff --git a/R/colors.R b/R/colors.R
@@ -10,20 +10,25 @@ state_cn_colors <- function() {
 }
 
 
+
+#' Log Ratio Colors
+#'
+#' Easy color mappings for log2 color scales in `ComplexHeatmap`
+#'
+#' @param breaks Value breaks
+#' @param colors Colors
+#'
+#' @return Color function
 #' @export
-#' @noRd
+#'
 logr_col_fun <- function(breaks = c(-2, 0, 2), colors = c("blue", "white", "red")) {
   circlize::colorRamp2(breaks = breaks, colors = colors)
 }
 
-#' @export
-#' @noRd
 counts_col_fun <- function(breaks = c(0, 2, 8), colors = c("blue", "white", "red")) {
   circlize::colorRamp2(breaks = breaks, colors = colors)
 }
 
-#' @export
-#' @noRd
 col_tumor_cells <- function() {
   c(`TRUE` = "#5E5E5E", `FALSE` = "#FF9A85")
 }
diff --git a/R/data.R b/R/data.R
@@ -13,6 +13,6 @@
 #' A subset of cells from normal mammary
 #'
 #' @format ## `bins_10mb`
-#' A GenomicRanges object with 331 bins.
+#' A GenomicRanges object with 324 bins.
 #'
 "bins_10mb"
diff --git a/R/hmmcopy_utils.R b/R/hmmcopy_utils.R
@@ -6,7 +6,7 @@
 #' @param save_raw_hmm Path to save raw hmm data in an `rda` file
 #' @param slot_suffix Suffix to add to newly created `copy` and `state` assay slots.
 #' @param assay_name Name of the assay with counts to input into HMMcopy. Ideally these are GC corrected.
-#' @param ... Additional parameters to pass to [run_sc_hmmcopy], such as `param`
+#' @param ... Additional parameters to pass to [run_sc_hmmcopy()], such as `param`
 #'
 #' @return an sce object with hmm copy metadata added to coldata, and new slots `copy` and `state`
 #' @export
@@ -87,7 +87,7 @@ add_hmmcopy <- function(sce,
   }
 
   # Merge the metadata
-  colData(sce) <- cbind(colData(sce), hmm_metadata[match(hmm_metadata$cell_id, rownames(colData(sce))), ])
+  SummarizedExperiment::colData(sce) <- cbind(colData(sce), hmm_metadata[match(hmm_metadata$cell_id, rownames(colData(sce))), ])
 
   if (verbose) {
     logger::log_info("Adding HMMcopy data to sce")
@@ -315,8 +315,6 @@ hmmcopy_singlecell <- function(chr, start, end, counts, reads, ideal = rep(TRUE,
 #' @param multipliers Positive integer list of ploidy multipliers to test
 #' @param return a character. One of `best` or `all` to either return the result for the best ploidy only, or a list of results for all ploidies
 #'
-#' @export
-#' @noRd
 run_sc_hmmcopy <- function(chr, start, end, counts, reads, ideal = rep(TRUE, length(counts)), param = params_sc_hmm(), cell_id, multipliers = 1:6, verbose = FALSE, maxiter = 200, n_cutoff = NULL, return = c("best", "all")) {
   # check integer multipliers
   if (!all(multipliers %% 1 == 0) | any(multipliers < 0)) {

diff --git a/R/load_atac_bins.R b/R/load_atac_bins.R
@@ -34,7 +34,7 @@ load_atac_bins <- function(bin_dir,
   if (verbose) {
     logger::log_info("Adding cellwise and binwise QC metrics")
   }
-  
+
   sce <- scuttle::addPerCellQCMetrics(sce)
   sce <- scuttle::addPerFeatureQCMetrics(sce, subsets = get_f_idx(sce$Sample))
 
@@ -51,45 +51,17 @@ load_atac_bins <- function(bin_dir,
 }
 
 
-#' @export
-#' @noRd
-vcf_to_df <- function(vcf, verbose = FALSE) {
-  vcf <- vcfR::read.vcfR(file = vcf, verbose = verbose)
-  gts <- vcfR::extract.gt(vcf, element = "GT", return.alleles = F)[, 1] %>% as.factor()
-  alleles <- vcfR::extract.gt(vcf, element = "GT", return.alleles = T)[, 1] %>% stringr::str_split_fixed(string = ., pattern = "/|\\|", n = 2)
-  stopifnot(all(names(gts) == names(alleles)))
-
-  # Remove indels and non het SNPs
-  keeps <- names(which((!vcfR::is.indel(vcf) & vcfR::is_het(as.matrix(gts)))[, 1]))
-  vcf_df <- cbind.data.frame(gts, alleles)[keeps, ] %>%
-    dplyr::mutate(across(where(is.character), as.factor))
-  colnames(vcf_df) <- c("gt", "ref", "alt")
-  vcf_df$snp_id <- rownames(vcf_df)
-
-  pos <- stringr::str_split_fixed(vcf_df$snp_id, pattern = "_", n = 2)
-  vcf_df$chr <- as.factor(pos[, 1])
-  vcf_df$start <- vcf_df$end <- as.integer(pos[, 2])
-
-  # Reordering columns and sorting
-  vcf_df <- vcf_df %>%
-    dplyr::select(snp_id, chr, start, end, ref, alt, gt) %>%
-    dplyr::mutate(chr = factor(chr, levels = chr_reorder(unique(levels(chr))))) %>%
-    dplyr::arrange(chr, start)
-
-  return(vcf_df)
-}
-
-#' Title
+#' Bin SNP Data
 #'
-#' @param snp_granges SNP granges object with GT column
+#' @param snp_sce SCE with snp data
 #' @param binsize Size of bins
 #' @param select_chrs Chromosomes to include
 #' @param bins Optional override of bins
 #'
 #' @return SCE object with phased binned snps
 #' @export
 #'
-bin_snp_data <- function(snp_sce, binsize = 500000, select_chrs = NULL, bins = NULL) {
+bin_snp_data2 <- function(snp_sce, binsize = 500000, select_chrs = NULL, bins = NULL) {
   # THIS FUNCTION NEEDS WORK
   # TODO
   if (is.null(select_chrs)) {