From 8fc52a5f0a4fc4a8f96371436938a67fe9c811ca Mon Sep 17 00:00:00 2001
From: Saket Choudhary
Date: Sat, 6 Mar 2021 01:35:27 -0500
Subject: [PATCH] Add tests

---
Notes (text between the "---" separator and the first "diff --git" line is
commentary and is not part of the commit):

* ReadMtx() gains `skip.cell` and `skip.feature` (both default to 0). They are
  forwarded as `skip` to the read.table() calls that load the barcode list and
  the feature list, respectively.
* `immediate. = TRUE` is dropped from the two conditions raised when feature
  names are NA.
* After readMM(), ReadMtx() stops when the number of barcodes differs from
  ncol(data) or the number of features differs from nrow(data); each message
  reports the matching matrix dimension and, when too many names were read,
  suggests increasing the corresponding skip argument.
* The matrix is coerced to "dgCMatrix" before it is returned.
* tests/testthat/test_read_mtx.R calls ReadMtx() on remote GEO files at the
  top level of the file, so running it requires network access.

 R/preprocessing.R              | 42 +++++++++++++++++++++++++++-------
 man/ReadMtx.Rd                 |  6 +++++
 tests/testthat/test_read_mtx.R | 32 ++++++++++++++++++++++++++
 3 files changed, 72 insertions(+), 8 deletions(-)
 create mode 100644 tests/testthat/test_read_mtx.R

diff --git a/R/preprocessing.R b/R/preprocessing.R
index b1323e2b9..cfac4118e 100644
--- a/R/preprocessing.R
+++ b/R/preprocessing.R
@@ -1037,6 +1037,8 @@ Read10X_Image <- function(image.dir, filter.matrix = TRUE, ...) {
 #' @param features Name or remote URL of the features/genes file
 #' @param feature.column Specify which column of features files to use for feature/gene names; default is 2
 #' @param cell.column Specify which column of cells file to use for cell names; default is 1
+#' @param skip.cell Number of lines to skip in the cells file before beginning to read cell names
+#' @param skip.feature Number of lines to skip in the features file before beginning to read gene names
 #' @param unique.features Make feature names unique (default TRUE)
 #' @param strip.suffix Remove trailing "-1" if present in all cell barcodes.
 #'
@@ -1072,6 +1074,8 @@ ReadMtx <- function(
   features,
   cell.column = 1,
   feature.column = 2,
+  skip.cell = 0,
+  skip.feature = 0,
   unique.features = TRUE,
   strip.suffix = FALSE
 ) {
@@ -1104,13 +1108,15 @@ ReadMtx <- function(
     file = all.files[['barcode list']],
     header = FALSE,
     sep = '\t',
-    row.names = NULL
+    row.names = NULL,
+    skip = skip.cell
   )
   feature.names <- read.table(
     file = all.files[['feature list']],
     header = FALSE,
     sep = '\t',
-    row.names = NULL
+    row.names = NULL,
+    skip = skip.feature
   )
   # read barcodes
   bcols <- ncol(x = cell.barcodes)
@@ -1160,9 +1166,8 @@ ReadMtx <- function(
         "Some features names are NA in column ",
         feature.column,
         ". Try specifiying a different column.",
-        call. = FALSE,
-        immediate. = TRUE
-      )
+        call. = FALSE
+        )
     } else {
       warning(
         "Some features names are NA in column ",
@@ -1170,9 +1175,8 @@ ReadMtx <- function(
         ". Replacing NA names with ID from column ",
         replacement.column,
         ".",
-        call. = FALSE,
-        immediate. = TRUE
-      )
+        call. = FALSE
+        )
     }
     feature.names[na.features, feature.column] <- feature.names[na.features, replacement.column]
   }
@@ -1183,8 +1187,30 @@ ReadMtx <- function(
 
   data <- readMM(file = all.files[['expression matrix']])
 
+  if (length(cell.names)!=ncol(data)){
+    stop(
+      "Matrix has ",
+      ncol(data),
+      " columns but found ", length(cell.names),
+      " barcodes. ",
+      ifelse(test = length(cell.names) > ncol(data), yes = "Try increasing `skip.cell`. ", no = ""),
+      call. = FALSE
+    )
+  }
+  if (length(feature.names)!=nrow(data)){
+    stop(
+      "Matrix has ",
+      nrow(data),
+      " rows but found ", length(feature.names),
+      " features. ",
+      ifelse(test = length(feature.names) > nrow(data), yes = "Try increasing `skip.feature`. ", no = ""),
+      call. = FALSE
+    )
+  }
+
   colnames(x = data) <- cell.names
   rownames(x = data) <- feature.names
+  data <- as(data, Class="dgCMatrix")
   return(data)
 }
 
diff --git a/man/ReadMtx.Rd b/man/ReadMtx.Rd
index 65c9b8243..c11d20426 100644
--- a/man/ReadMtx.Rd
+++ b/man/ReadMtx.Rd
@@ -10,6 +10,8 @@ ReadMtx(
   features,
   cell.column = 1,
   feature.column = 2,
+  skip.cell = 0,
+  skip.feature = 0,
   unique.features = TRUE,
   strip.suffix = FALSE
 )
@@ -25,6 +27,10 @@ ReadMtx(
 
 \item{feature.column}{Specify which column of features files to use for feature/gene names; default is 2}
 
+\item{skip.cell}{Number of lines to skip in the cells file before beginning to read cell names}
+
+\item{skip.feature}{Number of lines to skip in the features file before beginning to read gene names}
+
 \item{unique.features}{Make feature names unique (default TRUE)}
 
 \item{strip.suffix}{Remove trailing "-1" if present in all cell barcodes.}
diff --git a/tests/testthat/test_read_mtx.R b/tests/testthat/test_read_mtx.R
new file mode 100644
index 000000000..7c8dc1615
--- /dev/null
+++ b/tests/testthat/test_read_mtx.R
@@ -0,0 +1,32 @@
+context("ReadMtx")
+
+mtx <- "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE126nnn/GSE126836/suppl/GSE126836_SN_MD5828_matrix.mtx.gz"
+features <- "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE126nnn/GSE126836/suppl/GSE126836_SN_MD5828_genes.csv.gz"
+cells <- "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE126nnn/GSE126836/suppl/GSE126836_SN_MD5828_barcodes.csv.gz"
+counts1 <- ReadMtx(mtx = mtx, cells = cells, features = features, feature.column = 1, skip.cell = 1, skip.feature = 1)
+
+
+mtx <- "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE132044&format=file&file=GSE132044%5Fmixture%5Fhg19%5Fmm10%5Fcount%5Fmatrix%2Emtx%2Egz"
+cells <- "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE132044&format=file&file=GSE132044%5Fmixture%5Fhg19%5Fmm10%5Fcell%2Etsv%2Egz"
+features <- "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE132044&format=file&file=GSE132044%5Fmixture%5Fhg19%5Fmm10%5Fgene%2Etsv%2Egz"
+counts2 <- ReadMtx(mtx = mtx, cells = cells, features = features, feature.column = 1)
+
+
+test_that("skip.cell and skip.feature work", {
+  expect_is(counts1, "dgCMatrix")
+  expect_equal(ncol(counts1), 1436)
+  expect_equal(nrow(counts1), 29445)
+  expect_equal(colnames(counts1)[5], "MD5828a_GGGCATCCAATGAAAC-1")
+  expect_equal(rownames(counts1)[2], "A1BG-AS1")
+})
+
+
+test_that("ReadMtx works", {
+  expect_is(counts2, "dgCMatrix")
+  expect_equal(ncol(counts2), 27714)
+  expect_equal(nrow(counts2), 62046)
+  expect_equal(colnames(counts2)[1], "Mixture1.Smart-seq2.p2_A4")
+  expect_equal(rownames(counts2)[2], "hg19_ENSG00000000003_hg19_TSPAN6")
+})
+
+