From 8fc52a5f0a4fc4a8f96371436938a67fe9c811ca Mon Sep 17 00:00:00 2001
From: Saket Choudhary <saketkc@gmail.com>
Date: Sat, 6 Mar 2021 01:35:27 -0500
Subject: [PATCH] Add tests

---
 R/preprocessing.R              | 42 +++++++++++++++++++++++++++-------
 man/ReadMtx.Rd                 |  6 +++++
 tests/testthat/test_read_mtx.R | 32 ++++++++++++++++++++++++++
 3 files changed, 72 insertions(+), 8 deletions(-)
 create mode 100644 tests/testthat/test_read_mtx.R

diff --git a/R/preprocessing.R b/R/preprocessing.R
index b1323e2b9..cfac4118e 100644
--- a/R/preprocessing.R
+++ b/R/preprocessing.R
@@ -1037,6 +1037,8 @@ Read10X_Image <- function(image.dir, filter.matrix = TRUE, ...) {
 #' @param features Name or remote URL of the features/genes file
 #' @param feature.column Specify which column of features files to use for feature/gene names; default is 2
 #' @param cell.column Specify which column of cells file to use for cell names; default is 1
+#' @param skip.cell Number of lines to skip in the cells file before beginning to read cell names
+#' @param skip.feature Number of lines to skip in the features file before beginning to read gene names
 #' @param unique.features Make feature names unique (default TRUE)
 #' @param strip.suffix Remove trailing "-1" if present in all cell barcodes.
 #'
@@ -1072,6 +1074,8 @@ ReadMtx <- function(
   features,
   cell.column = 1,
   feature.column = 2,
+  skip.cell = 0,
+  skip.feature = 0,
   unique.features = TRUE,
   strip.suffix = FALSE
 ) {
@@ -1104,13 +1108,15 @@ ReadMtx <- function(
     file = all.files[['barcode list']],
     header = FALSE,
     sep = '\t',
-    row.names = NULL
+    row.names = NULL,
+    skip = skip.cell
   )
   feature.names <- read.table(
     file = all.files[['feature list']],
     header = FALSE,
     sep = '\t',
-    row.names = NULL
+    row.names = NULL,
+    skip = skip.feature
   )
   # read barcodes
   bcols <- ncol(x = cell.barcodes)
@@ -1160,9 +1166,8 @@ ReadMtx <- function(
         "Some features names are NA in column ",
         feature.column,
         ". Try specifiying a different column.",
-        call. = FALSE,
-        immediate. = TRUE
-      )
+        call. = FALSE
+        )
     } else {
       warning(
         "Some features names are NA in column ",
@@ -1170,9 +1175,8 @@ ReadMtx <- function(
         ". Replacing NA names with ID from column ",
         replacement.column,
         ".",
-        call. = FALSE,
-        immediate. = TRUE
-      )
+        call. = FALSE
+        )
     }
     feature.names[na.features, feature.column] <- feature.names[na.features, replacement.column]
   }
@@ -1183,8 +1187,30 @@ ReadMtx <- function(
 
   data <- readMM(file = all.files[['expression matrix']])
 
+  # Cell names become the matrix column names, so the two counts must agree
+  if (length(x = cell.names) != ncol(x = data)) {
+    stop(
+      "Matrix has ", ncol(x = data),
+      " columns but found ", length(x = cell.names),
+      " barcodes. ",
+      if (length(x = cell.names) > ncol(x = data)) "Try increasing `skip.cell`. " else "",
+      call. = FALSE
+    )
+  }
+  # Feature names become the matrix row names; report nrow (not ncol) on mismatch
+  if (length(x = feature.names) != nrow(x = data)) {
+    stop(
+      "Matrix has ", nrow(x = data),
+      " rows but found ", length(x = feature.names),
+      " features. ",
+      if (length(x = feature.names) > nrow(x = data)) "Try increasing `skip.feature`. " else "",
+      call. = FALSE
+    )
+  }
+
   colnames(x = data) <- cell.names
   rownames(x = data) <- feature.names
+  data <- as(data, Class="dgCMatrix")
   return(data)
 }
 
diff --git a/man/ReadMtx.Rd b/man/ReadMtx.Rd
index 65c9b8243..c11d20426 100644
--- a/man/ReadMtx.Rd
+++ b/man/ReadMtx.Rd
@@ -10,6 +10,8 @@ ReadMtx(
   features,
   cell.column = 1,
   feature.column = 2,
+  skip.cell = 0,
+  skip.feature = 0,
   unique.features = TRUE,
   strip.suffix = FALSE
 )
@@ -25,6 +27,10 @@ ReadMtx(
 
 \item{feature.column}{Specify which column of features files to use for feature/gene names; default is 2}
 
+\item{skip.cell}{Number of lines to skip in the cells file before beginning to read cell names}
+
+\item{skip.feature}{Number of lines to skip in the features file before beginning to read gene names}
+
 \item{unique.features}{Make feature names unique (default TRUE)}
 
 \item{strip.suffix}{Remove trailing "-1" if present in all cell barcodes.}
diff --git a/tests/testthat/test_read_mtx.R b/tests/testthat/test_read_mtx.R
new file mode 100644
index 000000000..7c8dc1615
--- /dev/null
+++ b/tests/testthat/test_read_mtx.R
@@ -0,0 +1,32 @@
+# ReadMtx tests download GEO files, hence the CRAN/offline skips in each test.
+context("ReadMtx")
+
+test_that("skip.cell and skip.feature work", {
+  skip_on_cran()
+  skip_if_offline()
+  mtx <- "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE126nnn/GSE126836/suppl/GSE126836_SN_MD5828_matrix.mtx.gz"
+  features <- "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE126nnn/GSE126836/suppl/GSE126836_SN_MD5828_genes.csv.gz"
+  cells <- "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE126nnn/GSE126836/suppl/GSE126836_SN_MD5828_barcodes.csv.gz"
+  # skip = 1 drops the first line of each names file (presumably a header row)
+  counts1 <- ReadMtx(mtx = mtx, cells = cells, features = features, feature.column = 1, skip.cell = 1, skip.feature = 1)
+  expect_is(counts1, "dgCMatrix")
+  expect_equal(ncol(counts1), 1436)
+  expect_equal(nrow(counts1), 29445)
+  expect_equal(colnames(counts1)[5], "MD5828a_GGGCATCCAATGAAAC-1")
+  expect_equal(rownames(counts1)[2], "A1BG-AS1")
+})
+
+test_that("ReadMtx works", {
+  skip_on_cran()
+  skip_if_offline()
+  mtx <- "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE132044&format=file&file=GSE132044%5Fmixture%5Fhg19%5Fmm10%5Fcount%5Fmatrix%2Emtx%2Egz"
+  cells <- "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE132044&format=file&file=GSE132044%5Fmixture%5Fhg19%5Fmm10%5Fcell%2Etsv%2Egz"
+  features <- "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE132044&format=file&file=GSE132044%5Fmixture%5Fhg19%5Fmm10%5Fgene%2Etsv%2Egz"
+  # No lines are skipped here: this exercises the default skip.cell / skip.feature of 0
+  counts2 <- ReadMtx(mtx = mtx, cells = cells, features = features, feature.column = 1)
+  expect_is(counts2, "dgCMatrix")
+  expect_equal(ncol(counts2), 27714)
+  expect_equal(nrow(counts2), 62046)
+  expect_equal(colnames(counts2)[1], "Mixture1.Smart-seq2.p2_A4")
+  expect_equal(rownames(counts2)[2], "hg19_ENSG00000000003_hg19_TSPAN6")
+})