From de3196212e55df9c88e9632752c929d18b80fb7e Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Tue, 10 Sep 2019 10:57:38 -0400 Subject: [PATCH 1/9] warn for features not present in AddModuleScore --- R/utilities.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/utilities.R b/R/utilities.R index 263c72b30..d7c5e5a52 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -92,6 +92,10 @@ AddModuleScore <- function( features <- lapply( X = features, FUN = function(x) { + missing.features <- setdiff(x = x, y = rownames(x = object)) + if (length(x = missing.features) > 0) { + warning("The following features are not present in the object: ", paste(missing.features, collapse = ", "), call. = F) + } return(intersect(x = x, y = rownames(x = object))) } ) From 3df6e7157aa9c5946682964ee88e0f9a3599b2ac Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Thu, 12 Sep 2019 12:31:21 -0400 Subject: [PATCH 2/9] Add updated cell cycle gene symols New symbols present in cc.genes.updated.2019 --- R/data.R | 37 +++++++++++++++++++++++++++-- data/cc.genes.updated.2019.rda | Bin 0 -> 543 bytes man/cc.genes.Rd | 2 +- man/cc.genes.updated.2019.Rd | 42 +++++++++++++++++++++++++++++++++ man/pbmc_small.Rd | 2 +- 5 files changed, 79 insertions(+), 4 deletions(-) create mode 100644 data/cc.genes.updated.2019.rda create mode 100644 man/cc.genes.updated.2019.Rd diff --git a/R/data.R b/R/data.R index da3bf888e..90f7a6217 100644 --- a/R/data.R +++ b/R/data.R @@ -7,9 +7,41 @@ #' \item{s.genes}{Genes associated with S-phase} #' \item{g2m.genes}{Genes associated with G2M-phase} #' } -#' @source http://science.sciencemag.org/content/352/6282/189 +#' @source \url{http://science.sciencemag.org/content/352/6282/189} +#' "cc.genes" +#' Cell cycle genes: 2019 update +#' +#' A list of genes used in cell-cycle regression, updated with 2019 symbols +#' +#' @section Updated symbols: +#' The following symbols were updated from \code{\link{cc.genes}} +#' \describe{ +#' \item{s.genes}{ +#' \itemize{ +#' \item 
\emph{MLF1IP}: \emph{CENPU} +#' } +#' } +#' \item{g2m.genes}{ +#' \itemize{ +#' \item \emph{FAM64A}: \emph{PICALM} +#' \item \emph{HN1}: \emph{JPT} +#' } +#' } +#' } +#' +#' @format A list of two vectors +#' \describe{ +#' \item{s.genes}{Genes associated with S-phase} +#' \item{g2m.genes}{Genes associated with G2M-phase} +#' } +#' @source \url{http://science.sciencemag.org/content/352/6282/189} +#' +#' @seealso \code{\link{cc.genes}} +#' +"cc.genes.updated.2019" + #' A small example version of the PBMC dataset #' #' A subsetted version of 10X Genomics' 3k PBMC dataset @@ -32,5 +64,6 @@ #' \item{version}{Seurat version used to create the object} #' \item{commands}{Command history} #' } -#' @source https://support.10xgenomics.com/single-cell-gene-expression/datasets/1.1.0/pbmc3k +#' @source \url{https://support.10xgenomics.com/single-cell-gene-expression/datasets/1.1.0/pbmc3k} +#' "pbmc_small" diff --git a/data/cc.genes.updated.2019.rda b/data/cc.genes.updated.2019.rda new file mode 100644 index 0000000000000000000000000000000000000000..0cdfb579d86d727b3b47930c3b442ca5504c6c47 GIT binary patch literal 543 zcmV+)0^t2ZT4*^jL0KkKSqroIs{jIme}Mn+N&o-}f8am=-rz2SP5=M_Kmpy`U2GVE z6I0avO-HC`%6^bEXaOFYX)pn*dJ_hwjGAexV44j80Ay$kpQ)1+z!MRZ69Nor88To& zB9KOn08KWhl)wQE0il8f_Gg?`n36|1Xo^BU(;}8Cm?SBj%vh?UeFH)<#R^i9zQ-WU z2*v^l!7?K$O;ZKrUIz*qO!lN6@v}2K*O;qR7ruV+`YdL>+KmOr>M4FrfOIO0RnX%&T+ze z90pB0r?gjU6dv}sB7wW41u5>sq>lvNWK}{oDsBB(#Z9G+#=)@iiVcxFAx)z+mE*wz zKwMS_Z+#@`;OyX{PRc+96J3@SMUPV+MJY~JlwgYzdhZ|&8WEiG$vl-@EG#dH1qrme zvAG6$-%1$_GfE1>%nV{vl=v*pkc<{of)iXt#K0eGU(!PP&TV=K3KU2s7MhX`i-bVn zyOnUwdiS~h+AziR7=l%huDKRVmMmzt(rJ!KsR(PDlGCcGbXvt=h8U`?x8>JAQStQi h`s4ZTp|PXp!QJjeAA~yzY_L9yxgwk>NCnya)qp@x@4Ns2 literal 0 HcmV?d00001 diff --git a/man/cc.genes.Rd b/man/cc.genes.Rd index f1947c039..1b07701e8 100644 --- a/man/cc.genes.Rd +++ b/man/cc.genes.Rd @@ -10,7 +10,7 @@ \item{g2m.genes}{Genes associated with G2M-phase} }} \source{ -http://science.sciencemag.org/content/352/6282/189 
+\url{http://science.sciencemag.org/content/352/6282/189} } \usage{ cc.genes diff --git a/man/cc.genes.updated.2019.Rd b/man/cc.genes.updated.2019.Rd new file mode 100644 index 000000000..ffaee80ac --- /dev/null +++ b/man/cc.genes.updated.2019.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{cc.genes.updated.2019} +\alias{cc.genes.updated.2019} +\title{Cell cycle genes: 2019 update} +\format{A list of two vectors +\describe{ + \item{s.genes}{Genes associated with S-phase} + \item{g2m.genes}{Genes associated with G2M-phase} +}} +\source{ +\url{http://science.sciencemag.org/content/352/6282/189} +} +\usage{ +cc.genes.updated.2019 +} +\description{ +A list of genes used in cell-cycle regression, updated with 2019 symbols +} +\section{Updated symbols}{ + +The following symbols were updated from \code{\link{cc.genes}} +\describe{ + \item{s.genes}{ + \itemize{ + \item \emph{MLF1IP}: \emph{CENPU} + } + } + \item{g2m.genes}{ + \itemize{ + \item \emph{FAM64A}: \emph{PICALM} + \item \emph{HN1}: \emph{JPT} + } + } +} +} + +\seealso{ +\code{\link{cc.genes}} +} +\keyword{datasets} diff --git a/man/pbmc_small.Rd b/man/pbmc_small.Rd index 8b7a38389..6721bdfed 100644 --- a/man/pbmc_small.Rd +++ b/man/pbmc_small.Rd @@ -23,7 +23,7 @@ \item{commands}{Command history} }} \source{ -https://support.10xgenomics.com/single-cell-gene-expression/datasets/1.1.0/pbmc3k +\url{https://support.10xgenomics.com/single-cell-gene-expression/datasets/1.1.0/pbmc3k} } \usage{ pbmc_small From 327f718725e7542520c469c031f7c600cd611437 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Fri, 13 Sep 2019 12:28:30 -0400 Subject: [PATCH 3/9] Add new gene symbol updater functions GeneSymbolThesarus queries symbols against the HGNC's gene names database for updated symbols UpdateSymbolList modifies a symbol list with updated symbols --- NAMESPACE | 7 ++ R/utilities.R | 168 ++++++++++++++++++++++++++++++++++++++++ 
man/UpdateSymbolList.Rd | 61 +++++++++++++++ 3 files changed, 236 insertions(+) create mode 100644 man/UpdateSymbolList.Rd diff --git a/NAMESPACE b/NAMESPACE index d292182cb..671813cce 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -212,6 +212,7 @@ export(FindNeighbors) export(FindTransferAnchors) export(FindVariableFeatures) export(FontSize) +export(GeneSymbolThesarus) export(GetAssay) export(GetAssayData) export(GetIntegrationData) @@ -302,6 +303,7 @@ export(TopFeatures) export(TransferData) export(UMAPPlot) export(UpdateSeuratObject) +export(UpdateSymbolList) export(VariableFeaturePlot) export(VariableFeatures) export(VizDimLoadings) @@ -439,6 +441,11 @@ importFrom(graphics,smoothScatter) importFrom(grid,grobName) importFrom(grid,grobTree) importFrom(grid,unit) +importFrom(httr,GET) +importFrom(httr,accept_json) +importFrom(httr,content) +importFrom(httr,status_code) +importFrom(httr,timeout) importFrom(ica,icafast) importFrom(ica,icaimax) importFrom(ica,icajade) diff --git a/R/utilities.R b/R/utilities.R index d7c5e5a52..e1f79f76c 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -806,6 +806,128 @@ ExpVar <- function(x) { return(log1p(x = var(x = expm1(x = x)))) } +#' Get updated synonyms for gene symbols +#' +#' Find current gene symbols based on old or alias symbols using the gene +#' names database from the HUGO Gene Nomenclature Committee (HGNC) +#' +#' @details For each symbol passed, we query the HGNC gene names database for +#' current symbols that have the provided symbol as either an alias +#' (\code{alias_symbol}) or old (\code{prev_symbol}) symbol. All other queries +#' are \strong{not} supported. +#' +#' @note This function requires internet access +#' +#' @param symbols A vector of gene symbols +#' @param timeout Time to wait before cancelling query in seconds +#' @param several.ok Allow several current gene symbols for each provided symbol +#' @param verbose Show a progress bar depicting search progress +#' @param ... 
Extra parameters passed to \code{\link[httr]{GET}} +#' +#' @return For \code{GeneSymbolThesarus}, if \code{several.ok}, a named list +#' where each entry is the current symbol found for each symbol provided and the +#' names are the provided symbols. Otherwise, a named vector with the same information. +#' +#' @source \url{https://www.genenames.org/} \url{http://rest.genenames.org/} +#' +#' @importFrom utils txtProgressBar setTxtProgressBar +#' @importFrom httr GET accept_json timeout status_code content +#' +#' @rdname UpdateSymbolList +#' @name UpdateSymbolList +#' +#' @export +#' +#' @seealso \code{\link[httr]{GET}} +#' +#' @examples +#' \dontrun{ +#' GeneSymbolThesarus(symbols = c("FAM64A")) +#' } +#' +GeneSymbolThesarus <- function( + symbols, + timeout = 10, + several.ok = FALSE, + verbose = TRUE, + ... +) { + db.url <- 'http://rest.genenames.org/fetch' + search.types <- c('alias_symbol', 'prev_symbol') + synonyms <- vector(mode = 'list', length = length(x = symbols)) + not.found <- vector(mode = 'logical', length = length(x = symbols)) + multiple.found <- vector(mode = 'logical', length = length(x = symbols)) + names(x = multiple.found) <- names(x = not.found) <- names(x = synonyms) <- symbols + if (verbose) { + pb <- txtProgressBar(max = length(x = symbols), style = 3, file = stderr()) + } + for (symbol in symbols) { + sym.syn <- character() + for (type in search.types) { + response <- GET( + url = paste(db.url, type, symbol, sep = '/'), + config = c(accept_json(), timeout(seconds = timeout)), + ... + ) + if (!identical(x = status_code(x = response), y = 200L)) { + next + } + response <- content(x = response) + if (response$response$numFound != 1) { + if (response$response$numFound > 1) { + warning( + "Multiple hits found for ", + symbol, + " as ", + type, + ", skipping", + call. = FALSE, + immediate. 
= TRUE + ) + } + next + } + sym.syn <- c(sym.syn, response$response$docs[[1]]$symbol) + } + not.found[symbol] <- length(x = sym.syn) < 1 + multiple.found[symbol] <- length(x = sym.syn) > 1 + if (length(x = sym.syn) == 1 || (length(x = sym.syn) > 1 && several.ok)) { + synonyms[[symbol]] <- sym.syn + } + if (verbose) { + setTxtProgressBar(pb = pb, value = pb$getVal() + 1) + } + } + if (verbose) { + close(con = pb) + } + if (sum(not.found) > 0) { + warning( + "The following symbols had no synonyms: ", + paste(names(x = which(x = not.found)), collapse = ', '), + call. = FALSE, + immediate. = TRUE + ) + } + if (sum(multiple.found) > 0) { + msg <- paste( + "The following symbols had multiple synonyms:", + paste(names(x = which(x = multiple.found)), collapse = ', ') + ) + if (several.ok) { + message(msg) + message("Including anyways") + } else { + warning(msg, call. = FALSE, immediate. = TRUE) + } + } + synonyms <- Filter(f = Negate(f = is.null), x = synonyms) + if (!several.ok) { + synonyms <- unlist(x = synonyms) + } + return(synonyms) +} + #' Calculate the variance to mean ratio of logged values #' #' Calculate the variance to mean ratio (VMR) in non-logspace (return answer in @@ -992,6 +1114,52 @@ StopCellbrowser <- function() { } } +#' @rdname UpdateSymbolList +#' +#' @return For \code{UpdateSymbolList}, \code{symbols} with updated symbols from +#' HGNC's gene names database +#' +#' @export +#' +#' @examples +#' \dontrun{ +#' UpdateSymbolList(symbols = cc.genes$s.genes) +#' } +#' +UpdateSymbolList <- function( + symbols, + timeout = 10, + several.ok = FALSE, + verbose = TRUE, + ... +) { + new.symbols <- suppressWarnings(expr = GeneSymbolThesarus( + symbols = symbols, + timeout = timeout, + several.ok = several.ok, + verbose = verbose, + ... + )) + if (length(x = new.symbols) < 1) { + warning("No updated symbols found", call. = FALSE, immediate. 
= TRUE) + } else { + if (verbose) { + message("Found updated symbols for ", length(x = new.symbols), " symbols") + x <- sapply(X = new.symbols, FUN = paste, collapse = ', ') + message(paste(names(x = x), x, sep = ' -> ', collapse = '\n')) + } + for (sym in names(x = new.symbols)) { + index <- which(x = symbols == sym) + symbols <- append( + x = symbols[-index], + values = new.symbols[[sym]], + after = index - 1 + ) + } + } + return(symbols) +} + #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Methods for Seurat-defined generics #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/man/UpdateSymbolList.Rd b/man/UpdateSymbolList.Rd new file mode 100644 index 000000000..58d378599 --- /dev/null +++ b/man/UpdateSymbolList.Rd @@ -0,0 +1,61 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utilities.R +\name{UpdateSymbolList} +\alias{UpdateSymbolList} +\alias{GeneSymbolThesarus} +\title{Get updated synonyms for gene symbols} +\source{ +\url{https://www.genenames.org/} \url{http://rest.genenames.org/} +} +\usage{ +GeneSymbolThesarus(symbols, timeout = 10, several.ok = FALSE, + verbose = TRUE, ...) + +UpdateSymbolList(symbols, timeout = 10, several.ok = FALSE, + verbose = TRUE, ...) +} +\arguments{ +\item{symbols}{A vector of gene symbols} + +\item{timeout}{Time to wait before cancelling query in seconds} + +\item{several.ok}{Allow several current gene sybmols for each provided symbol} + +\item{verbose}{Show a progress bar depicting search progress} + +\item{...}{Extra parameters passed to \code{\link[httr]{GET}}} +} +\value{ +For \code{GeneSymbolThesarus}, if \code{several.ok}, a named list +where each entry is the current symbol found for each symbol provided and the +names are the provided symbols. Otherwise, a named vector with the same information. 
+ +For \code{UpdateSymbolList}, \code{symbols} with updated symbols from +HGNC's gene names database +} +\description{ +Find current gene symbols based on old or alias symbols using the gene +names database from the HUGO Gene Nomenclature Committee (HGNC) +} +\details{ +For each symbol passed, we query the HGNC gene names database for +current symbols that have the provided symbol as either an alias +(\code{alias_symbol}) or old (\code{prev_symbol}) symbol. All other queries +are \strong{not} supported. +} +\note{ +This function requires internet access +} +\examples{ +\dontrun{ +GeneSymbolThesarus(symbols = c("FAM64A")) +} + +\dontrun{ +UpdateSymbolList(symbols = cc.genes$s.genes) +} + +} +\seealso{ +\code{\link[httr]{GET}} +} From d28233a843ede385acdee1cae7e9fb9d70e76078 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Fri, 13 Sep 2019 12:31:04 -0400 Subject: [PATCH 4/9] Update cc.genes.updated.2019 using UpdateSymbolList --- R/data.R | 14 ++++++++++++-- data/cc.genes.updated.2019.rda | Bin 543 -> 538 bytes man/cc.genes.updated.2019.Rd | 15 +++++++++++++-- 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/R/data.R b/R/data.R index 90f7a6217..2a80e6d85 100644 --- a/R/data.R +++ b/R/data.R @@ -20,13 +20,16 @@ #' \describe{ #' \item{s.genes}{ #' \itemize{ +#' \item \emph{MCM2}: \emph{MCM7} #' \item \emph{MLF1IP}: \emph{CENPU} +#' \item \emph{RPA2}: \emph{POLR1B} +#' \item \emph{BRIP1}: \emph{MRPL36} #' } #' } #' \item{g2m.genes}{ #' \itemize{ -#' \item \emph{FAM64A}: \emph{PICALM} -#' \item \emph{HN1}: \emph{JPT} +#' \item \emph{FAM64A}: \emph{PIMREG} +#' \item \emph{HN1}: \emph{JPT1} #' } #' } #' } @@ -40,6 +43,13 @@ #' #' @seealso \code{\link{cc.genes}} #' +#' @examples +#' \dontrun{ +#' cc.genes.updated.2019 <- cc.genes +#' cc.genes.updated.2019$s.genes <- UpdateSymbolList(symbols = cc.genes.updated.2019$s.genes) +#' cc.genes.updated.2019$g2m.genes <- UpdateSymbolList(symbols = cc.genes.updated.2019$g2m.genes) +#' } +#' "cc.genes.updated.2019" #' A 
small example version of the PBMC dataset diff --git a/data/cc.genes.updated.2019.rda b/data/cc.genes.updated.2019.rda index 0cdfb579d86d727b3b47930c3b442ca5504c6c47..2af1adfaf1b0b80c18dcdd5f2e80917e2e828c5d 100644 GIT binary patch delta 527 zcmV+q0`UEx1eyd9LRx4!F+o`-Q(2CEuz&ypgpm;!e{Hp}VkV7EJyXdVHlWke42=M2 zX`ldVo|*s%(^DkPKpFr6paTE^00E!?0003Zh*A2J^-PTno=rn0h9->~X_2Oy1o@{l zax{`g`m2p4AM!>fi&_#CoSHCOqqZTMY`Gw#JBFw&7&kEyH%UZvLdsgV8Y&uXRKkT! zQi7;Bf4N7hSNRjvxh8F-4R|#K7s`(UNU|P52b^|z>f?rh052pUO(BgHdBG%r@L~ZP z=LJhP{@mQ^&>L!GfAQDRubOs(MTxeZh$vq=Lvfx}0qu|# z98SY;Gyagc2&!mh3Y`V@oE`DrzGE^#Zq+IAZL6 zbsH4pOeV5yka=bzOaba^`U>gW99~2Pit+}Bngig)%J8Be42q*t$(nD3y&b`ZISPVO zK~GEy%f75=Qi-n`>Xad_X_n2oo3d*YL^1<3sb21j_QUU=FFyW#=!*>=>_*?NVm}Hs Ri7vqYF64@Ep&=amVF2iX=Dz>{ delta 532 zcmV+v0_**n1fK*ELRx4!F+o`-Q&|hM`Ktf|f{_syf7@Mb7=aU0)cs9IsA$T5kThrk z9-3(|0jhcv2BwUfX{unF4FCXSXbhjJlN7)c5t9=F3~3oMU_l~~MvVYXHm8)p0Sy76 zf&}(woK={TM>%MULO#{}_ z3K~rIf23h3At7zg=;Bno9Z@7qy0^CUYGjQv22ynr!c!81vQ3JF$|52rv>;6(jS}pD z0u0RnBj(%`jOG7h^~sq>lvNWK}{oDsBB(#Z9G+#=)@iiVcxFAx)z+mE*wzKwMS_Z+#@`;OyX{ zPRc+96J3@SMUPV+MJY~JlwgYzdhZ|&8WEiG$vl-@EG#dH1qrmevAG6$-%1$_GfE1> zddv)BQiNvQ~Hnv&D1sdQS!V1^i~t+(aZKT+}Y^ZMiY?xC@x=E2?W WL?47Z32d-Fi@744C`bj_{MCRO-0B1X diff --git a/man/cc.genes.updated.2019.Rd b/man/cc.genes.updated.2019.Rd index ffaee80ac..def9478ad 100644 --- a/man/cc.genes.updated.2019.Rd +++ b/man/cc.genes.updated.2019.Rd @@ -24,18 +24,29 @@ The following symbols were updated from \code{\link{cc.genes}} \describe{ \item{s.genes}{ \itemize{ + \item \emph{MCM2}: \emph{MCM7} \item \emph{MLF1IP}: \emph{CENPU} + \item \emph{RPA2}: \emph{POLR1B} + \item \emph{BRIP1}: \emph{MRPL36} } } \item{g2m.genes}{ \itemize{ - \item \emph{FAM64A}: \emph{PICALM} - \item \emph{HN1}: \emph{JPT} + \item \emph{FAM64A}: \emph{PIMREG} + \item \emph{HN1}: \emph{JPT1} } } } } +\examples{ +\dontrun{ +cc.genes.updated.2019 <- cc.genes +cc.genes.updated.2019$s.genes <- UpdateSymbolList(symbols = cc.genes.updated.2019$s.genes) 
+cc.genes.updated.2019$g2m.genes <- UpdateSymbolList(symbols = cc.genes.updated.2019$g2m.genes) +} + +} \seealso{ \code{\link{cc.genes}} } From 1c3c90cfbb53eb62642260eccc8ea3f960c532d9 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Fri, 13 Sep 2019 12:52:12 -0400 Subject: [PATCH 5/9] Fix dependency issues --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index f5927dfda..24286ce12 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -33,6 +33,7 @@ Imports: graphics, grDevices, grid, + httr, ica, igraph, irlba, From 95f4cceae7007fe38b788827a6d1c6531d596520 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Fri, 13 Sep 2019 14:57:26 -0400 Subject: [PATCH 6/9] Allow AddModuleScore to update feature lists with new gene symbols --- R/utilities.R | 39 ++++++++++++++++++++++++++++++++++++--- man/AddModuleScore.Rd | 4 +++- 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index e1f79f76c..90e9ac87f 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -22,10 +22,10 @@ NULL #' @param assay Name of assay to use #' @param name Name for the expression programs #' @param seed Set a random seed +#' @param ... Extra parameters passed to \code{\link{UpdateSymbolList}} #' #' @return Returns a Seurat object with module scores added to object meta data #' -# @importFrom Hmisc cut2 #' @importFrom ggplot2 cut_number #' @importFrom Matrix rowMeans colMeans #' @@ -70,7 +70,8 @@ AddModuleScore <- function( k = FALSE, assay = NULL, name = 'Cluster', - seed = 1 + seed = 1, + ... ) { set.seed(seed = seed) assay.old <- DefaultAssay(object = object) @@ -94,7 +95,39 @@ AddModuleScore <- function( FUN = function(x) { missing.features <- setdiff(x = x, y = rownames(x = object)) if (length(x = missing.features) > 0) { - warning("The following features are not present in the object: ", paste(missing.features, collapse = ", "), call. 
= F) + warning( + "The following features are not present in the object: ", + paste(missing.features, collapse = ", "), + ", attempting to find updated synonyms", + call. = FALSE, + immediate. = TRUE + ) + tryCatch( + expr = { + updated.features <- UpdateSymbolList(symbols = missing.features, ...) + names(x = updated.features) <- missing.features + for (miss in names(x = updated.features)) { + index <- which(x == miss) + x[index] <- updated.features[miss] + } + }, + error = function(...) { + warning( + "Could not reach HGNC's gene names database", + call. = FALSE, + immediate. = TRUE + ) + } + ) + missing.features <- setdiff(x = x, y = rownames(x = object)) + if (length(x = missing.features) > 0) { + warning( + "The following features are still not present in the object: ", + paste(missing.features, collapse = ", "), + call. = FALSE, + immediate. = TRUE + ) + } } return(intersect(x = x, y = rownames(x = object))) } diff --git a/man/AddModuleScore.Rd b/man/AddModuleScore.Rd index f28fc6ad3..2f240bddb 100644 --- a/man/AddModuleScore.Rd +++ b/man/AddModuleScore.Rd @@ -5,7 +5,7 @@ \title{Calculate module scores for feature expression programs in single cells} \usage{ AddModuleScore(object, features, pool = NULL, nbin = 24, ctrl = 100, - k = FALSE, assay = NULL, name = "Cluster", seed = 1) + k = FALSE, assay = NULL, name = "Cluster", seed = 1, ...) 
} \arguments{ \item{object}{Seurat object} @@ -25,6 +25,8 @@ AddModuleScore(object, features, pool = NULL, nbin = 24, ctrl = 100, \item{name}{Name for the expression programs} \item{seed}{Set a random seed} + +\item{...}{Extra parameters passed to \code{\link{UpdateSymbolList}}} } \value{ Returns a Seurat object with module scores added to object meta data From 471c6089efed546143ef44e5111ed9a041b691e3 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Mon, 16 Sep 2019 15:54:38 -0400 Subject: [PATCH 7/9] Make searching optional --- R/utilities.R | 50 ++++++++++++++++++++++++++----------------- man/AddModuleScore.Rd | 7 +++++- 2 files changed, 36 insertions(+), 21 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index 90e9ac87f..c37a73f85 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -22,6 +22,9 @@ NULL #' @param assay Name of assay to use #' @param name Name for the expression programs #' @param seed Set a random seed +#' @param search Search for symbol synonyms for features in \code{features} that +#' don't match features in \code{object}? Searches the HGNC's gene names database; +#' see \code{\link{UpdateSymbolList}} for more details #' @param ... Extra parameters passed to \code{\link{UpdateSymbolList}} #' #' @return Returns a Seurat object with module scores added to object meta data @@ -71,6 +74,7 @@ AddModuleScore <- function( assay = NULL, name = 'Cluster', seed = 1, + search = TRUE, ... ) { set.seed(seed = seed) @@ -98,35 +102,41 @@ AddModuleScore <- function( warning( "The following features are not present in the object: ", paste(missing.features, collapse = ", "), - ", attempting to find updated synonyms", + ifelse( + test = search, + yes = ", attempting to find updated synonyms", + no = "Not searching for symbol synonyms" + ), call. = FALSE, immediate. = TRUE ) - tryCatch( - expr = { - updated.features <- UpdateSymbolList(symbols = missing.features, ...) 
- names(x = updated.features) <- missing.features - for (miss in names(x = updated.features)) { - index <- which(x == miss) - x[index] <- updated.features[miss] + if (search) { + tryCatch( + expr = { + updated.features <- UpdateSymbolList(symbols = missing.features, ...) + names(x = updated.features) <- missing.features + for (miss in names(x = updated.features)) { + index <- which(x == miss) + x[index] <- updated.features[miss] + } + }, + error = function(...) { + warning( + "Could not reach HGNC's gene names database", + call. = FALSE, + immediate. = TRUE + ) } - }, - error = function(...) { + ) + missing.features <- setdiff(x = x, y = rownames(x = object)) + if (length(x = missing.features) > 0) { warning( - "Could not reach HGNC's gene names database", + "The following features are still not present in the object: ", + paste(missing.features, collapse = ", "), call. = FALSE, immediate. = TRUE ) } - ) - missing.features <- setdiff(x = x, y = rownames(x = object)) - if (length(x = missing.features) > 0) { - warning( - "The following features are still not present in the object: ", - paste(missing.features, collapse = ", "), - call. = FALSE, - immediate. = TRUE - ) } } return(intersect(x = x, y = rownames(x = object))) diff --git a/man/AddModuleScore.Rd b/man/AddModuleScore.Rd index 2f240bddb..440221f5f 100644 --- a/man/AddModuleScore.Rd +++ b/man/AddModuleScore.Rd @@ -5,7 +5,8 @@ \title{Calculate module scores for feature expression programs in single cells} \usage{ AddModuleScore(object, features, pool = NULL, nbin = 24, ctrl = 100, - k = FALSE, assay = NULL, name = "Cluster", seed = 1, ...) + k = FALSE, assay = NULL, name = "Cluster", seed = 1, + search = TRUE, ...) } \arguments{ \item{object}{Seurat object} @@ -26,6 +27,10 @@ AddModuleScore(object, features, pool = NULL, nbin = 24, ctrl = 100, \item{seed}{Set a random seed} +\item{search}{Search for symbol synonyms for features in \code{features} that +don't match features in \code{object}? 
Searches the HGNC's gene names database; +see \code{\link{UpdateSymbolList}} for more details} + \item{...}{Extra parameters passed to \code{\link{UpdateSymbolList}}} } \value{ From 20f54f2a3bf19ce312a329b1013c395669096e01 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Tue, 17 Sep 2019 09:56:52 -0400 Subject: [PATCH 8/9] minor message formatting, remove unnecessary CheckDots --- R/utilities.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index c37a73f85..a9471fc75 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -105,7 +105,7 @@ AddModuleScore <- function( ifelse( test = search, yes = ", attempting to find updated synonyms", - no = "Not searching for symbol synonyms" + no = ", not searching for symbol synonyms" ), call. = FALSE, immediate. = TRUE @@ -430,7 +430,6 @@ CellCycleScoring <- function( set.ident = FALSE, ... ) { - CheckDots(..., fxns = 'AddModuleScore') name <- 'Cell Cycle' features <- list('S.Score' = s.features, 'G2M.Score' = g2m.features) object.cc <- AddModuleScore( From b2b2f5d7c0ac09aa3f2a91ec90ab3c92f9591acf Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Tue, 17 Sep 2019 15:19:54 -0400 Subject: [PATCH 9/9] bump develop version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 24286ce12..d142bf570 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 3.1.0.9009 -Date: 2019-09-13 +Version: 3.1.0.9010 +Date: 2019-09-17 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Butler A and Satija R (2017) for more details. Authors@R: c(