Skip to content

Commit

Permalink
Merge branch 'develop' into fix/python-umap
Browse files Browse the repository at this point in the history
  • Loading branch information
mojaveazure committed Sep 17, 2019
2 parents 9915e6f + dd4a48d commit 76951a6
Show file tree
Hide file tree
Showing 10 changed files with 396 additions and 10 deletions.
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: Seurat
Version: 3.1.0.9009
Date: 2019-09-13
Version: 3.1.0.9010
Date: 2019-09-17
Title: Tools for Single Cell Genomics
Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) <doi:10.1038/nbt.3192>, Macosko E, Basu A, Satija R, et al (2015) <doi:10.1016/j.cell.2015.05.002>, and Butler A and Satija R (2017) <doi:10.1101/164889> for more details.
Authors@R: c(
Expand Down Expand Up @@ -33,6 +33,7 @@ Imports:
graphics,
grDevices,
grid,
httr,
ica,
igraph,
irlba,
Expand Down
7 changes: 7 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ export(FindNeighbors)
export(FindTransferAnchors)
export(FindVariableFeatures)
export(FontSize)
export(GeneSymbolThesarus)
export(GetAssay)
export(GetAssayData)
export(GetIntegrationData)
Expand Down Expand Up @@ -302,6 +303,7 @@ export(TopFeatures)
export(TransferData)
export(UMAPPlot)
export(UpdateSeuratObject)
export(UpdateSymbolList)
export(VariableFeaturePlot)
export(VariableFeatures)
export(VizDimLoadings)
Expand Down Expand Up @@ -439,6 +441,11 @@ importFrom(graphics,smoothScatter)
importFrom(grid,grobName)
importFrom(grid,grobTree)
importFrom(grid,unit)
importFrom(httr,GET)
importFrom(httr,accept_json)
importFrom(httr,content)
importFrom(httr,status_code)
importFrom(httr,timeout)
importFrom(ica,icafast)
importFrom(ica,icaimax)
importFrom(ica,icajade)
Expand Down
47 changes: 45 additions & 2 deletions R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,51 @@
#' \item{s.genes}{Genes associated with S-phase}
#' \item{g2m.genes}{Genes associated with G2M-phase}
#' }
#' @source http://science.sciencemag.org/content/352/6282/189
#' @source \url{http://science.sciencemag.org/content/352/6282/189}
#'
"cc.genes"

#' Cell cycle genes: 2019 update
#'
#' A list of genes used in cell-cycle regression, updated with 2019 symbols
#'
#' @section Updated symbols:
#' The following symbols were updated from \code{\link{cc.genes}}
#' \describe{
#' \item{s.genes}{
#' \itemize{
#' \item \emph{MCM2}: \emph{MCM7}
#' \item \emph{MLF1IP}: \emph{CENPU}
#' \item \emph{RPA2}: \emph{POLR1B}
#' \item \emph{BRIP1}: \emph{MRPL36}
#' }
#' }
#' \item{g2m.genes}{
#' \itemize{
#' \item \emph{FAM64A}: \emph{PIMREG}
#' \item \emph{HN1}: \emph{JPT1}
#' }
#' }
#' }
#'
#' @format A list of two vectors
#' \describe{
#' \item{s.genes}{Genes associated with S-phase}
#' \item{g2m.genes}{Genes associated with G2M-phase}
#' }
#' @source \url{http://science.sciencemag.org/content/352/6282/189}
#'
#' @seealso \code{\link{cc.genes}}
#'
#' @examples
#' \dontrun{
#' cc.genes.updated.2019 <- cc.genes
#' cc.genes.updated.2019$s.genes <- UpdateSymbolList(symbols = cc.genes.updated.2019$s.genes)
#' cc.genes.updated.2019$g2m.genes <- UpdateSymbolList(symbols = cc.genes.updated.2019$g2m.genes)
#' }
#'
"cc.genes.updated.2019"

#' A small example version of the PBMC dataset
#'
#' A subsetted version of 10X Genomics' 3k PBMC dataset
Expand All @@ -32,5 +74,6 @@
#' \item{version}{Seurat version used to create the object}
#' \item{commands}{Command history}
#' }
#' @source https://support.10xgenomics.com/single-cell-gene-expression/datasets/1.1.0/pbmc3k
#' @source \url{https://support.10xgenomics.com/single-cell-gene-expression/datasets/1.1.0/pbmc3k}
#'
"pbmc_small"
220 changes: 217 additions & 3 deletions R/utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,13 @@ NULL
#' @param assay Name of assay to use
#' @param name Name for the expression programs
#' @param seed Set a random seed
#' @param search Search for symbol synonyms for features in \code{features} that
#' don't match features in \code{object}? Searches the HGNC's gene names database;
#' see \code{\link{UpdateSymbolList}} for more details
#' @param ... Extra parameters passed to \code{\link{UpdateSymbolList}}
#'
#' @return Returns a Seurat object with module scores added to object meta data
#'
# @importFrom Hmisc cut2
#' @importFrom ggplot2 cut_number
#' @importFrom Matrix rowMeans colMeans
#'
Expand Down Expand Up @@ -70,7 +73,9 @@ AddModuleScore <- function(
k = FALSE,
assay = NULL,
name = 'Cluster',
seed = 1
seed = 1,
search = TRUE,
...
) {
set.seed(seed = seed)
assay.old <- DefaultAssay(object = object)
Expand All @@ -92,6 +97,48 @@ AddModuleScore <- function(
features <- lapply(
X = features,
FUN = function(x) {
missing.features <- setdiff(x = x, y = rownames(x = object))
if (length(x = missing.features) > 0) {
warning(
"The following features are not present in the object: ",
paste(missing.features, collapse = ", "),
ifelse(
test = search,
yes = ", attempting to find updated synonyms",
no = ", not searching for symbol synonyms"
),
call. = FALSE,
immediate. = TRUE
)
if (search) {
tryCatch(
expr = {
updated.features <- UpdateSymbolList(symbols = missing.features, ...)
names(x = updated.features) <- missing.features
for (miss in names(x = updated.features)) {
index <- which(x == miss)
x[index] <- updated.features[miss]
}
},
error = function(...) {
warning(
"Could not reach HGNC's gene names database",
call. = FALSE,
immediate. = TRUE
)
}
)
missing.features <- setdiff(x = x, y = rownames(x = object))
if (length(x = missing.features) > 0) {
warning(
"The following features are still not present in the object: ",
paste(missing.features, collapse = ", "),
call. = FALSE,
immediate. = TRUE
)
}
}
}
return(intersect(x = x, y = rownames(x = object)))
}
)
Expand Down Expand Up @@ -383,7 +430,6 @@ CellCycleScoring <- function(
set.ident = FALSE,
...
) {
CheckDots(..., fxns = 'AddModuleScore')
name <- 'Cell Cycle'
features <- list('S.Score' = s.features, 'G2M.Score' = g2m.features)
object.cc <- AddModuleScore(
Expand Down Expand Up @@ -802,6 +848,128 @@ ExpVar <- function(x) {
return(log1p(x = var(x = expm1(x = x))))
}

#' Get updated synonyms for gene symbols
#'
#' Find current gene symbols based on old or alias symbols using the gene
#' names database from the HUGO Gene Nomenclature Committee (HGNC)
#'
#' @details For each symbol passed, we query the HGNC gene names database for
#' current symbols that have the provided symbol as either an alias
#' (\code{alias_symbol}) or old (\code{prev_symbol}) symbol. All other queries
#' are \strong{not} supported.
#'
#' @note This function requires internet access
#'
#' @param symbols A vector of gene symbols
#' @param timeout Time to wait before cancelling query in seconds
#' @param several.ok Allow several current gene symbols for each provided symbol
#' @param verbose Show a progress bar depicting search progress
#' @param ... Extra parameters passed to \code{\link[httr]{GET}}
#'
#' @return For \code{GeneSymbolThesarus}, if \code{several.ok}, a named list
#' where each entry is the current symbol found for each symbol provided and the
#' names are the provided symbols. Otherwise, a named vector with the same information.
#'
#' @source \url{https://www.genenames.org/} \url{http://rest.genenames.org/}
#'
#' @importFrom utils txtProgressBar setTxtProgressBar
#' @importFrom httr GET accept_json timeout status_code content
#'
#' @rdname UpdateSymbolList
#' @name UpdateSymbolList
#'
#' @export
#'
#' @seealso \code{\link[httr]{GET}}
#'
#' @examples
#' \dontrun{
#' GeneSymbolThesarus(symbols = c("FAM64A"))
#' }
#'
GeneSymbolThesarus <- function(
  symbols,
  timeout = 10,
  several.ok = FALSE,
  verbose = TRUE,
  ...
) {
  db.url <- 'http://rest.genenames.org/fetch'
  search.types <- c('alias_symbol', 'prev_symbol')
  # Per-symbol bookkeeping, all keyed by the provided symbol
  synonyms <- vector(mode = 'list', length = length(x = symbols))
  not.found <- vector(mode = 'logical', length = length(x = symbols))
  multiple.found <- vector(mode = 'logical', length = length(x = symbols))
  names(x = multiple.found) <- names(x = not.found) <- names(x = synonyms) <- symbols
  # txtProgressBar() refuses max = 0, so only draw a bar when there is work to do
  show.progress <- verbose && length(x = symbols) > 0
  if (show.progress) {
    pb <- txtProgressBar(max = length(x = symbols), style = 3, file = stderr())
  }
  for (symbol in symbols) {
    sym.syn <- character()
    # Query the symbol once as an alias and once as a previous symbol
    for (type in search.types) {
      response <- GET(
        url = paste(db.url, type, symbol, sep = '/'),
        config = c(accept_json(), timeout(seconds = timeout)),
        ...
      )
      # Any non-OK response is treated as "nothing found" for this search type
      if (!identical(x = status_code(x = response), y = 200L)) {
        next
      }
      response <- content(x = response)
      # Only an unambiguous, single hit is accepted for a given search type
      if (response$response$numFound != 1) {
        if (response$response$numFound > 1) {
          warning(
            "Multiple hits found for ",
            symbol,
            " as ",
            type,
            ", skipping",
            call. = FALSE,
            immediate. = TRUE
          )
        }
        next
      }
      sym.syn <- c(sym.syn, response$response$docs[[1]]$symbol)
    }
    not.found[symbol] <- length(x = sym.syn) < 1
    multiple.found[symbol] <- length(x = sym.syn) > 1
    # Keep a result if it is unique, or if the caller accepts several synonyms
    if (length(x = sym.syn) == 1 || (length(x = sym.syn) > 1 && several.ok)) {
      synonyms[[symbol]] <- sym.syn
    }
    if (show.progress) {
      setTxtProgressBar(pb = pb, value = pb$getVal() + 1)
    }
  }
  if (show.progress) {
    close(con = pb)
  }
  if (sum(not.found) > 0) {
    warning(
      "The following symbols had no synonyms: ",
      paste(names(x = which(x = not.found)), collapse = ', '),
      call. = FALSE,
      immediate. = TRUE
    )
  }
  if (sum(multiple.found) > 0) {
    # collapse (not sep) so that all offending symbols end up in ONE message
    msg <- paste(
      "The following symbols had multiple synonyms:",
      paste(names(x = which(x = multiple.found)), collapse = ', ')
    )
    if (several.ok) {
      message(msg)
      message("Including anyways")
    } else {
      warning(msg, call. = FALSE, immediate. = TRUE)
    }
  }
  # Drop symbols for which nothing usable was found
  synonyms <- Filter(f = Negate(f = is.null), x = synonyms)
  if (!several.ok) {
    synonyms <- unlist(x = synonyms)
  }
  return(synonyms)
}

#' Calculate the variance to mean ratio of logged values
#'
#' Calculate the variance to mean ratio (VMR) in non-logspace (return answer in
Expand Down Expand Up @@ -988,6 +1156,52 @@ StopCellbrowser <- function() {
}
}

#' @rdname UpdateSymbolList
#'
#' @return For \code{UpdateSymbolList}, \code{symbols} with updated symbols from
#' HGNC's gene names database; symbols without an updated synonym are returned
#' unchanged and in their original position
#'
#' @export
#'
#' @examples
#' \dontrun{
#' UpdateSymbolList(symbols = cc.genes$s.genes)
#' }
#'
UpdateSymbolList <- function(
  symbols,
  timeout = 10,
  several.ok = FALSE,
  verbose = TRUE,
  ...
) {
  # Per-symbol warnings from the lookup are silenced here; a summary is
  # reported below instead
  new.symbols <- suppressWarnings(expr = GeneSymbolThesarus(
    symbols = symbols,
    timeout = timeout,
    several.ok = several.ok,
    verbose = verbose,
    ...
  ))
  if (length(x = new.symbols) < 1) {
    warning("No updated symbols found", call. = FALSE, immediate. = TRUE)
    return(symbols)
  }
  if (verbose) {
    message("Found updated symbols for ", length(x = new.symbols), " symbols")
    x <- vapply(
      X = new.symbols,
      FUN = paste,
      FUN.VALUE = character(length = 1L),
      collapse = ', '
    )
    message(paste(names(x = x), x, sep = ' -> ', collapse = '\n'))
  }
  # Replace each ORIGINAL entry independently instead of mutating `symbols`
  # in place: this stays correct when a symbol occurs more than once or when
  # an updated symbol happens to equal another symbol that is also updated
  outdated <- names(x = new.symbols)
  symbols <- unlist(x = lapply(
    X = seq_along(along.with = symbols),
    FUN = function(i) {
      sym <- as.character(x = symbols[[i]])
      if (sym %in% outdated) {
        # May expand to several symbols when several.ok = TRUE
        return(new.symbols[[sym]])
      }
      return(symbols[i])
    }
  ))
  return(symbols)
}

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Methods for Seurat-defined generics
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Expand Down
Binary file added data/cc.genes.updated.2019.rda
Binary file not shown.
9 changes: 8 additions & 1 deletion man/AddModuleScore.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 76951a6

Please sign in to comment.