% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/integration.R
\name{FindTransferAnchors}
\alias{FindTransferAnchors}
\title{Find transfer anchors}
\usage{
FindTransferAnchors(
  reference,
  query,
  normalization.method = "LogNormalize",
  recompute.residuals = TRUE,
  reference.assay = NULL,
  reference.neighbors = NULL,
  query.assay = NULL,
  reduction = "pcaproject",
  reference.reduction = NULL,
  project.query = FALSE,
  features = NULL,
  scale = TRUE,
  npcs = 30,
  l2.norm = TRUE,
  dims = 1:30,
  k.anchor = 5,
  k.filter = 200,
  k.score = 30,
  max.features = 200,
  nn.method = "annoy",
  n.trees = 50,
  eps = 0,
  approx.pca = TRUE,
  mapping.score.k = NULL,
  verbose = TRUE
)
}
\arguments{
\item{reference}{\code{\link{Seurat}} object to use as the reference}
\item{query}{\code{\link{Seurat}} object to use as the query}
\item{normalization.method}{Name of normalization method used: LogNormalize
or SCT.}
\item{recompute.residuals}{If using SCT as a normalization method, compute
query Pearson residuals using the reference SCT model parameters.}
\item{reference.assay}{Name of the Assay to use from reference}
\item{reference.neighbors}{Name of the Neighbor to use from the reference.
Optionally enables reuse of precomputed neighbors.}
\item{query.assay}{Name of the Assay to use from query}
\item{reduction}{Dimensional reduction to perform when finding anchors.
Options are:
\itemize{
\item{pcaproject: Project the PCA from the reference onto the query. We
recommend using PCA when reference and query datasets are from scRNA-seq.}
\item{lsiproject: Project the LSI from the reference onto the query. We
recommend using LSI when reference and query datasets are from scATAC-seq.
This requires that LSI has been computed for the reference dataset, and that
the same features (e.g., peaks or genome bins) are present in both the
reference and query. See \code{\link[Signac]{RunTFIDF}} and
\code{\link[Signac]{RunSVD}}.}
\item{rpca: Project the PCA from the reference onto the query, and the PCA
from the query onto the reference (reciprocal PCA projection).}
\item{cca: Run a CCA on the reference and query.}
}}
\item{reference.reduction}{Name of dimensional reduction to use from the
reference if running the pcaproject workflow. Optionally enables reuse of
precomputed reference dimensional reduction. If NULL (default), use a PCA
computed on the reference object.}
\item{project.query}{Project the PCA from the query dataset onto the
reference. Use only in rare cases where the query dataset has a much larger
number of cells, but the reference dataset has a unique assay for transfer. In
this case, the default features will be set to the variable features of the
query object that are also present in the reference.}
\item{features}{Features to use for dimensional reduction. If not specified,
set as variable features of the reference object which are also present in
the query.}
\item{scale}{Scale query data.}
\item{npcs}{Number of PCs to compute on reference if reference.reduction is
not provided.}
\item{l2.norm}{Perform L2 normalization on the cell embeddings after
dimensional reduction}
\item{dims}{Which dimensions to use from the reduction to specify the
neighbor search space}
\item{k.anchor}{How many neighbors (k) to use when finding anchors}
\item{k.filter}{How many neighbors (k) to use when filtering anchors. Set to
NA to turn off filtering.}
\item{k.score}{How many neighbors (k) to use when scoring anchors}
\item{max.features}{The maximum number of features to use when specifying the
neighborhood search space in the anchor filtering}
\item{nn.method}{Method for nearest neighbor finding. Options include: rann,
annoy}
\item{n.trees}{Number of trees for the annoy approximate nearest neighbor
search; more trees give higher precision}
\item{eps}{Error bound on the neighbor finding algorithm (from
\code{\link{RANN}} or \code{\link{RcppAnnoy}})}
\item{approx.pca}{Use truncated singular value decomposition to approximate
PCA}
\item{mapping.score.k}{Compute and store nearest k query neighbors in the
AnchorSet object that is returned. You can optionally set this if you plan
on computing the mapping score and want to enable reuse of some downstream
neighbor calculations to make the mapping score function more efficient.}
\item{verbose}{Print progress bars and output}
}
\value{
Returns an \code{AnchorSet} object that can be used as input to
\code{\link{TransferData}}, \code{\link{IntegrateEmbeddings}} and
\code{\link{MapQuery}}. The dimension reduction used for finding anchors is
stored in the \code{AnchorSet} object and can be used for computing anchor
weights in downstream functions. Note that only the requested dimensions are
stored in the dimension reduction object in the \code{AnchorSet}. This means
that if \code{dims=2:20} is used, for example, the dimension of the stored
reduction is \code{1:19}.
}
\description{
Find a set of anchors between a reference and query object. These
anchors can later be used to transfer data from the reference to the
query object using the \code{\link{TransferData}} function.
}
\details{
The main steps of this procedure are outlined below. For a more detailed
description of the methodology, please see Stuart, Butler, et al. Cell 2019.
\doi{10.1016/j.cell.2019.05.031}; \doi{10.1101/460147}
\itemize{
\item{Perform dimensional reduction. Exactly what is done here depends on
the values set for the \code{reduction} and \code{project.query}
parameters. If \code{reduction = "pcaproject"}, a PCA is performed on
either the reference (if \code{project.query = FALSE}) or the query (if
\code{project.query = TRUE}), using the \code{features} specified. The data
from the other dataset is then projected onto this learned PCA structure.
If \code{reduction = "cca"}, then CCA is performed on the reference and
query for this dimensional reduction step. If
\code{reduction = "lsiproject"}, the stored LSI dimension reduction in the
reference object is used to project the query dataset onto the reference.
If \code{l2.norm} is set to \code{TRUE}, perform L2 normalization of the
embedding vectors.}
\item{Identify anchors between the reference and query - pairs of cells
from each dataset that are contained within each other's neighborhoods
(also known as mutual nearest neighbors).}
\item{Filter low-confidence anchors to ensure anchors in the low-dimensional
space are in broad agreement with the high-dimensional measurements. This
is done by looking at the neighbors of each query cell in the reference
dataset using \code{max.features} to define this space. If the reference
cell isn't found within the first \code{k.filter} neighbors, remove the
anchor.}
\item{Assign each remaining anchor a score. For each anchor cell, determine
the nearest \code{k.score} anchors within its own dataset and within its
pair's dataset. Based on these neighborhoods, construct an overall neighbor
graph and then compute the shared neighbor overlap between anchor and query
cells (analogous to an SNN graph). We use the 0.01 and 0.90 quantiles on
these scores to dampen outlier effects and rescale the scores to a 0-1 range.}
}
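
A minimal sketch of how the \code{reduction} options above translate into
calls (this sketch is not run; it assumes \code{ref} and \code{qry} are
preprocessed \code{Seurat} objects, that the scATAC-seq reference stores an
LSI reduction named \code{"lsi"}, and that \code{dims = 2:30} is a reasonable
choice for LSI):

\preformatted{
# scRNA-seq reference and query: project the reference PCA onto the query
anchors.rna <- FindTransferAnchors(
  reference = ref, query = qry,
  reduction = "pcaproject", dims = 1:30
)

# scATAC-seq reference and query: project the reference LSI onto the query
# (assumes Signac::RunTFIDF() and Signac::RunSVD() have been run on the
# reference, and that the same peaks/bins are present in both objects)
anchors.atac <- FindTransferAnchors(
  reference = ref, query = qry,
  reduction = "lsiproject", reference.reduction = "lsi", dims = 2:30
)
}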
}
\examples{
\dontrun{
# to install the SeuratData package see https://github.com/satijalab/seurat-data
library(SeuratData)
data("pbmc3k")
# for demonstration, split the object into reference and query
pbmc.reference <- pbmc3k[, 1:1350]
pbmc.query <- pbmc3k[, 1351:2700]
# perform standard preprocessing on each object
pbmc.reference <- NormalizeData(pbmc.reference)
pbmc.reference <- FindVariableFeatures(pbmc.reference)
pbmc.reference <- ScaleData(pbmc.reference)
pbmc.query <- NormalizeData(pbmc.query)
pbmc.query <- FindVariableFeatures(pbmc.query)
pbmc.query <- ScaleData(pbmc.query)
# find anchors
anchors <- FindTransferAnchors(reference = pbmc.reference, query = pbmc.query)
# transfer labels
predictions <- TransferData(
  anchorset = anchors,
  refdata = pbmc.reference$seurat_annotations
)
pbmc.query <- AddMetaData(object = pbmc.query, metadata = predictions)
}
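# A minimal sketch of reusing a precomputed reference PCA via the
# reference.reduction argument; it assumes pbmc.reference and pbmc.query were
# prepared as in the example above, and that RunPCA() stores its reduction
# under the default name "pca"
\dontrun{
pbmc.reference <- RunPCA(pbmc.reference)
anchors.pca <- FindTransferAnchors(
  reference = pbmc.reference,
  query = pbmc.query,
  reference.reduction = "pca",
  dims = 1:30
)
predictions.pca <- TransferData(
  anchorset = anchors.pca,
  refdata = pbmc.reference$seurat_annotations
)
pbmc.query <- AddMetaData(object = pbmc.query, metadata = predictions.pca)
# the returned AnchorSet can also be passed to MapQuery() or
# IntegrateEmbeddings() for reference mapping
}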
}
\references{
Stuart T, Butler A, et al. Comprehensive Integration of
Single-Cell Data. Cell. 2019;177:1888-1902. \doi{10.1016/j.cell.2019.05.031}
}
\concept{integration}