forked from satijalab/seurat
-
Notifications
You must be signed in to change notification settings - Fork 0
/
spatial.R
320 lines (317 loc) · 9.46 KB
/
spatial.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
#' Get cell centroids
#'
#' Calculate the spatial mapping centroids for each cell, based on previously
#' calculated mapping probabilities for each bin.
#'
#' Currently, Seurat assumes that the tissue of interest has an 8x8 bin
#' structure. This will be broadened in a future release.
#'
#' @param object Seurat object
#' @param cells.use Cells to calculate centroids for (default is all cells)
#' @param get.exact Get exact centroid (Default is TRUE). If FALSE, identify
#' the single closest bin.
#'
#' @return Data frame containing the x and y coordinates for each cell
#' centroid.
#'
#' @export
#'
#' @examples
#' \dontrun{
#' # Note that the PBMC test example object does not contain spatially restricted
#' # examples below are only demonstrate code
#' pmbc_small <- GetCentroids(pbmc_small, [email protected])
#' }
#'
GetCentroids <- function(object, cells.use = NULL, get.exact = TRUE) {
cells.use <- SetIfNull(x = cells.use, default = colnames(x = object@spatial@finalprob))
#Error checking
cell.names <- intersect(x = cells.use, y = colnames(x = object@spatial@finalprob))
if (length(x = cell.names) != length(x = cells.use)) {
print(paste(
"Error",
setdiff(x = cells.use, y = colnames(x = object@spatial@finalprob)),
" have not been mapped"
))
return(0)
}
if (get.exact) {
my.centroids <- data.frame(t(x = sapply(
X = colnames(x = object@data),
FUN = function(x) {
return(ExactCellCentroid(cell.probs = object@spatial@finalprob[, x]))
}
)))
} else {
my.centroids <- data.frame(t(x = sapply(
X = colnames(x = object@data),
FUN = function(x) {
return(CellCentroid(cell.probs = object@spatial@finalprob[, x]))
}
)))
}
colnames(x = my.centroids) <- c("bin.x","bin.y")
return(my.centroids)
}
#' Quantitative refinement of spatial inferences
#'
#' Refines the initial mapping with more complex models that allow gene
#' expression to vary quantitatively across bins (instead of 'on' or 'off'),
#' and that also considers the covariance structure between genes.
#'
#' Full details given in spatial mapping manuscript.
#'
#' @param object Seurat object
#' @param genes.use Genes to use to drive the refinement procedure.
#'
#' @return Seurat object, where mapping probabilities for each bin are stored
#' in object@@final.prob
#'
#' @import fpc
#' @importFrom stats cov
#'
#' @export
#'
#' @examples
#' \dontrun{
#' # Note that the PBMC test example object does not contain spatially restricted
#' # examples below are only demonstrate code
#' pmbc_small <- RefinedMapping(pbmc_small, [email protected])
#' }
#'
RefinedMapping <- function(object, genes.use) {
genes.use <- intersect(x = genes.use, y = rownames(x = object@imputed))
cells.max <- t(x = sapply(
X = colnames(object@data),
FUN = function(x) {
return(ExactCellCentroid(object@spatial@finalprob[, x]))
}
))
all.mu <- sapply(
X = genes.use,
FUN = function(gene) {
return(sapply(X = 1:64, FUN = function(bin) {
mean(x = as.numeric(x = object@imputed[
gene, # Row
FetchClosest(
bin = bin,
all.centroids = cells.max,
num.cell = 2*length(x = genes.use)
) # Column
]))
}))
}
)
all.cov <- list()
for (x in 1:64) {
all.cov[[x]] <- cov(
x = t(
x = object@imputed[
genes.use, # Row
FetchClosest(
bin = x,
all.centroids = cells.max,
num.cell = 2*length(x = genes.use)
) # Columns
]
)
)
}
mv.probs <- sapply(
X = colnames(x = object@data),
FUN = function(my.cell) {
return(sapply(X = 1:64, FUN = function(bin) {
return(slimdmvnorm(
x = as.numeric(x = object@imputed[genes.use, my.cell]),
mean = as.numeric(x = all.mu[bin, genes.use]),
sigma = all.cov[[bin]])
)
}))
}
)
mv.final <- exp(
x = sweep(
x = mv.probs,
MARGIN = 2,
STATS = apply(X = mv.probs, MARGIN = 2, FUN = LogAdd)
)
)
object@spatial@finalprob <- data.frame(mv.final)
return(object)
}
#' Infer spatial origins for single cells
#'
#' Probabilistically maps single cells based on (imputed) gene expression
#' estimates, a set of mixture models, and an in situ spatial reference map.
#'
#' @param object Seurat object
#' @param cells.use Which cells to map
#'
#' @return Seurat object, where mapping probabilities for each bin are stored
#' in object@@final.prob
#'
#' @export
#'
#' @examples
#' \dontrun{
#' # Note that the PBMC test example object does not contain spatially restricted
#' # examples below are only demonstrate code
#' pmbc_small <- InitialMapping(pbmc_small)
#' }
#'
InitialMapping <- function(object, cells.use = NULL) {
cells.use <- SetIfNull(x = cells.use, default = colnames(x = object@data))
every.prob <- sapply(
X = cells.use,
FUN = function(x) {
return(MapCell(
object = object,
cell.name = x,
do.plot = FALSE,
safe.use = FALSE
))
}
)
object@spatial@finalprob <- data.frame(every.prob)
rownames(x = object@spatial@finalprob) <- paste0("bin.", rownames(x = object@spatial@finalprob))
return(object)
}
#' Build mixture models of gene expression
#'
#' Models the imputed gene expression values as a mixture of gaussian
#' distributions. For a two-state model, estimates the probability that a given
#' cell is in the 'on' or 'off' state for any gene. Followed by a greedy
#' k-means step where cells are allowed to flip states based on the overall
#' structure of the data (see Manuscript for details)
#'
#' @param object Seurat object
#' @param gene Gene to fit
#' @param do.k Number of modes for the mixture model (default is 2)
#' @param num.iter Number of 'greedy k-means' iterations (default is 1)
#' @param do.plot Plot mixture model results
#' @param genes.use Genes to use in the greedy k-means step (See manuscript for details)
#' @param start.pct Initial estimates of the percentage of cells in the 'on'
#' state (usually estimated from the in situ map)
#'
#' @return A Seurat object, where the posterior of each cell being in the 'on'
#' or 'off' state for each gene is stored in object@@spatial@@mix.probs
#'
#' @importFrom graphics hist lines
#' @importFrom mixtools normalmixEM
#' @importFrom stats dnorm sd quantile
#'
#' @export
#'
#' @examples
#' \dontrun{
#' # Note that the PBMC test example object does not contain spatially restricted
#' # examples below are only demonstrate code
#' pmbc_small <- FitGeneK(object = pbmc_small, gene = "MS4A1")
#' }
#'
FitGeneK <- function(
object,
gene,
do.k = 2,
num.iter = 1,
do.plot = FALSE,
genes.use = NULL,
start.pct = NULL
) {
data <- object@imputed
data.use <- data[gene, ]
names(x = data.use) <- colnames(x = data.use)
scale.data <- t(x = scale(x = t(x = object@imputed)))
genes.use <- SetIfNull(x = genes.use, default = rownames(x = scale.data))
genes.use <- genes.use[genes.use %in% rownames(x = scale.data)]
scale.data <- scale.data[genes.use, ]
data.cut <- as.numeric(x = data.use[gene, ])
cell.ident <- as.numeric(x = cut(x = data.cut, breaks = do.k))
if (! (is.null(x = start.pct))) {
cell.ident <- rep.int(x = 1, times = length(x = data.cut))
cell.ident[data.cut > quantile(x = data.cut, probs = 1 - start.pct)] <- 2
}
cell.ident <- order(tapply(
X = as.numeric(x = data.use),
INDEX = cell.ident,
FUN = mean
))[cell.ident]
ident.table <- table(cell.ident)
if (num.iter > 0) {
for (i2 in 1:num.iter) {
cell.ident <- iter.k.fit(
scale.data = scale.data,
cell.ident = cell.ident,
data.use = data.use
)
ident.table <- table(cell.ident)
}
}
ident.table <- table(cell.ident)
raw.probs <- t(
x = sapply(
X = data.use,
FUN = function(y) {
return(unlist(
x = lapply(
X = 1:do.k,
FUN = function(x) {
return(
(ident.table[x] / sum(ident.table)) * dnorm(
x = y,
mean = mean(x = as.numeric(x = data.use[cell.ident == x])),
sd = sd(x = as.numeric(x = data.use[cell.ident == x]))
)
)
}
)
))
}
)
)
norm.probs <- raw.probs / apply(X = raw.probs, MARGIN = 1, FUN = sum)
colnames(x = norm.probs) <- unlist(
x = lapply(
X = 1:do.k,
FUN = function(x) {
paste(gene, x - 1, "post", sep=".")
}
)
)
norm.probs <- cbind(norm.probs, cell.ident)
colnames(x = norm.probs)[ncol(x = norm.probs)] <- paste0(gene, ".ident")
new.mix.probs <- data.frame(
SubsetColumn(
data = object@[email protected],
code = paste0(gene, "."),
invert = TRUE
),
row.names = rownames(x = object@[email protected])
)
colnames(x = new.mix.probs)[1] <- "nGene"
object@[email protected] <- cbind(new.mix.probs, norm.probs)
if (do.plot) {
nCol <- 2
num.row <- floor(x = (do.k + 1) / nCol - (1e-5)) + 1
hist(
x = as.numeric(x = data.use),
probability = TRUE,
ylim = c(0, 1),
xlab = gene,
main = gene
)
for (i in 1:do.k) {
lines(
x = seq(from = -10, to = 10, by = 0.01),
y = (ident.table[i] / sum(ident.table)) * dnorm(
x = seq(from = -10, to = 10, by = 0.01),
mean = mean(x = as.numeric(x = data.use[cell.ident == i])),
sd = sd(x = as.numeric(x = data.use[cell.ident == i]))
),
col=i,
lwd=2
)
}
}
return(object)
}