-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathinterpolate.R
283 lines (273 loc) · 9.76 KB
/
interpolate.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
#' Create and run a kriging model grid
#'
#' Construct and run a model grid with all combinations of the different input parameter
#' configurations for the kriging model.
#'
#' @param independent An object of class \code{mobest_spatiotemporalpositions_multi}
#' as created by \link{create_spatpos_multi}. Spatiotemporal input point positions.
#' @param dependent An object of class \code{mobest_observations_multi}
#' as created by \link{create_obs_multi}. Dependent variables that should be interpolated.
#' @param kernel An object of class \code{mobest_kernelsetting_multi}
#' as created by \link{create_kernset_multi}. Kernel parameter settings.
#' @param prediction_grid An object of class \code{mobest_spatiotemporalpositions_multi}
#' as for example created by \link{create_prediction_grid}.
#' Prediction grid positions for the interpolation.
#' @param model_grid An object of class \code{mobest_modelgrid}
#' as created by \link{create_model_grid}.
#' @param unnest Logical. Should the kriging result be unnested to return a
#' prediction point-wise table of class \code{mobest_interpolgrid}?
#' @param quiet Logical. Should a progress indication be printed?
#'
#' @name interpolate
NULL
#' @rdname interpolate
#' @export
create_model_grid <- function(
independent,
dependent,
kernel,
prediction_grid
) {
# input check
checkmate::assert_class(independent, "mobest_spatiotemporalpositions_multi")
checkmate::assert_class(dependent, "mobest_observations_multi")
checkmate::assert_class(kernel, "mobest_kernelsetting_multi")
checkmate::assert_class(prediction_grid, "mobest_spatiotemporalpositions_multi")
check_compatible_multi(dependent, kernel, check_names_x_in_y)
check_compatible_multi(independent, dependent, check_df_nrow_equal)
# fill create general structure and id columns
independent_tables <- tibble::tibble(
independent_table = independent,
independent_table_id = names(independent)
)
dependent_vars <- purrr::map2_dfr(
names(dependent), dependent,
function(dependent_name, one_dependent) {
tibble::tibble(
dependent_setting_id = dependent_name,
dependent_var_id = names(one_dependent),
dependent_var = as.list(one_dependent)
)
}
)
kernel_settings <- purrr::map2_dfr(
names(kernel), kernel,
function(kernel_name, one_kernel) {
tibble::tibble(
kernel_setting_id = kernel_name,
dependent_var_id = names(one_kernel),
kernel_setting = unname(one_kernel[1:length(one_kernel)])
)
}
)
pred_grids <- tibble::tibble(
pred_grid = prediction_grid,
pred_grid_id = names(prediction_grid)
)
# expand grid and create model grid
model_grid <- create_model_grid_raw(
independent_tables = independent_tables,
dependent_vars = dependent_vars,
kernel_settings = kernel_settings,
pred_grids = pred_grids
) %>%
# make subclass of tibble
tibble::new_tibble(., nrow = nrow(.), class = "mobest_modelgrid")
return(model_grid)
}
create_model_grid_raw <- function(independent_tables, dependent_vars, kernel_settings, pred_grids) {
expand.grid(
independent_table_id = unique(independent_tables[["independent_table_id"]]),
dependent_setting_id = unique(dependent_vars[["dependent_setting_id"]]),
dependent_var_id = unique(dependent_vars[["dependent_var_id"]]),
kernel_setting_id = unique(kernel_settings[["kernel_setting_id"]]),
pred_grid_id = unique(pred_grids[["pred_grid_id"]]),
stringsAsFactors = F
) %>%
dplyr::left_join(
independent_tables, by = "independent_table_id"
) %>%
dplyr::left_join(
dependent_vars, by = c("dependent_setting_id", "dependent_var_id")
) %>%
dplyr::left_join(
kernel_settings, by = c("kernel_setting_id", "dependent_var_id")
) %>%
dplyr::left_join(
pred_grids, by = "pred_grid_id"
)
}
#' @rdname interpolate
#' @export
run_model_grid <- function(model_grid, unnest = T, quiet = F) {
# input check
checkmate::assert_class(model_grid, "mobest_modelgrid")
# run interpolation for each entry in the model_grid
if (!quiet) {
message("Running models")
pb <- progress::progress_bar$new(format = "[:bar] :current/:total (:percent)", total = nrow(model_grid))
}
prediction <- purrr::map(
1:nrow(model_grid), function(i) {
if (!quiet) { pb$tick() }
interpolate(
independent = model_grid[["independent_table"]][[i]],
dependent = model_grid[["dependent_var"]][[i]],
pred_grid = model_grid[["pred_grid"]][[i]],
# d has to be squared because of the configuration of the default laGP kernel
d = as.numeric(model_grid[["kernel_setting"]][[i]][c("dsx", "dsy", "dt")])^2,
g = model_grid[["kernel_setting"]][[i]][["g"]],
auto = model_grid[["kernel_setting"]][[i]][["auto"]],
on_residuals = model_grid[["kernel_setting"]][[i]][["on_residuals"]]
)
}
)
# simplify model_grid
if (!quiet) { message("Finishing prediction output") }
model_grid_simplified <- model_grid %>%
dplyr::mutate(
dsx = purrr::map_dbl(model_grid$kernel_setting, purrr::pluck("dsx")),
dsy = purrr::map_dbl(model_grid$kernel_setting, purrr::pluck("dsy")),
dt = purrr::map_dbl(model_grid$kernel_setting, purrr::pluck("dt")),
g = purrr::map_dbl(model_grid$kernel_setting, purrr::pluck("g"))
) %>%
dplyr::select(
-.data[["kernel_setting"]],
-.data[["independent_table"]],
-.data[["dependent_var"]],
-.data[["pred_grid"]]
)
# add prediction results for each run as a data.frame in a list column to model_grid
model_grid_simplified$prediction <- purrr::map2(
prediction, model_grid[["pred_grid"]],
function(x, y) {
pred <- data.frame(
mean = x$mean,
sd = sqrt(x$s2),
stringsAsFactors = F
)
# merge with pred_grid to add relevant spatial information
cbind(y, pred)
}
)
if (unnest) {
model_grid_simplified %>%
tidyr::unnest(cols = "prediction") %>%
# make subclass of tibble
tibble::new_tibble(., nrow = nrow(.), class = "mobest_interpolgrid") %>%
return()
} else {
return(model_grid_simplified)
}
}
#' 3D interpolation with laGP
#'
#' Performs kriging with laGPs gaussian process prediction. See
#' \code{?laGP::predGPsep} for more information.
#'
#' @param independent An object of class mobest_spatiotemporalpositions.
#' Spatiotemporal input point positions
#' @param dependent Numeric vector.
#' Dependent variable that should be interpolated
#' @param pred_grid An object of class mobest_spatiotemporalpositions.
#' Prediction grid positions for the interpolation
#' @param d Numeric vector. Lengthscale parameter.
#' See \code{?laGP::newGP} for more info
#' @param g Numeric. Nugget parameter
#' @param auto Should the lengthscale and nugget values be automatically determined
#' by laGPs maximum likelihood algorithm? See \code{?laGP::mleGPsep} for more info
#' @param on_residuals Should a linear model be wrapped around the kriging model
#' to handle the main trends independently?
#'
#' @noRd
#' @keywords internal
interpolate <- function(independent, dependent, pred_grid, d = NA, g = NA, auto = F, on_residuals = T) {
# check input
checkmate::assert_class(independent, "mobest_spatiotemporalpositions")
checkmate::assert_numeric(dependent, len = nrow(independent))
checkmate::assert_class(pred_grid, "mobest_spatiotemporalpositions")
if (on_residuals) {
# linear fit
combined <- independent %>% dplyr::mutate(d = dependent)
model <- stats::lm(d ~ x + y + z, data = combined)
dependent <- model[["residuals"]]
}
# priors for the global GP
if ((any(is.na(d)) && is.na(g)) || auto) {
da <- laGP::darg(list(mle = TRUE, max = 10), independent)
ga <- laGP::garg(list(mle = TRUE, max = 10), dependent)
d <- da$start
g <- ga$start
}
# fit the global GP
gp <- laGP::newGPsep(X = independent[, c("x", "y", "z")], Z = dependent, d = d, g = g)
# optimise fit automatically
if (auto) {
laGP::mleGPsep(
gpsepi = gp,
param = "both",
tmin = c(da$min, ga$min), tmax = c(da$max, ga$max), ab = c(da$ab, ga$ab),
maxit = 200
)
}
# predictions from the global GP on the prediction
pred <- laGP::predGPsep(gp, XX = pred_grid[, c("x", "y", "z")], lite = T)
# delete GP object
laGP::deleteGPsep(gp)
if (on_residuals) {
# add predictions from linear model again
pred$mean <- pred$mean + stats::predict(model, pred_grid[c("x", "y", "z")])
}
# return result
return(pred)
}
#### helper functions ####
check_compatible_multi <- function(x, y, comp_f, ...) {
purrr::pwalk(
get_permutations(x, y),
function(i1, i2) {
comp_f(
x[[i1]],
y[[i2]],
names(x)[i1],
names(y)[i2],
...
)
}
)
}
check_df_nrow_equal <- function(x, y, name_x, name_y) {
if (nrow(x) != nrow(y)) {
stop(name_x, " and ", name_y, ": Not the same number of lines")
}
}
check_names_x_in_y <- function(x, y, name_x, name_y, ignore_sd_cols = F) {
names_x <- names(x)
names_y <- names(y)
if (ignore_sd_cols) {
names_x <- names_x[!grepl("\\_sd$", names_x)]
names_y <- names_y[!grepl("\\_sd$", names_y)]
}
if (!setequal(intersect(names_x, names_y), names_x)) {
stop(name_x, " and ", name_y, ": First not a subset of the latter")
}
}
check_names_equal <- function(x, y, name_x, name_y, ignore_sd_cols = F) {
names_x <- names(x)
names_y <- names(y)
if (ignore_sd_cols) {
names_x <- names_x[!grepl("\\_sd$", names_x)]
names_y <- names_y[!grepl("\\_sd$", names_y)]
}
if (!setequal(names_x, names_y)) {
stop(name_x, " and ", name_y, ": Not the same names")
}
}
get_permutations <- function(x, y, include.equals = FALSE) {
expand.grid(seq_along(x), seq_along(y)) %>%
as.data.frame() %>%
magrittr::set_names(c("i1", "i2")) %>%
dplyr::filter(
.data[["i1"]] >= .data[["i2"]]
)
}