-
Notifications
You must be signed in to change notification settings - Fork 1
/
control.R
299 lines (273 loc) · 15.5 KB
/
control.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
#' @title Control parameters for regression
#' @author Piotr Chlebicki, Maciej Beręsewicz
#' \loadmathjax
#'
#' @description \code{controlMethod} constructs a list with all necessary
#' control parameters for regression fitting in
#' \code{estimatePopsizeFit} and \code{estimatePopsize}.
#'
#' @param epsilon tolerance for fitting algorithms by default \code{1e-8}.
#' @param maxiter maximum number of iterations.
#' @param verbose value indicating whether to trace steps of fitting algorithm for
#' \code{IRLS} fitting method different values of verbose give the following information:
#' \itemize{
#' \item 1 -- Returns information on the number of current
#' iteration and current log-likelihood.
#' \item 2 -- Returns information on vector of regression parameters
#' at current iteration (and all of the above).
#' \item 3 -- Returns information on reduction of log-likelihood
#' at current iteration (and all of the above).
#' \item 4 -- Returns information on value of log-likelihood function gradient
#' at current iteration (and all of the above).
#' \item 5 -- Returns information on convergence criterion and values that are
#' taken into account when considering convergence (and all of the above).
#' }
#' if \code{optim} method was chosen verbose will be passed to [stats::optim()] as trace.
#' @param printEveryN integer value indicating how often to print information
#' specified in \code{verbose}, by default set to \code{1}.
#' @param coefStart,etaStart initial parameters for regression coefficients
#' or linear predictors if \code{NULL}. For \code{IRLS} fitting only \code{etaStart}
#' is needed so if \code{coefStart} is provided it will be converted to \code{etaStart},
#' for \code{optim} fitting \code{coefStart} is necessary and argument \code{etaStart}
#' will be ignored.
#' @param silent logical, indicating whether warnings in \code{IRLS} method should be suppressed.
#' @param optimPass optional list of parameters passed to \code{stats::optim(..., control = optimPass)}
#' if FALSE then list of control parameters will be inferred from other parameters.
#' @param optimMethod method of [stats::optim()] used \code{"Nelder-Mead"} is the default .
#' @param stepsize only for \code{IRLS}, scaling of updates to \code{beta} vector
#' lower value means slower convergence but more accuracy by default 1.
#' In general if fitting algorithm fails lowering this value tends to
#' be most effective at correcting it.
#' @param checkDiagWeights logical value indicating whether to check if diagonal
#' elements of working weights matrixes in \code{IRLS} are sufficiently positive
#' so that these matrixes are positive defined. By default \code{TRUE}.
#' @param weightsEpsilon small number to ensure positive definedness of weights matrixes.
#' Only matters if \code{checkDiagWeights} is set to \code{TRUE}.
#' By default \code{1e-8}.
#' @param momentumFactor experimental parameter in \code{IRLS} only allowing for
#' taking previous step into account at current step, i.e instead of
#' updating regression parameters as:
#' \mjsdeqn{\boldsymbol{\beta}_{(a)} =
#' \boldsymbol{\beta}_{(a-1)} + \text{stepsize} \cdot \text{step}_{(a)}}
#' the update will be made as:
#' \mjsdeqn{
#' \boldsymbol{\beta}_{(a)} = \boldsymbol{\beta}_{(a-1)} + \text{stepsize}
#' \cdot (\text{step}_{(a)} + \text{momentum}\cdot\text{step}_{(a-1)})}
#' @param momentumActivation the value of log-likelihood reduction bellow
#' which momentum will apply.
#' @param criterion criterion used to determine convergence in \code{IRLS},
#' multiple values may be provided. By default \code{c("coef", "abstol")}.
#' @param saveIRLSlogs logical value indicating if information specified in
#' \code{verbose} should be saved to output object, by default \code{FALSE}.
#'
#' @return List with selected parameters, it is also possible to call list directly.
#' @seealso [singleRcapture::estimatePopsize()] [singleRcapture::estimatePopsizeFit()]
#' [singleRcapture::controlModel()] [singleRcapture::controlPopVar()]
#' @export
controlMethod <- function(epsilon = 1e-8,
maxiter = 1000,
verbose = 0,
printEveryN = 1L,
coefStart = NULL,
etaStart = NULL,
optimMethod = "Nelder-Mead",
silent = FALSE,
optimPass = FALSE,
stepsize = 1,
checkDiagWeights = TRUE,
weightsEpsilon = 1e-8,
momentumFactor = 0,
saveIRLSlogs = FALSE,
momentumActivation = 5,
criterion = c("coef",
"abstol",
"reltol")) {
if (!missing(criterion) && all(c("abstol", "reltol") %in% criterion))
stop("Choosing both absolute tolerance and relative tolerance for convergence criterion is not allowed.")
if (!missing(criterion) && all(!(c("coef", "abstol", "reltol") %in% criterion)))
stop("At least one convergence criterion has to be chosen")
#if (isFALSE(is.integer(printEveryN) & printEveryN > 0))
# stop("printEveryN argument is either negative or not an integer")
if (!isTRUE(is.finite(epsilon)) || !isTRUE(0 < epsilon) || isTRUE(length(epsilon) > 1))
stop("Argument epsilon has to be a positive numeric value (of length 1).")
if (!isTRUE(is.finite(maxiter)) || !isTRUE(0 < maxiter) || isTRUE(length(maxiter) > 1))
stop("Argument maxiter has to be a positive numeric value (of length 1).")
if (!isTRUE(is.finite(verbose)) || isTRUE(length(verbose) > 1))
stop("Argument verbose has to be a numeric value (of length 1).")
if (!isTRUE(is.finite(printEveryN)) || !isFALSE(0 > printEveryN) || isTRUE(length(printEveryN) > 1)) {
stop("Argument printEveryN has to be a nonnegative numeric value (of length 1).")
# If it's not an int
printEveryN <- as.integer(printEveryN)
}
if (!is.null(etaStart) && !isTRUE(is.numeric(etaStart)))
stop("Argument etaStart has to be either a numeric vector or NULL.")
if (!is.null(coefStart) && !isTRUE(is.numeric(coefStart)))
stop("Argument coefStart has to be either a numeric vector or NULL.")
if (!isTRUE(is.logical(silent)) || isTRUE(length(silent) > 1))
stop("Argument silent should be logical value (of length 1).")
if (!isTRUE(is.logical(checkDiagWeights)) || isTRUE(length(checkDiagWeights) > 1))
stop("Argument checkDiagWeights should be logical value (of length 1).")
if (!isTRUE(is.logical(saveIRLSlogs)) || isTRUE(length(saveIRLSlogs) > 1))
stop("Argument saveIRLSlogs should be logical value (of length 1).")
if (!isTRUE(is.finite(stepsize)) || !isTRUE(0 < stepsize) || isTRUE(length(stepsize) > 1))
stop("Argument stepsize has to be a positive numeric value (of length 1).")
if (!isTRUE(is.finite(momentumActivation)) || !isTRUE(0 < momentumActivation) ||
isTRUE(length(momentumActivation) > 1))
stop("Argument momentumActivation has to be a positive numeric value (of length 1).")
if (!isTRUE(is.finite(momentumFactor)) || !isFALSE(0 > momentumFactor) ||
isTRUE(length(momentumFactor) > 1))
stop("Argument momentumFactor has to be a non negative numeric value (of length 1).")
list(
epsilon = epsilon,
maxiter = maxiter,
verbose = verbose,
printEveryN = printEveryN,
etaStart = etaStart,
coefStart = coefStart,
optimMethod = optimMethod,
silent = silent,
optimPass = optimPass,
stepsize = stepsize,
checkDiagWeights = checkDiagWeights,
weightsEpsilon = weightsEpsilon,
momentumFactor = momentumFactor,
momentumActivation = momentumActivation,
saveIRLSlogs = saveIRLSlogs,
criterion = if (missing(criterion)) c("coef", "abstol") else criterion
)
}
#' @title Control parameters specific to some models
#' @author Piotr Chlebicki, Maciej Beręsewicz
#'
#' @description \code{controlModel} constructs a list with all necessary
#' control parameters in \code{estimatePopsize} that are either specific to
#' selected model or do not fit anywhere else.
#'
#' Specifying additional formulas should be done by using only right hand side of
#' the formula also for now all variables from additional formulas should also be
#' included in the "main" formula.
#'
#' @param weightsAsCounts boolean value indicating whether to treat \code{weights}
#' argument as number of occurrences for each row in the \code{data} and adjust
#' necessary methods and functionalities, like adjustments in bootstrap or
#' decreasing weights in \code{dfbeta} instead or deleting rows from data,
#' to accommodate this form of model specification.
#' @param omegaFormula formula for inflation parameter in one inflated zero
#' truncated and zero truncated one inflated models.
#' @param alphaFormula formula for dispersion parameter in negative binomial
#' based models.
#' @param piFormula formula for probability parameter in pseudo hurdle zero
#' truncated and zero truncated pseudo hurdle models.
#'
#' @return A list with selected parameters, it is also possible to call list directly.
#' @seealso [singleRcapture::estimatePopsize()] [singleRcapture::controlMethod()] [singleRcapture::controlPopVar()] [singleRcapture::singleRmodels()]
#' @export
controlModel <- function(weightsAsCounts = FALSE,
omegaFormula = ~ 1,
alphaFormula = ~ 1,
piFormula = ~ 1) {
# This is not fully completed yet.
list(
weightsAsCounts = weightsAsCounts,
omegaFormula = omegaFormula,
alphaFormula = alphaFormula,
piFormula = piFormula
)
}
#' @title Control parameters for population size estimation
#' @author Piotr Chlebicki, Maciej Beręsewicz
#' \loadmathjax
#'
#' @description Creating control parameters for population size estimation and
#' respective standard error and variance estimation.
#'
#' @param alpha significance level, 0.05 used by default.
#' @param cores for bootstrap only, number of processor cores to be used,
#' any number greater than 1 activates code designed with \code{doParallel},
#' \code{foreach} and \code{parallel} packages. Note that for now using parallel
#' computing makes tracing impossible so \code{traceBootstrapSize} and
#' \code{bootstrapVisualTrace} parameters are ignored in this case.
#' @param bootType bootstrap type. Default is \code{"parametric"},
#' other possible values are: \code{"semiparametric"} and \code{"nonparametric"}.
#' @param B number of bootstrap samples to be performed (default 500).
#' @param confType type of confidence interval for bootstrap confidence interval,
#' \code{"percentile"} by default.
#' Other possibilities: \code{"studentized"} and \code{"basic"}.
#' @param keepbootStat boolean value indicating whether to keep a vector of
#' statistics produced by bootstrap.
#' @param traceBootstrapSize boolean value indicating whether to print size of
#' bootstrapped sample after truncation for semi- and fully parametric bootstraps.
#' @param bootstrapVisualTrace boolean value indicating whether to plot bootstrap
#' statistics in real time if \code{cores = 1} if \code{cores > 1} it instead
#' indicates whether to make progress bar.
#' @param fittingMethod method used for fitting models from bootstrap samples.
#' @param bootstrapFitcontrol control parameters for each regression works exactly
#' like \code{controlMethod} but for fitting models from bootstrap samples.
#' @param sd indicates how to compute standard deviation of population
#' size estimator either as:
#' \mjsdeqn{\hat{\sigma}=\sqrt{\hat{\text{var}}(\hat{N})}}
#' for \code{sqrt} (which is slightly biased if \mjseqn{\hat{N}}
#' has a normal distribution) or for \code{normalMVUE} as the unbiased
#' minimal variance estimator for normal distribution:
#' \mjsdeqn{\hat{\sigma}=\sqrt{\hat{\text{var}}(\hat{N})}
#' \frac{\Gamma\left(\frac{N_{obs}-1}{2}\right)}{\Gamma\left(\frac{N_{obs}}{2}\right)}
#' \sqrt{\frac{N_{obs}}{2}}}
#' where the ration involving gamma functions is computed by log gamma function.
#' @param covType type of covariance matrix for regression parameters by default
#' observed information matrix.
#'
#' @return A list with selected parameters, it is also possible to call list directly.
#' @seealso [singleRcapture::estimatePopsize()] [singleRcapture::controlModel()] [singleRcapture::controlMethod()]
#' @export
controlPopVar <- function(alpha = .05,
bootType = c("parametric",
"semiparametric",
"nonparametric"),
B = 500,
confType = c("percentilic",
"normal",
"basic"), # TODO: add all
keepbootStat = TRUE,
traceBootstrapSize = FALSE,
bootstrapVisualTrace = FALSE,
fittingMethod = c("optim", "IRLS"),
bootstrapFitcontrol = NULL,
sd = c("sqrtVar", "normalMVUE"),
covType = c("observedInform", "Fisher"),
cores = 1L) {
if (missing(fittingMethod)) fittingMethod <- "IRLS"
if (missing(bootType)) bootType <- "parametric"
if (missing(confType)) confType <- "percentilic"
if (missing(covType)) covType <- "observedInform"
if (missing(sd)) sd <- "sqrtVar"
# I'm using !isTRUE instead of isFALSE because I do not wan't nulls to pass the tests
if (!isTRUE(is.finite(alpha)) || isTRUE(0 > alpha || 1 < alpha) || isTRUE(length(alpha) > 1))
stop("Argument alpha should be a numeric value between 0 and 1 (of length 1).")
if (!isTRUE(sd %in% c("sqrtVar", "normalMVUE")) && !missing(sd))
stop("Argument sd should be a character with value either sqrtVar or normalMVUE.")
if ((!isTRUE(covType %in% c("observedInform", "Fisher")) && !missing(covType)) || isTRUE(length(covType) > 1))
stop("Argument covType should be a character with value either observedInform or Fisher.")
if (!missing(bootType) && !isTRUE(bootType %in% c("parametric",
"semiparametric",
"nonparametric")))
stop("Argument bootType should be a character with value either in c(parametric, semiparametric, nonparametric).")
if (!isTRUE(is.finite(B)) || isTRUE(B < 0) || isTRUE(length(B) > 1))
stop("Argument B should be a numeric value greater than 0 (of length 1).")
if (!isTRUE(all(c(is.logical(traceBootstrapSize), is.logical(bootstrapVisualTrace)))) ||
isTRUE(length(bootstrapVisualTrace) > 1) || isTRUE(length(traceBootstrapSize) > 1))
stop("Arguments traceBootstrapSize and bootstrapVisualTrace should be logical values (of length 1).")
list(
bootstrapVisualTrace = bootstrapVisualTrace,
bootstrapFitcontrol = bootstrapFitcontrol,
traceBootstrapSize = traceBootstrapSize,
fittingMethod = fittingMethod,
keepbootStat = keepbootStat,
bootType = bootType,
confType = confType,
covType = covType,
cores = cores,
alpha = alpha,
B = B,
sd = sd
)
}