Skip to content

Commit

Permalink
update tests and docs, remove varying me function for now
Browse files Browse the repository at this point in the history
  • Loading branch information
be-green committed Oct 14, 2021
1 parent 715f267 commit e94b8df
Show file tree
Hide file tree
Showing 7 changed files with 101 additions and 218 deletions.
174 changes: 43 additions & 131 deletions R/marginal_effects.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,14 +64,13 @@ get_marginal_effects = function(qreg_coeffs,
avgME = matrix(NA, N, p)

nms <- colnames(qreg_vcv_vec)
if(is.null(nms)) {
nms <- rownames(qreg_coeffs)
}
qtls <- stringr::str_extract(nms, "^[0-9\\.]+")
coef_nms <- stringr::str_replace(nms, paste0(qtls, "_"), "")

if(!missing(qreg_vcv_vec)) {
avgME_se = matrix(NA, N, p)
} else {
avgME_se = c()
}
avgME_se = matrix(NA, N, p)

# matrix with transformations of data that give marginal effects;
# ME = R_matrix * qreg_coeffs
Expand All @@ -91,7 +90,7 @@ get_marginal_effects = function(qreg_coeffs,
R_matrix[(j_star+kk):p,(j_star+kk),jj] = avg_spacings[j_star+kk]
}

if(length(covmat_rows) == p) {
if(calc_se == F | length(covmat_rows) == p) {
avgME[jj,] = R_matrix[,,jj] %*% qreg_coeffs[jj,]

# calculating standard errors here.
Expand All @@ -103,6 +102,10 @@ get_marginal_effects = function(qreg_coeffs,
covmat_rows] %*%
matrix(R_matrix[kk,,jj], ncol = 1))
}
} else {
for(kk in 1:p){
avgME_se[jj,kk] = NA
}
}
} else {
avgME[jj,] = NA
Expand All @@ -118,9 +121,8 @@ get_marginal_effects = function(qreg_coeffs,

}

if(calc_se) return(list('avgME' = avgME,
'avgME_se' = avgME_se))
else return(list('avgME' = avgME))
return(list('avgME' = avgME,
'avgME_se' = avgME_se))
}

#' Get marginal effects at a set of levels for the covariates
Expand All @@ -140,8 +142,13 @@ me <- function(fit, X) {
reg_coefs <- t(coef(fit))
spacings <- avg_spacing(X, reg_coefs, alphas, jstar)

if(any(is.na(fit$se$quant_cov))) {
calcSE = F
} else {
calcSE = T
}
me <- get_marginal_effects(reg_coefs, spacings, jstar,
calc_se = T,
calc_se = calcSE,
qreg_vcv_vec = fit$se$quant_cov)

# point estimates
Expand All @@ -155,92 +162,15 @@ me <- function(fit, X) {
me
}

#' Get marginal effects at a set of levels for the covariates
#' @param fit A fitted model from the `qs` function
#' @param type one of "ame" (average marginal effects),
#' "mea" (marginal effects at the average), or "varying" (varying marginal
#' effects over different levels of the data)
#' @param variable variable to calculate marginal effects over
#' @param data optional data.frame that specifies level of data to calculate
#' marginal effects
#' @param size What bin size to use when varying the variable of interest
#' @param trim What to trim the variable of interest at, 0 < trim < 0.5
#' @details This function defaults to using
#' the average defaults to average for all coefficients
#' except variable chosen. If you want to vary a covariate but keep everything
#' else fixed at a certain level, you can specify the data argument.
#' The size argument defaults to a bin size of 1/3 of the standard deviation
#' of the variable, and the trim defaults to using the 95th percentile
#' instead of the max because there may be large outliers. You can over-ride
#' by setting trim to 0, which will use the min and max.
#' @importFrom stats model.matrix
#' @importFrom stats as.formula
me_by_variable <- function(fit, type, variable,
data = NA, size = NA, trim = 0.05) {


if(length(data) == 1 & anyNA(data)) {
X = stats::model.matrix(stats::as.formula(fit$specs$formula),
data = fit$specs$X)

} else {
X <- data
}

if (type == "mea") {
# data <- data.frame(t(colMeans(X)))
# vardata <- mean(X[,variable])
} else if (type == "varying") {
data <- t(colMeans(X))

var <- X[,variable]
if(is.numeric(var)) {
vardata <- bin_along_range(var)
} else {
vardata <- unique(var)
}

data <- as.data.frame(do.call("rbind", lapply(rep(list(data[,-which(colnames(data) == variable)]),
length(vardata)), unlist)))

data[[variable]] <- vardata
data[[variable]] <- vardata

} else if (type == "ame") {
# vardata <- X[,variable]
} else {
stop("Type must be one of:\n",
"\t \"mea\": marginal effects at the average\n",
"\t \"varying\": marginal effects across different levels of variable\n",
"\t \"ame\": average marginal effects\n")
}

var_me <- matrix(NA, nrow = length(vardata), ncol = length(fit$specs$alpha))

if(type == "varying") {

} else {
var_me <- me(fit, X = X)
return(var_me)
}
}

#' Get all marginal effects of variables in the fit
#' @param fit model fitted by `qs()`
#' @param type one of "ame" (average marginal effects),
#' "mea" (marginal effects at the average), or "varying" (varying marginal
#' effects over different levels of the data)
#' @param type one of "ame" (average marginal effects) or
#' "mea" (marginal effects at the average)
#' @param variable which variable to calculate marginal effects on
#' @param data optional data.frame that specifies level of data to calculate
#' marginal effects
#' @param size What bin size to use when varying the variable of interest
#' @param trim What to trim the variable of interest at, 0 < trim < 0.5
#' @details This function defaults to using
#' the average defaults to average for all coefficients
#' except variable chosen. If you want to vary a covariate but keep everything
#' else fixed at a certain level, you can specify the data argument.
#' The size argument defaults to a bin size of 1/3 of the standard deviation
#' of the variable, and the trim defaults to using the 95th percentile
#' @details The trim defaults to using the 95th percentile
#' instead of the max because there may be large outliers. You can over-ride
#' by setting trim to 0, which will use the min and max.
#' By default, marginal effects will calculate marginal effects for
Expand All @@ -251,7 +181,6 @@ marginal_effects <- function(fit,
type = "mea",
variable = "all",
data = NA,
size = NA,
trim = 0.05) {

if(anyNA(data) & length(data) == 1) {
Expand All @@ -261,24 +190,14 @@ marginal_effects <- function(fit,
variable <- setdiff(colnames(data), "(Intercept)")
}

if(type == "varying") {
all_me <- lapply(variable,
function(v) {
me_by_variable(fit, type,
v, data,
size, trim)
})
names(all_me) <- variable
} else {
if(type == "mea") {
data = matrix(colMeans(data), nrow = 1)
}
all_me <- me(fit, X = data)
if(length(variable) > 1 || variable != "all") {
all_me$avgME <- all_me$avgME[which(rownames(all_me$avgME) %in% variable),, drop = F]
all_me$avgME_se <- all_me$avgME_se[which(rownames(all_me$avgME_se) %in% variable),, drop = F]
}
all_me <- list(all_me)

if(type == "mea") {
data = matrix(colMeans(data), nrow = 1)
}
all_me <- me(fit, X = data)
if(length(variable) > 1 || variable != "all") {
all_me$avgME <- all_me$avgME[which(rownames(all_me$avgME) %in% variable),, drop = F]
all_me$avgME_se <- all_me$avgME_se[which(rownames(all_me$avgME_se) %in% variable),, drop = F]
}

attr(all_me, "jstar") <- fit$specs$jstar
Expand All @@ -301,30 +220,23 @@ marginal_effects <- function(fit,
print.qs_me <- function(x, ...) {
type = attr(x, "type")
out_type <- switch(type,
ame = "Average Marginal Effects",
mea = "Marginal Effects at the Average",
varying = "Varying Over Levels")
ame = "Average Marginal Effects:\n",
mea = "Marginal Effects at the Average:\n")
cat(out_type)
for (i in 1:length(x)) {
cat(paste0(names(x)[i],": \n"))
if(type != "varying") {
sub_x = x[[i]]
mat <- matrix(nrow = nrow(sub_x$avgME) + nrow(sub_x$avgME_se),
ncol = ncol(sub_x$avgME))
rn = c()
sub_x_rn = rownames(sub_x$avgME)
for(i in 1:nrow(sub_x$avgME)) {
rn <- c(rn, c(sub_x_rn[i], paste(sub_x_rn[i], "SE")))
mat[2 * i - 1,] <- sub_x$avgME[i,]
mat[2 * i,] <- sub_x$avgME_se[i,]
# cat(paste0(names(x)[i],": \n"))
mat <- matrix(nrow = nrow(x$avgME) + nrow(x$avgME_se),
ncol = ncol(x$avgME))
rn = c()
sub_x_rn = rownames(x$avgME)
for(i in 1:nrow(x$avgME)) {
rn <- c(rn, c(sub_x_rn[i], paste(sub_x_rn[i], "SE")))
mat[2 * i - 1,] <- x$avgME[i,]
mat[2 * i,] <- x$avgME_se[i,]

}
rownames(mat) <- rn
colnames(mat) <- colnames(sub_x)
print(mat, ...)
} else {
print(x[[i]], row.names = F, ...)
}
}
rownames(mat) <- rn
colnames(mat) <- colnames(x$avgME)
print(mat, ...)


}
40 changes: 26 additions & 14 deletions R/play_nice_with_mice.R
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
#' @rdname mice-helpers
#' @param data data to be interpolated by mice
#' @param quantiles Quantiles to target
#' @param ... other controls to be passed to the [`qs`] function
#' @importFrom stats complete.cases
#' @export
make_penalized_blots <- function(data, ...) {
make_penalized_blots <- function(data,
quantiles = c(0.01, 0.05, 0.1, 0.25, 0.5,
0.75, 0.9, 0.95, 0.99),
...) {

if(!is.data.frame(data)) {
data <- as.data.frame(data)
Expand All @@ -23,10 +27,12 @@ make_penalized_blots <- function(data, ...) {
if(use_qs[[i]]) {
formula <- as.formula(paste0(colnames(data)[i], "~ ."))

fit <- qs(formula, data = data[stats::complete.cases(data),],
fit <- qs(formula, quantiles = quantiles,
data = data[stats::complete.cases(data),],
calc_se = F, algorithm = "lasso", ...)

l[[i]]$control <- qs_control(lambda = unlist(fit$quantreg_fit$lambda))
l[[i]]$control <- qs_control(lambda = unlist(fit$quantreg_fit$lambda),
calc_avg_me = F, calc_r2 = F)
}
}
l
Expand Down Expand Up @@ -76,27 +82,30 @@ make_penalized_blots <- function(data, ...) {
#' @examples
#' \dontrun{
#' library(mice)
#' x <- rnorm(1000)
#' x <- rnorm(10000)
#' x[sample(1:length(x), 100)] <- NA
#' x <- matrix(x, ncol = 10)
#'
#' # default method
#' mice(x, method = "qs", calc_se = F)
#' # get optimal lambdas from CV search based on complete data
#' bl <- make_penalized_blots(x)
#'
#' # lasso penalty w/ specified lambda
#' mice(x, method = "qs", algorithm = "lasso", calc_se = F, control = qs_control(lambda = 0.05))
#' }
#' # pass those to the lasso and get imputations
#' imputations = mice::mice(x, m = 10,
#' defaultMethod = c("qs", "logreg", "polyreg", "polr"),
#' blots = bl, algorithm = "lasso")
#'}
#' @export
#' @importFrom MASS mvrnorm
mice.impute.qs <- function(y, ry, x, wy = NULL,
quantiles = c(0.1, 0.25, 0.5, 0.75, 0.9),
quantiles = c(0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99),
baseline_quantile = 0.5,
algorithm = "sfn",
algorithm = "two_pass",
tails = "gaussian",
parallel = F,
calc_se = F,
weights = NULL,
control = qs_control(),
control = qs_control(calc_r2 = F,
calc_avg_me = F),
std_err_control = se_control(),
...) {
if (is.null(wy)) {
Expand Down Expand Up @@ -150,7 +159,7 @@ mice.impute.qs <- function(y, ry, x, wy = NULL,
std_err_control = std_err_control,
parallel = parallel,
control = qs_control(control$trunc, control$small, lambda = lambda,
output_quantiles = F, calc_avg_me = F),
output_quantiles = F, calc_avg_me = F, calc_r2 = F),
...)


Expand All @@ -169,6 +178,9 @@ mice.impute.qs <- function(y, ry, x, wy = NULL,
data = x[which(wy),],
jstar = jstar)

rng <- distributional_effects(out, tails = tails, alphas = alpha, parallel = parallel)
suppressWarnings({
rng <- distributional_effects(out, tails = tails, alphas = alpha, parallel = parallel)
})

rng$r(1)
}
2 changes: 1 addition & 1 deletion R/spline_helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ cub_root_select <- function(a, b, c, d, q_1, q_2) {
cub_root_select_rconics <- function(a, b, c, d, q_1, q_2) {
#roots_clean <- matrix(Re(roots)[abs(Im(roots)) < 1e-6], ncol = 3, byrow = TRUE)
coeff <- cbind(a,b,c,d)
roots <- apply(cbind(a,b,c,d), 1, RConics::cubic)
roots <- apply(coeff, 1, RConics::cubic)
roots_clean <- matrix(roots, ncol = 3, byrow = TRUE)
roots_clean[abs(Im(roots_clean)) > 1e-6] <- NA
roots_clean <- Re(roots_clean)
Expand Down
23 changes: 4 additions & 19 deletions man/marginal_effects.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit e94b8df

Please sign in to comment.