update tests and docs, remove varying me function for now

herbps10 · Oct 14, 2021 · e94b8df · e94b8df
1 parent 715f267
commit e94b8df
Show file tree

Hide file tree

Showing 7 changed files with 101 additions and 218 deletions.
diff --git a/R/marginal_effects.R b/R/marginal_effects.R
@@ -64,14 +64,13 @@ get_marginal_effects = function(qreg_coeffs,
   avgME = matrix(NA, N, p)
 
   nms <- colnames(qreg_vcv_vec)
+  if(is.null(nms)) {
+    nms <- rownames(qreg_coeffs)
+  }
   qtls <- stringr::str_extract(nms, "^[0-9\\.]+")
   coef_nms <- stringr::str_replace(nms, paste0(qtls, "_"), "")
 
-  if(!missing(qreg_vcv_vec)) {
-    avgME_se = matrix(NA, N, p)
-  } else {
-    avgME_se = c()
-  }
+  avgME_se = matrix(NA, N, p)
 
   # matrix with transformations of data that give marginal effects;
   # ME = R_matrix * qreg_coeffs
@@ -91,7 +90,7 @@ get_marginal_effects = function(qreg_coeffs,
       R_matrix[(j_star+kk):p,(j_star+kk),jj] = avg_spacings[j_star+kk]
     }
 
-    if(length(covmat_rows) == p) {
+    if(calc_se == F | length(covmat_rows) == p) {
       avgME[jj,] = R_matrix[,,jj] %*% qreg_coeffs[jj,]
 
       # calculating standard errors here.
@@ -103,6 +102,10 @@ get_marginal_effects = function(qreg_coeffs,
                                                 covmat_rows] %*%
                                    matrix(R_matrix[kk,,jj], ncol = 1))
         }
+      } else {
+        for(kk in 1:p){
+          avgME_se[jj,kk] = NA
+        }
       }
     } else {
       avgME[jj,] = NA
@@ -118,9 +121,8 @@ get_marginal_effects = function(qreg_coeffs,
 
   }
 
-  if(calc_se) return(list('avgME' = avgME,
-                          'avgME_se' = avgME_se))
-  else return(list('avgME' = avgME))
+    return(list('avgME' = avgME,
+                'avgME_se' = avgME_se))
 }
 
 #' Get marginal effects at a set of levels for the covariates
@@ -140,8 +142,13 @@ me <- function(fit, X) {
   reg_coefs <- t(coef(fit))
   spacings <- avg_spacing(X, reg_coefs, alphas, jstar)
 
+  if(any(is.na(fit$se$quant_cov))) {
+    calcSE = F
+  } else {
+    calcSE = T
+  }
   me <- get_marginal_effects(reg_coefs, spacings, jstar,
-                             calc_se = T,
+                             calc_se = calcSE,
                              qreg_vcv_vec = fit$se$quant_cov)
 
   # point estimates
@@ -155,92 +162,15 @@ me <- function(fit, X) {
   me
 }
 
-#' Get marginal effects at a set of levels for the covariates
-#' @param fit A fitted model from the `qs` function
-#' @param type one of "ame" (average marginal effects),
-#' "mea" (marginal effects at the average), or "varying" (varying marginal
-#' effects over different levels of the data)
-#' @param variable variable to calculate marginal effects over
-#' @param data optional data.frame that specifies level of data to calculate
-#' marginal effects
-#' @param size What bin size to use when varying the variable of interest
-#' @param trim What to trim the variable of interest at, 0 < trim < 0.5
-#' @details This function defaults to using
-#' the average defaults to average for all coefficients
-#' except variable chosen. If you want to vary a covariate but keep everything
-#' else fixed at a certain level, you can specify the data argument.
-#' The size argument defaults to a bin size of 1/3 of the standard deviation
-#' of the variable, and the trim defaults to using the 95th percentile
-#' instead of the max because there may be large outliers. You can over-ride
-#' by setting trim to 0, which will use the min and max.
-#' @importFrom stats model.matrix
-#' @importFrom stats as.formula
-me_by_variable <- function(fit, type, variable,
-                           data = NA, size = NA, trim = 0.05) {
-
-
-  if(length(data) == 1 & anyNA(data)) {
-    X = stats::model.matrix(stats::as.formula(fit$specs$formula),
-                              data = fit$specs$X)
-
-  } else {
-    X <- data
-  }
-
-  if (type == "mea") {
-    # data <- data.frame(t(colMeans(X)))
-    # vardata <- mean(X[,variable])
-  } else if (type == "varying") {
-    data <- t(colMeans(X))
-
-    var <- X[,variable]
-    if(is.numeric(var)) {
-      vardata <- bin_along_range(var)
-    } else {
-      vardata <- unique(var)
-    }
-
-    data <- as.data.frame(do.call("rbind", lapply(rep(list(data[,-which(colnames(data) == variable)]),
-                                                      length(vardata)), unlist)))
-
-    data[[variable]] <- vardata
-    data[[variable]] <- vardata
-
-  } else if (type == "ame") {
-    # vardata <- X[,variable]
-  } else {
-    stop("Type must be one of:\n",
-         "\t \"mea\": marginal effects at the average\n",
-         "\t \"varying\": marginal effects across different levels of variable\n",
-         "\t \"ame\": average marginal effects\n")
-  }
-
-  var_me <- matrix(NA, nrow = length(vardata), ncol = length(fit$specs$alpha))
-
-  if(type == "varying") {
-
-  } else {
-    var_me <- me(fit, X = X)
-    return(var_me)
-  }
-}
-
 #' Get all marginal effects of variables in the fit
 #' @param fit model fitted by `qs()`
-#' @param type one of "ame" (average marginal effects),
-#' "mea" (marginal effects at the average), or "varying" (varying marginal
-#' effects over different levels of the data)
+#' @param type one of "ame" (average marginal effects) or
+#' "mea" (marginal effects at the average)
 #' @param variable which variable to calculate marginal effects on
 #' @param data optional data.frame that specifies level of data to calculate
 #' marginal effects
-#' @param size What bin size to use when varying the variable of interest
 #' @param trim What to trim the variable of interest at, 0 < trim < 0.5
-#' @details This function defaults to using
-#' the average defaults to average for all coefficients
-#' except variable chosen. If you want to vary a covariate but keep everything
-#' else fixed at a certain level, you can specify the data argument.
-#' The size argument defaults to a bin size of 1/3 of the standard deviation
-#' of the variable, and the trim defaults to using the 95th percentile
+#' @details The trim defaults to using the 95th percentile
 #' instead of the max because there may be large outliers. You can over-ride
 #' by setting trim to 0, which will use the min and max.
 #' By default, marginal effects will calculate marginal effects for
@@ -251,7 +181,6 @@ marginal_effects <- function(fit,
                              type = "mea",
                              variable = "all",
                              data = NA,
-                             size = NA,
                              trim = 0.05) {
 
   if(anyNA(data) & length(data) == 1) {
@@ -261,24 +190,14 @@ marginal_effects <- function(fit,
     variable <- setdiff(colnames(data), "(Intercept)")
   }
 
-  if(type == "varying") {
-    all_me <- lapply(variable,
-                     function(v) {
-                       me_by_variable(fit, type,
-                                      v, data,
-                                      size, trim)
-                     })
-    names(all_me) <- variable
-  } else {
-    if(type == "mea") {
-      data = matrix(colMeans(data), nrow = 1)
-    }
-    all_me <- me(fit, X = data)
-    if(length(variable) > 1 || variable != "all") {
-      all_me$avgME <- all_me$avgME[which(rownames(all_me$avgME) %in% variable),, drop = F]
-      all_me$avgME_se <- all_me$avgME_se[which(rownames(all_me$avgME_se) %in% variable),,  drop = F]
-    }
-    all_me <- list(all_me)
+
+  if(type == "mea") {
+    data = matrix(colMeans(data), nrow = 1)
+  }
+  all_me <- me(fit, X = data)
+  if(length(variable) > 1 || variable != "all") {
+    all_me$avgME <- all_me$avgME[which(rownames(all_me$avgME) %in% variable),, drop = F]
+    all_me$avgME_se <- all_me$avgME_se[which(rownames(all_me$avgME_se) %in% variable),,  drop = F]
   }
 
   attr(all_me, "jstar") <- fit$specs$jstar
@@ -301,30 +220,23 @@ marginal_effects <- function(fit,
 print.qs_me <- function(x, ...) {
   type = attr(x, "type")
   out_type <- switch(type,
-                     ame = "Average Marginal Effects",
-                     mea = "Marginal Effects at the Average",
-                     varying = "Varying Over Levels")
+                     ame = "Average Marginal Effects:\n",
+                     mea = "Marginal Effects at the Average:\n")
   cat(out_type)
-  for (i in 1:length(x)) {
-    cat(paste0(names(x)[i],": \n"))
-    if(type != "varying") {
-      sub_x = x[[i]]
-      mat <- matrix(nrow = nrow(sub_x$avgME) + nrow(sub_x$avgME_se),
-                    ncol = ncol(sub_x$avgME))
-      rn = c()
-      sub_x_rn = rownames(sub_x$avgME)
-      for(i in 1:nrow(sub_x$avgME)) {
-          rn <- c(rn, c(sub_x_rn[i], paste(sub_x_rn[i], "SE")))
-          mat[2 * i - 1,] <- sub_x$avgME[i,]
-          mat[2 * i,] <- sub_x$avgME_se[i,]
+  # cat(paste0(names(x)[i],": \n"))
+  mat <- matrix(nrow = nrow(x$avgME) + nrow(x$avgME_se),
+                ncol = ncol(x$avgME))
+  rn = c()
+  sub_x_rn = rownames(x$avgME)
+  for(i in 1:nrow(x$avgME)) {
+      rn <- c(rn, c(sub_x_rn[i], paste(sub_x_rn[i], "SE")))
+      mat[2 * i - 1,] <- x$avgME[i,]
+      mat[2 * i,] <- x$avgME_se[i,]
 
-      }
-      rownames(mat) <- rn
-      colnames(mat) <- colnames(sub_x)
-      print(mat, ...)
-    } else {
-      print(x[[i]], row.names = F, ...)
-    }
   }
+  rownames(mat) <- rn
+  colnames(mat) <- colnames(x$avgME)
+  print(mat, ...)
+
 
 }
diff --git a/R/play_nice_with_mice.R b/R/play_nice_with_mice.R
@@ -1,9 +1,13 @@
 #' @rdname mice-helpers
 #' @param data data to be interpolated by mice
+#' @param quantiles Quantiles to target
 #' @param ... other controls to be passed to the [`qs`] function
 #' @importFrom stats complete.cases
 #' @export
-make_penalized_blots <- function(data, ...) {
+make_penalized_blots <- function(data,
+                                 quantiles = c(0.01, 0.05, 0.1, 0.25, 0.5,
+                                               0.75, 0.9, 0.95, 0.99),
+                                 ...) {
 
   if(!is.data.frame(data)) {
     data <- as.data.frame(data)
@@ -23,10 +27,12 @@ make_penalized_blots <- function(data, ...) {
     if(use_qs[[i]]) {
       formula <- as.formula(paste0(colnames(data)[i], "~ ."))
 
-      fit <- qs(formula, data = data[stats::complete.cases(data),],
+      fit <- qs(formula, quantiles = quantiles,
+                data = data[stats::complete.cases(data),],
                 calc_se = F, algorithm = "lasso", ...)
 
-      l[[i]]$control <- qs_control(lambda = unlist(fit$quantreg_fit$lambda))
+      l[[i]]$control <- qs_control(lambda = unlist(fit$quantreg_fit$lambda),
+                                   calc_avg_me = F, calc_r2 = F)
     }
   }
   l
@@ -76,27 +82,30 @@ make_penalized_blots <- function(data, ...) {
 #' @examples
 #' \dontrun{
 #' library(mice)
-#' x <- rnorm(1000)
+#' x <- rnorm(10000)
 #' x[sample(1:length(x), 100)] <- NA
 #' x <- matrix(x, ncol = 10)
 #'
-#' # default method
-#' mice(x, method = "qs", calc_se = F)
+#' # get optimal lambdas from CV search based on complete data
+#' bl <- make_penalized_blots(x)
 #'
-#' # lasso penalty w/ specified lambda
-#' mice(x, method = "qs", algorithm = "lasso", calc_se = F, control = qs_control(lambda = 0.05))
-#' }
+#' # pass those to the lasso and get imputations
+#' imputations = mice::mice(x, m = 10,
+#'                          defaultMethod = c("qs", "logreg", "polyreg", "polr"),
+#'                          blots = bl, algorithm = "lasso")
+#'}
 #' @export
 #' @importFrom MASS mvrnorm
 mice.impute.qs <- function(y, ry, x, wy = NULL,
-                           quantiles = c(0.1, 0.25, 0.5, 0.75, 0.9),
+                           quantiles = c(0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99),
                            baseline_quantile = 0.5,
-                           algorithm = "sfn",
+                           algorithm = "two_pass",
                            tails = "gaussian",
                            parallel = F,
                            calc_se = F,
                            weights = NULL,
-                           control = qs_control(),
+                           control = qs_control(calc_r2 = F,
+                                                calc_avg_me = F),
                            std_err_control = se_control(),
                            ...) {
   if (is.null(wy)) {
@@ -150,7 +159,7 @@ mice.impute.qs <- function(y, ry, x, wy = NULL,
       std_err_control = std_err_control,
       parallel = parallel,
       control = qs_control(control$trunc, control$small, lambda = lambda,
-                           output_quantiles = F, calc_avg_me = F),
+                           output_quantiles = F, calc_avg_me = F, calc_r2 = F),
       ...)
 
 
@@ -169,6 +178,9 @@ mice.impute.qs <- function(y, ry, x, wy = NULL,
                                data = x[which(wy),],
                                jstar = jstar)
 
-  rng <- distributional_effects(out, tails = tails, alphas = alpha, parallel = parallel)
+  suppressWarnings({
+    rng <- distributional_effects(out, tails = tails, alphas = alpha, parallel = parallel)
+  })
+
   rng$r(1)
 }
diff --git a/R/spline_helpers.R b/R/spline_helpers.R
@@ -49,7 +49,7 @@ cub_root_select <- function(a, b, c, d, q_1, q_2) {
 cub_root_select_rconics <- function(a, b, c, d, q_1, q_2) {
   #roots_clean <- matrix(Re(roots)[abs(Im(roots)) < 1e-6], ncol = 3, byrow = TRUE)
   coeff <- cbind(a,b,c,d)
-  roots <- apply(cbind(a,b,c,d), 1, RConics::cubic)
+  roots <- apply(coeff, 1, RConics::cubic)
   roots_clean <- matrix(roots, ncol = 3, byrow = TRUE)
   roots_clean[abs(Im(roots_clean)) > 1e-6] <- NA
   roots_clean <- Re(roots_clean)

diff --git a/man/marginal_effects.Rd b/man/marginal_effects.Rd