forked from vincentarelbundock/modelsummary
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathextract.R
164 lines (145 loc) · 6.08 KB
/
extract.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
#' Extract and combine estimates and goodness-of-fit statistics from several
#' statistical models.
#'
#' @inheritParams modelsummary
#' @return tibble
#' @examples
#' library(modelsummary)
#' data(trees)
#' models <- list()
#' models[['Bivariate']] <- lm(Girth ~ Height, data = trees)
#' models[['Multivariate']] <- lm(Girth ~ Height + Volume, data = trees)
#' extract(models)
#'
#' @export
extract <- function(models,
statistic = 'std.error',
statistic_override = NULL,
statistic_vertical = TRUE,
conf_level = 0.95,
coef_map = NULL,
coef_omit = NULL,
gof_map = modelsummary::gof_map,
gof_omit = NULL,
add_rows = NULL,
add_rows_location = NULL,
stars = FALSE,
fmt = '%.3f',
exponentiate = FALSE) {
# models must be a list of models or a single model
# TODO: this is code repetition from modelsummary(), but we need it over there
# as well for sanity checks. This doesn't matter at all, but maybe there's
# a more elegant solution.
if (!'list' %in% class(models)) {
models <- list(models)
}
# model names
if (is.null(names(models))) {
model_names <- paste('Model', 1:length(models))
} else {
if (any(names(models) == '')) {
stop('Model names cannot include empty strings. Please make sure that every object in the `models` list has a unique, non-empty name. If the `models` list has no names at all, `modelsummary` will create some automatically.')
}
model_names <- names(models)
}
# if statistics_override is a single function, repeat it in a list to allow map
if (is.function(statistic_override)) {
statistic_override <- rep(list(statistic_override), length(models))
}
# extract and combine estimates
est <- list()
for (i in seq_along(models)) {
est[[i]] <- extract_estimates(model = models[[i]],
fmt = fmt,
statistic = statistic,
statistic_override = statistic_override[[i]],
statistic_vertical = statistic_vertical,
conf_level = conf_level,
stars = stars,
exponentiate = exponentiate)
# coef_map: omit, rename (must be done before join to collapse rows)
if (!is.null(coef_map)) {
est[[i]] <- est[[i]][est[[i]]$term %in% names(coef_map),] # omit (white list)
idx <- match(est[[i]]$term, names(coef_map))
est[[i]]$term <- coef_map[idx] # rename
# defensive programming
if (any(table(est[[i]]$term) > 2)) {
msg <- paste('You are trying to assign the same name to two different variables in model', i)
stop(msg)
}
}
# coef_omit
if (!is.null(coef_omit)) {
est[[i]] <- est[[i]] %>%
dplyr::filter(!stringr::str_detect(term, coef_omit))
}
# set model name
colnames(est[[i]])[3] <- model_names[i]
}
est <- est %>%
purrr::reduce(dplyr::full_join, by = c('term', 'statistic')) %>%
dplyr::mutate(group = 'estimates') %>%
dplyr::select(group, term, statistic, names(.))
# reorder estimates (must be done after join
if (!is.null(coef_map)) {
idx <- match(est[['term']], coef_map)
est <- est[order(idx, est[['statistic']]),]
}
# extract and combine gof
gof <- models %>%
purrr::map(extract_gof, fmt = fmt, gof_map = gof_map) %>%
purrr::reduce(dplyr::full_join, by = 'term') %>%
stats::setNames(c('term', model_names))
# add gof row identifier
gof <- gof %>%
dplyr::mutate(group = 'gof') %>%
dplyr::select(group, term, names(.))
# omit gof using regex
if (!is.null(gof_omit)) {
gof <- gof %>%
dplyr::filter(!stringr::str_detect(term, gof_omit))
}
# gof_map: omit, reorder, rename
gof <- gof[!gof$term %in% gof_map$raw[gof_map$omit],] # omit (black list)
gof_names <- gof_map$clean[match(gof$term, gof_map$raw)] # rename
gof_names[is.na(gof_names)] <- gof$term[is.na(gof_names)]
gof$term <- gof_names
idx <- match(gof$term, gof_map$clean) # reorder
gof <- gof[order(idx, gof$term),]
# add_rows: this needs to be done after sorting and combining to preserve
# user-selected row order
if (!is.null(add_rows)) {
add_rows <- lapply(add_rows, as.character) %>% # TODO: remove once sanity checks are complete
do.call('rbind', .) %>%
data.frame(stringsAsFactors = FALSE) %>%
stats::setNames(c('term', model_names)) %>%
dplyr::mutate(group = 'gof', statistic = '')
add_rows <- add_rows[, colnames(gof)]
# sanity check add_rows_location
checkmate::assert_numeric(add_rows_location, null.ok = TRUE,
max.len = 1, lower = 0, upper = nrow(gof))
if (is.null(add_rows_location)) { # bottom if location is not specified
gof <- dplyr::bind_rows(gof, add_rows)
} else {
if (add_rows_location == 0) { # top if location is 0
gof <- dplyr::bind_rows(add_rows, gof)
} else if (add_rows_location == nrow(gof)) { # bottom if location is nrow(gof)
gof <- dplyr::bind_rows(gof, add_rows)
} else { # middle otherwise
top <- gof[1:add_rows_location,]
bot <- gof[(add_rows_location + 1):nrow(gof),]
gof <- dplyr::bind_rows(top, add_rows, bot)
}
}
}
# combine estimates and gof
if (nrow(gof) > 0) {
tab <- dplyr::bind_rows(est, gof)
} else {
tab <- est
}
# empty cells
tab[is.na(tab)] <- ''
# output
return(tab)
}