forked from ddsjoberg/gtsummary
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathadd_stat.R
324 lines (310 loc) · 11.1 KB
/
add_stat.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
#' Add a custom statistic column
#'
#' \lifecycle{maturing}
#' The function allows a user to add a new column (or columns) of statistics to an
#' existing `tbl_summary`, `tbl_svysummary`, or `tbl_continuous` object.
#'
#' @param x `tbl_summary`, `tbl_svysummary`, or `tbl_continuous` object
#' @param fns list of formulas indicating the functions that create the statistic.
#' See details below.
#' @param location list of formulas indicating the location the new statistics
#' are placed. The RHS of the formula must be one of `c("label", "level", "missing")`.
#' When `"label"`, a single statistic
#' is placed on the variable label row. When `"level"` the statistics are placed
#' on the variable level rows. The length of the vector of statistics returned from the
#' `fns` function must match the dimension of levels. Default is to place the
#' new statistics on the label row.
#' @param ... DEPRECATED
#'
#' @section Details:
#'
#' The returns from custom functions passed in `fns=` are required to follow a
#' specified format. Each of these function will execute on a single variable.
#' 1. Each function must return a tibble or a vector. If a vector is returned,
#' it will be converted to a tibble with one column and number of rows equal
#' to the length of the vector.
#' 1. When `location = "label"`, the returned statistic from the custom function
#' must be a tibble with one row. When `location = "level"` the tibble must have
#' the same number of rows as there are levels in the variable (excluding the
#' row for unknown values).
#' 1. Each function may take the following arguments: `foo(data, variable, by, tbl, ...)`
#' - `data=` is the input data frame passed to `tbl_summary()`
#' - `variable=` is a string indicating the variable to perform the calculation on. This is the variable in the label column of the table.
#' - `by=` is a string indicating the by variable from `tbl_summary=`, if present
#' - `tbl=` the original `tbl_summary()`/`tbl_svysummary()` object is also available to utilize
#'
#' The user-defined does not need to utilize each of these inputs. It's
#' encouraged the user-defined function accept `...` as each of the arguments
#' *will* be passed to the function, even if not all inputs are utilized by
#' the user's function, e.g. `foo(data, variable, by, ...)`
#'
#' - Use `modify_header()` to update the column headers
#' - Use `modify_fmt_fun()` to update the functions that format the statistics
#' - Use `modify_footnote()` to add a explanatory footnote
#'
#' If you return a tibble with column names `p.value` or `q.value`, default
#' p-value formatting will be applied, and you may take advantage of subsequent
#' p-value formatting functions, such as `bold_p()` or `add_q()`.
#'
#' To access the continuous variable in a `tbl_continuous()` table, use
#' `tbl$inputs$variable`.
#'
#' @export
#' @seealso Review [list, formula, and selector syntax][syntax] used throughout gtsummary
#' @examples
#' \donttest{
#' library(dplyr, warn.conflicts = FALSE)
#' library(stringr)
#' # Example 1 ----------------------------------
#' # fn returns t-test pvalue
#' my_ttest <- function(data, variable, by, ...) {
#' t.test(data[[variable]] ~ as.factor(data[[by]]))$p.value
#' }
#'
#' add_stat_ex1 <-
#' trial %>%
#' select(trt, age, marker) %>%
#' tbl_summary(by = trt, missing = "no") %>%
#' add_stat(fns = everything() ~ my_ttest) %>%
#' modify_header(
#' list(
#' add_stat_1 ~ "**p-value**",
#' all_stat_cols() ~ "**{level}**"
#' )
#' )
#'
#' # Example 2 ----------------------------------
#' # fn returns t-test test statistic and pvalue
#' my_ttest2 <- function(data, variable, by, ...) {
#' t.test(data[[variable]] ~ as.factor(data[[by]])) %>%
#' broom::tidy() %>%
#' mutate(
#' stat = str_glue("t={style_sigfig(statistic)}, {style_pvalue(p.value, prepend_p = TRUE)}")
#' ) %>%
#' pull(stat)
#' }
#'
#' add_stat_ex2 <-
#' trial %>%
#' select(trt, age, marker) %>%
#' tbl_summary(by = trt, missing = "no") %>%
#' add_stat(fns = everything() ~ my_ttest2) %>%
#' modify_header(add_stat_1 ~ "**Treatment Comparison**")
#'
#' # Example 3 ----------------------------------
#' # return test statistic and p-value is separate columns
#' my_ttest3 <- function(data, variable, by, ...) {
#' t.test(data[[variable]] ~ as.factor(data[[by]])) %>%
#' broom::tidy() %>%
#' select(statistic, p.value)
#' }
#'
#' add_stat_ex3 <-
#' trial %>%
#' select(trt, age, marker) %>%
#' tbl_summary(by = trt, missing = "no") %>%
#' add_stat(fns = everything() ~ my_ttest3) %>%
#' modify_header(
#' list(
#' statistic ~ "**t-statistic**",
#' p.value ~ "**p-value**"
#' )
#' ) %>%
#' modify_fmt_fun(
#' list(
#' statistic ~ style_sigfig,
#' p.value ~ style_pvalue
#' )
#' )
#' }
#' @section Example Output:
#' \if{html}{Example 1}
#'
#' \if{html}{\out{
#' `r man_create_image_tag(file = "add_stat_ex1.png", width = "60")`
#' }}
#'
#' \if{html}{Example 2}
#'
#' \if{html}{\out{
#' `r man_create_image_tag(file = "add_stat_ex2.png", width = "60")`
#' }}
#'
#' \if{html}{Example 3}
#'
#' \if{html}{\out{
#' `r man_create_image_tag(file = "add_stat_ex3.png", width = "60")`
#' }}
add_stat <- function(x, fns, location = NULL, ...) {
updated_call_list <- c(x$call_list, list(add_stat = match.call()))
# checking inputs ------------------------------------------------------------
.assert_class(x, c("tbl_summary", "tbl_svysummary", "tbl_continuous"))
# deprecated arguments -------------------------------------------------------
dots <- rlang::dots_list(...)
dep_args <-
list(
fmt_fun = list("gtsummary::add_stat(fmt_fun=)", "modify_fmt_fun()"),
header = list("gtsummary::add_stat(header=)", "modify_header()"),
footnote = list("gtsummary::add_stat(footnote=)", "modify_footnote()"),
new_col_name = list("gtsummary::add_stat(new_col_name=)", NULL)
)
purrr::iwalk(
dep_args,
function(.x, .y) {
if (!is.null(dots[[.y]])) {
lifecycle::deprecate_stop(when = "1.4.0", what = .x[[1]], with = .x[[2]])
}
}
)
# convert to named lists -----------------------------------------------------
if (rlang::is_string(location)) {
lifecycle::deprecate_stop(
"1.4.0",
"gtsummary::add_stat(location = 'must be a formula list, e.g. `everything() ~ \"label\"`,')"
)
location <- inject(everything() ~ !!location)
}
location <-
.formula_list_to_named_list(
x = location,
data = switch(class(x)[1],
"tbl_summary" = select(x$inputs$data, any_of(x$meta_data$variable)),
"tbl_svysummary" = select(x$inputs$data$variables, any_of(x$meta_data$variable))
),
var_info = x$table_body,
arg_name = "location",
type_check = chuck(type_check, "is_string", "fn"),
type_check_msg = chuck(type_check, "is_string", "msg")
)
imap(
location,
~ switch(!is_string(.x) || !.x %in% c("label", "level", "missing"),
abort("RHS of `location=` formulas must be one of 'label', 'level', or 'missing'")
)
)
fns <-
.formula_list_to_named_list(
x = fns,
data = switch(class(x)[1],
"tbl_summary" = select(x$inputs$data, any_of(x$meta_data$variable)),
"tbl_svysummary" = select(x$inputs$data$variables, any_of(x$meta_data$variable))
),
var_info = x$table_body,
arg_name = "fns",
type_check = chuck(type_check, "is_function", "fn"),
type_check_msg = chuck(type_check, "is_function", "msg")
)
# setting new column name ----------------------------------------------------
stat_col_name <-
select(x$table_body, dplyr::matches("^add_stat_\\d*[1-9]\\d*$")) %>%
names() %>%
length() %>%
{
paste0("add_stat_", . + 1)
}
# calculating statistics -----------------------------------------------------
df_new_stat <-
tibble(variable = names(fns)) %>%
left_join(x$meta_data %>% select("variable", "summary_type"),
by = "variable"
) %>%
mutate(
row_type = map_chr(.data$variable, ~ location[[.x]] %||% "label"),
label = map2(
.data$variable, .data$row_type,
~ filter(x$table_body, .data$variable == .x, .data$row_type == .y)$label
)
) %>%
mutate(
df_add_stats = purrr::imap(fns, ~ eval_fn_safe(tbl = x, variable = .y, fn = .x))
) %>%
select(-"summary_type")
# converting returned statistics to a tibble if not already ------------------
df_new_stat$df_add_stats <-
df_new_stat$df_add_stats %>%
map(~ switch(is.data.frame(.x),
.x
) %||% tibble(!!stat_col_name := .x))
# check dims of calculated statistics ----------------------------------------
purrr::pwalk(
list(df_new_stat$variable, df_new_stat$label, df_new_stat$df_add_stats),
function(variable, label, df_add_stats) {
if (nrow(df_add_stats) != length(label)) {
glue(
"Dimension of '{variable}' and the added statistic do not match. ",
"Expecting statistic/data frame to be length/no. rows {length(label)}."
) %>%
abort()
}
}
)
# check new column names do not exist in `x$table_body`
new_col_names <- bind_rows(df_new_stat$df_add_stats) %>% names()
if (any(new_col_names %in% names(x$table_body))) {
paste(
"Cannot add new column that already exist in gtsummary table:",
"{.field {quoted_list(new_col_names %in% intersect(names(x$table_body)))}}"
) %>%
abort()
}
# merging new columns with `x$table_body` ------------------------------------
x <-
x %>%
modify_table_body(
left_join,
df_new_stat %>% tidyr::unnest(cols = c("label", "df_add_stats")),
by = c("variable", "row_type", "label")
) %>%
# showing all new columns
modify_table_styling(
columns = all_of(new_col_names),
hide = FALSE,
) %>%
# assigning a default fmt_fun
modify_table_styling(
columns = c(where(is.numeric) & all_of(new_col_names)),
fmt_fun = function(x) style_sigfig(x, digits = 3)
) %>%
# if a numeric column is called 'p.value' or 'q.value', giving p-value default formatting
modify_table_styling(
columns = c(where(is.numeric) & any_of(c("p.value", "q.value"))),
fmt_fun = get_theme_element("pkgwide-fn:pvalue_fun", default = style_pvalue)
)
# return tbl_summary object --------------------------------------------------
# fill in the Ns in the header table modify_stat_* columns
x <- .fill_table_header_modify_stats(x)
x$call_list <- updated_call_list
x
}
eval_fn_safe <- function(variable, tbl, fn) {
tryCatch(
withCallingHandlers(
{
# initializing to NA
stat <- NA_real_
stat <- rlang::call2(
fn,
data = tbl$inputs$data,
variable = variable,
by = tbl$inputs$by,
tbl = tbl
) %>%
eval()
},
# printing warning and errors as message
warning = function(w) {
message(glue(
"There was an warning for variable '{variable}':\n ", as.character(w)
))
invokeRestart("muffleWarning")
}
),
error = function(e) {
message(glue(
"There was an error for variable '{variable}':\n", as.character(e)
))
return(NA_real_)
}
)
stat
}