Skip to content

Commit

Permalink
glimpse functions super fast
Browse files Browse the repository at this point in the history
  • Loading branch information
ewenharrison committed Oct 3, 2018
1 parent 8cb4509 commit d1cbd2d
Show file tree
Hide file tree
Showing 81 changed files with 6,093 additions and 648 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: finalfit
Type: Package
Title: Quickly Create Elegant Regression Results Tables and Plots when Modelling
Version: 0.8.7
Version: 0.8.8
Authors@R: c(
person(given = "Ewen", family = "Harrison", role = c("aut", "cre"), email = "[email protected]"),
person(given = "Tom", family = "Drake", role = c("aut")),
Expand Down
5 changes: 1 addition & 4 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ export(dependent_label)
export(extract_fit)
export(extract_labels)
export(extract_variable_label)
export(ff_describe)
export(ff_formula)
export(ff_glimpse)
export(ff_merge)
Expand All @@ -51,6 +50,7 @@ export(lmuni)
export(metrics_hoslem)
export(missing_compare)
export(missing_df)
export(missing_glimpse)
export(missing_pairs)
export(missing_pattern)
export(missing_plot)
Expand Down Expand Up @@ -78,14 +78,11 @@ importFrom(stats,confint)
importFrom(stats,glm)
importFrom(stats,lm)
importFrom(stats,logLik)
importFrom(stats,mad)
importFrom(stats,median)
importFrom(stats,na.omit)
importFrom(stats,pchisq)
importFrom(stats,pnorm)
importFrom(stats,predict)
importFrom(stats,quantile)
importFrom(stats,sd)
importFrom(stats,setNames)
importFrom(stats,xtabs)
importFrom(survival,Surv)
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# finalfit 0.8.8

* `ff_glimpse` re-written to remove `psych` dependency
* `missing_glimpse` added: single data frame describing all variables and missing values

# finalfit 0.8.7

* New vignettes
Expand Down
176 changes: 95 additions & 81 deletions R/ff_glimpse.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
#' @param explanatory Optional character vector: name(s) of explanatory
#' variable(s).
#' @param digits Significant digits for continuous variable summaries
#' @param levels_cut Max number of factor levels to include in factor levels
#' summary (in order to avoid the long printing of variables with many
#' factors).
#'
#' @return Dataframe on summary data.
#' @export
Expand All @@ -23,89 +26,100 @@
#' colon_s %>%
#' finalfit_glimpse(dependent, explanatory)

ff_glimpse <- function(.data, dependent=NULL, explanatory=NULL, digits = 1){
if(is.null(dependent) && is.null(explanatory)){
df.in = .data
}else{
keep = names(.data) %in% c(dependent, explanatory)
df.in = .data[keep]
}

# Continuous
df.in %>%
dplyr::select_if(is.numeric) -> df.numeric

if(dim(df.numeric)[2]!=0){
df.numeric %>%
ff_describe(na.rm = TRUE, interp=FALSE, skew = FALSE, ranges = TRUE,
check=TRUE,fast=F, omit=FALSE) %>%
format(digits = digits, scientific=FALSE) %>%
dplyr::select(-vars)-> df.numeric.out1

df.numeric %>%
lapply(function(x){
label = attr(x, "label")
list(label=label)
}) %>%
do.call(rbind, .) -> df.numeric.out2

df.numeric.out = data.frame(df.numeric.out2, df.numeric.out1)

}else{
df.numeric.out = df.numeric
}

# Factors
df.in %>%
dplyr::select_if(Negate(is.numeric)) -> df.factors

if(dim(df.factors)[2]!=0){
df.factors %>%
lapply(function(x){
n = which(!is.na(x)) %>% length()
missing_n = which(is.na(x)) %>% length()
missing_percent = format(missing_n*100/length(x), digits = 2)
label = attr(x, "label")
levels_n = length(levels(x))
levels = ifelse(is.factor(x),
levels(x) %>%
paste0("\"", ., "\"", collapse = ", "),
"-")
levels_count = ifelse(is.factor(x),
summary(x) %>%
paste(collapse = ", "),
"-")
levels_percent = ifelse(is.factor(x),
summary(x) %>%
prop.table() %>%
`*`(100) %>%
format(digits = 2) %>%
paste(collapse=", "),
"-")
list(label=label, n=n, missing_n = missing_n, missing_percent = missing_percent,
level_n=levels_n, levels=levels,
levels_count=levels_count, levels_percent = levels_percent)
}
) %>%
do.call(rbind, .) %>%
data.frame() -> df.factors.out

}else{
df.factors.out = df.factors
}

cat("Numerics\n")
print(df.numeric.out, row.names = TRUE)
cat("\nFactors\n")
print(df.factors.out, row.names = TRUE)

return(invisible(
list(
numerics = df.numeric.out,
factors = df.factors.out))
)
ff_glimpse <- function(.data, dependent=NULL, explanatory=NULL, digits = 1,
levels_cut = 5){
if(is.null(dependent) && is.null(explanatory)){
df.in = .data
}else{
keep = names(.data) %in% c(dependent, explanatory)
df.in = .data[keep]
}

# Continuous
df.in %>%
dplyr::select_if(is.numeric) -> df.numeric

if(dim(df.numeric)[2]!=0){
df.numeric %>%
missing_glimpse(digits=digits) -> df.numeric.out1

df.numeric %>%
purrr::map_df(function(x){
mean = mean(x, na.rm = TRUE)
sd = sd(x, na.rm = TRUE)
min = min(x, na.rm = TRUE)
quartile_25 = quantile(x, probs = 0.25, na.rm = TRUE)
median = median(x, na.rm = TRUE)
quartile_75 = quantile(x, probs = 0.75, na.rm = TRUE)
max = max(x, na.rm = TRUE)
df.out = data.frame(mean, sd, min, quartile_25, median, quartile_75, max) %>%
dplyr::mutate_all(round_tidy, digits=digits)
}) -> df.numeric.out2

df.numeric.out = data.frame(df.numeric.out1, df.numeric.out2)

}else{
df.numeric.out = df.numeric
}

# Factors
df.in %>%
dplyr::select_if(Negate(is.numeric)) -> df.factors

if(dim(df.factors)[2]!=0){

df.factors %>%
missing_glimpse(digits=digits) -> df.factors.out1

fac2char = function(., cut = levels_cut) {
length(levels(.)) > cut
}

df.factors %>%
dplyr::mutate_if(fac2char, as.character) -> df.factors


df.factors %>%
purrr::map_df(function(x){
levels_n = length(levels(as.factor(x)))
levels = ifelse(is.factor(x),
forcats::fct_explicit_na(x) %>%
levels() %>%
paste0("\"", ., "\"", collapse = ", "),
"-")
levels_count = ifelse(is.factor(x),
summary(x) %>%
paste(collapse = ", "),
"-")
levels_percent = ifelse(is.factor(x),
summary(x) %>%
prop.table() %>%
`*`(100) %>%
format(digits = 2) %>%
paste(collapse=", "),
"-")
df.out = dplyr::data_frame(levels_n, levels, levels_count, levels_percent) %>% data.frame()
}) -> df.factors.out2

df.factors.out = data.frame(df.factors.out1, df.factors.out2)

}else{
df.factors.out = df.factors
}

cat("Numerics\n")
print(df.numeric.out, row.names = TRUE)
cat("\nFactors\n")
print(df.factors.out, row.names = TRUE)

return(invisible(
list(
numerics = df.numeric.out,
factors = df.factors.out))
)
}


#' @rdname ff_glimpse
#' @export
finalfit_glimpse <- ff_glimpse
Loading

0 comments on commit d1cbd2d

Please sign in to comment.