Merge pull request epiforecasts#102 from epiforecasts/master
update branch with master
nikosbosse authored Feb 3, 2021
2 parents 7e5880e + 7234536 commit 762163b
Showing 14 changed files with 1,218 additions and 10 deletions.
5 changes: 5 additions & 0 deletions DESCRIPTION
@@ -1,6 +1,7 @@
Package: scoringutils
Title: Utilities for Scoring and Assessing Predictions
Version: 0.1.7
Language: en-GB
Authors@R: c(
person(given = "Nikos",
family = "Bosse",
@@ -11,6 +12,10 @@ Authors@R: c(
role = c("aut"),
email = "[email protected]",
comment = c(ORCID = "0000-0001-8057-8037")),
person(given = "Johannes Bracher",
role = c("ctb"),
email = "[email protected]",
comment = c(ORCID = "0000-0002-3777-1410")),
person("Joel", "Hellewell",
email = "[email protected]",
role = c("ctb"),
9 changes: 9 additions & 0 deletions NAMESPACE
@@ -14,9 +14,11 @@ export(interval_score)
export(logs)
export(merge_pred_and_obs)
export(mse)
export(pairwise_comparison)
export(pit)
export(pit_df)
export(pit_df_fast)
export(plot_pairwise_comparison)
export(plot_predictions)
export(quantile_bias)
export(quantile_coverage)
@@ -46,10 +48,12 @@ importFrom(data.table,`%like%`)
importFrom(data.table,`:=`)
importFrom(data.table,as.data.table)
importFrom(data.table,copy)
importFrom(data.table,data.table)
importFrom(data.table,dcast)
importFrom(data.table,melt)
importFrom(data.table,rbindlist)
importFrom(data.table,setDT)
importFrom(data.table,setnames)
importFrom(forcats,fct_relevel)
importFrom(forcats,fct_rev)
importFrom(ggplot2,aes)
@@ -87,6 +91,11 @@ importFrom(stats,cor)
importFrom(stats,mad)
importFrom(stats,median)
importFrom(stats,na.omit)
importFrom(stats,p.adjust)
importFrom(stats,quantile)
importFrom(stats,rbinom)
importFrom(stats,reorder)
importFrom(stats,runif)
importFrom(stats,sd)
importFrom(stats,wilcox.test)
importFrom(utils,combn)
6 changes: 6 additions & 0 deletions NEWS.md
@@ -1,4 +1,10 @@
## scoringutils 0.1.7
### Feature updates
- added a function, `pairwise_comparison()`, that runs pairwise comparisons
between models on the output of `eval_forecasts()` (see the sketch below)
- added functionality to compute relative skill within `eval_forecasts()`
- added a function, `plot_pairwise_comparison()`, to visualise pairwise comparisons

### Package updates
- The WIS definition change introduced in version 0.1.5 was partly corrected
such that the difference in weighting is only introduced when summarising
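Below is a minimal sketch of the pairwise-comparison workflow the NEWS entry above describes. It is illustrative only: `example_data` stands in for a long-format forecast data set with 'true_value', 'prediction', 'quantile' and 'model' columns, and the argument names of `pairwise_comparison()` are assumptions based on this commit's documentation rather than confirmed API.

library(scoringutils)

# score each forecast individually (sketch; `example_data` is a placeholder)
scores <- eval_forecasts(example_data, summarised = FALSE)

# run pairwise comparisons between models (argument names assumed)
pw <- pairwise_comparison(scores, metric = "interval_score")

# visualise the comparison (new in this release)
plot_pairwise_comparison(pw)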
56 changes: 51 additions & 5 deletions R/eval_forecasts.R
@@ -117,6 +117,19 @@
#' @param merge_by character vector with column names that `forecasts` and
#' `truth_data` should be merged on. Default is `NULL` and merge will be
#' attempted automatically.
#' @param compute_relative_skill logical, whether or not to compute relative
#' performance between models. If `TRUE` (the default), then a column called
#' 'model' must be present in the input data. For more information on
#' the computation of relative skill, see \code{\link{pairwise_comparison}}.
#' Relative skill will be calculated for the aggregation level specified in
#' `summarise_by`.
#' @param rel_skill_metric character string with the name of the metric for which
#' relative skill shall be computed. If equal to 'auto' (the default), then
#' one of interval score, CRPS or Brier score will be used where appropriate.
#' @param baseline character string with the name of a model. If a baseline is
#' given, then a scaled relative skill with respect to the baseline will be
#' returned. By default (`NULL`), relative skill will not be scaled with
#' respect to a baseline model.
#'
#' @return A data.table with appropriate scores. For binary predictions,
#' the Brier Score will be returned, for quantile predictions the interval
@@ -199,7 +212,10 @@ eval_forecasts <- function(data = NULL,
verbose = TRUE,
forecasts = NULL,
truth_data = NULL,
merge_by = NULL) {
merge_by = NULL,
compute_relative_skill = TRUE,
rel_skill_metric = "auto",
baseline = NULL) {


# preparations ---------------------------------------------------------------
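A hedged usage sketch for the three new arguments documented above; `example_data`, the model name and the `target_type` grouping column are placeholders, not part of the package:

eval_forecasts(
  data = example_data,
  summarise_by = c("model", "target_type"), # relative skill is computed at this level
  compute_relative_skill = TRUE,            # the default
  rel_skill_metric = "auto",                # resolves to interval score, CRPS or Brier score
  baseline = "naive_model"                  # additionally returns skill scaled against this model
)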
@@ -220,6 +236,36 @@
# do a copy to avoid that the input may be altered in any way.
data <- data.table::as.data.table(data)

# error handling for relative skill computation
# should probably wrap this in a function warn_if_verbose(warning, verbose)
if (compute_relative_skill) {
if (!("model" %in% colnames(data))) {
if (verbose) {
warning("to compute relative skills, there must column present called 'model'. Relative skill will not be computed")
}
compute_relative_skill <- FALSE
}
models <- unique(data$model)
# at least two non-baseline models are needed (plus the baseline itself, if given)
if (length(models) < 2 + (!is.null(baseline))) {
if (verbose) {
warning("you need more than one model non-baseline model to make model comparisons. Relative skill will not be computed")
}
compute_relative_skill <- FALSE
}
if (!is.null(baseline) && !(baseline %in% models)) {
if (verbose) {
warning("The baseline you provided for the relative skill is not one of the models in the data. Relative skill will not be computed.")
}
compute_relative_skill <- FALSE
}
if (rel_skill_metric != "auto" && !(rel_skill_metric %in% list_of_avail_metrics())) {
if (verbose) {
warning("argument 'rel_skill_metric' must either be 'auto' or one of the metrics that can be computed. Relative skill will not be computed")
}
compute_relative_skill <- FALSE
}
}

# check that everything is unique
unique_data <- unique(data)
if (nrow(unique_data) != nrow(data)) {
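To illustrate the guardrails above, two calls that would trigger them (a sketch; `example_data` and the model names are placeholders):

# only one model present: relative skill is skipped with a warning
eval_forecasts(example_data[example_data$model == "model_a", ],
               compute_relative_skill = TRUE)

# baseline not among the models in the data: also skipped with a warning
eval_forecasts(example_data, baseline = "model_that_does_not_exist")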
@@ -329,7 +375,10 @@
pit_plots = pit_plots,
interval_score_arguments = interval_score_arguments,
summarised = summarised,
verbose = verbose)
verbose = verbose,
compute_relative_skill = compute_relative_skill,
rel_skill_metric = rel_skill_metric,
baseline = baseline)
return(res)
}

@@ -349,10 +398,7 @@ eval_forecasts <- function(data = NULL,
summarised = summarised,
verbose = verbose)
return(res)

}


}


19 changes: 16 additions & 3 deletions R/eval_forecasts_quantile.R
@@ -7,7 +7,10 @@ eval_forecasts_quantile <- function(data,
pit_plots,
interval_score_arguments,
summarised,
verbose) {
verbose,
compute_relative_skill,
rel_skill_metric,
baseline) {

# make sure data is in the correct format ------------------------------------
# check format
@@ -135,7 +138,16 @@
}


############################ pairwise comparisons ############################
if (compute_relative_skill) {

relative_res <- add_rel_skill_to_eval_forecasts(unsummarised_scores = res,
rel_skill_metric = rel_skill_metric,
baseline = baseline,
by = by,
summarise_by = summarise_by,
verbose = verbose)
res <- merge(res, relative_res, by = by)
}

# summarise scores if desired ------------------------------------------------
if (summarised) {
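The `merge()` in the pairwise-comparisons block above joins the per-group relative skill columns back onto the unsummarised scores via the `by` columns. A self-contained illustration of that join with made-up values:

library(data.table)
scores <- data.table(model = c("a", "a", "b", "b"), interval_score = 1:4)
rel <- data.table(model = c("a", "b"), relative_skill = c(0.9, 1.1))
# each model's relative skill is repeated across all of its score rows
merge(scores, rel, by = "model")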
@@ -163,7 +175,7 @@
res <- res[, lapply(.SD, mean, na.rm = TRUE),
by = c(summarise_by),
.SDcols = colnames(res) %like%
"coverage|bias|sharpness|coverage_deviation|interval_score|overprediction|underprediction|aem|ae_point"]
"coverage|bias|sharpness|coverage_deviation|interval_score|overprediction|underprediction|aem|ae_point|relative_skill|scaled_rel_skill"]
}

# if neither quantile nor range are in summarise_by, remove coverage and quantile_coverage
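The summarisation step above uses data.table's `.SDcols` together with the `%like%` regex helper to average only the score columns. A self-contained sketch with made-up data:

library(data.table)
dt <- data.table(model = c("a", "a", "b"),
                 interval_score = c(1, 2, 3),
                 bias = c(0, 0.1, -0.1),
                 horizon = c(1, 2, 1)) # no regex match, so excluded from averaging
dt[, lapply(.SD, mean, na.rm = TRUE),
   by = "model",
   .SDcols = colnames(dt) %like% "score|bias"]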
@@ -173,5 +185,6 @@
if (!("quantile" %in% summarise_by)) {
res[, c("quantile_coverage") := NULL]
}

return(res)
}