Merge pull request epiforecasts#102 from epiforecasts/master
update branch with master
nikosbosse authored Feb 3, 2021
2 parents 7e5880e + 7234536 commit 762163b
Showing 14 changed files with 1,218 additions and 10 deletions.
5 changes: 5 additions & 0 deletions DESCRIPTION
@@ -1,6 +1,7 @@
Package: scoringutils
Title: Utilities for Scoring and Assessing Predictions
Version: 0.1.7
Language: en-GB
Authors@R: c(
person(given = "Nikos",
family = "Bosse",
@@ -11,6 +12,10 @@ Authors@R: c(
role = c("aut"),
email = "[email protected]",
comment = c(ORCID = "0000-0001-8057-8037")),
person(given = "Johannes Bracher",
role = c("ctb"),
email = "[email protected]",
comment = c(ORCID = "0000-0002-3777-1410")),
person("Joel", "Hellewell",
email = "[email protected]",
role = c("ctb"),
9 changes: 9 additions & 0 deletions NAMESPACE
@@ -14,9 +14,11 @@ export(interval_score)
export(logs)
export(merge_pred_and_obs)
export(mse)
export(pairwise_comparison)
export(pit)
export(pit_df)
export(pit_df_fast)
export(plot_pairwise_comparison)
export(plot_predictions)
export(quantile_bias)
export(quantile_coverage)
@@ -46,10 +48,12 @@ importFrom(data.table,`%like%`)
importFrom(data.table,`:=`)
importFrom(data.table,as.data.table)
importFrom(data.table,copy)
importFrom(data.table,data.table)
importFrom(data.table,dcast)
importFrom(data.table,melt)
importFrom(data.table,rbindlist)
importFrom(data.table,setDT)
importFrom(data.table,setnames)
importFrom(forcats,fct_relevel)
importFrom(forcats,fct_rev)
importFrom(ggplot2,aes)
@@ -87,6 +91,11 @@ importFrom(stats,cor)
importFrom(stats,mad)
importFrom(stats,median)
importFrom(stats,na.omit)
importFrom(stats,p.adjust)
importFrom(stats,quantile)
importFrom(stats,rbinom)
importFrom(stats,reorder)
importFrom(stats,runif)
importFrom(stats,sd)
importFrom(stats,wilcox.test)
importFrom(utils,combn)
6 changes: 6 additions & 0 deletions NEWS.md
@@ -1,4 +1,10 @@
## scoringutils 0.1.7
### Feature updates
- added a function, `pairwise_comparison()`, that runs pairwise comparisons
between models on the output of `eval_forecasts()` (see the sketch below)
- added functionality to compute relative skill within `eval_forecasts()`
- added a function, `plot_pairwise_comparison()`, to visualise pairwise comparisons

### Package updates
- The WIS definition change introduced in version 0.1.5 was partly corrected
such that the difference in weighting is only introduced when summarising
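Below is a minimal sketch of the pairwise-comparison workflow the NEWS entry above describes. It is illustrative only: `example_data` stands in for a long-format forecast data set with 'true_value', 'prediction', 'quantile' and 'model' columns, and the argument names of `pairwise_comparison()` are assumptions based on this commit's documentation rather than confirmed API.

library(scoringutils)

# score each forecast individually (sketch; `example_data` is a placeholder)
scores <- eval_forecasts(example_data, summarised = FALSE)

# run pairwise comparisons between models (argument names assumed)
pw <- pairwise_comparison(scores, metric = "interval_score")

# visualise the comparison (new in this release)
plot_pairwise_comparison(pw)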
56 changes: 51 additions & 5 deletions R/eval_forecasts.R
@@ -117,6 +117,19 @@
#' @param merge_by character vector with column names that `forecasts` and
#' `truth_data` should be merged on. Default is `NULL` and merge will be
#' attempted automatically.
#' @param compute_relative_skill logical, whether or not to compute relative
#' performance between models. If `TRUE` (the default), then a column called
#' 'model' must be present in the input data. For more information on
#' the computation of relative skill, see \code{\link{pairwise_comparison}}.
#' Relative skill will be calculated for the aggregation level specified in
#' `summarise_by`.
#' @param rel_skill_metric character string with the name of the metric for which
#' relative skill shall be computed. If equal to 'auto' (the default), then
#' one of interval score, CRPS or Brier score will be used where appropriate.
#' @param baseline character string with the name of a model. If a baseline is
#' given, then a scaled relative skill with respect to the baseline will be
#' returned. By default (`NULL`), relative skill will not be scaled with
#' respect to a baseline model.
#'
#' @return A data.table with appropriate scores. For binary predictions,
#' the Brier Score will be returned, for quantile predictions the interval
@@ -199,7 +212,10 @@ eval_forecasts <- function(data = NULL,
verbose = TRUE,
forecasts = NULL,
truth_data = NULL,
merge_by = NULL) {
merge_by = NULL,
compute_relative_skill = TRUE,
rel_skill_metric = "auto",
baseline = NULL) {


# preparations ---------------------------------------------------------------
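A hedged usage sketch for the three new arguments documented above; `example_data`, the model name and the `target_type` grouping column are placeholders, not part of the package:

eval_forecasts(
  data = example_data,
  summarise_by = c("model", "target_type"), # relative skill is computed at this level
  compute_relative_skill = TRUE,            # the default
  rel_skill_metric = "auto",                # resolves to interval score, CRPS or Brier score
  baseline = "naive_model"                  # additionally returns skill scaled against this model
)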
@@ -220,6 +236,36 @@
# do a copy to avoid that the input may be altered in any way.
data <- data.table::as.data.table(data)

# error handling for relative skill computation
# should probably wrap this in a function warn_if_verbose(warning, verbose)
if (compute_relative_skill) {
if (!("model" %in% colnames(data))) {
if (verbose) {
warning("to compute relative skills, there must column present called 'model'. Relative skill will not be computed")
}
compute_relative_skill <- FALSE
}
models <- unique(data$model)
# at least two non-baseline models are needed (plus the baseline itself, if given)
if (length(models) < 2 + (!is.null(baseline))) {
if (verbose) {
warning("you need more than one model non-baseline model to make model comparisons. Relative skill will not be computed")
}
compute_relative_skill <- FALSE
}
if (!is.null(baseline) && !(baseline %in% models)) {
if (verbose) {
warning("The baseline you provided for the relative skill is not one of the models in the data. Relative skill will not be computed.")
}
compute_relative_skill <- FALSE
}
if (rel_skill_metric != "auto" && !(rel_skill_metric %in% list_of_avail_metrics())) {
if (verbose) {
warning("argument 'rel_skill_metric' must either be 'auto' or one of the metrics that can be computed. Relative skill will not be computed")
}
compute_relative_skill <- FALSE
}
}

# check that everything is unique
unique_data <- unique(data)
if (nrow(unique_data) != nrow(data)) {
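To illustrate the guardrails above, two calls that would trigger them (a sketch; `example_data` and the model names are placeholders):

# only one model present: relative skill is skipped with a warning
eval_forecasts(example_data[example_data$model == "model_a", ],
               compute_relative_skill = TRUE)

# baseline not among the models in the data: also skipped with a warning
eval_forecasts(example_data, baseline = "model_that_does_not_exist")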
@@ -329,7 +375,10 @@
pit_plots = pit_plots,
interval_score_arguments = interval_score_arguments,
summarised = summarised,
verbose = verbose)
verbose = verbose,
compute_relative_skill = compute_relative_skill,
rel_skill_metric = rel_skill_metric,
baseline = baseline)
return(res)
}

@@ -349,10 +398,7 @@ eval_forecasts <- function(data = NULL,
summarised = summarised,
verbose = verbose)
return(res)

}


}


19 changes: 16 additions & 3 deletions R/eval_forecasts_quantile.R
@@ -7,7 +7,10 @@ eval_forecasts_quantile <- function(data,
pit_plots,
interval_score_arguments,
summarised,
verbose) {
verbose,
compute_relative_skill,
rel_skill_metric,
baseline) {

# make sure data is in the correct format ------------------------------------
# check format
@@ -135,7 +138,16 @@
}


############################ pairwise comparisons ############################
if (compute_relative_skill) {

relative_res <- add_rel_skill_to_eval_forecasts(unsummarised_scores = res,
rel_skill_metric = rel_skill_metric,
baseline = baseline,
by = by,
summarise_by = summarise_by,
verbose = verbose)
res <- merge(res, relative_res, by = by)
}

# summarise scores if desired ------------------------------------------------
if (summarised) {
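The `merge()` in the pairwise-comparisons block above joins the per-group relative skill columns back onto the unsummarised scores via the `by` columns. A self-contained illustration of that join with made-up values:

library(data.table)
scores <- data.table(model = c("a", "a", "b", "b"), interval_score = 1:4)
rel <- data.table(model = c("a", "b"), relative_skill = c(0.9, 1.1))
# each model's relative skill is repeated across all of its score rows
merge(scores, rel, by = "model")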
@@ -163,7 +175,7 @@
res <- res[, lapply(.SD, mean, na.rm = TRUE),
by = c(summarise_by),
.SDcols = colnames(res) %like%
"coverage|bias|sharpness|coverage_deviation|interval_score|overprediction|underprediction|aem|ae_point"]
"coverage|bias|sharpness|coverage_deviation|interval_score|overprediction|underprediction|aem|ae_point|relative_skill|scaled_rel_skill"]
}

# if neither quantile nor range are in summarise_by, remove coverage and quantile_coverage
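The summarisation step above uses data.table's `.SDcols` together with the `%like%` regex helper to average only the score columns. A self-contained sketch with made-up data:

library(data.table)
dt <- data.table(model = c("a", "a", "b"),
                 interval_score = c(1, 2, 3),
                 bias = c(0, 0.1, -0.1),
                 horizon = c(1, 2, 1)) # no regex match, so excluded from averaging
dt[, lapply(.SD, mean, na.rm = TRUE),
   by = "model",
   .SDcols = colnames(dt) %like% "score|bias"]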
@@ -173,5 +185,6 @@
if (!("quantile" %in% summarise_by)) {
res[, c("quantile_coverage") := NULL]
}

return(res)
}