man/hmda.wmshap.table.Rd

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/hmda.wmshap.table.R
\name{hmda.wmshap.table}
\alias{hmda.wmshap.table}
\title{Create SHAP Summary Table Based on the Given Criterion}
\usage{
hmda.wmshap.table(
  wmshap,
  method = c("mean"),
  cutoff = 0.01,
  round = 3,
  exclude_features = NULL,
  dict = dictionary(raw, attribute = "label"),
  markdown.table = TRUE,
  split.tables = 120,
  split.cells = 50
)
}
\arguments{
\item{wmshap}{A \code{wmshap} object returned by the \code{hmda.wmshap}
function, containing a data frame \code{summaryShaps}.}

\item{method}{Character. Specifies the method for selecting important features
based on their weighted mean SHAP ratios. The default is
\code{"mean"}, which selects features whose weighted mean SHAP ratio (WMSHAP)
exceeds the \code{cutoff}. The alternative is
\code{"lowerCI"}, which selects features whose lower bound of the confidence
interval exceeds the \code{cutoff}.}

\item{cutoff}{Numeric. The threshold cutoff for the selection method;
only features with a value in the \code{method} column
greater than or equal to this value are retained.
Default is \code{0.01}.}

\item{round}{Integer. The number of decimal places to round the
SHAP mean and confidence interval values. Default is
\code{3}.}

\item{exclude_features}{Character vector. A vector of feature names to be
excluded from the summary table. Default is \code{NULL}.}

\item{dict}{A data frame containing at least two columns named
\code{"name"} and \code{"description"}. If provided, the
function uses this dictionary to add human-readable feature
descriptions. Default is \code{dictionary(raw, attribute = "label")}.}

\item{markdown.table}{Logical. If \code{TRUE}, the output is formatted as a
markdown table using the \pkg{pander} package; otherwise, a
data frame is returned. Default is \code{TRUE}.}

\item{split.tables}{Integer. Controls table splitting in \code{pander()}.
Default is \code{120}.}

\item{split.cells}{Integer. Controls cell splitting in \code{pander()}.
Default is \code{50}.}
}
\value{
If \code{markdown.table = TRUE}, returns a markdown table (invisibly)
        showing two columns: \code{"Description"} and \code{"WMSHAP"}. If
        \code{markdown.table = FALSE}, returns a data frame with these columns.
}
\description{
Generates a summary table of weighted mean SHAP (WMSHAP) values
  and confidence intervals for each feature based on a weighted SHAP analysis.
  The function filters the SHAP summary table (from a \code{wmshap} object) by
  selecting features that meet or exceed a specified cutoff using a selection
  method (default "mean"). It then sorts the table by the mean SHAP value,
  formats the SHAP values along with their 95\% confidence intervals into a single
  string, and optionally adds human-readable feature descriptions from a provided
  dictionary. The output is returned as a markdown table using the \pkg{pander}
  package, or as a data frame if requested.
}
\examples{
\dontrun{
  # End-to-end walkthrough: tune a GBM grid, compute weighted mean SHAP
  # (WMSHAP) values across the grid, and summarise them in a table.
  library(HMDA)
  library(h2o)
  hmda.init()

  # Import a sample binary outcome dataset into H2O
  train <- h2o.importFile(
    "https://s3.amazonaws.com/h2o-public-test-data/smalldata/higgs/higgs_train_10k.csv")
  test <- h2o.importFile(
    "https://s3.amazonaws.com/h2o-public-test-data/smalldata/higgs/higgs_test_5k.csv")

  # Identify predictors and response
  y <- "response"
  x <- setdiff(names(train), y)

  # For binary classification, response should be a factor
  train[, y] <- as.factor(train[, y])
  test[, y] <- as.factor(test[, y])

  # Hyperparameter values to be combined in the grid search
  params <- list(learn_rate = c(0.01, 0.1),
                 max_depth = c(3, 5, 9),
                 sample_rate = c(0.8, 1.0)
  )

  # Train and validate a cartesian grid of GBMs
  # (hyper_params must refer to the 'params' list defined above)
  hmda_grid1 <- hmda.grid(algorithm = "gbm", x = x, y = y,
                          grid_id = "hmda_grid1",
                          training_frame = train,
                          nfolds = 10,
                          ntrees = 100,
                          seed = 1,
                          hyper_params = params)

  # Assess the performances of the models
  grid_performance <- hmda.grid.analysis(hmda_grid1)

  # Return the best 2 models according to each metric
  hmda.best.models(grid_performance, n_models = 2)

  # build an autoEnsemble model & test it with the testing dataset
  meta <- hmda.autoEnsemble(models = hmda_grid1, training_frame = train)
  print(h2o.performance(model = meta$model, newdata = test))

  # compute weighted mean shap values
  wmshap <- hmda.wmshap(models = hmda_grid1,
                        newdata = test,
                        performance_metric = "aucpr",
                        standardize_performance_metric = FALSE,
                        performance_type = "xval",
                        minimum_performance = 0,
                        method = "mean",
                        cutoff = 0.01,
                        plot = TRUE)

  # identify the important features
  selected <- hmda.feature.selection(wmshap,
                                     method = c("mean"),
                                     cutoff = 0.01)
  print(selected)

  # View the plot of weighted mean SHAP values and confidence intervals
  print(wmshap$plot)

  # get the wmshap table output in Markdown format, using the function
  # documented on this page:
  md_table <- hmda.wmshap.table(wmshap = wmshap,
                                method = "mean",
                                cutoff = 0.01,
                                round = 3,
                                markdown.table = TRUE)
  head(md_table)
}

}
\author{
E. F. Haghish
}