Skip to content

Commit

Permalink
Removes furrr, future dependencies, adds Rcpp, RcppParallel, …
Browse files Browse the repository at this point in the history
…and `purrr` dependencies (#74)

Makes data.table's rbindlist_with_attrs() the default loading method.
  • Loading branch information
saiemgilani authored May 31, 2022
1 parent 0554a86 commit 227fd6a
Show file tree
Hide file tree
Showing 26 changed files with 190 additions and 101 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@ Imports:
cli (>= 3.0.0),
data.table (>= 1.14.0),
dplyr,
furrr,
future,
glue,
httr,
janitor,
Expand All @@ -46,8 +44,9 @@ Imports:
nnet,
progressr (>= 0.6.0),
purrr (>= 0.3.0),
Rcpp (>= 1.0.7),
RcppParallel (>= 5.1.4),
rlang,
stringi,
stringr (>= 1.3.0),
tibble (>= 3.0),
tidyr (>= 1.0.0)
Expand All @@ -61,6 +60,7 @@ Suggests:
rmarkdown,
RSQLite,
stats,
stringi,
testthat,
usethis (>= 1.6.0),
xgboost (>= 1.1)
Expand Down
6 changes: 5 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,12 @@ import(purrr)
import(stringr)
import(tidyr)
import(utils)
importFrom(Rcpp,getRcppVersion)
importFrom(RcppParallel,defaultNumThreads)
importFrom(cli,cli_abort)
importFrom(data.table,data.table)
importFrom(data.table,fread)
importFrom(data.table,setDT)
importFrom(dplyr,arrange)
importFrom(dplyr,as_tibble)
importFrom(dplyr,between)
Expand Down Expand Up @@ -123,7 +128,6 @@ importFrom(purrr,set_names)
importFrom(rlang,.data)
importFrom(stats,na.omit)
importFrom(stats,predict)
importFrom(stringi,stri_extract_first_regex)
importFrom(stringr,str_detect)
importFrom(stringr,str_extract)
importFrom(stringr,str_length)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
* [```load_cfb_schedules()```](https://cfbfastR.sportsdataverse.org/reference/load_cfb_schedules.html)
* [```load_cfb_teams()```](https://cfbfastR.sportsdataverse.org/reference/load_cfb_teams.html)

- Removes the `furrr` and `future` dependencies and adds the `Rcpp`, `RcppParallel`, and `purrr` dependencies.

# **cfbfastR v1.8.0**

* All functions now default to return tibbles.
Expand Down
53 changes: 22 additions & 31 deletions R/cfb_pbp.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,48 +8,39 @@ NULL
#' or writes it into a db using some forwarded arguments in the dots
#' @param seasons A vector of 4-digit years associated with given College Football seasons.
#' @param ... Additional arguments passed to an underlying function that writes
#' the season data into a database (used by [`update_cfb_db()`][update_cfb_db]).
#' @param qs Whether to use the function [qs::qdeserialize()] for more efficient loading.
#' the season data into a database (used by [`update_cfb_db()`][update_cfb_db])
#' @param dbConnection A `DBIConnection` object, as returned by [DBI::dbConnect()]
#' @param tablename The name of the play by play data table within the database
#' @export
load_cfb_pbp <- function(seasons, ..., qs = FALSE) {
load_cfb_pbp <- function(seasons = most_recent_cfb_season(),...,
dbConnection = NULL, tablename = NULL) {
dots <- rlang::dots_list(...)

if (all(c("dbConnection", "tablename") %in% names(dots))) in_db <- TRUE else in_db <- FALSE
loader <- rds_from_url

if (isTRUE(qs) && !is_installed("qs")) {
cli::cli_abort("Package {.val qs} required for argument {.val qs = TRUE}. Please install it.")
}
if (!is.null(dbConnection) && !is.null(tablename)) in_db <- TRUE else in_db <- FALSE

most_recent <- most_recent_cfb_season()
if(isTRUE(seasons)) seasons <- 2014:most_recent_cfb_season()

if (!all(seasons %in% 2014:most_recent)) {
cli::cli_abort("Please pass valid seasons between 2014 and {most_recent}")
}
stopifnot(is.numeric(seasons),
seasons >= 2014,
seasons <= most_recent_cfb_season())

if (length(seasons) > 1 && is_sequential() && isFALSE(in_db)) {
cli::cli_alert_info(c(
"It is recommended to use parallel processing when trying to load multiple seasons.",
"Please consider running {.code future::plan(\"multisession\")}!",
"Will go on sequentially..."
))
}
urls <- paste0("https://raw.githubusercontent.com/sportsdataverse/cfbfastR-data/main/data/rds/pbp_players_pos_",seasons,".rds")

p <- NULL
if (is_installed("progressr")) p <- progressr::progressor(along = seasons)

p <- progressr::progressor(along = seasons)

if (isFALSE(in_db)) {
out <- furrr::future_map_dfr(seasons, cfb_single_season, p = p, qs = qs)
}

if (isTRUE(in_db)) {
purrr::walk(seasons, cfb_single_season, p, ..., qs = qs)
out <- lapply(urls, progressively(loader, p))
out <- data.table::rbindlist(out, use.names = TRUE, fill = TRUE)
if (in_db) {
DBI::dbWriteTable(dbConnection, tablename, out, append = TRUE)
out <- NULL
} else {
class(out) <- c("cfbfastR_data","tbl_df","tbl","data.table","data.frame")

}
# change this later when data in repo has attributes
if(is.null(attr(out,"cfbfastR_timestamp"))) {
out <- out %>%
make_cfbfastR_data("play-by-play data from cfbfastR data repo",Sys.time())
}
out
return(out)
}

Expand Down
2 changes: 1 addition & 1 deletion R/cfbd_betting.R
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ cfbd_betting_lines <- function(game_id = NULL,
df <- res %>%
httr::content(as = "text", encoding = "UTF-8") %>%
jsonlite::fromJSON(flatten = TRUE) %>%
furrr::future_map_if(is.data.frame, list) %>%
purrr::map_if(is.data.frame, list) %>%
dplyr::as_tibble() %>%
tidyr::unnest(.data$lines)

Expand Down
2 changes: 1 addition & 1 deletion R/cfbd_coaches.R
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ cfbd_coaches <- function(first = NULL,
df <- res %>%
httr::content(as = "text", encoding = "UTF-8") %>%
jsonlite::fromJSON() %>%
furrr::future_map_if(is.data.frame, list) %>%
purrr::map_if(is.data.frame, list) %>%
dplyr::as_tibble() %>%
tidyr::unnest(.data$seasons) %>%
dplyr::arrange(.data$year)
Expand Down
14 changes: 7 additions & 7 deletions R/cfbd_games.R
Original file line number Diff line number Diff line change
Expand Up @@ -721,8 +721,8 @@ cfbd_game_box_advanced <- function(game_id, long = FALSE) {
df <- res %>%
httr::content(as = "text", encoding = "UTF-8") %>%
jsonlite::fromJSON(flatten = TRUE) %>%
furrr::future_map_if(is.data.frame, list) %>%
furrr::future_map_if(is.data.frame, list)
purrr::map_if(is.data.frame, list) %>%
purrr::map_if(is.data.frame, list)

df <- tibble::enframe(unlist(df$teams, use.names = TRUE))
team1 <- seq(1, nrow(df) - 1, by = 2)
Expand Down Expand Up @@ -969,18 +969,18 @@ cfbd_game_player_stats <- function(year,
df <- res %>%
httr::content(as = "text", encoding = "UTF-8") %>%
jsonlite::fromJSON(flatten = TRUE) %>%
furrr::future_map_if(is.data.frame, list) %>%
purrr::map_if(is.data.frame, list) %>%
dplyr::as_tibble() %>%
dplyr::rename(game_id = .data$id) %>%
tidyr::unnest(.data$teams) %>%
furrr::future_map_if(is.data.frame, list) %>%
purrr::map_if(is.data.frame, list) %>%
dplyr::as_tibble() %>%
tidyr::unnest(.data$categories) %>%
furrr::future_map_if(is.data.frame, list) %>%
purrr::map_if(is.data.frame, list) %>%
dplyr::as_tibble() %>%
dplyr::rename(category = .data$name) %>%
tidyr::unnest(.data$types) %>%
furrr::future_map_if(is.data.frame, list) %>%
purrr::map_if(is.data.frame, list) %>%
dplyr::as_tibble() %>%
dplyr::rename(stat_category = .data$name) %>%
tidyr::unnest(.data$athletes) %>%
Expand Down Expand Up @@ -1357,7 +1357,7 @@ cfbd_game_team_stats <- function(year,
df <- res %>%
httr::content(as = "text", encoding = "UTF-8") %>%
jsonlite::fromJSON(flatten = TRUE) %>%
furrr::future_map_if(is.data.frame, list) %>%
purrr::map_if(is.data.frame, list) %>%
dplyr::as_tibble()

if (nrow(df) == 0) {
Expand Down
2 changes: 1 addition & 1 deletion R/cfbd_pbp_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -550,7 +550,7 @@ cfbd_pbp_data <- function(year,

p <- progressr::progressor(along = g_ids)

play_df <- furrr::future_map_dfr(
play_df <- purrr::map_dfr(
g_ids,
function(x){
play_df <- play_df %>%
Expand Down
2 changes: 1 addition & 1 deletion R/cfbd_play.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#' year_split <- split(weekly_year_df, weekly_year_df$year)
#' for (i in 1:length(year_split)) {
#' i <- 1
#' future::plan("multisession")
#'
#' progressr::with_progress({
#' year_split[[i]] <- year_split[[i]] %>%
#' dplyr::mutate(
Expand Down
2 changes: 1 addition & 1 deletion R/cfbd_players.R
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,7 @@ cfbd_player_usage <- function(year = 2019,
df <- res %>%
httr::content(as = "text", encoding = "UTF-8") %>%
jsonlite::fromJSON(flatten = TRUE) %>%
furrr::future_map_if(is.data.frame, list) %>%
purrr::map_if(is.data.frame, list) %>%
dplyr::as_tibble() %>%
dplyr::rename(
athlete_id = .data$id,
Expand Down
2 changes: 1 addition & 1 deletion R/cfbd_ratings.R
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ cfbd_rankings <- function(year, week = NULL, season_type = "regular") {
polls <- res %>%
httr::content(as = "text", encoding = "UTF-8") %>%
jsonlite::fromJSON(flatten = TRUE) %>%
furrr::future_map_if(is.data.frame, list) %>%
purrr::map_if(is.data.frame, list) %>%
dplyr::as_tibble() %>%
tidyr::unnest(.data$polls) %>%
tidyr::unnest(.data$ranks) %>%
Expand Down
2 changes: 1 addition & 1 deletion R/create_wpa_naive.R
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ create_wpa_naive <- function(df, wp_model) {
df[kickoff_ind, "wp_before"] <- as.vector(predict(wp_model, new_kick, type = "response"))
}
g_ids <- sort(unique(df$game_id))
df2 <- furrr::future_map_dfr(
df2 <- purrr::map_dfr(
g_ids,
function(x) {
df %>%
Expand Down
3 changes: 1 addition & 2 deletions R/helper_pbp_add_yardage.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,8 @@
#' }
#' @keywords internal
#' @importFrom rlang .data
#' @importFrom stringi stri_extract_first_regex
#' @importFrom stringr str_detect str_extract str_remove
#' @importFrom dplyr mutate arrange case_when
#' @import stringr
#' @export
#'

Expand Down
4 changes: 2 additions & 2 deletions R/load_cfb.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ load_cfb_schedules <- function(seasons = most_recent_cfb_season()){
urls <- paste0("https://github.com/sportsdataverse/cfbfastR-data/raw/main/schedules/rds/cfb_schedules_",
seasons, ".rds")

# out <- furrr::future_map_dfr(urls, progressively(rds_from_url, p = p))
# out <- purrr::map_dfr(urls, progressively(rds_from_url, p = p))

out <- lapply(urls, progressively(rds_from_url, p))
out <- rbindlist_with_attrs(out)
Expand Down Expand Up @@ -74,7 +74,7 @@ load_cfb_rosters <- function(seasons = most_recent_cfb_season()){
urls <- paste0("https://github.com/sportsdataverse/cfbfastR-data/raw/main/rosters/rds/cfb_rosters_",
seasons, ".rds")

# out <- furrr::future_map_dfr(urls, progressively(rds_from_url, p = p))
# out <- purrr::map_dfr(urls, progressively(rds_from_url, p = p))

out <- lapply(urls, progressively(rds_from_url, p))
out <- rbindlist_with_attrs(out)
Expand Down
Loading

0 comments on commit 227fd6a

Please sign in to comment.