-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_advanced.R
26 lines (22 loc) · 946 Bytes
/
get_advanced.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# load libraries
library(rvest)
library(dplyr)
# Scrape the 2020 NBA advanced player statistics page, clean the table,
# and cache the result on disk as an RDS file.
advanced_url <- "https://www.basketball-reference.com/leagues/NBA_2020_advanced.html"

advanced_df <- advanced_url %>%
  read_html() %>%
  # html_table() on a whole document returns a list of tables;
  # as.data.frame() combines that list into a single data frame
  html_table() %>%
  as.data.frame(stringsAsFactors = FALSE) %>%
  # remove redundant columns
  # NOTE(review): Var.20 / Var.25 look like the auto-generated names of
  # blank-header spacer columns -- confirm against the live page
  select(-Var.20, -Var.25) %>%
  # remove redundant rows: rows whose Rk cell holds the header text "Rk"
  filter(Rk != "Rk") %>%
  # convert column types
  mutate(
    Rk = as.integer(Rk),
    Age = as.integer(Age),
    G = as.integer(G),
    MP = as.integer(MP)
  )

# reset column names
advanced_names <- c(
  "Rk", "Player", "Pos", "Age", "Tm", "G", "MP", "PER", "TSpct", "3PAr", "FTr",
  "ORBpct", "DRBpct", "TRBpct", "ASTpct", "STLpct", "BLKpct", "TOVpct", "USGpct",
  "OWS", "DWS", "WS", "WS/48", "OBPM", "DBPM", "BPM", "VORP"
)

# The renaming is positional, so fail loudly if the table layout is not the
# expected one instead of silently attaching names to the wrong columns.
if (ncol(advanced_df) != length(advanced_names)) {
  stop(
    "Expected ", length(advanced_names), " columns after cleaning, got ",
    ncol(advanced_df), ".",
    call. = FALSE
  )
}
names(advanced_df) <- advanced_names

# convert column types: every column from PER through the last one (VORP)
# becomes double; the range is derived from the names rather than hard-coded
stat_cols <- seq(match("PER", advanced_names), length(advanced_names))
advanced_df[stat_cols] <- lapply(advanced_df[stat_cols], as.double)

# save dataframes
output_path <- "./Data/advanced.Rda"
# saveRDS() cannot create missing directories, so make sure the target exists
dir.create(dirname(output_path), showWarnings = FALSE, recursive = TRUE)
saveRDS(advanced_df, output_path)