Analysis of Spanish play-by-play data

Guillermo Vinue

2025-10-22

This document contains all the R code needed to reproduce the results described in the paper "A Basketball Big Data Platform for Box Score and Play-by-Play Data" (https://doi.org/10.1089/big.2023.0177).

# First, load BAwiR and the other packages used in the paper:
library(BAwiR) 
library(tidyverse) 

The following data file is an illustration of the type of play-by-play data available from the Spanish ACB league.

# Play-by-play data set used as the starting point for every analysis below.
df0 <- acb_vbc_cz_pbp_2223

# Distinct values of the `day` and `game_code` columns found in the data.
day_num <- unique(df0[["day"]])
game_code <- unique(df0[["game_code"]])

Perform some initial data processing:

# Keep only the rows of acb_vbc_cz_sl_2223 whose period is "1C".
acb_games_2223_sl <- filter(acb_vbc_cz_sl_2223, period == "1C")

# Combine the play-by-play data with the filtered "1C" rows and the game
# information into the prepared data set used by the analyses below.
df1 <- do_prepare_data(
  df0, day_num, acb_games_2223_sl, acb_games_2223_info, game_code
)
# Lineups and sub-lineups:
# Lineups for "Valencia Basket" in the selected day and game.
# NOTE(review): the trailing FALSE is presumably a verbosity flag -- confirm
# against the BAwiR documentation.
data_li <- do_lineup(
  df1, day_num, game_code, "Valencia Basket", FALSE
)
# Sub-lineups built from the lineups above.
# NOTE(review): 4 is presumably the number of players per sub-lineup -- confirm.
data_subli <- do_sub_lineup(data_li, 4)
# Possessions:
# Computed from the prepared data, passing "1C" as the period selector.
data_poss <- do_possession(df1, "1C")
# Timeouts:
# Prepare the raw play-by-play data together with the game and coach tables.
# NOTE(review): TRUE presumably requests removal of overtime periods -- confirm.
df1_to <- do_prepare_data_to(
  df0, TRUE, acb_games_2223_info, acb_games_2223_coach
)
# Timeout results for the team/coach label "Casademont Zaragoza_Porfirio Fisac".
data_to <- do_time_out_success(
  df1_to, day_num, game_code, "Casademont Zaragoza_Porfirio Fisac", FALSE
)
# Periods:
# Selections for the per-period statistics; the trailing comments show
# alternative values that can be substituted.
team_sel <- "Valencia Basket" # "Casademont Zaragoza"
period_sel <- "1C"            # "4C"
player_sel <- "Webb"          # "Mara"

# Pre-process the play-by-play data for the chosen team and period.
pre_per <- do_preproc_period(
  acb_vbc_cz_pbp_2223, team_sel, period_sel, acb_vbc_cz_sl_2223
)

# Unpack the two elements of the pre-processing result that are used next.
df2 <- pre_per[["df2"]]
df0_inli_team <- pre_per[["df0_inli_team"]]

# Run the same preparation step as before on the period-specific inputs.
df3 <- do_prepare_data(
  df2, day_num, df0_inli_team, acb_games_2223_info, game_code
)

# Statistics for the selected team, period and player.
data_per <- do_stats_per_period(
  df3, day_num, game_code, team_sel, period_sel, player_sel
)

# Clutch time:
# An empty result (zero rows) means that the game had no clutch time.
data_clutch <- do_clutch_time(df0)
# Free throw fouls:
# NOTE(review): "comm" and "rec" presumably select fouls committed and fouls
# received, respectively -- confirm against the BAwiR documentation.
data_ft_comm <- do_ft_fouls(df0, "comm")
data_ft_rec  <- do_ft_fouls(df0, "rec")

# Offensive fouls (same "comm" / "rec" selectors as above):
data_off_comm <- do_offensive_fouls(df0, "comm")
data_off_rec  <- do_offensive_fouls(df0, "rec")
# Offensive rebounds:
# Prepare the raw play-by-play data together with the game information.
# NOTE(review): TRUE presumably requests removal of overtime periods -- confirm.
df1_or <- do_prepare_data_or(df0, TRUE, acb_games_2223_info)
# Offensive-rebound results for "Valencia Basket".
data_or <- do_reb_off_success(
  df1_or, day_num, game_code, "Valencia Basket", FALSE
)
# Print the R version, platform, locale and loaded packages of this session,
# so the environment that produced the results is recorded.
sessionInfo()
## R version 4.3.3 (2024-02-29)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: Fedora Linux 39 (Workstation Edition)
## 
## Matrix products: default
## BLAS/LAPACK: FlexiBLAS OPENBLAS-OPENMP;  LAPACK version 3.11.0
## 
## locale:
##  [1] LC_CTYPE=es_ES.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=es_ES.UTF-8        LC_COLLATE=C              
##  [5] LC_MONETARY=es_ES.UTF-8    LC_MESSAGES=es_ES.UTF-8   
##  [7] LC_PAPER=es_ES.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Madrid
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] digest_0.6.37     R6_2.6.1          fastmap_1.2.0     xfun_0.52        
##  [5] cachem_1.1.0      knitr_1.50        htmltools_0.5.8.1 rmarkdown_2.29   
##  [9] lifecycle_1.0.4   cli_3.6.5         sass_0.4.10       jquerylib_0.1.4  
## [13] compiler_4.3.3    tools_4.3.3       evaluate_1.0.3    bslib_0.9.0      
## [17] yaml_2.3.10       rlang_1.1.6       jsonlite_2.0.0