Data

This vignette demonstrates the use of the triplot package on the FIFA 20 dataset. The dataset was downloaded from the Kaggle website, preprocessed, and made available in the DALEX package.

The dataset contains 37 features that describe each player's performance. We will explore models that predict a player's value in euros (in millions).

library("DALEX")
library("triplot")

# Load the `fifa` dataset into the workspace.
data("fifa")

# Rescale the target from euros to millions of euros.
fifa$value_eur <- fifa$value_eur / 1e6

# Drop these four columns from the data frame.
fifa[, c("nationality", "overall", "potential", "wage_eur")] <- NULL

# Preview the first six columns.
head(fifa[, 1:6])
#>                   value_eur age height_cm weight_kg attacking_crossing
#> L. Messi               95.5  32       170        72                 88
#> Cristiano Ronaldo      58.5  34       187        83                 84
#> Neymar Jr             105.5  27       175        68                 87
#> J. Oblak               77.5  26       188        87                 13
#> E. Hazard              90.0  28       175        74                 81
#> K. De Bruyne           90.0  28       181        70                 93
#>                   attacking_finishing
#> L. Messi                           95
#> Cristiano Ronaldo                  94
#> Neymar Jr                          87
#> J. Oblak                           11
#> E. Hazard                          84
#> K. De Bruyne                       82

Build models for predicting players value

For further analysis, we prepare three models:

  • random forest,
  • tuned random forest,
  • gradient boosting machine.
library("ranger")
library("gbm")

# Random forests and gradient boosting draw random numbers while fitting.
# Seed the RNG first so that the fitted models can be reproduced.
set.seed(123)

# Random forest with ranger's default hyperparameters.
fifa_rf <- ranger(value_eur ~ ., data = fifa)

# Random forest with explicitly chosen hyperparameters.
fifa_rf_mod <- ranger(
  value_eur ~ .,
  data = fifa,
  mtry = 18,
  splitrule = "variance",
  min.node.size = 5
)

# Gradient boosting machine: 250 trees, interaction depth 4, squared error.
fifa_gbm <- gbm(
  value_eur ~ .,
  data = fifa,
  n.trees = 250,
  interaction.depth = 4,
  distribution = "gaussian"
)

Build explainers

For each of these models, we build a DALEX explainer. Using explainers simplifies further model analysis.

# Predictors only: drop the target column by name rather than by position,
# so the explainers stay correct even if the column order of `fifa` changes.
fifa_predictors <- fifa[, setdiff(colnames(fifa), "value_eur"), drop = FALSE]

# Explainer for the default random forest.
fifa_rf_exp <- DALEX::explain(
  fifa_rf,
  data = fifa_predictors,
  y = fifa$value_eur,
  label = "RF",
  verbose = FALSE
)

# Explainer for the tuned random forest.
fifa_rf_m_exp <- DALEX::explain(
  fifa_rf_mod,
  data = fifa_predictors,
  y = fifa$value_eur,
  label = "RF tuned",
  verbose = FALSE
)

# Explainer for the GBM. A custom predict function is supplied so that
# predictions are made with n.trees = 250.
fifa_gbm_exp <- DALEX::explain(
  fifa_gbm,
  data = fifa_predictors,
  y = fifa$value_eur,
  predict_function = function(m, x) predict(m, x, n.trees = 250),
  label = "GBM",
  verbose = FALSE
)

Triplot for model

‘Triplot’ objects allow us to investigate the importance of each feature (plot on the left), the correlation between features (plot on the right), as well as the changes in hierarchical aspect importance (plot in the middle).

# NOTE(review): per the triplot documentation, N is the number of rows
# sampled from the data and B the number of permutation rounds, so the
# results depend on the RNG. Seed it, as the instance-level sections of this
# vignette do, to keep the plots reproducible.
set.seed(123)

# Model-level triplot objects, one per explainer.
fifa_rf_tri <- model_triplot(fifa_rf_exp, B = 1, N = 5000)

fifa_rf_m_tri <- model_triplot(fifa_rf_m_exp, B = 1, N = 5000)

fifa_gbm_tri <- model_triplot(fifa_gbm_exp, B = 1, N = 5000)

# Draw each triplot with the model label shown and no middle margin.
plot(fifa_rf_tri, show_model_label = TRUE, margin_mid = 0)

plot(fifa_rf_m_tri, show_model_label = TRUE, margin_mid = 0)

plot(fifa_gbm_tri, show_model_label = TRUE, margin_mid = 0)

# Variable groups: each element maps a group name to the column names that
# belong to it. Most groups share a column-name prefix, so their members are
# assembled with paste0().
fifa_vg <- list(
  age = "age",
  body = c("height_cm", "weight_kg"),
  attacking = paste0(
    "attacking_",
    c("crossing", "finishing", "heading_accuracy", "short_passing", "volleys")
  ),
  skill = paste0(
    "skill_",
    c("dribbling", "curve", "fk_accuracy", "long_passing", "ball_control")
  ),
  movement = paste0(
    "movement_",
    c("acceleration", "sprint_speed", "agility", "reactions", "balance")
  ),
  power = paste0(
    "power_",
    c("shot_power", "jumping", "stamina", "strength", "long_shots")
  ),
  mentality = paste0(
    "mentality_",
    c("aggression", "interceptions", "positioning", "vision", "penalties",
      "composure")
  ),
  defending = paste0(
    "defending_",
    c("marking", "standing_tackle", "sliding_tackle")
  ),
  goalkeeping = paste0(
    "goalkeeping_",
    c("diving", "handling", "kicking", "positioning", "reflexes")
  )
)

Investigate models for chosen instances (players)

For the instance-level model analysis, we choose the top player (by value).

# Set the RNG seed.
set.seed(123)

# Take the row that comes first when players are ordered by decreasing value.
top_player <- fifa[order(fifa$value_eur, decreasing = TRUE)[1], ]

print(top_player)
#>           value_eur age height_cm weight_kg attacking_crossing
#> Neymar Jr     105.5  27       175        68                 87
#>           attacking_finishing attacking_heading_accuracy
#> Neymar Jr                  87                         62
#>           attacking_short_passing attacking_volleys skill_dribbling skill_curve
#> Neymar Jr                      87                87              96          88
#>           skill_fk_accuracy skill_long_passing skill_ball_control
#> Neymar Jr                87                 81                 95
#>           movement_acceleration movement_sprint_speed movement_agility
#> Neymar Jr                    94                    89               96
#>           movement_reactions movement_balance power_shot_power power_jumping
#> Neymar Jr                 92               84               80            61
#>           power_stamina power_strength power_long_shots mentality_aggression
#> Neymar Jr            81             49               84                   51
#>           mentality_interceptions mentality_positioning mentality_vision
#> Neymar Jr                      36                    87               90
#>           mentality_penalties mentality_composure defending_marking
#> Neymar Jr                  90                  94                27
#>           defending_standing_tackle defending_sliding_tackle goalkeeping_diving
#> Neymar Jr                        26                       29                  9
#>           goalkeeping_handling goalkeeping_kicking goalkeeping_positioning
#> Neymar Jr                    9                  15                      15
#>           goalkeeping_reflexes
#> Neymar Jr                   11
# Aspect-level explanations of the top player's prediction, one per model,
# using the variable groups in fifa_vg.
aspects_top_player_rf <- predict_aspects(
  fifa_rf_exp,
  new_observation = top_player,
  variable_groups = fifa_vg
)

aspects_top_player_rf_m <- predict_aspects(
  fifa_rf_m_exp,
  new_observation = top_player,
  variable_groups = fifa_vg
)

aspects_top_player_gbm <- predict_aspects(
  fifa_gbm_exp,
  new_observation = top_player,
  variable_groups = fifa_vg
)

# Plot the three explanations together.
plot(aspects_top_player_rf, aspects_top_player_rf_m, aspects_top_player_gbm)

# Instance-level triplot for the "RF tuned" explainer.
fifa_rf_m_tri_player <- predict_triplot(fifa_rf_m_exp, top_player)

plot(fifa_rf_m_tri_player, show_model_label = TRUE, margin_mid = 0.2)

For comparison, in the next analysis we explain the prediction for one of the goalkeepers.

# Set the RNG seed.
set.seed(123)

# Select the player by row name.
player_gk <- fifa["J. Oblak", ]

print(player_gk)
#>          value_eur age height_cm weight_kg attacking_crossing
#> J. Oblak      77.5  26       188        87                 13
#>          attacking_finishing attacking_heading_accuracy attacking_short_passing
#> J. Oblak                  11                         15                      43
#>          attacking_volleys skill_dribbling skill_curve skill_fk_accuracy
#> J. Oblak                13              12          13                14
#>          skill_long_passing skill_ball_control movement_acceleration
#> J. Oblak                 40                 30                    43
#>          movement_sprint_speed movement_agility movement_reactions
#> J. Oblak                    60               67                 88
#>          movement_balance power_shot_power power_jumping power_stamina
#> J. Oblak               49               59            78            41
#>          power_strength power_long_shots mentality_aggression
#> J. Oblak             78               12                   34
#>          mentality_interceptions mentality_positioning mentality_vision
#> J. Oblak                      19                    11               65
#>          mentality_penalties mentality_composure defending_marking
#> J. Oblak                  11                  68                27
#>          defending_standing_tackle defending_sliding_tackle goalkeeping_diving
#> J. Oblak                        12                       18                 87
#>          goalkeeping_handling goalkeeping_kicking goalkeeping_positioning
#> J. Oblak                   92                  78                      90
#>          goalkeeping_reflexes
#> J. Oblak                   89
# Aspect-level explanations of the goalkeeper's prediction, one per model,
# using the variable groups in fifa_vg.
aspects_gk_rf <- predict_aspects(
  fifa_rf_exp,
  new_observation = player_gk,
  variable_groups = fifa_vg
)

aspects_gk_rf_m <- predict_aspects(
  fifa_rf_m_exp,
  new_observation = player_gk,
  variable_groups = fifa_vg
)

aspects_gk_gbm <- predict_aspects(
  fifa_gbm_exp,
  new_observation = player_gk,
  variable_groups = fifa_vg
)

# Plot the three explanations together.
plot(aspects_gk_rf, aspects_gk_rf_m, aspects_gk_gbm)

Session info

#> R version 4.0.4 (2021-02-15)
#> Platform: x86_64-apple-darwin17.0 (64-bit)
#> Running under: macOS Catalina 10.15.7
#> 
#> Matrix products: default
#> BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
#> 
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] gbm_2.1.8     ranger_0.12.1 triplot_1.3.1 DALEX_2.1.1  
#> 
#> loaded via a namespace (and not attached):
#>  [1] shape_1.4.5        xfun_0.21          splines_4.0.4      lattice_0.20-41   
#>  [5] colorspace_2.0-0   vctrs_0.3.6        htmltools_0.5.1.1  yaml_2.2.1        
#>  [9] utf8_1.1.4         survival_3.2-7     rlang_0.4.10       pkgdown_1.6.1.9000
#> [13] pillar_1.5.1       glue_1.4.2         foreach_1.5.1      lifecycle_1.0.0   
#> [17] stringr_1.4.0      munsell_0.5.0      gtable_0.3.0       ragg_1.1.1        
#> [21] codetools_0.2-18   memoise_2.0.0      evaluate_0.14      labeling_0.4.2    
#> [25] knitr_1.31         fastmap_1.1.0      fansi_0.4.2        highr_0.8         
#> [29] Rcpp_1.0.6         scales_1.1.1       cachem_1.0.4       desc_1.3.0        
#> [33] ingredients_2.0.1  farver_2.1.0       systemfonts_1.0.1  fs_1.5.0          
#> [37] textshaping_0.3.1  ggplot2_3.3.3      digest_0.6.27      stringi_1.5.3     
#> [41] grid_4.0.4         rprojroot_2.0.2    tools_4.0.4        magrittr_2.0.1    
#> [45] glmnet_4.1-1       patchwork_1.1.1    tibble_3.1.0       ggdendro_0.1.22   
#> [49] crayon_1.4.1       pkgconfig_2.0.3    ellipsis_0.3.1     MASS_7.3-53       
#> [53] Matrix_1.3-2       rmarkdown_2.7      iterators_1.0.13   R6_2.5.0          
#> [57] compiler_4.0.4