Plot Generic for Break Down Uncertainty Objects

# S3 method for break_down_uncertainty
plot(
  x,
  ...,
  vcolors = DALEX::colors_breakdown_drwhy(),
  show_boxplots = TRUE,
  max_features = 10,
  max_vars = NULL
)

Arguments

x

an explanation created with break_down_uncertainty

...

other parameters.

vcolors

colors used in the plot. By default, the DrWhy colors returned by DALEX::colors_breakdown_drwhy() are used.

show_boxplots

logical; if TRUE (default), boxplots will be plotted to show the uncertainty of attributions.

max_features

maximal number of features to be included in the plot. By default it's 10.

max_vars

alias for the max_features parameter.

Value

a ggplot2 object.

References

Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://ema.drwhy.ai

Examples

library("DALEX")
library("iBreakDown")
set.seed(1313)
model_titanic_glm <- glm(survived ~ gender + age + fare,
                       data = titanic_imputed, family = "binomial")
explain_titanic_glm <- explain(model_titanic_glm,
                           data = titanic_imputed,
                           y = titanic_imputed$survived,
                           label = "glm")
#> Preparation of a new explainer is initiated
#>   -> model label       :  glm 
#>   -> data              :  2207  rows  8  cols 
#>   -> target variable   :  2207  values 
#>   -> predict function  :  yhat.glm  will be used (  default  )
#>   -> predicted values  :  No value for predict function target column. (  default  )
#>   -> model_info        :  package stats , ver. 4.1.2 , task classification (  default  ) 
#>   -> predicted values  :  numerical, min =  0.1490412 , mean =  0.3221568 , max =  0.9878987  
#>   -> residual function :  difference between y and yhat (  default  )
#>   -> residuals         :  numerical, min =  -0.8898433 , mean =  4.198546e-13 , max =  0.8448637  
#>   A new explainer has been created!  

sh_glm <- shap(explain_titanic_glm, titanic_imputed[1, ])

sh_glm
#>                                     min          q1      median        mean
#> glm: age = 42               -0.01514903 -0.01492541 -0.01434645 -0.01447369
#> glm: class = 3rd             0.00000000  0.00000000  0.00000000  0.00000000
#> glm: embarked = Southampton  0.00000000  0.00000000  0.00000000  0.00000000
#> glm: fare = 7.11            -0.01823177 -0.01784977 -0.01784977 -0.01772034
#> glm: gender = male          -0.10843751 -0.10725651 -0.10725651 -0.10725871
#> glm: parch = 0               0.00000000  0.00000000  0.00000000  0.00000000
#> glm: sibsp = 0               0.00000000  0.00000000  0.00000000  0.00000000
#> glm: survived = 0            0.00000000  0.00000000  0.00000000  0.00000000
#>                                      q3         max
#> glm: age = 42               -0.01396446 -0.01396446
#> glm: class = 3rd             0.00000000  0.00000000
#> glm: embarked = Southampton  0.00000000  0.00000000
#> glm: fare = 7.11            -0.01738588 -0.01705077
#> glm: gender = male          -0.10725293 -0.10667755
#> glm: parch = 0               0.00000000  0.00000000
#> glm: sibsp = 0               0.00000000  0.00000000
#> glm: survived = 0            0.00000000  0.00000000
plot(sh_glm)


# \dontrun{
## Not run:
library("randomForest")
set.seed(1313)

model <- randomForest(status ~ . , data = HR)
new_observation <- HR_test[1,]

explainer_rf <- explain(model,
                        data = HR[1:1000,1:5])
#> Preparation of a new explainer is initiated
#>   -> model label       :  randomForest  (  default  )
#>   -> data              :  1000  rows  5  cols 
#>   -> target variable   :  not specified! (  WARNING  )
#>   -> predict function  :  yhat.randomForest  will be used (  default  )
#>   -> predicted values  :  No value for predict function target column. (  default  )
#>   -> model_info        :  package randomForest , ver. 4.7.1 , task multiclass (  default  ) 
#>   -> model_info        :  Model info detected multiclass task but 'y' is a NULL .  (  WARNING  )
#>   -> model_info        :  By deafult multiclass tasks supports only factor 'y' parameter. 
#>   -> model_info        :  Consider changing to a factor vector with true class names.
#>   -> model_info        :  Otherwise I will not be able to calculate residuals or loss function.
#>   -> predicted values  :  predict function returns multiple columns:  3  (  default  ) 
#>   -> residual function :  difference between 1 and probability of true class (  default  )
#>   A new explainer has been created!  

bd_rf <- break_down_uncertainty(explainer_rf,
                           new_observation,
                           path = c(3,2,4,1,5),
                           show_boxplots = FALSE)
bd_rf
#>                                             min        q1    median
#> randomForest.fired: age = 57.73       -0.021328  0.019462  0.153816
#> randomForest.fired: evaluation = 2    -0.018856  0.009084  0.035996
#> randomForest.fired: gender = male     -0.009380  0.005474  0.019250
#> randomForest.fired: hours = 42.32      0.167650  0.195686  0.230614
#> randomForest.fired: salary = 2        -0.270298 -0.172701 -0.160058
#> randomForest.ok: age = 57.73          -0.346842 -0.346842 -0.162470
#> randomForest.ok: evaluation = 2        0.028666  0.099847  0.125760
#> randomForest.ok: gender = male        -0.282642 -0.162907 -0.021756
#> randomForest.ok: hours = 42.32        -0.106876 -0.089536 -0.053010
#> randomForest.ok: salary = 2            0.046824  0.118454  0.118552
#> randomForest.promoted: age = 57.73    -0.126732 -0.010081 -0.006132
#> randomForest.promoted: evaluation = 2 -0.201822 -0.172814 -0.164010
#> randomForest.promoted: gender = male  -0.045880 -0.002331 -0.001994
#> randomForest.promoted: hours = 42.32  -0.247972 -0.235271 -0.178676
#> randomForest.promoted: salary = 2     -0.003902  0.001014  0.027152
#>                                               mean        q3       max
#> randomForest.fired: age = 57.73        0.178679818  0.352974  0.362800
#> randomForest.fired: evaluation = 2     0.023217636  0.041020  0.045408
#> randomForest.fired: gender = male      0.103037273  0.187181  0.280686
#> randomForest.fired: hours = 42.32      0.244756909  0.291987  0.351330
#> randomForest.fired: salary = 2        -0.157277636 -0.125635 -0.070866
#> randomForest.ok: age = 57.73          -0.167283636 -0.005432  0.005860
#> randomForest.ok: evaluation = 2        0.119492364  0.129860  0.196252
#> randomForest.ok: gender = male        -0.096862545 -0.008832 -0.003480
#> randomForest.ok: hours = 42.32        -0.045487455 -0.001451  0.030996
#> randomForest.ok: salary = 2            0.129991273  0.142728  0.268992
#> randomForest.promoted: age = 57.73    -0.011396182  0.011513  0.015468
#> randomForest.promoted: evaluation = 2 -0.142710000 -0.110062 -0.058120
#> randomForest.promoted: gender = male  -0.006174727  0.002231  0.023564
#> randomForest.promoted: hours = 42.32  -0.199269455 -0.173865 -0.156930
#> randomForest.promoted: salary = 2      0.027286364  0.041506  0.077562
plot(bd_rf, max_features = 3)


# example for regression - apartment prices
# here we do not have interactions
model <- randomForest(m2.price ~ . , data = apartments)
explainer_rf <- explain(model,
                        data = apartments_test[1:1000,2:6],
                        y = apartments_test$m2.price[1:1000])
#> Preparation of a new explainer is initiated
#>   -> model label       :  randomForest  (  default  )
#>   -> data              :  1000  rows  5  cols 
#>   -> target variable   :  1000  values 
#>   -> predict function  :  yhat.randomForest  will be used (  default  )
#>   -> predicted values  :  No value for predict function target column. (  default  )
#>   -> model_info        :  package randomForest , ver. 4.7.1 , task regression (  default  ) 
#>   -> predicted values  :  numerical, min =  2052.033 , mean =  3487.71 , max =  5776.623  
#>   -> residual function :  difference between y and yhat (  default  )
#>   -> residuals         :  numerical, min =  -632.8469 , mean =  1.070017 , max =  1328.352  
#>   A new explainer has been created!  

bd_rf <- break_down_uncertainty(explainer_rf,
                                     apartments_test[1,],
                                     path = c("floor", "no.rooms", "district",
                                         "construction.year", "surface"))
bd_rf
#>                                              min        q1     median
#> randomForest: construction.year = 1976 -128.5908 -110.1911  -68.37002
#> randomForest: district = Srodmiescie    981.8193 1036.9753 1054.79081
#> randomForest: floor = 3                 172.8786  183.5605  193.26996
#> randomForest: no.rooms = 5             -229.8610 -225.7194 -209.30956
#> randomForest: surface = 131            -272.2211 -266.0785 -249.94062
#>                                              mean         q3        max
#> randomForest: construction.year = 1976  -81.56069  -52.48483  -47.64365
#> randomForest: district = Srodmiescie   1046.51524 1054.79081 1091.59037
#> randomForest: floor = 3                 195.40642  209.02407  215.52532
#> randomForest: no.rooms = 5             -201.00985 -204.86547 -130.21186
#> randomForest: surface = 131            -249.01688 -229.21426 -229.21426
plot(bd_rf)


bd_rf <- shap(explainer_rf,
              apartments_test[1,])
bd_rf
#>                                              min        q1     median
#> randomForest: construction.year = 1976 -128.5908 -127.7759  -92.75977
#> randomForest: district = Srodmiescie    981.8193 1054.7908 1069.75828
#> randomForest: floor = 3                 159.4690  172.8786  184.31616
#> randomForest: no.rooms = 5             -225.7194 -207.9039 -198.81536
#> randomForest: surface = 131            -345.1926 -318.0552 -277.00849
#>                                              mean         q3        max
#> randomForest: construction.year = 1976  -93.72824  -60.08701  -47.64365
#> randomForest: district = Srodmiescie   1075.07584 1091.59037 1139.28110
#> randomForest: floor = 3                 186.95249  194.98506  215.52532
#> randomForest: no.rooms = 5             -175.28235 -130.21186 -130.21186
#> randomForest: surface = 131            -282.68349 -272.14890 -229.21426
plot(bd_rf)

plot(bd_rf, show_boxplots = FALSE)

# }