This function runs the break down algorithm for B random orderings of the variables and then summarizes the distribution of attributions across these orderings. Note that the shap() function is just a simplified interface to the break_down_uncertainty() function with the default value set to B = 25.

break_down_uncertainty(x, ..., keep_distributions = TRUE, B = 10)

# S3 method for explainer
break_down_uncertainty(
x,
new_observation,
...,
keep_distributions = TRUE,
B = 10
)

# S3 method for default
break_down_uncertainty(
x,
data,
predict_function = predict,
new_observation,
label = class(x),
...,
path = NULL,
keep_distributions = TRUE,
B = 10
)

shap(x, ..., B = 25)

## Arguments

- x: an explainer created with function explain or a model.
- ...: other parameters.
- keep_distributions: if TRUE then the distribution of predicted values is kept. It is needed by the describe function.
- B: number of random paths.
- new_observation: a new observation with columns that correspond to variables used in the model.
- data: validation dataset, will be extracted from x if it is an explainer.
- predict_function: predict function, will be extracted from x if it is an explainer.
- label: name of the model. By default it is extracted from the 'class' attribute of the model.
- path: if specified, then this path will be highlighted on the plot. Use average in order to show an average effect.

## Value

an object of the break_down_uncertainty class.

References: Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://pbiecek.github.io/ema

See also: break_down, local_attributions

## Examples

library("DALEX")
library("iBreakDown")
# Toy examples, because CRAN angels ask for them
titanic <- na.omit(titanic)
set.seed(1313)
titanic_small <- titanic[sample(1:nrow(titanic), 500), c(1,2,6,9)]
model_titanic_glm <- glm(survived == "yes" ~ gender + age + fare,
data = titanic_small, family = "binomial")
explain_titanic_glm <- explain(model_titanic_glm,
data = titanic_small[,-9],
y = titanic_small$survived == "yes")#> Preparation of a new explainer is initiated #> -> model label : lm ( default ) #> -> data : 500 rows 4 cols #> -> target variable : 500 values #> -> model_info : package stats , ver. 3.6.1 , task regression ( default ) #> -> predict function : yhat.glm will be used ( default ) #> -> predicted values : numerical, min = 0.111212 , mean = 0.298 , max = 0.9430377 #> -> residual function : difference between y and yhat ( default ) #> -> residuals : numerical, min = -0.789032 , mean = 1.799189e-14 , max = 0.8594593 #> A new explainer has been created! # there is no explanation level uncertainty linked with additive models bd_rf <- break_down_uncertainty(explain_titanic_glm, titanic_small[1, ]) bd_rf#> min q1 median mean #> lm: age = 50 -0.041452499 -0.04145250 -0.038439639 -0.039633973 #> lm: fare = 13 -0.005977352 -0.00537534 -0.005339308 -0.005413922 #> lm: gender = male -0.102615297 -0.10261530 -0.102615297 -0.101346348 #> lm: survived = no 0.000000000 0.00000000 0.000000000 0.000000000 #> q3 max #> lm: age = 50 -0.038412614 -0.038403606 #> lm: fare = 13 -0.005339308 -0.005339308 #> lm: gender = male -0.099602437 -0.098964393 #> lm: survived = no 0.000000000 0.000000000plot(bd_rf) # \donttest{ ## Not run: library("randomForest") set.seed(1313) model <- randomForest(status ~ . , data = HR) new_observation <- HR_test[1,] explainer_rf <- explain(model, data = HR[1:1000, 1:5])#> Preparation of a new explainer is initiated #> -> model label : randomForest ( default ) #> -> data : 1000 rows 5 cols #> -> target variable : not specified! ( WARNING ) #> -> model_info : package randomForest , ver. 4.6.14 , task classification ( default ) #> -> predict function : yhat.randomForest will be used ( default ) #> -> predicted values : predict function returns multiple columns: 3 ( WARNING ) some of functionalities may not work #> -> residual function : difference between y and yhat ( default ) #> A new explainer has been created! 
bd_rf <- break_down_uncertainty(explainer_rf, new_observation) bd_rf#> min q1 median mean #> randomForest.fired: age = 57.73 -0.021328 0.0247710 0.253395 0.1946016 #> randomForest.fired: evaluation = 2 -0.018856 0.0073270 0.032725 0.0216108 #> randomForest.fired: gender = male -0.009380 0.0054740 0.019250 0.0911182 #> randomForest.fired: hours = 42.32 0.167650 0.1953890 0.220689 0.2461712 #> randomForest.fired: salary = 2 -0.270298 -0.1751675 -0.160058 -0.1610878 #> randomForest.ok: age = 57.73 -0.346842 -0.3468420 -0.199269 -0.1834688 #> randomForest.ok: evaluation = 2 0.028666 0.1002960 0.125760 0.1215018 #> randomForest.ok: gender = male -0.282642 -0.1062540 -0.021756 -0.0845928 #> randomForest.ok: hours = 42.32 -0.106876 -0.0970580 -0.046824 -0.0447352 #> randomForest.ok: salary = 2 0.046824 0.1184785 0.118552 0.1311450 #> randomForest.promoted: age = 57.73 -0.126732 -0.0061320 -0.006132 -0.0111328 #> randomForest.promoted: evaluation = 2 -0.201822 -0.1749640 -0.166262 -0.1431126 #> randomForest.promoted: gender = male -0.045880 -0.0019940 -0.000019 -0.0065254 #> randomForest.promoted: hours = 42.32 -0.247972 -0.2398625 -0.189205 -0.2014360 #> randomForest.promoted: salary = 2 -0.003902 0.0069900 0.034329 0.0299428 #> q3 max #> randomForest.fired: age = 57.73 0.3529740 0.362800 #> randomForest.fired: evaluation = 2 0.0418870 0.045408 #> randomForest.fired: gender = male 0.1521340 0.280686 #> randomForest.fired: hours = 42.32 0.3072255 0.351330 #> randomForest.fired: salary = 2 -0.1390850 -0.070866 #> randomForest.ok: age = 57.73 -0.0178415 0.005860 #> randomForest.ok: evaluation = 2 0.1307830 0.196252 #> randomForest.ok: gender = male -0.0061560 -0.003480 #> randomForest.ok: hours = 42.32 0.0010015 0.030996 #> randomForest.ok: salary = 2 0.1542220 0.268992 #> randomForest.promoted: age = 57.73 0.0129425 0.015468 #> randomForest.promoted: evaluation = 2 -0.0957510 -0.058120 #> randomForest.promoted: gender = male 0.0023685 0.023564 #> randomForest.promoted: 
hours = 42.32 -0.1719955 -0.156930 #> randomForest.promoted: salary = 2 0.0415060 0.077562plot(bd_rf) # example for regression - apartment prices # here we do not have interactions model <- randomForest(m2.price ~ . , data = apartments) explainer_rf <- explain(model, data = apartments_test[1:1000, 2:6], y = apartments_test$m2.price[1:1000])#> Preparation of a new explainer is initiated
#>   -> model label       :  randomForest  (  default  )
#>   -> data              :  1000  rows  5  cols
#>   -> target variable   :  1000  values
#>   -> model_info        :  package randomForest , ver. 4.6.14 , task regression (  default  )
#>   -> predict function  :  yhat.randomForest  will be used (  default  )
#>   -> predicted values  :  numerical, min =  2052.033 , mean =  3487.71 , max =  5776.623
#>   -> residual function :  difference between y and yhat (  default  )
#>   -> residuals         :  numerical, min =  -632.8469 , mean =  1.070017 , max =  1328.352
#>   A new explainer has been created!
bd_rf <- break_down_uncertainty(explainer_rf, apartments_test[1,])
bd_rf#>                                              min        q1     median
#> randomForest: construction.year = 1976 -128.5908 -119.3910  -75.48837
#> randomForest: district = Srodmiescie    981.8193 1036.9753 1054.79081
#> randomForest: floor = 3                 178.8471  189.5230  194.12751
#> randomForest: no.rooms = 5             -229.8610 -225.7194 -212.31243
#> randomForest: surface = 131            -272.2211 -266.0785 -250.70512
#>                                              mean         q3        max
#> randomForest: construction.year = 1976  -82.87975  -50.06424  -47.64365
#> randomForest: district = Srodmiescie   1046.73182 1054.79081 1091.59037
#> randomForest: floor = 3                 197.65920  210.33113  215.52532
#> randomForest: no.rooms = 5             -200.17988 -203.34626 -130.21186
#> randomForest: surface = 131            -250.99715 -234.39585 -229.21426plot(bd_rf) bd_rf <- break_down_uncertainty(explainer_rf, apartments_test[1,], path = 1:5)
plot(bd_rf) bd_rf <- break_down_uncertainty(explainer_rf,
apartments_test[1,],
path = c("floor", "no.rooms", "district",
"construction.year", "surface"))
plot(bd_rf) bd_rf <- shap(explainer_rf,
apartments_test[1,])
bd_rf#>                                              min        q1    median       mean
#> randomForest: construction.year = 1976 -128.5908 -127.7759 -116.5361  -97.94983
#> randomForest: district = Srodmiescie    981.8193 1054.7908 1074.7538 1078.05046
#> randomForest: floor = 3                 159.4690  172.8786  187.2105  190.61682
#> randomForest: no.rooms = 5             -233.0194 -209.3096 -204.8655 -183.54147
#> randomForest: surface = 131            -343.0658 -284.8054 -273.9106 -276.84173
#>                                                q3        max
#> randomForest: construction.year = 1976  -69.15964  -47.64365
#> randomForest: district = Srodmiescie   1100.63043 1139.28110
#> randomForest: floor = 3                 206.40993  215.52532
#> randomForest: no.rooms = 5             -135.54778 -130.21186
#> randomForest: surface = 131            -255.12184 -229.21426plot(bd_rf) plot(bd_rf, show_boxplots = FALSE) # }