Function `break_down_uncertainty()` calls the break down algorithm `B` times for random orderings. It then calculates the distribution of attributions across these different orderings. Note that the `shap()` function is just a simplified interface to the `break_down_uncertainty()` function with the default value set to `B=25`.

break_down_uncertainty(x, ..., keep_distributions = TRUE, B = 10)

# S3 method for explainer
break_down_uncertainty(x, new_observation, ...,
  keep_distributions = TRUE, B = 10)

# S3 method for default
break_down_uncertainty(x, data,
  predict_function = predict, new_observation, label = class(x)[1],
  ..., path = NULL, keep_distributions = TRUE, B = 10)

shap(x, ..., B = 25)

Arguments

x

an explainer created with the function `explain`, or a model.

...

other parameters.

keep_distributions

if `TRUE`, the distribution of predicted values is kept. It is needed by the `describe` function.

B

number of random paths

new_observation

a new observation with columns that correspond to variables used in the model.

data

validation dataset, will be extracted from `x` if it is an explainer.

predict_function

predict function, will be extracted from `x` if it is an explainer.

label

name of the model. By default it's extracted from the 'class' attribute of the model.

path

if specified, then this path will be highlighted on the plot. Use `average` in order to show an average effect.

Value

an object of the `break_down_uncertainty` class.

References

Predictive Models: Visual Exploration, Explanation and Debugging https://pbiecek.github.io/PM_VEE

See also

Examples

library("DALEX") library("iBreakDown") # Toy examples, because CRAN angels ask for them titanic <- na.omit(titanic) set.seed(1313) titanic_small <- titanic[sample(1:nrow(titanic), 500), c(1,2,6,9)] model_titanic_glm <- glm(survived == "yes" ~ gender + age + fare, data = titanic_small, family = "binomial") explain_titanic_glm <- explain(model_titanic_glm, data = titanic_small[,-9], y = titanic_small$survived == "yes")
#> Preparation of a new explainer is initiated #> -> model label : lm (default) #> -> data : 500 rows 4 cols #> -> target variable : 500 values #> -> predict function : yhat.glm will be used (default) #> -> predicted values : numerical, min = 0.111212 , mean = 0.298 , max = 0.9430377 #> -> residual function : difference between y and yhat (default) #> -> residuals : numerical, min = -0.789032 , mean = 1.799189e-14 , max = 0.8594593 #> A new explainer has been created!
# there is no explanation level uncertainty linked with additive models bd_rf <- break_down_uncertainty(explain_titanic_glm, titanic_small[1, ]) bd_rf
#> min q1 median mean #> lm: age = 50 -0.041452499 -0.04145250 -0.038439639 -0.039633973 #> lm: fare = 13 -0.005977352 -0.00537534 -0.005339308 -0.005413922 #> lm: gender = male -0.102615297 -0.10261530 -0.102615297 -0.101346348 #> lm: survived = no 0.000000000 0.00000000 0.000000000 0.000000000 #> q3 max #> lm: age = 50 -0.038412614 -0.038403606 #> lm: fare = 13 -0.005339308 -0.005339308 #> lm: gender = male -0.099602437 -0.098964393 #> lm: survived = no 0.000000000 0.000000000
plot(bd_rf)
# \donttest{ ## Not run: library("randomForest") set.seed(1313) model <- randomForest(status ~ . , data = HR) new_observation <- HR_test[1,] explainer_rf <- explain(model, data = HR[1:1000, 1:5])
#> Preparation of a new explainer is initiated #> -> model label : randomForest (default) #> -> data : 1000 rows 5 cols #> -> target variable : not specified! (WARNING) #> -> predict function : yhat.randomForest will be used (default)
#> Warning: the condition has length > 1 and only the first element will be used
#> -> predicted values : numerical, min = 0 , mean = 0.3333333 , max = 1 #> -> residual function : difference between y and yhat (default) #> A new explainer has been created!
bd_rf <- break_down_uncertainty(explainer_rf, new_observation) bd_rf
#> min q1 median mean #> randomForest.fired: age = 58 -0.021328 0.0247710 0.253395 0.1946016 #> randomForest.fired: evaluation = 2 -0.018856 0.0073270 0.032725 0.0216108 #> randomForest.fired: gender = male -0.009380 0.0054740 0.019250 0.0911182 #> randomForest.fired: hours = 42 0.167650 0.1953890 0.220689 0.2461712 #> randomForest.fired: salary = 2 -0.270298 -0.1751675 -0.160058 -0.1610878 #> randomForest.ok: age = 58 -0.346842 -0.3468420 -0.199269 -0.1834688 #> randomForest.ok: evaluation = 2 0.028666 0.1002960 0.125760 0.1215018 #> randomForest.ok: gender = male -0.282642 -0.1062540 -0.021756 -0.0845928 #> randomForest.ok: hours = 42 -0.106876 -0.0970580 -0.046824 -0.0447352 #> randomForest.ok: salary = 2 0.046824 0.1184785 0.118552 0.1311450 #> randomForest.promoted: age = 58 -0.126732 -0.0061320 -0.006132 -0.0111328 #> randomForest.promoted: evaluation = 2 -0.201822 -0.1749640 -0.166262 -0.1431126 #> randomForest.promoted: gender = male -0.045880 -0.0019940 -0.000019 -0.0065254 #> randomForest.promoted: hours = 42 -0.247972 -0.2398625 -0.189205 -0.2014360 #> randomForest.promoted: salary = 2 -0.003902 0.0069900 0.034329 0.0299428 #> q3 max #> randomForest.fired: age = 58 0.3529740 0.362800 #> randomForest.fired: evaluation = 2 0.0418870 0.045408 #> randomForest.fired: gender = male 0.1521340 0.280686 #> randomForest.fired: hours = 42 0.3072255 0.351330 #> randomForest.fired: salary = 2 -0.1390850 -0.070866 #> randomForest.ok: age = 58 -0.0178415 0.005860 #> randomForest.ok: evaluation = 2 0.1307830 0.196252 #> randomForest.ok: gender = male -0.0061560 -0.003480 #> randomForest.ok: hours = 42 0.0010015 0.030996 #> randomForest.ok: salary = 2 0.1542220 0.268992 #> randomForest.promoted: age = 58 0.0129425 0.015468 #> randomForest.promoted: evaluation = 2 -0.0957510 -0.058120 #> randomForest.promoted: gender = male 0.0023685 0.023564 #> randomForest.promoted: hours = 42 -0.1719955 -0.156930 #> randomForest.promoted: salary = 2 0.0415060 0.077562
plot(bd_rf)
# example for regression - apartment prices # here we do not have interactions model <- randomForest(m2.price ~ . , data = apartments) explainer_rf <- explain(model, data = apartments_test[1:1000, 2:6], y = apartments_test$m2.price[1:1000])
#> Preparation of a new explainer is initiated #> -> model label : randomForest (default) #> -> data : 1000 rows 5 cols #> -> target variable : 1000 values #> -> predict function : yhat.randomForest will be used (default) #> -> predicted values : numerical, min = 2052.033 , mean = 3487.71 , max = 5776.623 #> -> residual function : difference between y and yhat (default) #> -> residuals : numerical, min = -632.8469 , mean = 1.070017 , max = 1328.352 #> A new explainer has been created!
bd_rf <- break_down_uncertainty(explainer_rf, apartments_test[1,]) bd_rf
#> min q1 median #> randomForest: construction.year = 2000 -128.5908 -119.3910 -75.48837 #> randomForest: district = Srodmiescie 981.8193 1036.9753 1054.79081 #> randomForest: floor = 3 178.8471 189.5230 194.12751 #> randomForest: no.rooms = 5 -229.8610 -225.7194 -212.31243 #> randomForest: surface = 130 -272.2211 -266.0785 -250.70512 #> mean q3 max #> randomForest: construction.year = 2000 -82.87975 -50.06424 -47.64365 #> randomForest: district = Srodmiescie 1046.73182 1054.79081 1091.59037 #> randomForest: floor = 3 197.65920 210.33113 215.52532 #> randomForest: no.rooms = 5 -200.17988 -203.34626 -130.21186 #> randomForest: surface = 130 -250.99715 -234.39585 -229.21426
plot(bd_rf)
bd_rf <- break_down_uncertainty(explainer_rf, apartments_test[1,], path = 1:5) plot(bd_rf)
bd_rf <- break_down_uncertainty(explainer_rf, apartments_test[1,], path = c("floor", "no.rooms", "district", "construction.year", "surface")) plot(bd_rf)
bd_rf <- shap(explainer_rf, apartments_test[1,]) bd_rf
#> min q1 median mean #> randomForest: construction.year = 2000 -128.5908 -127.7759 -116.5361 -97.94983 #> randomForest: district = Srodmiescie 981.8193 1054.7908 1074.7538 1078.05046 #> randomForest: floor = 3 159.4690 172.8786 187.2105 190.61682 #> randomForest: no.rooms = 5 -233.0194 -209.3096 -204.8655 -183.54147 #> randomForest: surface = 130 -343.0658 -284.8054 -273.9106 -276.84173 #> q3 max #> randomForest: construction.year = 2000 -69.15964 -47.64365 #> randomForest: district = Srodmiescie 1100.63043 1139.28110 #> randomForest: floor = 3 206.40993 215.52532 #> randomForest: no.rooms = 5 -135.54778 -130.21186 #> randomForest: surface = 130 -255.12184 -229.21426
plot(bd_rf)
plot(bd_rf, show_boxplots = FALSE)
# }