From DALEX version 1.0 this function calls the feature_importance function. Find information on how to use this function here: https://ema.drwhy.ai/featureImportance.html.

model_parts(
  explainer,
  loss_function = loss_default(explainer$model_info$type),
  ...,
  type = "variable_importance",
  N = n_sample,
  n_sample = 1000
)

Arguments

explainer

a model to be explained, preprocessed by the explain function

loss_function

a function that will be used to assess variable importance. By default it is 1-AUC for classification, cross entropy for multilabel classification and RMSE for regression. A custom, user-made loss function should accept two obligatory parameters (observed, predicted), where observed stands for the actual values of the target, while predicted stands for the predicted values. If the attribute "loss_name" is associated with the function object, then it will be plotted as the name of the loss function.

...

other parameters

type

character, type of transformation that should be applied for dropout loss. variable_importance and raw return raw drop losses, ratio returns drop_loss/drop_loss_full_model, while difference returns drop_loss - drop_loss_full_model

N

number of observations that should be sampled for calculation of variable importance. If NULL then variable importance will be calculated on whole dataset (no sampling).

n_sample

alias for N held for backwards compatibility. number of observations that should be sampled for calculation of variable importance.

Value

An object of the class feature_importance. It's a data frame with the calculated mean dropout loss for each variable.

References

Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://ema.drwhy.ai/

Examples

# \donttest{
# regression

library("ranger")
apartments_ranger_model <- ranger(m2.price~., data = apartments, num.trees = 50)
explainer_ranger  <- explain(apartments_ranger_model, data = apartments[,-1],
                             y = apartments$m2.price, label = "Ranger Apartments")
#> Preparation of a new explainer is initiated
#>   -> model label       :  Ranger Apartments 
#>   -> data              :  1000  rows  5  cols 
#>   -> target variable   :  1000  values 
#>   -> predict function  :  yhat.ranger  will be used (  default  )
#>   -> predicted values  :  No value for predict function target column. (  default  )
#>   -> model_info        :  package ranger , ver. 0.13.1 , task regression (  default  ) 
#>   -> predicted values  :  numerical, min =  1852.294 , mean =  3493.404 , max =  6319.938  
#>   -> residual function :  difference between y and yhat (  default  )
#>   -> residuals         :  numerical, min =  -473.439 , mean =  -6.385156 , max =  617.3186  
#>   A new explainer has been created!  
model_parts_ranger_aps <- model_parts(explainer_ranger, type = "raw")
head(model_parts_ranger_aps, 8)
#>            variable mean_dropout_loss             label
#> 1      _full_model_          157.4331 Ranger Apartments
#> 2          no.rooms          308.8444 Ranger Apartments
#> 3 construction.year          401.4053 Ranger Apartments
#> 4             floor          448.1740 Ranger Apartments
#> 5           surface          542.4700 Ranger Apartments
#> 6          district          749.6691 Ranger Apartments
#> 7        _baseline_         1202.7558 Ranger Apartments
plot(model_parts_ranger_aps)


# binary classification

titanic_glm_model <- glm(survived~., data = titanic_imputed, family = "binomial")
explainer_glm_titanic <- explain(titanic_glm_model, data = titanic_imputed[,-8],
                         y = titanic_imputed$survived)
#> Preparation of a new explainer is initiated
#>   -> model label       :  lm  (  default  )
#>   -> data              :  2207  rows  7  cols 
#>   -> target variable   :  2207  values 
#>   -> predict function  :  yhat.glm  will be used (  default  )
#>   -> predicted values  :  No value for predict function target column. (  default  )
#>   -> model_info        :  package stats , ver. 4.2.0 , task classification (  default  ) 
#>   -> predicted values  :  numerical, min =  0.008128381 , mean =  0.3221568 , max =  0.9731431  
#>   -> residual function :  difference between y and yhat (  default  )
#>   -> residuals         :  numerical, min =  -0.9628583 , mean =  -2.569729e-10 , max =  0.9663346  
#>   A new explainer has been created!  
logit <- function(x) exp(x)/(1+exp(x))
custom_loss <- function(observed, predicted){
   sum((observed - logit(predicted))^2)
}
attr(custom_loss, "loss_name") <- "Logit residuals"
model_parts_glm_titanic <- model_parts(explainer_glm_titanic, type = "raw",
                                       loss_function = custom_loss)
head(model_parts_glm_titanic, 8)
#>       variable mean_dropout_loss label
#> 1 _full_model_          255.6221    lm
#> 2        parch          255.6110    lm
#> 3         fare          255.7387    lm
#> 4        sibsp          256.0173    lm
#> 5     embarked          256.8066    lm
#> 6          age          258.4443    lm
#> 7        class          265.7038    lm
#> 8       gender          277.5022    lm
plot(model_parts_glm_titanic)


# multilabel classification

HR_ranger_model_HR <- ranger(status~., data = HR, num.trees = 50,
                               probability = TRUE)
explainer_ranger_HR  <- explain(HR_ranger_model_HR, data = HR[,-6],
                             y = HR$status, label = "Ranger HR")
#> Preparation of a new explainer is initiated
#>   -> model label       :  Ranger HR 
#>   -> data              :  7847  rows  5  cols 
#>   -> target variable   :  7847  values 
#>   -> predict function  :  yhat.ranger  will be used (  default  )
#>   -> predicted values  :  No value for predict function target column. (  default  )
#>   -> model_info        :  package ranger , ver. 0.13.1 , task multiclass (  default  ) 
#>   -> predicted values  :  predict function returns multiple columns:  3  (  default  ) 
#>   -> residual function :  difference between 1 and probability of true class (  default  )
#>   -> residuals         :  numerical, min =  0 , mean =  0.279479 , max =  0.918585  
#>   A new explainer has been created!  
model_parts_ranger_HR <- model_parts(explainer_ranger_HR, type = "raw")
head(model_parts_ranger_HR, 8)
#>       variable mean_dropout_loss     label
#> 1 _full_model_          379.9046 Ranger HR
#> 2       gender          547.5147 Ranger HR
#> 3          age          620.3475 Ranger HR
#> 4       salary          699.4666 Ranger HR
#> 5   evaluation          912.0855 Ranger HR
#> 6        hours         1240.7030 Ranger HR
#> 7   _baseline_         2151.2435 Ranger HR
plot(model_parts_ranger_HR)


# }