The function model_performance() calculates various performance measures for classification and regression models. For classification models, the following measures are calculated: F1, accuracy, recall, precision and AUC. For regression models, the following measures are calculated: mean squared error, root mean squared error, R squared and median absolute deviation.

model_performance(explainer, ..., cutoff = 0.5)

Arguments

explainer

a model to be explained, preprocessed by the explain function

...

other parameters

cutoff

a cutoff for classification models, needed for measures such as recall, precision, accuracy and F1. The default is 0.5.

Value

An object of the class model_performance.

It is a list with the following fields:

  • residuals - data frame that contains residuals for each observation

  • measures - list of calculated measures appropriate for the task, whether it is regression, binary classification or multiclass classification.

  • type - character that specifies type of the task.

References

Explanatory Model Analysis. Explore, Explain, and Examine Predictive Models. https://ema.drwhy.ai/

Examples

# \donttest{
# regression

library("ranger")
apartments_ranger_model <- ranger(m2.price~., data = apartments, num.trees = 50)
explainer_ranger_apartments  <- explain(apartments_ranger_model, data = apartments[,-1],
                             y = apartments$m2.price, label = "Ranger Apartments")
#> Preparation of a new explainer is initiated
#>   -> model label       :  Ranger Apartments 
#>   -> data              :  1000  rows  5  cols 
#>   -> target variable   :  1000  values 
#>   -> predict function  :  yhat.ranger  will be used (  default  )
#>   -> predicted values  :  No value for predict function target column. (  default  )
#>   -> model_info        :  package ranger , ver. 0.14.1 , task regression (  default  ) 
#>   -> predicted values  :  numerical, min =  1822.446 , mean =  3486.255 , max =  6130.046  
#>   -> residual function :  difference between y and yhat (  default  )
#>   -> residuals         :  numerical, min =  -481.838 , mean =  0.7641192 , max =  785.1507  
#>   A new explainer has been created!  
model_performance_ranger_aps <- model_performance(explainer_ranger_apartments )
model_performance_ranger_aps
#> Measures for:  regression
#> mse        : 28419.56 
#> rmse       : 168.581 
#> r2         : 0.9653955 
#> mad        : 95.57533
#> 
#> Residuals:
#>          0%         10%         20%         30%         40%         50% 
#> -481.838000 -177.822330 -120.084067  -85.170321  -56.779138  -24.095667 
#>         60%         70%         80%         90%        100% 
#>    5.484505   48.352567  115.271415  208.192433  785.150667 
plot(model_performance_ranger_aps)

plot(model_performance_ranger_aps, geom = "boxplot")

plot(model_performance_ranger_aps, geom = "histogram")


# binary classification

titanic_glm_model <- glm(survived~., data = titanic_imputed, family = "binomial")
explainer_glm_titanic <- explain(titanic_glm_model, data = titanic_imputed[,-8],
                         y = titanic_imputed$survived)
#> Preparation of a new explainer is initiated
#>   -> model label       :  lm  (  default  )
#>   -> data              :  2207  rows  7  cols 
#>   -> target variable   :  2207  values 
#>   -> predict function  :  yhat.glm  will be used (  default  )
#>   -> predicted values  :  No value for predict function target column. (  default  )
#>   -> model_info        :  package stats , ver. 4.2.1 , task classification (  default  ) 
#>   -> predicted values  :  numerical, min =  0.008128381 , mean =  0.3221568 , max =  0.9731431  
#>   -> residual function :  difference between y and yhat (  default  )
#>   -> residuals         :  numerical, min =  -0.9628583 , mean =  -2.569729e-10 , max =  0.9663346  
#>   A new explainer has been created!  
model_performance_glm_titanic <- model_performance(explainer_glm_titanic)
model_performance_glm_titanic
#> Measures for:  classification
#> recall     : 0.5738397 
#> precision  : 0.7472527 
#> f1         : 0.6491647 
#> accuracy   : 0.8001812 
#> auc        : 0.8115462
#> 
#> Residuals:
#>          0%         10%         20%         30%         40%         50% 
#> -0.96285832 -0.32240247 -0.23986439 -0.19544185 -0.14842925 -0.11460334 
#>         60%         70%         80%         90%        100% 
#> -0.06940964  0.06185475  0.29607060  0.72120412  0.96633458 
plot(model_performance_glm_titanic)

plot(model_performance_glm_titanic, geom = "boxplot")

plot(model_performance_glm_titanic, geom = "histogram")


# multiclass classification

HR_ranger_model <- ranger(status~., data = HR, num.trees = 50,
                               probability = TRUE)
explainer_ranger_HR  <- explain(HR_ranger_model, data = HR[,-6],
                             y = HR$status, label = "Ranger HR")
#> Preparation of a new explainer is initiated
#>   -> model label       :  Ranger HR 
#>   -> data              :  7847  rows  5  cols 
#>   -> target variable   :  7847  values 
#>   -> predict function  :  yhat.ranger  will be used (  default  )
#>   -> predicted values  :  No value for predict function target column. (  default  )
#>   -> model_info        :  package ranger , ver. 0.14.1 , task multiclass (  default  ) 
#>   -> predicted values  :  predict function returns multiple columns:  3  (  default  ) 
#>   -> residual function :  difference between 1 and probability of true class (  default  )
#>   -> residuals         :  numerical, min =  0 , mean =  0.2770326 , max =  0.9174926  
#>   A new explainer has been created!  
model_performance_ranger_HR <- model_performance(explainer_ranger_HR)
model_performance_ranger_HR
#> Measures for:  multiclass
#> micro_F1   : 0.8696317 
#> macro_F1   : 0.8676848 
#> w_macro_F1 : 0.8684779 
#> accuracy   : 0.8696317 
#> w_macro_auc: 0.9767629
#> 
#> Residuals:
#>         0%        10%        20%        30%        40%        50%        60% 
#> 0.00000000 0.02485591 0.05939594 0.11407525 0.17323583 0.23985122 0.31184419 
#>        70%        80%        90%       100% 
#> 0.39089087 0.48552726 0.59656336 0.91749257 
plot(model_performance_ranger_HR)

plot(model_performance_ranger_HR, geom = "boxplot")

plot(model_performance_ranger_HR, geom = "histogram")


# }