This function performs residual-based model diagnostics. Residuals are calculated and plotted against predictions, true y values, or selected variables. For information on how to use this function, see https://ema.drwhy.ai/residualDiagnostic.html.

model_diagnostics(explainer, variables = NULL, ...)

Arguments

explainer

an explainer object, i.e. a model to be explained, preprocessed by the explain() function

variables

character - names of the variables to be explained. The default, NULL, stands for all variables.

...

other parameters

Value

An object of the class model_diagnostics. It's a data frame with residuals and selected variables.

References

Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://ema.drwhy.ai/

Examples

library(DALEX)
apartments_lm_model <- lm(m2.price ~ ., data = apartments)
explainer_lm <- explain(apartments_lm_model,
                         data = apartments,
                         y = apartments$m2.price)
#> Preparation of a new explainer is initiated
#>   -> model label       :  lm  (  default  )
#>   -> data              :  1000  rows  6  cols 
#>   -> target variable   :  1000  values 
#>   -> predict function  :  yhat.lm  will be used (  default  )
#>   -> predicted values  :  No value for predict function target column. (  default  )
#>   -> model_info        :  package stats , ver. 4.2.0 , task regression (  default  ) 
#>   -> predicted values  :  numerical, min =  1781.848 , mean =  3487.019 , max =  6176.032  
#>   -> residual function :  difference between y and yhat (  default  )
#>   -> residuals         :  numerical, min =  -247.4728 , mean =  2.093656e-14 , max =  469.0023  
#>   A new explainer has been created!  
diag_lm <- model_diagnostics(explainer_lm)
diag_lm
#>     m2.price    construction.year    surface           floor       
#>  Min.   :1607   Min.   :1920      Min.   : 20.00   Min.   : 1.000  
#>  1st Qu.:2857   1st Qu.:1943      1st Qu.: 53.00   1st Qu.: 3.000  
#>  Median :3386   Median :1965      Median : 85.50   Median : 6.000  
#>  Mean   :3487   Mean   :1965      Mean   : 85.59   Mean   : 5.623  
#>  3rd Qu.:4018   3rd Qu.:1988      3rd Qu.:118.00   3rd Qu.: 8.000  
#>  Max.   :6595   Max.   :2010      Max.   :150.00   Max.   :10.000  
#>                                                                    
#>     no.rooms           district         y            y_hat     
#>  Min.   :1.00   Mokotow    :107   Min.   :1607   Min.   :1782  
#>  1st Qu.:2.00   Wola       :106   1st Qu.:2857   1st Qu.:2879  
#>  Median :3.00   Ursus      :105   Median :3386   Median :3374  
#>  Mean   :3.36   Ursynow    :103   Mean   :3487   Mean   :3487  
#>  3rd Qu.:4.00   Srodmiescie:100   3rd Qu.:4018   3rd Qu.:3932  
#>  Max.   :6.00   Bemowo     : 98   Max.   :6595   Max.   :6176  
#>                 (Other)    :381                                
#>    residuals      abs_residuals      label                ids        
#>  Min.   :-247.5   Min.   :134.9   Length:1000        Min.   :   1.0  
#>  1st Qu.:-202.8   1st Qu.:180.3   Class :character   1st Qu.: 250.8  
#>  Median :-172.8   Median :212.4   Mode  :character   Median : 500.5  
#>  Mean   :   0.0   Mean   :260.0                      Mean   : 500.5  
#>  3rd Qu.: 381.4   3rd Qu.:381.4                      3rd Qu.: 750.2  
#>  Max.   : 469.0   Max.   :469.0                      Max.   :1000.0  
#>                                                                      
plot(diag_lm)
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# \donttest{
library("ranger")
apartments_ranger_model <- ranger(m2.price ~ ., data = apartments)
explainer_ranger <- explain(apartments_ranger_model,
                         data = apartments,
                         y = apartments$m2.price)
#> Preparation of a new explainer is initiated
#>   -> model label       :  ranger  (  default  )
#>   -> data              :  1000  rows  6  cols 
#>   -> target variable   :  1000  values 
#>   -> predict function  :  yhat.ranger  will be used (  default  )
#>   -> predicted values  :  No value for predict function target column. (  default  )
#>   -> model_info        :  package ranger , ver. 0.13.1 , task regression (  default  ) 
#>   -> predicted values  :  numerical, min =  1889.293 , mean =  3488.395 , max =  6142.108  
#>   -> residual function :  difference between y and yhat (  default  )
#>   -> residuals         :  numerical, min =  -399.8979 , mean =  -1.376414 , max =  590.2318  
#>   A new explainer has been created!  
diag_ranger <- model_diagnostics(explainer_ranger)
diag_ranger
#>     m2.price    construction.year    surface           floor       
#>  Min.   :1607   Min.   :1920      Min.   : 20.00   Min.   : 1.000  
#>  1st Qu.:2857   1st Qu.:1943      1st Qu.: 53.00   1st Qu.: 3.000  
#>  Median :3386   Median :1965      Median : 85.50   Median : 6.000  
#>  Mean   :3487   Mean   :1965      Mean   : 85.59   Mean   : 5.623  
#>  3rd Qu.:4018   3rd Qu.:1988      3rd Qu.:118.00   3rd Qu.: 8.000  
#>  Max.   :6595   Max.   :2010      Max.   :150.00   Max.   :10.000  
#>                                                                    
#>     no.rooms           district         y            y_hat     
#>  Min.   :1.00   Mokotow    :107   Min.   :1607   Min.   :1889  
#>  1st Qu.:2.00   Wola       :106   1st Qu.:2857   1st Qu.:2940  
#>  Median :3.00   Ursus      :105   Median :3386   Median :3416  
#>  Mean   :3.36   Ursynow    :103   Mean   :3487   Mean   :3488  
#>  3rd Qu.:4.00   Srodmiescie:100   3rd Qu.:4018   3rd Qu.:3963  
#>  Max.   :6.00   Bemowo     : 98   Max.   :6595   Max.   :6142  
#>                 (Other)    :381                                
#>    residuals        abs_residuals         label                ids        
#>  Min.   :-399.898   Min.   :  0.2762   Length:1000        Min.   :   1.0  
#>  1st Qu.: -89.990   1st Qu.: 39.9885   Class :character   1st Qu.: 250.8  
#>  Median : -26.908   Median : 82.5867   Mode  :character   Median : 500.5  
#>  Mean   :  -1.376   Mean   :108.8013                      Mean   : 500.5  
#>  3rd Qu.:  62.852   3rd Qu.:148.7288                      3rd Qu.: 750.2  
#>  Max.   : 590.232   Max.   :590.2318                      Max.   :1000.0  
#>                                                                           
plot(diag_ranger)
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

plot(diag_ranger, diag_lm)
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

plot(diag_ranger, diag_lm, variable = "y")
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

plot(diag_ranger, diag_lm, variable = "construction.year")
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

plot(diag_ranger, variable = "y", yvariable = "y_hat")
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

plot(diag_ranger, variable = "y", yvariable = "abs_residuals")
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

plot(diag_ranger, variable = "ids")
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# }