This function performs model diagnostics based on residuals. Residuals are calculated and plotted against predictions, true y values, or selected variables. For information on how to use this function, see https://ema.drwhy.ai/residualDiagnostic.html.
model_diagnostics(explainer, variables = NULL, ...)
explainer: a model to be explained, preprocessed by the explain()
function
variables: character - names of the variables to be explained. The default, NULL,
stands for all variables
...: other parameters
An object of the class model_diagnostics.
It is a data frame with residuals and selected variables.
Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://ema.drwhy.ai/
library(DALEX)
apartments_lm_model <- lm(m2.price ~ ., data = apartments)
explainer_lm <- explain(apartments_lm_model,
data = apartments,
y = apartments$m2.price)
#> Preparation of a new explainer is initiated
#> -> model label : lm ( default )
#> -> data : 1000 rows 6 cols
#> -> target variable : 1000 values
#> -> predict function : yhat.lm will be used ( default )
#> -> predicted values : No value for predict function target column. ( default )
#> -> model_info : package stats , ver. 4.2.3 , task regression ( default )
#> -> predicted values : numerical, min = 1781.848 , mean = 3487.019 , max = 6176.032
#> -> residual function : difference between y and yhat ( default )
#> -> residuals : numerical, min = -247.4728 , mean = 2.093656e-14 , max = 469.0023
#> A new explainer has been created!
diag_lm <- model_diagnostics(explainer_lm)
diag_lm
#> m2.price construction.year surface floor
#> Min. :1607 Min. :1920 Min. : 20.00 Min. : 1.000
#> 1st Qu.:2857 1st Qu.:1943 1st Qu.: 53.00 1st Qu.: 3.000
#> Median :3386 Median :1965 Median : 85.50 Median : 6.000
#> Mean :3487 Mean :1965 Mean : 85.59 Mean : 5.623
#> 3rd Qu.:4018 3rd Qu.:1988 3rd Qu.:118.00 3rd Qu.: 8.000
#> Max. :6595 Max. :2010 Max. :150.00 Max. :10.000
#>
#> no.rooms district y y_hat
#> Min. :1.00 Mokotow :107 Min. :1607 Min. :1782
#> 1st Qu.:2.00 Wola :106 1st Qu.:2857 1st Qu.:2879
#> Median :3.00 Ursus :105 Median :3386 Median :3374
#> Mean :3.36 Ursynow :103 Mean :3487 Mean :3487
#> 3rd Qu.:4.00 Srodmiescie:100 3rd Qu.:4018 3rd Qu.:3932
#> Max. :6.00 Bemowo : 98 Max. :6595 Max. :6176
#> (Other) :381
#> residuals abs_residuals label ids
#> Min. :-247.5 Min. :134.9 Length:1000 Min. : 1.0
#> 1st Qu.:-202.8 1st Qu.:180.3 Class :character 1st Qu.: 250.8
#> Median :-172.8 Median :212.4 Mode :character Median : 500.5
#> Mean : 0.0 Mean :260.0 Mean : 500.5
#> 3rd Qu.: 381.4 3rd Qu.:381.4 3rd Qu.: 750.2
#> Max. : 469.0 Max. :469.0 Max. :1000.0
#>
plot(diag_lm)
#> `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
# \donttest{
library("ranger")
apartments_ranger_model <- ranger(m2.price ~ ., data = apartments)
explainer_ranger <- explain(apartments_ranger_model,
data = apartments,
y = apartments$m2.price)
#> Preparation of a new explainer is initiated
#> -> model label : ranger ( default )
#> -> data : 1000 rows 6 cols
#> -> target variable : 1000 values
#> -> predict function : yhat.ranger will be used ( default )
#> -> predicted values : No value for predict function target column. ( default )
#> -> model_info : package ranger , ver. 0.14.1 , task regression ( default )
#> -> predicted values : numerical, min = 1905.07 , mean = 3489.776 , max = 6151.471
#> -> residual function : difference between y and yhat ( default )
#> -> residuals : numerical, min = -418.6815 , mean = -2.757442 , max = 616.1715
#> A new explainer has been created!
diag_ranger <- model_diagnostics(explainer_ranger)
diag_ranger
#> m2.price construction.year surface floor
#> Min. :1607 Min. :1920 Min. : 20.00 Min. : 1.000
#> 1st Qu.:2857 1st Qu.:1943 1st Qu.: 53.00 1st Qu.: 3.000
#> Median :3386 Median :1965 Median : 85.50 Median : 6.000
#> Mean :3487 Mean :1965 Mean : 85.59 Mean : 5.623
#> 3rd Qu.:4018 3rd Qu.:1988 3rd Qu.:118.00 3rd Qu.: 8.000
#> Max. :6595 Max. :2010 Max. :150.00 Max. :10.000
#>
#> no.rooms district y y_hat
#> Min. :1.00 Mokotow :107 Min. :1607 Min. :1905
#> 1st Qu.:2.00 Wola :106 1st Qu.:2857 1st Qu.:2939
#> Median :3.00 Ursus :105 Median :3386 Median :3416
#> Mean :3.36 Ursynow :103 Mean :3487 Mean :3490
#> 3rd Qu.:4.00 Srodmiescie:100 3rd Qu.:4018 3rd Qu.:3957
#> Max. :6.00 Bemowo : 98 Max. :6595 Max. :6151
#> (Other) :381
#> residuals abs_residuals label ids
#> Min. :-418.682 Min. : 0.0276 Length:1000 Min. : 1.0
#> 1st Qu.: -92.066 1st Qu.: 38.0046 Class :character 1st Qu.: 250.8
#> Median : -24.667 Median : 82.4687 Mode :character Median : 500.5
#> Mean : -2.757 Mean :107.3614 Mean : 500.5
#> 3rd Qu.: 56.946 3rd Qu.:151.5035 3rd Qu.: 750.2
#> Max. : 616.172 Max. :616.1715 Max. :1000.0
#>
plot(diag_ranger)
#> `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
plot(diag_ranger, diag_lm)
#> `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
plot(diag_ranger, diag_lm, variable = "y")
#> `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
plot(diag_ranger, diag_lm, variable = "construction.year")
#> `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
plot(diag_ranger, variable = "y", yvariable = "y_hat")
#> `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
plot(diag_ranger, variable = "y", yvariable = "abs_residuals")
#> `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
plot(diag_ranger, variable = "ids")
#> `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
# }