Bayesian Optimization takes a relatively long time: the larger the `iters.n` parameter, the longer it takes (but if you want to obtain model parameters better than the defaults, it is suggested to set `iters.n` to at least 20). Also, the larger the dataset, the longer Bayesian Optimization takes.

train_models_bayesopt(
  train_data,
  y,
  test_data,
  engine,
  type,
  iters.n = 7,
  return_params = FALSE,
  verbose = TRUE
)

Arguments

train_data

Training data for the models, created by the `prepare_data()` function.

y

A string that indicates a target column name.

test_data

Test data for the models, created by the `prepare_data()` function.

engine

A vector of tree-based models that shall be created. Possible values are: `ranger`, `xgboost`, `decision_tree`, `lightgbm`, `catboost`.

type

A string that determines whether the machine learning task is `binary_clf` or `regression`.

iters.n

The number of iterations of the BayesOpt function.

return_params

If `TRUE`, returns the optimized model parameters.

verbose

A logical value: if set to `TRUE`, provides all information about the process; if `FALSE`, gives none.

Value

Trained models with optimized parameters. If `return_params` is `TRUE`, the training parameters are also returned, in one list together with the models.

Examples

# Binary classification
data(iris)
iris_bin          <- iris[1:100, ]
type              <- guess_type(iris_bin, 'Species')
preprocessed_data <- preprocessing(iris_bin, 'Species', type)
#> Error in if (advanced) {    del_cor <- delete_correlated_values(pre_data, y, verbose = verbose)    pre_data <- del_cor$data    pre_data <- delete_id_columns(pre_data)    pre_data <- boruta_selection(pre_data, y)}: argument is not interpretable as logical
preprocessed_data <- preprocessed_data$data
#> Error in eval(expr, envir, enclos): object 'preprocessed_data' not found
split_data <-
  train_test_balance(preprocessed_data, 'Species', balance = FALSE)
#> Error in train_test_balance(preprocessed_data, "Species", balance = FALSE): object 'preprocessed_data' not found
train_data <-
  prepare_data(split_data$train,
               'Species',
               c('ranger', 'xgboost', 'decision_tree', 'lightgbm', 'catboost'))
#> Error in as.data.frame(unclass(data), stringsAsFactors = TRUE): object 'split_data' not found
test_data <-
  prepare_data(split_data$test,
               'Species',
               engine = c('ranger', 'xgboost', 'decision_tree', 'lightgbm', 'catboost'),
               predict = TRUE,
               train = split_data$train)
#> Error in as.data.frame(unclass(data), stringsAsFactors = TRUE): object 'split_data' not found

models <- train_models_bayesopt(train_data,
                               'Species',
                               test_data,
                               engine = c('ranger', 'xgboost', 'decision_tree',
                               'lightgbm', 'catboost'),
                               type = type,
                               iters.n = 1,)
#> 
#> Error in ranger::ranger(dependent.variable.name = y, data = train_data$ranger_data): object 'train_data' not found

# Regression
type              <- guess_type(lisbon, 'Price')
preprocessed_data <- preprocessing(lisbon, 'Price', type)
#> Error in if (advanced) {    del_cor <- delete_correlated_values(pre_data, y, verbose = verbose)    pre_data <- del_cor$data    pre_data <- delete_id_columns(pre_data)    pre_data <- boruta_selection(pre_data, y)}: argument is not interpretable as logical
preprocessed_data <- preprocessed_data$data
#> Error in eval(expr, envir, enclos): object 'preprocessed_data' not found
split_data2 <-
  train_test_balance(preprocessed_data,
                     y = 'Price',
                     balance = FALSE)
#> Error in train_test_balance(preprocessed_data, y = "Price", balance = FALSE): object 'preprocessed_data' not found
train_data2 <- prepare_data(split_data2$train,
                     y = 'Price',
                     engine = c('ranger', 'xgboost', 'decision_tree', 'lightgbm', 'catboost')
)
#> Error in as.data.frame(unclass(data), stringsAsFactors = TRUE): object 'split_data2' not found
test_data2 <-
  prepare_data(split_data2$test,
               'Price',
               engine = c('ranger', 'xgboost', 'decision_tree', 'lightgbm', 'catboost'),
               predict = TRUE,
               train = split_data2$train)
#> Error in as.data.frame(unclass(data), stringsAsFactors = TRUE): object 'split_data2' not found


models2 <-
   train_models_bayesopt(train_data2,
                        'Price',
                         test_data2,
                         engine = c('ranger', 'xgboost', 'decision_tree', 'lightgbm', 'catboost'),
                         type = type,
                         iters.n = 1)
#> 
#> Error in ranger::ranger(dependent.variable.name = y, data = train_data$ranger_data): object 'train_data2' not found