diff --git a/R-package/demo/basic_walkthrough.R b/R-package/demo/basic_walkthrough.R
index ece168a04d77..f53a83805a2e 100644
--- a/R-package/demo/basic_walkthrough.R
+++ b/R-package/demo/basic_walkthrough.R
@@ -1,7 +1,8 @@
 require(xgboost)
 require(methods)
+
 # we load in the agaricus dataset
-# In this example, we are aiming to predict whether a mushroom can be eaten
+# In this example, we are aiming to predict whether a mushroom is edible
 data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
 train <- agaricus.train
@@ -15,33 +16,33 @@ class(train$data)
 # note: we are putting in sparse matrix here, xgboost naturally handles sparse input
 # use sparse matrix when your feature is sparse(e.g. when you are using one-hot encoding vector)
 print("Training xgboost with sparseMatrix")
-bst <- xgboost(data = train$data, label = train$label, max.depth = 2, eta = 1, nround = 2,
+bst <- xgboost(data = train$data, label = train$label, max_depth = 2, eta = 1, nrounds = 2,
                nthread = 2, objective = "binary:logistic")
 # alternatively, you can put in dense matrix, i.e. basic R-matrix
 print("Training xgboost with Matrix")
-bst <- xgboost(data = as.matrix(train$data), label = train$label, max.depth = 2, eta = 1, nround = 2,
+bst <- xgboost(data = as.matrix(train$data), label = train$label, max_depth = 2, eta = 1, nrounds = 2,
                nthread = 2, objective = "binary:logistic")
 # you can also put in xgb.DMatrix object, which stores label, data and other meta datas needed for advanced features
 print("Training xgboost with xgb.DMatrix")
 dtrain <- xgb.DMatrix(data = train$data, label = train$label)
-bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2, nthread = 2,
+bst <- xgboost(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, nthread = 2,
                objective = "binary:logistic")
 # Verbose = 0,1,2
 print("Train xgboost with verbose 0, no message")
-bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2,
+bst <- xgboost(data = dtrain, max_depth = 2, eta = 1, nrounds = 2,
                nthread = 2, objective = "binary:logistic", verbose = 0)
 print("Train xgboost with verbose 1, print evaluation metric")
-bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2,
+bst <- xgboost(data = dtrain, max_depth = 2, eta = 1, nrounds = 2,
                nthread = 2, objective = "binary:logistic", verbose = 1)
 print("Train xgboost with verbose 2, also print information about tree")
-bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2,
+bst <- xgboost(data = dtrain, max_depth = 2, eta = 1, nrounds = 2,
                nthread = 2, objective = "binary:logistic", verbose = 2)
 # you can also specify data as file path to a LibSVM format input
 # since we do not have this file with us, the following line is just for illustration
-# bst <- xgboost(data = 'agaricus.train.svm', max.depth = 2, eta = 1, nround = 2,objective = "binary:logistic")
+# bst <- xgboost(data = 'agaricus.train.svm', max_depth = 2, eta = 1, nrounds = 2, objective = "binary:logistic")
 #--------------------basic prediction using xgboost--------------
 # you can do prediction using the following line
@@ -77,19 +78,19 @@ watchlist <- list(train=dtrain, test=dtest)
 # to train with watchlist, use xgb.train, which contains more advanced features
 # watchlist allows us to monitor the evaluation result on all data in the list
 print("Train xgboost using xgb.train with watchlist")
-bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nround=2, watchlist=watchlist,
+bst <- xgb.train(data=dtrain, max_depth=2, eta=1, nrounds=2, watchlist=watchlist,
                  nthread = 2, objective = "binary:logistic")
 # we can change evaluation metrics, or use multiple evaluation metrics
 print("train xgboost using xgb.train with watchlist, watch logloss and error")
-bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nround=2, watchlist=watchlist,
-                 eval.metric = "error", eval.metric = "logloss",
+bst <- xgb.train(data=dtrain, max_depth=2, eta=1, nrounds=2, watchlist=watchlist,
+                 eval_metric = "error", eval_metric = "logloss",
                  nthread = 2, objective = "binary:logistic")
 # xgb.DMatrix can also be saved using xgb.DMatrix.save
 xgb.DMatrix.save(dtrain, "dtrain.buffer")
 # to load it in, simply call xgb.DMatrix
 dtrain2 <- xgb.DMatrix("dtrain.buffer")
-bst <- xgb.train(data=dtrain2, max.depth=2, eta=1, nround=2, watchlist=watchlist,
+bst <- xgb.train(data=dtrain2, max_depth=2, eta=1, nrounds=2, watchlist=watchlist,
                  nthread = 2, objective = "binary:logistic")
 # information can be extracted from xgb.DMatrix using getinfo
 label = getinfo(dtest, "label")
@@ -98,11 +99,11 @@ err <- as.numeric(sum(as.integer(pred > 0.5) != label))/length(label)
 print(paste("test-error=", err))
 # You can dump the tree you learned using xgb.dump into a text file
-xgb.dump(bst, "dump.raw.txt", with.stats = T)
+xgb.dump(bst, "dump.raw.txt", with_stats = TRUE)
 # Finally, you can check which features are the most important.
 print("Most important features (look at column Gain):")
-imp_matrix <- xgb.importance(feature_names = train$data@Dimnames[[2]], model = bst)
+imp_matrix <- xgb.importance(feature_names = colnames(train$data), model = bst)
 print(imp_matrix)
 # Feature importance bar plot by gain
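A quick end-to-end check of the renamed arguments in this demo, as a minimal sketch that is not part of the patch, using only the agaricus data bundled with the package:

```r
library(xgboost)
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
train <- agaricus.train
test <- agaricus.test
# new spellings: max_depth and nrounds (formerly max.depth and nround)
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
               eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
pred <- predict(bst, test$data)
err <- mean(as.numeric(pred > 0.5) != test$label)
print(paste("test-error =", err))
```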
"binary:logistic") # we can change evaluation metrics, or use multiple evaluation metrics print("train xgboost using xgb.train with watchlist, watch logloss and error") -bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nround=2, watchlist=watchlist, - eval.metric = "error", eval.metric = "logloss", +bst <- xgb.train(data=dtrain, max_depth=2, eta=1, nrounds=2, watchlist=watchlist, + eval_metric = "error", eval_metric = "logloss", nthread = 2, objective = "binary:logistic") # xgb.DMatrix can also be saved using xgb.DMatrix.save xgb.DMatrix.save(dtrain, "dtrain.buffer") # to load it in, simply call xgb.DMatrix dtrain2 <- xgb.DMatrix("dtrain.buffer") -bst <- xgb.train(data=dtrain2, max.depth=2, eta=1, nround=2, watchlist=watchlist, +bst <- xgb.train(data=dtrain2, max_depth=2, eta=1, nrounds=2, watchlist=watchlist, nthread = 2, objective = "binary:logistic") # information can be extracted from xgb.DMatrix using getinfo label = getinfo(dtest, "label") @@ -98,11 +99,11 @@ err <- as.numeric(sum(as.integer(pred > 0.5) != label))/length(label) print(paste("test-error=", err)) # You can dump the tree you learned using xgb.dump into a text file -xgb.dump(bst, "dump.raw.txt", with.stats = T) +xgb.dump(bst, "dump.raw.txt", with_stats = T) # Finally, you can check which features are the most important. print("Most important features (look at column Gain):") -imp_matrix <- xgb.importance(feature_names = train$data@Dimnames[[2]], model = bst) +imp_matrix <- xgb.importance(feature_names = colnames(train$data), model = bst) print(imp_matrix) # Feature importance bar plot by gain diff --git a/R-package/demo/boost_from_prediction.R b/R-package/demo/boost_from_prediction.R index 7fa7d8545de4..17656507f260 100644 --- a/R-package/demo/boost_from_prediction.R +++ b/R-package/demo/boost_from_prediction.R @@ -11,8 +11,8 @@ watchlist <- list(eval = dtest, train = dtrain) # print('start running example to start from a initial prediction') # train xgboost for 1 round -param <- list(max.depth=2,eta=1,nthread = 2, silent=1,objective='binary:logistic') -bst <- xgb.train( param, dtrain, 1, watchlist ) +param <- list(max_depth=2, eta=1, nthread = 2, silent=1, objective='binary:logistic') +bst <- xgb.train(param, dtrain, 1, watchlist) # Note: we need the margin value instead of transformed prediction in set_base_margin # do predict with output_margin=TRUE, will always give you margin values before logistic transformation ptrain <- predict(bst, dtrain, outputmargin=TRUE) diff --git a/R-package/demo/create_sparse_matrix.R b/R-package/demo/create_sparse_matrix.R index 7a8dfaa82532..6069f33d47fc 100644 --- a/R-package/demo/create_sparse_matrix.R +++ b/R-package/demo/create_sparse_matrix.R @@ -65,11 +65,10 @@ output_vector = df[,Y:=0][Improved == "Marked",Y:=1][,Y] # Following is the same process as other demo cat("Learning...\n") -bst <- xgboost(data = sparse_matrix, label = output_vector, max.depth = 9, - eta = 1, nthread = 2, nround = 10,objective = "binary:logistic") +bst <- xgboost(data = sparse_matrix, label = output_vector, max_depth = 9, + eta = 1, nthread = 2, nrounds = 10, objective = "binary:logistic") -# sparse_matrix@Dimnames[[2]] represents the column names of the sparse matrix. -importance <- xgb.importance(feature_names = sparse_matrix@Dimnames[[2]], model = bst) +importance <- xgb.importance(feature_names = colnames(sparse_matrix), model = bst) print(importance) # According to the matrix below, the most important feature in this dataset to predict if the treatment will work is the Age. 
diff --git a/R-package/demo/cross_validation.R b/R-package/demo/cross_validation.R
index 5d748f6797c9..652076165bdd 100644
--- a/R-package/demo/cross_validation.R
+++ b/R-package/demo/cross_validation.R
@@ -6,7 +6,7 @@ dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
 dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
 nround <- 2
-param <- list(max.depth=2,eta=1,silent=1,nthread = 2, objective='binary:logistic')
+param <- list(max_depth=2, eta=1, silent=1, nthread=2, objective='binary:logistic')
 cat('running cross validation\n')
 # do cross validation, this will print result out as
@@ -19,7 +19,7 @@ cat('running cross validation, disable standard deviation display\n')
 # [iteration] metric_name:mean_value+std_value
 # std_value is standard deviation of the metric
 xgb.cv(param, dtrain, nround, nfold=5,
-       metrics={'error'}, showsd = FALSE)
+       metrics='error', showsd = FALSE)
 ###
 # you can also do cross validation with cutomized loss function
@@ -40,12 +40,12 @@ evalerror <- function(preds, dtrain) {
   return(list(metric = "error", value = err))
 }
-param <- list(max.depth=2,eta=1,silent=1,
+param <- list(max_depth=2, eta=1, silent=1,
               objective = logregobj, eval_metric = evalerror)
 # train with customized objective
 xgb.cv(params = param, data = dtrain, nrounds = nround, nfold = 5)
 # do cross validation with prediction values for each fold
 res <- xgb.cv(params = param, data = dtrain, nrounds = nround, nfold = 5, prediction = TRUE)
-res$dt
+res$evaluation_log
 length(res$pred)
diff --git a/R-package/demo/custom_objective.R b/R-package/demo/custom_objective.R
index 7234ead869a3..3bbb40cca29f 100644
--- a/R-package/demo/custom_objective.R
+++ b/R-package/demo/custom_objective.R
@@ -33,7 +33,7 @@ evalerror <- function(preds, dtrain) {
   return(list(metric = "error", value = err))
 }
-param <- list(max.depth=2, eta=1, nthread = 2, silent=1,
+param <- list(max_depth=2, eta=1, nthread = 2, silent=1,
               objective=logregobj, eval_metric=evalerror)
 print ('start training with user customized objective')
 # training with customized objective, we can also do step by step training
@@ -57,7 +57,7 @@ logregobjattr <- function(preds, dtrain) {
   hess <- preds * (1 - preds)
   return(list(grad = grad, hess = hess))
 }
-param <- list(max.depth=2, eta=1, nthread = 2, silent=1,
+param <- list(max_depth=2, eta=1, nthread = 2, silent=1,
               objective=logregobjattr, eval_metric=evalerror)
 print ('start training with user customized objective, with additional attributes in DMatrix')
 # training with customized objective, we can also do step by step training
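The `res$dt` to `res$evaluation_log` rename above follows the slot name in the object `xgb.cv` now returns; a sketch of inspecting it, assuming the `dtrain` built in these demos:

```r
res <- xgb.cv(params = list(max_depth = 2, eta = 1, nthread = 2,
                            objective = "binary:logistic"),
              data = dtrain, nrounds = 2, nfold = 5, prediction = TRUE)
print(res$evaluation_log)  # per-iteration mean and std of train/test metrics
length(res$pred)           # one out-of-fold prediction per training row
```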
diff --git a/R-package/demo/early_stopping.R b/R-package/demo/early_stopping.R
index aa74aa2eeac5..08342d0f1f5c 100644
--- a/R-package/demo/early_stopping.R
+++ b/R-package/demo/early_stopping.R
@@ -7,7 +7,7 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
 # note: for customized objective function, we leave objective as default
 # note: what we are getting is margin value in prediction
 # you must know what you are doing
-param <- list(max.depth=2,eta=1,nthread = 2, silent=1)
+param <- list(max_depth=2, eta=1, nthread = 2, silent=1)
 watchlist <- list(eval = dtest)
 num_round <- 20
 # user define objective function, given prediction, return gradient and second order gradient
@@ -34,7 +34,7 @@ print ('start training with early Stopping setting')
 bst <- xgb.train(param, dtrain, num_round, watchlist,
                  objective = logregobj, eval_metric = evalerror, maximize = FALSE,
-                 early.stop.round = 3)
+                 early_stopping_rounds = 3)
 bst <- xgb.cv(param, dtrain, num_round, nfold = 5,
               objective = logregobj, eval_metric = evalerror,
-              maximize = FALSE, early.stop.round = 3)
+              maximize = FALSE, early_stopping_rounds = 3)
diff --git a/R-package/demo/predict_first_ntree.R b/R-package/demo/predict_first_ntree.R
index 422201b0a0a2..c8119c594c5c 100644
--- a/R-package/demo/predict_first_ntree.R
+++ b/R-package/demo/predict_first_ntree.R
@@ -5,7 +5,7 @@ data(agaricus.test, package='xgboost')
 dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
 dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
-param <- list(max.depth=2,eta=1,silent=1,objective='binary:logistic')
+param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
 watchlist <- list(eval = dtest, train = dtrain)
 nround = 2
diff --git a/R-package/demo/predict_leaf_indices.R b/R-package/demo/predict_leaf_indices.R
index fc87befb7abc..9aaa1a9ab7d8 100644
--- a/R-package/demo/predict_leaf_indices.R
+++ b/R-package/demo/predict_leaf_indices.R
@@ -10,7 +10,7 @@ data(agaricus.test, package='xgboost')
 dtrain <- xgb.DMatrix(data = agaricus.train$data, label = agaricus.train$label)
 dtest <- xgb.DMatrix(data = agaricus.test$data, label = agaricus.test$label)
-param <- list(max.depth=2, eta=1, silent=1, objective='binary:logistic')
+param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
 nround = 4
 # training the model for two rounds
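The early-stopping hunk above also normalizes the argument to its current plural spelling, `early_stopping_rounds` (the patch originally left one call on the singular form). A minimal sketch of the built-in (non-customized) variant, assuming `dtrain` and `dtest` as defined in the demos:

```r
bst <- xgb.train(params = list(max_depth = 2, eta = 1, nthread = 2,
                               objective = "binary:logistic"),
                 data = dtrain, nrounds = 20,
                 watchlist = list(eval = dtest, train = dtrain),
                 early_stopping_rounds = 3)
# training stops once eval error fails to improve for 3 consecutive rounds
print(bst$best_iteration)
```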
diff --git a/R-package/vignettes/discoverYourData.Rmd b/R-package/vignettes/discoverYourData.Rmd
index ed5d696d40fd..25fff9d3eebb 100644
--- a/R-package/vignettes/discoverYourData.Rmd
+++ b/R-package/vignettes/discoverYourData.Rmd
@@ -168,8 +168,8 @@ Build the model
 The code below is very usual. For more information, you can look at the documentation of `xgboost` function (or at the vignette [Xgboost presentation](https://github.com/dmlc/xgboost/blob/master/R-package/vignettes/xgboostPresentation.Rmd)).
 ```{r}
-bst <- xgboost(data = sparse_matrix, label = output_vector, max.depth = 4,
-               eta = 1, nthread = 2, nround = 10,objective = "binary:logistic")
+bst <- xgboost(data = sparse_matrix, label = output_vector, max_depth = 4,
+               eta = 1, nthread = 2, nrounds = 10, objective = "binary:logistic")
 ```
@@ -179,7 +179,7 @@ A model which fits too well may [overfit](http://en.wikipedia.org/wiki/Overfitti
 > Here you can see the numbers decrease until line 7 and then increase.
 >
-> It probably means we are overfitting. To fix that I should reduce the number of rounds to `nround = 4`. I will let things like that because I don't really care for the purpose of this example :-)
+> It probably means we are overfitting. To fix that I should reduce the number of rounds to `nrounds = 4`. I will leave it as is, because it does not really matter for the purpose of this example :-)
 Feature importance
 ------------------
@@ -189,10 +189,10 @@ Feature importance
 ### Build the feature importance data.table
-In the code below, `sparse_matrix@Dimnames[[2]]` represents the column names of the sparse matrix. These names are the original values of the features (remember, each binary column == one value of one *categorical* feature).
+Remember that each binary column corresponds to a single value of one of the *categorical* features.
 ```{r}
-importance <- xgb.importance(feature_names = sparse_matrix@Dimnames[[2]], model = bst)
+importance <- xgb.importance(feature_names = colnames(sparse_matrix), model = bst)
 head(importance)
 ```
@@ -215,7 +215,7 @@ One simple solution is to count the co-occurrences of a feature and a class of t
 For that purpose we will execute the same function as above but using two more parameters, `data` and `label`.
 ```{r}
-importanceRaw <- xgb.importance(feature_names = sparse_matrix@Dimnames[[2]], model = bst, data = sparse_matrix, label = output_vector)
+importanceRaw <- xgb.importance(feature_names = colnames(sparse_matrix), model = bst, data = sparse_matrix, label = output_vector)
 # Cleaning for better display
 importanceClean <- importanceRaw[,`:=`(Cover=NULL, Frequency=NULL)]
@@ -328,12 +328,12 @@ train <- agaricus.train
 test <- agaricus.test
 #Random Forest™ - 1000 trees
-bst <- xgboost(data = train$data, label = train$label, max.depth = 4, num_parallel_tree = 1000, subsample = 0.5, colsample_bytree =0.5, nround = 1, objective = "binary:logistic")
+bst <- xgboost(data = train$data, label = train$label, max_depth = 4, num_parallel_tree = 1000, subsample = 0.5, colsample_bytree = 0.5, nrounds = 1, objective = "binary:logistic")
 #Boosting - 3 rounds
-bst <- xgboost(data = train$data, label = train$label, max.depth = 4, nround = 3, objective = "binary:logistic")
+bst <- xgboost(data = train$data, label = train$label, max_depth = 4, nrounds = 3, objective = "binary:logistic")
 ```
 > Note that the parameter `round` is set to `1`.
-> [**Random Forests™**](https://www.stat.berkeley.edu/~breiman/RandomForests/cc_papers.htm) is a trademark of Leo Breiman and Adele Cutler and is licensed exclusively to Salford Systems for the commercial release of the software.
\ No newline at end of file
+> [**Random Forests™**](https://www.stat.berkeley.edu/~breiman/RandomForests/cc_papers.htm) is a trademark of Leo Breiman and Adele Cutler and is licensed exclusively to Salford Systems for the commercial release of the software.
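In this vignette the column names of the one-hot matrix are the original levels of the categorical features, which is what makes the importance table readable; a sketch, assuming the vignette's `sparse_matrix` and `bst` objects:

```r
head(colnames(sparse_matrix))  # one column per level of each categorical feature
importance <- xgb.importance(feature_names = colnames(sparse_matrix), model = bst)
xgb.plot.importance(importance_matrix = importance)
```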
diff --git a/R-package/vignettes/xgboost.Rnw b/R-package/vignettes/xgboost.Rnw
index dcd0d88b0393..d6d6361b11bd 100644
--- a/R-package/vignettes/xgboost.Rnw
+++ b/R-package/vignettes/xgboost.Rnw
@@ -84,8 +84,8 @@ data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
 train <- agaricus.train
 test <- agaricus.test
-bst <- xgboost(data = train$data, label = train$label, max.depth = 2, eta = 1,
-               nround = 2, objective = "binary:logistic")
+bst <- xgboost(data = train$data, label = train$label, max_depth = 2, eta = 1,
+               nrounds = 2, objective = "binary:logistic")
 xgb.save(bst, 'model.save')
 bst = xgb.load('model.save')
 pred <- predict(bst, test$data)
@@ -162,9 +162,9 @@ evalerror <- function(preds, dtrain) {
 dtest <- xgb.DMatrix(test$data, label = test$label)
 watchlist <- list(eval = dtest, train = dtrain)
-param <- list(max.depth = 2, eta = 1, silent = 1)
+param <- list(max_depth = 2, eta = 1, silent = 1)
-bst <- xgb.train(param, dtrain, nround = 2, watchlist, logregobj, evalerror)
+bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, logregobj, evalerror)
 @
 The gradient and second order gradient is required for the output of customized
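A self-contained sketch of the customized objective/metric pattern shown in the hunk above, with the renamed `max_depth`/`nrounds` arguments (the function definitions mirror the demos):

```r
logregobj <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  preds <- 1 / (1 + exp(-preds))  # preds are margins; apply the sigmoid
  grad <- preds - labels          # first-order gradient of logistic loss
  hess <- preds * (1 - preds)     # second-order gradient
  list(grad = grad, hess = hess)
}
evalerror <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  err <- as.numeric(sum((preds > 0) != labels)) / length(labels)
  list(metric = "error", value = err)
}
param <- list(max_depth = 2, eta = 1, silent = 1)
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, logregobj, evalerror)
```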
diff --git a/R-package/vignettes/xgboostPresentation.Rmd b/R-package/vignettes/xgboostPresentation.Rmd
index 61ab2f083275..b0eb9effeabf 100644
--- a/R-package/vignettes/xgboostPresentation.Rmd
+++ b/R-package/vignettes/xgboostPresentation.Rmd
@@ -147,12 +147,12 @@ In a *sparse* matrix, cells containing `0` are not stored in memory. Therefore,
 We will train decision tree model using the following parameters:
 * `objective = "binary:logistic"`: we will train a binary classification model ;
-* `max.deph = 2`: the trees won't be deep, because our case is very simple ;
+* `max_depth = 2`: the trees won't be deep, because our case is very simple ;
 * `nthread = 2`: the number of cpu threads we are going to use;
-* `nround = 2`: there will be two passes on the data, the second one will enhance the model by further reducing the difference between ground truth and prediction.
+* `nrounds = 2`: there will be two passes on the data, the second one will enhance the model by further reducing the difference between ground truth and prediction.
 ```{r trainingSparse, message=F, warning=F}
-bstSparse <- xgboost(data = train$data, label = train$label, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
+bstSparse <- xgboost(data = train$data, label = train$label, max_depth = 2, eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
 ```
 > More complex the relationship between your features and your `label` is, more passes you need.
@@ -164,7 +164,7 @@ bstSparse <- xgboost(data = train$data, label = train$label, max.depth = 2, eta
 Alternatively, you can put your dataset in a *dense* matrix, i.e. a basic **R** matrix.
 ```{r trainingDense, message=F, warning=F}
-bstDense <- xgboost(data = as.matrix(train$data), label = train$label, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
+bstDense <- xgboost(data = as.matrix(train$data), label = train$label, max_depth = 2, eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
 ```
 ##### xgb.DMatrix
@@ -173,7 +173,7 @@ bstDense <- xgboost(data = as.matrix(train$data), label = train$label, max.depth
 ```{r trainingDmatrix, message=F, warning=F}
 dtrain <- xgb.DMatrix(data = train$data, label = train$label)
-bstDMatrix <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
+bstDMatrix <- xgboost(data = dtrain, max_depth = 2, eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
 ```
 ##### Verbose option
@@ -184,17 +184,17 @@ One of the simplest way to see the training progress is to set the `verbose` opt
 ```{r trainingVerbose0, message=T, warning=F}
 # verbose = 0, no message
-bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic", verbose = 0)
+bst <- xgboost(data = dtrain, max_depth = 2, eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic", verbose = 0)
 ```
 ```{r trainingVerbose1, message=T, warning=F}
 # verbose = 1, print evaluation metric
-bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic", verbose = 1)
+bst <- xgboost(data = dtrain, max_depth = 2, eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic", verbose = 1)
 ```
 ```{r trainingVerbose2, message=T, warning=F}
 # verbose = 2, also print information about tree
-bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic", verbose = 2)
+bst <- xgboost(data = dtrain, max_depth = 2, eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic", verbose = 2)
 ```
 ## Basic prediction using XGBoost
@@ -287,10 +287,10 @@ For the purpose of this example, we use `watchlist` parameter. It is a list of `
 ```{r watchlist, message=F, warning=F}
 watchlist <- list(train=dtrain, test=dtest)
-bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nthread = 2, nround=2, watchlist=watchlist, objective = "binary:logistic")
+bst <- xgb.train(data=dtrain, max_depth=2, eta=1, nthread = 2, nrounds=2, watchlist=watchlist, objective = "binary:logistic")
 ```
-**XGBoost** has computed at each round the same average error metric than seen above (we set `nround` to 2, that is why we have two lines). Obviously, the `train-error` number is related to the training dataset (the one the algorithm learns from) and the `test-error` number to the test dataset.
+**XGBoost** has computed at each round the same average error metric as seen above (we set `nrounds` to 2, which is why we have two lines). Obviously, the `train-error` number is related to the training dataset (the one the algorithm learns from) and the `test-error` number to the test dataset.
 Both training and test error related metrics are very similar, and in some way, it makes sense: what we have learned from the training dataset matches the observations from the test dataset.
@@ -299,10 +299,10 @@ If with your own dataset you have not such results, you should think about how y
 For a better understanding of the learning progression, you may want to have some specific metric or even use multiple evaluation metrics.
 ```{r watchlist2, message=F, warning=F}
-bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nthread = 2, nround=2, watchlist=watchlist, eval.metric = "error", eval.metric = "logloss", objective = "binary:logistic")
+bst <- xgb.train(data=dtrain, max_depth=2, eta=1, nthread = 2, nrounds=2, watchlist=watchlist, eval_metric = "error", eval_metric = "logloss", objective = "binary:logistic")
 ```
-> `eval.metric` allows us to monitor two new metrics for each round, `logloss` and `error`.
+> `eval_metric` allows us to monitor two new metrics for each round, `logloss` and `error`.
 ### Linear boosting
 Until now, all the learnings we have performed were based on boosting trees. **XGBoost** implements a second algorithm, based on linear boosting. The only difference with previous command is `booster = "gblinear"` parameter (and removing `eta` parameter).
 ```{r linearBoosting, message=F, warning=F}
-bst <- xgb.train(data=dtrain, booster = "gblinear", max.depth=2, nthread = 2, nround=2, watchlist=watchlist, eval.metric = "error", eval.metric = "logloss", objective = "binary:logistic")
+bst <- xgb.train(data=dtrain, booster = "gblinear", max_depth=2, nthread = 2, nrounds=2, watchlist=watchlist, eval_metric = "error", eval_metric = "logloss", objective = "binary:logistic")
 ```
 In this specific case, *linear boosting* gets sligtly better performance metrics than decision trees based algorithm.
@@ -328,7 +328,7 @@ Like saving models, `xgb.DMatrix` object (which groups both dataset and outcome)
 xgb.DMatrix.save(dtrain, "dtrain.buffer")
 # to load it in, simply call xgb.DMatrix
 dtrain2 <- xgb.DMatrix("dtrain.buffer")
-bst <- xgb.train(data=dtrain2, max.depth=2, eta=1, nthread = 2, nround=2, watchlist=watchlist, objective = "binary:logistic")
+bst <- xgb.train(data=dtrain2, max_depth=2, eta=1, nthread = 2, nrounds=2, watchlist=watchlist, objective = "binary:logistic")
 ```
 ```{r DMatrixDel, include=FALSE}
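As a sanity check of the buffer round-trip shown in the hunk above, a sketch assuming the chunk's `dtrain`:

```r
xgb.DMatrix.save(dtrain, "dtrain.buffer")
dtrain2 <- xgb.DMatrix("dtrain.buffer")
# labels must survive the save/load round-trip
stopifnot(identical(getinfo(dtrain, "label"), getinfo(dtrain2, "label")))
file.remove("dtrain.buffer")
```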
@@ -363,7 +363,7 @@ xgb.plot.importance(importance_matrix = importance_matrix)
 You can dump the tree you learned using `xgb.dump` into a text file.
 ```{r dump, message=T, warning=F}
-xgb.dump(bst, with.stats = T)
+xgb.dump(bst, with_stats = TRUE)
 ```
 You can plot the trees from your model using ```xgb.plot.tree``
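A closing sketch of the dump call with the renamed `with_stats` argument, assuming the `bst` and `train` objects from the vignette chunks above:

```r
# returns the trees as a character vector when no file name is given
dump <- xgb.dump(bst, with_stats = TRUE)
head(dump, 10)
importance_matrix <- xgb.importance(feature_names = colnames(train$data), model = bst)
xgb.plot.importance(importance_matrix = importance_matrix)
```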