[R] adapt demos and vignettes to a more consistent parameter style
khotilov committed Jun 27, 2016
1 parent a0aa305 commit 3b6b344
Showing 11 changed files with 59 additions and 59 deletions.
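
In short, dot-separated argument names (`max.depth`, `eval.metric`, `with.stats`, `early.stop.round`) become the underscore-separated names used by the core library (`max_depth`, `eval_metric`, `with_stats`, `early_stopping_rounds`), and `nround` becomes `nrounds`. A minimal sketch of the target style, not taken from the diff, assuming only the agaricus data bundled with the package:

# Sketch for illustration only; not part of this commit.
library(xgboost)
data(agaricus.train, package = 'xgboost')
# old style:  xgboost(..., max.depth = 2, eta = 1, nround = 2)
# new style adopted throughout the demos and vignettes:
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
               max_depth = 2, eta = 1, nrounds = 2, nthread = 2,
               objective = "binary:logistic", verbose = 0)
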
29 changes: 15 additions & 14 deletions R-package/demo/basic_walkthrough.R
@@ -1,7 +1,8 @@
require(xgboost)
require(methods)

# we load in the agaricus dataset
# In this example, we are aiming to predict whether a mushroom can be eaten
# In this example, we are aiming to predict whether a mushroom is edible
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
train <- agaricus.train
@@ -15,33 +16,33 @@ class(train$data)
# note: we are putting in sparse matrix here, xgboost naturally handles sparse input
# use a sparse matrix when your features are sparse (e.g. when you are using one-hot encoding vectors)
print("Training xgboost with sparseMatrix")
bst <- xgboost(data = train$data, label = train$label, max.depth = 2, eta = 1, nround = 2,
bst <- xgboost(data = train$data, label = train$label, max_depth = 2, eta = 1, nrounds = 2,
nthread = 2, objective = "binary:logistic")
# alternatively, you can put in dense matrix, i.e. basic R-matrix
print("Training xgboost with Matrix")
bst <- xgboost(data = as.matrix(train$data), label = train$label, max.depth = 2, eta = 1, nround = 2,
bst <- xgboost(data = as.matrix(train$data), label = train$label, max_depth = 2, eta = 1, nrounds = 2,
nthread = 2, objective = "binary:logistic")

# you can also put in an xgb.DMatrix object, which stores label, data and other metadata needed for advanced features
print("Training xgboost with xgb.DMatrix")
dtrain <- xgb.DMatrix(data = train$data, label = train$label)
bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2, nthread = 2,
bst <- xgboost(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, nthread = 2,
objective = "binary:logistic")

# Verbose = 0,1,2
print("Train xgboost with verbose 0, no message")
bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2,
bst <- xgboost(data = dtrain, max_depth = 2, eta = 1, nrounds = 2,
nthread = 2, objective = "binary:logistic", verbose = 0)
print("Train xgboost with verbose 1, print evaluation metric")
bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2,
bst <- xgboost(data = dtrain, max_depth = 2, eta = 1, nrounds = 2,
nthread = 2, objective = "binary:logistic", verbose = 1)
print("Train xgboost with verbose 2, also print information about tree")
bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2,
bst <- xgboost(data = dtrain, max_depth = 2, eta = 1, nrounds = 2,
nthread = 2, objective = "binary:logistic", verbose = 2)

# you can also specify data as file path to a LibSVM format input
# since we do not have this file with us, the following line is just for illustration
# bst <- xgboost(data = 'agaricus.train.svm', max.depth = 2, eta = 1, nround = 2,objective = "binary:logistic")
# bst <- xgboost(data = 'agaricus.train.svm', max_depth = 2, eta = 1, nrounds = 2,objective = "binary:logistic")

#--------------------basic prediction using xgboost--------------
# you can do prediction using the following line
@@ -77,19 +78,19 @@ watchlist <- list(train=dtrain, test=dtest)
# to train with watchlist, use xgb.train, which contains more advanced features
# watchlist allows us to monitor the evaluation result on all data in the list
print("Train xgboost using xgb.train with watchlist")
bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nround=2, watchlist=watchlist,
bst <- xgb.train(data=dtrain, max_depth=2, eta=1, nrounds=2, watchlist=watchlist,
nthread = 2, objective = "binary:logistic")
# we can change evaluation metrics, or use multiple evaluation metrics
print("train xgboost using xgb.train with watchlist, watch logloss and error")
bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nround=2, watchlist=watchlist,
eval.metric = "error", eval.metric = "logloss",
bst <- xgb.train(data=dtrain, max_depth=2, eta=1, nrounds=2, watchlist=watchlist,
eval_metric = "error", eval_metric = "logloss",
nthread = 2, objective = "binary:logistic")

# xgb.DMatrix can also be saved using xgb.DMatrix.save
xgb.DMatrix.save(dtrain, "dtrain.buffer")
# to load it in, simply call xgb.DMatrix
dtrain2 <- xgb.DMatrix("dtrain.buffer")
bst <- xgb.train(data=dtrain2, max.depth=2, eta=1, nround=2, watchlist=watchlist,
bst <- xgb.train(data=dtrain2, max_depth=2, eta=1, nrounds=2, watchlist=watchlist,
nthread = 2, objective = "binary:logistic")
# information can be extracted from xgb.DMatrix using getinfo
label = getinfo(dtest, "label")
@@ -98,11 +99,11 @@ err <- as.numeric(sum(as.integer(pred > 0.5) != label))/length(label)
print(paste("test-error=", err))

# You can dump the tree you learned using xgb.dump into a text file
xgb.dump(bst, "dump.raw.txt", with.stats = T)
xgb.dump(bst, "dump.raw.txt", with_stats = T)

# Finally, you can check which features are the most important.
print("Most important features (look at column Gain):")
imp_matrix <- xgb.importance(feature_names = train$data@Dimnames[[2]], model = bst)
imp_matrix <- xgb.importance(feature_names = colnames(train$data), model = bst)
print(imp_matrix)

# Feature importance bar plot by gain
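To see the renamed arguments from this demo working together, here is a condensed sketch; it is not part of the diff and assumes the agaricus data bundled with the package.

# Sketch for illustration only; not part of this commit.
library(xgboost)
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest  <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
# max_depth, nrounds and eval_metric replace max.depth, nround and eval.metric
bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2,
                 watchlist = list(train = dtrain, test = dtest),
                 eval_metric = "error", eval_metric = "logloss",
                 nthread = 2, objective = "binary:logistic")
xgb.dump(bst, "dump.raw.txt", with_stats = TRUE)  # was with.stats
imp <- xgb.importance(feature_names = colnames(agaricus.train$data), model = bst)
print(imp)
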
4 changes: 2 additions & 2 deletions R-package/demo/boost_from_prediction.R
@@ -11,8 +11,8 @@ watchlist <- list(eval = dtest, train = dtrain)
#
print('start running example to start from an initial prediction')
# train xgboost for 1 round
param <- list(max.depth=2,eta=1,nthread = 2, silent=1,objective='binary:logistic')
bst <- xgb.train( param, dtrain, 1, watchlist )
param <- list(max_depth=2, eta=1, nthread = 2, silent=1, objective='binary:logistic')
bst <- xgb.train(param, dtrain, 1, watchlist)
# Note: we need the margin value instead of transformed prediction in set_base_margin
# do predict with output_margin=TRUE, will always give you margin values before logistic transformation
ptrain <- predict(bst, dtrain, outputmargin=TRUE)
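A condensed sketch of the boost-from-prediction pattern with the underscore-style parameter list; it is not part of the diff and assumes the agaricus data bundled with the package.

# Sketch for illustration only; not part of this commit.
library(xgboost)
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest  <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
param <- list(max_depth = 2, eta = 1, nthread = 2, silent = 1,
              objective = 'binary:logistic')
bst <- xgb.train(param, dtrain, nrounds = 1, watchlist = list(eval = dtest))
# feed the untransformed margins back in as the starting point for more boosting
setinfo(dtrain, "base_margin", predict(bst, dtrain, outputmargin = TRUE))
setinfo(dtest,  "base_margin", predict(bst, dtest,  outputmargin = TRUE))
bst2 <- xgb.train(param, dtrain, nrounds = 1, watchlist = list(eval = dtest))
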
7 changes: 3 additions & 4 deletions R-package/demo/create_sparse_matrix.R
@@ -65,11 +65,10 @@ output_vector = df[,Y:=0][Improved == "Marked",Y:=1][,Y]

# Following is the same process as other demo
cat("Learning...\n")
bst <- xgboost(data = sparse_matrix, label = output_vector, max.depth = 9,
eta = 1, nthread = 2, nround = 10,objective = "binary:logistic")
bst <- xgboost(data = sparse_matrix, label = output_vector, max_depth = 9,
eta = 1, nthread = 2, nrounds = 10, objective = "binary:logistic")

# sparse_matrix@Dimnames[[2]] represents the column names of the sparse matrix.
importance <- xgb.importance(feature_names = sparse_matrix@Dimnames[[2]], model = bst)
importance <- xgb.importance(feature_names = colnames(sparse_matrix), model = bst)
print(importance)
# According to the matrix below, the most important feature in this dataset to predict if the treatment will work is the Age. The second most important feature is having received a placebo or not. The sex is third. Then we see our generated features (AgeDiscret). We can see that their contribution is very low (Gain column).

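The switch from `sparse_matrix@Dimnames[[2]]` to `colnames(sparse_matrix)` is purely cosmetic; both return the same character vector. A tiny sketch with a made-up two-column data frame, not part of the diff:

# Sketch for illustration only; the data frame here is hypothetical.
library(Matrix)
df <- data.frame(trt = c("a", "b", "a"), age = c(20, 35, 31))
m <- sparse.model.matrix(~ . - 1, data = df)
identical(colnames(m), m@Dimnames[[2]])  # TRUE
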
8 changes: 4 additions & 4 deletions R-package/demo/cross_validation.R
@@ -6,7 +6,7 @@ dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)

nround <- 2
param <- list(max.depth=2,eta=1,silent=1,nthread = 2, objective='binary:logistic')
param <- list(max_depth=2, eta=1, silent=1, nthread=2, objective='binary:logistic')

cat('running cross validation\n')
# do cross validation, this will print result out as
@@ -19,7 +19,7 @@ cat('running cross validation, disable standard deviation display\n')
# [iteration] metric_name:mean_value+std_value
# std_value is standard deviation of the metric
xgb.cv(param, dtrain, nround, nfold=5,
metrics={'error'}, showsd = FALSE)
metrics='error', showsd = FALSE)

###
# you can also do cross validation with customized loss function
@@ -40,12 +40,12 @@ evalerror <- function(preds, dtrain) {
return(list(metric = "error", value = err))
}

param <- list(max.depth=2,eta=1,silent=1,
param <- list(max_depth=2, eta=1, silent=1,
objective = logregobj, eval_metric = evalerror)
# train with customized objective
xgb.cv(params = param, data = dtrain, nrounds = nround, nfold = 5)

# do cross validation with prediction values for each fold
res <- xgb.cv(params = param, data = dtrain, nrounds = nround, nfold = 5, prediction = TRUE)
res$dt
res$evaluation_log
length(res$pred)
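A brief sketch of the renamed pieces from this demo, with `max_depth` in the parameter list, `metrics` passed as a plain string, and the `evaluation_log` and `pred` fields of the result; it is not part of the diff and assumes the agaricus data bundled with the package.

# Sketch for illustration only; not part of this commit.
library(xgboost)
data(agaricus.train, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
param <- list(max_depth = 2, eta = 1, silent = 1, nthread = 2,
              objective = 'binary:logistic')
res <- xgb.cv(param, dtrain, nrounds = 2, nfold = 5,
              metrics = 'error', prediction = TRUE)
print(res$evaluation_log)  # per-iteration metrics (previously res$dt)
length(res$pred)           # out-of-fold predictions for each training row
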
4 changes: 2 additions & 2 deletions R-package/demo/custom_objective.R
@@ -33,7 +33,7 @@ evalerror <- function(preds, dtrain) {
return(list(metric = "error", value = err))
}

param <- list(max.depth=2, eta=1, nthread = 2, silent=1,
param <- list(max_depth=2, eta=1, nthread = 2, silent=1,
objective=logregobj, eval_metric=evalerror)
print ('start training with user customized objective')
# training with customized objective, we can also do step by step training
@@ -57,7 +57,7 @@ logregobjattr <- function(preds, dtrain) {
hess <- preds * (1 - preds)
return(list(grad = grad, hess = hess))
}
param <- list(max.depth=2, eta=1, nthread = 2, silent=1,
param <- list(max_depth=2, eta=1, nthread = 2, silent=1,
objective=logregobjattr, eval_metric=evalerror)
print ('start training with user customized objective, with additional attributes in DMatrix')
# training with customized objective, we can also do step by step training
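A compact sketch of training with a user-supplied objective and evaluation function alongside the underscore-style parameter list; it is not part of the diff and assumes the agaricus data bundled with the package.

# Sketch for illustration only; not part of this commit.
library(xgboost)
data(agaricus.train, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
logregobj <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  preds <- 1 / (1 + exp(-preds))          # predictions arrive as margins
  list(grad = preds - labels, hess = preds * (1 - preds))
}
evalerror <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  list(metric = "error", value = mean(as.numeric(preds > 0) != labels))
}
param <- list(max_depth = 2, eta = 1, nthread = 2, silent = 1,
              objective = logregobj, eval_metric = evalerror)
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist = list(train = dtrain))
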
6 changes: 3 additions & 3 deletions R-package/demo/early_stopping.R
@@ -7,7 +7,7 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
# note: for customized objective function, we leave objective as default
# note: what we are getting is margin value in prediction
# you must know what you are doing
param <- list(max.depth=2,eta=1,nthread = 2, silent=1)
param <- list(max_depth=2, eta=1, nthread = 2, silent=1)
watchlist <- list(eval = dtest)
num_round <- 20
# user define objective function, given prediction, return gradient and second order gradient
@@ -34,7 +34,7 @@ print ('start training with early Stopping setting')

bst <- xgb.train(param, dtrain, num_round, watchlist,
objective = logregobj, eval_metric = evalerror, maximize = FALSE,
early.stop.round = 3)
                 early_stopping_rounds = 3)
bst <- xgb.cv(param, dtrain, num_round, nfold = 5,
objective = logregobj, eval_metric = evalerror,
maximize = FALSE, early.stop.round = 3)
maximize = FALSE, early_stopping_rounds = 3)
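A short sketch of early stopping with the renamed argument; it is not part of the diff, uses the built-in logistic objective rather than the custom one above, and assumes the agaricus data bundled with the package.

# Sketch for illustration only; not part of this commit.
library(xgboost)
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest  <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
param <- list(max_depth = 2, eta = 1, nthread = 2, silent = 1,
              objective = 'binary:logistic', eval_metric = 'error')
# stop if the eval metric has not improved for 3 consecutive rounds
bst <- xgb.train(param, dtrain, nrounds = 20, watchlist = list(eval = dtest),
                 early_stopping_rounds = 3, maximize = FALSE)
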
2 changes: 1 addition & 1 deletion R-package/demo/predict_first_ntree.R
@@ -5,7 +5,7 @@ data(agaricus.test, package='xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)

param <- list(max.depth=2,eta=1,silent=1,objective='binary:logistic')
param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
watchlist <- list(eval = dtest, train = dtrain)
nround = 2

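A minimal sketch of predicting with only the first tree via `ntreelimit`; it is not part of the diff and assumes the agaricus data bundled with the package.

# Sketch for illustration only; not part of this commit.
library(xgboost)
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest  <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
param <- list(max_depth = 2, eta = 1, silent = 1, objective = 'binary:logistic')
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist = list(eval = dtest))
pred_all   <- predict(bst, dtest)                  # use both trees
pred_first <- predict(bst, dtest, ntreelimit = 1)  # use only the first tree
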
2 changes: 1 addition & 1 deletion R-package/demo/predict_leaf_indices.R
@@ -10,7 +10,7 @@ data(agaricus.test, package='xgboost')
dtrain <- xgb.DMatrix(data = agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(data = agaricus.test$data, label = agaricus.test$label)

param <- list(max.depth=2, eta=1, silent=1, objective='binary:logistic')
param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
nround = 4

# training the model for two rounds
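A minimal sketch of extracting per-tree leaf indices with `predleaf = TRUE`; it is not part of the diff and assumes the agaricus data bundled with the package.

# Sketch for illustration only; not part of this commit.
library(xgboost)
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest  <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
param <- list(max_depth = 2, eta = 1, silent = 1, objective = 'binary:logistic')
bst <- xgb.train(param, dtrain, nrounds = 4, watchlist = list(eval = dtest))
leaves <- predict(bst, dtest, predleaf = TRUE)  # one column of leaf ids per tree
dim(leaves)
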
18 changes: 9 additions & 9 deletions R-package/vignettes/discoverYourData.Rmd
@@ -168,8 +168,8 @@ Build the model
The code below is very usual. For more information, you can look at the documentation of `xgboost` function (or at the vignette [Xgboost presentation](https://github.com/dmlc/xgboost/blob/master/R-package/vignettes/xgboostPresentation.Rmd)).

```{r}
bst <- xgboost(data = sparse_matrix, label = output_vector, max.depth = 4,
eta = 1, nthread = 2, nround = 10,objective = "binary:logistic")
bst <- xgboost(data = sparse_matrix, label = output_vector, max_depth = 4,
eta = 1, nthread = 2, nrounds = 10,objective = "binary:logistic")
```

@@ -179,7 +179,7 @@ A model which fits too well may [overfit](http://en.wikipedia.org/wiki/Overfitti

> Here you can see the numbers decrease until line 7 and then increase.
>
> It probably means we are overfitting. To fix that I should reduce the number of rounds to `nround = 4`. I will let things like that because I don't really care for the purpose of this example :-)
> It probably means we are overfitting. To fix that I should reduce the number of rounds to `nrounds = 4`. I will leave it as is, because I don't really care for the purpose of this example :-)
Feature importance
------------------
@@ -189,10 +189,10 @@ Feature importance

### Build the feature importance data.table

In the code below, `sparse_matrix@Dimnames[[2]]` represents the column names of the sparse matrix. These names are the original values of the features (remember, each binary column == one value of one *categorical* feature).
Remember, each binary column corresponds to a single value of one of the *categorical* features.

```{r}
importance <- xgb.importance(feature_names = sparse_matrix@Dimnames[[2]], model = bst)
importance <- xgb.importance(feature_names = colnames(sparse_matrix), model = bst)
head(importance)
```

@@ -215,7 +215,7 @@ One simple solution is to count the co-occurrences of a feature and a class of t
For that purpose we will execute the same function as above but using two more parameters, `data` and `label`.

```{r}
importanceRaw <- xgb.importance(feature_names = sparse_matrix@Dimnames[[2]], model = bst, data = sparse_matrix, label = output_vector)
importanceRaw <- xgb.importance(feature_names = colnames(sparse_matrix), model = bst, data = sparse_matrix, label = output_vector)
# Cleaning for better display
importanceClean <- importanceRaw[,`:=`(Cover=NULL, Frequency=NULL)]
@@ -328,12 +328,12 @@ train <- agaricus.train
test <- agaricus.test
#Random Forest™ - 1000 trees
bst <- xgboost(data = train$data, label = train$label, max.depth = 4, num_parallel_tree = 1000, subsample = 0.5, colsample_bytree =0.5, nround = 1, objective = "binary:logistic")
bst <- xgboost(data = train$data, label = train$label, max_depth = 4, num_parallel_tree = 1000, subsample = 0.5, colsample_bytree =0.5, nrounds = 1, objective = "binary:logistic")
#Boosting - 3 rounds
bst <- xgboost(data = train$data, label = train$label, max.depth = 4, nround = 3, objective = "binary:logistic")
bst <- xgboost(data = train$data, label = train$label, max_depth = 4, nrounds = 3, objective = "binary:logistic")
```

> Note that the parameter `round` is set to `1`.
> [**Random Forests™**](https://www.stat.berkeley.edu/~breiman/RandomForests/cc_papers.htm) is a trademark of Leo Breiman and Adele Cutler and is licensed exclusively to Salford Systems for the commercial release of the software.
> [**Random Forests™**](https://www.stat.berkeley.edu/~breiman/RandomForests/cc_papers.htm) is a trademark of Leo Breiman and Adele Cutler and is licensed exclusively to Salford Systems for the commercial release of the software.
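
For the vignette's importance section, a brief sketch showing that `colnames()` feeds `xgb.importance` the same way the slot access did; it is not part of the diff and assumes the agaricus data bundled with the package.

# Sketch for illustration only; not part of this commit.
library(xgboost)
data(agaricus.train, package = 'xgboost')
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
               max_depth = 4, eta = 1, nthread = 2, nrounds = 10,
               objective = "binary:logistic", verbose = 0)
importance <- xgb.importance(feature_names = colnames(agaricus.train$data),
                             model = bst)
head(importance)
# xgb.plot.importance(importance) draws the corresponding bar chart
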
8 changes: 4 additions & 4 deletions R-package/vignettes/xgboost.Rnw
@@ -84,8 +84,8 @@ data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
train <- agaricus.train
test <- agaricus.test
bst <- xgboost(data = train$data, label = train$label, max.depth = 2, eta = 1,
nround = 2, objective = "binary:logistic")
bst <- xgboost(data = train$data, label = train$label, max_depth = 2, eta = 1,
nrounds = 2, objective = "binary:logistic")
xgb.save(bst, 'model.save')
bst = xgb.load('model.save')
pred <- predict(bst, test$data)
@@ -162,9 +162,9 @@ evalerror <- function(preds, dtrain) {
dtest <- xgb.DMatrix(test$data, label = test$label)
watchlist <- list(eval = dtest, train = dtrain)
param <- list(max.depth = 2, eta = 1, silent = 1)
param <- list(max_depth = 2, eta = 1, silent = 1)
bst <- xgb.train(param, dtrain, nround = 2, watchlist, logregobj, evalerror)
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, logregobj, evalerror)
@

The gradient and second order gradient is required for the output of customized
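For the .Rnw vignette, a small sketch of the save/load round trip with the renamed training arguments; it is not part of the diff and assumes the agaricus data bundled with the package plus write access to the working directory.

# Sketch for illustration only; not part of this commit.
library(xgboost)
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
               max_depth = 2, eta = 1, nrounds = 2,
               objective = "binary:logistic", verbose = 0)
xgb.save(bst, 'model.save')
bst2 <- xgb.load('model.save')
pred <- predict(bst2, agaricus.test$data)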