-
Notifications
You must be signed in to change notification settings - Fork 21
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #274 from biomodhub/xgboost
adding xgboost to the list of models available
- Loading branch information
Showing
110 changed files
with
413 additions
and
334 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,8 @@ | ||
Package: biomod2 | ||
Type: Package | ||
Title: Ensemble Platform for Species Distribution Modeling | ||
Version: 4.2-3-5 | ||
Date: 2023-05-22 | ||
Version: 4.2-4 | ||
Date: 2023-06-15 | ||
Authors@R: c(person("Wilfried", "Thuiller", role = c("aut") | ||
, email = "[email protected]"), | ||
person("Damien", "Georges", role = c("aut")), | ||
|
@@ -46,6 +46,7 @@ Imports: | |
ggplot2, | ||
nnet, | ||
gbm (>= 2.1.3), | ||
xgboost, | ||
mda, | ||
randomForest, | ||
maxnet, | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,7 +16,7 @@ | |
##' (\emph{a random number by default}) | ||
##' @param models a \code{vector} containing model names to be computed, must be among \code{GLM}, | ||
##' \code{GBM}, \code{GAM}, \code{CTA}, \code{ANN}, \code{SRE}, \code{FDA}, \code{MARS}, | ||
##' \code{RF}, \code{MAXENT}, \code{MAXNET} | ||
##' \code{RF}, \code{MAXENT}, \code{MAXNET}, \code{XGBOOST} | ||
##' @param models.pa (\emph{optional, default} \code{NULL}) \cr | ||
##' A \code{list} containing for each model a \code{vector} defining which pseudo-absence datasets | ||
##' are to be used, must be among \code{colnames([email protected])} | ||
|
@@ -127,6 +127,7 @@ | |
##' \item \code{MAXENT} : Maximum Entropy | ||
##' (\url{https://biodiversityinformatics.amnh.org/open_source/maxent/}) | ||
##' \item \code{MAXNET} : Maximum Entropy (\code{\link[maxnet]{maxnet}}) | ||
##' \item \code{XGBOOST} : eXtreme Gradient Boosting Training (\code{\link[xgboost]{xgboost}}) | ||
##' }} | ||
##' | ||
##' \item{models.pa}{Different models might respond differently to different numbers of | ||
|
@@ -188,15 +189,17 @@ | |
##' @keywords models regression nonlinear multivariate nonparametric tree | ||
##' | ||
##' | ||
##' @seealso \code{\link[stats]{glm}}, \code{\link[gam]{gam}}, \code{\link[mgcv]{gam}}, | ||
##' \code{\link[mgcv]{bam}}, \code{\link[gbm]{gbm}}, \code{\link[rpart]{rpart}}, | ||
##' code{\link[nnet]{nnet}}, \code{\link[mda]{fda}}, \code{\link[earth]{earth}}, | ||
##' \code{\link[randomForest]{randomForest}}, \code{\link[maxnet]{maxnet}}, | ||
##' \code{\link{BIOMOD_FormatingData}}, \code{\link{BIOMOD_ModelingOptions}}, | ||
##' \code{\link{bm_CrossValidation}}, \code{ \link{bm_VariablesImportance}}, | ||
##' \code{\link{BIOMOD_Projection}}, \code{\link{BIOMOD_EnsembleModeling}}, | ||
##' \code{\link{bm_PlotEvalMean}}, \code{\link{bm_PlotEvalBoxplot}}, | ||
##' \code{\link{bm_PlotVarImpBoxplot}}, \code{\link{bm_PlotResponseCurves}} | ||
##' @seealso \code{\link[stats]{glm}}, \code{\link[gam]{gam}}, | ||
##' \code{\link[mgcv]{gam}}, \code{\link[mgcv]{bam}}, \code{\link[gbm]{gbm}}, | ||
##' \code{\link[rpart]{rpart}}, \code{\link[nnet]{nnet}}, | ||
##' \code{\link[mda]{fda}}, \code{\link[earth]{earth}}, | ||
##' \code{\link[randomForest]{randomForest}}, \code{\link[maxnet]{maxnet}}, | ||
##' \code{\link[xgboost]{xgboost}}, \code{\link{BIOMOD_FormatingData}}, | ||
##' \code{\link{BIOMOD_ModelingOptions}}, \code{\link{bm_CrossValidation}}, | ||
##' \code{\link{bm_VariablesImportance}}, \code{\link{BIOMOD_Projection}}, | ||
##' \code{\link{BIOMOD_EnsembleModeling}}, \code{\link{bm_PlotEvalMean}}, | ||
##' \code{\link{bm_PlotEvalBoxplot}}, \code{\link{bm_PlotVarImpBoxplot}}, | ||
##' \code{\link{bm_PlotResponseCurves}} | ||
##' @family Main functions | ||
##' | ||
##' | ||
|
@@ -286,7 +289,8 @@ | |
BIOMOD_Modeling <- function(bm.format, | ||
modeling.id = as.character(format(Sys.time(), "%s")), | ||
models = c('GLM', 'GBM', 'GAM', 'CTA', 'ANN', 'SRE' | ||
, 'FDA', 'MARS', 'RF', 'MAXENT', 'MAXNET'), | ||
, 'FDA', 'MARS', 'RF', 'MAXENT', 'MAXNET', | ||
'XGBOOST'), | ||
models.pa = NULL, | ||
bm.options = NULL, | ||
CV.strategy = 'random', | ||
|
@@ -492,15 +496,15 @@ BIOMOD_Modeling <- function(bm.format, | |
|
||
## check if model is supported | ||
avail.models.list <- c('GLM', 'GBM', 'GAM', 'CTA', 'ANN', 'SRE', 'FDA', 'MARS' | ||
, 'RF', 'MAXENT', 'MAXNET') | ||
, 'RF', 'MAXENT', 'MAXNET', 'XGBOOST') | ||
.fun_testIfIn(TRUE, "models", models, avail.models.list) | ||
|
||
|
||
## 1.1 Remove models not supporting categorical variables -------------------- | ||
categorical_var <- .get_categorical_names(bm.format@data.env.var) | ||
|
||
if (length(categorical_var) > 0) { | ||
models.fact.unsupport <- c("SRE", "MAXENT.Tsuruoka") | ||
models.fact.unsupport <- c("SRE", "XGBOOST") | ||
models.switch.off <- c(models.switch.off, intersect(models, models.fact.unsupport)) | ||
if (length(models.switch.off) > 0) { | ||
models <- setdiff(models, models.switch.off) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,6 +20,8 @@ | |
##' @param RF (\emph{optional, default} \code{NULL}) \cr A \code{list} containing RF options | ||
##' @param MAXENT (\emph{optional, default} \code{NULL}) \cr A \code{list} | ||
##' containing MAXENT options | ||
##' @param XGBOOST (\emph{optional, default} \code{NULL}) \cr A \code{list} | ||
##' containing XGBOOST options | ||
##' | ||
##' | ||
##' @return | ||
|
@@ -308,20 +310,16 @@ | |
##' \code{numeric} corresponding to the default prevalence of the species \cr | ||
##' (\emph{probability of presence at ordinary occurrence points})} | ||
##' } | ||
##' | ||
##' % @section \bold{MAXENT.Tsuruoka (\code{\link[maxent]{maxent}})} : | ||
##' % \itemize{ | ||
##' % \item{\code{l1_regularizer = 0.0}}{ : a \code{numeric} turning on L1 regularization and setting | ||
##' % the regularization parameter (\emph{a value of \code{0} will disable L1 regularization})} | ||
##' % \item{\code{l2_regularizer = 0.0}}{ : a \code{numeric} turning on L2 regularization and setting | ||
##' % the regularization parameter (\emph{a value of \code{0} will disable L2 regularization})} | ||
##' % \item{\code{use_sgd = FALSE}}{ : a \code{logical} to use SGD parameter estimation} | ||
##' % \item{\code{set_heldout = 0}}{ : an \code{integer} corresponding to the number of documents to | ||
##' % hold out (\emph{to test against and prevent overfitting, use carefully in case of dataset with | ||
##' % low number of occurrences})} | ||
##' % \item{\code{verbose = FALSE}}{ : a \code{logical} specifying whether to provide descriptive | ||
##' % output about the training process} | ||
##' % } | ||
##' @section XGBOOST : (default \code{\link[xgboost]{xgboost}}) | ||
##' | ||
##' \emph{Please refer to \code{\link[xgboost]{xgboost}} help file for more details.} | ||
##' \itemize{ | ||
##' \item{\code{max.depth = 5}} | ||
##' \item{\code{eta = 0.1}} | ||
##' \item{\code{nrounds = 512}} | ||
##' \item{\code{objective = "binary:logistic"}} | ||
##' \item{\code{nthread = 1}} | ||
##' } | ||
##' | ||
##' | ||
##' @keywords models options | ||
|
@@ -402,7 +400,8 @@ BIOMOD_ModelingOptions <- function(GLM = NULL, | |
FDA = NULL, | ||
MARS = NULL, | ||
RF = NULL, | ||
MAXENT = NULL) | ||
MAXENT = NULL, | ||
XGBOOST = NULL) | ||
{ | ||
# .bm_cat("Build Modeling Options") | ||
|
||
|
@@ -653,15 +652,25 @@ BIOMOD_ModelingOptions <- function(GLM = NULL, | |
} else { | ||
opt@MAXENT$path_to_maxent.jar <- getwd() | ||
} | ||
|
||
# if (!is.null(MAXENT.Tsuruoka)) { | ||
# if (!is.null(MAXENT.Tsuruoka$l1_regularizer)) { [email protected]$l1_regularizer <- MAXENT.Tsuruoka$l1_regularizer } | ||
# if (!is.null(MAXENT.Tsuruoka$l2_regularizer)) { [email protected]$l2_regularizer <- MAXENT.Tsuruoka$l2_regularizer } | ||
# if (!is.null(MAXENT.Tsuruoka$use_sgd)) { [email protected]$use_sgd <- MAXENT.Tsuruoka$use_sgd } | ||
# if (!is.null(MAXENT.Tsuruoka$set_heldout)) { [email protected]$set_heldout <- MAXENT.Tsuruoka$set_heldout } | ||
# if (!is.null(MAXENT.Tsuruoka$verbose)) { [email protected]$verbose <- MAXENT.Tsuruoka$verbose } | ||
# } | ||
|
||
## 2.11 XGBOOST ----------------------------------------------------- | ||
if (!is.null(XGBOOST)) { | ||
if (!is.null(XGBOOST$max.depth)) { | ||
opt@XGBOOST$max.depth <- XGBOOST$max.depth | ||
} | ||
if (!is.null(XGBOOST$eta)) { | ||
opt@XGBOOST$eta <- XGBOOST$eta | ||
} | ||
# Override the default number of boosting rounds only when the user supplied `nrounds`. | ||
# The guard must test `nrounds` itself: testing `max.depth` ignored a user-supplied | ||
# `nrounds` whenever `max.depth` was absent, and assigned NULL to `nrounds` when only | ||
# `max.depth` was given (NOTE(review): on a list slot that drops the default - confirm). | ||
if (!is.null(XGBOOST$nrounds)) { | ||
opt@XGBOOST$nrounds <- XGBOOST$nrounds | ||
} | ||
if (!is.null(XGBOOST$objective)) { | ||
opt@XGBOOST$objective <- XGBOOST$objective | ||
} | ||
if (!is.null(XGBOOST$nthread)) { | ||
opt@XGBOOST$nthread <- XGBOOST$nthread | ||
} | ||
|
||
} | ||
## 3. test validity --------------------------------------------------------- | ||
test <- as.logical(validObject(object = opt, test = TRUE, complete = FALSE)) | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -257,16 +257,14 @@ BIOMOD_Tuning <- function(bm.format, | |
if ("MAXENT" %in% models && !isNamespaceLoaded('ENMeval')) { | ||
if(!requireNamespace('ENMeval', quietly = TRUE)) stop("Package 'ENMeval' not found") | ||
} | ||
# if ("MAXENT.Tsuruoka" %in% models && !isNamespaceLoaded('maxent')) { requireNamespace("maxent", quietly = TRUE) } | ||
if ("SRE" %in% models && !isNamespaceLoaded('dismo')) { | ||
if(!requireNamespace('dismo', quietly = TRUE)) stop("Package 'dismo' not found") | ||
} | ||
} | ||
|
||
tune.SRE <- tune.GLM <- tune.MAXENT <- tune.GAM <- tune.GBM <- | ||
tune.CTA.rpart <- tune.CTA.rpart2 <- tune.RF <- tune.ANN <- tune.MARS <- tune.FDA <- NULL | ||
# tune.MAXENT.Tsuruoka <- NULL | ||
|
||
|
||
resp <- bm.format@data.species | ||
# if (is.null(weights)) { weights = rep(1, length([email protected]))} | ||
|
||
|
@@ -686,24 +684,10 @@ BIOMOD_Tuning <- function(bm.format, | |
} | ||
|
||
|
||
# if ('MAXENT.Tsuruoka' %in% models) { | ||
# cat("Start tuning MAXENT.Tsuruoka\n") | ||
# try(tune.MAXENT.Tsuruoka <- as.data.frame(tune.maxent([email protected],[email protected],nfold=ME.kfolds,showall=T))) | ||
# cat(paste("Finished tuning MAXENT.Tsuruoka\n","\n-=-=-=-=-=-=-=-=-=-=\n")) | ||
# | ||
# if(!is.null(tune.MAXENT.Tsuruoka)){ | ||
# [email protected]$l1_regularizer <- tune.MAXENT.Tsuruoka$l1_regularizer[which.max(tune.MAXENT.Tsuruoka$accuracy)] | ||
# [email protected]$l2_regularizer <- tune.MAXENT.Tsuruoka$l2_regularizer[which.max(tune.MAXENT.Tsuruoka$accuracy)] | ||
# [email protected]$use_sgd <- ifelse(tune.MAXENT.Tsuruoka[which.max(tune.MAXENT.Tsuruoka$accuracy),]$use_sgd==0,F,T) | ||
# [email protected]$set_heldout <- tune.MAXENT.Tsuruoka$set_heldout[which.max(tune.MAXENT.Tsuruoka$accuracy)] | ||
# } else { if('MAXENT.Tsuruoka' %in% models){cat("Tuning MAXENT.Tsuruoka failed!"); tune.MAXENT.Tsuruoka <- "FAILED"}} | ||
# } | ||
|
||
.bm_cat("Done") | ||
return(list(models.options = bm.options, tune.SRE = tune.SRE, tune.CTA.rpart = tune.CTA.rpart, tune.CTA.rpart2 = tune.CTA.rpart2, | ||
tune.RF = tune.RF, tune.ANN = tune.ANN, tune.MARS = tune.MARS, tune.FDA = tune.FDA, tune.GBM = tune.GBM, | ||
tune.GAM = tune.GAM, tune.GLM = tune.GLM, tune.MAXENT = tune.MAXENT)) | ||
# tune.MAXENT.Tsuruoka = tune.MAXENT.Tsuruoka, | ||
} | ||
|
||
|
||
|
Oops, something went wrong.