Skip to content

Commit

Permalink
Merge pull request #274 from biomodhub/xgboost
Browse files Browse the repository at this point in the history
adding xgboost to the list of models available
  • Loading branch information
rpatin committed Jun 15, 2023
2 parents edebb39 + 66268d9 commit d4fa520
Show file tree
Hide file tree
Showing 110 changed files with 413 additions and 334 deletions.
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: biomod2
Type: Package
Title: Ensemble Platform for Species Distribution Modeling
Version: 4.2-3-5
Date: 2023-05-22
Version: 4.2-4
Date: 2023-06-15
Authors@R: c(person("Wilfried", "Thuiller", role = c("aut")
, email = "[email protected]"),
person("Damien", "Georges", role = c("aut")),
Expand Down Expand Up @@ -46,6 +46,7 @@ Imports:
ggplot2,
nnet,
gbm (>= 2.1.3),
xgboost,
mda,
randomForest,
maxnet,
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ exportClasses(MAXENT_biomod2_model)
exportClasses(MAXNET_biomod2_model)
exportClasses(RF_biomod2_model)
exportClasses(SRE_biomod2_model)
exportClasses(XGBOOST_biomod2_model)
exportClasses(biomod2_ensemble_model)
exportClasses(biomod2_model)
exportMethods(BIOMOD.formated.data)
Expand Down Expand Up @@ -261,4 +262,5 @@ importFrom(utils,setTxtProgressBar)
importFrom(utils,tail)
importFrom(utils,txtProgressBar)
importFrom(utils,write.table)
importFrom(xgboost,xgboost)
importMethodsFrom(methods,show)
4 changes: 2 additions & 2 deletions R/BIOMOD_LoadModels.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
##' @param algo (\emph{optional, default} \code{NULL}) \cr
##' A \code{character} containing algorithm to be loaded, must be either \code{GLM},
##' \code{GBM}, \code{GAM}, \code{CTA}, \code{ANN}, \code{SRE}, \code{FDA}, \code{MARS},
##' \code{RF}, \code{MAXENT}, \code{MAXNET}
##' \code{RF}, \code{MAXENT}, \code{MAXNET}, \code{XGBOOST}
##'
##' @param merged.by.PA (\emph{optional, default} \code{NULL}) \cr
##' A \code{vector} containing merged pseudo-absence set to be loaded, must be among \code{PA1},
Expand All @@ -37,7 +37,7 @@
##' @param merged.by.algo (\emph{optional, default} \code{NULL}) \cr
##' A \code{character} containing merged algorithm to be loaded, must be among \code{GLM},
##' \code{GBM}, \code{GAM}, \code{CTA}, \code{ANN}, \code{SRE}, \code{FDA}, \code{MARS},
##' \code{RF}, \code{MAXENT}, \code{MAXNET}, \code{mergedAlgo}
##' \code{RF}, \code{MAXENT}, \code{MAXNET}, \code{XGBOOST}, \code{mergedAlgo}
##' @param filtered.by (\emph{optional, default} \code{NULL}) \cr
##' A \code{vector} containing evaluation metric selected to filter single models to build the
##' ensemble models, must be among \code{ROC}, \code{TSS}, \code{KAPPA}, \code{ACCURACY},
Expand Down
30 changes: 17 additions & 13 deletions R/BIOMOD_Modeling.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
##' (\emph{a random number by default})
##' @param models a \code{vector} containing model names to be computed, must be among \code{GLM},
##' \code{GBM}, \code{GAM}, \code{CTA}, \code{ANN}, \code{SRE}, \code{FDA}, \code{MARS},
##' \code{RF}, \code{MAXENT}, \code{MAXNET}
##' \code{RF}, \code{MAXENT}, \code{MAXNET}, \code{XGBOOST}
##' @param models.pa (\emph{optional, default} \code{NULL}) \cr
##' A \code{list} containing for each model a \code{vector} defining which pseudo-absence datasets
##' are to be used, must be among \code{colnames(bm.format@PA.table)}
Expand Down Expand Up @@ -127,6 +127,7 @@
##' \item \code{MAXENT} : Maximum Entropy
##' (\url{https://biodiversityinformatics.amnh.org/open_source/maxent/})
##' \item \code{MAXNET} : Maximum Entropy (\code{\link[maxnet]{maxnet}})
##' \item \code{XGBOOST} : eXtreme Gradient Boosting Training (\code{\link[xgboost]{xgboost}})
##' }}
##'
##' \item{models.pa}{Different models might respond differently to different numbers of
Expand Down Expand Up @@ -188,15 +189,17 @@
##' @keywords models regression nonlinear multivariate nonparametric tree
##'
##'
##' @seealso \code{\link[stats]{glm}}, \code{\link[gam]{gam}}, \code{\link[mgcv]{gam}},
##' \code{\link[mgcv]{bam}}, \code{\link[gbm]{gbm}}, \code{\link[rpart]{rpart}},
##' code{\link[nnet]{nnet}}, \code{\link[mda]{fda}}, \code{\link[earth]{earth}},
##' \code{\link[randomForest]{randomForest}}, \code{\link[maxnet]{maxnet}},
##' \code{\link{BIOMOD_FormatingData}}, \code{\link{BIOMOD_ModelingOptions}},
##' \code{\link{bm_CrossValidation}}, \code{ \link{bm_VariablesImportance}},
##' \code{\link{BIOMOD_Projection}}, \code{\link{BIOMOD_EnsembleModeling}},
##' \code{\link{bm_PlotEvalMean}}, \code{\link{bm_PlotEvalBoxplot}},
##' \code{\link{bm_PlotVarImpBoxplot}}, \code{\link{bm_PlotResponseCurves}}
##' @seealso \code{\link[stats]{glm}}, \code{\link[gam]{gam}},
##' \code{\link[mgcv]{gam}}, \code{\link[mgcv]{bam}}, \code{\link[gbm]{gbm}},
##' \code{\link[rpart]{rpart}}, \code{\link[nnet]{nnet}},
##' \code{\link[mda]{fda}}, \code{\link[earth]{earth}},
##' \code{\link[randomForest]{randomForest}}, \code{\link[maxnet]{maxnet}},
##' \code{\link[xgboost]{xgboost}}, \code{\link{BIOMOD_FormatingData}},
##' \code{\link{BIOMOD_ModelingOptions}}, \code{\link{bm_CrossValidation}},
##' \code{\link{bm_VariablesImportance}}, \code{\link{BIOMOD_Projection}},
##' \code{\link{BIOMOD_EnsembleModeling}}, \code{\link{bm_PlotEvalMean}},
##' \code{\link{bm_PlotEvalBoxplot}}, \code{\link{bm_PlotVarImpBoxplot}},
##' \code{\link{bm_PlotResponseCurves}}
##' @family Main functions
##'
##'
Expand Down Expand Up @@ -286,7 +289,8 @@
BIOMOD_Modeling <- function(bm.format,
modeling.id = as.character(format(Sys.time(), "%s")),
models = c('GLM', 'GBM', 'GAM', 'CTA', 'ANN', 'SRE'
, 'FDA', 'MARS', 'RF', 'MAXENT', 'MAXNET'),
, 'FDA', 'MARS', 'RF', 'MAXENT', 'MAXNET',
'XGBOOST'),
models.pa = NULL,
bm.options = NULL,
CV.strategy = 'random',
Expand Down Expand Up @@ -492,15 +496,15 @@ BIOMOD_Modeling <- function(bm.format,

## check if model is supported
avail.models.list <- c('GLM', 'GBM', 'GAM', 'CTA', 'ANN', 'SRE', 'FDA', 'MARS'
, 'RF', 'MAXENT', 'MAXNET')
, 'RF', 'MAXENT', 'MAXNET', 'XGBOOST')
.fun_testIfIn(TRUE, "models", models, avail.models.list)


## 1.1 Remove models not supporting categorical variables --------------------
categorical_var <- .get_categorical_names(bm.format@data.env.var)

if (length(categorical_var) > 0) {
models.fact.unsupport <- c("SRE", "MAXENT.Tsuruoka")
models.fact.unsupport <- c("SRE", "XGBOOST")
models.switch.off <- c(models.switch.off, intersect(models, models.fact.unsupport))
if (length(models.switch.off) > 0) {
models <- setdiff(models, models.switch.off)
Expand Down
57 changes: 33 additions & 24 deletions R/BIOMOD_ModelingOptions.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
##' @param RF (\emph{optional, default} \code{NULL}) \cr A \code{list} containing RF options
##' @param MAXENT (\emph{optional, default} \code{NULL}) \cr A \code{list}
##' containing MAXENT options
##' @param XGBOOST (\emph{optional, default} \code{NULL}) \cr A \code{list}
##' containing XGBOOST options
##'
##'
##' @return
Expand Down Expand Up @@ -308,20 +310,16 @@
##' \code{numeric} corresponding to the default prevalence of the species \cr
##' (\emph{probability of presence at ordinary occurrence points})}
##' }
##'
##' % @section \bold{MAXENT.Tsuruoka (\code{\link[maxent]{maxent}})} :
##' % \itemize{
##' % \item{\code{l1_regularizer = 0.0}}{ : a \code{numeric} turning on L1 regularization and setting
##' % the regularization parameter (\emph{a value of \code{0} will disable L1 regularization})}
##' % \item{\code{l2_regularizer = 0.0}}{ : a \code{numeric} turning on L2 regularization and setting
##' % the regularization parameter (\emph{a value of \code{0} will disable L2 regularization})}
##' % \item{\code{use_sgd = FALSE}}{ : a \code{logical} to use SGD parameter estimation}
##' % \item{\code{set_heldout = 0}}{ : an \code{integer} corresponding to the number of documents to
##' % hold out (\emph{to test against and prevent overfitting, use carefully in case of dataset with
##' % low number of occurrences})}
##' % \item{\code{verbose = FALSE}}{ : a \code{logical} specifying whether to provide descriptive
##' % output about the training process}
##' % }
##' @section XGBOOST : (default \code{\link[xgboost]{xgboost}})
##'
##' \emph{Please refer to \code{\link[xgboost]{xgboost}} help file for more details.}
##' \itemize{
##' \item{\code{max.depth = 5}}
##' \item{\code{eta = 0.1}}
##' \item{\code{nrounds = 512}}
##' \item{\code{objective = "binary:logistic"}}
##' \item{\code{nthread = 1}}
##' }
##'
##'
##' @keywords models options
Expand Down Expand Up @@ -402,7 +400,8 @@ BIOMOD_ModelingOptions <- function(GLM = NULL,
FDA = NULL,
MARS = NULL,
RF = NULL,
MAXENT = NULL)
MAXENT = NULL,
XGBOOST = NULL)
{
# .bm_cat("Build Modeling Options")

Expand Down Expand Up @@ -653,15 +652,25 @@ BIOMOD_ModelingOptions <- function(GLM = NULL,
} else {
opt@MAXENT$path_to_maxent.jar <- getwd()
}

# if (!is.null(MAXENT.Tsuruoka)) {
# if (!is.null(MAXENT.Tsuruoka$l1_regularizer)) { [email protected]$l1_regularizer <- MAXENT.Tsuruoka$l1_regularizer }
# if (!is.null(MAXENT.Tsuruoka$l2_regularizer)) { [email protected]$l2_regularizer <- MAXENT.Tsuruoka$l2_regularizer }
# if (!is.null(MAXENT.Tsuruoka$use_sgd)) { [email protected]$use_sgd <- MAXENT.Tsuruoka$use_sgd }
# if (!is.null(MAXENT.Tsuruoka$set_heldout)) { [email protected]$set_heldout <- MAXENT.Tsuruoka$set_heldout }
# if (!is.null(MAXENT.Tsuruoka$verbose)) { [email protected]$verbose <- MAXENT.Tsuruoka$verbose }
# }

## 2.11 XGBOOST -----------------------------------------------------
## Override the default XGBOOST options stored in `opt` with the values
## supplied by the user. Each option is copied only when it is present in
## the user-supplied `XGBOOST` list, so options left out keep their default.
if (!is.null(XGBOOST)) {
  if (!is.null(XGBOOST$max.depth)) {
    opt@XGBOOST$max.depth <- XGBOOST$max.depth
  }
  if (!is.null(XGBOOST$eta)) {
    opt@XGBOOST$eta <- XGBOOST$eta
  }
  ## The guard must test `nrounds` itself. Testing `max.depth` here would
  ## ignore a user-supplied `nrounds` whenever `max.depth` is absent, and
  ## would assign NULL to the default `nrounds` whenever only `max.depth`
  ## is given (NOTE(review): assuming opt@XGBOOST is a list, that NULL
  ## assignment drops the element entirely).
  if (!is.null(XGBOOST$nrounds)) {
    opt@XGBOOST$nrounds <- XGBOOST$nrounds
  }
  if (!is.null(XGBOOST$objective)) {
    opt@XGBOOST$objective <- XGBOOST$objective
  }
  if (!is.null(XGBOOST$nthread)) {
    opt@XGBOOST$nthread <- XGBOOST$nthread
  }
}
## 3. test validity ---------------------------------------------------------
test <- as.logical(validObject(object = opt, test = TRUE, complete = FALSE))

Expand Down
1 change: 1 addition & 0 deletions R/BIOMOD_Projection.R
Original file line number Diff line number Diff line change
Expand Up @@ -530,6 +530,7 @@ BIOMOD_Projection <- function(bm.mod,
new.env <- mask(new.env, new.env.mask)
} else {
.fun_testIfIn(TRUE, "colnames(new.env)", colnames(new.env), bm.mod@expl.var.names)
new.env <- new.env[ , bm.mod@expl.var.names, drop = FALSE]
}

which.factor <- which(sapply(new.env, is.factor))
Expand Down
18 changes: 1 addition & 17 deletions R/BIOMOD_Tuning.R
Original file line number Diff line number Diff line change
Expand Up @@ -257,16 +257,14 @@ BIOMOD_Tuning <- function(bm.format,
if ("MAXENT" %in% models && !isNamespaceLoaded('ENMeval')) {
if(!requireNamespace('ENMeval', quietly = TRUE)) stop("Package 'ENMeval' not found")
}
# if ("MAXENT.Tsuruoka" %in% models && !isNamespaceLoaded('maxent')) { requireNamespace("maxent", quietly = TRUE) }
if ("SRE" %in% models && !isNamespaceLoaded('dismo')) {
if(!requireNamespace('dismo', quietly = TRUE)) stop("Package 'dismo' not found")
}
}

tune.SRE <- tune.GLM <- tune.MAXENT <- tune.GAM <- tune.GBM <-
tune.CTA.rpart <- tune.CTA.rpart2 <- tune.RF <- tune.ANN <- tune.MARS <- tune.FDA <- NULL
# tune.MAXENT.Tsuruoka <- NULL


resp <- bm.format@data.species
# if (is.null(weights)) { weights = rep(1, length(bm.format@data.species))}

Expand Down Expand Up @@ -686,24 +684,10 @@ BIOMOD_Tuning <- function(bm.format,
}


# if ('MAXENT.Tsuruoka' %in% models) {
# cat("Start tuning MAXENT.Tsuruoka\n")
# try(tune.MAXENT.Tsuruoka <- as.data.frame(tune.maxent([email protected],[email protected],nfold=ME.kfolds,showall=T)))
# cat(paste("Finished tuning MAXENT.Tsuruoka\n","\n-=-=-=-=-=-=-=-=-=-=\n"))
#
# if(!is.null(tune.MAXENT.Tsuruoka)){
# [email protected]$l1_regularizer <- tune.MAXENT.Tsuruoka$l1_regularizer[which.max(tune.MAXENT.Tsuruoka$accuracy)]
# [email protected]$l2_regularizer <- tune.MAXENT.Tsuruoka$l2_regularizer[which.max(tune.MAXENT.Tsuruoka$accuracy)]
# [email protected]$use_sgd <- ifelse(tune.MAXENT.Tsuruoka[which.max(tune.MAXENT.Tsuruoka$accuracy),]$use_sgd==0,F,T)
# [email protected]$set_heldout <- tune.MAXENT.Tsuruoka$set_heldout[which.max(tune.MAXENT.Tsuruoka$accuracy)]
# } else { if('MAXENT.Tsuruoka' %in% models){cat("Tuning MAXENT.Tsuruoka failed!"); tune.MAXENT.Tsuruoka <- "FAILED"}}
# }

.bm_cat("Done")
return(list(models.options = bm.options, tune.SRE = tune.SRE, tune.CTA.rpart = tune.CTA.rpart, tune.CTA.rpart2 = tune.CTA.rpart2,
tune.RF = tune.RF, tune.ANN = tune.ANN, tune.MARS = tune.MARS, tune.FDA = tune.FDA, tune.GBM = tune.GBM,
tune.GAM = tune.GAM, tune.GLM = tune.GLM, tune.MAXENT = tune.MAXENT))
# tune.MAXENT.Tsuruoka = tune.MAXENT.Tsuruoka,
}


Expand Down
Loading

0 comments on commit d4fa520

Please sign in to comment.