diff --git a/DESCRIPTION b/DESCRIPTION
index d9930599..ce4c6794 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,8 +1,8 @@
Package: biomod2
Type: Package
Title: Ensemble Platform for Species Distribution Modeling
-Version: 4.2-5-2
-Date: 2024-05-31
+Version: 4.2-6
+Date: 2024-06-18
Authors@R: c(person("Wilfried", "Thuiller", role = c("aut")
, email = "wilfried.thuiller@univ-grenoble-alpes.fr"),
person("Damien", "Georges", role = c("aut")),
diff --git a/NAMESPACE b/NAMESPACE
index 81ae0d62..4ca9df7b 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -80,6 +80,7 @@ exportClasses(MARS_biomod2_model)
exportClasses(MAXENT_biomod2_model)
exportClasses(MAXNET_biomod2_model)
exportClasses(RF_biomod2_model)
+exportClasses(RFd_biomod2_model)
exportClasses(SRE_biomod2_model)
exportClasses(XGBOOST_biomod2_model)
exportClasses(biomod2_ensemble_model)
@@ -203,6 +204,7 @@ importFrom(stats,rnorm)
importFrom(stats,runif)
importFrom(stats,sd)
importFrom(terra,`add<-`)
+importFrom(terra,aggregate)
importFrom(terra,app)
importFrom(terra,as.matrix)
importFrom(terra,as.points)
diff --git a/R/BIOMOD_FormatingData.R b/R/BIOMOD_FormatingData.R
index d51f4f2f..bd1479ef 100644
--- a/R/BIOMOD_FormatingData.R
+++ b/R/BIOMOD_FormatingData.R
@@ -81,6 +81,8 @@
##' If pseudo-absence selection and \code{PA.strategy = 'disk'}, a \code{numeric} defining the
##' maximal distance to presence points used to make the \code{disk} pseudo-absence selection
##' (in meters, see Details)
+##' @param PA.fact.aggr (\emph{optional, default} \code{NULL}) \cr
+##' If \code{strategy = 'random'} or \code{strategy = 'disk'}, a \code{integer} defining the factor of aggregation to reduce the resolution
##' @param PA.user.table (\emph{optional, default} \code{NULL}) \cr
##' If pseudo-absence selection and \code{PA.strategy = 'user.defined'}, a \code{matrix} or
##' \code{data.frame} with as many rows as \code{resp.var} values, as many columns as
@@ -93,6 +95,8 @@
##' @param filter.raster (\emph{optional, default} \code{FALSE}) \cr
##' If \code{expl.var} is of raster type, a \code{logical} value defining whether \code{resp.var}
##' is to be filtered when several points occur in the same raster cell
+##' @param seed.val (\emph{optional, default} \code{NULL}) \cr
+##' An \code{integer} value corresponding to the new seed value to be set
##'
##'
##' @return
@@ -344,9 +348,11 @@ BIOMOD_FormatingData <- function(resp.name,
PA.dist.min = 0,
PA.dist.max = NULL,
PA.sre.quant = 0.025,
+ PA.fact.aggr = NULL,
PA.user.table = NULL,
na.rm = TRUE,
- filter.raster = FALSE)
+ filter.raster = FALSE,
+ seed.val = NULL)
{
.bm_cat(paste0(resp.name, " Data Formating"))
@@ -391,9 +397,11 @@ BIOMOD_FormatingData <- function(resp.name,
PA.dist.min = PA.dist.min,
PA.dist.max = PA.dist.max,
PA.sre.quant = PA.sre.quant,
+ PA.fact.aggr = PA.fact.aggr,
PA.user.table = PA.user.table,
na.rm = na.rm,
- filter.raster = filter.raster)
+ filter.raster = filter.raster,
+ seed.val)
}
.bm_cat("Done")
return(out)
diff --git a/R/BIOMOD_Modeling.R b/R/BIOMOD_Modeling.R
index e6da4b13..ea117f54 100644
--- a/R/BIOMOD_Modeling.R
+++ b/R/BIOMOD_Modeling.R
@@ -16,7 +16,7 @@
##' (\emph{a random number by default})
##' @param models a \code{vector} containing model names to be computed, must be among
##' \code{ANN}, \code{CTA}, \code{FDA}, \code{GAM}, \code{GBM}, \code{GLM}, \code{MARS},
-##' \code{MAXENT}, \code{MAXNET}, \code{RF}, \code{SRE}, \code{XGBOOST}
+##' \code{MAXENT}, \code{MAXNET}, \code{RF}, \code{RFd}, \code{SRE}, \code{XGBOOST}
##' @param models.pa (\emph{optional, default} \code{NULL}) \cr
##' A \code{list} containing for each model a \code{vector} defining which pseudo-absence datasets
##' are to be used, must be among \code{colnames(bm.format@PA.table)}
@@ -139,6 +139,7 @@
##' (\url{https://biodiversityinformatics.amnh.org/open_source/maxent/})
##' \item \code{MAXNET} : Maximum Entropy (\code{\link[maxnet]{maxnet}})
##' \item \code{RF} : Random Forest (\code{\link[randomForest]{randomForest}})
+##' \item \code{RFd} : Random Forest downsampled (\code{\link[randomForest]{randomForest}})
##' \item \code{SRE} : Surface Range Envelop or usually called BIOCLIM (\code{\link{bm_SRE}})
##' \item \code{XGBOOST} : eXtreme Gradient Boosting Training (\code{\link[xgboost]{xgboost}})
##' }}
@@ -622,7 +623,7 @@ BIOMOD_Modeling <- function(bm.format,
models.switch.off <- NULL
## check if model is supported
- avail.models.list <- c('ANN', 'CTA', 'FDA', 'GAM', 'GBM', 'GLM', 'MARS', 'MAXENT', 'MAXNET', 'RF', 'SRE', 'XGBOOST')
+ avail.models.list <- c('ANN', 'CTA', 'FDA', 'GAM', 'GBM', 'GLM', 'MARS', 'MAXENT', 'MAXNET', 'RF','RFd', 'SRE', 'XGBOOST')
.fun_testIfIn(TRUE, "models", models, avail.models.list)
## Specific case of one variable with GBM / MAXNET
diff --git a/R/biomod2-package.R b/R/biomod2-package.R
index 040e71e3..f6a55170 100644
--- a/R/biomod2-package.R
+++ b/R/biomod2-package.R
@@ -10,7 +10,8 @@
{
RFver <- read.dcf(file = system.file("DESCRIPTION", package = pkgname), fields = "Version")
mess <- paste(pkgname, RFver, "loaded.\n")
- mess <- paste(mess, "/!\\ New set up for modeling options. We apologize for the trouble ^[*.*]^")
+ mess <- paste(mess, "/!\\ Be careful : note that some BigBoss options have been changed between biomod2 v4.2-6 and previous versions./!\\
+ \n\n Please welcome a new model : RFd. This is a random forest model with a automatic down sampling to make your life easier.\n")
packageStartupMessage(mess)
toLoad <- unique(ModelsTable$package[-which(ModelsTable$package %in% c("MAXENT", "biomod2"))])
diff --git a/R/biomod2_classes_0.R b/R/biomod2_classes_0.R
index 9087fcfc..0dd40197 100644
--- a/R/biomod2_classes_0.R
+++ b/R/biomod2_classes_0.R
@@ -75,7 +75,7 @@ setGeneric("BIOMOD.options.default", def = function(mod, typ, pkg, fun) { standa
.BIOMOD.options.default.check.args <- function(mod, typ, pkg, fun)
{
## check if model is supported
- avail.models.list <- c('ANN', 'CTA', 'FDA', 'GAM', 'GBM', 'GLM', 'MARS', 'MAXENT', 'MAXNET', 'RF', 'SRE', 'XGBOOST')
+ avail.models.list <- c('ANN', 'CTA', 'FDA', 'GAM', 'GBM', 'GLM', 'MARS', 'MAXENT', 'MAXNET', 'RF','RFd', 'SRE', 'XGBOOST')
.fun_testIfIn(TRUE, "mod", mod, avail.models.list)
## check if type is supported
@@ -305,6 +305,16 @@ setGeneric("BIOMOD.options.dataset",
}
}
if (strategy == "user.defined" && !is.null(user.val)) {
+
+ if ("for_all_datasets" %in% names(user.val)){
+ if (length(names(user.val)) > 1 ){
+ user.val <- user.val["for_all_datasets"]
+ cat("\n\t\t> Only the options defined for 'for_all_datasets' will be taken into account")
+ }
+ user.val <- rep(user.val, length(expected_CVnames))
+ names(user.val) <- expected_CVnames
+ }
+
.fun_testIfIn(TRUE, "names(user.val)", names(user.val), expected_CVnames)
if (length(names(user.val)) != length(expected_CVnames)) {
warning(paste0("Options will be changed only for a subset of datasets ("
@@ -316,7 +326,8 @@ setGeneric("BIOMOD.options.dataset",
}
return(list(strategy = strategy,
- expected_CVnames = expected_CVnames))
+ expected_CVnames = expected_CVnames,
+ user.val = user.val))
}
# .BIOMOD.options.dataset.test <- function(bm.opt)
@@ -445,6 +456,11 @@ setMethod('BIOMOD.options.dataset', signature(strategy = 'character'),
argstmp$mtry = 1
argstmp$type <- "classification"
}
+ if (mod == "RFd") {
+ argstmp[["x"]] = NULL
+ argstmp$mtry = 1
+ argstmp$type <- "classification"
+ }
if (mod == "XGBOOST") { argstmp$nrounds = 4 }
argstmp[["..."]] = NULL
diff --git a/R/biomod2_classes_1.R b/R/biomod2_classes_1.R
index dbb7d362..0e5f4fa4 100644
--- a/R/biomod2_classes_1.R
+++ b/R/biomod2_classes_1.R
@@ -1321,6 +1321,8 @@ setMethod('summary', signature(object = 'BIOMOD.formated.data'),
##' If pseudo-absence selection and \code{PA.strategy = 'disk'}, a \code{numeric} defining the
##' maximal distance to presence points used to make the \code{disk} pseudo-absence selection
##' (in meters, see Details)
+##' @param PA.fact.aggr (\emph{optional, default} \code{NULL}) \cr
+##' If \code{strategy = 'random'} or \code{strategy = 'disk'}, a \code{integer} defining the factor of aggregation to reduce the resolution
##' @param PA.user.table (\emph{optional, default} \code{NULL}) \cr
##' If pseudo-absence selection and \code{PA.strategy = 'user.defined'}, a \code{matrix} or
##' \code{data.frame} with as many rows as \code{resp.var} values, as many columns as
@@ -1333,6 +1335,8 @@ setMethod('summary', signature(object = 'BIOMOD.formated.data'),
##' @param filter.raster (\emph{optional, default} \code{FALSE}) \cr
##' If \code{env} is of raster type, a \code{logical} value defining whether \code{sp}
##' is to be filtered when several points occur in the same raster cell
+##' @param seed.val (\emph{optional, default} \code{NULL}) \cr
+##' An \code{integer} value corresponding to the new seed value to be set
##'
##' @slot dir.name a \code{character} corresponding to the modeling folder
##' @slot sp.name a \code{character} corresponding to the species name
@@ -1443,15 +1447,17 @@ setMethod('BIOMOD.formated.data.PA', signature(sp = 'numeric', env = 'data.frame
, eval.sp = NULL, eval.env = NULL, eval.xy = NULL
, PA.nb.rep = 1, PA.strategy = 'random', PA.nb.absences = NULL
, PA.dist.min = 0, PA.dist.max = NULL
- , PA.sre.quant = 0.025, PA.user.table = NULL
- , na.rm = TRUE, filter.raster = FALSE) {
+ , PA.sre.quant = 0.025, PA.fact.aggr = NULL
+ , PA.user.table = NULL
+ , na.rm = TRUE, filter.raster = FALSE, seed.val = NULL) {
.BIOMOD.formated.data.PA(sp, env, xy, dir.name, sp.name
, eval.sp, eval.env, eval.xy
, PA.nb.rep, PA.strategy, PA.nb.absences
, PA.dist.min, PA.dist.max
- , PA.sre.quant, PA.user.table
+ , PA.sre.quant, PA.fact.aggr, PA.user.table
, na.rm
- , filter.raster = filter.raster)
+ , filter.raster = filter.raster
+ , seed.val)
})
### BIOMOD.formated.data.PA(sp = numeric, env = SpatRaster) -------------------
@@ -1465,15 +1471,17 @@ setMethod('BIOMOD.formated.data.PA', signature(sp = 'numeric', env = 'SpatRaster
, eval.sp = NULL, eval.env = NULL, eval.xy = NULL
, PA.nb.rep = 1, PA.strategy = 'random', PA.nb.absences = NULL
, PA.dist.min = 0, PA.dist.max = NULL
- , PA.sre.quant = 0.025, PA.user.table = NULL
- , na.rm = TRUE, filter.raster = FALSE) {
+ , PA.sre.quant = 0.025, PA.fact.aggr = NULL
+ , PA.user.table = NULL
+ , na.rm = TRUE, filter.raster = FALSE, seed.val = NULL) {
.BIOMOD.formated.data.PA(sp, env, xy, dir.name, sp.name
, eval.sp, eval.env, eval.xy
, PA.nb.rep, PA.strategy, PA.nb.absences
, PA.dist.min, PA.dist.max
- , PA.sre.quant, PA.user.table
+ , PA.sre.quant, PA.fact.aggr, PA.user.table
, na.rm
- , filter.raster = filter.raster)
+ , filter.raster = filter.raster
+ , seed.val)
})
### .BIOMOD.formated.data.PA ---------------------------------------------------
@@ -1481,8 +1489,9 @@ setMethod('BIOMOD.formated.data.PA', signature(sp = 'numeric', env = 'SpatRaster
, eval.sp = NULL, eval.env = NULL, eval.xy = NULL
, PA.nb.rep = 1, PA.strategy = 'random', PA.nb.absences = NULL
, PA.dist.min = 0, PA.dist.max = NULL
- , PA.sre.quant = 0.025, PA.user.table = NULL
- , na.rm = TRUE, filter.raster = FALSE)
+ , PA.sre.quant = 0.025, PA.fact.aggr = NULL
+ , PA.user.table = NULL
+ , na.rm = TRUE, filter.raster = FALSE, seed.val = NULL)
{
args <- .BIOMOD.formated.data.check.args(sp, env, xy, eval.sp, eval.env, eval.xy, filter.raster)
for (argi in names(args)) { assign(x = argi, value = args[[argi]]) }
@@ -1529,7 +1538,10 @@ setMethod('BIOMOD.formated.data.PA', signature(sp = 'numeric', env = 'SpatRaster
sre.quant = PA.sre.quant,
dist.min = PA.dist.min,
dist.max = PA.dist.max,
- user.table = PA.user.table)
+ fact.aggr = PA.fact.aggr,
+ user.table = PA.user.table,
+ seed.val = seed.val)
+
if (!is.null(pa.data.tmp)) {
## Keep same env variable for eval than calib (+ check for factor)
@@ -1581,15 +1593,15 @@ setMethod('BIOMOD.formated.data.PA', signature(sp = 'numeric', env = 'SpatRaster
rm(list = 'BFD')
} else {
- cat("\n ! PA selection not done", fill = .Options$width)
- BFDP <- BIOMOD.formated.data(sp = as.vector(values(sp)[,1]),
- env = env,
- xy = xy,
- dir.name = dir.name,
- sp.name = sp.name,
- eval.sp = eval.sp,
- eval.env = eval.env,
- eval.xy = eval.xy)
+ stop("\n ! PA selection not done : \n The PA selection is not possible with the parameters selected." )
+ # BFDP <- BIOMOD.formated.data(sp = as.vector(values(sp)[,1]),
+ # env = env,
+ # xy = xy,
+ # dir.name = dir.name,
+ # sp.name = sp.name,
+ # eval.sp = eval.sp,
+ # eval.env = eval.env,
+ # eval.xy = eval.xy)
}
rm(list = "pa.data.tmp" )
return(BFDP)
diff --git a/R/biomod2_classes_4.R b/R/biomod2_classes_4.R
index b78ec2bf..c199bf12 100644
--- a/R/biomod2_classes_4.R
+++ b/R/biomod2_classes_4.R
@@ -76,6 +76,7 @@ NULL
##' @aliases MAXENT_biomod2_model-class
##' @aliases MAXNET_biomod2_model-class
##' @aliases RF_biomod2_model-class
+##' @aliases RFd_biomod2_model-class
##' @aliases SRE_biomod2_model-class
##' @aliases XGBOOST_biomod2_model-class
##' @author Damien Georges
@@ -117,6 +118,7 @@ NULL
##' \item \code{MAXNET_biomod2_model} : \code{model_class} is
##' \code{MAXNET}
##' \item \code{RF_biomod2_model} : \code{model_class} is \code{RF}
+##' \item \code{RFd_biomod2_model} : \code{model_class} is \code{RFd}
##' \item \code{SRE_biomod2_model} : \code{model_class} is \code{SRE}
##' }
##'
@@ -138,6 +140,7 @@ NULL
##' showClass("MAXENT_biomod2_model")
##' showClass("MAXNET_biomod2_model")
##' showClass("RF_biomod2_model")
+##' showClass("RFd_biomod2_model")
##' showClass("SRE_biomod2_model")
##'
NULL
@@ -275,6 +278,8 @@ setMethod('predict', signature(object = 'biomod2_model'),
##' @aliases predict2.MAXNET_biomod2_model.data.frame
##' @aliases predict2.RF_biomod2_model.SpatRaster
##' @aliases predict2.RF_biomod2_model.data.frame
+##' @aliases predict2.RFd_biomod2_model.SpatRaster
+##' @aliases predict2.RFd_biomod2_model.data.frame
##' @aliases predict2.SRE_biomod2_model.SpatRaster
##' @aliases predict2.SRE_biomod2_model.data.frame
##' @author Remi Patin
@@ -1112,6 +1117,81 @@ setMethod('predict2', signature(object = 'RF_biomod2_model', newdata = "data.fra
}
)
+#----------------------------------------------------------------------------- #
+## 8.10_bis RFd_biomod2_model -----------------------------------------------------
+#----------------------------------------------------------------------------- #
+##' @name RFd_biomod2_model-class
+##' @rdname biomod2_model
+##' @export
+
+setClass('RFd_biomod2_model',
+ representation(),
+ contains = 'biomod2_model',
+ prototype = list(model_class = 'RFd'),
+ validity = function(object) { # check model class
+ if (!inherits(object@model, "randomForest")) { return(FALSE)} else { return(TRUE) }
+ })
+
+##'
+##' @rdname predict2.bm
+##'
+
+
+setMethod('predict2', signature(object = 'RFd_biomod2_model', newdata = "SpatRaster"),
+ function(object, newdata, ...) {
+ pa <- .extract_modelNamesInfo(object@model_name, obj.type = "mod", info = "PA")
+ run <- .extract_modelNamesInfo(object@model_name, obj.type = "mod", info = "run")
+ dataset <- paste0("_", pa, "_", run)
+
+ if (!is.null(object@model_options@args.values[[dataset]]$type) && object@model_options@args.values[[dataset]]$type == "classification") {
+ predfun <- function(object, newdata, mod.name){
+ # new predict command used with terra
+ subset(predict(newdata, model = get_formal_model(object),
+ type = 'prob',
+ wopt = list(names = rep(mod.name,2))),
+ 2)
+ # old predict function used with raster
+ # predict(newdata, model = get_formal_model(object), type = 'prob', index = 2)
+ }
+ } else { #regression case
+ predfun <- function(object, newdata, mod.name){
+ predict(newdata, model = get_formal_model(object),
+ type = 'response',
+ wopt = list(names = rep(mod.name,2)))
+ }
+ }
+
+ # old predict function used with raster
+ # predict(newdata, model = get_formal_model(object), type = 'prob', index = 2)
+ # redirect to predict2.biomod2_model.SpatRaster
+
+ callNextMethod(object, newdata, predfun = predfun, ...)
+
+ }
+)
+
+##' @rdname predict2.bm
+setMethod('predict2', signature(object = 'RFd_biomod2_model', newdata = "data.frame"),
+ function(object, newdata, ...) {
+ pa <- .extract_modelNamesInfo(object@model_name, obj.type = "mod", info = "PA")
+ run <- .extract_modelNamesInfo(object@model_name, obj.type = "mod", info = "run")
+ dataset <- paste0("_", pa, "_", run)
+
+ if (!is.null(object@model_options@args.values[[dataset]]$type) && object@model_options@args.values[[dataset]]$type == "classification") {
+ predfun <- function(object, newdata, not_na_rows) {
+ as.numeric(predict(get_formal_model(object), as.data.frame(newdata[not_na_rows, , drop = FALSE]), type = 'prob')[, '1'])
+ }
+ } else { # regression case
+ predfun <- function(object, newdata, not_na_rows) {
+ as.numeric(predict(get_formal_model(object), as.data.frame(newdata[not_na_rows, , drop = FALSE]), type = 'response'))
+ }
+ }
+ # redirect to predict2.biomod2_model.data.frame
+ callNextMethod(object, newdata, predfun = predfun, ...)
+ }
+)
+
+
#----------------------------------------------------------------------------- #
## 8.11 SRE_biomod2_model ----------------------------------------------------
diff --git a/R/biomod2_data.R b/R/biomod2_data.R
index 94d15114..84b6150a 100644
--- a/R/biomod2_data.R
+++ b/R/biomod2_data.R
@@ -32,14 +32,14 @@
"ModelsTable"
# ModelsTable <- data.frame(model = c('ANN', 'CTA', 'FDA', 'GAM', 'GAM', 'GAM', 'GBM', 'GLM'
-# , 'MARS', 'MAXENT', 'MAXNET', 'RF', 'SRE', 'XGBOOST')
+# , 'MARS', 'MAXENT', 'MAXNET', 'RF','RFd', 'SRE', 'XGBOOST')
# , type = 'binary'
# , package = c('nnet', 'rpart', 'mda', 'gam', 'mgcv', 'mgcv', 'gbm', 'stats'
-# , 'earth', 'MAXENT', 'maxnet', 'randomForest', 'biomod2', 'xgboost')
+# , 'earth', 'MAXENT', 'maxnet', 'randomForest','randomForest', 'biomod2', 'xgboost')
# , func = c('nnet', 'rpart', 'fda', 'gam', 'bam', 'gam', 'gbm', 'glm'
-# , 'earth', 'MAXENT', 'maxnet', 'randomForest', 'bm_SRE', 'xgboost')
+# , 'earth', 'MAXENT', 'maxnet', 'randomForest','randomForest', 'bm_SRE', 'xgboost')
# , train = c('avNNet', 'rpart', 'fda', 'gamLoess', 'bam', 'gam', 'gbm', 'glm'
-# , 'earth', 'ENMevaluate', 'maxnet', 'rf', 'bm_SRE', 'xgbTree'))
+# , 'earth', 'ENMevaluate', 'maxnet', 'rf','rf', 'bm_SRE', 'xgbTree'))
# usethis::use_data(ModelsTable, overwrite = TRUE)
# usethis::use_data(ModelsTable, overwrite = TRUE, internal = TRUE)
@@ -55,7 +55,7 @@
#' \item{\code{ANN.binary.nnet.nnet}}{
#' \itemize{
#' \item \code{size = 5}
-#' \item \code{decay = 5}
+#' \item \code{decay = 0.1}
#' \item \code{trace = FALSE}
#' \item \code{rang = 0.1}
#' \item \code{maxit = 200}
@@ -64,7 +64,7 @@
#' \item{\code{CTA.binary.rpart.rpart}}{
#' \itemize{
#' \item \code{method = 'class'}
-#' \item \code{control = list(xval = 5, minbucket = 5, minsplit = 5, cp = 0.001, maxdepth = 25)}
+#' \item \code{control = list(xval = 5, minbucket = 5, minsplit = 5, cp = 0.001, maxdepth = 10)}
#' \item \code{cost = NULL}
#' }
#' }
@@ -128,7 +128,18 @@
#' \itemize{
#' \item \code{type = 'classification'}
#' \item \code{ntree = 500}
-#' \item \code{mtry = NULL}
+#' \item \code{mtry = 2}
+#' \item \code{strata = factor(c(0, 1))}
+#' \item \code{sampsize = NULL}
+#' \item \code{nodesize = 5}
+#' \item \code{maxnodes = NULL}
+#' }
+#' }
+#' \item{\code{RFd.binary.randomForest.randomForest}}{
+#' \itemize{
+#' \item \code{type = 'classification'}
+#' \item \code{ntree = 500}
+#' \item \code{mtry = 2}
#' \item \code{strata = factor(c(0, 1))}
#' \item \code{sampsize = NULL}
#' \item \code{nodesize = 5}
@@ -161,12 +172,12 @@
# bm.opt@options <- c(bm.opt@options, bm.opt_gam2@options, bm.opt_gam3@options)
# bm.opt@options <- bm.opt@options[bm.opt@models]
# bm.opt@options$ANN.binary.nnet.nnet@args.values[['_allData_allRun']]$size = 5 #NULL
-# bm.opt@options$ANN.binary.nnet.nnet@args.values[['_allData_allRun']]$decay = 5
+# bm.opt@options$ANN.binary.nnet.nnet@args.values[['_allData_allRun']]$decay = 0.1
# bm.opt@options$ANN.binary.nnet.nnet@args.values[['_allData_allRun']]$trace = FALSE
# bm.opt@options$ANN.binary.nnet.nnet@args.values[['_allData_allRun']]$rang = 0.1
# bm.opt@options$ANN.binary.nnet.nnet@args.values[['_allData_allRun']]$maxit = 200
# bm.opt@options$CTA.binary.rpart.rpart@args.values[['_allData_allRun']]$method = "class"
-# bm.opt@options$CTA.binary.rpart.rpart@args.values[['_allData_allRun']]$control = list(xval = 5, minbucket = 5, minsplit = 5, cp = 0.001, maxdepth = 25)
+# bm.opt@options$CTA.binary.rpart.rpart@args.values[['_allData_allRun']]$control = list(xval = 5, minbucket = 5, minsplit = 5, cp = 0.001, maxdepth = 10)
# bm.opt@options$CTA.binary.rpart.rpart@args.values[['_allData_allRun']]$cost = NULL
# bm.opt@options$FDA.binary.mda.fda@args.values[['_allData_allRun']]$method = "mars"
# bm.opt@options$GAM.binary.mgcv.gam@args.values[['_allData_allRun']]$family = binomial(link = 'logit')
@@ -193,11 +204,18 @@
# bm.opt@options$MAXENT.binary.MAXENT.MAXENT@args.values[['_allData_allRun']]$path_to_maxent.jar = '.'
# bm.opt@options$RF.binary.randomForest.randomForest@args.values[['_allData_allRun']]$type = 'classification'
# bm.opt@options$RF.binary.randomForest.randomForest@args.values[['_allData_allRun']]$ntree = 500
-# bm.opt@options$RF.binary.randomForest.randomForest@args.values[['_allData_allRun']]$mtry = NULL
+# bm.opt@options$RF.binary.randomForest.randomForest@args.values[['_allData_allRun']]$mtry = 2
# bm.opt@options$RF.binary.randomForest.randomForest@args.values[['_allData_allRun']]$strata = factor(c(0, 1))
# bm.opt@options$RF.binary.randomForest.randomForest@args.values[['_allData_allRun']]$sampsize = NULL
# bm.opt@options$RF.binary.randomForest.randomForest@args.values[['_allData_allRun']]$nodesize = 5
# bm.opt@options$RF.binary.randomForest.randomForest@args.values[['_allData_allRun']]$maxnodes = NULL
+# bm.opt@options$RFd.binary.randomForest.randomForest@args.values[['_allData_allRun']]$type = 'classification'
+# bm.opt@options$RFd.binary.randomForest.randomForest@args.values[['_allData_allRun']]$ntree = 500
+# bm.opt@options$RFd.binary.randomForest.randomForest@args.values[['_allData_allRun']]$mtry = 2
+# bm.opt@options$RFd.binary.randomForest.randomForest@args.values[['_allData_allRun']]$strata = factor(c(0, 1))
+# bm.opt@options$RFd.binary.randomForest.randomForest@args.values[['_allData_allRun']]$sampsize = NULL
+# bm.opt@options$RFd.binary.randomForest.randomForest@args.values[['_allData_allRun']]$nodesize = 5
+# bm.opt@options$RFd.binary.randomForest.randomForest@args.values[['_allData_allRun']]$maxnodes = NULL
# bm.opt@options$SRE.binary.biomod2.bm_SRE@args.values[['_allData_allRun']]$do.extrem = TRUE
# bm.opt@options$XGBOOST.binary.xgboost.xgboost@args.values[['_allData_allRun']]$params = list(max_depth = 2, eta = 1)
# bm.opt@options$XGBOOST.binary.xgboost.xgboost@args.values[['_allData_allRun']]$nthread = 2
@@ -205,7 +223,7 @@
# bm.opt@options$XGBOOST.binary.xgboost.xgboost@args.values[['_allData_allRun']]$objective = "binary:logistic"
# OptionsBigboss <- bm.opt
-# usethis::use_data(OptionsBigboss, overwrite = TRUE, internal = TRUE)
+# usethis::use_data(OptionsBigboss, overwrite = TRUE, internal = T)
# usethis::use_data(OptionsBigboss, ModelsTable, overwrite = TRUE, internal = TRUE)
diff --git a/R/bm_ModelingOptions.R b/R/bm_ModelingOptions.R
index c9f0c4be..c21e4ef5 100644
--- a/R/bm_ModelingOptions.R
+++ b/R/bm_ModelingOptions.R
@@ -11,7 +11,7 @@
##' \code{binary}, \code{binary.PA}, \code{abundance}, \code{compositional}
##' @param models a \code{vector} containing model names to be computed, must be among
##' \code{ANN}, \code{CTA}, \code{FDA}, \code{GAM}, \code{GBM}, \code{GLM}, \code{MARS},
-##' \code{MAXENT}, \code{MAXNET}, \code{RF}, \code{SRE}, \code{XGBOOST}
+##' \code{MAXENT}, \code{MAXNET}, \code{RF}, \code{RFd}, \code{SRE}, \code{XGBOOST}
##' @param strategy a \code{character} corresponding to the method to select models' parameters
##' values, must be either \code{default}, \code{bigboss}, \code{user.defined}, \code{tuned}
##' @param user.val (\emph{optional, default} \code{NULL}) \cr
@@ -54,6 +54,9 @@
##' function}
##' }
##'
+##' To define the same options for all datasets of a model, you can provide these options as a list in
+##' user.val with the names "for_all_datasets".
+##'
##' @note \code{MAXENT} being the only external model (not called through a \code{R} package),
##' default parameters, and their values, are the following :
##'
@@ -239,7 +242,7 @@
bm_ModelingOptions <- function(data.type
, models = c('ANN', 'CTA', 'FDA', 'GAM', 'GBM', 'GLM'
- , 'MARS', 'MAXENT', 'MAXNET', 'RF', 'SRE', 'XGBOOST')
+ , 'MARS', 'MAXENT', 'MAXNET', 'RF','RFd', 'SRE', 'XGBOOST')
, strategy, user.val = NULL, user.base = "bigboss"
, bm.format = NULL, calib.lines = NULL)
{
@@ -321,7 +324,7 @@ bm_ModelingOptions <- function(data.type
## check if model is supported
avail.models.list <- c('ANN', 'CTA', 'FDA', 'GAM', 'GAM.gam.gam', 'GAM.mgcv.bam', 'GAM.mgcv.gam'
- , 'GBM', 'GLM', 'MARS', 'MAXENT', 'MAXNET', 'RF', 'SRE', 'XGBOOST')
+ , 'GBM', 'GLM', 'MARS', 'MAXENT', 'MAXNET', 'RF','RFd', 'SRE', 'XGBOOST')
.fun_testIfIn(TRUE, "models", models, avail.models.list)
if (length(grep('GAM', models)) > 1) {
stop("Only one GAM model can be activated. Please choose betwen 'GAM', 'GAM.gam.gam', 'GAM.mgcv.bam' or 'GAM.mgcv.gam'")
@@ -357,7 +360,7 @@ bm_ModelingOptions <- function(data.type
}
.fun_testIfInherits(TRUE, "calib.lines", calib.lines, c("matrix"))
- expected_CVnames <- c(paste0("_allData_RUN", seq_len(ncol(calib.lines))), "_allData_allRun")
+ expected_CVnames <- c(paste0("_allData_RUN", seq_len(ncol(calib.lines))), "_allData_allRun", "for_all_datasets")
if (inherits(bm.format, "BIOMOD.formated.data.PA")) {
expected_CVnames <- c(expected_CVnames
, sapply(1:ncol(bm.format@PA.table)
diff --git a/R/bm_PseudoAbsences.R b/R/bm_PseudoAbsences.R
index b9c22f68..88aadb3a 100644
--- a/R/bm_PseudoAbsences.R
+++ b/R/bm_PseudoAbsences.R
@@ -38,6 +38,8 @@
##' @param dist.max (\emph{optional, default} \code{NULL}) \cr
##' If \code{strategy = 'disk'}, a \code{numeric} defining the maximal distance to presence points
##' used to make the \code{disk} pseudo-absence selection (in meters)
+##' @param fact.aggr (\emph{optional, default} \code{NULL}) \cr
+##' If \code{strategy = 'random'} or \code{strategy = 'disk'}, a \code{integer} defining the factor of aggregation to reduce the resolution
##' @param user.table (\emph{optional, default} \code{NULL}) \cr
##' If \code{strategy = 'user.defined'}, a \code{matrix} or \code{data.frame} with as many rows as
##' \code{resp.var} values, as many columns as \code{nb.rep}, and containing \code{TRUE} or
@@ -46,6 +48,8 @@
##'
##' @param \ldots (\emph{optional, one or several of the above arguments depending on the selected
##' method})
+##' @param seed.val (\emph{optional, default} \code{NULL}) \cr
+##' An \code{integer} value corresponding to the new seed value to be set
##'
##'
##' @return
@@ -188,7 +192,7 @@
##'
##'
##' @importFrom foreach foreach %do%
-##' @importFrom terra rast vect freq spatSample values extract
+##' @importFrom terra rast vect freq spatSample values extract aggregate
##' @importFrom utils packageVersion
##'
##' @export
@@ -198,11 +202,12 @@
bm_PseudoAbsences <- function(resp.var, expl.var, nb.rep = 1, strategy = 'random', nb.absences = NULL
- , sre.quant = 0, dist.min = 0, dist.max = NULL, user.table = NULL)
+ , sre.quant = 0, dist.min = 0, dist.max = NULL, fact.aggr = NULL
+ , user.table = NULL, seed.val = NULL)
{
## 0. Check arguments ---------------------------------------------------------------------------
args <- .bm_PseudoAbsences.check.args(resp.var, expl.var, nb.rep, strategy, nb.absences
- , sre.quant, dist.min, dist.max, user.table)
+ , sre.quant, dist.min, dist.max, user.table,seed.val)
for (argi in names(args)) { assign(x = argi, value = args[[argi]]) }
rm(args)
@@ -213,9 +218,9 @@ bm_PseudoAbsences <- function(resp.var, expl.var, nb.rep = 1, strategy = 'random
if (length(nb.absences) == 1) {
out <- switch(strategy,
user.defined = bm_PseudoAbsences_user.defined(resp.var, expl.var, user.table),
- random = bm_PseudoAbsences_random(resp.var, expl.var, nb.absences, nb.rep),
+ random = bm_PseudoAbsences_random(resp.var, expl.var, nb.absences, nb.rep, fact.aggr),
sre = bm_PseudoAbsences_sre(resp.var, expl.var, sre.quant, nb.absences, nb.rep),
- disk = bm_PseudoAbsences_disk(resp.var, expl.var, dist.min, dist.max, nb.absences, nb.rep))
+ disk = bm_PseudoAbsences_disk(resp.var, expl.var, dist.min, dist.max, nb.absences, nb.rep, fact.aggr))
} else if (length(nb.absences) == nb.rep) {
out.list = foreach(i.abs = unique(nb.absences)) %do%
{
@@ -224,9 +229,9 @@ bm_PseudoAbsences <- function(resp.var, expl.var, nb.rep = 1, strategy = 'random
out <- switch(strategy,
user.defined = bm_PseudoAbsences_user.defined(resp.var, expl.var, user.table),
- random = bm_PseudoAbsences_random(resp.var, expl.var, i.abs, length(i.rep)),
+ random = bm_PseudoAbsences_random(resp.var, expl.var, i.abs, length(i.rep), fact.aggr),
sre = bm_PseudoAbsences_sre(resp.var, expl.var, sre.quant, i.abs, length(i.rep)),
- disk = bm_PseudoAbsences_disk(resp.var, expl.var, dist.min, dist.max, i.abs, length(i.rep)))
+ disk = bm_PseudoAbsences_disk(resp.var, expl.var, dist.min, dist.max, i.abs, length(i.rep), fact.aggr))
## CASE where all available cells have been selected :
## give back only one dataset, even if several were asked
@@ -324,7 +329,8 @@ bm_PseudoAbsences <- function(resp.var, expl.var, nb.rep = 1, strategy = 'random
# Argument Check --------------------------------------------------------------
-.bm_PseudoAbsences.check.args <- function(resp.var, expl.var, nb.rep, strategy, nb.absences, sre.quant, dist.min, dist.max, user.table)
+.bm_PseudoAbsences.check.args <- function(resp.var, expl.var, nb.rep, strategy, nb.absences
+ , sre.quant, dist.min, dist.max, user.table, seed.val)
{
cat('\n\nChecking Pseudo-absence selection arguments...\n')
## 1. Check resp.var argument -----------------------------------------------
@@ -422,6 +428,11 @@ bm_PseudoAbsences <- function(resp.var, expl.var, nb.rep = 1, strategy = 'random
}
}
+ ## 8. Set the seed (if needed) ---------------------------------------------
+ if (!is.null(seed.val)) {
+ set.seed(seed.val)
+ }
+
return(list(resp.var = resp.var,
expl.var = expl.var,
nb.rep = nb.rep,
@@ -546,7 +557,7 @@ setGeneric("bm_PseudoAbsences_random",
##'
setMethod('bm_PseudoAbsences_random', signature(expl.var = "SpatVector"),
- function(resp.var, expl.var, nb.absences, nb.rep)
+ function(resp.var, expl.var, nb.absences, nb.rep, fact.aggr)
{
cat("\n > random pseudo absences selection")
@@ -587,7 +598,7 @@ setMethod('bm_PseudoAbsences_random', signature(expl.var = "SpatVector"),
env = as.data.frame(expl.var),
pa.tab = pa.tab))
} else {
- cat("\nUnsupported case yet!")
+ cat("\n Unsupported case yet!")
return(NULL)
}
})
@@ -600,7 +611,7 @@ setMethod('bm_PseudoAbsences_random', signature(expl.var = "SpatVector"),
##'
setMethod('bm_PseudoAbsences_random', signature(expl.var = "SpatRaster"),
- function(resp.var, expl.var, nb.absences, nb.rep)
+ function(resp.var, expl.var, nb.absences, nb.rep, fact.aggr)
{
cat("\n > random pseudo absences selection")
# 1. Check if NA are present in resp.var observations or not to determine which dataset to use
@@ -635,6 +646,10 @@ setMethod('bm_PseudoAbsences_random', signature(expl.var = "SpatRaster"),
cat("\n > Pseudo absences are selected in explanatory variables")
# create a mask containing all not already sampled points (presences and absences)
+ if (!is.null(fact.aggr)) {
+ expl.var <- terra::aggregate(expl.var, fact = fact.aggr)
+ }
+
## the area we want to sample
mask.out <- mask.env <- .get_data_mask(expl.var, value.out = -1)
# add presences and true absences in our mask
@@ -642,6 +657,7 @@ setMethod('bm_PseudoAbsences_random', signature(expl.var = "SpatRaster"),
mask.env[in.cells] <- NA
mask.out[in.cells] <- 1
+
# checking of nb candidates
nb.cells <- .get_nb_available_pa_cells(mask.env, PA.flag = -1)
@@ -871,7 +887,7 @@ setGeneric("bm_PseudoAbsences_disk",
##'
setMethod('bm_PseudoAbsences_disk', signature(expl.var = "SpatVector"),
- function(resp.var, expl.var, dist.min, dist.max, nb.absences, nb.rep) {
+ function(resp.var, expl.var, dist.min, dist.max, nb.absences, nb.rep, fact.aggr) {
cat("\n > Disk pseudo absences selection")
# 1. determining area which can be selected
coor <- crds(resp.var)
@@ -908,11 +924,11 @@ setMethod('bm_PseudoAbsences_disk', signature(expl.var = "SpatVector"),
##'
##' @rdname bm_PseudoAbsences
##' @export
-##' @importFrom terra rast distance subset mask crds cellFromXY extract
+##' @importFrom terra rast distance subset mask crds cellFromXY extract aggregate
##'
setMethod('bm_PseudoAbsences_disk', signature(expl.var = "SpatRaster"),
- function(resp.var, expl.var, dist.min, dist.max, nb.absences, nb.rep)
+ function(resp.var, expl.var, dist.min, dist.max, nb.absences, nb.rep, fact.aggr)
{
cat("\n > Disk pseudo absences selection")
@@ -926,7 +942,14 @@ setMethod('bm_PseudoAbsences_disk', signature(expl.var = "SpatRaster"),
nb.absences, nb.rep)
)
} else {
+
+
cat("\n > Pseudo absences are selected in explanatory variables")
+ cat("\n")
+
+ if (!is.null(fact.aggr)) {
+ expl.var <- terra::aggregate(expl.var, fact = fact.aggr)
+ }
# create a mask
dist.mask <- subset(expl.var, 1)
@@ -951,7 +974,7 @@ setMethod('bm_PseudoAbsences_disk', signature(expl.var = "SpatRaster"),
names(mask.in) = names(expl.var)
# 2. selecting randomly pseudo absences
- return(bm_PseudoAbsences_random(resp.var, expl.var = mask.in, nb.absences, nb.rep))
+ return(bm_PseudoAbsences_random(resp.var, expl.var = mask.in, nb.absences, nb.rep, fact.aggr = NULL))
}
})
diff --git a/R/bm_RunModelsLoop.R b/R/bm_RunModelsLoop.R
index 81132010..d845bc4a 100644
--- a/R/bm_RunModelsLoop.R
+++ b/R/bm_RunModelsLoop.R
@@ -22,7 +22,7 @@
##' (\emph{a random number by default})
##' @param models a \code{vector} containing model names to be computed, must be among
##' \code{ANN}, \code{CTA}, \code{FDA}, \code{GAM}, \code{GBM}, \code{GLM}, \code{MARS},
-##' \code{MAXENT}, \code{MAXNET}, \code{RF}, \code{SRE}, \code{XGBOOST}
+##' \code{MAXENT}, \code{MAXNET}, \code{RF}, \code{RFd}, \code{SRE}, \code{XGBOOST}
##' @param models.pa (\emph{optional, default} \code{NULL}) \cr
##' A \code{list} containing for each model a \code{vector} defining which pseudo-absence datasets
##' are to be used, must be among \code{colnames(bm.format@PA.table)}
@@ -49,7 +49,7 @@
##'
##' @param model a \code{character} corresponding to the model name to be computed, must be either
##' \code{ANN}, \code{CTA}, \code{FDA}, \code{GAM}, \code{GBM}, \code{GLM}, \code{MARS},
-##' \code{MAXENT}, \code{MAXNET}, \code{RF}, \code{SRE}, \code{XGBOOST}
+##' \code{MAXENT}, \code{MAXNET}, \code{RF}, \code{RFd}, \code{SRE}, \code{XGBOOST}
##' @param run.name a \code{character} corresponding to the model to be run (sp.name + pa.id +
##' run.id)
##' @param dir.name (\emph{optional, default} \code{.}) \cr
@@ -215,7 +215,8 @@ bm_RunModel <- function(model, run.name, dir.name = '.'
opt_name <- grep(model, names(bm.options@options), value = TRUE)
if (length(opt_name) == 1) {
bm.opt <- bm.options@options[[opt_name]]
- } else { stop("pitiprobleum") } ## Should not happen now
+ } else if (model == 'RF'){bm.opt <- bm.options@options[["RF.binary.randomForest.randomForest"]] }
+ else { stop("pitiprobleum") } ## Should not happen now
if (length(grep("GAM", model)) == 1) {
subclass_name <- paste0(bm.opt@model, "_", bm.opt@type, "_", bm.opt@package)
@@ -234,7 +235,9 @@ bm_RunModel <- function(model, run.name, dir.name = '.'
if (model != "MAXENT") { ## ANY MODEL BUT MAXENT ------------------------------------------------
## PRELIMINAR ---------------------------------------------------
- if (model %in% c("ANN", "MARS", "RF") & is.null(bm.opt.val$formula)) {
+
+
+ if (model %in% c("ANN", "MARS", "RF","RFd") & is.null(bm.opt.val$formula)) {
bm.opt.val$formula <- bm_MakeFormula(resp.name = resp_name
, expl.var = head(data_env)
, type = 'simple'
@@ -247,9 +250,17 @@ bm_RunModel <- function(model, run.name, dir.name = '.'
bm.opt.val$strata <- data_mod[calib.lines.vec, , drop = FALSE][ , resp_name]
bm.opt.val$sampsize <- unlist(ifelse(!is.null(bm.opt.val$sampsize), list(bm.opt.val$sampsize), length(data_sp[calib.lines.vec]))) ## TOCHECK !!
}
+ if (model == "RFd" && !is.null(bm.opt.val$type) && bm.opt.val$type == "classification") {
+ # defining occurrences as factor for doing classification and not regression in RF
+ data_mod <- data_mod %>% mutate_at(resp_name, factor)
+ bm.opt.val$strata <- data_mod[calib.lines.vec, , drop = FALSE][ , resp_name]
+ nb_presences <- summary(data_mod[calib.lines.vec,resp_name])[["1"]]
+ bm.opt.val$sampsize <- unlist(ifelse(!is.null(bm.opt.val$sampsize), list(bm.opt.val$sampsize), list(c("0" =nb_presences,"1" =nb_presences))))
+ bm.opt.val$replace <- unlist(ifelse(!is.null(bm.opt.val$replace), list(bm.opt.val$replace), TRUE))
+ }
## FILL data parameter ------------------------------------------
- if (model %in% c("ANN", "CTA", "FDA", "GAM", "GBM", "MARS", "RF")) {
+ if (model %in% c("ANN", "CTA", "FDA", "GAM", "GBM", "MARS", "RF","RFd")) {
bm.opt.val$data <- data_mod[calib.lines.vec, , drop = FALSE]
} else if (model == "GLM") {
bm.opt.val$data <- cbind(data_mod[calib.lines.vec, , drop = FALSE],
@@ -281,7 +292,7 @@ bm_RunModel <- function(model, run.name, dir.name = '.'
}
## REORGANIZE order of parameters -------------------------------
- if (model %in% c("ANN", "MARS", "RF")) {
+ if (model %in% c("ANN", "MARS", "RF","RFd")) {
bm.opt.val <- bm.opt.val[c("formula", "data", names(bm.opt.val)[which(!(names(bm.opt.val) %in% c("formula", "data")))])]
}
if (model %in% c("FDA")) {
@@ -333,7 +344,7 @@ bm_RunModel <- function(model, run.name, dir.name = '.'
}
## POSTLIMINAR --------------------------------------------------
- if (model == "RF" && !is.null(bm.opt.val$type) && bm.opt.val$type == "classification") {
+ if (model %in% c("RF","RFd") && !is.null(bm.opt.val$type) && bm.opt.val$type == "classification") {
# canceling occurences class modifications
data_mod <- data_mod %>% mutate_at(resp_name, function(.x) {
.x %>% as.character() %>% as.numeric()
diff --git a/R/bm_Tuning.R b/R/bm_Tuning.R
index 8da027c1..d73aeeda 100644
--- a/R/bm_Tuning.R
+++ b/R/bm_Tuning.R
@@ -10,7 +10,7 @@
##'
##' @param model a \code{character} corresponding to the algorithm to be tuned, must be either
##' \code{ANN}, \code{CTA}, \code{FDA}, \code{GAM}, \code{GBM}, \code{GLM}, \code{MARS},
-##' \code{MAXENT}, \code{MAXNET}, \code{RF}, \code{SRE}, \code{XGBOOST}
+##' \code{MAXENT}, \code{MAXNET}, \code{RF}, \code{RFd}, \code{SRE}, \code{XGBOOST}
##' @param tuning.fun a \code{character} corresponding to the model function name to be called
##' through \code{\link[caret]{train}} function for tuning parameters (see \code{\link{ModelsTable}}
##' dataset)
@@ -76,6 +76,7 @@
##' \item{MARS}{\code{degree}, \code{nprune}}
##' \item{MAXENT}{\code{algorithm}, \code{parallel}}
##' \item{RF}{\code{mtry}}
+##' \item{RFd}{\code{mtry}}
##' \item{SRE}{\code{quant}}
##' \item{XGBOOST}{\code{nrounds}, \code{max_depth}, \code{eta}, \code{gamma},
##' \code{colsampl_bytree}, \code{min_child_weight}, \code{subsample}}
@@ -202,10 +203,10 @@ bm_Tuning <- function(model,
weights = NULL,
ctrl.train = NULL,
params.train = list(ANN.size = c(2, 4, 6, 8),
- ANN.decay = c(0.001, 0.01, 0.05, 0.1),
+ ANN.decay = c(0.01, 0.05, 0.1),
ANN.bag = FALSE,
FDA.degree = 1:2,
- FDA.nprune = 2:38,
+ FDA.nprune = 2:25,
GAM.select = c(TRUE, FALSE),
GAM.method = c('GCV.Cp', 'GACV.Cp', 'REML', 'P-REML', 'ML', 'P-ML'),
GAM.span = c(0.3, 0.5, 0.7),
@@ -215,10 +216,11 @@ bm_Tuning <- function(model,
GBM.shrinkage = c(0.001, 0.01, 0.1),
GBM.n.minobsinnode = 10,
MARS.degree = 1:2,
- MARS.nprune = 2:max(38, 2 * ncol(bm.format@data.env.var) + 1),
+ MARS.nprune = 2:max(21, 2 * ncol(bm.format@data.env.var) + 1),
MAXENT.algorithm = 'maxnet',
MAXENT.parallel = TRUE,
RF.mtry = 1:min(10, ncol(bm.format@data.env.var)),
+ RFd.mtry = 1:min(10, ncol(bm.format@data.env.var)),
SRE.quant = c(0, 0.0125, 0.025, 0.05, 0.1),
XGBOOST.nrounds = 50,
XGBOOST.max_depth = 1,
@@ -425,7 +427,7 @@ bm_Tuning <- function(model,
tuning.form <- tuning.grid[which.max(tmp[, metric.eval]), ]
- if (model == "RF") {
+ if (model %in% c("RF","RFd")) {
tuning.form <- data.frame(mtry = tuning.grid[which.max(tmp[, metric.eval]), ])
}
@@ -475,7 +477,7 @@ bm_Tuning <- function(model,
}
}
} else {
- if (model == "RF") { typ.vec = c('simple','quadratic', 'polynomial') }
+ if (model %in% c("RF","RFd")) { typ.vec = c('simple','quadratic', 'polynomial') }
TMP <- foreach (typ = typ.vec, .combine = "rbind") %:%
foreach (intlev = 0:max.intlev, .combine = "rbind") %do%
@@ -494,7 +496,7 @@ bm_Tuning <- function(model,
}
}
argstmp$formula <- TMP[which.max(TMP[, metric.eval]), "formula"]
- if (model %in% c("ANN", "GAM", "GBM", "MARS", "RF")) {
+ if (model %in% c("ANN", "GAM", "GBM", "MARS", "RF","RFd")) {
argstmp$formula <- formula(argstmp$formula)
}
} else {
@@ -563,7 +565,7 @@ bm_Tuning <- function(model,
{
## check model --------------------------------------------------------------
.fun_testIfIn(TRUE, "model", model, c("ANN", "CTA", "FDA", "GAM", "GBM", "GLM"
- , "MARS", "MAXENT", "MAXNET", "RF", "SRE", "XGBOOST"))
+ , "MARS", "MAXENT", "MAXNET", "RF","RFd", "SRE", "XGBOOST"))
## check namespace ----------------------------------------------------------
if (!isNamespaceLoaded("caret")) {
@@ -587,10 +589,10 @@ bm_Tuning <- function(model,
.fun_testIfInherits(TRUE, "bm.format", bm.format, c("BIOMOD.formated.data", "BIOMOD.formated.data.PA"))
## check params.train -------------------------------------------------------
params.train_init = list(ANN.size = c(2, 4, 6, 8),
- ANN.decay = c(0.001, 0.01, 0.05, 0.1),
+ ANN.decay = c(0.01, 0.05, 0.1),
ANN.bag = FALSE,
FDA.degree = 1:2,
- FDA.nprune = 2:38,
+ FDA.nprune = 2:25,
GAM.select = c(TRUE, FALSE),
GAM.method = c('GCV.Cp', 'GACV.Cp', 'REML', 'P-REML', 'ML', 'P-ML'),
GAM.span = c(0.3, 0.5, 0.7),
@@ -600,10 +602,11 @@ bm_Tuning <- function(model,
GBM.shrinkage = c(0.001, 0.01, 0.1),
GBM.n.minobsinnode = 10,
MARS.degree = 1:2,
- MARS.nprune = 2:max(38, 2 * ncol(bm.format@data.env.var) + 1),
+ MARS.nprune = 2:max(21, 2 * ncol(bm.format@data.env.var) + 1),
MAXENT.algorithm = 'maxnet',
MAXENT.parallel = TRUE,
RF.mtry = 1:min(10, ncol(bm.format@data.env.var)),
+ RFd.mtry = 1:min(10, ncol(bm.format@data.env.var)),
SRE.quant = c(0, 0.0125, 0.025, 0.05, 0.1),
XGBOOST.nrounds = 50,
XGBOOST.max_depth = 1,
@@ -619,6 +622,7 @@ bm_Tuning <- function(model,
}
}
params.train = params.train_init
+
## check evaluation metric --------------------------------------------------
if (model == "MAXENT") {
.fun_testIfIn(TRUE, "metric.eval", metric.eval, c("auc.val.avg", "auc.diff.avg", "or.mtp.avg", "or.10p.avg", "AICc"))
@@ -647,9 +651,10 @@ bm_Tuning <- function(model,
train.params <- all.params[[tuning.fun]]
## get tuning grid through params.train -------------------------------------
tuning.grid <- NULL
- if (model %in% c("ANN", "FDA", "GAM", "GBM", "MARS", "RF", "XGBOOST")) {
+
+ if (model %in% c("ANN", "FDA", "GAM", "GBM", "MARS", "RF", "RFd", "XGBOOST")) {
if (!(model == "GAM")) {
- params.train = params.train[grep(model, names(params.train))]
+ params.train = params.train[grep(paste0(model,"\\."), names(params.train))]
.fun_testIfIn(TRUE, "names(params.train)", names(params.train), paste0(model, ".", train.params$params))
} else if (tuning.fun == "gamLoess"){
params.train = params.train[c('GAM.span', "GAM.degree")]
@@ -663,7 +668,7 @@ bm_Tuning <- function(model,
## get tuning length --------------------------------------------------------
tuning.length <- 1
if (model == "CTA") tuning.length <- 30
- if (model == "RF") tuning.length <- min(30, ncol(bm.format@data.env.var))
+ if (model == "RF" | model == "RFd") tuning.length <- min(30, ncol(bm.format@data.env.var))
## Do formula ---------------------------------------------------------------
if (model %in% c("MAXENT", "MAXNET", "SRE", "XGBOOST") && do.formula == TRUE) {
diff --git a/R/sysdata.rda b/R/sysdata.rda
index 61adfad3..ca3d04d5 100644
Binary files a/R/sysdata.rda and b/R/sysdata.rda differ
diff --git a/README.md b/README.md
index 0c0dec2b..6dca2476 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
[![Cran Version](https://www.r-pkg.org/badges/version/biomod2?color=yellow)](https://cran.r-project.org/package=biomod2)
-[![Github Version](https://img.shields.io/badge/devel%20version-4.2--5--2-blue.svg)](https://github.com/biomodhub/biomod2)
+[![Github Version](https://img.shields.io/badge/devel%20version-4.2--6-blue.svg)](https://github.com/biomodhub/biomod2)
[![Last Commit](https://img.shields.io/github/last-commit/biomodhub/biomod2.svg)](https://github.com/biomodhub/biomod2/commits/master)
[![R-CMD-check](https://github.com/biomodhub/biomod2/actions/workflows/R-CMD-check.yml/badge.svg)](https://github.com/biomodhub/biomod2/actions/workflows/R-CMD-check.yml)
@@ -49,7 +49,7 @@ install.packages("biomod2", dependencies = TRUE)
-- **Development version** [![v](https://img.shields.io/badge/devel%20version-4.2--5--2-blue.svg)](https://github.com/biomodhub/biomod2) from [biomodhub](https://github.com/biomodhub/biomod2) :
+- **Development version** [![v](https://img.shields.io/badge/devel%20version-4.2--6-blue.svg)](https://github.com/biomodhub/biomod2) from [biomodhub](https://github.com/biomodhub/biomod2) :
```R
library(devtools)
@@ -58,25 +58,22 @@ devtools::install_github("biomodhub/biomod2", dependencies = TRUE)
-### `biomod 4.2-5` - Modeling options & Tuning Update
+### `biomod 4.2-6` - Improved OptionsBigBoss and new model
-**! `biomod2 4.2-5` is now available on CRAN !**
`/!\` Please **feel free to indicate if you notice some strange new behaviors** !
#### What is changed ?
-- modeling options are now automatically retrieved from single models functions, normally allowing the use of all arguments taken into account by these functions
-- tuning has been cleaned up, but keep in mind that it is still a quite long running process
-- in consequence, `BIOMOD_ModelingOptions` and `BIOMOD_Tuning` functions become secundary functions (`bm_ModelingOptions` and `bm_Tuning`), and modeling options can be directly built through `BIOMOD_Modeling` function
+- To improve the models, we made some change in the options for [**`OptionsBigboss`**](https://biomodhub.github.io/biomod2/reference/OptionsBigboss.html). (This only affects the ANN, CTA and RF models.) You can check all your options with the `get_options()` function.
+- To reduce the tuning calculation time, we update the tuning ranges for ANN, FDA and MARS models.
#### What is new ?
-- `ModelsTable` and `OptionsBigboss` datasets (*note that improvement of bigboss modeling options is planned in near future*)
-- 3 new vignettes have been created :
- - [data preparation](https://biomodhub.github.io/biomod2/articles/vignette_dataPreparation.html) (*questions you should ask yourself before modeling*)
- - [cross-validation](https://biomodhub.github.io/biomod2/articles/vignette_crossValidation.html) (*to prepare your own calibration / validation datasets*)
- - [modeling options](https://biomodhub.github.io/biomod2/articles/vignette_dataPreparation.html) (*to help you navigate through the new way of parameterizing single models*)
+- `biomod2` has a new model: **RFd**. It's a Random Forest model with a down-sampling method.
+- You can now define _seed.val_ for `bm_PseudoAbsences()` and `BIOMOD_FormatingData()`.
+- New _fact.aggr_ argument, for pseudo-absences selection with the random and disk methods, allows to reduce the resolution of the environment.
+- Possibility to give the same options for all datasets with _"for_all_datasets"_ in `bm_ModelingOptions()`.
@@ -86,7 +83,26 @@ devtools::install_github("biomodhub/biomod2", dependencies = TRUE)
+
+
+
+### `biomod 4.2-5` - Modeling options & Tuning Update
+
+#### What is changed ?
+
+- modeling options are now automatically retrieved from single models functions, normally allowing the use of all arguments taken into account by these functions
+- tuning has been cleaned up, but keep in mind that it is still a quite long running process
+- in consequence, `BIOMOD_ModelingOptions` and `BIOMOD_Tuning` functions become secundary functions (`bm_ModelingOptions` and `bm_Tuning`), and modeling options can be directly built through `BIOMOD_Modeling` function
+
+#### What is new ?
+- `ModelsTable` and `OptionsBigboss` datasets (*note that improvement of bigboss modeling options is planned in near future*)
+- 3 new vignettes have been created :
+ - [data preparation](https://biomodhub.github.io/biomod2/articles/vignette_dataPreparation.html) (*questions you should ask yourself before modeling*)
+ - [cross-validation](https://biomodhub.github.io/biomod2/articles/vignette_crossValidation.html) (*to prepare your own calibration / validation datasets*)
+ - [modeling options](https://biomodhub.github.io/biomod2/articles/vignette_dataPreparation.html) (*to help you navigate through the new way of parameterizing single models*)
+
+
### `biomod 4.2` - Terra Update
diff --git a/data/ModelsTable.rda b/data/ModelsTable.rda
index 61adfad3..a2995ad8 100644
Binary files a/data/ModelsTable.rda and b/data/ModelsTable.rda differ
diff --git a/data/OptionsBigboss.rda b/data/OptionsBigboss.rda
index c360dbc4..ca3d04d5 100644
Binary files a/data/OptionsBigboss.rda and b/data/OptionsBigboss.rda differ
diff --git a/docs/404.html b/docs/404.html
index dd978977..7fe67438 100644
--- a/docs/404.html
+++ b/docs/404.html
@@ -32,7 +32,7 @@
diff --git a/docs/articles/index.html b/docs/articles/index.html
index 4d0a2198..f5edf5d5 100644
--- a/docs/articles/index.html
+++ b/docs/articles/index.html
@@ -17,7 +17,7 @@
diff --git a/docs/articles/news.html b/docs/articles/news.html
index 945f6212..0c2cf331 100644
--- a/docs/articles/news.html
+++ b/docs/articles/news.html
@@ -33,7 +33,7 @@
@@ -145,7 +145,29 @@
OptionsBigboss
have been
+modified (concern only ANN, CTA and RF models)bm_pseudoAbsences
and
+BIOMOD_FormatingData
.bm_ModelingOptions
.ProbDensFunc()
function to package to produce nice
@@ -1210,7 +1232,7 @@ Thuiller W, Georges D, Gueguen M, Engler R, Breiner F, Lafourcade B, Patin R, Blancheteau H (2024). biomod2: Ensemble Platform for Species Distribution Modeling. -R package version 4.2-5-2, https://biomodhub.github.io/biomod2/. +R package version 4.2-6, https://biomodhub.github.io/biomod2/.
@Manual{, title = {biomod2: Ensemble Platform for Species Distribution Modeling}, author = {Wilfried Thuiller and Damien Georges and Maya Gueguen and Robin Engler and Frank Breiner and Bruno Lafourcade and Remi Patin and Helene Blancheteau}, year = {2024}, - note = {R package version 4.2-5-2}, + note = {R package version 4.2-6}, url = {https://biomodhub.github.io/biomod2/}, }diff --git a/docs/index.html b/docs/index.html index 6ed272d6..c86e0a05 100644 --- a/docs/index.html +++ b/docs/index.html @@ -33,7 +33,7 @@
biomod 4.2-5
- Modeling options & Tuning Update
+biomod 4.2-6
- Improved OptionsBigBoss and new model
! biomod2 4.2-5
is now available on CRAN !
/!\
Please feel free to indicate if you notice some strange new behaviors !
OptionsBigboss
. (This only affects the ANN, CTA and RF models.) You can check all your options with the get_options()
function.biomod2
has a new model: RFd. It’s a Random Forest model with a down-sampling method.bm_PseudoAbsences()
and BIOMOD_FormatingData()
.bm_ModelingOptions()
.+
+biomod 4.2-5
- Modeling options & Tuning Update
+BIOMOD_ModelingOptions
and BIOMOD_Tuning
functions become secundary functions (bm_ModelingOptions
and bm_Tuning
), and modeling options can be directly built through BIOMOD_Modeling
functionbiomod 4.2
- Terra Update
/!\
Package fresh start… meaning some changes in function names and parameters. We apologize for the trouble >{o.o}<
Sorry for the inconvenience, and please feel free to indicate if you notice some strange new behaviors !
ggplot2
@@ -324,7 +351,7 @@ BIOMOD_FormatingData()
output object class (with pseudo-absence
PA.dist.min = 0,
PA.dist.max = NULL,
PA.sre.quant = 0.025,
+ PA.fact.aggr = NULL,
PA.user.table = NULL,
na.rm = TRUE,
- filter.raster = FALSE
+ filter.raster = FALSE,
+ seed.val = NULL
)
# S4 method for numeric,SpatRaster
@@ -153,9 +155,11 @@ BIOMOD_FormatingData()
output object class (with pseudo-absence
PA.dist.min = 0,
PA.dist.max = NULL,
PA.sre.quant = 0.025,
+ PA.fact.aggr = NULL,
PA.user.table = NULL,
na.rm = TRUE,
- filter.raster = FALSE
+ filter.raster = FALSE,
+ seed.val = NULL
)(optional, default NULL
)
+If strategy = 'random'
or strategy = 'disk'
, a integer
defining the factor of aggregation to reduce the resolution
(optional, default NULL
)
If pseudo-absence selection and PA.strategy = 'user.defined'
, a matrix
or
@@ -287,6 +296,11 @@
env
is of raster type, a logical
value defining whether sp
is to be filtered when several points occur in the same raster cell(optional, default NULL
)
+An integer
value corresponding to the new seed value to be set
(optional, default NULL
)
+If strategy = 'random'
or strategy = 'disk'
, a integer
defining the factor of aggregation to reduce the resolution
(optional, default NULL
)
If pseudo-absence selection and PA.strategy = 'user.defined'
, a matrix
or
@@ -268,6 +275,11 @@
expl.var
is of raster type, a logical
value defining whether resp.var
is to be filtered when several points occur in the same raster cell(optional, default NULL
)
+An integer
value corresponding to the new seed value to be set
a vector
containing model names to be computed, must be among
ANN
, CTA
, FDA
, GAM
, GBM
, GLM
, MARS
,
-MAXENT
, MAXNET
, RF
, SRE
, XGBOOST
MAXENT
, MAXNET
, RF
, RFd
, SRE
, XGBOOST
MAXNET
: Maximum Entropy (maxnet
)
RF
: Random Forest (randomForest
)
RFd
: Random Forest downsampled (randomForest
)
SRE
: Surface Range Envelop or usually called BIOCLIM (bm_SRE
)
XGBOOST
: eXtreme Gradient Boosting Training (xgboost
)
A BIOMOD.models.options
object with some changed values :
ANN.binary.nnet.nnet
size = 5
decay = 5
decay = 0.1
trace = FALSE
rang = 0.1
maxit = 200
CTA.binary.rpart.rpart
method = 'class'
control = list(xval = 5, minbucket = 5, minsplit = 5, cp = 0.001, maxdepth = 25)
control = list(xval = 5, minbucket = 5, minsplit = 5, cp = 0.001, maxdepth = 10)
cost = NULL
RF.binary.randomForest.randomForest
type = 'classification'
ntree = 500
mtry = NULL
mtry = 2
strata = factor(c(0, 1))
sampsize = NULL
nodesize = 5
maxnodes = NULL
RFd.binary.randomForest.randomForest
type = 'classification'
ntree = 500
mtry = 2
strata = factor(c(0, 1))
sampsize = NULL
nodesize = 5
MAXNET_biomod2_model
: model_class
is
MAXNET
RF_biomod2_model
: model_class
is RF
RFd_biomod2_model
: model_class
is RFd
SRE_biomod2_model
: model_class
is SRE
bm_ModelingOptions(
data.type,
models = c("ANN", "CTA", "FDA", "GAM", "GBM", "GLM", "MARS", "MAXENT", "MAXNET", "RF",
- "SRE", "XGBOOST"),
+ "RFd", "SRE", "XGBOOST"),
strategy,
user.val = NULL,
user.base = "bigboss",
@@ -134,7 +134,7 @@ Arguments
- models
a vector
containing model names to be computed, must be among
ANN
, CTA
, FDA
, GAM
, GBM
, GLM
, MARS
,
-MAXENT
, MAXNET
, RF
, SRE
, XGBOOST
+MAXENT
, MAXNET
, RF
, RFd
, SRE
, XGBOOST
- strategy
@@ -195,7 +195,9 @@ Details
function
-
To define the same options for all datasets of a model, you can provide these options as a list in +user.val with the names "for_all_datasets".
+MAXENT
being the only external model (not called through a R
package),
diff --git a/docs/reference/bm_PlotEvalMean.html b/docs/reference/bm_PlotEvalMean.html
index 0d7a6595..51461704 100644
--- a/docs/reference/bm_PlotEvalMean.html
+++ b/docs/reference/bm_PlotEvalMean.html
@@ -22,7 +22,7 @@
disk
pseudo-absence selection (in meters)
+(optional, default NULL
)
+If strategy = 'random'
or strategy = 'disk'
, a integer
defining the factor of aggregation to reduce the resolution
(optional, default NULL
)
If strategy = 'user.defined'
, a matrix
or data.frame
with as many rows as
@@ -231,6 +240,11 @@
(optional, default NULL
)
+An integer
value corresponding to the new seed value to be set
(optional, one or several of the above arguments depending on the selected method)
a vector
containing model names to be computed, must be among
ANN
, CTA
, FDA
, GAM
, GBM
, GLM
, MARS
,
-MAXENT
, MAXNET
, RF
, SRE
, XGBOOST
MAXENT
, MAXNET
, RF
, RFd
, SRE
, XGBOOST
a character
corresponding to the model name to be computed, must be either
ANN
, CTA
, FDA
, GAM
, GBM
, GLM
, MARS
,
-MAXENT
, MAXNET
, RF
, SRE
, XGBOOST
MAXENT
, MAXNET
, RF
, RFd
, SRE
, XGBOOST
a character
corresponding to the algorithm to be tuned, must be either
ANN
, CTA
, FDA
, GAM
, GBM
, GLM
, MARS
,
-MAXENT
, MAXNET
, RF
, SRE
, XGBOOST
MAXENT
, MAXNET
, RF
, RFd
, SRE
, XGBOOST
mtry
mtry
quant
biomod2
# S4 method for RF_biomod2_model,data.frame
predict2(object, newdata, ...)
+# S4 method for RFd_biomod2_model,SpatRaster
+predict2(object, newdata, ...)
+
+# S4 method for RFd_biomod2_model,data.frame
+predict2(object, newdata, ...)
+
# S4 method for SRE_biomod2_model,SpatRaster
predict2(object, newdata, ...)
diff --git a/man/BIOMOD.formated.data.PA.Rd b/man/BIOMOD.formated.data.PA.Rd
index e79be0eb..d2a1d533 100644
--- a/man/BIOMOD.formated.data.PA.Rd
+++ b/man/BIOMOD.formated.data.PA.Rd
@@ -23,9 +23,11 @@
PA.dist.min = 0,
PA.dist.max = NULL,
PA.sre.quant = 0.025,
+ PA.fact.aggr = NULL,
PA.user.table = NULL,
na.rm = TRUE,
- filter.raster = FALSE
+ filter.raster = FALSE,
+ seed.val = NULL
)
\S4method{BIOMOD.formated.data.PA}{numeric,SpatRaster}(
@@ -43,9 +45,11 @@
PA.dist.min = 0,
PA.dist.max = NULL,
PA.sre.quant = 0.025,
+ PA.fact.aggr = NULL,
PA.user.table = NULL,
na.rm = TRUE,
- filter.raster = FALSE
+ filter.raster = FALSE,
+ seed.val = NULL
)
}
\arguments{
@@ -128,6 +132,9 @@ If pseudo-absence selection and \code{PA.strategy = 'sre'}, a \code{numeric} bet
and \code{0.5} defining the half-quantile used to make the \code{sre} pseudo-absence selection
(see Details)}
+\item{PA.fact.aggr}{(\emph{optional, default} \code{NULL}) \cr
+If \code{strategy = 'random'} or \code{strategy = 'disk'}, a \code{integer} defining the factor of aggregation to reduce the resolution}
+
\item{PA.user.table}{(\emph{optional, default} \code{NULL}) \cr
If pseudo-absence selection and \code{PA.strategy = 'user.defined'}, a \code{matrix} or
\code{data.frame} with as many rows as \code{resp.var} values, as many columns as
@@ -141,6 +148,9 @@ values for explanatory variables should be removed from the analysis or not}
\item{filter.raster}{(\emph{optional, default} \code{FALSE}) \cr
If \code{env} is of raster type, a \code{logical} value defining whether \code{sp}
is to be filtered when several points occur in the same raster cell}
+
+\item{seed.val}{(\emph{optional, default} \code{NULL}) \cr
+An \code{integer} value corresponding to the new seed value to be set}
}
\description{
Class returned by \code{\link{BIOMOD_FormatingData}}, and used by
diff --git a/man/BIOMOD_FormatingData.Rd b/man/BIOMOD_FormatingData.Rd
index f7c6f2cd..a74825d4 100644
--- a/man/BIOMOD_FormatingData.Rd
+++ b/man/BIOMOD_FormatingData.Rd
@@ -19,9 +19,11 @@ BIOMOD_FormatingData(
PA.dist.min = 0,
PA.dist.max = NULL,
PA.sre.quant = 0.025,
+ PA.fact.aggr = NULL,
PA.user.table = NULL,
na.rm = TRUE,
- filter.raster = FALSE
+ filter.raster = FALSE,
+ seed.val = NULL
)
}
\arguments{
@@ -105,6 +107,9 @@ If pseudo-absence selection and \code{PA.strategy = 'sre'}, a \code{numeric} bet
and \code{0.5} defining the half-quantile used to make the \code{sre} pseudo-absence selection
(see Details)}
+\item{PA.fact.aggr}{(\emph{optional, default} \code{NULL}) \cr
+If \code{strategy = 'random'} or \code{strategy = 'disk'}, a \code{integer} defining the factor of aggregation to reduce the resolution}
+
\item{PA.user.table}{(\emph{optional, default} \code{NULL}) \cr
If pseudo-absence selection and \code{PA.strategy = 'user.defined'}, a \code{matrix} or
\code{data.frame} with as many rows as \code{resp.var} values, as many columns as
@@ -118,6 +123,9 @@ explanatory variables should be removed from the analysis or not}
\item{filter.raster}{(\emph{optional, default} \code{FALSE}) \cr
If \code{expl.var} is of raster type, a \code{logical} value defining whether \code{resp.var}
is to be filtered when several points occur in the same raster cell}
+
+\item{seed.val}{(\emph{optional, default} \code{NULL}) \cr
+An \code{integer} value corresponding to the new seed value to be set}
}
\value{
A \code{\link{BIOMOD.formated.data}} object that can be used to build species distribution
diff --git a/man/BIOMOD_Modeling.Rd b/man/BIOMOD_Modeling.Rd
index d1e4676b..58a1a0ea 100644
--- a/man/BIOMOD_Modeling.Rd
+++ b/man/BIOMOD_Modeling.Rd
@@ -48,7 +48,7 @@ object returned by the \code{\link{BIOMOD_FormatingData}} function}
\item{models}{a \code{vector} containing model names to be computed, must be among
\code{ANN}, \code{CTA}, \code{FDA}, \code{GAM}, \code{GBM}, \code{GLM}, \code{MARS},
-\code{MAXENT}, \code{MAXNET}, \code{RF}, \code{SRE}, \code{XGBOOST}}
+\code{MAXENT}, \code{MAXNET}, \code{RF}, \code{RFd}, \code{SRE}, \code{XGBOOST}}
\item{models.pa}{(\emph{optional, default} \code{NULL}) \cr
A \code{list} containing for each model a \code{vector} defining which pseudo-absence datasets
@@ -198,6 +198,7 @@ metrics (see Details).
(\url{https://biodiversityinformatics.amnh.org/open_source/maxent/})
\item \code{MAXNET} : Maximum Entropy (\code{\link[maxnet]{maxnet}})
\item \code{RF} : Random Forest (\code{\link[randomForest]{randomForest}})
+ \item \code{RFd} : Random Forest downsampled (\code{\link[randomForest]{randomForest}})
\item \code{SRE} : Surface Range Envelop or usually called BIOCLIM (\code{\link{bm_SRE}})
\item \code{XGBOOST} : eXtreme Gradient Boosting Training (\code{\link[xgboost]{xgboost}})
}}
diff --git a/man/OptionsBigboss.Rd b/man/OptionsBigboss.Rd
index 8549fa4e..0a7b5582 100644
--- a/man/OptionsBigboss.Rd
+++ b/man/OptionsBigboss.Rd
@@ -11,7 +11,7 @@ A \code{\link{BIOMOD.models.options}} object with some changed values :
\item{\code{ANN.binary.nnet.nnet}}{
\itemize{
\item \code{size = 5}
- \item \code{decay = 5}
+ \item \code{decay = 0.1}
\item \code{trace = FALSE}
\item \code{rang = 0.1}
\item \code{maxit = 200}
@@ -20,7 +20,7 @@ A \code{\link{BIOMOD.models.options}} object with some changed values :
\item{\code{CTA.binary.rpart.rpart}}{
\itemize{
\item \code{method = 'class'}
- \item \code{control = list(xval = 5, minbucket = 5, minsplit = 5, cp = 0.001, maxdepth = 25)}
+ \item \code{control = list(xval = 5, minbucket = 5, minsplit = 5, cp = 0.001, maxdepth = 10)}
\item \code{cost = NULL}
}
}
@@ -78,7 +78,18 @@ A \code{\link{BIOMOD.models.options}} object with some changed values :
\itemize{
\item \code{type = 'classification'}
\item \code{ntree = 500}
- \item \code{mtry = NULL}
+ \item \code{mtry = 2}
+ \item \code{strata = factor(c(0, 1))}
+ \item \code{sampsize = NULL}
+ \item \code{nodesize = 5}
+ \item \code{maxnodes = NULL}
+ }
+ }
+ \item{\code{RFd.binary.randomForest.randomForest}}{
+ \itemize{
+ \item \code{type = 'classification'}
+ \item \code{ntree = 500}
+ \item \code{mtry = 2}
\item \code{strata = factor(c(0, 1))}
\item \code{sampsize = NULL}
\item \code{nodesize = 5}
diff --git a/man/biomod2_model.Rd b/man/biomod2_model.Rd
index 25add197..e690b50f 100644
--- a/man/biomod2_model.Rd
+++ b/man/biomod2_model.Rd
@@ -14,6 +14,7 @@
\alias{MAXENT_biomod2_model-class}
\alias{MAXNET_biomod2_model-class}
\alias{RF_biomod2_model-class}
+\alias{RFd_biomod2_model-class}
\alias{SRE_biomod2_model-class}
\alias{XGBOOST_biomod2_model-class}
\alias{show,biomod2_model-method}
@@ -43,6 +44,7 @@ Class created by \code{\link{BIOMOD_Modeling}} and \code{\link{bm_RunModel}}
\item \code{MAXNET_biomod2_model} : \code{model_class} is
\code{MAXNET}
\item \code{RF_biomod2_model} : \code{model_class} is \code{RF}
+ \item \code{RFd_biomod2_model} : \code{model_class} is \code{RFd}
\item \code{SRE_biomod2_model} : \code{model_class} is \code{SRE}
}
}
@@ -87,6 +89,7 @@ showClass("MARS_biomod2_model")
showClass("MAXENT_biomod2_model")
showClass("MAXNET_biomod2_model")
showClass("RF_biomod2_model")
+showClass("RFd_biomod2_model")
showClass("SRE_biomod2_model")
}
diff --git a/man/bm_ModelingOptions.Rd b/man/bm_ModelingOptions.Rd
index 1cfbf8cf..8b113963 100644
--- a/man/bm_ModelingOptions.Rd
+++ b/man/bm_ModelingOptions.Rd
@@ -7,7 +7,7 @@
bm_ModelingOptions(
data.type,
models = c("ANN", "CTA", "FDA", "GAM", "GBM", "GLM", "MARS", "MAXENT", "MAXNET", "RF",
- "SRE", "XGBOOST"),
+ "RFd", "SRE", "XGBOOST"),
strategy,
user.val = NULL,
user.base = "bigboss",
@@ -21,7 +21,7 @@ bm_ModelingOptions(
\item{models}{a \code{vector} containing model names to be computed, must be among
\code{ANN}, \code{CTA}, \code{FDA}, \code{GAM}, \code{GBM}, \code{GLM}, \code{MARS},
-\code{MAXENT}, \code{MAXNET}, \code{RF}, \code{SRE}, \code{XGBOOST}}
+\code{MAXENT}, \code{MAXNET}, \code{RF}, \code{RFd}, \code{SRE}, \code{XGBOOST}}
\item{strategy}{a \code{character} corresponding to the method to select models' parameters
values, must be either \code{default}, \code{bigboss}, \code{user.defined}, \code{tuned}}
@@ -66,6 +66,9 @@ argument :
\item{tuned}{default parameter values are updated by calling \code{\link{bm_Tuning}}
function}
}
+
+To define the same options for all datasets of a model, you can provide these options as a list in
+user.val with the names "for_all_datasets".
}
\note{
\code{MAXENT} being the only external model (not called through a \code{R} package),
diff --git a/man/bm_PseudoAbsences.Rd b/man/bm_PseudoAbsences.Rd
index 561e1d1f..d9c1b75d 100644
--- a/man/bm_PseudoAbsences.Rd
+++ b/man/bm_PseudoAbsences.Rd
@@ -25,7 +25,9 @@ bm_PseudoAbsences(
sre.quant = 0,
dist.min = 0,
dist.max = NULL,
- user.table = NULL
+ fact.aggr = NULL,
+ user.table = NULL,
+ seed.val = NULL
)
bm_PseudoAbsences_user.defined(resp.var, expl.var, ...)
@@ -36,9 +38,9 @@ bm_PseudoAbsences_user.defined(resp.var, expl.var, ...)
bm_PseudoAbsences_random(resp.var, expl.var, ...)
-\S4method{bm_PseudoAbsences_random}{ANY,SpatVector}(resp.var, expl.var, nb.absences, nb.rep)
+\S4method{bm_PseudoAbsences_random}{ANY,SpatVector}(resp.var, expl.var, nb.absences, nb.rep, fact.aggr)
-\S4method{bm_PseudoAbsences_random}{ANY,SpatRaster}(resp.var, expl.var, nb.absences, nb.rep)
+\S4method{bm_PseudoAbsences_random}{ANY,SpatRaster}(resp.var, expl.var, nb.absences, nb.rep, fact.aggr)
bm_PseudoAbsences_sre(resp.var, expl.var, ...)
@@ -54,7 +56,8 @@ bm_PseudoAbsences_disk(resp.var, expl.var, ...)
dist.min,
dist.max,
nb.absences,
- nb.rep
+ nb.rep,
+ fact.aggr
)
\S4method{bm_PseudoAbsences_disk}{ANY,SpatRaster}(
@@ -63,7 +66,8 @@ bm_PseudoAbsences_disk(resp.var, expl.var, ...)
dist.min,
dist.max,
nb.absences,
- nb.rep
+ nb.rep,
+ fact.aggr
)
}
\arguments{
@@ -99,12 +103,18 @@ used to make the \code{disk} pseudo-absence selection (in meters)}
If \code{strategy = 'disk'}, a \code{numeric} defining the maximal distance to presence points
used to make the \code{disk} pseudo-absence selection (in meters)}
+\item{fact.aggr}{(\emph{optional, default} \code{NULL}) \cr
+If \code{strategy = 'random'} or \code{strategy = 'disk'}, a \code{integer} defining the factor of aggregation to reduce the resolution}
+
\item{user.table}{(\emph{optional, default} \code{NULL}) \cr
If \code{strategy = 'user.defined'}, a \code{matrix} or \code{data.frame} with as many rows as
\code{resp.var} values, as many columns as \code{nb.rep}, and containing \code{TRUE} or
\code{FALSE} values defining which points will be used to build the species distribution
model(s) for each repetition}
+\item{seed.val}{(\emph{optional, default} \code{NULL}) \cr
+An \code{integer} value corresponding to the new seed value to be set}
+
\item{\ldots}{(\emph{optional, one or several of the above arguments depending on the selected
method})}
}
diff --git a/man/bm_RunModelsLoop.Rd b/man/bm_RunModelsLoop.Rd
index de43aa1c..99ca370a 100644
--- a/man/bm_RunModelsLoop.Rd
+++ b/man/bm_RunModelsLoop.Rd
@@ -55,7 +55,7 @@ obtained with the \code{\link{bm_CrossValidation}} function}
\item{models}{a \code{vector} containing model names to be computed, must be among
\code{ANN}, \code{CTA}, \code{FDA}, \code{GAM}, \code{GBM}, \code{GLM}, \code{MARS},
-\code{MAXENT}, \code{MAXNET}, \code{RF}, \code{SRE}, \code{XGBOOST}}
+\code{MAXENT}, \code{MAXNET}, \code{RF}, \code{RFd}, \code{SRE}, \code{XGBOOST}}
\item{models.pa}{(\emph{optional, default} \code{NULL}) \cr
A \code{list} containing for each model a \code{vector} defining which pseudo-absence datasets
@@ -89,7 +89,7 @@ A \code{logical} value defining whether the progress bar is to be rendered or no
\item{model}{a \code{character} corresponding to the model name to be computed, must be either
\code{ANN}, \code{CTA}, \code{FDA}, \code{GAM}, \code{GBM}, \code{GLM}, \code{MARS},
-\code{MAXENT}, \code{MAXNET}, \code{RF}, \code{SRE}, \code{XGBOOST}}
+\code{MAXENT}, \code{MAXNET}, \code{RF}, \code{RFd}, \code{SRE}, \code{XGBOOST}}
\item{run.name}{a \code{character} corresponding to the model to be run (sp.name + pa.id +
run.id)}
diff --git a/man/bm_Tuning.Rd b/man/bm_Tuning.Rd
index c9a4d5dc..eb6a7209 100644
--- a/man/bm_Tuning.Rd
+++ b/man/bm_Tuning.Rd
@@ -16,24 +16,24 @@ bm_Tuning(
metric.AIC = "AIC",
weights = NULL,
ctrl.train = NULL,
- params.train = list(ANN.size = c(2, 4, 6, 8), ANN.decay = c(0.001, 0.01, 0.05, 0.1),
- ANN.bag = FALSE, FDA.degree = 1:2, FDA.nprune = 2:38, GAM.select = c(TRUE, FALSE),
- GAM.method = c("GCV.Cp", "GACV.Cp", "REML", "P-REML", "ML", "P-ML"), GAM.span =
- c(0.3, 0.5, 0.7), GAM.degree = 1, GBM.n.trees = c(500, 1000, 2500),
- GBM.interaction.depth = seq(2, 8, by = 3), GBM.shrinkage = c(0.001, 0.01, 0.1),
- GBM.n.minobsinnode = 10, MARS.degree = 1:2, MARS.nprune = 2:max(38, 2 *
- ncol(bm.format@data.env.var) + 1), MAXENT.algorithm = "maxnet",
- MAXENT.parallel
- = TRUE, RF.mtry = 1:min(10, ncol(bm.format@data.env.var)), SRE.quant = c(0, 0.0125,
- 0.025, 0.05, 0.1), XGBOOST.nrounds = 50, XGBOOST.max_depth = 1, XGBOOST.eta = c(0.3,
- 0.4), XGBOOST.gamma = 0, XGBOOST.colsample_bytree = c(0.6, 0.8),
- XGBOOST.min_child_weight = 1, XGBOOST.subsample = 0.5)
+ params.train = list(ANN.size = c(2, 4, 6, 8), ANN.decay = c(0.01, 0.05, 0.1), ANN.bag =
+ FALSE, FDA.degree = 1:2, FDA.nprune = 2:25, GAM.select = c(TRUE, FALSE), GAM.method =
+ c("GCV.Cp", "GACV.Cp", "REML", "P-REML", "ML", "P-ML"), GAM.span = c(0.3, 0.5, 0.7),
+ GAM.degree = 1, GBM.n.trees = c(500, 1000, 2500), GBM.interaction.depth = seq(2, 8,
+ by = 3), GBM.shrinkage = c(0.001, 0.01, 0.1), GBM.n.minobsinnode = 10, MARS.degree =
+ 1:2, MARS.nprune = 2:max(21, 2 * ncol(bm.format@data.env.var) + 1), MAXENT.algorithm
+ = "maxnet",
+ MAXENT.parallel = TRUE, RF.mtry = 1:min(10,
+ ncol(bm.format@data.env.var)), RFd.mtry = 1:min(10, ncol(bm.format@data.env.var)),
+ SRE.quant = c(0, 0.0125, 0.025, 0.05, 0.1), XGBOOST.nrounds = 50, XGBOOST.max_depth =
+ 1, XGBOOST.eta = c(0.3, 0.4), XGBOOST.gamma = 0, XGBOOST.colsample_bytree = c(0.6,
+ 0.8), XGBOOST.min_child_weight = 1, XGBOOST.subsample = 0.5)
)
}
\arguments{
\item{model}{a \code{character} corresponding to the algorithm to be tuned, must be either
\code{ANN}, \code{CTA}, \code{FDA}, \code{GAM}, \code{GBM}, \code{GLM}, \code{MARS},
-\code{MAXENT}, \code{MAXNET}, \code{RF}, \code{SRE}, \code{XGBOOST}}
+\code{MAXENT}, \code{MAXNET}, \code{RF}, \code{RFd}, \code{SRE}, \code{XGBOOST}}
\item{tuning.fun}{a \code{character} corresponding to the model function name to be called
through \code{\link[caret]{train}} function for tuning parameters (see \code{\link{ModelsTable}}
@@ -110,6 +110,7 @@ Currently, the available parameters to be tuned are the following :
\item{MARS}{\code{degree}, \code{nprune}}
\item{MAXENT}{\code{algorithm}, \code{parallel}}
\item{RF}{\code{mtry}}
+ \item{RFd}{\code{mtry}}
\item{SRE}{\code{quant}}
\item{XGBOOST}{\code{nrounds}, \code{max_depth}, \code{eta}, \code{gamma},
\code{colsampl_bytree}, \code{min_child_weight}, \code{subsample}}
diff --git a/man/predict2.bm.Rd b/man/predict2.bm.Rd
index fe235ca1..b1912b35 100644
--- a/man/predict2.bm.Rd
+++ b/man/predict2.bm.Rd
@@ -24,6 +24,8 @@
\alias{predict2.MAXNET_biomod2_model.data.frame}
\alias{predict2.RF_biomod2_model.SpatRaster}
\alias{predict2.RF_biomod2_model.data.frame}
+\alias{predict2.RFd_biomod2_model.SpatRaster}
+\alias{predict2.RFd_biomod2_model.data.frame}
\alias{predict2.SRE_biomod2_model.SpatRaster}
\alias{predict2.SRE_biomod2_model.data.frame}
\alias{predict2,biomod2_model,SpatRaster-method}
@@ -48,6 +50,8 @@
\alias{predict2,MAXNET_biomod2_model,data.frame-method}
\alias{predict2,RF_biomod2_model,SpatRaster-method}
\alias{predict2,RF_biomod2_model,data.frame-method}
+\alias{predict2,RFd_biomod2_model,SpatRaster-method}
+\alias{predict2,RFd_biomod2_model,data.frame-method}
\alias{predict2,SRE_biomod2_model,SpatRaster-method}
\alias{predict2,SRE_biomod2_model,data.frame-method}
\alias{predict2,XGBOOST_biomod2_model,SpatRaster-method}
@@ -100,6 +104,10 @@ predict2(object, newdata, ...)
\S4method{predict2}{RF_biomod2_model,data.frame}(object, newdata, ...)
+\S4method{predict2}{RFd_biomod2_model,SpatRaster}(object, newdata, ...)
+
+\S4method{predict2}{RFd_biomod2_model,data.frame}(object, newdata, ...)
+
\S4method{predict2}{SRE_biomod2_model,SpatRaster}(object, newdata, ...)
\S4method{predict2}{SRE_biomod2_model,data.frame}(object, newdata, ...)
diff --git a/vignettes/news.Rmd b/vignettes/news.Rmd
index edaaba18..b0ae8dd1 100644
--- a/vignettes/news.Rmd
+++ b/vignettes/news.Rmd
@@ -13,6 +13,18 @@ vignette: >
## Development updates
+### 2024
+
+#### 4.2-6 Devel actuel
+
+- Warning ! Some options for `OptionsBigboss` have been modified (concern only ANN, CTA and RF models)
+- Some changes for the tuning ranges.
+- Add *RFd*: Random Forest with a down-sampling method.
+- Add _seed.val_ for `bm_pseudoAbsences` and `BIOMOD_FormatingData`.
+- Add _fact.aggr_ argument for pseudo-absences selection with the random and disk methods. It allows to reduce the resolution of the environment.
+- Possibility to give the same options for all datasets with _"for_all_datasets"_ in `bm_ModelingOptions`.
+
+
### 2024
##### Bugfix