diff --git a/DESCRIPTION b/DESCRIPTION index d9930599..ce4c6794 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: biomod2 Type: Package Title: Ensemble Platform for Species Distribution Modeling -Version: 4.2-5-2 -Date: 2024-05-31 +Version: 4.2-6 +Date: 2024-06-18 Authors@R: c(person("Wilfried", "Thuiller", role = c("aut") , email = "wilfried.thuiller@univ-grenoble-alpes.fr"), person("Damien", "Georges", role = c("aut")), diff --git a/NAMESPACE b/NAMESPACE index 81ae0d62..4ca9df7b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -80,6 +80,7 @@ exportClasses(MARS_biomod2_model) exportClasses(MAXENT_biomod2_model) exportClasses(MAXNET_biomod2_model) exportClasses(RF_biomod2_model) +exportClasses(RFd_biomod2_model) exportClasses(SRE_biomod2_model) exportClasses(XGBOOST_biomod2_model) exportClasses(biomod2_ensemble_model) @@ -203,6 +204,7 @@ importFrom(stats,rnorm) importFrom(stats,runif) importFrom(stats,sd) importFrom(terra,`add<-`) +importFrom(terra,aggregate) importFrom(terra,app) importFrom(terra,as.matrix) importFrom(terra,as.points) diff --git a/R/BIOMOD_FormatingData.R b/R/BIOMOD_FormatingData.R index d51f4f2f..bd1479ef 100644 --- a/R/BIOMOD_FormatingData.R +++ b/R/BIOMOD_FormatingData.R @@ -81,6 +81,8 @@ ##' If pseudo-absence selection and \code{PA.strategy = 'disk'}, a \code{numeric} defining the ##' maximal distance to presence points used to make the \code{disk} pseudo-absence selection ##' (in meters, see Details) +##' @param PA.fact.aggr (\emph{optional, default} \code{NULL}) \cr +##' If \code{strategy = 'random'} or \code{strategy = 'disk'}, a \code{integer} defining the factor of aggregation to reduce the resolution ##' @param PA.user.table (\emph{optional, default} \code{NULL}) \cr ##' If pseudo-absence selection and \code{PA.strategy = 'user.defined'}, a \code{matrix} or ##' \code{data.frame} with as many rows as \code{resp.var} values, as many columns as @@ -93,6 +95,8 @@ ##' @param filter.raster (\emph{optional, default} \code{FALSE}) \cr ##' If \code{expl.var} is of raster type, a \code{logical} value defining whether \code{resp.var} ##' is to be filtered when several points occur in the same raster cell +##' @param seed.val (\emph{optional, default} \code{NULL}) \cr +##' An \code{integer} value corresponding to the new seed value to be set ##' ##' ##' @return @@ -344,9 +348,11 @@ BIOMOD_FormatingData <- function(resp.name, PA.dist.min = 0, PA.dist.max = NULL, PA.sre.quant = 0.025, + PA.fact.aggr = NULL, PA.user.table = NULL, na.rm = TRUE, - filter.raster = FALSE) + filter.raster = FALSE, + seed.val = NULL) { .bm_cat(paste0(resp.name, " Data Formating")) @@ -391,9 +397,11 @@ BIOMOD_FormatingData <- function(resp.name, PA.dist.min = PA.dist.min, PA.dist.max = PA.dist.max, PA.sre.quant = PA.sre.quant, + PA.fact.aggr = PA.fact.aggr, PA.user.table = PA.user.table, na.rm = na.rm, - filter.raster = filter.raster) + filter.raster = filter.raster, + seed.val) } .bm_cat("Done") return(out) diff --git a/R/BIOMOD_Modeling.R b/R/BIOMOD_Modeling.R index e6da4b13..ea117f54 100644 --- a/R/BIOMOD_Modeling.R +++ b/R/BIOMOD_Modeling.R @@ -16,7 +16,7 @@ ##' (\emph{a random number by default}) ##' @param models a \code{vector} containing model names to be computed, must be among ##' \code{ANN}, \code{CTA}, \code{FDA}, \code{GAM}, \code{GBM}, \code{GLM}, \code{MARS}, -##' \code{MAXENT}, \code{MAXNET}, \code{RF}, \code{SRE}, \code{XGBOOST} +##' \code{MAXENT}, \code{MAXNET}, \code{RF}, \code{RFd}, \code{SRE}, \code{XGBOOST} ##' @param models.pa (\emph{optional, default} \code{NULL}) \cr ##' A \code{list} containing for each model a \code{vector} defining which pseudo-absence datasets ##' are to be used, must be among \code{colnames(bm.format@PA.table)} @@ -139,6 +139,7 @@ ##' (\url{https://biodiversityinformatics.amnh.org/open_source/maxent/}) ##' \item \code{MAXNET} : Maximum Entropy (\code{\link[maxnet]{maxnet}}) ##' \item \code{RF} : Random Forest (\code{\link[randomForest]{randomForest}}) +##' \item \code{RFd} : Random Forest downsampled (\code{\link[randomForest]{randomForest}}) ##' \item \code{SRE} : Surface Range Envelop or usually called BIOCLIM (\code{\link{bm_SRE}}) ##' \item \code{XGBOOST} : eXtreme Gradient Boosting Training (\code{\link[xgboost]{xgboost}}) ##' }} @@ -622,7 +623,7 @@ BIOMOD_Modeling <- function(bm.format, models.switch.off <- NULL ## check if model is supported - avail.models.list <- c('ANN', 'CTA', 'FDA', 'GAM', 'GBM', 'GLM', 'MARS', 'MAXENT', 'MAXNET', 'RF', 'SRE', 'XGBOOST') + avail.models.list <- c('ANN', 'CTA', 'FDA', 'GAM', 'GBM', 'GLM', 'MARS', 'MAXENT', 'MAXNET', 'RF','RFd', 'SRE', 'XGBOOST') .fun_testIfIn(TRUE, "models", models, avail.models.list) ## Specific case of one variable with GBM / MAXNET diff --git a/R/biomod2-package.R b/R/biomod2-package.R index 040e71e3..f6a55170 100644 --- a/R/biomod2-package.R +++ b/R/biomod2-package.R @@ -10,7 +10,8 @@ { RFver <- read.dcf(file = system.file("DESCRIPTION", package = pkgname), fields = "Version") mess <- paste(pkgname, RFver, "loaded.\n") - mess <- paste(mess, "/!\\ New set up for modeling options. We apologize for the trouble ^[*.*]^") + mess <- paste(mess, "/!\\ Be careful : note that some BigBoss options have been changed between biomod2 v4.2-6 and previous versions./!\\ + \n\n Please welcome a new model : RFd. This is a random forest model with a automatic down sampling to make your life easier.\n") packageStartupMessage(mess) toLoad <- unique(ModelsTable$package[-which(ModelsTable$package %in% c("MAXENT", "biomod2"))]) diff --git a/R/biomod2_classes_0.R b/R/biomod2_classes_0.R index 9087fcfc..0dd40197 100644 --- a/R/biomod2_classes_0.R +++ b/R/biomod2_classes_0.R @@ -75,7 +75,7 @@ setGeneric("BIOMOD.options.default", def = function(mod, typ, pkg, fun) { standa .BIOMOD.options.default.check.args <- function(mod, typ, pkg, fun) { ## check if model is supported - avail.models.list <- c('ANN', 'CTA', 'FDA', 'GAM', 'GBM', 'GLM', 'MARS', 'MAXENT', 'MAXNET', 'RF', 'SRE', 'XGBOOST') + avail.models.list <- c('ANN', 'CTA', 'FDA', 'GAM', 'GBM', 'GLM', 'MARS', 'MAXENT', 'MAXNET', 'RF','RFd', 'SRE', 'XGBOOST') .fun_testIfIn(TRUE, "mod", mod, avail.models.list) ## check if type is supported @@ -305,6 +305,16 @@ setGeneric("BIOMOD.options.dataset", } } if (strategy == "user.defined" && !is.null(user.val)) { + + if ("for_all_datasets" %in% names(user.val)){ + if (length(names(user.val)) > 1 ){ + user.val <- user.val["for_all_datasets"] + cat("\n\t\t> Only the options defined for 'for_all_datasets' will be taken into account") + } + user.val <- rep(user.val, length(expected_CVnames)) + names(user.val) <- expected_CVnames + } + .fun_testIfIn(TRUE, "names(user.val)", names(user.val), expected_CVnames) if (length(names(user.val)) != length(expected_CVnames)) { warning(paste0("Options will be changed only for a subset of datasets (" @@ -316,7 +326,8 @@ setGeneric("BIOMOD.options.dataset", } return(list(strategy = strategy, - expected_CVnames = expected_CVnames)) + expected_CVnames = expected_CVnames, + user.val = user.val)) } # .BIOMOD.options.dataset.test <- function(bm.opt) @@ -445,6 +456,11 @@ setMethod('BIOMOD.options.dataset', signature(strategy = 'character'), argstmp$mtry = 1 argstmp$type <- "classification" } + if (mod == "RFd") { + argstmp[["x"]] = NULL + argstmp$mtry = 1 + argstmp$type <- "classification" + } if (mod == "XGBOOST") { argstmp$nrounds = 4 } argstmp[["..."]] = NULL diff --git a/R/biomod2_classes_1.R b/R/biomod2_classes_1.R index dbb7d362..0e5f4fa4 100644 --- a/R/biomod2_classes_1.R +++ b/R/biomod2_classes_1.R @@ -1321,6 +1321,8 @@ setMethod('summary', signature(object = 'BIOMOD.formated.data'), ##' If pseudo-absence selection and \code{PA.strategy = 'disk'}, a \code{numeric} defining the ##' maximal distance to presence points used to make the \code{disk} pseudo-absence selection ##' (in meters, see Details) +##' @param PA.fact.aggr (\emph{optional, default} \code{NULL}) \cr +##' If \code{strategy = 'random'} or \code{strategy = 'disk'}, a \code{integer} defining the factor of aggregation to reduce the resolution ##' @param PA.user.table (\emph{optional, default} \code{NULL}) \cr ##' If pseudo-absence selection and \code{PA.strategy = 'user.defined'}, a \code{matrix} or ##' \code{data.frame} with as many rows as \code{resp.var} values, as many columns as @@ -1333,6 +1335,8 @@ setMethod('summary', signature(object = 'BIOMOD.formated.data'), ##' @param filter.raster (\emph{optional, default} \code{FALSE}) \cr ##' If \code{env} is of raster type, a \code{logical} value defining whether \code{sp} ##' is to be filtered when several points occur in the same raster cell +##' @param seed.val (\emph{optional, default} \code{NULL}) \cr +##' An \code{integer} value corresponding to the new seed value to be set ##' ##' @slot dir.name a \code{character} corresponding to the modeling folder ##' @slot sp.name a \code{character} corresponding to the species name @@ -1443,15 +1447,17 @@ setMethod('BIOMOD.formated.data.PA', signature(sp = 'numeric', env = 'data.frame , eval.sp = NULL, eval.env = NULL, eval.xy = NULL , PA.nb.rep = 1, PA.strategy = 'random', PA.nb.absences = NULL , PA.dist.min = 0, PA.dist.max = NULL - , PA.sre.quant = 0.025, PA.user.table = NULL - , na.rm = TRUE, filter.raster = FALSE) { + , PA.sre.quant = 0.025, PA.fact.aggr = NULL + , PA.user.table = NULL + , na.rm = TRUE, filter.raster = FALSE, seed.val = NULL) { .BIOMOD.formated.data.PA(sp, env, xy, dir.name, sp.name , eval.sp, eval.env, eval.xy , PA.nb.rep, PA.strategy, PA.nb.absences , PA.dist.min, PA.dist.max - , PA.sre.quant, PA.user.table + , PA.sre.quant, PA.fact.aggr, PA.user.table , na.rm - , filter.raster = filter.raster) + , filter.raster = filter.raster + , seed.val) }) ### BIOMOD.formated.data.PA(sp = numeric, env = SpatRaster) ------------------- @@ -1465,15 +1471,17 @@ setMethod('BIOMOD.formated.data.PA', signature(sp = 'numeric', env = 'SpatRaster , eval.sp = NULL, eval.env = NULL, eval.xy = NULL , PA.nb.rep = 1, PA.strategy = 'random', PA.nb.absences = NULL , PA.dist.min = 0, PA.dist.max = NULL - , PA.sre.quant = 0.025, PA.user.table = NULL - , na.rm = TRUE, filter.raster = FALSE) { + , PA.sre.quant = 0.025, PA.fact.aggr = NULL + , PA.user.table = NULL + , na.rm = TRUE, filter.raster = FALSE, seed.val = NULL) { .BIOMOD.formated.data.PA(sp, env, xy, dir.name, sp.name , eval.sp, eval.env, eval.xy , PA.nb.rep, PA.strategy, PA.nb.absences , PA.dist.min, PA.dist.max - , PA.sre.quant, PA.user.table + , PA.sre.quant, PA.fact.aggr, PA.user.table , na.rm - , filter.raster = filter.raster) + , filter.raster = filter.raster + , seed.val) }) ### .BIOMOD.formated.data.PA --------------------------------------------------- @@ -1481,8 +1489,9 @@ setMethod('BIOMOD.formated.data.PA', signature(sp = 'numeric', env = 'SpatRaster , eval.sp = NULL, eval.env = NULL, eval.xy = NULL , PA.nb.rep = 1, PA.strategy = 'random', PA.nb.absences = NULL , PA.dist.min = 0, PA.dist.max = NULL - , PA.sre.quant = 0.025, PA.user.table = NULL - , na.rm = TRUE, filter.raster = FALSE) + , PA.sre.quant = 0.025, PA.fact.aggr = NULL + , PA.user.table = NULL + , na.rm = TRUE, filter.raster = FALSE, seed.val = NULL) { args <- .BIOMOD.formated.data.check.args(sp, env, xy, eval.sp, eval.env, eval.xy, filter.raster) for (argi in names(args)) { assign(x = argi, value = args[[argi]]) } @@ -1529,7 +1538,10 @@ setMethod('BIOMOD.formated.data.PA', signature(sp = 'numeric', env = 'SpatRaster sre.quant = PA.sre.quant, dist.min = PA.dist.min, dist.max = PA.dist.max, - user.table = PA.user.table) + fact.aggr = PA.fact.aggr, + user.table = PA.user.table, + seed.val = seed.val) + if (!is.null(pa.data.tmp)) { ## Keep same env variable for eval than calib (+ check for factor) @@ -1581,15 +1593,15 @@ setMethod('BIOMOD.formated.data.PA', signature(sp = 'numeric', env = 'SpatRaster rm(list = 'BFD') } else { - cat("\n ! PA selection not done", fill = .Options$width) - BFDP <- BIOMOD.formated.data(sp = as.vector(values(sp)[,1]), - env = env, - xy = xy, - dir.name = dir.name, - sp.name = sp.name, - eval.sp = eval.sp, - eval.env = eval.env, - eval.xy = eval.xy) + stop("\n ! PA selection not done : \n The PA selection is not possible with the parameters selected." ) + # BFDP <- BIOMOD.formated.data(sp = as.vector(values(sp)[,1]), + # env = env, + # xy = xy, + # dir.name = dir.name, + # sp.name = sp.name, + # eval.sp = eval.sp, + # eval.env = eval.env, + # eval.xy = eval.xy) } rm(list = "pa.data.tmp" ) return(BFDP) diff --git a/R/biomod2_classes_4.R b/R/biomod2_classes_4.R index b78ec2bf..c199bf12 100644 --- a/R/biomod2_classes_4.R +++ b/R/biomod2_classes_4.R @@ -76,6 +76,7 @@ NULL ##' @aliases MAXENT_biomod2_model-class ##' @aliases MAXNET_biomod2_model-class ##' @aliases RF_biomod2_model-class +##' @aliases RFd_biomod2_model-class ##' @aliases SRE_biomod2_model-class ##' @aliases XGBOOST_biomod2_model-class ##' @author Damien Georges @@ -117,6 +118,7 @@ NULL ##' \item \code{MAXNET_biomod2_model} : \code{model_class} is ##' \code{MAXNET} ##' \item \code{RF_biomod2_model} : \code{model_class} is \code{RF} +##' \item \code{RFd_biomod2_model} : \code{model_class} is \code{RFd} ##' \item \code{SRE_biomod2_model} : \code{model_class} is \code{SRE} ##' } ##' @@ -138,6 +140,7 @@ NULL ##' showClass("MAXENT_biomod2_model") ##' showClass("MAXNET_biomod2_model") ##' showClass("RF_biomod2_model") +##' showClass("RFd_biomod2_model") ##' showClass("SRE_biomod2_model") ##' NULL @@ -275,6 +278,8 @@ setMethod('predict', signature(object = 'biomod2_model'), ##' @aliases predict2.MAXNET_biomod2_model.data.frame ##' @aliases predict2.RF_biomod2_model.SpatRaster ##' @aliases predict2.RF_biomod2_model.data.frame +##' @aliases predict2.RFd_biomod2_model.SpatRaster +##' @aliases predict2.RFd_biomod2_model.data.frame ##' @aliases predict2.SRE_biomod2_model.SpatRaster ##' @aliases predict2.SRE_biomod2_model.data.frame ##' @author Remi Patin @@ -1112,6 +1117,81 @@ setMethod('predict2', signature(object = 'RF_biomod2_model', newdata = "data.fra } ) +#----------------------------------------------------------------------------- # +## 8.10_bis RFd_biomod2_model ----------------------------------------------------- +#----------------------------------------------------------------------------- # +##' @name RFd_biomod2_model-class +##' @rdname biomod2_model +##' @export + +setClass('RFd_biomod2_model', + representation(), + contains = 'biomod2_model', + prototype = list(model_class = 'RFd'), + validity = function(object) { # check model class + if (!inherits(object@model, "randomForest")) { return(FALSE)} else { return(TRUE) } + }) + +##' +##' @rdname predict2.bm +##' + + +setMethod('predict2', signature(object = 'RFd_biomod2_model', newdata = "SpatRaster"), + function(object, newdata, ...) { + pa <- .extract_modelNamesInfo(object@model_name, obj.type = "mod", info = "PA") + run <- .extract_modelNamesInfo(object@model_name, obj.type = "mod", info = "run") + dataset <- paste0("_", pa, "_", run) + + if (!is.null(object@model_options@args.values[[dataset]]$type) && object@model_options@args.values[[dataset]]$type == "classification") { + predfun <- function(object, newdata, mod.name){ + # new predict command used with terra + subset(predict(newdata, model = get_formal_model(object), + type = 'prob', + wopt = list(names = rep(mod.name,2))), + 2) + # old predict function used with raster + # predict(newdata, model = get_formal_model(object), type = 'prob', index = 2) + } + } else { #regression case + predfun <- function(object, newdata, mod.name){ + predict(newdata, model = get_formal_model(object), + type = 'response', + wopt = list(names = rep(mod.name,2))) + } + } + + # old predict function used with raster + # predict(newdata, model = get_formal_model(object), type = 'prob', index = 2) + # redirect to predict2.biomod2_model.SpatRaster + + callNextMethod(object, newdata, predfun = predfun, ...) + + } +) + +##' @rdname predict2.bm +setMethod('predict2', signature(object = 'RFd_biomod2_model', newdata = "data.frame"), + function(object, newdata, ...) { + pa <- .extract_modelNamesInfo(object@model_name, obj.type = "mod", info = "PA") + run <- .extract_modelNamesInfo(object@model_name, obj.type = "mod", info = "run") + dataset <- paste0("_", pa, "_", run) + + if (!is.null(object@model_options@args.values[[dataset]]$type) && object@model_options@args.values[[dataset]]$type == "classification") { + predfun <- function(object, newdata, not_na_rows) { + as.numeric(predict(get_formal_model(object), as.data.frame(newdata[not_na_rows, , drop = FALSE]), type = 'prob')[, '1']) + } + } else { # regression case + predfun <- function(object, newdata, not_na_rows) { + as.numeric(predict(get_formal_model(object), as.data.frame(newdata[not_na_rows, , drop = FALSE]), type = 'response')) + } + } + # redirect to predict2.biomod2_model.data.frame + callNextMethod(object, newdata, predfun = predfun, ...) + } +) + + #----------------------------------------------------------------------------- # ## 8.11 SRE_biomod2_model ---------------------------------------------------- diff --git a/R/biomod2_data.R b/R/biomod2_data.R index 94d15114..84b6150a 100644 --- a/R/biomod2_data.R +++ b/R/biomod2_data.R @@ -32,14 +32,14 @@ "ModelsTable" # ModelsTable <- data.frame(model = c('ANN', 'CTA', 'FDA', 'GAM', 'GAM', 'GAM', 'GBM', 'GLM' -# , 'MARS', 'MAXENT', 'MAXNET', 'RF', 'SRE', 'XGBOOST') +# , 'MARS', 'MAXENT', 'MAXNET', 'RF','RFd', 'SRE', 'XGBOOST') # , type = 'binary' # , package = c('nnet', 'rpart', 'mda', 'gam', 'mgcv', 'mgcv', 'gbm', 'stats' -# , 'earth', 'MAXENT', 'maxnet', 'randomForest', 'biomod2', 'xgboost') +# , 'earth', 'MAXENT', 'maxnet', 'randomForest','randomForest', 'biomod2', 'xgboost') # , func = c('nnet', 'rpart', 'fda', 'gam', 'bam', 'gam', 'gbm', 'glm' -# , 'earth', 'MAXENT', 'maxnet', 'randomForest', 'bm_SRE', 'xgboost') +# , 'earth', 'MAXENT', 'maxnet', 'randomForest','randomForest', 'bm_SRE', 'xgboost') # , train = c('avNNet', 'rpart', 'fda', 'gamLoess', 'bam', 'gam', 'gbm', 'glm' -# , 'earth', 'ENMevaluate', 'maxnet', 'rf', 'bm_SRE', 'xgbTree')) +# , 'earth', 'ENMevaluate', 'maxnet', 'rf','rf', 'bm_SRE', 'xgbTree')) # usethis::use_data(ModelsTable, overwrite = TRUE) # usethis::use_data(ModelsTable, overwrite = TRUE, internal = TRUE) @@ -55,7 +55,7 @@ #' \item{\code{ANN.binary.nnet.nnet}}{ #' \itemize{ #' \item \code{size = 5} -#' \item \code{decay = 5} +#' \item \code{decay = 0.1} #' \item \code{trace = FALSE} #' \item \code{rang = 0.1} #' \item \code{maxit = 200} @@ -64,7 +64,7 @@ #' \item{\code{CTA.binary.rpart.rpart}}{ #' \itemize{ #' \item \code{method = 'class'} -#' \item \code{control = list(xval = 5, minbucket = 5, minsplit = 5, cp = 0.001, maxdepth = 25)} +#' \item \code{control = list(xval = 5, minbucket = 5, minsplit = 5, cp = 0.001, maxdepth = 10)} #' \item \code{cost = NULL} #' } #' } @@ -128,7 +128,18 @@ #' \itemize{ #' \item \code{type = 'classification'} #' \item \code{ntree = 500} -#' \item \code{mtry = NULL} +#' \item \code{mtry = 2} +#' \item \code{strata = factor(c(0, 1))} +#' \item \code{sampsize = NULL} +#' \item \code{nodesize = 5} +#' \item \code{maxnodes = NULL} +#' } +#' } +#' \item{\code{RFd.binary.randomForest.randomForest}}{ +#' \itemize{ +#' \item \code{type = 'classification'} +#' \item \code{ntree = 500} +#' \item \code{mtry = 2} #' \item \code{strata = factor(c(0, 1))} #' \item \code{sampsize = NULL} #' \item \code{nodesize = 5} @@ -161,12 +172,12 @@ # bm.opt@options <- c(bm.opt@options, bm.opt_gam2@options, bm.opt_gam3@options) # bm.opt@options <- bm.opt@options[bm.opt@models] # bm.opt@options$ANN.binary.nnet.nnet@args.values[['_allData_allRun']]$size = 5 #NULL -# bm.opt@options$ANN.binary.nnet.nnet@args.values[['_allData_allRun']]$decay = 5 +# bm.opt@options$ANN.binary.nnet.nnet@args.values[['_allData_allRun']]$decay = 0.1 # bm.opt@options$ANN.binary.nnet.nnet@args.values[['_allData_allRun']]$trace = FALSE # bm.opt@options$ANN.binary.nnet.nnet@args.values[['_allData_allRun']]$rang = 0.1 # bm.opt@options$ANN.binary.nnet.nnet@args.values[['_allData_allRun']]$maxit = 200 # bm.opt@options$CTA.binary.rpart.rpart@args.values[['_allData_allRun']]$method = "class" -# bm.opt@options$CTA.binary.rpart.rpart@args.values[['_allData_allRun']]$control = list(xval = 5, minbucket = 5, minsplit = 5, cp = 0.001, maxdepth = 25) +# bm.opt@options$CTA.binary.rpart.rpart@args.values[['_allData_allRun']]$control = list(xval = 5, minbucket = 5, minsplit = 5, cp = 0.001, maxdepth = 10) # bm.opt@options$CTA.binary.rpart.rpart@args.values[['_allData_allRun']]$cost = NULL # bm.opt@options$FDA.binary.mda.fda@args.values[['_allData_allRun']]$method = "mars" # bm.opt@options$GAM.binary.mgcv.gam@args.values[['_allData_allRun']]$family = binomial(link = 'logit') @@ -193,11 +204,18 @@ # bm.opt@options$MAXENT.binary.MAXENT.MAXENT@args.values[['_allData_allRun']]$path_to_maxent.jar = '.' # bm.opt@options$RF.binary.randomForest.randomForest@args.values[['_allData_allRun']]$type = 'classification' # bm.opt@options$RF.binary.randomForest.randomForest@args.values[['_allData_allRun']]$ntree = 500 -# bm.opt@options$RF.binary.randomForest.randomForest@args.values[['_allData_allRun']]$mtry = NULL +# bm.opt@options$RF.binary.randomForest.randomForest@args.values[['_allData_allRun']]$mtry = 2 # bm.opt@options$RF.binary.randomForest.randomForest@args.values[['_allData_allRun']]$strata = factor(c(0, 1)) # bm.opt@options$RF.binary.randomForest.randomForest@args.values[['_allData_allRun']]$sampsize = NULL # bm.opt@options$RF.binary.randomForest.randomForest@args.values[['_allData_allRun']]$nodesize = 5 # bm.opt@options$RF.binary.randomForest.randomForest@args.values[['_allData_allRun']]$maxnodes = NULL +# bm.opt@options$RFd.binary.randomForest.randomForest@args.values[['_allData_allRun']]$type = 'classification' +# bm.opt@options$RFd.binary.randomForest.randomForest@args.values[['_allData_allRun']]$ntree = 500 +# bm.opt@options$RFd.binary.randomForest.randomForest@args.values[['_allData_allRun']]$mtry = 2 +# bm.opt@options$RFd.binary.randomForest.randomForest@args.values[['_allData_allRun']]$strata = factor(c(0, 1)) +# bm.opt@options$RFd.binary.randomForest.randomForest@args.values[['_allData_allRun']]$sampsize = NULL +# bm.opt@options$RFd.binary.randomForest.randomForest@args.values[['_allData_allRun']]$nodesize = 5 +# bm.opt@options$RFd.binary.randomForest.randomForest@args.values[['_allData_allRun']]$maxnodes = NULL # bm.opt@options$SRE.binary.biomod2.bm_SRE@args.values[['_allData_allRun']]$do.extrem = TRUE # bm.opt@options$XGBOOST.binary.xgboost.xgboost@args.values[['_allData_allRun']]$params = list(max_depth = 2, eta = 1) # bm.opt@options$XGBOOST.binary.xgboost.xgboost@args.values[['_allData_allRun']]$nthread = 2 @@ -205,7 +223,7 @@ # bm.opt@options$XGBOOST.binary.xgboost.xgboost@args.values[['_allData_allRun']]$objective = "binary:logistic" # OptionsBigboss <- bm.opt -# usethis::use_data(OptionsBigboss, overwrite = TRUE, internal = TRUE) +# usethis::use_data(OptionsBigboss, overwrite = TRUE, internal = T) # usethis::use_data(OptionsBigboss, ModelsTable, overwrite = TRUE, internal = TRUE) diff --git a/R/bm_ModelingOptions.R b/R/bm_ModelingOptions.R index c9f0c4be..c21e4ef5 100644 --- a/R/bm_ModelingOptions.R +++ b/R/bm_ModelingOptions.R @@ -11,7 +11,7 @@ ##' \code{binary}, \code{binary.PA}, \code{abundance}, \code{compositional} ##' @param models a \code{vector} containing model names to be computed, must be among ##' \code{ANN}, \code{CTA}, \code{FDA}, \code{GAM}, \code{GBM}, \code{GLM}, \code{MARS}, -##' \code{MAXENT}, \code{MAXNET}, \code{RF}, \code{SRE}, \code{XGBOOST} +##' \code{MAXENT}, \code{MAXNET}, \code{RF}, \code{RFd}, \code{SRE}, \code{XGBOOST} ##' @param strategy a \code{character} corresponding to the method to select models' parameters ##' values, must be either \code{default}, \code{bigboss}, \code{user.defined}, \code{tuned} ##' @param user.val (\emph{optional, default} \code{NULL}) \cr @@ -54,6 +54,9 @@ ##' function} ##' } ##' +##' To define the same options for all datasets of a model, you can provide these options as a list in +##' user.val with the names "for_all_datasets". +##' ##' @note \code{MAXENT} being the only external model (not called through a \code{R} package), ##' default parameters, and their values, are the following : ##' @@ -239,7 +242,7 @@ bm_ModelingOptions <- function(data.type , models = c('ANN', 'CTA', 'FDA', 'GAM', 'GBM', 'GLM' - , 'MARS', 'MAXENT', 'MAXNET', 'RF', 'SRE', 'XGBOOST') + , 'MARS', 'MAXENT', 'MAXNET', 'RF','RFd', 'SRE', 'XGBOOST') , strategy, user.val = NULL, user.base = "bigboss" , bm.format = NULL, calib.lines = NULL) { @@ -321,7 +324,7 @@ bm_ModelingOptions <- function(data.type ## check if model is supported avail.models.list <- c('ANN', 'CTA', 'FDA', 'GAM', 'GAM.gam.gam', 'GAM.mgcv.bam', 'GAM.mgcv.gam' - , 'GBM', 'GLM', 'MARS', 'MAXENT', 'MAXNET', 'RF', 'SRE', 'XGBOOST') + , 'GBM', 'GLM', 'MARS', 'MAXENT', 'MAXNET', 'RF','RFd', 'SRE', 'XGBOOST') .fun_testIfIn(TRUE, "models", models, avail.models.list) if (length(grep('GAM', models)) > 1) { stop("Only one GAM model can be activated. Please choose betwen 'GAM', 'GAM.gam.gam', 'GAM.mgcv.bam' or 'GAM.mgcv.gam'") @@ -357,7 +360,7 @@ bm_ModelingOptions <- function(data.type } .fun_testIfInherits(TRUE, "calib.lines", calib.lines, c("matrix")) - expected_CVnames <- c(paste0("_allData_RUN", seq_len(ncol(calib.lines))), "_allData_allRun") + expected_CVnames <- c(paste0("_allData_RUN", seq_len(ncol(calib.lines))), "_allData_allRun", "for_all_datasets") if (inherits(bm.format, "BIOMOD.formated.data.PA")) { expected_CVnames <- c(expected_CVnames , sapply(1:ncol(bm.format@PA.table) diff --git a/R/bm_PseudoAbsences.R b/R/bm_PseudoAbsences.R index b9c22f68..88aadb3a 100644 --- a/R/bm_PseudoAbsences.R +++ b/R/bm_PseudoAbsences.R @@ -38,6 +38,8 @@ ##' @param dist.max (\emph{optional, default} \code{NULL}) \cr ##' If \code{strategy = 'disk'}, a \code{numeric} defining the maximal distance to presence points ##' used to make the \code{disk} pseudo-absence selection (in meters) +##' @param fact.aggr (\emph{optional, default} \code{NULL}) \cr +##' If \code{strategy = 'random'} or \code{strategy = 'disk'}, a \code{integer} defining the factor of aggregation to reduce the resolution ##' @param user.table (\emph{optional, default} \code{NULL}) \cr ##' If \code{strategy = 'user.defined'}, a \code{matrix} or \code{data.frame} with as many rows as ##' \code{resp.var} values, as many columns as \code{nb.rep}, and containing \code{TRUE} or @@ -46,6 +48,8 @@ ##' ##' @param \ldots (\emph{optional, one or several of the above arguments depending on the selected ##' method}) +##' @param seed.val (\emph{optional, default} \code{NULL}) \cr +##' An \code{integer} value corresponding to the new seed value to be set ##' ##' ##' @return @@ -188,7 +192,7 @@ ##' ##' ##' @importFrom foreach foreach %do% -##' @importFrom terra rast vect freq spatSample values extract +##' @importFrom terra rast vect freq spatSample values extract aggregate ##' @importFrom utils packageVersion ##' ##' @export @@ -198,11 +202,12 @@ bm_PseudoAbsences <- function(resp.var, expl.var, nb.rep = 1, strategy = 'random', nb.absences = NULL - , sre.quant = 0, dist.min = 0, dist.max = NULL, user.table = NULL) + , sre.quant = 0, dist.min = 0, dist.max = NULL, fact.aggr = NULL + , user.table = NULL, seed.val = NULL) { ## 0. Check arguments --------------------------------------------------------------------------- args <- .bm_PseudoAbsences.check.args(resp.var, expl.var, nb.rep, strategy, nb.absences - , sre.quant, dist.min, dist.max, user.table) + , sre.quant, dist.min, dist.max, user.table,seed.val) for (argi in names(args)) { assign(x = argi, value = args[[argi]]) } rm(args) @@ -213,9 +218,9 @@ bm_PseudoAbsences <- function(resp.var, expl.var, nb.rep = 1, strategy = 'random if (length(nb.absences) == 1) { out <- switch(strategy, user.defined = bm_PseudoAbsences_user.defined(resp.var, expl.var, user.table), - random = bm_PseudoAbsences_random(resp.var, expl.var, nb.absences, nb.rep), + random = bm_PseudoAbsences_random(resp.var, expl.var, nb.absences, nb.rep, fact.aggr), sre = bm_PseudoAbsences_sre(resp.var, expl.var, sre.quant, nb.absences, nb.rep), - disk = bm_PseudoAbsences_disk(resp.var, expl.var, dist.min, dist.max, nb.absences, nb.rep)) + disk = bm_PseudoAbsences_disk(resp.var, expl.var, dist.min, dist.max, nb.absences, nb.rep, fact.aggr)) } else if (length(nb.absences) == nb.rep) { out.list = foreach(i.abs = unique(nb.absences)) %do% { @@ -224,9 +229,9 @@ bm_PseudoAbsences <- function(resp.var, expl.var, nb.rep = 1, strategy = 'random out <- switch(strategy, user.defined = bm_PseudoAbsences_user.defined(resp.var, expl.var, user.table), - random = bm_PseudoAbsences_random(resp.var, expl.var, i.abs, length(i.rep)), + random = bm_PseudoAbsences_random(resp.var, expl.var, i.abs, length(i.rep), fact.aggr), sre = bm_PseudoAbsences_sre(resp.var, expl.var, sre.quant, i.abs, length(i.rep)), - disk = bm_PseudoAbsences_disk(resp.var, expl.var, dist.min, dist.max, i.abs, length(i.rep))) + disk = bm_PseudoAbsences_disk(resp.var, expl.var, dist.min, dist.max, i.abs, length(i.rep), fact.aggr)) ## CASE where all available cells have been selected : ## give back only one dataset, even if several were asked @@ -324,7 +329,8 @@ bm_PseudoAbsences <- function(resp.var, expl.var, nb.rep = 1, strategy = 'random # Argument Check -------------------------------------------------------------- -.bm_PseudoAbsences.check.args <- function(resp.var, expl.var, nb.rep, strategy, nb.absences, sre.quant, dist.min, dist.max, user.table) +.bm_PseudoAbsences.check.args <- function(resp.var, expl.var, nb.rep, strategy, nb.absences + , sre.quant, dist.min, dist.max, user.table, seed.val) { cat('\n\nChecking Pseudo-absence selection arguments...\n') ## 1. Check resp.var argument ----------------------------------------------- @@ -422,6 +428,11 @@ bm_PseudoAbsences <- function(resp.var, expl.var, nb.rep = 1, strategy = 'random } } + ## 8. Set the seed (if needed) --------------------------------------------- + if (!is.null(seed.val)) { + set.seed(seed.val) + } + return(list(resp.var = resp.var, expl.var = expl.var, nb.rep = nb.rep, @@ -546,7 +557,7 @@ setGeneric("bm_PseudoAbsences_random", ##' setMethod('bm_PseudoAbsences_random', signature(expl.var = "SpatVector"), - function(resp.var, expl.var, nb.absences, nb.rep) + function(resp.var, expl.var, nb.absences, nb.rep, fact.aggr) { cat("\n > random pseudo absences selection") @@ -587,7 +598,7 @@ setMethod('bm_PseudoAbsences_random', signature(expl.var = "SpatVector"), env = as.data.frame(expl.var), pa.tab = pa.tab)) } else { - cat("\nUnsupported case yet!") + cat("\n Unsupported case yet!") return(NULL) } }) @@ -600,7 +611,7 @@ setMethod('bm_PseudoAbsences_random', signature(expl.var = "SpatVector"), ##' setMethod('bm_PseudoAbsences_random', signature(expl.var = "SpatRaster"), - function(resp.var, expl.var, nb.absences, nb.rep) + function(resp.var, expl.var, nb.absences, nb.rep, fact.aggr) { cat("\n > random pseudo absences selection") # 1. Check if NA are present in resp.var observations or not to determine which dataset to use @@ -635,6 +646,10 @@ setMethod('bm_PseudoAbsences_random', signature(expl.var = "SpatRaster"), cat("\n > Pseudo absences are selected in explanatory variables") # create a mask containing all not already sampled points (presences and absences) + if (!is.null(fact.aggr)) { + expl.var <- terra::aggregate(expl.var, fact = fact.aggr) + } + ## the area we want to sample mask.out <- mask.env <- .get_data_mask(expl.var, value.out = -1) # add presences and true absences in our mask @@ -642,6 +657,7 @@ setMethod('bm_PseudoAbsences_random', signature(expl.var = "SpatRaster"), mask.env[in.cells] <- NA mask.out[in.cells] <- 1 + # checking of nb candidates nb.cells <- .get_nb_available_pa_cells(mask.env, PA.flag = -1) @@ -871,7 +887,7 @@ setGeneric("bm_PseudoAbsences_disk", ##' setMethod('bm_PseudoAbsences_disk', signature(expl.var = "SpatVector"), - function(resp.var, expl.var, dist.min, dist.max, nb.absences, nb.rep) { + function(resp.var, expl.var, dist.min, dist.max, nb.absences, nb.rep, fact.aggr) { cat("\n > Disk pseudo absences selection") # 1. determining area which can be selected coor <- crds(resp.var) @@ -908,11 +924,11 @@ setMethod('bm_PseudoAbsences_disk', signature(expl.var = "SpatVector"), ##' ##' @rdname bm_PseudoAbsences ##' @export -##' @importFrom terra rast distance subset mask crds cellFromXY extract +##' @importFrom terra rast distance subset mask crds cellFromXY extract aggregate ##' setMethod('bm_PseudoAbsences_disk', signature(expl.var = "SpatRaster"), - function(resp.var, expl.var, dist.min, dist.max, nb.absences, nb.rep) + function(resp.var, expl.var, dist.min, dist.max, nb.absences, nb.rep, fact.aggr) { cat("\n > Disk pseudo absences selection") @@ -926,7 +942,14 @@ setMethod('bm_PseudoAbsences_disk', signature(expl.var = "SpatRaster"), nb.absences, nb.rep) ) } else { + + cat("\n > Pseudo absences are selected in explanatory variables") + cat("\n") + + if (!is.null(fact.aggr)) { + expl.var <- terra::aggregate(expl.var, fact = fact.aggr) + } # create a mask dist.mask <- subset(expl.var, 1) @@ -951,7 +974,7 @@ setMethod('bm_PseudoAbsences_disk', signature(expl.var = "SpatRaster"), names(mask.in) = names(expl.var) # 2. selecting randomly pseudo absences - return(bm_PseudoAbsences_random(resp.var, expl.var = mask.in, nb.absences, nb.rep)) + return(bm_PseudoAbsences_random(resp.var, expl.var = mask.in, nb.absences, nb.rep, fact.aggr = NULL)) } }) diff --git a/R/bm_RunModelsLoop.R b/R/bm_RunModelsLoop.R index 81132010..d845bc4a 100644 --- a/R/bm_RunModelsLoop.R +++ b/R/bm_RunModelsLoop.R @@ -22,7 +22,7 @@ ##' (\emph{a random number by default}) ##' @param models a \code{vector} containing model names to be computed, must be among ##' \code{ANN}, \code{CTA}, \code{FDA}, \code{GAM}, \code{GBM}, \code{GLM}, \code{MARS}, -##' \code{MAXENT}, \code{MAXNET}, \code{RF}, \code{SRE}, \code{XGBOOST} +##' \code{MAXENT}, \code{MAXNET}, \code{RF}, \code{RFd}, \code{SRE}, \code{XGBOOST} ##' @param models.pa (\emph{optional, default} \code{NULL}) \cr ##' A \code{list} containing for each model a \code{vector} defining which pseudo-absence datasets ##' are to be used, must be among \code{colnames(bm.format@PA.table)} @@ -49,7 +49,7 @@ ##' ##' @param model a \code{character} corresponding to the model name to be computed, must be either ##' \code{ANN}, \code{CTA}, \code{FDA}, \code{GAM}, \code{GBM}, \code{GLM}, \code{MARS}, -##' \code{MAXENT}, \code{MAXNET}, \code{RF}, \code{SRE}, \code{XGBOOST} +##' \code{MAXENT}, \code{MAXNET}, \code{RF}, \code{RFd}, \code{SRE}, \code{XGBOOST} ##' @param run.name a \code{character} corresponding to the model to be run (sp.name + pa.id + ##' run.id) ##' @param dir.name (\emph{optional, default} \code{.}) \cr @@ -215,7 +215,8 @@ bm_RunModel <- function(model, run.name, dir.name = '.' opt_name <- grep(model, names(bm.options@options), value = TRUE) if (length(opt_name) == 1) { bm.opt <- bm.options@options[[opt_name]] - } else { stop("pitiprobleum") } ## Should not happen now + } else if (model == 'RF'){bm.opt <- bm.options@options[["RF.binary.randomForest.randomForest"]] } + else { stop("pitiprobleum") } ## Should not happen now if (length(grep("GAM", model)) == 1) { subclass_name <- paste0(bm.opt@model, "_", bm.opt@type, "_", bm.opt@package) @@ -234,7 +235,9 @@ bm_RunModel <- function(model, run.name, dir.name = '.' if (model != "MAXENT") { ## ANY MODEL BUT MAXENT ------------------------------------------------ ## PRELIMINAR --------------------------------------------------- - if (model %in% c("ANN", "MARS", "RF") & is.null(bm.opt.val$formula)) { + + + if (model %in% c("ANN", "MARS", "RF","RFd") & is.null(bm.opt.val$formula)) { bm.opt.val$formula <- bm_MakeFormula(resp.name = resp_name , expl.var = head(data_env) , type = 'simple' @@ -247,9 +250,17 @@ bm_RunModel <- function(model, run.name, dir.name = '.' bm.opt.val$strata <- data_mod[calib.lines.vec, , drop = FALSE][ , resp_name] bm.opt.val$sampsize <- unlist(ifelse(!is.null(bm.opt.val$sampsize), list(bm.opt.val$sampsize), length(data_sp[calib.lines.vec]))) ## TOCHECK !! } + if (model == "RFd" && !is.null(bm.opt.val$type) && bm.opt.val$type == "classification") { + # defining occurrences as factor for doing classification and not regression in RF + data_mod <- data_mod %>% mutate_at(resp_name, factor) + bm.opt.val$strata <- data_mod[calib.lines.vec, , drop = FALSE][ , resp_name] + nb_presences <- summary(data_mod[calib.lines.vec,resp_name])[["1"]] + bm.opt.val$sampsize <- unlist(ifelse(!is.null(bm.opt.val$sampsize), list(bm.opt.val$sampsize), list(c("0" =nb_presences,"1" =nb_presences)))) + bm.opt.val$replace <- unlist(ifelse(!is.null(bm.opt.val$replace), list(bm.opt.val$replace), TRUE)) + } ## FILL data parameter ------------------------------------------ - if (model %in% c("ANN", "CTA", "FDA", "GAM", "GBM", "MARS", "RF")) { + if (model %in% c("ANN", "CTA", "FDA", "GAM", "GBM", "MARS", "RF","RFd")) { bm.opt.val$data <- data_mod[calib.lines.vec, , drop = FALSE] } else if (model == "GLM") { bm.opt.val$data <- cbind(data_mod[calib.lines.vec, , drop = FALSE], @@ -281,7 +292,7 @@ bm_RunModel <- function(model, run.name, dir.name = '.' } ## REORGANIZE order of parameters ------------------------------- - if (model %in% c("ANN", "MARS", "RF")) { + if (model %in% c("ANN", "MARS", "RF","RFd")) { bm.opt.val <- bm.opt.val[c("formula", "data", names(bm.opt.val)[which(!(names(bm.opt.val) %in% c("formula", "data")))])] } if (model %in% c("FDA")) { @@ -333,7 +344,7 @@ bm_RunModel <- function(model, run.name, dir.name = '.' } ## POSTLIMINAR -------------------------------------------------- - if (model == "RF" && !is.null(bm.opt.val$type) && bm.opt.val$type == "classification") { + if (model %in% c("RF","RFd") && !is.null(bm.opt.val$type) && bm.opt.val$type == "classification") { # canceling occurences class modifications data_mod <- data_mod %>% mutate_at(resp_name, function(.x) { .x %>% as.character() %>% as.numeric() diff --git a/R/bm_Tuning.R b/R/bm_Tuning.R index 8da027c1..d73aeeda 100644 --- a/R/bm_Tuning.R +++ b/R/bm_Tuning.R @@ -10,7 +10,7 @@ ##' ##' @param model a \code{character} corresponding to the algorithm to be tuned, must be either ##' \code{ANN}, \code{CTA}, \code{FDA}, \code{GAM}, \code{GBM}, \code{GLM}, \code{MARS}, -##' \code{MAXENT}, \code{MAXNET}, \code{RF}, \code{SRE}, \code{XGBOOST} +##' \code{MAXENT}, \code{MAXNET}, \code{RF}, \code{RFd}, \code{SRE}, \code{XGBOOST} ##' @param tuning.fun a \code{character} corresponding to the model function name to be called ##' through \code{\link[caret]{train}} function for tuning parameters (see \code{\link{ModelsTable}} ##' dataset) @@ -76,6 +76,7 @@ ##' \item{MARS}{\code{degree}, \code{nprune}} ##' \item{MAXENT}{\code{algorithm}, \code{parallel}} ##' \item{RF}{\code{mtry}} +##' \item{RFd}{\code{mtry}} ##' \item{SRE}{\code{quant}} ##' \item{XGBOOST}{\code{nrounds}, \code{max_depth}, \code{eta}, \code{gamma}, ##' \code{colsampl_bytree}, \code{min_child_weight}, \code{subsample}} @@ -202,10 +203,10 @@ bm_Tuning <- function(model, weights = NULL, ctrl.train = NULL, params.train = list(ANN.size = c(2, 4, 6, 8), - ANN.decay = c(0.001, 0.01, 0.05, 0.1), + ANN.decay = c(0.01, 0.05, 0.1), ANN.bag = FALSE, FDA.degree = 1:2, - FDA.nprune = 2:38, + FDA.nprune = 2:25, GAM.select = c(TRUE, FALSE), GAM.method = c('GCV.Cp', 'GACV.Cp', 'REML', 'P-REML', 'ML', 'P-ML'), GAM.span = c(0.3, 0.5, 0.7), @@ -215,10 +216,11 @@ bm_Tuning <- function(model, GBM.shrinkage = c(0.001, 0.01, 0.1), GBM.n.minobsinnode = 10, MARS.degree = 1:2, - MARS.nprune = 2:max(38, 2 * ncol(bm.format@data.env.var) + 1), + MARS.nprune = 2:max(21, 2 * ncol(bm.format@data.env.var) + 1), MAXENT.algorithm = 'maxnet', MAXENT.parallel = TRUE, RF.mtry = 1:min(10, ncol(bm.format@data.env.var)), + RFd.mtry = 1:min(10, ncol(bm.format@data.env.var)), SRE.quant = c(0, 0.0125, 0.025, 0.05, 0.1), XGBOOST.nrounds = 50, XGBOOST.max_depth = 1, @@ -425,7 +427,7 @@ bm_Tuning <- function(model, tuning.form <- tuning.grid[which.max(tmp[, metric.eval]), ] - if (model == "RF") { + if (model %in% c("RF","RFd")) { tuning.form <- data.frame(mtry = tuning.grid[which.max(tmp[, metric.eval]), ]) } @@ -475,7 +477,7 @@ bm_Tuning <- function(model, } } } else { - if (model == "RF") { typ.vec = c('simple','quadratic', 'polynomial') } + if (model %in% c("RF","RFd")) { typ.vec = c('simple','quadratic', 'polynomial') } TMP <- foreach (typ = typ.vec, .combine = "rbind") %:% foreach (intlev = 0:max.intlev, .combine = "rbind") %do% @@ -494,7 +496,7 @@ bm_Tuning <- function(model, } } argstmp$formula <- TMP[which.max(TMP[, metric.eval]), "formula"] - if (model %in% c("ANN", "GAM", "GBM", "MARS", "RF")) { + if (model %in% c("ANN", "GAM", "GBM", "MARS", "RF","RFd")) { argstmp$formula <- formula(argstmp$formula) } } else { @@ -563,7 +565,7 @@ bm_Tuning <- function(model, { ## check model -------------------------------------------------------------- .fun_testIfIn(TRUE, "model", model, c("ANN", "CTA", "FDA", "GAM", "GBM", "GLM" - , "MARS", "MAXENT", "MAXNET", "RF", "SRE", "XGBOOST")) + , "MARS", "MAXENT", "MAXNET", "RF","RFd", "SRE", "XGBOOST")) ## check namespace ---------------------------------------------------------- if (!isNamespaceLoaded("caret")) { @@ -587,10 +589,10 @@ bm_Tuning <- function(model, .fun_testIfInherits(TRUE, "bm.format", bm.format, c("BIOMOD.formated.data", "BIOMOD.formated.data.PA")) ## check params.train ------------------------------------------------------- params.train_init = list(ANN.size = c(2, 4, 6, 8), - ANN.decay = c(0.001, 0.01, 0.05, 0.1), + ANN.decay = c(0.01, 0.05, 0.1), ANN.bag = FALSE, FDA.degree = 1:2, - FDA.nprune = 2:38, + FDA.nprune = 2:25, GAM.select = c(TRUE, FALSE), GAM.method = c('GCV.Cp', 'GACV.Cp', 'REML', 'P-REML', 'ML', 'P-ML'), GAM.span = c(0.3, 0.5, 0.7), @@ -600,10 +602,11 @@ bm_Tuning <- function(model, GBM.shrinkage = c(0.001, 0.01, 0.1), GBM.n.minobsinnode = 10, MARS.degree = 1:2, - MARS.nprune = 2:max(38, 2 * ncol(bm.format@data.env.var) + 1), + MARS.nprune = 2:max(21, 2 * ncol(bm.format@data.env.var) + 1), MAXENT.algorithm = 'maxnet', MAXENT.parallel = TRUE, RF.mtry = 1:min(10, ncol(bm.format@data.env.var)), + RFd.mtry = 1:min(10, ncol(bm.format@data.env.var)), SRE.quant = c(0, 0.0125, 0.025, 0.05, 0.1), XGBOOST.nrounds = 50, XGBOOST.max_depth = 1, @@ -619,6 +622,7 @@ bm_Tuning <- function(model, } } params.train = params.train_init + ## check evaluation metric -------------------------------------------------- if (model == "MAXENT") { .fun_testIfIn(TRUE, "metric.eval", metric.eval, c("auc.val.avg", "auc.diff.avg", "or.mtp.avg", "or.10p.avg", "AICc")) @@ -647,9 +651,10 @@ bm_Tuning <- function(model, train.params <- all.params[[tuning.fun]] ## get tuning grid through params.train ------------------------------------- tuning.grid <- NULL - if (model %in% c("ANN", "FDA", "GAM", "GBM", "MARS", "RF", "XGBOOST")) { + + if (model %in% c("ANN", "FDA", "GAM", "GBM", "MARS", "RF", "RFd", "XGBOOST")) { if (!(model == "GAM")) { - params.train = params.train[grep(model, names(params.train))] + params.train = params.train[grep(paste0(model,"\\."), names(params.train))] .fun_testIfIn(TRUE, "names(params.train)", names(params.train), paste0(model, ".", train.params$params)) } else if (tuning.fun == "gamLoess"){ params.train = params.train[c('GAM.span', "GAM.degree")] @@ -663,7 +668,7 @@ bm_Tuning <- function(model, ## get tuning length -------------------------------------------------------- tuning.length <- 1 if (model == "CTA") tuning.length <- 30 - if (model == "RF") tuning.length <- min(30, ncol(bm.format@data.env.var)) + if (model == "RF" | model == "RFd") tuning.length <- min(30, ncol(bm.format@data.env.var)) ## Do formula --------------------------------------------------------------- if (model %in% c("MAXENT", "MAXNET", "SRE", "XGBOOST") && do.formula == TRUE) { diff --git a/R/sysdata.rda b/R/sysdata.rda index 61adfad3..ca3d04d5 100644 Binary files a/R/sysdata.rda and b/R/sysdata.rda differ diff --git a/README.md b/README.md index 0c0dec2b..6dca2476 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ [![Cran Version](https://www.r-pkg.org/badges/version/biomod2?color=yellow)](https://cran.r-project.org/package=biomod2) -[![Github Version](https://img.shields.io/badge/devel%20version-4.2--5--2-blue.svg)](https://github.com/biomodhub/biomod2) +[![Github Version](https://img.shields.io/badge/devel%20version-4.2--6-blue.svg)](https://github.com/biomodhub/biomod2) [![Last Commit](https://img.shields.io/github/last-commit/biomodhub/biomod2.svg)](https://github.com/biomodhub/biomod2/commits/master) [![R-CMD-check](https://github.com/biomodhub/biomod2/actions/workflows/R-CMD-check.yml/badge.svg)](https://github.com/biomodhub/biomod2/actions/workflows/R-CMD-check.yml) @@ -49,7 +49,7 @@ install.packages("biomod2", dependencies = TRUE)
-- **Development version** [![v](https://img.shields.io/badge/devel%20version-4.2--5--2-blue.svg)](https://github.com/biomodhub/biomod2) from [biomodhub](https://github.com/biomodhub/biomod2) : +- **Development version** [![v](https://img.shields.io/badge/devel%20version-4.2--6-blue.svg)](https://github.com/biomodhub/biomod2) from [biomodhub](https://github.com/biomodhub/biomod2) : ```R library(devtools) @@ -58,25 +58,22 @@ devtools::install_github("biomodhub/biomod2", dependencies = TRUE)

-### `biomod 4.2-5` - Modeling options & Tuning Update +### `biomod 4.2-6` - Improved OptionsBigBoss and new model -**! `biomod2 4.2-5` is now available on CRAN !** `/!\` Please **feel free to indicate if you notice some strange new behaviors** ! #### What is changed ? -- modeling options are now automatically retrieved from single models functions, normally allowing the use of all arguments taken into account by these functions -- tuning has been cleaned up, but keep in mind that it is still a quite long running process -- in consequence, `BIOMOD_ModelingOptions` and `BIOMOD_Tuning` functions become secundary functions (`bm_ModelingOptions` and `bm_Tuning`), and modeling options can be directly built through `BIOMOD_Modeling` function +- To improve the models, we made some change in the options for [**`OptionsBigboss`**](https://biomodhub.github.io/biomod2/reference/OptionsBigboss.html). (This only affects the ANN, CTA and RF models.) You can check all your options with the `get_options()` function. +- To reduce the tuning calculation time, we update the tuning ranges for ANN, FDA and MARS models. #### What is new ? -- `ModelsTable` and `OptionsBigboss` datasets (*note that improvement of bigboss modeling options is planned in near future*) -- 3 new vignettes have been created : - - [data preparation](https://biomodhub.github.io/biomod2/articles/vignette_dataPreparation.html) (*questions you should ask yourself before modeling*) - - [cross-validation](https://biomodhub.github.io/biomod2/articles/vignette_crossValidation.html) (*to prepare your own calibration / validation datasets*) - - [modeling options](https://biomodhub.github.io/biomod2/articles/vignette_dataPreparation.html) (*to help you navigate through the new way of parameterizing single models*) +- `biomod2` has a new model: **RFd**. It's a Random Forest model with a down-sampling method. +- You can now define _seed.val_ for `bm_PseudoAbsences()` and `BIOMOD_FormatingData()`. +- New _fact.aggr_ argument, for pseudo-absences selection with the random and disk methods, allows to reduce the resolution of the environment. +- Possibility to give the same options for all datasets with _"for_all_datasets"_ in `bm_ModelingOptions()`.
@@ -86,7 +83,26 @@ devtools::install_github("biomodhub/biomod2", dependencies = TRUE)


+

+ + +### `biomod 4.2-5` - Modeling options & Tuning Update + +#### What is changed ? + +- modeling options are now automatically retrieved from single models functions, normally allowing the use of all arguments taken into account by these functions +- tuning has been cleaned up, but keep in mind that it is still a quite long running process +- in consequence, `BIOMOD_ModelingOptions` and `BIOMOD_Tuning` functions become secundary functions (`bm_ModelingOptions` and `bm_Tuning`), and modeling options can be directly built through `BIOMOD_Modeling` function + +#### What is new ? +- `ModelsTable` and `OptionsBigboss` datasets (*note that improvement of bigboss modeling options is planned in near future*) +- 3 new vignettes have been created : + - [data preparation](https://biomodhub.github.io/biomod2/articles/vignette_dataPreparation.html) (*questions you should ask yourself before modeling*) + - [cross-validation](https://biomodhub.github.io/biomod2/articles/vignette_crossValidation.html) (*to prepare your own calibration / validation datasets*) + - [modeling options](https://biomodhub.github.io/biomod2/articles/vignette_dataPreparation.html) (*to help you navigate through the new way of parameterizing single models*) + +
### `biomod 4.2` - Terra Update diff --git a/data/ModelsTable.rda b/data/ModelsTable.rda index 61adfad3..a2995ad8 100644 Binary files a/data/ModelsTable.rda and b/data/ModelsTable.rda differ diff --git a/data/OptionsBigboss.rda b/data/OptionsBigboss.rda index c360dbc4..ca3d04d5 100644 Binary files a/data/OptionsBigboss.rda and b/data/OptionsBigboss.rda differ diff --git a/docs/404.html b/docs/404.html index dd978977..7fe67438 100644 --- a/docs/404.html +++ b/docs/404.html @@ -32,7 +32,7 @@ biomod2 - 4.2-5-2 + 4.2-6 diff --git a/docs/articles/index.html b/docs/articles/index.html index 4d0a2198..f5edf5d5 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -17,7 +17,7 @@ biomod2 - 4.2-5-2 + 4.2-6 diff --git a/docs/articles/news.html b/docs/articles/news.html index 945f6212..0c2cf331 100644 --- a/docs/articles/news.html +++ b/docs/articles/news.html @@ -33,7 +33,7 @@ biomod2 - 4.2-5-2 + 4.2-6 @@ -145,7 +145,29 @@

- 2024 + 2024 +

+
+

4.2-6 Devel actuel +

+ +
+
+
+

+ 2024

Bugfix @@ -240,8 +262,8 @@
Bugfix<
-

- 2023 +

+ 2023

4.2-5 Modeling options and Tuning (2023-09-12) @@ -588,8 +610,8 @@

Internal Changes
-

- 2022 +

+ 2022

4.2-1 Bugfix patch @@ -781,7 +803,7 @@

Internal Changes -

4.1-3 +

4.1-3

Bugfix @@ -810,7 +832,7 @@

Internal Changes -

4.1-2 (2022-09-29) +

4.1-2 (2022-09-29)

Major changes @@ -906,7 +928,7 @@
Miscellaneous -

4.1-1 (2022-08-30) +

4.1-1 (2022-08-30)

  • add do.progress parameter (to render or not progress @@ -918,14 +940,14 @@

    4.1-1 (2022-08-30)

-

4.1 (2022-07-12) +

4.1 (2022-07-12)

  • fix bugs following major release 4.0
-

4.0 (2022-03-01) +

4.0 (2022-03-01)

  • MAJOR RELEASE
  • @@ -940,11 +962,11 @@

    4.0 (2022-03-01) -

    - 2021 +

    + 2021

    -

    3.5-3 (2021-11-02) +

    3.5-3 (2021-11-02)

    • clean BIOMOD classes definitions and functions @@ -959,7 +981,7 @@

      3.5-3 (2021-11-02)

    -

    3.5-2 (2021-10-18) +

    3.5-2 (2021-10-18)

    • fix BIOMOD_PresenceOnly function (previously @@ -972,11 +994,11 @@

      3.5-2 (2021-10-18)

    -

    - 2018-2019 +

    + 2018-2019

    -

    3.3-20 (2019-03-05) +

    3.3-20 (2019-03-05)

    • Remove maxent Tsurukoa because not maintained anymore (required by @@ -984,14 +1006,14 @@

      3.3-20 (2019-03-05)

    -

    3.3-18 (2018-07-04) +

    3.3-18 (2018-07-04)

    • fix the gbm multicore issue
    -

    3.3-17 (2018-04-23) +

    3.3-17 (2018-04-23)

    • correct the single presence pseudo-absences generation bug @@ -1000,18 +1022,18 @@

      3.3-17 (2018-04-23)

    -

    - 2016 +

    + 2016

    -

    3.3-6 (2016-01-14) +

    3.3-6 (2016-01-14)

    • add get_predictions function for ensemble models
    -

    3.3-5 (2016-01-04) +

    3.3-5 (2016-01-04)

    • MARS models are now computed throw earth package (was @@ -1023,11 +1045,11 @@

      3.3-5 (2016-01-04)

    -

    - 2015 +

    + 2015

    -

    3.3-4 (2015-11-04) +

    3.3-4 (2015-11-04)

    • update BIOMOD_tuning function (Frank @@ -1035,7 +1057,7 @@

      3.3-4 (2015-11-04)

    -

    3.3-3 (2015-10-27) +

    3.3-3 (2015-10-27)

    • force sampling of each level of factorial variables
    • @@ -1044,7 +1066,7 @@

      3.3-3 (2015-10-27)

    -

    3.3-00 (2015-10-05) +

    3.3-00 (2015-10-05)

    • MAJOR RELEASE
    • @@ -1060,7 +1082,7 @@

      3.3-00 (2015-10-05)

    -

    3.2-00 (2015-07-28) +

    3.2-00 (2015-07-28)

    • add 3 new functions in biomod2 (Frank B. @@ -1078,11 +1100,11 @@

      3.2-00 (2015-07-28)

    -

    - 2014 +

    + 2014

    -

    3.1-59 (2014-10-23) +

    3.1-59 (2014-10-23)

    • add model evaluation scores plotting function
    • @@ -1091,7 +1113,7 @@

      3.1-59 (2014-10-23)

    -

    3.1-53 (2014-08-06) +

    3.1-53 (2014-08-06)

    • new ensemble models names to be more coherent with formal models @@ -1099,7 +1121,7 @@

      3.1-53 (2014-08-06)

    -

    3.1-44 (2014-05-20) +

    3.1-44 (2014-05-20)

    • possibility to use user defined function to influence the way models @@ -1108,7 +1130,7 @@

      3.1-44 (2014-05-20)

    -

    3.1-43 (2014-05-20) +

    3.1-43 (2014-05-20)

    • add of as.data.frame argument for @@ -1117,7 +1139,7 @@

      3.1-43 (2014-05-20)

    -

    3.1-42 (2014-05-19) +

    3.1-42 (2014-05-19)

    • enable ensemble forecasting models selection (thanks to @@ -1126,11 +1148,11 @@

      3.1-42 (2014-05-19)

    -

    - 2013 +

    + 2013

    -

    3.1-17 (2013-10-23) +

    3.1-17 (2013-10-23)

    • add parameter to control amount of memory reserved for @@ -1143,7 +1165,7 @@

      3.1-17 (2013-10-23)

    -

    3.1-1 (2013-09-04) +

    3.1-1 (2013-09-04)

    • limitation of package dependencies
    • @@ -1153,7 +1175,7 @@

      3.1-1 (2013-09-04)

    -

    3.0.2 (2013-07-23) +

    3.0.2 (2013-07-23)

    • new functions to evaluate a-posteriori models quality
    • @@ -1162,7 +1184,7 @@

      3.0.2 (2013-07-23)

    -

    3.0.0 (2013-07-01) +

    3.0.0 (2013-07-01)

    • MAJOR RELEASES
    • @@ -1181,7 +1203,7 @@

      3.0.0 (2013-07-01)

    -

    2.1.37 (2013-06-12) +

    2.1.37 (2013-06-12)

    • change (temporally?) gam default package from mgcv to @@ -1192,7 +1214,7 @@

      2.1.37 (2013-06-12)

    -

    2.1.32 (2013-05-30) +

    2.1.32 (2013-05-30)

    • weights for user defined pseudo-absences are now supported @@ -1202,7 +1224,7 @@

      2.1.32 (2013-05-30)

    -

    2.1.13 (2013-03-06) +

    2.1.13 (2013-03-06)

    • Add ProbDensFunc() function to package to produce nice @@ -1210,7 +1232,7 @@

      2.1.13 (2013-03-06)

    -

    2.1.12 (2013-03-04) +

    2.1.12 (2013-03-04)

    • add rasterVis dependency for nicer biomod2 @@ -1222,7 +1244,7 @@

      2.1.12 (2013-03-04)

    -

    2.1.9 (2013-02-28) +

    2.1.9 (2013-02-28)

    • possibility to indicate manually which data should be used for @@ -1232,7 +1254,7 @@

      2.1.9 (2013-02-28)

    -

    2.1.9 (2013-02-27) +

    2.1.9 (2013-02-27)

    • one var modeling supported (thanks Anne O.)
    • @@ -1241,14 +1263,14 @@

      2.1.9 (2013-02-27)

    -

    2.1.8 (2013-02-25) +

    2.1.8 (2013-02-25)

    • response plot supports now formal models
    -

    2.1.0 (2013-02-21) +

    2.1.0 (2013-02-21)

    • MAJOR RELEASE
    • @@ -1267,7 +1289,7 @@

      2.1.0 (2013-02-21)

    -

    2.0.11 (2013-02-18) +

    2.0.11 (2013-02-18)

    • possibility to consider a user.defined pseudo absences selection @@ -1277,7 +1299,7 @@

      2.0.11 (2013-02-18)

    -

    2.0.9 (2013-02-15) +

    2.0.9 (2013-02-15)

    • automatic save on hard drive of BIOMOD_Projection @@ -1285,7 +1307,7 @@

      2.0.9 (2013-02-15)

    -

    2.0.8 (2013-02-14) +

    2.0.8 (2013-02-14)

    • @@ -1295,14 +1317,14 @@

      2.0.8 (2013-02-14)

    -

    2.0.7 (2013-02-12) +

    2.0.7 (2013-02-12)

    • bug on weights corrected (thanks to Lugi M.)
    -

    2.0.3 (2013-01-18) +

    2.0.3 (2013-01-18)

    • deal with MAXENT categorical variables and categorical @@ -1310,7 +1332,7 @@

      2.0.3 (2013-01-18)

    -

    2.0.0 (2013-01-17) +

    2.0.0 (2013-01-17)

    • MAJOR RELEASE
    • diff --git a/docs/articles/vignette_presentation.html b/docs/articles/vignette_presentation.html index 7275a891..dc3874b8 100644 --- a/docs/articles/vignette_presentation.html +++ b/docs/articles/vignette_presentation.html @@ -33,7 +33,7 @@ biomod2 - 4.2-5-2 + 4.2-6
    diff --git a/docs/authors.html b/docs/authors.html index f5958c24..f6e1cdc4 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -17,7 +17,7 @@ biomod2 - 4.2-5-2 + 4.2-6
    @@ -150,13 +150,13 @@

    Citation

    Thuiller W, Georges D, Gueguen M, Engler R, Breiner F, Lafourcade B, Patin R, Blancheteau H (2024). biomod2: Ensemble Platform for Species Distribution Modeling. -R package version 4.2-5-2, https://biomodhub.github.io/biomod2/. +R package version 4.2-6, https://biomodhub.github.io/biomod2/.

    @Manual{,
       title = {biomod2: Ensemble Platform for Species Distribution Modeling},
       author = {Wilfried Thuiller and Damien Georges and Maya Gueguen and Robin Engler and Frank Breiner and Bruno Lafourcade and Remi Patin and Helene Blancheteau},
       year = {2024},
    -  note = {R package version 4.2-5-2},
    +  note = {R package version 4.2-6},
       url = {https://biomodhub.github.io/biomod2/},
     }
    diff --git a/docs/index.html b/docs/index.html index 6ed272d6..c86e0a05 100644 --- a/docs/index.html +++ b/docs/index.html @@ -33,7 +33,7 @@ biomod2 - 4.2-5-2 + 4.2-6
    @@ -166,7 +166,7 @@


     library(devtools)
    @@ -174,24 +174,56 @@ 



    -

    - biomod 4.2-5 - Modeling options & Tuning Update +

    + biomod 4.2-6 - Improved OptionsBigBoss and new model

    -

    ! biomod2 4.2-5 is now available on CRAN !

    /!\ Please feel free to indicate if you notice some strange new behaviors !

    What is changed ?

      +
    • To improve the models, we made some change in the options for OptionsBigboss. (This only affects the ANN, CTA and RF models.) You can check all your options with the get_options() function.
    • +
    • To reduce the tuning calculation time, we update the tuning ranges for ANN, FDA and MARS models.
    • +
    +
    +
    +

    + What is new ? +

    +
      +
    • +biomod2 has a new model: RFd. It’s a Random Forest model with a down-sampling method.
    • +
    • You can now define seed.val for bm_PseudoAbsences() and BIOMOD_FormatingData().
    • +
    • New fact.aggr argument, for pseudo-absences selection with the random and disk methods, allows to reduce the resolution of the environment.
    • +
    • Possibility to give the same options for all datasets with “for_all_datasets” in bm_ModelingOptions().
    • +
    +


    +
    +

    +Main workflow

    +
    +




    +



    +
    +
    +
    +

    + biomod 4.2-5 - Modeling options & Tuning Update +

    +
    +

    + What is changed ? +

    +
    • modeling options are now automatically retrieved from single models functions, normally allowing the use of all arguments taken into account by these functions
    • tuning has been cleaned up, but keep in mind that it is still a quite long running process
    • in consequence, BIOMOD_ModelingOptions and BIOMOD_Tuning functions become secundary functions (bm_ModelingOptions and bm_Tuning), and modeling options can be directly built through BIOMOD_Modeling function
    -

    - What is new ? +

    + What is new ?

    • @@ -208,11 +240,6 @@


    -
    -

    -Main workflow

    -
    -




    @@ -220,8 +247,8 @@

    biomod 4.2 - Terra Update

    -

    - What is changed ? +

    + What is changed ?

    • @@ -240,8 +267,8 @@

      /!\ Package fresh start… meaning some changes in function names and parameters. We apologize for the trouble >{o.o}<
      Sorry for the inconvenience, and please feel free to indicate if you notice some strange new behaviors !

      -

      - What is changed ? +

      + What is changed ?

      • all code functions have been cleaned, and old / unused functions have been removed
      • @@ -251,8 +278,8 @@

      -

      - What is new ? +

      + What is new ?

      • plot functions have been re-written with ggplot2 @@ -324,7 +351,7 @@

        Developers

        Dev status

        • Cran Version
        • -
        • Github Version
        • +
        • Github Version
        • Last Commit
        • R-CMD-check
        diff --git a/docs/reference/BIOMOD.formated.data.PA.html b/docs/reference/BIOMOD.formated.data.PA.html index cfb837ad..e7d3dd83 100644 --- a/docs/reference/BIOMOD.formated.data.PA.html +++ b/docs/reference/BIOMOD.formated.data.PA.html @@ -19,7 +19,7 @@ biomod2 - 4.2-5-2 + 4.2-6
      @@ -132,9 +132,11 @@

      BIOMOD_FormatingData() output object class (with pseudo-absence PA.dist.min = 0, PA.dist.max = NULL, PA.sre.quant = 0.025, + PA.fact.aggr = NULL, PA.user.table = NULL, na.rm = TRUE, - filter.raster = FALSE + filter.raster = FALSE, + seed.val = NULL ) # S4 method for numeric,SpatRaster @@ -153,9 +155,11 @@

      BIOMOD_FormatingData() output object class (with pseudo-absence PA.dist.min = 0, PA.dist.max = NULL, PA.sre.quant = 0.025, + PA.fact.aggr = NULL, PA.user.table = NULL, na.rm = TRUE, - filter.raster = FALSE + filter.raster = FALSE, + seed.val = NULL )

    @@ -268,6 +272,11 @@

    Arguments

    (see Details)

    +
    PA.fact.aggr
    +

    (optional, default NULL)
    +If strategy = 'random' or strategy = 'disk', a integer defining the factor of aggregation to reduce the resolution

    + +
    PA.user.table

    (optional, default NULL)
    If pseudo-absence selection and PA.strategy = 'user.defined', a matrix or @@ -287,6 +296,11 @@

    Arguments

    If env is of raster type, a logical value defining whether sp is to be filtered when several points occur in the same raster cell

    + +
    seed.val
    +

    (optional, default NULL)
    +An integer value corresponding to the new seed value to be set

    +

    Slots

    diff --git a/docs/reference/BIOMOD_EnsembleForecasting.html b/docs/reference/BIOMOD_EnsembleForecasting.html index 897229f0..52f14091 100644 --- a/docs/reference/BIOMOD_EnsembleForecasting.html +++ b/docs/reference/BIOMOD_EnsembleForecasting.html @@ -19,7 +19,7 @@ biomod2 - 4.2-5-2 + 4.2-6
    diff --git a/docs/reference/BIOMOD_EnsembleModeling.html b/docs/reference/BIOMOD_EnsembleModeling.html index 8296f0e2..1e315305 100644 --- a/docs/reference/BIOMOD_EnsembleModeling.html +++ b/docs/reference/BIOMOD_EnsembleModeling.html @@ -21,7 +21,7 @@ biomod2 - 4.2-5-2 + 4.2-6
    diff --git a/docs/reference/BIOMOD_FormatingData.html b/docs/reference/BIOMOD_FormatingData.html index b2ed378e..f07247fe 100644 --- a/docs/reference/BIOMOD_FormatingData.html +++ b/docs/reference/BIOMOD_FormatingData.html @@ -20,7 +20,7 @@ biomod2 - 4.2-5-2 + 4.2-6
    @@ -133,9 +133,11 @@

    Format input data, and select pseudo-absences if wanted, for usage in PA.dist.min = 0, PA.dist.max = NULL, PA.sre.quant = 0.025, + PA.fact.aggr = NULL, PA.user.table = NULL, na.rm = TRUE, - filter.raster = FALSE + filter.raster = FALSE, + seed.val = NULL )

@@ -249,6 +251,11 @@

Arguments

(see Details)

+
PA.fact.aggr
+

(optional, default NULL)
+If strategy = 'random' or strategy = 'disk', a integer defining the factor of aggregation to reduce the resolution

+ +
PA.user.table

(optional, default NULL)
If pseudo-absence selection and PA.strategy = 'user.defined', a matrix or @@ -268,6 +275,11 @@

Arguments

If expl.var is of raster type, a logical value defining whether resp.var is to be filtered when several points occur in the same raster cell

+ +
seed.val
+

(optional, default NULL)
+An integer value corresponding to the new seed value to be set

+

Value

diff --git a/docs/reference/BIOMOD_LoadModels.html b/docs/reference/BIOMOD_LoadModels.html index d95012ae..681e688b 100644 --- a/docs/reference/BIOMOD_LoadModels.html +++ b/docs/reference/BIOMOD_LoadModels.html @@ -18,7 +18,7 @@ biomod2 - 4.2-5-2 + 4.2-6
diff --git a/docs/reference/BIOMOD_Modeling.html b/docs/reference/BIOMOD_Modeling.html index f291b37e..5e207f2e 100644 --- a/docs/reference/BIOMOD_Modeling.html +++ b/docs/reference/BIOMOD_Modeling.html @@ -20,7 +20,7 @@ biomod2 - 4.2-5-2 + 4.2-6
@@ -169,7 +169,7 @@

Arguments

models

a vector containing model names to be computed, must be among ANN, CTA, FDA, GAM, GBM, GLM, MARS, -MAXENT, MAXNET, RF, SRE, XGBOOST

+MAXENT, MAXNET, RF, RFd, SRE, XGBOOST

models.pa
@@ -374,6 +374,7 @@

Details

(https://biodiversityinformatics.amnh.org/open_source/maxent/)

  • MAXNET : Maximum Entropy (maxnet)

  • RF : Random Forest (randomForest)

  • +
  • RFd : Random Forest downsampled (randomForest)

  • SRE : Surface Range Envelop or usually called BIOCLIM (bm_SRE)

  • XGBOOST : eXtreme Gradient Boosting Training (xgboost)

  • diff --git a/docs/reference/BIOMOD_Projection.html b/docs/reference/BIOMOD_Projection.html index 628aa5bd..ac9228cd 100644 --- a/docs/reference/BIOMOD_Projection.html +++ b/docs/reference/BIOMOD_Projection.html @@ -19,7 +19,7 @@ biomod2 - 4.2-5-2 + 4.2-6
    diff --git a/docs/reference/OptionsBigboss.html b/docs/reference/OptionsBigboss.html index 7f2c46b7..a494a14b 100644 --- a/docs/reference/OptionsBigboss.html +++ b/docs/reference/OptionsBigboss.html @@ -18,7 +18,7 @@ biomod2 - 4.2-5-2 + 4.2-6 @@ -122,7 +122,7 @@

    Format

    A BIOMOD.models.options object with some changed values :

    ANN.binary.nnet.nnet

    • size = 5

    • -
    • decay = 5

    • +
    • decay = 0.1

    • trace = FALSE

    • rang = 0.1

    • maxit = 200

    • @@ -130,7 +130,7 @@

      Format

      CTA.binary.rpart.rpart

      • method = 'class'

      • -
      • control = list(xval = 5, minbucket = 5, minsplit = 5, cp = 0.001, maxdepth = 25)

      • +
      • control = list(xval = 5, minbucket = 5, minsplit = 5, cp = 0.001, maxdepth = 10)

      • cost = NULL

      @@ -183,7 +183,17 @@

      Format

      RF.binary.randomForest.randomForest

      • type = 'classification'

      • ntree = 500

      • -
      • mtry = NULL

      • +
      • mtry = 2

      • +
      • strata = factor(c(0, 1))

      • +
      • sampsize = NULL

      • +
      • nodesize = 5

      • +
      • maxnodes = NULL

      • +

      + +
      RFd.binary.randomForest.randomForest
      +

      • type = 'classification'

      • +
      • ntree = 500

      • +
      • mtry = 2

      • strata = factor(c(0, 1))

      • sampsize = NULL

      • nodesize = 5

      • diff --git a/docs/reference/biomod2_model.html b/docs/reference/biomod2_model.html index baeb6878..8bbce12c 100644 --- a/docs/reference/biomod2_model.html +++ b/docs/reference/biomod2_model.html @@ -137,6 +137,7 @@

        Details

      • MAXNET_biomod2_model : model_class is MAXNET

      • RF_biomod2_model : model_class is RF

      • +
      • RFd_biomod2_model : model_class is RFd

      • SRE_biomod2_model : model_class is SRE

      @@ -226,6 +227,7 @@

      Examples

      showClass("MAXENT_biomod2_model") showClass("MAXNET_biomod2_model") showClass("RF_biomod2_model") +showClass("RFd_biomod2_model") showClass("SRE_biomod2_model")
      diff --git a/docs/reference/bm_ModelingOptions.html b/docs/reference/bm_ModelingOptions.html index 91aa5b3e..db5a473f 100644 --- a/docs/reference/bm_ModelingOptions.html +++ b/docs/reference/bm_ModelingOptions.html @@ -17,7 +17,7 @@ biomod2 - 4.2-5-2 + 4.2-6 @@ -115,7 +115,7 @@

      Configure the modeling options for each selected model

      bm_ModelingOptions(
         data.type,
         models = c("ANN", "CTA", "FDA", "GAM", "GBM", "GLM", "MARS", "MAXENT", "MAXNET", "RF",
      -    "SRE", "XGBOOST"),
      +    "RFd", "SRE", "XGBOOST"),
         strategy,
         user.val = NULL,
         user.base = "bigboss",
      @@ -134,7 +134,7 @@ 

      Arguments

      models

      a vector containing model names to be computed, must be among ANN, CTA, FDA, GAM, GBM, GLM, MARS, -MAXENT, MAXNET, RF, SRE, XGBOOST

      +MAXENT, MAXNET, RF, RFd, SRE, XGBOOST

      strategy
      @@ -195,7 +195,9 @@

      Details

      function

    -
    +

    To define the same options for all datasets of a model, you can provide these options as a list in +user.val with the names "for_all_datasets".

    +

    Note

    MAXENT being the only external model (not called through a R package), diff --git a/docs/reference/bm_PlotEvalMean.html b/docs/reference/bm_PlotEvalMean.html index 0d7a6595..51461704 100644 --- a/docs/reference/bm_PlotEvalMean.html +++ b/docs/reference/bm_PlotEvalMean.html @@ -22,7 +22,7 @@ biomod2 - 4.2-5-2 + 4.2-6

    diff --git a/docs/reference/bm_PseudoAbsences.html b/docs/reference/bm_PseudoAbsences.html index 1be07e3a..4d74768f 100644 --- a/docs/reference/bm_PseudoAbsences.html +++ b/docs/reference/bm_PseudoAbsences.html @@ -18,7 +18,7 @@ biomod2 - 4.2-5-2 + 4.2-6 @@ -123,7 +123,9 @@

    Select pseudo-absences

    sre.quant = 0, dist.min = 0, dist.max = NULL, - user.table = NULL + fact.aggr = NULL, + user.table = NULL, + seed.val = NULL ) bm_PseudoAbsences_user.defined(resp.var, expl.var, ...) @@ -137,10 +139,10 @@

    Select pseudo-absences

    bm_PseudoAbsences_random(resp.var, expl.var, ...) # S4 method for ANY,SpatVector -bm_PseudoAbsences_random(resp.var, expl.var, nb.absences, nb.rep) +bm_PseudoAbsences_random(resp.var, expl.var, nb.absences, nb.rep, fact.aggr) # S4 method for ANY,SpatRaster -bm_PseudoAbsences_random(resp.var, expl.var, nb.absences, nb.rep) +bm_PseudoAbsences_random(resp.var, expl.var, nb.absences, nb.rep, fact.aggr) bm_PseudoAbsences_sre(resp.var, expl.var, ...) @@ -159,7 +161,8 @@

    Select pseudo-absences

    dist.min, dist.max, nb.absences, - nb.rep + nb.rep, + fact.aggr ) # S4 method for ANY,SpatRaster @@ -169,7 +172,8 @@

    Select pseudo-absences

    dist.min, dist.max, nb.absences, - nb.rep + nb.rep, + fact.aggr )
    @@ -223,6 +227,11 @@

    Arguments

    used to make the disk pseudo-absence selection (in meters)

    +
    fact.aggr
    +

    (optional, default NULL)
    +If strategy = 'random' or strategy = 'disk', a integer defining the factor of aggregation to reduce the resolution

    + +
    user.table

    (optional, default NULL)
    If strategy = 'user.defined', a matrix or data.frame with as many rows as @@ -231,6 +240,11 @@

    Arguments

    model(s) for each repetition

    +
    seed.val
    +

    (optional, default NULL)
    +An integer value corresponding to the new seed value to be set

    + +
    ...

    (optional, one or several of the above arguments depending on the selected method)

    diff --git a/docs/reference/bm_RunModelsLoop.html b/docs/reference/bm_RunModelsLoop.html index 661a613a..38640d60 100644 --- a/docs/reference/bm_RunModelsLoop.html +++ b/docs/reference/bm_RunModelsLoop.html @@ -18,7 +18,7 @@ biomod2 - 4.2-5-2 + 4.2-6 @@ -175,7 +175,7 @@

    Arguments

    models

    a vector containing model names to be computed, must be among ANN, CTA, FDA, GAM, GBM, GLM, MARS, -MAXENT, MAXNET, RF, SRE, XGBOOST

    +MAXENT, MAXNET, RF, RFd, SRE, XGBOOST

    models.pa
    @@ -227,7 +227,7 @@

    Arguments

    model

    a character corresponding to the model name to be computed, must be either ANN, CTA, FDA, GAM, GBM, GLM, MARS, -MAXENT, MAXNET, RF, SRE, XGBOOST

    +MAXENT, MAXNET, RF, RFd, SRE, XGBOOST

    run.name
    diff --git a/docs/reference/bm_Tuning.html b/docs/reference/bm_Tuning.html index 0955dec7..6ecc520c 100644 --- a/docs/reference/bm_Tuning.html +++ b/docs/reference/bm_Tuning.html @@ -18,7 +18,7 @@ biomod2 - 4.2-5-2 + 4.2-6 @@ -126,18 +126,18 @@

    Tune models parameters

    metric.AIC = "AIC", weights = NULL, ctrl.train = NULL, - params.train = list(ANN.size = c(2, 4, 6, 8), ANN.decay = c(0.001, 0.01, 0.05, 0.1), - ANN.bag = FALSE, FDA.degree = 1:2, FDA.nprune = 2:38, GAM.select = c(TRUE, FALSE), - GAM.method = c("GCV.Cp", "GACV.Cp", "REML", "P-REML", "ML", "P-ML"), GAM.span = - c(0.3, 0.5, 0.7), GAM.degree = 1, GBM.n.trees = c(500, 1000, 2500), - GBM.interaction.depth = seq(2, 8, by = 3), GBM.shrinkage = c(0.001, 0.01, 0.1), - GBM.n.minobsinnode = 10, MARS.degree = 1:2, MARS.nprune = 2:max(38, 2 * - ncol(bm.format@data.env.var) + 1), MAXENT.algorithm = "maxnet", - MAXENT.parallel - = TRUE, RF.mtry = 1:min(10, ncol(bm.format@data.env.var)), SRE.quant = c(0, 0.0125, - 0.025, 0.05, 0.1), XGBOOST.nrounds = 50, XGBOOST.max_depth = 1, XGBOOST.eta = c(0.3, - 0.4), XGBOOST.gamma = 0, XGBOOST.colsample_bytree = c(0.6, 0.8), - XGBOOST.min_child_weight = 1, XGBOOST.subsample = 0.5) + params.train = list(ANN.size = c(2, 4, 6, 8), ANN.decay = c(0.01, 0.05, 0.1), ANN.bag = + FALSE, FDA.degree = 1:2, FDA.nprune = 2:25, GAM.select = c(TRUE, FALSE), GAM.method = + c("GCV.Cp", "GACV.Cp", "REML", "P-REML", "ML", "P-ML"), GAM.span = c(0.3, 0.5, 0.7), + GAM.degree = 1, GBM.n.trees = c(500, 1000, 2500), GBM.interaction.depth = seq(2, 8, + by = 3), GBM.shrinkage = c(0.001, 0.01, 0.1), GBM.n.minobsinnode = 10, MARS.degree = + 1:2, MARS.nprune = 2:max(21, 2 * ncol(bm.format@data.env.var) + 1), MAXENT.algorithm + = "maxnet", + MAXENT.parallel = TRUE, RF.mtry = 1:min(10, + ncol(bm.format@data.env.var)), RFd.mtry = 1:min(10, ncol(bm.format@data.env.var)), + SRE.quant = c(0, 0.0125, 0.025, 0.05, 0.1), XGBOOST.nrounds = 50, XGBOOST.max_depth = + 1, XGBOOST.eta = c(0.3, 0.4), XGBOOST.gamma = 0, XGBOOST.colsample_bytree = c(0.6, + 0.8), XGBOOST.min_child_weight = 1, XGBOOST.subsample = 0.5) ) @@ -146,7 +146,7 @@

    Arguments

    model

    a character corresponding to the algorithm to be tuned, must be either ANN, CTA, FDA, GAM, GBM, GLM, MARS, -MAXENT, MAXNET, RF, SRE, XGBOOST

    +MAXENT, MAXNET, RF, RFd, SRE, XGBOOST

    tuning.fun
    @@ -255,6 +255,9 @@

    Details

    RF

    mtry

    +
    RFd
    +

    mtry

    +
    SRE

    quant

    diff --git a/docs/reference/getters.out.html b/docs/reference/getters.out.html index 93eb0ef0..69428cfb 100644 --- a/docs/reference/getters.out.html +++ b/docs/reference/getters.out.html @@ -19,7 +19,7 @@ biomod2 - 4.2-5-2 + 4.2-6 diff --git a/docs/reference/index.html b/docs/reference/index.html index 0f9a04c8..8829a8ae 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -17,7 +17,7 @@ biomod2 - 4.2-5-2 + 4.2-6 diff --git a/docs/reference/predict2.bm.html b/docs/reference/predict2.bm.html index 575609c8..c24222da 100644 --- a/docs/reference/predict2.bm.html +++ b/docs/reference/predict2.bm.html @@ -188,6 +188,12 @@

    Functions to get predictions from biomod2 # S4 method for RF_biomod2_model,data.frame predict2(object, newdata, ...) +# S4 method for RFd_biomod2_model,SpatRaster +predict2(object, newdata, ...) + +# S4 method for RFd_biomod2_model,data.frame +predict2(object, newdata, ...) + # S4 method for SRE_biomod2_model,SpatRaster predict2(object, newdata, ...) diff --git a/man/BIOMOD.formated.data.PA.Rd b/man/BIOMOD.formated.data.PA.Rd index e79be0eb..d2a1d533 100644 --- a/man/BIOMOD.formated.data.PA.Rd +++ b/man/BIOMOD.formated.data.PA.Rd @@ -23,9 +23,11 @@ PA.dist.min = 0, PA.dist.max = NULL, PA.sre.quant = 0.025, + PA.fact.aggr = NULL, PA.user.table = NULL, na.rm = TRUE, - filter.raster = FALSE + filter.raster = FALSE, + seed.val = NULL ) \S4method{BIOMOD.formated.data.PA}{numeric,SpatRaster}( @@ -43,9 +45,11 @@ PA.dist.min = 0, PA.dist.max = NULL, PA.sre.quant = 0.025, + PA.fact.aggr = NULL, PA.user.table = NULL, na.rm = TRUE, - filter.raster = FALSE + filter.raster = FALSE, + seed.val = NULL ) } \arguments{ @@ -128,6 +132,9 @@ If pseudo-absence selection and \code{PA.strategy = 'sre'}, a \code{numeric} bet and \code{0.5} defining the half-quantile used to make the \code{sre} pseudo-absence selection (see Details)} +\item{PA.fact.aggr}{(\emph{optional, default} \code{NULL}) \cr +If \code{strategy = 'random'} or \code{strategy = 'disk'}, a \code{integer} defining the factor of aggregation to reduce the resolution} + \item{PA.user.table}{(\emph{optional, default} \code{NULL}) \cr If pseudo-absence selection and \code{PA.strategy = 'user.defined'}, a \code{matrix} or \code{data.frame} with as many rows as \code{resp.var} values, as many columns as @@ -141,6 +148,9 @@ values for explanatory variables should be removed from the analysis or not} \item{filter.raster}{(\emph{optional, default} \code{FALSE}) \cr If \code{env} is of raster type, a \code{logical} value defining whether \code{sp} is to be filtered when several points occur in the same raster cell} + +\item{seed.val}{(\emph{optional, default} \code{NULL}) \cr +An \code{integer} value corresponding to the new seed value to be set} } \description{ Class returned by \code{\link{BIOMOD_FormatingData}}, and used by diff --git a/man/BIOMOD_FormatingData.Rd b/man/BIOMOD_FormatingData.Rd index f7c6f2cd..a74825d4 100644 --- a/man/BIOMOD_FormatingData.Rd +++ b/man/BIOMOD_FormatingData.Rd @@ -19,9 +19,11 @@ BIOMOD_FormatingData( PA.dist.min = 0, PA.dist.max = NULL, PA.sre.quant = 0.025, + PA.fact.aggr = NULL, PA.user.table = NULL, na.rm = TRUE, - filter.raster = FALSE + filter.raster = FALSE, + seed.val = NULL ) } \arguments{ @@ -105,6 +107,9 @@ If pseudo-absence selection and \code{PA.strategy = 'sre'}, a \code{numeric} bet and \code{0.5} defining the half-quantile used to make the \code{sre} pseudo-absence selection (see Details)} +\item{PA.fact.aggr}{(\emph{optional, default} \code{NULL}) \cr +If \code{strategy = 'random'} or \code{strategy = 'disk'}, a \code{integer} defining the factor of aggregation to reduce the resolution} + \item{PA.user.table}{(\emph{optional, default} \code{NULL}) \cr If pseudo-absence selection and \code{PA.strategy = 'user.defined'}, a \code{matrix} or \code{data.frame} with as many rows as \code{resp.var} values, as many columns as @@ -118,6 +123,9 @@ explanatory variables should be removed from the analysis or not} \item{filter.raster}{(\emph{optional, default} \code{FALSE}) \cr If \code{expl.var} is of raster type, a \code{logical} value defining whether \code{resp.var} is to be filtered when several points occur in the same raster cell} + +\item{seed.val}{(\emph{optional, default} \code{NULL}) \cr +An \code{integer} value corresponding to the new seed value to be set} } \value{ A \code{\link{BIOMOD.formated.data}} object that can be used to build species distribution diff --git a/man/BIOMOD_Modeling.Rd b/man/BIOMOD_Modeling.Rd index d1e4676b..58a1a0ea 100644 --- a/man/BIOMOD_Modeling.Rd +++ b/man/BIOMOD_Modeling.Rd @@ -48,7 +48,7 @@ object returned by the \code{\link{BIOMOD_FormatingData}} function} \item{models}{a \code{vector} containing model names to be computed, must be among \code{ANN}, \code{CTA}, \code{FDA}, \code{GAM}, \code{GBM}, \code{GLM}, \code{MARS}, -\code{MAXENT}, \code{MAXNET}, \code{RF}, \code{SRE}, \code{XGBOOST}} +\code{MAXENT}, \code{MAXNET}, \code{RF}, \code{RFd}, \code{SRE}, \code{XGBOOST}} \item{models.pa}{(\emph{optional, default} \code{NULL}) \cr A \code{list} containing for each model a \code{vector} defining which pseudo-absence datasets @@ -198,6 +198,7 @@ metrics (see Details). (\url{https://biodiversityinformatics.amnh.org/open_source/maxent/}) \item \code{MAXNET} : Maximum Entropy (\code{\link[maxnet]{maxnet}}) \item \code{RF} : Random Forest (\code{\link[randomForest]{randomForest}}) + \item \code{RFd} : Random Forest downsampled (\code{\link[randomForest]{randomForest}}) \item \code{SRE} : Surface Range Envelop or usually called BIOCLIM (\code{\link{bm_SRE}}) \item \code{XGBOOST} : eXtreme Gradient Boosting Training (\code{\link[xgboost]{xgboost}}) }} diff --git a/man/OptionsBigboss.Rd b/man/OptionsBigboss.Rd index 8549fa4e..0a7b5582 100644 --- a/man/OptionsBigboss.Rd +++ b/man/OptionsBigboss.Rd @@ -11,7 +11,7 @@ A \code{\link{BIOMOD.models.options}} object with some changed values : \item{\code{ANN.binary.nnet.nnet}}{ \itemize{ \item \code{size = 5} - \item \code{decay = 5} + \item \code{decay = 0.1} \item \code{trace = FALSE} \item \code{rang = 0.1} \item \code{maxit = 200} @@ -20,7 +20,7 @@ A \code{\link{BIOMOD.models.options}} object with some changed values : \item{\code{CTA.binary.rpart.rpart}}{ \itemize{ \item \code{method = 'class'} - \item \code{control = list(xval = 5, minbucket = 5, minsplit = 5, cp = 0.001, maxdepth = 25)} + \item \code{control = list(xval = 5, minbucket = 5, minsplit = 5, cp = 0.001, maxdepth = 10)} \item \code{cost = NULL} } } @@ -78,7 +78,18 @@ A \code{\link{BIOMOD.models.options}} object with some changed values : \itemize{ \item \code{type = 'classification'} \item \code{ntree = 500} - \item \code{mtry = NULL} + \item \code{mtry = 2} + \item \code{strata = factor(c(0, 1))} + \item \code{sampsize = NULL} + \item \code{nodesize = 5} + \item \code{maxnodes = NULL} + } + } + \item{\code{RFd.binary.randomForest.randomForest}}{ + \itemize{ + \item \code{type = 'classification'} + \item \code{ntree = 500} + \item \code{mtry = 2} \item \code{strata = factor(c(0, 1))} \item \code{sampsize = NULL} \item \code{nodesize = 5} diff --git a/man/biomod2_model.Rd b/man/biomod2_model.Rd index 25add197..e690b50f 100644 --- a/man/biomod2_model.Rd +++ b/man/biomod2_model.Rd @@ -14,6 +14,7 @@ \alias{MAXENT_biomod2_model-class} \alias{MAXNET_biomod2_model-class} \alias{RF_biomod2_model-class} +\alias{RFd_biomod2_model-class} \alias{SRE_biomod2_model-class} \alias{XGBOOST_biomod2_model-class} \alias{show,biomod2_model-method} @@ -43,6 +44,7 @@ Class created by \code{\link{BIOMOD_Modeling}} and \code{\link{bm_RunModel}} \item \code{MAXNET_biomod2_model} : \code{model_class} is \code{MAXNET} \item \code{RF_biomod2_model} : \code{model_class} is \code{RF} + \item \code{RFd_biomod2_model} : \code{model_class} is \code{RFd} \item \code{SRE_biomod2_model} : \code{model_class} is \code{SRE} } } @@ -87,6 +89,7 @@ showClass("MARS_biomod2_model") showClass("MAXENT_biomod2_model") showClass("MAXNET_biomod2_model") showClass("RF_biomod2_model") +showClass("RFd_biomod2_model") showClass("SRE_biomod2_model") } diff --git a/man/bm_ModelingOptions.Rd b/man/bm_ModelingOptions.Rd index 1cfbf8cf..8b113963 100644 --- a/man/bm_ModelingOptions.Rd +++ b/man/bm_ModelingOptions.Rd @@ -7,7 +7,7 @@ bm_ModelingOptions( data.type, models = c("ANN", "CTA", "FDA", "GAM", "GBM", "GLM", "MARS", "MAXENT", "MAXNET", "RF", - "SRE", "XGBOOST"), + "RFd", "SRE", "XGBOOST"), strategy, user.val = NULL, user.base = "bigboss", @@ -21,7 +21,7 @@ bm_ModelingOptions( \item{models}{a \code{vector} containing model names to be computed, must be among \code{ANN}, \code{CTA}, \code{FDA}, \code{GAM}, \code{GBM}, \code{GLM}, \code{MARS}, -\code{MAXENT}, \code{MAXNET}, \code{RF}, \code{SRE}, \code{XGBOOST}} +\code{MAXENT}, \code{MAXNET}, \code{RF}, \code{RFd}, \code{SRE}, \code{XGBOOST}} \item{strategy}{a \code{character} corresponding to the method to select models' parameters values, must be either \code{default}, \code{bigboss}, \code{user.defined}, \code{tuned}} @@ -66,6 +66,9 @@ argument : \item{tuned}{default parameter values are updated by calling \code{\link{bm_Tuning}} function} } + +To define the same options for all datasets of a model, you can provide these options as a list in +user.val with the names "for_all_datasets". } \note{ \code{MAXENT} being the only external model (not called through a \code{R} package), diff --git a/man/bm_PseudoAbsences.Rd b/man/bm_PseudoAbsences.Rd index 561e1d1f..d9c1b75d 100644 --- a/man/bm_PseudoAbsences.Rd +++ b/man/bm_PseudoAbsences.Rd @@ -25,7 +25,9 @@ bm_PseudoAbsences( sre.quant = 0, dist.min = 0, dist.max = NULL, - user.table = NULL + fact.aggr = NULL, + user.table = NULL, + seed.val = NULL ) bm_PseudoAbsences_user.defined(resp.var, expl.var, ...) @@ -36,9 +38,9 @@ bm_PseudoAbsences_user.defined(resp.var, expl.var, ...) bm_PseudoAbsences_random(resp.var, expl.var, ...) -\S4method{bm_PseudoAbsences_random}{ANY,SpatVector}(resp.var, expl.var, nb.absences, nb.rep) +\S4method{bm_PseudoAbsences_random}{ANY,SpatVector}(resp.var, expl.var, nb.absences, nb.rep, fact.aggr) -\S4method{bm_PseudoAbsences_random}{ANY,SpatRaster}(resp.var, expl.var, nb.absences, nb.rep) +\S4method{bm_PseudoAbsences_random}{ANY,SpatRaster}(resp.var, expl.var, nb.absences, nb.rep, fact.aggr) bm_PseudoAbsences_sre(resp.var, expl.var, ...) @@ -54,7 +56,8 @@ bm_PseudoAbsences_disk(resp.var, expl.var, ...) dist.min, dist.max, nb.absences, - nb.rep + nb.rep, + fact.aggr ) \S4method{bm_PseudoAbsences_disk}{ANY,SpatRaster}( @@ -63,7 +66,8 @@ bm_PseudoAbsences_disk(resp.var, expl.var, ...) dist.min, dist.max, nb.absences, - nb.rep + nb.rep, + fact.aggr ) } \arguments{ @@ -99,12 +103,18 @@ used to make the \code{disk} pseudo-absence selection (in meters)} If \code{strategy = 'disk'}, a \code{numeric} defining the maximal distance to presence points used to make the \code{disk} pseudo-absence selection (in meters)} +\item{fact.aggr}{(\emph{optional, default} \code{NULL}) \cr +If \code{strategy = 'random'} or \code{strategy = 'disk'}, a \code{integer} defining the factor of aggregation to reduce the resolution} + \item{user.table}{(\emph{optional, default} \code{NULL}) \cr If \code{strategy = 'user.defined'}, a \code{matrix} or \code{data.frame} with as many rows as \code{resp.var} values, as many columns as \code{nb.rep}, and containing \code{TRUE} or \code{FALSE} values defining which points will be used to build the species distribution model(s) for each repetition} +\item{seed.val}{(\emph{optional, default} \code{NULL}) \cr +An \code{integer} value corresponding to the new seed value to be set} + \item{\ldots}{(\emph{optional, one or several of the above arguments depending on the selected method})} } diff --git a/man/bm_RunModelsLoop.Rd b/man/bm_RunModelsLoop.Rd index de43aa1c..99ca370a 100644 --- a/man/bm_RunModelsLoop.Rd +++ b/man/bm_RunModelsLoop.Rd @@ -55,7 +55,7 @@ obtained with the \code{\link{bm_CrossValidation}} function} \item{models}{a \code{vector} containing model names to be computed, must be among \code{ANN}, \code{CTA}, \code{FDA}, \code{GAM}, \code{GBM}, \code{GLM}, \code{MARS}, -\code{MAXENT}, \code{MAXNET}, \code{RF}, \code{SRE}, \code{XGBOOST}} +\code{MAXENT}, \code{MAXNET}, \code{RF}, \code{RFd}, \code{SRE}, \code{XGBOOST}} \item{models.pa}{(\emph{optional, default} \code{NULL}) \cr A \code{list} containing for each model a \code{vector} defining which pseudo-absence datasets @@ -89,7 +89,7 @@ A \code{logical} value defining whether the progress bar is to be rendered or no \item{model}{a \code{character} corresponding to the model name to be computed, must be either \code{ANN}, \code{CTA}, \code{FDA}, \code{GAM}, \code{GBM}, \code{GLM}, \code{MARS}, -\code{MAXENT}, \code{MAXNET}, \code{RF}, \code{SRE}, \code{XGBOOST}} +\code{MAXENT}, \code{MAXNET}, \code{RF}, \code{RFd}, \code{SRE}, \code{XGBOOST}} \item{run.name}{a \code{character} corresponding to the model to be run (sp.name + pa.id + run.id)} diff --git a/man/bm_Tuning.Rd b/man/bm_Tuning.Rd index c9a4d5dc..eb6a7209 100644 --- a/man/bm_Tuning.Rd +++ b/man/bm_Tuning.Rd @@ -16,24 +16,24 @@ bm_Tuning( metric.AIC = "AIC", weights = NULL, ctrl.train = NULL, - params.train = list(ANN.size = c(2, 4, 6, 8), ANN.decay = c(0.001, 0.01, 0.05, 0.1), - ANN.bag = FALSE, FDA.degree = 1:2, FDA.nprune = 2:38, GAM.select = c(TRUE, FALSE), - GAM.method = c("GCV.Cp", "GACV.Cp", "REML", "P-REML", "ML", "P-ML"), GAM.span = - c(0.3, 0.5, 0.7), GAM.degree = 1, GBM.n.trees = c(500, 1000, 2500), - GBM.interaction.depth = seq(2, 8, by = 3), GBM.shrinkage = c(0.001, 0.01, 0.1), - GBM.n.minobsinnode = 10, MARS.degree = 1:2, MARS.nprune = 2:max(38, 2 * - ncol(bm.format@data.env.var) + 1), MAXENT.algorithm = "maxnet", - MAXENT.parallel - = TRUE, RF.mtry = 1:min(10, ncol(bm.format@data.env.var)), SRE.quant = c(0, 0.0125, - 0.025, 0.05, 0.1), XGBOOST.nrounds = 50, XGBOOST.max_depth = 1, XGBOOST.eta = c(0.3, - 0.4), XGBOOST.gamma = 0, XGBOOST.colsample_bytree = c(0.6, 0.8), - XGBOOST.min_child_weight = 1, XGBOOST.subsample = 0.5) + params.train = list(ANN.size = c(2, 4, 6, 8), ANN.decay = c(0.01, 0.05, 0.1), ANN.bag = + FALSE, FDA.degree = 1:2, FDA.nprune = 2:25, GAM.select = c(TRUE, FALSE), GAM.method = + c("GCV.Cp", "GACV.Cp", "REML", "P-REML", "ML", "P-ML"), GAM.span = c(0.3, 0.5, 0.7), + GAM.degree = 1, GBM.n.trees = c(500, 1000, 2500), GBM.interaction.depth = seq(2, 8, + by = 3), GBM.shrinkage = c(0.001, 0.01, 0.1), GBM.n.minobsinnode = 10, MARS.degree = + 1:2, MARS.nprune = 2:max(21, 2 * ncol(bm.format@data.env.var) + 1), MAXENT.algorithm + = "maxnet", + MAXENT.parallel = TRUE, RF.mtry = 1:min(10, + ncol(bm.format@data.env.var)), RFd.mtry = 1:min(10, ncol(bm.format@data.env.var)), + SRE.quant = c(0, 0.0125, 0.025, 0.05, 0.1), XGBOOST.nrounds = 50, XGBOOST.max_depth = + 1, XGBOOST.eta = c(0.3, 0.4), XGBOOST.gamma = 0, XGBOOST.colsample_bytree = c(0.6, + 0.8), XGBOOST.min_child_weight = 1, XGBOOST.subsample = 0.5) ) } \arguments{ \item{model}{a \code{character} corresponding to the algorithm to be tuned, must be either \code{ANN}, \code{CTA}, \code{FDA}, \code{GAM}, \code{GBM}, \code{GLM}, \code{MARS}, -\code{MAXENT}, \code{MAXNET}, \code{RF}, \code{SRE}, \code{XGBOOST}} +\code{MAXENT}, \code{MAXNET}, \code{RF}, \code{RFd}, \code{SRE}, \code{XGBOOST}} \item{tuning.fun}{a \code{character} corresponding to the model function name to be called through \code{\link[caret]{train}} function for tuning parameters (see \code{\link{ModelsTable}} @@ -110,6 +110,7 @@ Currently, the available parameters to be tuned are the following : \item{MARS}{\code{degree}, \code{nprune}} \item{MAXENT}{\code{algorithm}, \code{parallel}} \item{RF}{\code{mtry}} + \item{RFd}{\code{mtry}} \item{SRE}{\code{quant}} \item{XGBOOST}{\code{nrounds}, \code{max_depth}, \code{eta}, \code{gamma}, \code{colsampl_bytree}, \code{min_child_weight}, \code{subsample}} diff --git a/man/predict2.bm.Rd b/man/predict2.bm.Rd index fe235ca1..b1912b35 100644 --- a/man/predict2.bm.Rd +++ b/man/predict2.bm.Rd @@ -24,6 +24,8 @@ \alias{predict2.MAXNET_biomod2_model.data.frame} \alias{predict2.RF_biomod2_model.SpatRaster} \alias{predict2.RF_biomod2_model.data.frame} +\alias{predict2.RFd_biomod2_model.SpatRaster} +\alias{predict2.RFd_biomod2_model.data.frame} \alias{predict2.SRE_biomod2_model.SpatRaster} \alias{predict2.SRE_biomod2_model.data.frame} \alias{predict2,biomod2_model,SpatRaster-method} @@ -48,6 +50,8 @@ \alias{predict2,MAXNET_biomod2_model,data.frame-method} \alias{predict2,RF_biomod2_model,SpatRaster-method} \alias{predict2,RF_biomod2_model,data.frame-method} +\alias{predict2,RFd_biomod2_model,SpatRaster-method} +\alias{predict2,RFd_biomod2_model,data.frame-method} \alias{predict2,SRE_biomod2_model,SpatRaster-method} \alias{predict2,SRE_biomod2_model,data.frame-method} \alias{predict2,XGBOOST_biomod2_model,SpatRaster-method} @@ -100,6 +104,10 @@ predict2(object, newdata, ...) \S4method{predict2}{RF_biomod2_model,data.frame}(object, newdata, ...) +\S4method{predict2}{RFd_biomod2_model,SpatRaster}(object, newdata, ...) + +\S4method{predict2}{RFd_biomod2_model,data.frame}(object, newdata, ...) + \S4method{predict2}{SRE_biomod2_model,SpatRaster}(object, newdata, ...) \S4method{predict2}{SRE_biomod2_model,data.frame}(object, newdata, ...) diff --git a/vignettes/news.Rmd b/vignettes/news.Rmd index edaaba18..b0ae8dd1 100644 --- a/vignettes/news.Rmd +++ b/vignettes/news.Rmd @@ -13,6 +13,18 @@ vignette: > ## Development updates +### 2024 + +#### 4.2-6 Devel actuel + +- Warning ! Some options for `OptionsBigboss` have been modified (concern only ANN, CTA and RF models) +- Some changes for the tuning ranges. +- Add *RFd*: Random Forest with a down-sampling method. +- Add _seed.val_ for `bm_pseudoAbsences` and `BIOMOD_FormatingData`. +- Add _fact.aggr_ argument for pseudo-absences selection with the random and disk methods. It allows to reduce the resolution of the environment. +- Possibility to give the same options for all datasets with _"for_all_datasets"_ in `bm_ModelingOptions`. + + ### 2024 ##### Bugfix