From 039930f76586fed24b820afac0f6095e0f929820 Mon Sep 17 00:00:00 2001 From: MayaGueguen Date: Wed, 24 Apr 2024 11:22:24 +0200 Subject: [PATCH] Minor corrections (doc + check ok) --- NAMESPACE | 1 + R/biomod2_globalVariables.R | 6 +- R/bm_Tuning.R | 124 ++++++++++++++++-------------------- 3 files changed, 60 insertions(+), 71 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index d931e4ab..81ae0d62 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -192,6 +192,7 @@ importFrom(stats,as.formula) importFrom(stats,binomial) importFrom(stats,complete.cases) importFrom(stats,cor) +importFrom(stats,formula) importFrom(stats,glm) importFrom(stats,median) importFrom(stats,na.exclude) diff --git a/R/biomod2_globalVariables.R b/R/biomod2_globalVariables.R index cac11819..79b268c5 100644 --- a/R/biomod2_globalVariables.R +++ b/R/biomod2_globalVariables.R @@ -31,7 +31,8 @@ utils::globalVariables(names = c("i")) ## bm_Tuning ------------ -utils::globalVariables(names = c("calib.i", "rep", "quant", "i", "typ", "intlev", "fi")) +utils::globalVariables(names = c("calib.i", "rep", "quant" + , "i", "typ", "intlev", "fi")) ## BIOMOD_Projection ------------ utils::globalVariables(names = c("do.stack", @@ -76,7 +77,8 @@ utils::globalVariables(names = c("thiscol", "pred", "proj")) utils::globalVariables(names = c("i.abs")) ## bm_Tuning ------------ -utils::globalVariables(names = c("dataset.i", "PA.i", "tuned.mod", "train.params", "tuning.grid", "criteria.AIC")) +utils::globalVariables(names = c("dataset.i", "PA.i", "tuned.mod", "train.params" + , "tuning.grid", "criteria.AIC")) ## bm_ModelingOptions ------------ utils::globalVariables(names = c("ModelsTable")) diff --git a/R/bm_Tuning.R b/R/bm_Tuning.R index e1fbd7ef..5737174f 100644 --- a/R/bm_Tuning.R +++ b/R/bm_Tuning.R @@ -1,6 +1,6 @@ ## --------------------------------------------------------------------------- ## ##' @name bm_Tuning -##' @author Frank Breiner, Maya Gueguen +##' @author Frank Breiner, Maya Gueguen, Helene Blancheteau ##' ##' @title Tune models parameters ##' @@ -69,7 +69,7 @@ ##' \describe{ ##' \item{ANN}{\code{size}, \code{decay}, \code{bag}} ##' \item{FDA}{\code{degree}, \code{nprune}} -##' \item{GAM}{\code{select}, \code{method}} +##' \item{GAM.mgcv}{\code{select}, \code{method}} ##' \item{GBM}{\code{n.trees}, \code{interaction.depth}, \code{shrinkage}, \code{n.minobsinnode}} ##' \item{MARS}{\code{degree}, \code{nprune}} ##' \item{MAXENT}{\code{algorithm}, \code{parallel}} @@ -85,7 +85,7 @@ ##' ##' @note ##' \itemize{ -##' \item No tuning for \code{GLM} and \code{MAXNET} +##' \item No tuning for \code{GAM.gam}, \code{GLM} and \code{MAXNET} ##' \item \code{MAXENT} is tuned through \code{\link[ENMeval]{ENMevaluate}} function which is ##' calling either : ##' \itemize{ @@ -95,8 +95,9 @@ ##' } ##' \item \code{SRE} is tuned through \code{\link{bm_SRE}} function ##' \item All other models are tuned through \code{\link[caret]{train}} function -##' \item No optimization of formula for \code{MAXENT}, \code{MAXNET} and \code{SRE} -##' \item Selection variables only for \code{GAM} and \code{GLM} +##' \item No optimization of formula for \code{MAXENT}, \code{MAXNET}, \code{SRE} and +##' \code{XGBOOST} +##' \item Variables selection only for \code{GAM.gam} and \code{GLM} ##' } ##' ##' @@ -177,7 +178,7 @@ ##' ##' ##' @importFrom foreach foreach %do% %:% -##' @importFrom stats aggregate +##' @importFrom stats aggregate formula ##' @importFrom PresenceAbsence optimal.thresholds presence.absence.accuracy ##' ##' @@ -279,12 +280,12 @@ bm_Tuning <- function(model, returnData = FALSE) } - + argsval <- foreach(PA.i = combi$PA, calib.i = combi$calib, dataset.i = combi$name_dataset) %do% { cat(paste0("\n\t\t> Dataset ", dataset.i)) argstmp <- bm.options@args.default - + if (model == "MAXNET") { warning("No tuning available for that model. Sorry.") } else { @@ -300,13 +301,13 @@ bm_Tuning <- function(model, ## SRE case myResp <- mySpExpl[, 1] - myExpl <- mySpExpl[, 4:(3+ncol(bm.format@data.env.var))] + myExpl <- mySpExpl[, 4:(3 + ncol(bm.format@data.env.var))] ## MAXENT case if (params.train$MAXENT.algorithm == "maxnet") { mySpExpl[["_allData_allRun"]] <- NULL mySpExpl[, 1] <- ifelse(mySpExpl[, 1] == 1 & !is.na(mySpExpl[, 1]), 1, 0) - mySpExpl <- mySpExpl[, 1:(3+ncol(bm.format@data.env.var))] + mySpExpl <- mySpExpl[, 1:(3 + ncol(bm.format@data.env.var))] } if (model == "MAXENT") { # ------------------------------------------# @@ -350,13 +351,15 @@ bm_Tuning <- function(model, new.env = myExpl[fold == i, ], quant = quant, do.extrem = FALSE)) - RES <- presence.absence.accuracy(DATA, threshold = as.vector(optimal.thresholds(DATA, opt.methods = 3)[2], mode = "numeric")) + thresh <- as.vector(optimal.thresholds(DATA, opt.methods = 3)[2], mode = "numeric") + RES <- presence.absence.accuracy(DATA, threshold = thresh) return(data.frame(RES, quant = quant)) } return(data.frame(RES, rep = rep)) } tune.SRE$TSS <- tune.SRE$sensitivity + tune.SRE$specificity - 1 - tmp <- aggregate(tune.SRE[, c("sensitivity", "specificity", "Kappa", "AUC", "TSS")], by = list(quant = tune.SRE$quant), mean) + tmp <- aggregate(tune.SRE[, c("sensitivity", "specificity", "Kappa", "AUC", "TSS")] + , by = list(quant = tune.SRE$quant), mean) argstmp$quant <- tmp[which.max(tmp[, metric.eval]), "quant"] } @@ -370,12 +373,13 @@ bm_Tuning <- function(model, mySpExpl[, PA.i] == TRUE)] mySpExpl <- mySpExpl[which(calib.lines[, calib.i] == TRUE), ] mySpExpl <- mySpExpl[which(mySpExpl[, PA.i] == TRUE), ] - mySpExpl[, 1] <- as.factor(ifelse(mySpExpl[, 1] == 1 & !is.na(mySpExpl[, 1]), "Presence", "Absence")) + mySpExpl[, 1] <- as.factor(ifelse(mySpExpl[, 1] == 1 & !is.na(mySpExpl[, 1]) + , "Presence", "Absence")) myResp <- mySpExpl[, 1] - myExpl <- mySpExpl[, 4:(3+ncol(bm.format@data.env.var))] - + myExpl <- mySpExpl[, 4:(3 + ncol(bm.format@data.env.var))] + ## run tuning ------------------------------------------------------- - + cmd.tuning <- "caret::train(x = myExpl, y = myResp, method = tuning.fun, tuneGrid = tuning.grid," cmd.tuning <- paste0(cmd.tuning, " trControl = ctrl.train, metric = 'ROC',") if (tuning.fun %in% c("fda", "rpart")) { ## add weights @@ -401,12 +405,12 @@ bm_Tuning <- function(model, if (!is.null(tuned.mod)) { tmp <- tuned.mod$results tmp$TSS <- tmp$Sens + tmp$Spec - 1 + if (model == "XGBOOST") { for (param in train.params$params) { if (is.null(argstmp[[param]])){ argstmp$params[[param]] <- tmp[which.max(tmp[, metric.eval]), param] - } - else { + } else { argstmp[[param]] <- tmp[which.max(tmp[, metric.eval]), param]} } } else { @@ -414,8 +418,12 @@ bm_Tuning <- function(model, argstmp[[param]] <- tmp[which.max(tmp[, metric.eval]), param] } } + tuning.form <- tuning.grid[which.max(tmp[, metric.eval]), ] - if (model == "RF"){tuning.form <- data.frame(mtry = tuning.grid[which.max(tmp[, metric.eval]), ])} + + if (model == "RF") { + tuning.form <- data.frame(mtry = tuning.grid[which.max(tmp[, metric.eval]), ]) + } if (model == "CTA") { tuning.fun = "rpart2" @@ -435,51 +443,22 @@ bm_Tuning <- function(model, cat("\n\t\t\t> Tuning formula...") cmd.form <- sub("tuneGrid = tuning.grid", "tuneGrid = tuning.form", cmd.tuning) - cmd.form <- sub("weights = current.weights,","",cmd.form) + cmd.form <- sub("weights = current.weights,", "", cmd.form) cmd.init <- "form = bm_MakeFormula(resp.name = 'resp', expl.var = myExpl, type = typ, interaction.level = intlev)," cmd.init <- paste0(cmd.init, " data = cbind(myExpl, resp = myResp),") cmd.form <- sub("x = myExpl, y = myResp,", cmd.init, cmd.form) - max.intlev <- min(ncol(myExpl) - 1,3) + max.intlev <- min(ncol(myExpl) - 1, 3) + typ.vec = c('simple', 'quadratic', 'polynomial', 's_smoother') - if (model %in% c("CTA")){ - TMP <- foreach (typ = c('simple', 'quadratic', 'polynomial', 's_smoother'), .combine = "rbind") %do% - { - tuned.form <- NULL - intlev <- 0 - eval(parse(text = paste0("capture.output(" - , "try(tuned.form <- ", sub(")$", ", silent = TRUE)", cmd.form) - , ")"))) - if (!is.null(tuned.form)) { - tmp <- tuned.form$results - tmp$TSS <- tmp$Sens + tmp$Spec - 1 - formu <- tuned.form$coefnames - formu <- paste0(bm.format@sp.name, " ~ 1 + ", paste0(formu, collapse = " + ")) - return(data.frame(tmp, type = typ, interaction.level = intlev, formula = formu)) - } - } + if (model %in% c("CTA", "FDA")) { + if (model == "CTA") { typ.vec = c('simple', 'quadratic', 'polynomial', 's_smoother') } + if (model == "FDA") { typ.vec = c('simple', 's_smoother') } - } else if (model == "FDA"){ - TMP <- foreach (typ = c('simple','s_smoother'), .combine = "rbind") %do% - { - tuned.form <- NULL - intlev <- 0 - eval(parse(text = paste0("capture.output(" - , "try(tuned.form <- ", sub(")$", ", silent = TRUE)", cmd.form) - , ")"))) - if (!is.null(tuned.form)) { - tmp <- tuned.form$results - tmp$TSS <- tmp$Sens + tmp$Spec - 1 - formu <- tuned.form$coefnames - formu <- paste0(bm.format@sp.name, " ~ 1 + ", paste0(formu, collapse = " + ")) - return(data.frame(tmp, type = typ, interaction.level = intlev, formula = formu)) - } - } - } else if (model == "RF"){ - TMP <- foreach (typ = c('simple','quadratic', 'polynomial'), .combine = "rbind") %:% - foreach (intlev = 0:max.intlev, .combine = "rbind") %do% + TMP <- foreach (typ = typ.vec, .combine = "rbind") %do% { tuned.form <- NULL + intlev <- 0 eval(parse(text = paste0("capture.output(" , "try(tuned.form <- ", sub(")$", ", silent = TRUE)", cmd.form) , ")"))) @@ -492,7 +471,9 @@ bm_Tuning <- function(model, } } } else { - TMP <- foreach (typ = c('simple', 'quadratic', 'polynomial', 's_smoother'), .combine = "rbind") %:% + if (model == "RF") { typ.vec = c('simple','quadratic', 'polynomial') } + + TMP <- foreach (typ = typ.vec, .combine = "rbind") %:% foreach (intlev = 0:max.intlev, .combine = "rbind") %do% { tuned.form <- NULL @@ -509,9 +490,11 @@ bm_Tuning <- function(model, } } argstmp$formula <- TMP[which.max(TMP[, metric.eval]), "formula"] - if (model %in% c("GBM","GAM","MARS","RF","ANN")){argstmp$formula <- formula(argstmp$formula)} + if (model %in% c("ANN", "GAM", "GBM", "MARS", "RF")) { + argstmp$formula <- formula(argstmp$formula) + } } else { - if (model %in% c("CTA","GAM","FDA","GBM","GLM")){ + if (model %in% c("CTA", "FDA", "GAM", "GBM", "GLM")) { argstmp$formula <- bm_MakeFormula(resp.name = bm.format@sp.name , expl.var = myExpl , type = 'simple' @@ -520,8 +503,9 @@ bm_Tuning <- function(model, } ## run variable selection ----------------------------------------------------------------- - if (do.stepAIC && (model == "GLM" || - (model == "GAM" && bm.options@package == "gam")) ) { + if (do.stepAIC && + (model == "GLM" || + (model == "GAM" && bm.options@package == "gam")) ) { cat("\n\t\t\t> Tuning variables (AIC)...") if (model == "GLM") { @@ -530,15 +514,17 @@ bm_Tuning <- function(model, family = argstmp$family, control = argstmp$control, weights = current.weights, - mustart = rep(ifelse(!is.null(argstmp$mustart) & nchar(argstmp$mustart) > 0, argstmp$mustart, 0.5), length(myResp)), + mustart = rep(ifelse(!is.null(argstmp$mustart) & nchar(argstmp$mustart) > 0 + , argstmp$mustart, 0.5), length(myResp)), model = TRUE) try(tuned.AIC <- MASS::stepAIC(glmStart, - scope = list(upper = (sub(".*~", "~", argstmp$formula)), lower = ~1), - k = criteria.AIC, - direction = "both", - trace = FALSE, - steps = 10000)) + scope = list(upper = (sub(".*~", "~", argstmp$formula)), lower = ~1), + k = criteria.AIC, + direction = "both", + trace = FALSE, + steps = 10000)) if (!is.null(tuned.AIC)) { argstmp$formula <- deparse(tuned.AIC$formula) } + } else if (model == "GAM") { # if (bm.options@GAM$algo == 'GAM_gam') { ## gam package gamStart <- do.call(gam::gam, list(formula = as.formula(paste0(bm.format@sp.name, " ~ 1")), data = mySpExpl, @@ -638,7 +624,7 @@ bm_Tuning <- function(model, .fun_testIfIn(TRUE, "metric.eval", metric.eval, c("ROC", "TSS")) } ## check weights ------------------------------------------------------------ - if (model %in% c("CTA", "FDA", "GLM", "GAM") && is.null(weights)) { + if (model %in% c("CTA", "FDA", "GAM", "GLM") && is.null(weights)) { weights = rep(1, length(bm.format@data.species)) } @@ -670,7 +656,7 @@ bm_Tuning <- function(model, if (model == "RF") tuning.length <- min(30, ncol(bm.format@data.env.var)) ## Do formula --------------------------------------------------------------- - if (model %in% c("MAXENT","MAXNET","SRE","XGBOOST")& do.formula == TRUE){ + if (model %in% c("MAXENT", "MAXNET", "SRE", "XGBOOST") && do.formula == TRUE) { do.formula <- FALSE cat("\n No optimization of formula for", model) }