Update documentation

biomodhub · Dec 19, 2023 · df55bf2 · df55bf2
1 parent aebb085
commit df55bf2
Show file tree

Hide file tree

Showing 34 changed files with 445 additions and 311 deletions.
diff --git a/R/BIOMOD_EnsembleForecasting.R b/R/BIOMOD_EnsembleForecasting.R
@@ -66,7 +66,7 @@
 ##' 
 ##' @return
 ##' 
-##' A \code{BIOMOD.projection.out} object containing models projections, or links to saved 
+##' A \code{\link{BIOMOD.projection.out}} object containing models projections, or links to saved 
 ##' outputs. \cr Models projections are stored out of \R (for memory storage reasons) in 
 ##' \code{proj.name} folder created in the current working directory :
 ##' \enumerate{

diff --git a/R/BIOMOD_EnsembleModeling.R b/R/BIOMOD_EnsembleModeling.R
@@ -94,7 +94,7 @@
 ##'   \code{EMwmean.decay}
 ##' @return
 ##' 
-##' A \code{BIOMOD.ensemble.models.out} object containing models outputs, or links to saved 
+##' A \code{\link{BIOMOD.ensemble.models.out}} object containing models outputs, or links to saved 
 ##' outputs. \cr Models outputs are stored out of \R (for memory storage reasons) in 2 different 
 ##' folders created in the current working directory :
 ##' \enumerate{
@@ -103,8 +103,9 @@
 ##'   \item a \emph{hidden} folder, named \code{.BIOMOD_DATA}, and containing outputs related 
 ##'   files (original dataset, calibration lines, pseudo-absences selected, predictions, 
 ##'   variables importance, evaluation values...), that can be retrieved with 
-##'   \href{https://biomodhub.github.io/biomod2/reference/getters.out.html}{\code{get_[...]}} or \code{\link{load}} functions, and used by other 
-##'   \pkg{biomod2} functions, like \code{\link{BIOMOD_EnsembleForecasting}}
+##'   \href{https://biomodhub.github.io/biomod2/reference/getters.out.html}{\code{get_[...]}} 
+##'   or \code{\link{load}} functions, and used by other \pkg{biomod2} functions, like 
+##'   \code{\link{BIOMOD_EnsembleForecasting}}
 ##' }
 ##' 
 ##' 

diff --git a/R/BIOMOD_FormatingData.R b/R/BIOMOD_FormatingData.R
@@ -97,9 +97,11 @@
 ##' 
 ##' @return 
 ##' 
-##' A \code{BIOMOD.formated.data} object that can be used to build species distribution model(s) 
-##' with the \code{\link{BIOMOD_Modeling}} function. \cr
-##' \code{print} and \code{plot} functions are available to have a summary of the created object.
+##' A \code{\link{BIOMOD.formated.data}} object that can be used to build species distribution 
+##' model(s) with the \code{\link{BIOMOD_Modeling}} function. \cr
+##' \code{\href{BIOMOD.formated.data.html}{print/show}}, \code{\href{plot.html}{plot}} and 
+##' \code{\href{summary.html}{summary}} functions are available to have a summary of the 
+##' created object. 
 ##' 
 ##' 
 ##' @details  
@@ -137,10 +139,10 @@
 ##'   are recommended (to estimate the effect of pseudo-absence selection), as well as high 
 ##'   number of pseudo-absence points. \cr
 ##'   \emph{Be sure not to select more pseudo-absence points than maximum number of pixels in 
-##'   the studied area ! \cr \cr \cr \cr}
+##'   the studied area !}
 ##'   }
 ##'   \item{it is possible now to create several pseudo-absence repetitions with different 
-##'   number of points, but with the same sampling strategy}
+##'   number of points, BUT with the same sampling strategy. \cr \cr \cr \cr}
 ##' }
 ##' 
 ##' \describe{
@@ -173,7 +175,7 @@
 ##'   function}), it is also possible (and strongly advised) to directly provide two independent 
 ##'   datasets, one for calibration/validation and one for evaluation
 ##'   }
-##'   \item{Pseudo-absence selection}{
+##'   \item{Pseudo-absence selection (see \code{\link{bm_PseudoAbsences}})}{
 ##'   If no true absences are available, pseudo-absences must be selected from the 
 ##'   \emph{background data}, meaning data for which there is no information on whether the species of 
 ##'   interest occurs or not. It corresponds either to the remaining pixels of the \code{expl.var} 
@@ -188,7 +190,7 @@
 ##'     \item{sre}{pseudo-absences have to be selected in conditions (combination of explanatory 
 ##'     variables) that differ in a defined proportion (\code{PA.sre.quant}) from those of 
 ##'     presence points. A \emph{Surface Range Envelop} model is first run over the species of 
-##'     interest, and pseudo-absences are selected outside this envelop. \cr
+##'     interest (see \code{\link{bm_SRE}}), and pseudo-absences are selected outside this envelop. \cr
 ##'     \emph{This case is appropriate when all the species climatic niche has been sampled, 
 ##'     otherwise it may lead to over-optimistic model evaluations and predictions !}
 ##'     }

diff --git a/R/BIOMOD_Modeling.R b/R/BIOMOD_Modeling.R
@@ -99,7 +99,7 @@
 ##' 
 ##' @return
 ##' 
-##' A \code{BIOMOD.models.out} object containing models outputs, or links to saved outputs. \cr
+##' A \code{\link{BIOMOD.models.out}} object containing models outputs, or links to saved outputs. \cr
 ##' Models outputs are stored out of \R (for memory storage reasons) in 2 different folders 
 ##' created in the current working directory :
 ##' \enumerate{
@@ -109,16 +109,16 @@
 ##'   \item a \emph{hidden} folder, named \code{.BIOMOD_DATA}, and containing outputs related 
 ##'   files (original dataset, calibration lines, pseudo-absences selected, predictions, 
 ##'   variables importance, evaluation values...), that can be retrieved with 
-##'   \href{https://biomodhub.github.io/biomod2/reference/getters.out.html}{\code{get_[...]}} or \code{\link{load}} functions, and used by other 
-##'   \pkg{biomod2} functions, like \code{\link{BIOMOD_Projection}} or 
-##'   \code{\link{BIOMOD_EnsembleModeling}}
+##'   \code{\href{https://biomodhub.github.io/biomod2/reference/getters.out.html}{get_[...]}} 
+##'   or \code{\link{load}} functions, and used by other \pkg{biomod2} functions, like 
+##'   \code{\link{BIOMOD_Projection}} or \code{\link{BIOMOD_EnsembleModeling}}
 ##' }
 ##' 
 ##' 
 ##' @details 
 ##' 
 ##' \describe{
-##'   \item{bm.format}{If you have decided to add pseudo absences to your original dataset (see 
+##'   \item{bm.format}{If pseudo absences have been added to the original dataset (see 
 ##'   \code{\link{BIOMOD_FormatingData}}), \cr \code{PA.nb.rep *(nb.rep + 1)} models will be 
 ##'   created.}
 ##'   
@@ -139,7 +139,7 @@
 ##'     (\url{https://biodiversityinformatics.amnh.org/open_source/maxent/})
 ##'     \item \code{MAXNET} : Maximum Entropy (\code{\link[maxnet]{maxnet}})
 ##'     \item \code{RF} : Random Forest (\code{\link[randomForest]{randomForest}})
-##'     \item \code{SRE} : Surface Range Envelop or usually called BIOCLIM
+##'     \item \code{SRE} : Surface Range Envelop or usually called BIOCLIM (\code{\link{bm_SRE}})
 ##'     \item \code{XGBOOST} : eXtreme Gradient Boosting Training (\code{\link[xgboost]{xgboost}})
 ##'   }}
 ##'   
@@ -153,8 +153,9 @@
 ##'   single models (see \code{\link{bm_CrossValidation}}).}
 ##'   
 ##'   \item{OPT.[...] parameters}{Different methods are available to parameterize the 
-##'   single models (see \code{\link{BIOMOD.options.dataset}}). Note that only \code{binary} data 
-##'   type is allowed currently.
+##'   single models (see \code{\link{bm_ModelingOptions}} and 
+##'   \code{\link{BIOMOD.options.dataset}}). Note that only \code{binary} data type is 
+##'   allowed currently.
 ##'   \itemize{
 ##'     \item \code{default} : only default parameter values of default parameters of the single 
 ##'     models functions are retrieved. Nothing is changed so it might not give good results.
@@ -186,20 +187,36 @@
 ##'   }}
 ##' 
 ##'   \item{metric.eval}{
-##'   \itemize{
-##'     \item \code{ROC} : Relative operating characteristic
-##'     \item \code{KAPPA} : Cohen's Kappa (Heidke skill score)
-##'     \item \code{TSS} : True skill statistic (Hanssen and Kuipers discriminant, Peirce's skill 
-##'     score)
-##'     \item \code{FAR} : False alarm ratio
-##'     \item \code{SR} : Success ratio
-##'     \item \code{ACCURANCY} : Accuracy (fraction correct)
-##'     \item \code{BIAS} : Bias score (frequency bias)
-##'     \item \code{POD} : Probability of detection (hit rate)
-##'     \item \code{CSI} : Critical success index (threat score)
-##'     \item \code{ETS} : Equitable threat score (Gilbert skill score)
-##'     \item \code{BOYCE} : Boyce index
-##'     \item \code{MPA} : Minimal predicted area (cutoff optimising MPA to predict 90% of presences)
+##'   \describe{
+##'     \item{simple}{
+##'     \itemize{
+##'       \item \code{POD} : Probability of detection (hit rate)
+##'       \item \code{FAR} : False alarm ratio
+##'       \item \code{POFD} : Probability of false detection (fall-out)
+##'       \item \code{SR} : Success ratio
+##'       \item \code{ACCURACY} : Accuracy (fraction correct)
+##'       \item \code{BIAS} : Bias score (frequency bias)
+##'     }
+##'     }
+##'     \item{complex}{
+##'     \itemize{
+##'       \item \code{ROC} : Relative operating characteristic
+##'       \item \code{TSS} : True skill statistic (Hanssen and Kuipers discriminant, Peirce's 
+##'       skill score)
+##'       \item \code{KAPPA} : Cohen's Kappa (Heidke skill score)
+##'       \item \code{OR} : Odds Ratio
+##'       \item \code{ORSS} : Odds ratio skill score (Yule's Q)
+##'       \item \code{CSI} : Critical success index (threat score)
+##'       \item \code{ETS} : Equitable threat score (Gilbert skill score)
+##'     }
+##'     }
+##'     \item{presence-only}{
+##'     \itemize{
+##'       \item \code{BOYCE} : Boyce index
+##'       \item \code{MPA} : Minimal predicted area (cutoff optimising MPA to predict 90\% of 
+##'       presences)
+##'     }
+##'     }
 ##'   }
 ##'   Optimal value of each method can be obtained with the \code{\link{get_optim_value}} 
 ##'   function. Several evaluation metrics can be selected. \emph{Please refer to the 
@@ -213,6 +230,10 @@
 ##'   \code{eval.[...]} parameters in \code{\link{BIOMOD_FormatingData}}.}
 ##'   }
 ##'   
+##'   \item{var.import}{A value characterizing how much each variable has an impact on each model's 
+##'   predictions can be calculated by randomizing the variable of interest and computing the 
+##'   correlation between original and shuffled variables (see \code{\link{bm_VariablesImportance}}).}
+##'   
 ##'   \item{scale.models}{\bold{This parameter is quite experimental and it is recommended 
 ##'   not to use it. It may lead to reduction in projection scale amplitude.} Some categorical 
 ##'   models always have to be scaled (\code{FDA}, \code{ANN}), but it may be interesting to 

diff --git a/R/BIOMOD_Projection.R b/R/BIOMOD_Projection.R
@@ -63,7 +63,7 @@
 ##' 
 ##' @return
 ##' 
-##' A \code{BIOMOD.projection.out} object containing models projections, or links to saved 
+##' A \code{\link{BIOMOD.projection.out}} object containing models projections, or links to saved 
 ##' outputs. \cr Models projections are stored out of \R (for memory storage reasons) in 
 ##' \code{proj.name} folder created in the current working directory :
 ##' \enumerate{

diff --git a/R/bm_CrossValidation.R b/R/bm_CrossValidation.R
@@ -8,14 +8,13 @@
 ##' according to 6 different methods : \code{random}, \code{kfold}, \code{block}, \code{strat}, 
 ##' \code{env} or \code{user.defined} (see Details).
 ##' 
-##' @param bm.format a \code{\link{BIOMOD.formated.data-class}} or 
-##' \code{\link{BIOMOD.formated.data.PA-class}} object returned by the 
-##' \code{\link{BIOMOD_FormatingData}} function
+##' @param bm.format a \code{\link{BIOMOD.formated.data}} or \code{\link{BIOMOD.formated.data.PA}} 
+##' object returned by the \code{\link{BIOMOD_FormatingData}} function
 ##' @param strategy a \code{character} corresponding to the cross-validation selection strategy, 
 ##' must be among \code{random}, \code{kfold}, \code{block}, \code{strat}, \code{env} or 
 ##' \code{user.defined}
-##' @param \ldots (\emph{optional, one or several of the following arguments depending on the selected 
-##' method}) 
+##' @param \ldots (\emph{optional, one or several of the following arguments depending on the 
+##' selected method}) 
 ##' 
 ##' @param nb.rep (\emph{optional, default} \code{0}) \cr
 ##' If \code{strategy = 'random'} or \code{strategy = 'kfold'}, an \code{integer} corresponding 

diff --git a/R/bm_FindOptimStat.R b/R/bm_FindOptimStat.R
@@ -13,9 +13,9 @@
 ##'
 ##'
 ##' @param metric.eval a \code{character} corresponding to the evaluation metric to be used, must 
-##' be either \code{ROC}, \code{TSS}, \code{KAPPA}, \code{ACCURACY}, \code{BIAS}, \code{POD}, 
-##' \code{FAR}, \code{POFD}, \code{SR}, \code{CSI}, \code{ETS}, \code{HK}, \code{HSS}, \code{OR}, 
-##' \code{ORSS}, \code{BOYCE}, \code{MPA}
+##' be either \code{POD}, \code{FAR}, \code{POFD}, \code{SR}, \code{ACCURACY}, \code{BIAS}, 
+##' \code{ROC}, \code{TSS}, \code{KAPPA}, \code{OR}, \code{ORSS}, \code{CSI}, \code{ETS}, 
+##' \code{BOYCE}, \code{MPA}
 ##' @param obs a \code{vector} of observed values (binary, \code{0} or \code{1})
 ##' @param fit a \code{vector} of fitted values (continuous)
 ##' @param nb.thresh an \code{integer} corresponding to the number of thresholds to be 
@@ -51,8 +51,41 @@
 ##'
 ##' @details
 ##' 
-##' \emph{Please refer to \code{\link{BIOMOD_Modeling}} to get more information about these 
-##' evaluation metrics.}
+##' \describe{
+##'   \item{simple}{
+##'   \itemize{
+##'     \item \code{POD} : Probability of detection (hit rate)
+##'     \item \code{FAR} : False alarm ratio
+##'     \item \code{POFD} : Probability of false detection (fall-out)
+##'     \item \code{SR} : Success ratio
+##'     \item \code{ACCURACY} : Accuracy (fraction correct)
+##'     \item \code{BIAS} : Bias score (frequency bias)
+##'   }
+##'   }
+##'   \item{complex}{
+##'   \itemize{
+##'     \item \code{ROC} : Relative operating characteristic
+##'     \item \code{TSS} : True skill statistic (Hanssen and Kuipers discriminant, Peirce's 
+##'     skill score)
+##'     \item \code{KAPPA} : Cohen's Kappa (Heidke skill score)
+##'     \item \code{OR} : Odds Ratio
+##'     \item \code{ORSS} : Odds ratio skill score (Yule's Q)
+##'     \item \code{CSI} : Critical success index (threat score)
+##'     \item \code{ETS} : Equitable threat score (Gilbert skill score)
+##'   }
+##'   }
+##'   \item{presence-only}{
+##'   \itemize{
+##'     \item \code{BOYCE} : Boyce index
+##'     \item \code{MPA} : Minimal predicted area (cutoff optimising MPA to predict 90\% of 
+##'     presences)
+##'   }
+##'   }
+##' }
+##'   
+##' Optimal value of each method can be obtained with the \code{\link{get_optim_value}} function. \cr
+##' \emph{Please refer to the \href{https://www.cawcr.gov.au/projects/verification/}{CAWCR website 
+##' (section "Methods for dichotomous forecasts")} to get detailed description of each metric.}
 ##' 
 ##' Note that if a value is given to \code{threshold}, no optimisation will be done, and 
 ##' only the score for this threshold will be returned.
@@ -65,10 +98,6 @@
 ##' @note In order to break dependency loop between packages \pkg{biomod2} and \pkg{ecospat}, 
 ##' code of the \code{ecospat.boyce()} and \code{ecospat.mpa()} functions (from \pkg{ecospat}) 
 ##' has been copied within this file from version 3.2.2 (August 2022).
-## \code{\link[ecospat]{ecospat.mpa}}) # generate R CMD Check error due 
-## to crossref missing ecospat package
-## code of \code{\link[ecospat]{ecospat.boyce}} and \code{\link[ecospat]{ecospat.mpa}} 
-## generate R CMD Check error due to crossref missing ecospat package 
 ##'
 ##'
 ##' @references
@@ -82,7 +111,7 @@
 ##'   Modelling}, \bold{199(2)}, 142-152.
 ##' }
 ##'
-##' @keywords models options evaluation
+##' @keywords models options evaluation auc tss boyce mpa
 ##' 
 ##'
 ##' @seealso \code{ecospat.boyce()} and \code{ecospat.mpa()} in \pkg{ecospat}, 
@@ -92,7 +121,6 @@
 ##' 
 ##' 
 ##' @examples
-##' 
 ##' ## Generate a binary vector
 ##' vec.a <- sample(c(0, 1), 100, replace = TRUE)
 ##' 
@@ -291,21 +319,19 @@ bm_FindOptimStat <- function(metric.eval = 'TSS',
 get_optim_value <- function(metric.eval)
 {
   switch(metric.eval
-         , 'TSS' = 1
-         , 'KAPPA' = 1
-         , 'ACCURACY' = 1
-         , 'BIAS' = 1
          , 'POD' = 1
          , 'FAR' = 0
          , 'POFD' = 0
          , 'SR' = 1
-         , 'CSI' = 1
-         , 'ETS' = 1
-         , 'HK' = 1
-         , 'HSS' = 1
+         , 'ACCURACY' = 1
+         , 'BIAS' = 1
+         , 'ROC' = 1
+         , 'TSS' = 1
+         , 'KAPPA' = 1
          , 'OR' = 1000000
          , 'ORSS' = 1
-         , 'ROC' = 1
+         , 'CSI' = 1
+         , 'ETS' = 1
          , 'BOYCE' = 1
          , 'MPA' = 1
   )
@@ -369,38 +395,33 @@ bm_CalculateStat <- function(misc, metric.eval = 'TSS')
   correct_negatives <- misc['FALSE', '0'] ## true negatives
 
   total <- sum(misc)
-  forecast_1 <- sum(misc['TRUE', ])
-  forecast_0 <- sum(misc['FALSE', ])
-  observed_1 <- sum(misc[, '1'])
-  observed_0 <- sum(misc[, '0'])
+  forecast_1 <- sum(misc['TRUE', ]) ## hits + false_alarms
+  forecast_0 <- sum(misc['FALSE', ]) ## misses + correct_negatives
+  observed_1 <- sum(misc[, '1']) ## hits + misses
+  observed_0 <- sum(misc[, '0']) ## false_alarms + correct_negatives
 
   ## calculate chosen evaluation metric ---------------------------------------
   out = switch(metric.eval
-               , 'TSS' = (hits / (hits + misses)) + (correct_negatives / (false_alarms + correct_negatives)) - 1
-               , 'KAPPA' = {
+               , 'POD' =  hits / observed_1
+               , 'POFD' = false_alarms / observed_0
+               , 'FAR' = false_alarms / forecast_1
+               , 'SR' = hits / forecast_1
+               , 'ACCURACY' = (hits + correct_negatives) / total
+               , 'BIAS' = forecast_1 / observed_1
+
+               , 'TSS' = (hits / observed_1) + (correct_negatives / observed_0) - 1
+               , 'KAPPA' = { ## PAREIL ?
                  Po <- (1 / total) * (hits + correct_negatives)
                  Pe <- ((1 / total) ^ 2) * ((forecast_1 * observed_1) + (forecast_0 * observed_0))
                  return((Po - Pe) / (1 - Pe))
                }
-               , 'ACCURACY' = (hits + correct_negatives) / total
-               , 'BIAS' = (hits + false_alarms) / (hits + misses)
-               , 'POD' =  hits / (hits + misses)
-               , 'FAR' = false_alarms / (hits + false_alarms)
-               , 'POFD' = false_alarms / (correct_negatives + false_alarms)
-               , 'SR' = hits / (hits + false_alarms)
-               , 'CSI' = hits / (hits + misses + false_alarms)
-               , 'ETS' = {
-                 hits_rand <- ((hits + misses) * (hits + false_alarms)) / total
-                 return((hits - hits_rand) / (hits + misses + false_alarms - hits_rand))
-               }
-               , 'HK' = (hits / (hits + misses)) - (false_alarms / (false_alarms + correct_negatives))
-               , 'HSS' = {
-                 expected_correct_rand <- (1 / total) * 
-                   (((hits + misses) * (hits + false_alarms)) + ((correct_negatives + misses) * (correct_negatives + false_alarms)))
-                 return((hits + correct_negatives - expected_correct_rand) / (total - expected_correct_rand))
-               }
                , 'OR' = (hits * correct_negatives) / (misses * false_alarms)
                , 'ORSS' = (hits * correct_negatives - misses * false_alarms) / (hits * correct_negatives + misses * false_alarms)
+               , 'CSI' = hits / (observed_1 + false_alarms)
+               , 'ETS' = {
+                 hits_rand <- (observed_1 * forecast_1) / total
+                 return((hits - hits_rand) / (observed_1 + false_alarms - hits_rand))
+               }
   )
 }