merge

Merge branch 'master' of https://github.com/brandmaier/semtree # Conflicts: # DESCRIPTION
brandmaier · Mar 25, 2024 · 0a1f762 · 0a1f762
2 parents 11ff3c7 + c1f3d72
commit 0a1f762
Show file tree

Hide file tree

Showing 29 changed files with 1,443 additions and 1,292 deletions.
diff --git a/.github/workflows/R-tests.yml b/.github/workflows/R-tests.yml
@@ -0,0 +1,30 @@
+on:
+   workflow_dispatch 
+
+name: Tests
+
+jobs:
+  document:
+    name: run tests
+    runs-on: ubuntu-latest
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+    steps:
+      - uses: actions/checkout@v3
+      - uses: r-lib/actions/setup-r@v2
+        with:
+          use-public-rspm: true
+
+      - uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          cache-version: 2
+          extra-packages: |
+            any::testthat
+            any::devtools
+#          needs: pr-document
+      - name: install package
+        run: devtools::install_github("brandmaier/semtree")
+
+      - name: run test
+        run: testthat::test_dir("tests/testthat/")
+        shell: Rscript {0}
diff --git a/.github/workflows/test-coverage.yml b/.github/workflows/test-coverage.yml
@@ -0,0 +1,50 @@
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+on:
+  push:
+    branches: [main, master]
+  pull_request:
+    branches: [main, master]
+
+name: test-coverage
+
+jobs:
+  test-coverage:
+    runs-on: ubuntu-latest
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+
+    steps:
+      - uses: actions/checkout@v3
+
+      - uses: r-lib/actions/setup-r@v2
+        with:
+          use-public-rspm: true
+
+      - uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          extra-packages: any::covr
+          needs: coverage
+
+      - name: Test coverage
+        run: |
+          covr::codecov(
+            quiet = FALSE,
+            clean = FALSE,
+            install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package")
+          )
+        shell: Rscript {0}
+
+      - name: Show testthat output
+        if: always()
+        run: |
+          ## --------------------------------------------------------------------
+          find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true
+        shell: bash
+
+      - name: Upload test results
+        if: failure()
+        uses: actions/upload-artifact@v3
+        with:
+          name: coverage-test-failures
+          path: ${{ runner.temp }}/package
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -50,7 +50,11 @@ License: GPL-3
 Encoding: UTF-8
 LazyLoad: yes
 Version: 0.9.20
+<<<<<<< HEAD
 Date: 2024-03-25
+=======
+Date: 2024-03-14
+>>>>>>> c1f3d72a029d525e9e8e407e29b9d0470819b468
 RoxygenNote: 7.2.3
 VignetteBuilder: knitr
 BugReports: https://github.com/brandmaier/semtree/issues

diff --git a/R/bootstrap.R b/R/bootstrap.R
@@ -5,15 +5,15 @@ forest.sample <-
            return.oob = F,
            type = "bootstrap") {
     if (mtry > 0) {
-      #cov.ids <- which(names(dataset) %in% covariates)
+      # cov.ids <- which(names(dataset) %in% covariates)
       # sample length(cov.ids)-mtry of those to exclude
-      #rem.ids <- sample(cov.ids, length(cov.ids)-mtry)
-      #dataset <- dataset[, -rem.ids]
+      # rem.ids <- sample(cov.ids, length(cov.ids)-mtry)
+      # dataset <- dataset[, -rem.ids]
       dataset <- sampleColumns(dataset, covariates, mtry)
     }
-    
+
     N <- dim(dataset)[1]
-    
+
     if (type == "bootstrap") {
       indices <- sample(1:N, N, replace = T)
     } else if (type == "subsample") {
@@ -23,12 +23,11 @@ forest.sample <-
     }
     bootstrap.data <- dataset[indices, ]
     oob.indices <- setdiff(1:N, unique(indices))
-    oob.data <- dataset[oob.indices,]
+    oob.data <- dataset[oob.indices, ]
 
     if (return.oob) {
       return(list(bootstrap.data = bootstrap.data, oob.data = oob.data))
     } else {
       return(bootstrap.data)
     }
-
-  }
+  }
diff --git a/R/boruta.R b/R/boruta.R
@@ -3,54 +3,52 @@ boruta <- function(model,
                    control = NULL,
                    predictors = NULL,
                    ...) {
-
-
   # TODO: make sure that no column names start with "shadow_" prefix
-  
+
   # detect model (warning: duplicated code)
-  if (inherits(model,"MxModel") || inherits(model,"MxRAMModel")) {
-    tmp <- getPredictorsOpenMx(mxmodel=model, dataset=data, covariates=predictors)
+  if (inherits(model, "MxModel") || inherits(model, "MxRAMModel")) {
+    tmp <- getPredictorsOpenMx(mxmodel = model, dataset = data, covariates = predictors)
     model.ids <- tmp[[1]]
     covariate.ids <- tmp[[2]]
-#  } else if (inherits(model,"lavaan")){
+    #  } else if (inherits(model,"lavaan")){
 
- # } else if ((inherits(model,"ctsemFit")) || (inherits(model,"ctsemInit"))) {
-#
+    # } else if ((inherits(model,"ctsemFit")) || (inherits(model,"ctsemInit"))) {
+    #
   } else {
-    ui_stop("Unknown model type selected. Use OpenMx or lavaanified lavaan models!");
+    ui_stop("Unknown model type selected. Use OpenMx or lavaanified lavaan models!")
   }
-  
+
   # stage 1 - create shadow features
 
-  shadow.ids <- (ncol(data)+1):(ncol(data)+length(covariate.ids))
-  
+  shadow.ids <- (ncol(data) + 1):(ncol(data) + length(covariate.ids))
+
   for (cur_cov_id in covariate.ids) {
     # pick column and shuffle
     temp_column <- data[, cur_cov_id]
-    temp_column <- sample(temp_column, length(temp_column), replace=FALSE)
+    temp_column <- sample(temp_column, length(temp_column), replace = FALSE)
     # add to dataset as shadow feature
-    temp_colname <- paste0("shadow_", names(data)[cur_cov_id],collapse="")
+    temp_colname <- paste0("shadow_", names(data)[cur_cov_id], collapse = "")
     data[temp_colname] <- temp_column
     if (!is.null(predictors)) predictors <- c(predictors, temp_colname)
   }
-  
+
   # run the forest
   forest <- semforest(model, data, control, predictors, ...)
-  
+
   # run variable importance
   vim <- varimp(forest)
-  
+
   # get variable importance from shadow features
   shadow_names <- names(data)[shadow.ids]
-  agvim <- aggregateVarimp(vim, aggregate="mean")
-  max_shadow_importance <- max(agvim[names(agvim)%in%shadow_names])  
-  agvim_filtered <- agvim[!(names(agvim)%in%shadow_names)]
-  
-  df<-data.frame(importance=agvim_filtered, predictor=names(agvim_filtered))
-  
-  vim$filter <- agvim_filtered>max_shadow_importance
+  agvim <- aggregateVarimp(vim, aggregate = "mean")
+  max_shadow_importance <- max(agvim[names(agvim) %in% shadow_names])
+  agvim_filtered <- agvim[!(names(agvim) %in% shadow_names)]
+
+  df <- data.frame(importance = agvim_filtered, predictor = names(agvim_filtered))
+
+  vim$filter <- agvim_filtered > max_shadow_importance
   vim$boruta <- TRUE
-  vim$boruta_threshold = max_shadow_importance
-  
+  vim$boruta_threshold <- max_shadow_importance
+
   return(vim)
 }
diff --git a/R/checkControl.R b/R/checkControl.R
@@ -8,12 +8,10 @@ checkControl <- function(control, fail = TRUE) {
   return(fail)
 }
 
-check.semtree.control <- function(control, fail = TRUE)
-{
+check.semtree.control <- function(control, fail = TRUE) {
   attr <- attributes(control)$names
   def.attr <- attributes(semtree.control())$names
-  if ((length(intersect(attr, def.attr)) != length(attr)))
-  {
+  if ((length(intersect(attr, def.attr)) != length(attr))) {
     unknown <- setdiff(attr, def.attr)
     msg <-
       paste("Control object contains unknown parameters:", unknown)
@@ -22,9 +20,8 @@ check.semtree.control <- function(control, fail = TRUE)
       stop()
     } else {
       ui_warn(msg)
-      
+
       return(FALSE)
-
     }
   } else {
     temp <- semtree.control()
@@ -36,23 +33,20 @@ check.semtree.control <- function(control, fail = TRUE)
         }
       }
     } # end for
-    
-    return (TRUE)
+
+    return(TRUE)
   }
-
-
-
-  return (length(intersect(attr, def.attr)) == length(attr))
-
+
+
+
+  return(length(intersect(attr, def.attr)) == length(attr))
 }
 
-check.semforest.control <- function(control, fail = TRUE)
-{
+check.semforest.control <- function(control, fail = TRUE) {
   attr <- attributes(control)$names
   def.attr <- attributes(semforest.control())$names
-
-  if ((length(intersect(attr, def.attr)) != length(attr)))
-  {
+
+  if ((length(intersect(attr, def.attr)) != length(attr))) {
     unknown <- setdiff(attr, def.attr)
     msg <-
       paste("Control object contains unknown parameters:", unknown)
@@ -61,12 +55,10 @@ check.semforest.control <- function(control, fail = TRUE)
       stop()
     } else {
       ui_warn(msg)
-      
+
       return(FALSE)
-
     }
   } else {
-    return (TRUE)
-
+    return(TRUE)
   }
-}
+}
diff --git a/R/computePval_maxLR.R b/R/computePval_maxLR.R
@@ -1,42 +1,41 @@
 #' Wrapper function for computing the maxLR corrected p value
 #' from strucchange
-#' 
+#'
 #' @param maxLR maximum of the LR test statistics
 #' @param q number of free SEM parameters / degrees of freedom
 #' @param covariate covariate under evaluation. This is important to get the level of
 #'            measurement from the covariate and the bin size for ordinal and
 #'            categorical covariates.
 #' @param from numeric from interval (0, 1) specifying start of trimmed
-#'           sample period.  With the default 
+#'           sample period.  With the default
 #'           from = 0.15 the first and last 15 percent of observations are
 #'           trimmed. This is only needed for continuous covariates.
 #' @param to numeric from interval (0, 1) specifying end of trimmed
 #'           sample period. By default, to is 1.
-#' @param nrep numeric. Number of replications used for simulating from the asymptotic 
+#' @param nrep numeric. Number of replications used for simulating from the asymptotic
 #'       distribution (passed to efpFunctional). Only needed for ordinal
 #'       covariates.
 #'
 #
 #' @author Manuel Arnold
 #' @return Numeric. p value for maximally selected LR statistic
 
-  computePval_maxLR <- function(maxLR, q, covariate, from, to, nrep) {
-    
-    # Level of measurement
-    if (!is.factor(covariate)) { # metric
-      pval <- strucchange::supLM(from = from, to = to)$computePval(x = maxLR, nproc = q)
-    } else {
-      covariate <- sort(covariate) # sort covariate
-      covariate <- droplevels(covariate)
-      if (is.ordered(covariate)) { # ordinal
-        pval <- strucchange::ordL2BB(freq = covariate, nproc = q,
-                        nrep = nrep)$computePval(x = maxLR, nproc = q)
-      } else { # categorical
-        pval <- strucchange::catL2BB(covariate)$computePval(x = maxLR, nproc = q)
-      }
+computePval_maxLR <- function(maxLR, q, covariate, from, to, nrep) {
+  # Level of measurement
+  if (!is.factor(covariate)) { # metric
+    pval <- strucchange::supLM(from = from, to = to)$computePval(x = maxLR, nproc = q)
+  } else {
+    covariate <- sort(covariate) # sort covariate
+    covariate <- droplevels(covariate)
+    if (is.ordered(covariate)) { # ordinal
+      pval <- strucchange::ordL2BB(
+        freq = covariate, nproc = q,
+        nrep = nrep
+      )$computePval(x = maxLR, nproc = q)
+    } else { # categorical
+      pval <- strucchange::catL2BB(covariate)$computePval(x = maxLR, nproc = q)
     }
-
-    pval
-
   }
-
+
+  pval
+}