Skip to content

Commit

Permalink
merge
Browse files Browse the repository at this point in the history
Merge branch 'master' of https://github.com/brandmaier/semtree

# Conflicts:
#	DESCRIPTION
  • Loading branch information
brandmaier committed Mar 25, 2024
2 parents 11ff3c7 + c1f3d72 commit 0a1f762
Show file tree
Hide file tree
Showing 29 changed files with 1,443 additions and 1,292 deletions.
30 changes: 30 additions & 0 deletions .github/workflows/R-tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
on:
workflow_dispatch

name: Tests

jobs:
document:
name: run tests
runs-on: ubuntu-latest
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v3
- uses: r-lib/actions/setup-r@v2
with:
use-public-rspm: true

- uses: r-lib/actions/setup-r-dependencies@v2
with:
cache-version: 2
extra-packages: |
any::testthat
any::devtools
# needs: pr-document
- name: install package
run: devtools::install_github("brandmaier/semtree")

- name: run test
run: testthat::test_dir("tests/testthat/")
shell: Rscript {0}
50 changes: 50 additions & 0 deletions .github/workflows/test-coverage.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
branches: [main, master]
pull_request:
branches: [main, master]

name: test-coverage

jobs:
test-coverage:
runs-on: ubuntu-latest
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}

steps:
- uses: actions/checkout@v3

- uses: r-lib/actions/setup-r@v2
with:
use-public-rspm: true

- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: any::covr
needs: coverage

- name: Test coverage
run: |
covr::codecov(
quiet = FALSE,
clean = FALSE,
install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package")
)
shell: Rscript {0}

- name: Show testthat output
if: always()
run: |
## --------------------------------------------------------------------
find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true
shell: bash

- name: Upload test results
if: failure()
uses: actions/upload-artifact@v3
with:
name: coverage-test-failures
path: ${{ runner.temp }}/package
4 changes: 4 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,11 @@ License: GPL-3
Encoding: UTF-8
LazyLoad: yes
Version: 0.9.20
<<<<<<< HEAD
Date: 2024-03-25
=======
Date: 2024-03-14
>>>>>>> c1f3d72a029d525e9e8e407e29b9d0470819b468
RoxygenNote: 7.2.3
VignetteBuilder: knitr
BugReports: https://github.com/brandmaier/semtree/issues
Expand Down
15 changes: 7 additions & 8 deletions R/bootstrap.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@ forest.sample <-
return.oob = F,
type = "bootstrap") {
if (mtry > 0) {
#cov.ids <- which(names(dataset) %in% covariates)
# cov.ids <- which(names(dataset) %in% covariates)
# sample length(cov.ids)-mtry of those to exclude
#rem.ids <- sample(cov.ids, length(cov.ids)-mtry)
#dataset <- dataset[, -rem.ids]
# rem.ids <- sample(cov.ids, length(cov.ids)-mtry)
# dataset <- dataset[, -rem.ids]
dataset <- sampleColumns(dataset, covariates, mtry)
}

N <- dim(dataset)[1]

if (type == "bootstrap") {
indices <- sample(1:N, N, replace = T)
} else if (type == "subsample") {
Expand All @@ -23,12 +23,11 @@ forest.sample <-
}
bootstrap.data <- dataset[indices, ]
oob.indices <- setdiff(1:N, unique(indices))
oob.data <- dataset[oob.indices,]
oob.data <- dataset[oob.indices, ]

if (return.oob) {
return(list(bootstrap.data = bootstrap.data, oob.data = oob.data))
} else {
return(bootstrap.data)
}

}
}
50 changes: 24 additions & 26 deletions R/boruta.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,54 +3,52 @@ boruta <- function(model,
control = NULL,
predictors = NULL,
...) {


# TODO: make sure that no column names start with "shadow_" prefix

# detect model (warning: duplicated code)
if (inherits(model,"MxModel") || inherits(model,"MxRAMModel")) {
tmp <- getPredictorsOpenMx(mxmodel=model, dataset=data, covariates=predictors)
if (inherits(model, "MxModel") || inherits(model, "MxRAMModel")) {
tmp <- getPredictorsOpenMx(mxmodel = model, dataset = data, covariates = predictors)
model.ids <- tmp[[1]]
covariate.ids <- tmp[[2]]
# } else if (inherits(model,"lavaan")){
# } else if (inherits(model,"lavaan")){

# } else if ((inherits(model,"ctsemFit")) || (inherits(model,"ctsemInit"))) {
#
# } else if ((inherits(model,"ctsemFit")) || (inherits(model,"ctsemInit"))) {
#
} else {
ui_stop("Unknown model type selected. Use OpenMx or lavaanified lavaan models!");
ui_stop("Unknown model type selected. Use OpenMx or lavaanified lavaan models!")
}

# stage 1 - create shadow features

shadow.ids <- (ncol(data)+1):(ncol(data)+length(covariate.ids))
shadow.ids <- (ncol(data) + 1):(ncol(data) + length(covariate.ids))

for (cur_cov_id in covariate.ids) {
# pick column and shuffle
temp_column <- data[, cur_cov_id]
temp_column <- sample(temp_column, length(temp_column), replace=FALSE)
temp_column <- sample(temp_column, length(temp_column), replace = FALSE)
# add to dataset as shadow feature
temp_colname <- paste0("shadow_", names(data)[cur_cov_id],collapse="")
temp_colname <- paste0("shadow_", names(data)[cur_cov_id], collapse = "")
data[temp_colname] <- temp_column
if (!is.null(predictors)) predictors <- c(predictors, temp_colname)
}

# run the forest
forest <- semforest(model, data, control, predictors, ...)

# run variable importance
vim <- varimp(forest)

# get variable importance from shadow features
shadow_names <- names(data)[shadow.ids]
agvim <- aggregateVarimp(vim, aggregate="mean")
max_shadow_importance <- max(agvim[names(agvim)%in%shadow_names])
agvim_filtered <- agvim[!(names(agvim)%in%shadow_names)]
df<-data.frame(importance=agvim_filtered, predictor=names(agvim_filtered))
vim$filter <- agvim_filtered>max_shadow_importance
agvim <- aggregateVarimp(vim, aggregate = "mean")
max_shadow_importance <- max(agvim[names(agvim) %in% shadow_names])
agvim_filtered <- agvim[!(names(agvim) %in% shadow_names)]

df <- data.frame(importance = agvim_filtered, predictor = names(agvim_filtered))

vim$filter <- agvim_filtered > max_shadow_importance
vim$boruta <- TRUE
vim$boruta_threshold = max_shadow_importance
vim$boruta_threshold <- max_shadow_importance

return(vim)
}
38 changes: 15 additions & 23 deletions R/checkControl.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,10 @@ checkControl <- function(control, fail = TRUE) {
return(fail)
}

check.semtree.control <- function(control, fail = TRUE)
{
check.semtree.control <- function(control, fail = TRUE) {
attr <- attributes(control)$names
def.attr <- attributes(semtree.control())$names
if ((length(intersect(attr, def.attr)) != length(attr)))
{
if ((length(intersect(attr, def.attr)) != length(attr))) {
unknown <- setdiff(attr, def.attr)
msg <-
paste("Control object contains unknown parameters:", unknown)
Expand All @@ -22,9 +20,8 @@ check.semtree.control <- function(control, fail = TRUE)
stop()
} else {
ui_warn(msg)

return(FALSE)

}
} else {
temp <- semtree.control()
Expand All @@ -36,23 +33,20 @@ check.semtree.control <- function(control, fail = TRUE)
}
}
} # end for
return (TRUE)

return(TRUE)
}



return (length(intersect(attr, def.attr)) == length(attr))




return(length(intersect(attr, def.attr)) == length(attr))
}

check.semforest.control <- function(control, fail = TRUE)
{
check.semforest.control <- function(control, fail = TRUE) {
attr <- attributes(control)$names
def.attr <- attributes(semforest.control())$names

if ((length(intersect(attr, def.attr)) != length(attr)))
{

if ((length(intersect(attr, def.attr)) != length(attr))) {
unknown <- setdiff(attr, def.attr)
msg <-
paste("Control object contains unknown parameters:", unknown)
Expand All @@ -61,12 +55,10 @@ check.semforest.control <- function(control, fail = TRUE)
stop()
} else {
ui_warn(msg)

return(FALSE)

}
} else {
return (TRUE)

return(TRUE)
}
}
}
41 changes: 20 additions & 21 deletions R/computePval_maxLR.R
Original file line number Diff line number Diff line change
@@ -1,42 +1,41 @@
#' Wrapper function for computing the maxLR corrected p value
#' from strucchange
#'
#'
#' @param maxLR maximum of the LR test statistics
#' @param q number of free SEM parameters / degrees of freedom
#' @param covariate covariate under evaluation. This is important to get the level of
#' measurement from the covariate and the bin size for ordinal and
#' categorical covariates.
#' @param from numeric from interval (0, 1) specifying start of trimmed
#' sample period. With the default
#' sample period. With the default
#' from = 0.15 the first and last 15 percent of observations are
#' trimmed. This is only needed for continuous covariates.
#' @param to numeric from interval (0, 1) specifying end of trimmed
#' sample period. By default, to is 1.
#' @param nrep numeric. Number of replications used for simulating from the asymptotic
#' @param nrep numeric. Number of replications used for simulating from the asymptotic
#' distribution (passed to efpFunctional). Only needed for ordinal
#' covariates.
#'
#
#' @author Manuel Arnold
#' @return Numeric. p value for maximally selected LR statistic

computePval_maxLR <- function(maxLR, q, covariate, from, to, nrep) {
# Level of measurement
if (!is.factor(covariate)) { # metric
pval <- strucchange::supLM(from = from, to = to)$computePval(x = maxLR, nproc = q)
} else {
covariate <- sort(covariate) # sort covariate
covariate <- droplevels(covariate)
if (is.ordered(covariate)) { # ordinal
pval <- strucchange::ordL2BB(freq = covariate, nproc = q,
nrep = nrep)$computePval(x = maxLR, nproc = q)
} else { # categorical
pval <- strucchange::catL2BB(covariate)$computePval(x = maxLR, nproc = q)
}
computePval_maxLR <- function(maxLR, q, covariate, from, to, nrep) {
# Level of measurement
if (!is.factor(covariate)) { # metric
pval <- strucchange::supLM(from = from, to = to)$computePval(x = maxLR, nproc = q)
} else {
covariate <- sort(covariate) # sort covariate
covariate <- droplevels(covariate)
if (is.ordered(covariate)) { # ordinal
pval <- strucchange::ordL2BB(
freq = covariate, nproc = q,
nrep = nrep
)$computePval(x = maxLR, nproc = q)
} else { # categorical
pval <- strucchange::catL2BB(covariate)$computePval(x = maxLR, nproc = q)
}

pval

}


pval
}
Loading

0 comments on commit 0a1f762

Please sign in to comment.