Skip to content

Commit

Permalink
replaced snow/snowfall with parallel package
Browse files Browse the repository at this point in the history
  • Loading branch information
brandmaier committed Dec 16, 2016
1 parent 3b03feb commit 3f716b1
Show file tree
Hide file tree
Showing 13 changed files with 74 additions and 202 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Imports:
digest,
rpart,
rpart.plot,
snowfall
parallel
Suggests:
lavaan
Description: SEM Trees and SEM Forests -- an extension of model-based decision
Expand Down
14 changes: 4 additions & 10 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ export("subtree")
#export("manvarOrder")
#export("recodeAllSubsets")
#export("recodeOrdered")
export("sfMapply")

#export("fairSplit")
#export("naiveSplit")

Expand Down Expand Up @@ -63,7 +63,7 @@ export("getTerminalNodes")

export("proximity")
export("outliers")
export("availableCpus")
#export("availableCpus")
export("evaluateTree")
export("getNodeById")
#export("get.children.map")
Expand Down Expand Up @@ -98,17 +98,11 @@ export("partialDependencePlot")
import("OpenMx")
import("bitops")
importFrom("sets","as.set")
#import("rpart.plot")
#import("snowfall")
import("rpart")


importFrom("utils", "str")
#importFrom("base", "summary")

importFrom("grDevices", "heat.colors", "rainbow")
importFrom("graphics", "barplot", "legend", "lines", "pairs", "par",
"plot", "strwidth", "text")
importFrom("stats", "as.dist", "cmdscale", "coef", "cor", "cov",
"logLik", "median", "pchisq", "qnorm", "runif", "var")
importFrom("utils", "flush.console", "getS3method", "sessionInfo")
importFrom("utils", "flush.console", "getS3method", "sessionInfo","str")
importFrom("parallel","parLapply","clusterMap")
5 changes: 0 additions & 5 deletions R/availableCpus.R

This file was deleted.

14 changes: 11 additions & 3 deletions R/mergeForests.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,17 @@ merge.internal <- function(forest.list){

# some checks
c1 <- digest::digest(forest$model)==digest::digest(forest.list[[i]]$model)
c2 <- digest::digest(forest$control)==digest::digest(forest.list[[i]]$control)
if (!c1 || !c2) {
stop("Cannot merge forests! Models or control objects differ");
tmp1 <- forest$control
tmp1$num.trees <- NA
tmp2 <- forest.list[[i]]$control
tmp2$num.trees <- NA
c2 <- digest::digest(tmp1)==digest::digest(tmp2)
if (!c1) {
stop("Cannot merge forests! Models differ.");
}

if (!c2) {
warning("Merging forests with different control objects!");
}

forest$forest <- c(forest$forest,forest.list[[i]]$forest)
Expand Down
12 changes: 8 additions & 4 deletions R/proximity.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
proximity <- function(forest, dataset=NULL, type=0, aggregate=T, snowfall=FALSE)
proximity <- function(forest, dataset=NULL, type=0, aggregate=T, cluster=NULL, ...)
{
if ("snowfall" %in% list(...)) {
warning("Use of snowfall is deprecated and must be replaced with cluster argument from package 'parallel'! See manual")
}

if (is.null(dataset)) {
dataset <- forest$data
}
Expand All @@ -17,10 +21,10 @@ proximity <- function(forest, dataset=NULL, type=0, aggregate=T, snowfall=FALSE)
}

#quasi map-reduce-step here:
if (snowfall)
bool.matrix <- snowfall::sfLapply(forest$forest, FUN=prox.fun, dataset)
if (is.null(cluster))
bool.matrix <- lapply(forest$forest, FUN=prox.fun, dataset)
else
bool.matrix <- lapply(forest$forest, FUN=prox.fun, dataset)
bool.matrix <- parLapply(cl=cluster,forest$forest, FUN=prox.fun, dataset)

for (i in 1:K)
{
Expand Down
25 changes: 16 additions & 9 deletions R/semforest.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
semforest <- function(model, data, control=NULL,
predictors=NULL, constraints=NULL , ...)
predictors=NULL, constraints=NULL , cluster=NULL, ...)
{

arguments <- list(...)
Expand Down Expand Up @@ -109,15 +109,22 @@ semforest <- function(model, data, control=NULL,
#browser()
start.time <- proc.time()

# applyfun <- mapply;
# if (sfParallel()) {
# applyfun <- sfMapply
# }
if (is.null(cluster)) {
trees <- mapply(FUN=semtreeApplyWrapper,
forest.data, seeds, skip,
MoreArgs=list(model,semforest.control$semtree.control,
with.error.handler, covariates, constraints),SIMPLIFY=FALSE)
} else {
trees <- clusterMap(cl=cluster, fun=semtreeApplyWrapper,
forest.data, seeds, skip,
MoreArgs=list(model,semforest.control$semtree.control,
with.error.handler, covariates, constraints),
SIMPLIFY=FALSE)
}

trees <- sfMapply(FUN=semtreeApplyWrapper,
forest.data, seeds, skip,
MoreArgs=list(model,semforest.control$semtree.control,
with.error.handler, covariates, constraints),SIMPLIFY=FALSE)



elapsed <- proc.time()-start.time

# postprocess to correct any erroneous trees
Expand Down
67 changes: 0 additions & 67 deletions R/sfMapply.R

This file was deleted.

33 changes: 21 additions & 12 deletions R/varimp.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,28 +64,37 @@ varimpTree <- function( tree, data, var.names=NULL,
#}

varimp <- function(forest, var.names=NULL, verbose=F,
main.effects=F, parallel=TRUE,
eval.fun=evaluateTree)
main.effects=F, cluster=NULL,
eval.fun=evaluateTree, ...)
{

if ("parallel" %in% list(...)) {
warning("Use of snowfall is deprecated and must be replaced with cluster argument from package 'parallel'! See manual")
}

if (is.null(var.names)) {
var.names <- forest$covariates
}

result <- list()

if (parallel) {
fun <- sfMapply
start.time <- proc.time()

if (is.null(cluster)) {
temp <- mapply(FUN=varimpTree, forest$forest, forest$forest.data,
MoreArgs=list(var.names=var.names, verbose=verbose,
max.level=NA, eval.fun=eval.fun
),
SIMPLIFY=FALSE, USE.NAMES=TRUE)
} else {
fun <- mapply
temp <- parallel::clusterMap(cl=cluster, fun=varimpTree, forest$forest, forest$forest.data,
MoreArgs=list(var.names=var.names, verbose=verbose,
max.level=NA, eval.fun=eval.fun
),
SIMPLIFY=FALSE, USE.NAMES=TRUE)
}

start.time <- proc.time()
temp <- fun(FUN=varimpTree, forest$forest, forest$forest.data,
MoreArgs=list(var.names=var.names, verbose=verbose,
max.level=NA, eval.fun=eval.fun
),
SIMPLIFY=FALSE, USE.NAMES=T)


elapsed <- proc.time()-start.time

# extract results and put them into result-object
Expand Down
41 changes: 0 additions & 41 deletions man/availableCpus.Rd

This file was deleted.

7 changes: 4 additions & 3 deletions man/proximity.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ SEM Forest Case Proximity
A \code{\link{semforest}} process that represents the proportion of trees in which cases fall into the same leaf node. The values are bounded in (0,1), where higher values indicate closer proximity.
}
\usage{
proximity(forest, dataset = NULL, type = 0, aggregate = T, snowfall = FALSE)
proximity(forest, dataset = NULL, type = 0, aggregate = T, cluster = NULL, ...)
}
%- maybe also 'usage' for other objects documented here.
% plot.proximity(prox, main = "Principal Coordinates", xlim = NULL, ylim = NULL, col = "black", pch = NULL, axes = c(1, 2), type = "mds", cex = 2, ...)
Expand All @@ -28,9 +28,10 @@ Missingness accounted for. (0 = no, 1 = yes)
\item{aggregate}{
Boolean marker to compute aggregate proximity scores.
}
\item{snowfall}{
Use parallel computing for calculations.
\item{cluster}{
An object of class "cluster" representing a parallel socket cluster. See \code{\link[parallel]{makeCluster}} in package \pkg{parallel}.
}
\item{\ldots}{Optional arguments.}
}

\value{
Expand Down
5 changes: 4 additions & 1 deletion man/semforest.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ Grows a SEM Forest from a template model and a dataset. This may take some time.
}
\usage{
semforest(model, data, control = NULL, predictors = NULL,
constraints = NULL, ...)
constraints = NULL, cluster=NULL, ...)
}
% print.semforest(forest)
%- maybe also 'usage' for other objects documented here.
Expand All @@ -30,6 +30,9 @@ An optional list of covariates. See semtree code example.
}
\item{constraints}{
An optional list of covariates. See semtree code example.
}
\item{cluster}{
An object of class "cluster" representing a parallel socket cluster. See \code{\link[parallel]{makeCluster}} in package \pkg{parallel}.
}

\item{\dots}{
Expand Down
42 changes: 0 additions & 42 deletions man/sfMapply.Rd

This file was deleted.

Loading

0 comments on commit 3f716b1

Please sign in to comment.