Merge pull request #25 from jamesdunham/v0.2.12

Merge v0.2.12
jamesdunham · Nov 14, 2017 · 5f5b7c1 · 5f5b7c1
2 parents 25dd83f + 73eb8fc
commit 5f5b7c1
Show file tree

Hide file tree

Showing 21 changed files with 585 additions and 589 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: dgo
 Title: Dynamic Estimation of Group-Level Opinion
-Version: 0.2.11
-Date: 2017-10-26
+Version: 0.2.12
+Date: 2017-11-13
 Description: Fit dynamic group-level IRT and MRP models from individual or
     aggregated item response data. This package handles common preprocessing
     tasks and extends functions for inspecting results, poststratification, and

diff --git a/Makefile b/Makefile
@@ -11,7 +11,7 @@ else
   R := R
 endif
 
-all: clean docs data readme build check install
+all: clean docs data readme build check install site
 
 quick: clean 
 

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,19 @@
+## dgo 0.2.12
+
+* Allow modeling of unobserved groups using aggregated data. The previous
+  behavior was to drop rows in `aggregate_data` indicating zero trials. (They
+  don't represent item responses.) Preserving them has the effect that
+  unobserved groups, defined partially or entirely by the values of the grouping
+  variables in zero-trial rows in `aggregate_data`, can be included in a model.
+* Fix an unexpected error when 1) `aggregate_data` is used without `item_data`,
+  2) no demographic groups are specified via `group_names`, and 3) geographic
+  `modifier_data` is used.
+* Fix the check for missing `modifier_data`. Geographic `modifier_data` must
+  cover all combinations of the geo and time variables in the item response data
+  (individual or aggregated), but because of a bug in the validation of the
+  geographic data, this requirement was not always enforced. In some cases a
+  warning would appear instead of an error.
+
 ## dgo 0.2.11
 
 * Add poststratification over posterior samples (closes #21).

diff --git a/R/restrict_input_data.r b/R/restrict_input_data.r
@@ -63,7 +63,7 @@ restrict_modifier <- function(modifier_data, group_grid, ctrl) {
     modifier_data <- modifier_data[geo_time_grid, nomatch = 0]
 
     # confirm that modifier data covers all modeled geo and time
-    missing_geo_time <- modifier_data[!geo_time_grid]
+    missing_geo_time <- geo_time_grid[!modifier_data]
     if (nrow(missing_geo_time)) {
       stop("Not all pairs of time periods and geographic areas are in ",
            "modifier_data. ", nrow(missing_geo_time), " missing.")
@@ -122,11 +122,6 @@ restrict_aggregates <- function(aggregate_data, ctrl) {
       stop("no rows in aggregate data remaining after subsetting to items ",
            "in `aggregate_item_names`")
 
-    aggregate_data <- aggregate_data[get("n_grp") > 0]
-    if (!nrow(aggregate_data))
-      stop("no rows in aggregate data remaining after dropping unobserved ",
-           "group-item combinations")
-
     extra_colnames <- setdiff(names(aggregate_data),
                               c(ctrl@geo_name, ctrl@time_name, ctrl@group_names, "item", "s_grp", "n_grp"))
     if (length(extra_colnames)) {

diff --git a/R/shape_hierarchical.r b/R/shape_hierarchical.r
@@ -10,8 +10,10 @@ shape_hierarchical_data <- function(modifier_data, modifier_names, group_grid_t,
   hierarchical <- data.table::copy(modifier_data)
   hierarchical <- drop_extra_cols(hierarchical, modifier_names, ctrl) 
   data.table::setkeyv(hierarchical, c(ctrl@geo_name, ctrl@time_name))
-  unmodeled <- zero_unmodeled(hierarchical, modifier_names, group_grid_t, ctrl) 
-  hierarchical <- rbind(hierarchical, unmodeled)
+  if (length(ctrl@group_names)) {
+    unmodeled <- zero_unmodeled(hierarchical, modifier_names, group_grid_t, ctrl) 
+    hierarchical <- rbind(hierarchical, unmodeled)
+  }
   zz <- create_zz(hierarchical, modifier_names, ctrl)
   return(zz)
 }
@@ -40,7 +42,8 @@ zero_unmodeled <- function(hierarchical, modifier_names, group_grid_t, ctrl) {
       paste0(x, unique(group_grid_t[[x]]))[-1]
     }))
   unmodeled_frame <- expand.grid(c(list(unmodeled_param_levels,
-        ctrl@time_filter), rep(list(0L), length(modifier_names))))
+        ctrl@time_filter), rep(list(0L), length(modifier_names))),
+    stringsAsFactors = FALSE)
   unmodeled_frame <- setNames(unmodeled_frame, c(ctrl@geo_name, ctrl@time_name,
       modifier_names))
   data.table::setDT(unmodeled_frame, key = c(ctrl@geo_name, ctrl@time_name))

diff --git a/README.Rmd b/README.Rmd
@@ -1,4 +1,5 @@
 ---
+title: 'dgo: Dynamic Estimation of Group-Level Opinion'
 output: github_document
 ---
 [![Build Status](https://travis-ci.org/jamesdunham/dgo.svg?branch=master)](https://travis-ci.org/jamesdunham/dgo)
@@ -7,29 +8,29 @@ output: github_document
 
 # Introduction
 
-dgo is an R package for the dynamic estimation of group-level opinion. The
-package can be used to estimate subpopulation groups' average latent
-conservatism (or other latent trait) from individuals' responses to dichotomous
-questions using a Bayesian group-level IRT approach developed by [Caughey and
-Warshaw
-2015](http://pan.oxfordjournals.org/content/early/2015/02/04/pan.mpu021.full.pdf+html)
-that models latent traits at the level of demographic and/or geographic groups
-rather than individuals. This approach uses a hierarchical model to borrow
-strength cross-sectionally and dynamic linear models to do so across time. The
-group-level estimates can be weighted to generate estimates for geographic
-units, such as states. 
-
-dgo can also be used to estimate smoothed estimates of subpopulation groups'
-average responses on individual survey questions using a dynamic multi-level
-regression and poststratification (MRP) model ([Park, Gelman, and Bafumi
+dgo is an R package for the dynamic estimation of group-level public opinion.
+You can use the package to estimate latent trait means in subpopulations from
+survey data. For example, dgo can estimate the average policy liberalism in each
+American state over time among Democrats, Independents, and Republicans, given
+their answers to survey questions about policy proposals.
+
+dgo accomplishes this using a Bayesian group-level IRT approach developed by
+[Caughey and Warshaw
+2015](http://pan.oxfordjournals.org/content/early/2015/02/04/pan.mpu021.full.pdf+html).
+It models latent traits at the level of demographic and geographic groups rather
+than individuals. It uses a hierarchical model to borrow strength
+cross-sectionally and dynamic linear models to do so across time.
+
+The package can also be used to produce smoothed estimates of subpopulations'
+average responses to single survey items, using a dynamic multi-level regression
+and poststratification (MRP) model ([Park, Gelman, and Bafumi
 2004](http://stat.columbia.edu/~gelman/research/published/StateOpinionsNationalPolls.050712.dkp.pdf)).
-For instance, it could be used to estimate public opinion in each state on
+For instance, you can use dgo to estimate public opinion in each state on
 same-sex marriage or the Affordable Care Act.
 
 This model opens up new areas of research on historical public opinion in the
-United States at the subnational level. It also enables scholars of comparative
-politics to estimate dynamic models of public opinion opinion at the country or
-subnational level.
+United States at the subnational level. It also allows scholars of comparative
+politics to estimate dynamic cross-national models of public opinion.
 
 ```{r, knitr-options, echo = FALSE}
 # rmarkdown::render("README.Rmd")
@@ -67,7 +68,7 @@ If you don't have already have RStan, follow its
 Load the package and set RStan's recommended options for a local, multicore
 machine with excess RAM:
 
-```{r, result = 'hide'}
+```{r, result = 'hide', message = FALSE}
 library(dgo)
 rstan_options(auto_write = TRUE)
 options(mc.cores = parallel::detectCores())

diff --git a/README.md b/README.md
@@ -1,73 +1,118 @@
-
-[![Build Status](https://travis-ci.org/jamesdunham/dgo.svg?branch=master)](https://travis-ci.org/jamesdunham/dgo) [![Build status](https://ci.appveyor.com/api/projects/status/1ta36kmoqen98k87?svg=true)](https://ci.appveyor.com/project/jamesdunham/dgo) [![codecov](https://codecov.io/gh/jamesdunham/dgo/branch/master/graph/badge.svg)](https://codecov.io/gh/jamesdunham/dgo)
-
-Introduction
-============
-
-dgo is an R package for the dynamic estimation of group-level opinion. The package can be used to estimate subpopulation groups' average latent conservatism (or other latent trait) from individuals' responses to dichotomous questions using a Bayesian group-level IRT approach developed by [Caughey and Warshaw 2015](http://pan.oxfordjournals.org/content/early/2015/02/04/pan.mpu021.full.pdf+html) that models latent traits at the level of demographic and/or geographic groups rather than individuals. This approach uses a hierarchical model to borrow strength cross-sectionally and dynamic linear models to do so across time. The group-level estimates can be weighted to generate estimates for geographic units, such as states.
-
-dgo can also be used to estimate smoothed estimates of subpopulation groups' average responses on individual survey questions using a dynamic multi-level regression and poststratification (MRP) model ([Park, Gelman, and Bafumi 2004](http://stat.columbia.edu/~gelman/research/published/StateOpinionsNationalPolls.050712.dkp.pdf)). For instance, it could be used to estimate public opinion in each state on same-sex marriage or the Affordable Care Act.
-
-This model opens up new areas of research on historical public opinion in the United States at the subnational level. It also enables scholars of comparative politics to estimate dynamic models of public opinion opinion at the country or subnational level.
-
-Installation
-============
-
-dgo can be installed from [CRAN](https://CRAN.R-project.org/package=dgo):
+dgo: Dynamic Estimation of Group-Level Opinion
+================
+
+[![Build
+Status](https://travis-ci.org/jamesdunham/dgo.svg?branch=master)](https://travis-ci.org/jamesdunham/dgo)
+[![Build
+status](https://ci.appveyor.com/api/projects/status/1ta36kmoqen98k87?svg=true)](https://ci.appveyor.com/project/jamesdunham/dgo)
+[![codecov](https://codecov.io/gh/jamesdunham/dgo/branch/master/graph/badge.svg)](https://codecov.io/gh/jamesdunham/dgo)
+
+# Introduction
+
+dgo is an R package for the dynamic estimation of group-level public
+opinion. You can use the package to estimate latent trait means in
+subpopulations from survey data. For example, dgo can estimate the
+average policy liberalism in each American state over time among
+Democrats, Independents, and Republicans, given their answers to survey
+questions about policy proposals.
+
+dgo accomplishes this using a Bayesian group-level IRT approach
+developed by [Caughey and Warshaw
+2015](http://pan.oxfordjournals.org/content/early/2015/02/04/pan.mpu021.full.pdf+html).
+It models latent traits at the level of demographic and geographic
+groups rather than individuals. It uses a hierarchical model to borrow
+strength cross-sectionally and dynamic linear models to do so across
+time.
+
+The package can also be used to produce smoothed estimates of
+subpopulations’ average responses to single survey items, using a
+dynamic multi-level regression and poststratification (MRP) model
+([Park, Gelman, and Bafumi
+2004](http://stat.columbia.edu/~gelman/research/published/StateOpinionsNationalPolls.050712.dkp.pdf)).
+For instance, you can use dgo to estimate public opinion in each state
+on same-sex marriage or the Affordable Care Act.
+
+This model opens up new areas of research on historical public opinion
+in the United States at the subnational level. It also allows scholars
+of comparative politics to estimate dynamic cross-national models of
+public opinion.
+
+# Installation
+
+dgo can be installed from
+[CRAN](https://CRAN.R-project.org/package=dgo):
 
 ``` r
 install.packages("dgo")
 ```
 
-Or get the latest version from [GitHub](https://github.com/jamesdunham/dgo) using [devtools](https://github.com/hadley/devtools/):
+Or get the latest version from
+[GitHub](https://github.com/jamesdunham/dgo) using
+[devtools](https://github.com/hadley/devtools/):
 
 ``` r
 if (!require(devtools, quietly = TRUE)) install.packages("devtools")
 devtools::install_github("jamesdunham/dgo")
 ```
 
-dgo requires a working installation of [RStan](http://mc-stan.org/interfaces/rstan.html). If you don't have already have RStan, follow its "[Getting Started](https://github.com/stan-dev/rstan/wiki/RStan-Getting-Started)" guide.
+dgo requires a working installation of
+[RStan](http://mc-stan.org/interfaces/rstan.html). If you don’t
+already have RStan, follow its “[Getting
+Started](https://github.com/stan-dev/rstan/wiki/RStan-Getting-Started)”
+guide.
 
-Usage
-=====
+# Usage
 
-Load the package and set RStan's recommended options for a local, multicore machine with excess RAM:
+Load the package and set RStan’s recommended options for a local,
+multicore machine with excess RAM:
 
 ``` r
 library(dgo)
-#> Loading required package: dgodata
-#> Loading required package: rstan
-#> Loading required package: ggplot2
-#> Loading required package: StanHeaders
-#> rstan (Version 2.16.2, packaged: 2017-07-03 09:24:58 UTC, GitRev: 2e1f913d3ca3)
-#> For execution on a local, multicore CPU with excess RAM we recommend calling
-#> rstan_options(auto_write = TRUE)
-#> options(mc.cores = parallel::detectCores())
 rstan_options(auto_write = TRUE)
 options(mc.cores = parallel::detectCores())
 ```
 
 The minimal workflow from raw data to estimation is:
 
 1.  shape input data using the `shape()` function; and
-2.  pass the result to the `dgirt()` function to estimate a latent trait (e.g., conservatism) or `dgmrp()` function to estimate opinion on a single survey question.
-
-Troubleshooting
-===============
-
-Please [report issues](https://github.com/jamesdunham/dgo/issues) that you encounter.
-
--   OS X only: RStan creates temporary files during estimation in a location given by `tempdir()`, typically an arbitrary location in `/var/folders`. If a model runs for days, these files can be cleaned up while still needed, which induces an error. A good solution is to set a safer path for temporary files, using an environment variable checked at session startup. For help setting environment variables, see the Stack Overflow question [here](https://stackoverflow.com/questions/17107206/change-temporary-directory). Confirm the new path before starting your model run by restarting R and checking the output from `tempdir()`.
-
--   Models fitted before October 2016 (specifically &lt; [\#8e6a2cf](https://github.com/jamesdunham/dgo/commit/8e6a2cfbe00b2cd4a908b3067241e06124d143cd)) using dgirt are not fully compatible with dgo. Their contents can be extracted without using dgo, however, with the `$` indexing operator. For example: `as.data.frame(dgirtfit_object$stan.cmb)`.
-
--   Calling `dgirt()` or `dgmrp()` can generate [warnings](http://mc-stan.org/misc/warnings#compiler-warnings) during model compilation. These are safe to ignore, or can be suppressed by following the linked instructions.
-
-Contributing and citing
-=======================
-
-dgo is under development and we welcome [suggestions](https://github.com/jamesdunham/dgo/issues).
+2.  pass the result to the `dgirt()` function to estimate a latent trait
+    (e.g., conservatism) or `dgmrp()` function to estimate opinion on a
+    single survey question.
+
+# Troubleshooting
+
+Please [report issues](https://github.com/jamesdunham/dgo/issues) that
+you encounter.
+
+  - OS X only: RStan creates temporary files during estimation in a
+    location given by `tempdir()`, typically an arbitrary location in
+    `/var/folders`. If a model runs for days, these files can be cleaned
+    up while still needed, which induces an error. A good solution is to
+    set a safer path for temporary files, using an environment variable
+    checked at session startup. For help setting environment variables,
+    see the Stack Overflow question
+    [here](https://stackoverflow.com/questions/17107206/change-temporary-directory).
+    Confirm the new path before starting your model run by restarting R
+    and checking the output from `tempdir()`.
+
+  - Models fitted before October 2016 (specifically \<
+    [\#8e6a2cf](https://github.com/jamesdunham/dgo/commit/8e6a2cfbe00b2cd4a908b3067241e06124d143cd))
+    using dgirt are not fully compatible with dgo. Their contents can be
+    extracted without using dgo, however, with the `$` indexing
+    operator. For example: `as.data.frame(dgirtfit_object$stan.cmb)`.
+
+  - Calling `dgirt()` or `dgmrp()` can generate
+    [warnings](http://mc-stan.org/misc/warnings#compiler-warnings)
+    during model compilation. These are safe to ignore, or can be
+    suppressed by following the linked instructions.
+
+# Contributing and citing
+
+dgo is under development and we welcome
+[suggestions](https://github.com/jamesdunham/dgo/issues).
 
 The package citation is:
 
-Dunham, James, Devin Caughey, and Christopher Warshaw. 2017. dgo: Dynamic Estimation of Group-level Opinion. R package. <https://jdunham.io/dgo/>.
+Dunham, James, Devin Caughey, and Christopher Warshaw. 2017. dgo:
+Dynamic Estimation of Group-level Opinion. R package.
+<https://jdunham.io/dgo/>.
diff --git a/data/toy_dgirt_in.rda b/data/toy_dgirt_in.rda
diff --git a/data/toy_dgirtfit.rda b/data/toy_dgirtfit.rda