Skip to content

Commit

Permalink
Merge branch 'main' into main_postprocess_update
Browse files Browse the repository at this point in the history
  • Loading branch information
jcblemai committed Jun 6, 2024
2 parents 160ce81 + ad733ca commit c771178
Show file tree
Hide file tree
Showing 9 changed files with 119 additions and 105 deletions.
1 change: 0 additions & 1 deletion build/conda_environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,6 @@ dependencies:
- conda-forge/linux-64::r-readxl==1.4.1=r42h3ebcfa7_1
- conda-forge/noarch::r-reprex==2.0.2=r42hc72bb7e_1
- conda-forge/linux-64::r-tidyr==1.3.0=r42h38f115c_0
- conda-forge/noarch::r-tigris==2.0.1=r42hc72bb7e_0
- conda-forge/noarch::r-waldo==0.4.0=r42hc72bb7e_1
- conda-forge/noarch::r-broom==1.0.3=r42hc72bb7e_0
- conda-forge/linux-64::r-gdtools==0.3.0=r42he0ce631_0
Expand Down
2 changes: 1 addition & 1 deletion build/local_install.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ local({r <- getOption("repos")

library(devtools)

install.packages(c("covidcast","data.table","vroom","dplyr"), quiet=TRUE, dependencies = TRUE)
install.packages(c("covidcast","data.table","vroom","dplyr"), quiet=TRUE)
# devtools::install_github("hrbrmstr/cdcfluview")

# To run if operating in the container -----
Expand Down
1 change: 0 additions & 1 deletion datasetup/build_US_setup.R
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ state_level <- ifelse(!is.null(config$subpop_setup$state_level) && config$subpop
# tidycensus::census_api_key(key = census_key)



filterUSPS <- c("WY","VT","DC","AK","ND","SD","DE","MT","RI","ME","NH","HI","ID","WV","NE","NM",
"KS","NV","MS","AR","UT","IA","CT","OK","OR","KY","LA","AL","SC","MN","CO","WI",
"MD","MO","IN","TN","MA","AZ","WA","VA","NJ","MI","NC","GA","OH","IL","PA","NY","FL","TX","CA")
Expand Down
1 change: 0 additions & 1 deletion flepimop/R_packages/flepicommon/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ export(fix_negative_counts_single_subpop)
export(get_CSSE_US_data)
export(get_CSSE_US_matchGlobal_data)
export(get_CSSE_global_data)
export(get_USAFacts_data)
export(get_covidcast_data)
export(get_groundtruth_from_source)
export(load_config)
Expand Down
58 changes: 0 additions & 58 deletions flepimop/R_packages/flepicommon/R/DataUtils.R
Original file line number Diff line number Diff line change
Expand Up @@ -426,64 +426,6 @@ aggregate_counties_to_state <- function(df, state_fips){
}


##'
##' Pull case and death count data from USAFacts
##'
##' Pulls the USAFacts cumulative case count and death data. Calculates incident counts.
##' USAFacts does not include data for all the territories (aka island areas). These data are pulled from NYTimes.
##'
##' Returned data preview:
##' tibble [352,466 × 7] (S3: grouped_df/tbl_df/tbl/data.frame)
##' $ FIPS : chr [1:352466] "00001" "00001" "00001" "00001" ...
##' $ source : chr [1:352466] "NY" "NY" "NY" "NY" ...
##' $ Update : Date[1:352466], format: "2020-01-22" "2020-01-23" ...
##' $ Confirmed : num [1:352466] 0 0 0 0 0 0 0 0 0 0 ...
##' $ Deaths : num [1:352466] 0 0 0 0 0 0 0 0 0 0 ...
##' $ incidI : num [1:352466] 0 0 0 0 0 0 0 0 0 0 ...
##' $ incidDeath : num [1:352466] 0 0 0 0 0 0 0 0 0 0 ...
##'
##' @param case_data_filename Filename where case data are stored
##' @param death_data_filename Filename where death data are stored
##' @param incl_unassigned Includes data unassigned to counties (default is FALSE)
##' @return the case and deaths data frame
##'
##'
##' @export
##'
get_USAFacts_data <- function(case_data_filename = "data/case_data/USAFacts_case_data.csv",
death_data_filename = "data/case_data/USAFacts_death_data.csv",
incl_unassigned = FALSE){

USAFACTS_CASE_DATA_URL <- "https://usafactsstatic.blob.core.windows.net/public/data/covid-19/covid_confirmed_usafacts.csv"
USAFACTS_DEATH_DATA_URL <- "https://usafactsstatic.blob.core.windows.net/public/data/covid-19/covid_deaths_usafacts.csv"
usafacts_case <- download_USAFacts_data(case_data_filename, USAFACTS_CASE_DATA_URL, "Confirmed", incl_unassigned)
usafacts_death <- download_USAFacts_data(death_data_filename, USAFACTS_DEATH_DATA_URL, "Deaths", incl_unassigned)

usafacts_data <- dplyr::full_join(usafacts_case, usafacts_death)
usafacts_data <- dplyr::select(usafacts_data, Update, source, FIPS, Confirmed, Deaths)
usafacts_data <- rbind(usafacts_data, get_islandareas_data()) # Append island areas
usafacts_data <- dplyr::arrange(usafacts_data, source, FIPS, Update)

# Create columns incidI and incidDeath
usafacts_data <- dplyr::group_modify(
dplyr::group_by(
usafacts_data,
FIPS
),
function(.x,.y){
.x$incidI = c(.x$Confirmed[1],diff(.x$Confirmed))
.x$incidDeath = c(.x$Deaths[1],diff(.x$Deaths,))
return(.x)
}
)

# Fix incidence counts that go negative and NA values or missing dates
usafacts_data <- fix_negative_counts(usafacts_data, "Confirmed", "incidI")
usafacts_data <- fix_negative_counts(usafacts_data, "Deaths", "incidDeath")

return(usafacts_data)
}



##'
Expand Down
30 changes: 30 additions & 0 deletions flepimop/R_packages/flepiconfig/R/process_npi_list.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,36 @@ NULL



load_geodata_file <- function(filename,
geoid_len = 0,
geoid_pad = "0",
state_name = TRUE) {

if(!file.exists(filename)){stop(paste(filename,"does not exist in",getwd()))}
geodata <- readr::read_csv(filename) %>%
dplyr::mutate(geoid = as.character(geoid))

if (!("geoid" %in% names(geodata))) {
stop(paste(filename, "does not have a column named geoid"))
}

if (geoid_len > 0) {
geodata$geoid <- stringr::str_pad(geodata$geoid, geoid_len, pad = geoid_pad)
}

if(state_name) {
utils::data(fips_us_county, package = "flepicommon") # arrow::read_parquet("datasetup/usdata/fips_us_county.parquet")
geodata <- fips_us_county %>%
dplyr::distinct(state, state_name) %>%
dplyr::rename(USPS = state) %>%
dplyr::rename(state = state_name) %>%
dplyr::mutate(state = dplyr::recode(state, "U.S. Virgin Islands" = "Virgin Islands")) %>%
dplyr::right_join(geodata)
}

return(geodata)
}

##' find_truncnorm_mean_parameter
##'
##' Convenience function that estimates the mean value for a truncnorm distribution given a, b, and sd that will have the expected value of the input mean.
Expand Down
1 change: 1 addition & 0 deletions flepimop/main_scripts/create_seeding_added.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ library(purrr)

option_list <- list(
optparse::make_option(c("-c", "--config"), action = "store", default = Sys.getenv("CONFIG_PATH"), type = "character", help = "path to the config file"),
optparse::make_option(c("-p", "--flepi_path"), action="store", type='character', help="path to the flepiMoP directory", default = Sys.getenv("FLEPI_PATH", "flepiMoP/")),
optparse::make_option(c("-k", "--keep_all_seeding"), action="store",default=TRUE,type='logical',help="Whether to filter away seeding prior to the start date of the simulation.")
)
opt <- optparse::parse_args(optparse::OptionParser(option_list = option_list))
Expand Down
48 changes: 46 additions & 2 deletions flepimop/main_scripts/inference_slot.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ option_list = list(
optparse::make_option(c("-H","--save_hosp"), action = "store", default = Sys.getenv("SAVE_HOSP", TRUE), type = 'logical', help = 'Should the HOSP output files be saved for each iteration'),
optparse::make_option(c("-M", "--memory_profiling"), action = "store", default = Sys.getenv("FLEPI_MEM_PROFILE", FALSE), type = 'logical', help = 'Should the memory profiling be run during iterations'),
optparse::make_option(c("-P", "--memory_profiling_iters"), action = "store", default = Sys.getenv("FLEPI_MEM_PROF_ITERS", 100), type = 'integer', help = 'If doing memory profiling, after every X iterations run the profiler'),
optparse::make_option(c("-g", "--subpop_len"), action="store", default=Sys.getenv("SUBPOP_LENGTH", 5), type='integer', help = "number of digits in subpop")
optparse::make_option(c("-g", "--subpop_len"), action="store", default=Sys.getenv("SUBPOP_LENGTH", 5), type='integer', help = "number of digits in subpop"),
optparse::make_option(c("-a", "--incl_aggr_likelihood"), action = "store", default = Sys.getenv("INCL_AGGR_LIKELIHOOD", TRUE), type = 'logical', help = 'Should the likelihood be calculated with the aggregate estiamtes.')
)

parser=optparse::OptionParser(option_list=option_list)
Expand Down Expand Up @@ -90,6 +91,17 @@ if (opt$config == ""){
}
config = flepicommon::load_config(opt$config)

if (!is.null(config$inference$incl_aggr_likelihood)){
print("Using config option for `incl_aggr_likelihood`.")
opt$incl_aggr_likelihood <- config$inference$incl_aggr_likelihood
if (!is.null(config$inference$total_ll_multiplier)){
print("Using config option for `total_ll_multiplier`.")
opt$total_ll_multiplier <- config$inference$total_ll_multiplier
} else {
opt$total_ll_multiplier <- 1
}
}

## Check for errors in config ---------------------------------------------------------------------

## seeding section
Expand Down Expand Up @@ -262,6 +274,20 @@ if (config$inference$do_inference){
dplyr::right_join(tidyr::expand_grid(subpop = unique(.$subpop), date = unique(.$date))) %>%
dplyr::mutate_if(is.numeric, dplyr::coalesce, 0)


# add aggregate groundtruth to the obs data for the likelihood calc
if (opt$incl_aggr_likelihood){
obs <- obs %>%
dplyr::bind_rows(
obs %>%
dplyr::select(date, where(is.numeric)) %>%
dplyr::group_by(date) %>%
dplyr::summarise(across(everything(), sum)) %>% # no likelihood is calculated for time periods with missing data for any subpop
dplyr::mutate(source = "Total",
subpop = "Total")
)
}

subpopnames <- unique(obs[[obs_subpop]])


Expand Down Expand Up @@ -605,7 +631,19 @@ for(seir_modifiers_scenario in seir_modifiers_scenarios) {
# run
if (config$inference$do_inference){
sim_hosp <- flepicommon::read_file_of_type(gsub(".*[.]","",this_global_files[['hosp_filename']]))(this_global_files[['hosp_filename']]) %>%
dplyr::filter(time >= min(obs$date),time <= max(obs$date))
dplyr::filter(time >= min(obs$date), time <= max(obs$date))

# add aggregate groundtruth to the obs data for the likelihood calc
if (opt$incl_aggr_likelihood){
sim_hosp <- sim_hosp %>%
dplyr::bind_rows(
sim_hosp %>%
dplyr::select(-tidyselect::all_of(obs_subpop), -tidyselect::starts_with("date")) %>%
dplyr::group_by(time) %>%
dplyr::summarise(dplyr::across(tidyselect::everything(), sum)) %>% # no likelihood is calculated for time periods with missing data for any subpop
dplyr::mutate(!!obs_subpop := "Total")
)
}

lhs <- unique(sim_hosp[[obs_subpop]])
rhs <- unique(names(data_stats))
Expand Down Expand Up @@ -641,6 +679,12 @@ for(seir_modifiers_scenario in seir_modifiers_scenarios) {

rm(sim_hosp)

# multiply aggregate likelihood by a factor if specified in config
if (opt$incl_aggr_likelihood){
proposed_likelihood_data$ll[proposed_likelihood_data$subpop == "Total"] <- proposed_likelihood_data$ll[proposed_likelihood_data$subpop == "Total"] * opt$total_ll_multiplier
}


# write proposed likelihood to global file
arrow::write_parquet(proposed_likelihood_data, this_global_files[['llik_filename']])

Expand Down
Loading

0 comments on commit c771178

Please sign in to comment.