vignettes/articles/sbc.Rmd
sbc.RmdThis document is one of a series of simulation-based calibration
exercieses for models available in R package flocker. Here,
our goal is to validate flocker’s data formatting,
decoding, and likelihood implementations, and not brms’s
construction of the linear predictors.
The encoding of the data for a flocker model tends to be
more complex in the presence of missing observations, and so we include
missingness in the data simulation wherever possible (some visits
missing in all models, some time-steps missing in multiseason
models).
In all models, we include one unit covariate that affects detection and occupancy, colonization, extinction and/or autologistic terms as applicable, and one event covariate that affects detection only (for all models except the rep-constant).
In addition to this article on single-season models, articles are available showing SBC for multi-season models and data-augmented multi-species models.
# make the stancode
model_name <- paste0(tempdir(), "/sbc_rep_constant_model.stan")
fd <- simulate_flocker_data(
n_pt = params$n_sites, n_sp = 1,
params = list(
coefs = data.frame(
det_intercept = rnorm(1),
det_slope_unit = rnorm(1),
occ_intercept = rnorm(1),
occ_slope_unit = rnorm(1)
)
),
seed = NULL,
rep_constant = TRUE,
ragged_rep = TRUE
)
flocker_data = make_flocker_data(fd$obs, fd$unit_covs, quiet = TRUE)
scode <- flocker_stancode(
f_occ = ~ 0 + Intercept + uc1,
f_det = ~ 0 + Intercept + uc1,
flocker_data = flocker_data,
prior =
brms::set_prior("std_normal()") +
brms::set_prior("std_normal()", dpar = "occ"),
backend = "cmdstanr"
)
writeLines(scode, model_name)
rep_constant_generator <- function(N){
fd <- simulate_flocker_data(
n_pt = N, n_sp = 1,
params = list(
coefs = data.frame(
det_intercept = rnorm(1),
det_slope_unit = rnorm(1),
occ_intercept = rnorm(1),
occ_slope_unit = rnorm(1)
)
),
seed = NULL,
rep_constant = TRUE,
ragged_rep = TRUE
)
flocker_data = make_flocker_data(fd$obs, fd$unit_covs, quiet = TRUE)
# format for return
list(
variables = list(
`b[1]` = fd$params$coefs$det_intercept,
`b[2]` = fd$params$coefs$det_slope_unit,
`b_occ[1]` = fd$params$coefs$occ_intercept,
`b_occ[2]` = fd$params$coefs$occ_slope_unit
),
generated = flocker_standata(
f_occ = ~ 0 + Intercept + uc1,
f_det = ~ 0 + Intercept + uc1,
flocker_data = flocker_data
)
)
}
rep_constant_gen <- SBC_generator_function(
rep_constant_generator,
N = params$n_sites
)
rep_constant_dataset <- suppressMessages(
generate_datasets(rep_constant_gen, params$n_sims)
)
rep_constant_backend <-
SBC_backend_cmdstan_sample(
cmdstanr::cmdstan_model(
paste0(tempdir(), "/sbc_rep_constant_model.stan")
)
)
rep_constant_results <- compute_SBC(rep_constant_dataset, rep_constant_backend)
plot_ecdf(rep_constant_results)
plot of chunk rep-constant
plot_rank_hist(rep_constant_results)
plot of chunk rep-constant
plot_ecdf_diff(rep_constant_results)
plot of chunk rep-constant
# make the stancode
model_name <- paste0(tempdir(), "/sbc_rep_varying_model.stan")
fd <- simulate_flocker_data(
n_pt = params$n_sites, n_sp = 1,
params = list(
coefs = data.frame(
det_intercept = rnorm(1),
det_slope_unit = rnorm(1),
det_slope_visit = rnorm(1),
occ_intercept = rnorm(1),
occ_slope_unit = rnorm(1)
)
),
seed = NULL,
rep_constant = FALSE,
ragged_rep = TRUE
)
flocker_data = make_flocker_data(fd$obs, fd$unit_covs, fd$event_covs, quiet = TRUE)
scode <- flocker_stancode(
f_occ = ~ 0 + Intercept + uc1,
f_det = ~ 0 + Intercept + uc1 + ec1,
flocker_data = flocker_data,
prior =
brms::set_prior("std_normal()") +
brms::set_prior("std_normal()", dpar = "occ"),
backend = "cmdstanr"
)
writeLines(scode, model_name)
rep_varying_generator <- function(N){
fd <- simulate_flocker_data(
n_pt = N, n_sp = 1,
params = list(
coefs = data.frame(
det_intercept = rnorm(1),
det_slope_unit = rnorm(1),
det_slope_visit = rnorm(1),
occ_intercept = rnorm(1),
occ_slope_unit = rnorm(1)
)
),
seed = NULL,
rep_constant = FALSE,
ragged_rep = TRUE
)
flocker_data = make_flocker_data(fd$obs, fd$unit_covs, fd$event_covs, quiet = TRUE)
# format for return
list(
variables = list(
`b[1]` = fd$params$coefs$det_intercept,
`b[2]` = fd$params$coefs$det_slope_unit,
`b[3]` = fd$params$coefs$det_slope_visit,
`b_occ[1]` = fd$params$coefs$occ_intercept,
`b_occ[2]` = fd$params$coefs$occ_slope_unit
),
generated = flocker_standata(
f_occ = ~ 0 + Intercept + uc1,
f_det = ~ 0 + Intercept + uc1 + ec1,
flocker_data = flocker_data
)
)
}
rep_varying_gen <- SBC_generator_function(
rep_varying_generator,
N = params$n_sites
)
rep_varying_dataset <- suppressMessages(
generate_datasets(rep_varying_gen, params$n_sims)
)
rep_varying_backend <-
SBC_backend_cmdstan_sample(
cmdstanr::cmdstan_model(
paste0(tempdir(), "/sbc_rep_varying_model.stan")
)
)
rep_varying_results <- compute_SBC(rep_varying_dataset, rep_varying_backend)## - 1 (0%) fits had maximum Rhat > 1.01. Maximum Rhat was 1.01.
## Not all diagnostics are OK.
## You can learn more by inspecting $default_diagnostics, $backend_diagnostics
## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
plot_ecdf(rep_varying_results)
plot of chunk rep-varying
plot_rank_hist(rep_varying_results)
plot of chunk rep-varying
plot_ecdf_diff(rep_varying_results)
plot of chunk rep-varying