Model Diagnostics and Comparison

Overview

vitalBayes provides tools for assessing model fit and comparing alternative specifications. This vignette covers convergence diagnostics, posterior predictive checks, and leave-one-out cross-validation (LOO-CV).

Convergence Diagnostics

Before interpreting results, verify that MCMC chains have converged.

Built-in cmdstanr Diagnostics

library(vitalBayes)
library(data.table)

# Load simulated data
data(growth_data)

# Prepare growth data
gdata <- growth_data[embryo == FALSE & !is.na(age)]

# Fit a model
growth_fit <- fit_bayesian_growth(
 lt = "fl",
 age = "age",
 sex = "sex",
 data = gdata,
 model = "vb"
)

# Check for sampling issues
growth_fit$diagnostic_summary()

# Key metrics for all parameters
growth_fit$summary(c("Linf", "L0", "k", "sigma"))

What to Look For

Metric	Good Values	Concern
`rhat`	< 1.01	> 1.05 suggests non-convergence
`ess_bulk`	> 400	< 100 suggests poor mixing
`ess_tail`	> 400	Low values affect CI reliability
Divergences	0	Any divergences indicate geometry issues
Max treedepth	< max	Hitting max suggests difficult posterior

Addressing Problems

# Increase adapt_delta for divergences
growth_fit <- fit_bayesian_growth(
 lt = "fl", age = "age", data = gdata,
 adapt_delta = 0.99  # Default is 0.8-0.95
)

# Increase iterations for low ESS
growth_fit <- fit_bayesian_growth(
 lt = "fl", age = "age", data = gdata,
 iter_warmup = 2000,
 iter_sampling = 2000
)

# Use more informative priors if parameters are poorly identified
growth_fit <- fit_bayesian_growth(
 lt = "fl", age = "age", data = gdata,
 CV_Linf = 0.15,  # Tighter prior
 CV_k = 0.30      # Tighter prior
)

Posterior Predictive Checks

Do model predictions match observed data patterns?

Using ppc_summary()

# First fit all required models
birth_fit <- fit_bayesian_birth(
 embryo_lts = growth_data[embryo == TRUE, fl],
 free_swimming_lts = growth_data[embryo == FALSE, fl]
)

mat_data <- growth_data[embryo == FALSE & !is.na(mat)]

L50_fit <- fit_bayesian_maturity(
 maturity = "mat", lt = "fl", sex = "sex", data = mat_data
)

t50_fit <- fit_bayesian_maturity(
 maturity = "mat", age = "age", sex = "sex", 
 data = mat_data[!is.na(age)]
)

growth_vb <- fit_bayesian_growth(
 lt = "fl", age = "age", sex = "sex", data = gdata,
 model = "vb", k_based = FALSE, L50_fit = L50_fit, t50_fit = t50_fit
)

growth_gomp <- fit_bayesian_growth(
 lt = "fl", age = "age", sex = "sex", data = gdata,
 model = "gompertz", k_based = FALSE, L50_fit = L50_fit, t50_fit = t50_fit
)

# Comprehensive PPC for entire workflow
ppc <- ppc_summary(
 birth_fit = birth_fit,
 L50_fit = L50_fit,
 t50_fit = t50_fit,
 growth_fits = list(vb = growth_vb, gomp = growth_gomp),
 data = growth_data,
 embryo_col = "embryo"
)

# View summary
print(ppc)
summary(ppc)

Built-in Generated Quantities

Each model computes PPC metrics internally:

# Birth model
birth_fit$summary(c(
 "mean_p_embryo",      # Should be low (~0.1-0.3)
 "mean_p_freeswim",    # Should be high (~0.7-0.9)
 "prop_correct_rep"   # Classification accuracy
))

# Maturity model
L50_fit$summary(c(
 "prop_correct_rep",
 "mean_p_mature_f", "mean_p_mature_m",
 "mean_p_immature_f", "mean_p_immature_m"
))

# Growth model
growth_fit$summary(c(
 "rmse",              # Root mean squared error
 "mae",               # Mean absolute error
 "prop_in_95ci"       # Coverage (should be ~0.95)
))

Residual Diagnostics

# Visual residual checks
plot_residuals(
 fit = growth_fit,
 data = gdata,
 age_col = "age",
 length_col = "fl",
 type = "all"
)

# Look for:
# - No trend in residuals vs fitted
# - Approximately normal Q-Q plot
# - No pattern in residuals vs age
# - Symmetric residual histogram

Model Comparison with LOO-CV

What is LOO-CV?

Leave-one-out cross-validation estimates predictive accuracy: how well does the model predict each observation when that observation is left out of fitting?

\[\text{elpd}_{loo} = \sum_{i=1}^{n} \log p(y_i | y_{-i})\]

Higher (less negative) is better. vitalBayes uses Pareto-smoothed importance sampling (PSIS-LOO) for efficient computation.

Computing LOO

# Fit additional models for comparison
growth_logis <- fit_bayesian_growth(
 lt = "fl", age = "age", sex = "sex", data = gdata,
 model = "logistic", k_based = FALSE, L50_fit = L50_fit, t50_fit = t50_fit
)

# Compute LOO for each model
loo_vb <- compute_loo(growth_vb)
loo_gomp <- compute_loo(growth_gomp)
loo_logis <- compute_loo(growth_logis)

# Check Pareto k diagnostics
print(loo_vb)

# k values interpretation:
# k < 0.5: Good
# 0.5 < k < 0.7: OK
# k > 0.7: Problematic (influential observation)

Comparing Models

# Compare multiple models
compare_loo(
 "von Bertalanffy" = loo_vb,
 "Gompertz" = loo_gomp,
 "Logistic" = loo_logis
)

# Output interpretation:
# - elpd_diff: Difference from best model (0 for best)
# - se_diff: Standard error of the difference
# - Rule of thumb: |elpd_diff| > 2*se_diff suggests meaningful difference

Creating Summary Tables

# Publication-ready table
loo_table <- create_loo_table(
 "von Bertalanffy" = loo_vb,
 "Gompertz" = loo_gomp,
 "Logistic" = loo_logis
)

print(loo_table)

Comparing Pooling Strategies

For two-sex models, compare partial pooling vs no pooling to assess whether hierarchical structure improves estimation:

# Use the imbalanced dataset where pooling matters most
data(imbalanced_data)
gdata_imbal <- imbalanced_data[embryo == FALSE & !is.na(age)]

# Fit both versions
growth_pooled <- fit_bayesian_growth(
 lt = "fl", age = "age", sex = "sex",
 data = gdata_imbal,
 use_pooling = TRUE
)

growth_unpooled <- fit_bayesian_growth(
 lt = "fl", age = "age", sex = "sex",
 data = gdata_imbal,
 use_pooling = FALSE
)

# Compare
compare_pooling(
 pooled = growth_pooled,
 unpooled = growth_unpooled,
 params = c("Linf", "k")
)

# Typically expect:
# - Similar point estimates
# - Narrower CIs with pooling (especially for sparse sex)
# - Better LOO for pooled when sexes are similar

For a comprehensive treatment of when and why to use partial pooling, including the underlying mathematics and practical guidance, see vignette("partial_pooling").

Creating Parameter Tables

Single Model

# Extract formatted parameter estimates
growth_fit$summary(c("Linf", "L0", "k", "sigma"))

Multiple Models

# Comprehensive table across workflow stages
param_table <- create_parameter_table(
 birth = birth_fit,
 L50 = L50_fit,
 t50 = t50_fit,
 growth = growth_fit,
 ci_width = 0.95,    # 95% credible intervals
 digits = 2
)

# Export for publication
data.table::fwrite(param_table, "Table1_parameters.csv")

Complete Diagnostic Workflow

# ---- Load data ----
data(growth_data)
gdata <- growth_data[embryo == FALSE & !is.na(age)]

# ---- 1. Fit Models ----
growth_vb <- fit_bayesian_growth(
 lt = "fl", age = "age", sex = "sex",
 data = gdata, model = "vb"
)

growth_gomp <- fit_bayesian_growth(
 lt = "fl", age = "age", sex = "sex",
 data = gdata, model = "gompertz"
)

# ---- 2. Check Convergence ----
growth_vb$diagnostic_summary()
growth_vb$summary()[, c("variable", "rhat", "ess_bulk")]

# ---- 3. Posterior Predictive Checks ----
growth_vb$summary(c("rmse", "mae", "prop_in_95ci"))
plot_residuals(growth_vb, gdata, type = "all")

# ---- 4. Model Comparison ----
loo_vb <- compute_loo(growth_vb)
loo_gomp <- compute_loo(growth_gomp)

comparison <- compare_loo(
 "von Bertalanffy" = loo_vb,
 "Gompertz" = loo_gomp
)

print(comparison)

# ---- 5. Select Best Model ----
best_model <- if (comparison$elpd_diff[1] == 0) growth_vb else growth_gomp

# ---- 6. Final Summary ----
best_model$summary(c("Linf", "L0", "k", "sigma"))

Interpreting LOO Results

Scenario	elpd_diff	se_diff	Interpretation
Clear winner	-10	3	Model 1 clearly better
Unclear	-2	3	Models essentially equivalent
Close call	-5	2	Slight preference for Model 1

When models are equivalent by LOO, prefer:

The simpler model (fewer parameters)
The more biologically interpretable model
The model with better residual diagnostics