library(tidyverse) # for data wrangling etc
library(rstanarm) # for fitting models in STAN
library(cmdstanr) # for cmdstan
library(brms) # for fitting models in STAN
library(standist) # for exploring distributions
library(HDInterval) # for HPD intervals
library(posterior) # for posterior draws
library(coda) # for diagnostics
library(bayesplot) # for diagnostics
library(ggmcmc) # for MCMC diagnostics
library(rstan) # for interfacing with STAN
library(emmeans) # for marginal means etc
library(broom) # for tidying outputs
library(DHARMa) # for residual diagnostics
library(tidybayes) # for more tidying outputs
library(ggeffects) # for partial plots
library(broom.mixed) # for tidying MCMC outputs
library(patchwork) # for multiple plots
library(ggridges) # for ridge plots
library(bayestestR) # for ROPE
library(see) # for some plots
library(easystats) # framework for stats, modelling and visualisation
library(modelsummary)
source("helperFunctions.R")
Bayesian GLMM Part3
1 Preparations
Load the necessary libraries
2 Scenario
| SITUATION | MONTH | MASS | BIRD |
|---|---|---|---|
| tree | Nov | 78 | tree1 |
| .. | .. | .. | .. |
| nest-box | Nov | 78 | nest-box1 |
| .. | .. | .. | .. |
| inside | Nov | 79 | inside1 |
| .. | .. | .. | .. |
| other | Nov | 77 | other1 |
| .. | .. | .. | .. |
| tree | Jan | 85 | tree1 |
| .. | .. | .. | .. |
| Variable | Description |
|---|---|
| SITUATION | Categorical listing of roosting situations (tree, nest-box, inside or other) |
| MONTH | Categorical listing of the month of sampling. |
| MASS | Mass (g) of starlings. |
| BIRD | Categorical listing of individual birds repeatedly sampled. |
3 Read in the data
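The import step itself is not shown in the rendered output. A minimal sketch would look like the following (the file name `starling_full.csv` is an assumption; the actual path is not given in this document):

```r
## Hypothetical import (file name assumed). Converting MONTH, SITUATION and
## BIRD to factors up front ensures downstream functions treat them as
## categorical.
library(tidyverse)
starling <- read_csv("starling_full.csv") |>
  mutate(across(c(MONTH, SITUATION, BIRD), factor))
glimpse(starling)
```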
Rows: 80 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): MONTH, SITUATION, BIRD
dbl (2): subjectnum, MASS
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 80
Columns: 5
$ MONTH <chr> "Nov", "Nov", "Nov", "Nov", "Nov", "Nov", "Nov", "Nov", "No…
$ SITUATION <chr> "tree", "tree", "tree", "tree", "tree", "tree", "tree", "tr…
$ subjectnum <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1…
$ BIRD <chr> "tree1", "tree2", "tree3", "tree4", "tree5", "tree6", "tree…
$ MASS <dbl> 78, 88, 87, 88, 83, 82, 81, 80, 80, 89, 78, 78, 85, 81, 78,…
spc_tbl_ [80 × 5] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ MONTH : chr [1:80] "Nov" "Nov" "Nov" "Nov" ...
$ SITUATION : chr [1:80] "tree" "tree" "tree" "tree" ...
$ subjectnum: num [1:80] 1 2 3 4 5 6 7 8 9 10 ...
$ BIRD : chr [1:80] "tree1" "tree2" "tree3" "tree4" ...
$ MASS : num [1:80] 78 88 87 88 83 82 81 80 80 89 ...
- attr(*, "spec")=
.. cols(
.. MONTH = col_character(),
.. SITUATION = col_character(),
.. subjectnum = col_double(),
.. BIRD = col_character(),
.. MASS = col_double()
.. )
- attr(*, "problems")=<externalptr>
starling (80 rows and 5 variables, 5 shown)
ID | Name | Type | Missings | Values | N
---+------------+-----------+----------+-----------+-----------
1 | MONTH | character | 0 (0.0%) | Jan | 40 (50.0%)
| | | | Nov | 40 (50.0%)
---+------------+-----------+----------+-----------+-----------
2 | SITUATION | character | 0 (0.0%) | inside | 20 (25.0%)
| | | | nest-box | 20 (25.0%)
| | | | other | 20 (25.0%)
| | | | tree | 20 (25.0%)
---+------------+-----------+----------+-----------+-----------
3 | subjectnum | numeric | 0 (0.0%) | [1, 10] | 80
---+------------+-----------+----------+-----------+-----------
4 | BIRD | character | 0 (0.0%) | inside1 | 2 ( 2.5%)
| | | | inside10 | 2 ( 2.5%)
| | | | inside2 | 2 ( 2.5%)
| | | | inside3 | 2 ( 2.5%)
| | | | inside4 | 2 ( 2.5%)
| | | | inside5 | 2 ( 2.5%)
| | | | inside6 | 2 ( 2.5%)
| | | | inside7 | 2 ( 2.5%)
| | | | inside8 | 2 ( 2.5%)
| | | | inside9 | 2 ( 2.5%)
| | | | (...) |
---+------------+-----------+----------+-----------+-----------
5 | MASS | numeric | 0 (0.0%) | [68, 100] | 80
---------------------------------------------------------------
| Variable | Unique | Missing Pct. | Mean | SD | Min | Median | Max |
|---|---|---|---|---|---|---|---|
| subjectnum | 10 | 0 | 5.5 | 2.9 | 1.0 | 5.5 | 10.0 |
| MASS | 28 | 0 | 83.8 | 6.7 | 68.0 | 84.0 | 100.0 |
| N | % | |||||||
| MONTH | Jan | 40 | 50.0 | |||||
| Nov | 40 | 50.0 | ||||||
| SITUATION | inside | 20 | 25.0 | |||||
| nest-box | 20 | 25.0 | ||||||
| other | 20 | 25.0 | ||||||
| tree | 20 | 25.0 | ||||||
| BIRD | inside1 | 2 | 2.5 | |||||
| inside10 | 2 | 2.5 | ||||||
| inside2 | 2 | 2.5 | ||||||
| inside3 | 2 | 2.5 | ||||||
| inside4 | 2 | 2.5 | ||||||
| inside5 | 2 | 2.5 | ||||||
| inside6 | 2 | 2.5 | ||||||
| inside7 | 2 | 2.5 | ||||||
| inside8 | 2 | 2.5 | ||||||
| inside9 | 2 | 2.5 | ||||||
| nest-box1 | 2 | 2.5 | ||||||
| nest-box10 | 2 | 2.5 | ||||||
| nest-box2 | 2 | 2.5 | ||||||
| nest-box3 | 2 | 2.5 | ||||||
| nest-box4 | 2 | 2.5 | ||||||
| nest-box5 | 2 | 2.5 | ||||||
| nest-box6 | 2 | 2.5 | ||||||
| nest-box7 | 2 | 2.5 | ||||||
| nest-box8 | 2 | 2.5 | ||||||
| nest-box9 | 2 | 2.5 | ||||||
| other1 | 2 | 2.5 | ||||||
| other10 | 2 | 2.5 | ||||||
| other2 | 2 | 2.5 | ||||||
| other3 | 2 | 2.5 | ||||||
| other4 | 2 | 2.5 | ||||||
| other5 | 2 | 2.5 | ||||||
| other6 | 2 | 2.5 | ||||||
| other7 | 2 | 2.5 | ||||||
| other8 | 2 | 2.5 | ||||||
| other9 | 2 | 2.5 | ||||||
| tree1 | 2 | 2.5 | ||||||
| tree10 | 2 | 2.5 | ||||||
| tree2 | 2 | 2.5 | ||||||
| tree3 | 2 | 2.5 | ||||||
| tree4 | 2 | 2.5 | ||||||
| tree5 | 2 | 2.5 | ||||||
| tree6 | 2 | 2.5 | ||||||
| tree7 | 2 | 2.5 | ||||||
| tree8 | 2 | 2.5 | ||||||
| tree9 | 2 | 2.5 |
| Variable | SITUATION | MONTH | Unique | Missing Pct. | Mean | SD | Min | Median | Max |
|---|---|---|---|---|---|---|---|---|---|
| subjectnum | tree | Nov | 10 | 0 | 5.5 | 3.0 | 1.0 | 5.5 | 10.0 |
| | nest-box | Nov | 10 | 0 | 5.5 | 3.0 | 1.0 | 5.5 | 10.0 |
| | inside | Nov | 10 | 0 | 5.5 | 3.0 | 1.0 | 5.5 | 10.0 |
| | other | Nov | 10 | 0 | 5.5 | 3.0 | 1.0 | 5.5 | 10.0 |
| | tree | Jan | 10 | 0 | 5.5 | 3.0 | 1.0 | 5.5 | 10.0 |
| | nest-box | Jan | 10 | 0 | 5.5 | 3.0 | 1.0 | 5.5 | 10.0 |
| | inside | Jan | 10 | 0 | 5.5 | 3.0 | 1.0 | 5.5 | 10.0 |
| | other | Jan | 10 | 0 | 5.5 | 3.0 | 1.0 | 5.5 | 10.0 |
| MASS | tree | Nov | 8 | 0 | 83.6 | 4.0 | 78.0 | 82.5 | 89.0 |
| | nest-box | Nov | 6 | 0 | 79.4 | 3.2 | 74.0 | 79.5 | 85.0 |
| | inside | Nov | 8 | 0 | 78.6 | 3.3 | 73.0 | 78.5 | 84.0 |
| | other | Nov | 8 | 0 | 75.4 | 4.5 | 68.0 | 75.0 | 84.0 |
| | tree | Jan | 9 | 0 | 90.8 | 5.5 | 85.0 | 88.5 | 100.0 |
| | nest-box | Jan | 8 | 0 | 90.2 | 4.4 | 84.0 | 89.5 | 96.0 |
| | inside | Jan | 10 | 0 | 88.2 | 4.0 | 83.0 | 87.5 | 96.0 |
| | other | Jan | 9 | 0 | 84.2 | 4.3 | 77.0 | 84.5 | 90.0 |
| N | % | |||||||||
| MONTH | Jan | 40 | 50.0 | |||||||
| Nov | 40 | 50.0 | ||||||||
| SITUATION | inside | 20 | 25.0 | |||||||
| nest-box | 20 | 25.0 | ||||||||
| other | 20 | 25.0 | ||||||||
| tree | 20 | 25.0 | ||||||||
| BIRD | inside1 | 2 | 2.5 | |||||||
| inside10 | 2 | 2.5 | ||||||||
| inside2 | 2 | 2.5 | ||||||||
| inside3 | 2 | 2.5 | ||||||||
| inside4 | 2 | 2.5 | ||||||||
| inside5 | 2 | 2.5 | ||||||||
| inside6 | 2 | 2.5 | ||||||||
| inside7 | 2 | 2.5 | ||||||||
| inside8 | 2 | 2.5 | ||||||||
| inside9 | 2 | 2.5 | ||||||||
| nest-box1 | 2 | 2.5 | ||||||||
| nest-box10 | 2 | 2.5 | ||||||||
| nest-box2 | 2 | 2.5 | ||||||||
| nest-box3 | 2 | 2.5 | ||||||||
| nest-box4 | 2 | 2.5 | ||||||||
| nest-box5 | 2 | 2.5 | ||||||||
| nest-box6 | 2 | 2.5 | ||||||||
| nest-box7 | 2 | 2.5 | ||||||||
| nest-box8 | 2 | 2.5 | ||||||||
| nest-box9 | 2 | 2.5 | ||||||||
| other1 | 2 | 2.5 | ||||||||
| other10 | 2 | 2.5 | ||||||||
| other2 | 2 | 2.5 | ||||||||
| other3 | 2 | 2.5 | ||||||||
| other4 | 2 | 2.5 | ||||||||
| other5 | 2 | 2.5 | ||||||||
| other6 | 2 | 2.5 | ||||||||
| other7 | 2 | 2.5 | ||||||||
| other8 | 2 | 2.5 | ||||||||
| other9 | 2 | 2.5 | ||||||||
| tree1 | 2 | 2.5 | ||||||||
| tree10 | 2 | 2.5 | ||||||||
| tree2 | 2 | 2.5 | ||||||||
| tree3 | 2 | 2.5 | ||||||||
| tree4 | 2 | 2.5 | ||||||||
| tree5 | 2 | 2.5 | ||||||||
| tree6 | 2 | 2.5 | ||||||||
| tree7 | 2 | 2.5 | ||||||||
| tree8 | 2 | 2.5 | ||||||||
| tree9 | 2 | 2.5 |
4 Exploratory data analysis
Random intercepts model formula: \[ \begin{align} y_i &\sim{} \mathcal{N}(\mu_i, \sigma^2)\\ \mu_i &= \beta_0 + \boldsymbol{\beta} \bf{X_i} + \boldsymbol{\gamma} \bf{Z_i}\\ \boldsymbol{\gamma} &= \gamma_0\\ \beta_0 &\sim{} \mathcal{N}(80, 2.5)\\ \beta &\sim{} \mathcal{N}(0, 10)\\ \gamma_0 &\sim{} \mathcal{N}(0, \sigma_1^2)\\ \sigma &\sim{} \mathcal{t}(3, 0, 6)\\ \sigma_1 &\sim{} \mathcal{t}(3, 0, 6)\\ \end{align} \]
Random intercept/slope model formula: \[ \begin{align} y_{i,j} &\sim{} \mathcal{N}(\mu_{i,j}, \sigma^2)\\ \mu_{i,j} &= \beta_0 + \bf{Z_j}\boldsymbol{\gamma_j} + \bf{X_i}\boldsymbol{\beta} \\ \beta_0 &\sim{} \mathcal{N}(80, 2)\\ \beta_1 &\sim{} \mathcal{N}(0, 10)\\ \boldsymbol{\gamma_j} &\sim{} \mathcal{N}(0, \boldsymbol{\Sigma})\\ \boldsymbol{\Sigma} &= \boldsymbol{D}(\sigma_l)\boldsymbol{\Omega}\boldsymbol{D}(\sigma_l)\\ \boldsymbol{\Omega} &\sim{} LKJ(\zeta)\\ \sigma_l &\sim{} \mathcal{t}(3, 0, 3)\\ \sigma &\sim{} \mathcal{t}(3, 0, 3)\\ \end{align} \]
- \(\bf{X}\) is the model matrix representing the effects of the situation and month on the mass of birds.
- \(\boldsymbol{\beta}\) is a vector of the population-level effects parameters to be estimated.
- \(\boldsymbol{\gamma}\) is a vector of the group-level effect parameters to be estimated.
- \(\bf{Z}\) represents a cell means model matrix for the random intercepts (and possibly random slopes) associated with individual birds.
- the population-level intercept (\(\beta_0\)) has a Gaussian prior with a location of 80 and a scale of 2.5
- the population-level effect (\(\beta_1\)) has a Gaussian prior with a location of 0 and a scale of 10
- the group-level effects are assumed to sum to zero and be drawn from a Gaussian distribution with a mean of 0 and a covariance of \(\Sigma\)
- \(\boldsymbol{\Sigma}\) is the variance-covariance matrix between the groups (individual birds). It turns out that it is difficult to apply a prior directly to this covariance matrix, so instead the covariance matrix is decomposed into a correlation matrix (\(\boldsymbol{\Omega}\)) and a vector of variances (\(\boldsymbol{\sigma_l^2}\)), which are the diagonals (\(\boldsymbol{D}\)) of the covariance matrix.
- \(\boldsymbol{\Omega}\) is the correlation matrix among the group-level effects, which is given an LKJ prior:
\[ \begin{align} \boldsymbol{\gamma} &\sim{} \mathcal{N}(0, \boldsymbol{\Sigma})\\ \boldsymbol{\Sigma} &= \boldsymbol{D}(\boldsymbol{\tau})\,\boldsymbol{\Omega}\,\boldsymbol{D}(\boldsymbol{\tau})\\ \end{align} \]
where \(\boldsymbol{\Sigma}\) is the covariance matrix, \(\boldsymbol{\Omega}\) is the correlation matrix and \(\boldsymbol{\tau}\) is a vector of standard deviations.
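A quick numerical illustration of this decomposition (the values for the standard deviations and the correlation are made up for the example):

```r
## Toy example (values assumed): rebuild a 2x2 covariance matrix from a
## correlation matrix Omega and a vector of standard deviations tau
tau <- c(2, 0.5) # group-level standard deviations
Omega <- matrix(c(1, 0.3, 0.3, 1), nrow = 2) # correlation matrix
Sigma <- diag(tau) %*% Omega %*% diag(tau) # Sigma = D(tau) Omega D(tau)
Sigma
```

The diagonal of `Sigma` recovers the variances (`tau^2`) and the off-diagonal is the correlation rescaled by both standard deviations (0.3 × 2 × 0.5 = 0.3).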
## Better still
ggplot(starling, aes(y = MASS, x = MONTH, group = BIRD)) +
geom_point() +
geom_line() +
facet_grid(~SITUATION)
Conclusions:
- it is clear that the Nov mass of each bird is different - so random intercepts
- the degree to which they change between Nov and Jan also differs among birds - perhaps random intercepts/random slopes
5 Fit the model
In rstanarm, the default priors are designed to be weakly informative. They are chosen to provide moderate regularisation (to help prevent over-fitting) and help stabilise the computations.
Priors for model '.'
------
Intercept (after predictors centered)
Specified prior:
~ normal(location = 84, scale = 2.5)
Adjusted prior:
~ normal(location = 84, scale = 17)
Coefficients
Specified prior:
~ normal(location = [0,0,0,...], scale = [2.5,2.5,2.5,...])
Adjusted prior:
~ normal(location = [0,0,0,...], scale = [33.25,38.40,38.40,...])
Auxiliary (sigma)
Specified prior:
~ exponential(rate = 1)
Adjusted prior:
~ exponential(rate = 0.15)
Covariance
~ decov(reg. = 1, conc. = 1, shape = 1, scale = 1)
------
See help('prior_summary.stanreg') for more details
This tells us:
- for the intercept (when the family is Gaussian), a normal prior with a mean of 84 and a standard deviation of 17 is used. The location of 84 is the mean of the response (mean(starling$MASS)), and the adjusted scale of 17 is the default scaling factor of 2.5 multiplied by the standard deviation of the response.
- for the coefficients (in this case, the effects of MONTH, SITUATION and their interaction), the default prior is a normal prior centred on 0 with a standard deviation of 2.5. This is then adjusted for the scale of the data by multiplying the 2.5 by the ratio of the standard deviation of the response to the standard deviation of the numerical dummy variables for each predictor (then rounded).
MONTHJan SITUATIONnest-box
33.25470 38.39922
SITUATIONother SITUATIONtree
38.39922 38.39922
MONTHJan:SITUATIONnest-box MONTHJan:SITUATIONother
50.27638 50.27638
MONTHJan:SITUATIONtree
50.27638
- the auxiliary prior represents whatever additional prior is required for the nominated model. In the case of a Gaussian model, this will be \(\sigma\); for negative binomial, it will be the reciprocal of dispersion; for gamma, it will be shape, etc. By default in
rstanarm, this is an exponential with a rate of 1, which is then adjusted by dividing by the standard deviation of the response.
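These adjustments can be verified back-of-envelope from the summary statistics reported earlier (mean 83.8 g and sd 6.7 g for MASS; the numbers below come from that summary table, not from a fitted object):

```r
## Back-of-envelope check of rstanarm's autoscaling, using the response
## summaries reported in the data exploration above
mass_mean <- 83.8
mass_sd <- 6.7
round(mass_mean) # prior location for the intercept
round(2.5 * mass_sd) # adjusted intercept scale
round(1 / mass_sd, 2) # adjusted exponential rate for sigma
```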
Let's now run with priors only so that we can explore the range of values they allow in the posteriors.
Note: uncertainty of error terms are not taken into account. Consider
setting `interval` to "prediction". This will call `posterior_predict()`
instead of `posterior_epred()`.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Conclusions:
- we see that the range of predictions is fairly wide and the predicted means could range from a small value to a relatively large value.
The following link provides some guidance about defining priors. [https://github.com/stan-dev/stan/wiki/Prior-Choice-Recommendations]
When defining our own priors, we typically do not want them to be autoscaled (autoscale = FALSE).
If we wanted to define our own priors that were less vague, yet still not likely to bias the outcomes, we could try the following priors (mainly plucked out of thin air):
- \(\beta_0\): normal centred at 84 with a standard deviation of 17
  - mean of 84: since `mean(starling$MASS)`
  - sd of 17: since `2.5 * sd(starling$MASS)`
- \(\beta_1\): normal centred at 0 with a standard deviation of 33
  - sd of 33: since `2.5 * sd(starling$MASS) / apply(model.matrix(~MONTH * SITUATION, starling)[, -1], 2, sd)`
- \(\beta_{2-4}\): normal centred at 0 with a standard deviation of 39
  - sd of 39: since `2.5 * sd(starling$MASS) / apply(model.matrix(~MONTH * SITUATION, starling)[, -1], 2, sd)`
- \(\beta_{5-7}\): normal centred at 0 with a standard deviation of 50
  - sd of 50: since `2.5 * sd(starling$MASS) / apply(model.matrix(~MONTH * SITUATION, starling)[, -1], 2, sd)`
- \(\sigma\): exponential with a rate of 0.15
  - rate of 0.15: since `1 / sd(starling$MASS)`
- \(\Sigma\): decov with:
- regularisation: the exponent for a LKJ prior on the correlation matrix. A value of 1 (default) implies a joint uniform prior
- concentration: the concentration parameter for a symmetric Dirichlet distribution. A value of 1 (default) implies a joint uniform distribution
- shape and scale: the shape and scale parameters for a gamma prior on the scale and scale parameters of the decov prior. A value of 1 for both (default) simplifies the gamma prior to a unit-exponential distribution.
I will also overlay the raw data for comparison.
starling_rstanarm2 <- stan_glmer(MASS ~ MONTH * SITUATION + (1 | BIRD),
data = starling,
family = gaussian(),
prior_intercept = normal(84, 17, autoscale = FALSE),
prior = normal(0, c(33, 39, 39, 39, 50, 50, 50), autoscale = FALSE),
prior_aux = rstanarm::exponential(0.15, autoscale = FALSE),
prior_covariance = decov(1, 1, 1, 1),
prior_PD = TRUE,
iter = 5000,
warmup = 1000,
chains = 3,
thin = 5,
refresh = 0
)
Note: uncertainty of error terms are not taken into account. Consider
setting `interval` to "prediction". This will call `posterior_predict()`
instead of `posterior_epred()`.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Now let's refit, conditioning on the data.
posterior_vs_prior(starling_rstanarm3,
color_by = "vs", group_by = TRUE,
facet_args = list(scales = "free_y")
)
Drawing from prior...
Conclusions:
- in each case, the prior is substantially wider than the posterior, suggesting that the posterior is not biased towards the prior.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Note: uncertainty of error terms are not taken into account. Consider
setting `interval` to "prediction". This will call `posterior_predict()`
instead of `posterior_epred()`.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
In brms, the default priors are designed to be weakly informative. They are chosen to provide moderate regularisation (to help prevent overfitting) and help stabilise the computations.
Unlike rstanarm, brms models must be compiled before they start sampling. For most models, compilation of the Stan code takes around 45 seconds.
starling_form <- bf(MASS ~ MONTH*SITUATION+(MONTH + MONTH:SITUATION|BIRD),
family = gaussian()
)
options(width=150)
starling_form |> get_prior(data = starling)
prior class coef group resp dpar nlpar lb ub source
(flat) b default
(flat) b MONTHJan (vectorized)
(flat) b MONTHJan:SITUATIONnestMbox (vectorized)
(flat) b MONTHJan:SITUATIONother (vectorized)
(flat) b MONTHJan:SITUATIONtree (vectorized)
(flat) b SITUATIONnestMbox (vectorized)
(flat) b SITUATIONother (vectorized)
(flat) b SITUATIONtree (vectorized)
lkj(1) cor default
lkj(1) cor BIRD (vectorized)
student_t(3, 84, 5.9) Intercept default
student_t(3, 0, 5.9) sd 0 default
student_t(3, 0, 5.9) sd BIRD 0 (vectorized)
student_t(3, 0, 5.9) sd Intercept BIRD 0 (vectorized)
student_t(3, 0, 5.9) sd MONTHJan BIRD 0 (vectorized)
student_t(3, 0, 5.9) sd MONTHJan:SITUATIONnestMbox BIRD 0 (vectorized)
student_t(3, 0, 5.9) sd MONTHJan:SITUATIONother BIRD 0 (vectorized)
student_t(3, 0, 5.9) sd MONTHJan:SITUATIONtree BIRD 0 (vectorized)
student_t(3, 0, 5.9) sd MONTHNov:SITUATIONnestMbox BIRD 0 (vectorized)
student_t(3, 0, 5.9) sd MONTHNov:SITUATIONother BIRD 0 (vectorized)
student_t(3, 0, 5.9) sd MONTHNov:SITUATIONtree BIRD 0 (vectorized)
student_t(3, 0, 5.9) sigma 0 default
The following link provides some guidance about defining priors. [https://github.com/stan-dev/stan/wiki/Prior-Choice-Recommendations]
When defining our own priors, we typically do not want them to be scaled.
If we wanted to define our own priors that were less vague, yet still not likely to bias the outcomes, we could try the following priors (mainly plucked out of thin air):
- \(\beta_0\): normal centred at 84 with a standard deviation of 5.9
  - mean of 84: since `median(starling$MASS)`
  - sd of 5.9: since `mad(starling$MASS)`
- \(\beta_1\): normal centred at 0 with a standard deviation of 13
  - sd of 13: since `sd(starling$MASS) / apply(model.matrix(~MONTH * SITUATION, starling)[, -1], 2, sd)`
- \(\beta_{2-4}\): normal centred at 0 with a standard deviation of 15
  - sd of 15: since `sd(starling$MASS) / apply(model.matrix(~MONTH * SITUATION, starling)[, -1], 2, sd)`
- \(\beta_{5-7}\): normal centred at 0 with a standard deviation of 20
  - sd of 20: since `sd(starling$MASS) / apply(model.matrix(~MONTH * SITUATION, starling)[, -1], 2, sd)`
- \(\sigma\): gamma with shape parameters of 6.5 and 1
  - shape of 6.5: since `sd(starling$MASS)`
- \(\sigma_j\): half-cauchy with parameters 0 and 2.5
  - scale of 2.5: since `sqrt(sd(starling$MASS))`
- \(\Sigma\): decov with:
- regularisation: the exponent for a LKJ prior on the correlation matrix. A value of 1 (default) implies a joint uniform prior
- concentration: the concentration parameter for a symmetric Dirichlet distribution. A value of 1 (default) implies a joint uniform distribution
- shape and scale: the shape and scale parameters for a gamma prior on the scale and scale parameters of the decov prior. A value of 1 for both (default) simplifies the gamma prior to a unit-exponential distribution.
Note, for hierarchical models, the model will tend to want a large group-level \(\sigma\) in order to fit the data better. It is a good idea to regularise this tendency by applying a prior that has most of its mass around zero. Suitable candidates include:
- half-t: as the degrees of freedom approach infinity, this will approach a half-normal
- half-cauchy: this is essentially a half-t with 1 degree of freedom
- exponential
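To see why these are sensible candidates, we can compare their density shapes directly (the scale values below are assumed purely for illustration):

```r
## Compare candidate priors for a group-level standard deviation: all place
## most of their mass near zero but differ in how heavy their tails are
x <- seq(0, 10, length.out = 200)
half_t <- 2 * dt(x / 3, df = 3) / 3 # half-t(3, 0, 3)
half_cauchy <- 2 * dcauchy(x, location = 0, scale = 3) # half-Cauchy(0, 3)
expo <- dexp(x, rate = 1) # exponential(1)
matplot(x, cbind(half_t, half_cauchy, expo),
  type = "l", lty = 1, xlab = expression(sigma), ylab = "density"
)
legend("topright",
  legend = c("half-t(3,0,3)", "half-Cauchy(0,3)", "exponential(1)"),
  lty = 1, col = 1:3
)
```

The exponential is the most concentrated near zero, while the half-Cauchy retains the heaviest tail (allowing occasional large values of \(\sigma\) if the data support them).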
I will also overlay the raw data for comparison.
starling_form <- bf(MASS ~ MONTH * SITUATION + (1 | BIRD),
family = gaussian()
)
get_prior(starling_form, data = starling)
prior class coef group resp dpar nlpar lb ub source
(flat) b default
(flat) b MONTHJan (vectorized)
(flat) b MONTHJan:SITUATIONnestMbox (vectorized)
(flat) b MONTHJan:SITUATIONother (vectorized)
(flat) b MONTHJan:SITUATIONtree (vectorized)
(flat) b SITUATIONnestMbox (vectorized)
(flat) b SITUATIONother (vectorized)
(flat) b SITUATIONtree (vectorized)
student_t(3, 84, 5.9) Intercept default
student_t(3, 0, 5.9) sd 0 default
student_t(3, 0, 5.9) sd BIRD 0 (vectorized)
student_t(3, 0, 5.9) sd Intercept BIRD 0 (vectorized)
student_t(3, 0, 5.9) sigma 0 default
starling |>
group_by(SITUATION, MONTH) |>
summarise(
mean(MASS),
median(MASS),
sd(MASS),
mad(MASS)
)
`summarise()` has grouped output by 'SITUATION'. You can override using the
`.groups` argument.
# A tibble: 8 × 6
# Groups: SITUATION [4]
SITUATION MONTH `mean(MASS)` `median(MASS)` `sd(MASS)` `mad(MASS)`
<fct> <fct> <dbl> <dbl> <dbl> <dbl>
1 inside Nov 78.6 78.5 3.31 2.22
2 inside Jan 88.2 87.5 4.05 4.45
3 nest-box Nov 79.4 79.5 3.20 2.22
4 nest-box Jan 90.2 89.5 4.37 5.19
5 other Nov 75.4 75 4.53 2.22
6 other Jan 84.2 84.5 4.26 4.45
7 tree Nov 83.6 82.5 4.03 5.19
8 tree Jan 90.8 88.5 5.47 4.45
priors <- prior(normal(80, 2.5), class = "Intercept") +
## prior(normal(0, 13), class = 'b', coef = "MONTHJan") +
## prior(normal(0, 15), class = 'b', coef = "SITUATIONnestMbox") +
## prior(normal(0, 15), class = 'b', coef = "SITUATIONother") +
## prior(normal(0, 15), class = 'b', coef = "SITUATIONtree") +
prior(normal(0, 10), class = "b") +
## prior(gamma(6.5,1), class = 'sigma') +
prior(student_t(3, 0, 6), class = "sigma") +
prior(student_t(3, 0, 6), class = "sd")
starling_brm2 <- brm(starling_form,
data = starling,
prior = priors,
sample_prior = "only",
iter = 5000,
warmup = 1000,
chains = 3, cores = 3,
thin = 5,
refresh = 0,
backend = "rstan"
)
Compiling Stan program...
Start sampling
Note: uncertainty of error terms are not taken into account. Consider
setting `interval` to "prediction". This will call `posterior_predict()`
instead of `posterior_epred()`.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
The above seem sufficiently wide whilst at the same time not providing any encouragement for the sampler to wander off into very unsupported areas.
starling_brm3 <- update(starling_brm2,
sample_prior = "yes",
control = list(adapt_delta = 0.99),
refresh = 0
)
The desired updates require recompiling the model
Compiling Stan program...
Start sampling
Note: uncertainty of error terms are not taken into account. Consider
setting `interval` to "prediction". This will call `posterior_predict()`
instead of `posterior_epred()`.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
[1] "b_Intercept" "b_MONTHJan"
[3] "b_SITUATIONnestMbox" "b_SITUATIONother"
[5] "b_SITUATIONtree" "b_MONTHJan:SITUATIONnestMbox"
[7] "b_MONTHJan:SITUATIONother" "b_MONTHJan:SITUATIONtree"
[9] "sd_BIRD__Intercept" "sigma"
[11] "Intercept" "r_BIRD[inside1,Intercept]"
[13] "r_BIRD[inside10,Intercept]" "r_BIRD[inside2,Intercept]"
[15] "r_BIRD[inside3,Intercept]" "r_BIRD[inside4,Intercept]"
[17] "r_BIRD[inside5,Intercept]" "r_BIRD[inside6,Intercept]"
[19] "r_BIRD[inside7,Intercept]" "r_BIRD[inside8,Intercept]"
[21] "r_BIRD[inside9,Intercept]" "r_BIRD[nest-box1,Intercept]"
[23] "r_BIRD[nest-box10,Intercept]" "r_BIRD[nest-box2,Intercept]"
[25] "r_BIRD[nest-box3,Intercept]" "r_BIRD[nest-box4,Intercept]"
[27] "r_BIRD[nest-box5,Intercept]" "r_BIRD[nest-box6,Intercept]"
[29] "r_BIRD[nest-box7,Intercept]" "r_BIRD[nest-box8,Intercept]"
[31] "r_BIRD[nest-box9,Intercept]" "r_BIRD[other1,Intercept]"
[33] "r_BIRD[other10,Intercept]" "r_BIRD[other2,Intercept]"
[35] "r_BIRD[other3,Intercept]" "r_BIRD[other4,Intercept]"
[37] "r_BIRD[other5,Intercept]" "r_BIRD[other6,Intercept]"
[39] "r_BIRD[other7,Intercept]" "r_BIRD[other8,Intercept]"
[41] "r_BIRD[other9,Intercept]" "r_BIRD[tree1,Intercept]"
[43] "r_BIRD[tree10,Intercept]" "r_BIRD[tree2,Intercept]"
[45] "r_BIRD[tree3,Intercept]" "r_BIRD[tree4,Intercept]"
[47] "r_BIRD[tree5,Intercept]" "r_BIRD[tree6,Intercept]"
[49] "r_BIRD[tree7,Intercept]" "r_BIRD[tree8,Intercept]"
[51] "r_BIRD[tree9,Intercept]" "prior_Intercept"
[53] "prior_b" "prior_sigma"
[55] "prior_sd_BIRD" "lprior"
[57] "lp__" "accept_stat__"
[59] "stepsize__" "treedepth__"
[61] "n_leapfrog__" "divergent__"
[63] "energy__"
While we are here, we might like to explore a random intercept/slope model
# I am going to narrow the priors so as to stabilise the model
priors <- prior(normal(80, 2), class = "Intercept") +
# prior(normal(0, 13), class = 'b', coef = "MONTHJan") +
# prior(normal(0, 15), class = 'b', coef = "SITUATIONnestMbox") +
# prior(normal(0, 15), class = 'b', coef = "SITUATIONother") +
# prior(normal(0, 15), class = 'b', coef = "SITUATIONtree") +
prior(normal(0, 10), class = "b") +
## prior(gamma(6.5,1), class = 'sigma') +
prior(student_t(3, 0, 3), class = "sigma") +
prior(student_t(3, 0, 3), class = "sd") +
prior(lkj_corr_cholesky(1), class = "cor")
starling_form <- bf(MASS ~ MONTH * SITUATION + (MONTH | BIRD),
family = gaussian()
)
starling_brm3a <- brm(starling_form,
data = starling,
prior = priors,
sample_prior = "only",
iter = 5000,
warmup = 2500,
chains = 3, cores = 3,
thin = 5,
refresh = 0,
control = list(adapt_delta = 0.99),
backend = "rstan"
)
Compiling Stan program...
Start sampling
starling_brm4 <- brm(starling_form,
data = starling,
prior = priors,
sample_prior = "yes",
iter = 10000, # note, these changes are in response to issues
warmup = 5000,
chains = 3, cores = 3,
thin = 10, # some autocorrelation noted
refresh = 0,
control = list(adapt_delta = 0.99, max_treedepth = 20)
)
Compiling Stan program...
Start sampling
Warning: There were 1 divergent transitions after warmup. See
https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
to find out why this is a problem and how to eliminate them.
Warning: Examine the pairs() plot to diagnose sampling problems
[1] "b_Intercept" "b_MONTHJan"
[3] "b_SITUATIONnestMbox" "b_SITUATIONother"
[5] "b_SITUATIONtree" "b_MONTHJan:SITUATIONnestMbox"
[7] "b_MONTHJan:SITUATIONother" "b_MONTHJan:SITUATIONtree"
[9] "sd_BIRD__Intercept" "sd_BIRD__MONTHJan"
[11] "cor_BIRD__Intercept__MONTHJan" "sigma"
[13] "Intercept" "r_BIRD[inside1,Intercept]"
[15] "r_BIRD[inside10,Intercept]" "r_BIRD[inside2,Intercept]"
[17] "r_BIRD[inside3,Intercept]" "r_BIRD[inside4,Intercept]"
[19] "r_BIRD[inside5,Intercept]" "r_BIRD[inside6,Intercept]"
[21] "r_BIRD[inside7,Intercept]" "r_BIRD[inside8,Intercept]"
[23] "r_BIRD[inside9,Intercept]" "r_BIRD[nest-box1,Intercept]"
[25] "r_BIRD[nest-box10,Intercept]" "r_BIRD[nest-box2,Intercept]"
[27] "r_BIRD[nest-box3,Intercept]" "r_BIRD[nest-box4,Intercept]"
[29] "r_BIRD[nest-box5,Intercept]" "r_BIRD[nest-box6,Intercept]"
[31] "r_BIRD[nest-box7,Intercept]" "r_BIRD[nest-box8,Intercept]"
[33] "r_BIRD[nest-box9,Intercept]" "r_BIRD[other1,Intercept]"
[35] "r_BIRD[other10,Intercept]" "r_BIRD[other2,Intercept]"
[37] "r_BIRD[other3,Intercept]" "r_BIRD[other4,Intercept]"
[39] "r_BIRD[other5,Intercept]" "r_BIRD[other6,Intercept]"
[41] "r_BIRD[other7,Intercept]" "r_BIRD[other8,Intercept]"
[43] "r_BIRD[other9,Intercept]" "r_BIRD[tree1,Intercept]"
[45] "r_BIRD[tree10,Intercept]" "r_BIRD[tree2,Intercept]"
[47] "r_BIRD[tree3,Intercept]" "r_BIRD[tree4,Intercept]"
[49] "r_BIRD[tree5,Intercept]" "r_BIRD[tree6,Intercept]"
[51] "r_BIRD[tree7,Intercept]" "r_BIRD[tree8,Intercept]"
[53] "r_BIRD[tree9,Intercept]" "r_BIRD[inside1,MONTHJan]"
[55] "r_BIRD[inside10,MONTHJan]" "r_BIRD[inside2,MONTHJan]"
[57] "r_BIRD[inside3,MONTHJan]" "r_BIRD[inside4,MONTHJan]"
[59] "r_BIRD[inside5,MONTHJan]" "r_BIRD[inside6,MONTHJan]"
[61] "r_BIRD[inside7,MONTHJan]" "r_BIRD[inside8,MONTHJan]"
[63] "r_BIRD[inside9,MONTHJan]" "r_BIRD[nest-box1,MONTHJan]"
[65] "r_BIRD[nest-box10,MONTHJan]" "r_BIRD[nest-box2,MONTHJan]"
[67] "r_BIRD[nest-box3,MONTHJan]" "r_BIRD[nest-box4,MONTHJan]"
[69] "r_BIRD[nest-box5,MONTHJan]" "r_BIRD[nest-box6,MONTHJan]"
[71] "r_BIRD[nest-box7,MONTHJan]" "r_BIRD[nest-box8,MONTHJan]"
[73] "r_BIRD[nest-box9,MONTHJan]" "r_BIRD[other1,MONTHJan]"
[75] "r_BIRD[other10,MONTHJan]" "r_BIRD[other2,MONTHJan]"
[77] "r_BIRD[other3,MONTHJan]" "r_BIRD[other4,MONTHJan]"
[79] "r_BIRD[other5,MONTHJan]" "r_BIRD[other6,MONTHJan]"
[81] "r_BIRD[other7,MONTHJan]" "r_BIRD[other8,MONTHJan]"
[83] "r_BIRD[other9,MONTHJan]" "r_BIRD[tree1,MONTHJan]"
[85] "r_BIRD[tree10,MONTHJan]" "r_BIRD[tree2,MONTHJan]"
[87] "r_BIRD[tree3,MONTHJan]" "r_BIRD[tree4,MONTHJan]"
[89] "r_BIRD[tree5,MONTHJan]" "r_BIRD[tree6,MONTHJan]"
[91] "r_BIRD[tree7,MONTHJan]" "r_BIRD[tree8,MONTHJan]"
[93] "r_BIRD[tree9,MONTHJan]" "prior_Intercept"
[95] "prior_b" "prior_sigma"
[97] "prior_sd_BIRD" "prior_cor_BIRD"
[99] "lprior" "lp__"
[101] "accept_stat__" "stepsize__"
[103] "treedepth__" "n_leapfrog__"
[105] "divergent__" "energy__"
starling_brm4 |>
posterior_samples() |>
dplyr::select(-`lp__`) |>
pivot_longer(everything(), names_to = "key") |>
filter(!str_detect(key, "^r")) |>
mutate(
Type = ifelse(str_detect(key, "prior"), "Prior", "Posterior"),
## Class = ifelse(str_detect(key, 'Intercept'), 'Intercept',
## ifelse(str_detect(key, 'b'), 'b', 'sigma')),
Class = case_when(
str_detect(key, "(^b|^prior).*Intercept$") ~ "Intercept",
str_detect(key, "b_SITUATION.*|prior_b_SITUATION.*") &
!str_detect(key, ".*:.*") ~ "SITUATION",
str_detect(key, "b_MONTH.*|prior_b_MONTH.*") &
!str_detect(key, ".*\\:.*") ~ "MONTH",
str_detect(key, ".*\\:.*|prior_b_.*\\:.*") ~ "Interaction",
str_detect(key, "sd") ~ "sd",
str_detect(key, "^cor|prior_cor") ~ "cor",
str_detect(key, "sigma") ~ "sigma"
),
Par = str_replace(key, "b_", "")
) |>
ggplot(aes(x = Type, y = value, color = Par)) +
stat_pointinterval(position = position_dodge()) +
facet_wrap(~Class, scales = "free")
Warning: Method 'posterior_samples' is deprecated. Please see ?as_draws for
recommended alternatives.
We can compare the two models using LOO
Computed from 2400 by 80 log-likelihood matrix.
Estimate SE
elpd_loo -233.9 5.1
p_loo 13.3 1.5
looic 467.8 10.2
------
MCSE of elpd_loo is 0.1.
MCSE and ESS estimates assume MCMC draws (r_eff in [0.7, 1.1]).
All Pareto k estimates are good (k < 0.7).
See help('pareto-k-diagnostic') for details.
Warning: Found 2 observations with a pareto_k > 0.7 in model 'starling_brm4'.
We recommend to set 'moment_match = TRUE' in order to perform moment matching
for problematic observations.
Computed from 1500 by 80 log-likelihood matrix.
Estimate SE
elpd_loo -232.0 5.0
p_loo 22.5 2.3
looic 464.1 10.1
------
MCSE of elpd_loo is NA.
MCSE and ESS estimates assume MCMC draws (r_eff in [0.2, 0.9]).
Pareto k diagnostic values:
Count Pct. Min. ESS
(-Inf, 0.69] (good) 78 97.5% 77
(0.69, 1] (bad) 2 2.5% <NA>
(1, Inf) (very bad) 0 0.0% <NA>
See help('pareto-k-diagnostic') for details.
elpd_diff se_diff
starling_brm4 0.0 0.0
starling_brm3 -1.8 0.8
There is only slightly more support for the more complex random intercept/slope model over the simpler random intercept only model. Consequently, we could argue in favour of either. Since we have already fit the random intercept/slope model, we will use that one.
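The comparison shown above can be produced with something like the following (a sketch only; it assumes the fitted objects `starling_brm3` and `starling_brm4` are still in the workspace):

```r
## Sketch: leave-one-out (LOO) comparison of the random intercept model
## (starling_brm3) and the random intercept/slope model (starling_brm4)
library(brms)
loo3 <- loo(starling_brm3)
loo4 <- loo(starling_brm4)
loo_compare(loo3, loo4) # best model appears in the first row (elpd_diff = 0)
```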
6 MCMC sampling diagnostics
The bayesplot package offers a range of MCMC diagnostics as well as posterior predictive checks (PPC), all of which have a convenient plot() interface. Let's start with the MCMC diagnostics.
See list of available diagnostics by name
bayesplot MCMC module:
mcmc_acf
mcmc_acf_bar
mcmc_areas
mcmc_areas_ridges
mcmc_combo
mcmc_dens
mcmc_dens_chains
mcmc_dens_overlay
mcmc_hex
mcmc_hist
mcmc_hist_by_chain
mcmc_intervals
mcmc_neff
mcmc_neff_hist
mcmc_nuts_acceptance
mcmc_nuts_divergence
mcmc_nuts_energy
mcmc_nuts_stepsize
mcmc_nuts_treedepth
mcmc_pairs
mcmc_parcoord
mcmc_rank_ecdf
mcmc_rank_hist
mcmc_rank_overlay
mcmc_recover_hist
mcmc_recover_intervals
mcmc_recover_scatter
mcmc_rhat
mcmc_rhat_hist
mcmc_scatter
mcmc_trace
mcmc_trace_highlight
mcmc_violin
Of these, we will focus on:
- trace: this plots the estimates of each parameter over the post-warmup length of each MCMC chain. Each chain is plotted in a different shade of blue, with each parameter in its own facet. Ideally, each trace should just look like noise without any discernible drift and each of the traces for a specific parameter should look the same (i.e., should not be displaced above or below any other trace for that parameter).
pars <- starling_brm4 |> get_variables()
pars <- pars |>
str_extract("^b_.*|[sS]igma|^sd.*") |>
## str_extract('^b.Intercept|^b_SITUTATION.*|^b_MONTH.*|[sS]igma|^sd.*') |>
na.omit()
pars

 [1] "b_Intercept"                  "b_MONTHJan"
[3] "b_SITUATIONnestMbox" "b_SITUATIONother"
[5] "b_SITUATIONtree" "b_MONTHJan:SITUATIONnestMbox"
[7] "b_MONTHJan:SITUATIONother" "b_MONTHJan:SITUATIONtree"
[9] "sd_BIRD__Intercept" "sd_BIRD__MONTHJan"
[11] "sigma" "sigma"
attr(,"na.action")
[1] 11 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
[20] 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
[39] 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
[58] 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
[77] 88 89 90 91 92 93 94 95 97 98 99 100 101 102 103 104 105 106
attr(,"class")
[1] "omit"
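The trace plots themselves can then be generated via brms' bayesplot interface; a sketch (the `variable` argument is the current name, replacing the older `pars`):

```r
## trace plots for the retained fixed effect and variance parameters
starling_brm4 |> mcmc_plot(type = "trace", variable = pars)
```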
The chains appear well mixed and very similar
- acf_bar (autocorrelation function): plots the autocorrelation between successive MCMC sample lags for each parameter and each chain
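A sketch of the corresponding call:

```r
## autocorrelation of successive MCMC lags, per parameter and chain
starling_brm4 |> mcmc_plot(type = "acf_bar", variable = pars)
```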
There is no evidence of auto-correlation in the MCMC samples
- rhat_hist: Rhat is a scale reduction factor measure of convergence between the chains. The closer the values are to 1, the more the chains have converged. Values greater than 1.05 indicate a lack of convergence. There will be an Rhat value for each parameter estimated.
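A sketch of the corresponding call, feeding the Rhat values extracted from the fitted model into bayesplot:

```r
## histogram of Rhat values across all estimated parameters
starling_brm4 |> rhat() |> mcmc_rhat_hist()
```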
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
All Rhat values are below 1.05, suggesting the chains have converged.
- neff_hist (number of effective samples): the ratio of the number of effective samples (those not rejected by the sampler) to the number of samples provides an indication of the effectiveness (and efficiency) of the MCMC sampler. Ratios that are less than 0.5 for a parameter suggest that the sampler spent considerable time in difficult areas of the sampling domain and rejected more than half of the samples (replacing them with the previous effective sample).
If the ratios are low, tightening the priors may help.
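A sketch of the corresponding call:

```r
## histogram of effective-sample-size ratios for all parameters
starling_brm4 |> neff_ratio() |> mcmc_neff_hist()
```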
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Ratios all very high.
The rstan package also offers a range of MCMC diagnostics.
Of these, we will focus on:
- stan_trace: this plots the estimates of each parameter over the post-warmup length of each MCMC chain. Each chain is plotted in a different colour, with each parameter in its own facet. Ideally, each trace should just look like noise without any discernible drift and each of the traces for a specific parameter should look the same (i.e, should not be displaced above or below any other trace for that parameter).
[1] "b_Intercept" "b_MONTHJan"
[3] "b_SITUATIONnestMbox" "b_SITUATIONother"
[5] "b_SITUATIONtree" "b_MONTHJan:SITUATIONnestMbox"
[7] "b_MONTHJan:SITUATIONother" "b_MONTHJan:SITUATIONtree"
[9] "sd_BIRD__Intercept" "sd_BIRD__MONTHJan"
[11] "cor_BIRD__Intercept__MONTHJan" "sigma"
[13] "Intercept" "r_BIRD[inside1,Intercept]"
[15] "r_BIRD[inside10,Intercept]" "r_BIRD[inside2,Intercept]"
[17] "r_BIRD[inside3,Intercept]" "r_BIRD[inside4,Intercept]"
[19] "r_BIRD[inside5,Intercept]" "r_BIRD[inside6,Intercept]"
[21] "r_BIRD[inside7,Intercept]" "r_BIRD[inside8,Intercept]"
[23] "r_BIRD[inside9,Intercept]" "r_BIRD[nest-box1,Intercept]"
[25] "r_BIRD[nest-box10,Intercept]" "r_BIRD[nest-box2,Intercept]"
[27] "r_BIRD[nest-box3,Intercept]" "r_BIRD[nest-box4,Intercept]"
[29] "r_BIRD[nest-box5,Intercept]" "r_BIRD[nest-box6,Intercept]"
[31] "r_BIRD[nest-box7,Intercept]" "r_BIRD[nest-box8,Intercept]"
[33] "r_BIRD[nest-box9,Intercept]" "r_BIRD[other1,Intercept]"
[35] "r_BIRD[other10,Intercept]" "r_BIRD[other2,Intercept]"
[37] "r_BIRD[other3,Intercept]" "r_BIRD[other4,Intercept]"
[39] "r_BIRD[other5,Intercept]" "r_BIRD[other6,Intercept]"
[41] "r_BIRD[other7,Intercept]" "r_BIRD[other8,Intercept]"
[43] "r_BIRD[other9,Intercept]" "r_BIRD[tree1,Intercept]"
[45] "r_BIRD[tree10,Intercept]" "r_BIRD[tree2,Intercept]"
[47] "r_BIRD[tree3,Intercept]" "r_BIRD[tree4,Intercept]"
[49] "r_BIRD[tree5,Intercept]" "r_BIRD[tree6,Intercept]"
[51] "r_BIRD[tree7,Intercept]" "r_BIRD[tree8,Intercept]"
[53] "r_BIRD[tree9,Intercept]" "r_BIRD[inside1,MONTHJan]"
[55] "r_BIRD[inside10,MONTHJan]" "r_BIRD[inside2,MONTHJan]"
[57] "r_BIRD[inside3,MONTHJan]" "r_BIRD[inside4,MONTHJan]"
[59] "r_BIRD[inside5,MONTHJan]" "r_BIRD[inside6,MONTHJan]"
[61] "r_BIRD[inside7,MONTHJan]" "r_BIRD[inside8,MONTHJan]"
[63] "r_BIRD[inside9,MONTHJan]" "r_BIRD[nest-box1,MONTHJan]"
[65] "r_BIRD[nest-box10,MONTHJan]" "r_BIRD[nest-box2,MONTHJan]"
[67] "r_BIRD[nest-box3,MONTHJan]" "r_BIRD[nest-box4,MONTHJan]"
[69] "r_BIRD[nest-box5,MONTHJan]" "r_BIRD[nest-box6,MONTHJan]"
[71] "r_BIRD[nest-box7,MONTHJan]" "r_BIRD[nest-box8,MONTHJan]"
[73] "r_BIRD[nest-box9,MONTHJan]" "r_BIRD[other1,MONTHJan]"
[75] "r_BIRD[other10,MONTHJan]" "r_BIRD[other2,MONTHJan]"
[77] "r_BIRD[other3,MONTHJan]" "r_BIRD[other4,MONTHJan]"
[79] "r_BIRD[other5,MONTHJan]" "r_BIRD[other6,MONTHJan]"
[81] "r_BIRD[other7,MONTHJan]" "r_BIRD[other8,MONTHJan]"
[83] "r_BIRD[other9,MONTHJan]" "r_BIRD[tree1,MONTHJan]"
[85] "r_BIRD[tree10,MONTHJan]" "r_BIRD[tree2,MONTHJan]"
[87] "r_BIRD[tree3,MONTHJan]" "r_BIRD[tree4,MONTHJan]"
[89] "r_BIRD[tree5,MONTHJan]" "r_BIRD[tree6,MONTHJan]"
[91] "r_BIRD[tree7,MONTHJan]" "r_BIRD[tree8,MONTHJan]"
[93] "r_BIRD[tree9,MONTHJan]" "prior_Intercept"
[95] "prior_b" "prior_sigma"
[97] "prior_sd_BIRD" "prior_cor_BIRD"
[99] "lprior" "lp__"
[101] "accept_stat__" "stepsize__"
[103] "treedepth__" "n_leapfrog__"
[105] "divergent__" "energy__"
pars <- starling_brm4 |> get_variables()
pars <- str_extract(pars, "^b_.*|^sigma$|^sd.*") |> na.omit()
starling_brm4$fit |>
    stan_trace(pars = pars)

The chains appear well mixed and very similar
- stan_ac (auto-correlation function): plots the auto-correlation between successive MCMC sample lags for each parameter and each chain
There is no evidence of auto-correlation in the MCMC samples
- stan_rhat: Rhat is a scale reduction factor measure of convergence between the chains. The closer the values are to 1, the more the chains have converged. Values greater than 1.05 indicate a lack of convergence. There will be an Rhat value for each parameter estimated.
All Rhat values are below 1.05, suggesting the chains have converged.
- stan_ess (number of effective samples): the ratio of the number of effective samples (those not rejected by the sampler) to the number of samples provides an indication of the effectiveness (and efficiency) of the MCMC sampler. Ratios that are less than 0.5 for a parameter suggest that the sampler spent considerable time in difficult areas of the sampling domain and rejected more than half of the samples (replacing them with the previous effective sample).
If the ratios are low, tightening the priors may help.
Ratios all very high.
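The rstan diagnostics in this section all operate on the underlying stanfit object; a sketch of the calls (note that the autocorrelation function is `stan_ac()`):

```r
## rstan diagnostics applied to the stanfit embedded in the brms fit
starling_brm4$fit |> stan_ac(pars = pars)  # autocorrelation per lag and chain
starling_brm4$fit |> stan_rhat()           # distribution of Rhat values
starling_brm4$fit |> stan_ess()            # effective sample size ratios
```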
The ggmcmc package also has a set of MCMC diagnostic functions.
Of these, we will focus on:
- ggs_traceplot: this plots the estimates of each parameter over the post-warmup length of each MCMC chain. Each chain is plotted in a different colour, with each parameter in its own facet. Ideally, each trace should just look like noise without any discernible drift and each of the traces for a specific parameter should look the same (i.e, should not be displaced above or below any other trace for that parameter).
The chains appear well mixed and very similar
- ggs_autocorrelation (autocorrelation function): plots the autocorrelation between successive MCMC sample lags for each parameter and each chain
There is no evidence of auto-correlation in the MCMC samples
- ggs_Rhat: Rhat is a scale reduction factor measure of convergence between the chains. The closer the values are to 1, the more the chains have converged. Values greater than 1.05 indicate a lack of convergence. There will be an Rhat value for each parameter estimated.
All Rhat values are below 1.05, suggesting the chains have converged.
- ggs_effective (number of effective samples): the ratio of the number of effective samples (those not rejected by the sampler) to the number of samples provides an indication of the effectiveness (and efficiency) of the MCMC sampler. Ratios that are less than 0.5 for a parameter suggest that the sampler spent considerable time in difficult areas of the sampling domain and rejected more than half of the samples (replacing them with the previous effective sample).
If the ratios are low, tightening the priors may help.
Ratios all very high.
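The ggmcmc workflow first converts the draws into a long-format tibble with ggs() and then passes that to the individual plotting functions; a sketch (the `family` regex restricting output to the fixed effects is an assumption):

```r
## convert the stanfit draws to ggmcmc's tidy format, keeping fixed effects
S <- ggs(starling_brm4$fit, family = "^b_")
ggs_traceplot(S)       # trace of each chain per parameter
ggs_autocorrelation(S) # lag autocorrelations per parameter and chain
ggs_Rhat(S)            # Rhat for each parameter
```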
7 Model validation
Posterior predictive checks provide additional diagnostics about the fit of the model. Specifically, they provide a comparison between predictions drawn from the model and the observed data used to train the model.
See list of available diagnostics by name
bayesplot PPC module:
ppc_bars
ppc_bars_grouped
ppc_boxplot
ppc_dens
ppc_dens_overlay
ppc_dens_overlay_grouped
ppc_dots
ppc_ecdf_overlay
ppc_ecdf_overlay_grouped
ppc_error_binned
ppc_error_hist
ppc_error_hist_grouped
ppc_error_scatter
ppc_error_scatter_avg
ppc_error_scatter_avg_grouped
ppc_error_scatter_avg_vs_x
ppc_freqpoly
ppc_freqpoly_grouped
ppc_hist
ppc_intervals
ppc_intervals_grouped
ppc_km_overlay
ppc_km_overlay_grouped
ppc_loo_intervals
ppc_loo_pit_ecdf
ppc_loo_pit_overlay
ppc_loo_pit_qq
ppc_loo_ribbon
ppc_pit_ecdf
ppc_pit_ecdf_grouped
ppc_ribbon
ppc_ribbon_grouped
ppc_rootogram
ppc_scatter
ppc_scatter_avg
ppc_scatter_avg_grouped
ppc_stat
ppc_stat_2d
ppc_stat_freqpoly
ppc_stat_freqpoly_grouped
ppc_stat_grouped
ppc_violin_grouped
- dens_overlay: plots the density distribution of the observed data (black line) overlayed on top of 50 density distributions generated from draws from the model (light blue). Ideally, the 50 realisations should be roughly consistent with the observed data.
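A sketch of the corresponding call (the warning below suggests the original used the older `nsamples` argument; `ndraws` is the current name):

```r
## overlay densities of posterior predictive draws on the observed data
starling_brm4 |> pp_check(type = "dens_overlay", ndraws = 100)
```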
Warning: Argument 'nsamples' is deprecated. Please use argument 'ndraws'
instead.
The model draws appear to be consistent with the observed data.
- error_scatter_avg: this plots the observed values against the average residuals. Similar to a residual plot, we do not want to see any patterns in this plot. Note, this is not really that useful for models that involve a binomial response
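A sketch of the corresponding call:

```r
## observed values against average posterior predictive errors
starling_brm4 |> pp_check(type = "error_scatter_avg")
```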
This is not really interpretable
- intervals: plots the observed data overlayed on top of posterior predictions associated with each level of the predictor. Ideally, the observed data should all fall within the predictive intervals.
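A sketch of the corresponding call:

```r
## observed data overlaid on posterior predictive intervals per observation
starling_brm4 |> pp_check(type = "intervals")
```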
Using all posterior draws for ppc type 'intervals' by default.
Warning: The following arguments were unrecognized and ignored: group
The shinystan package allows the full suite of MCMC diagnostics and posterior predictive checks to be accessed via a web interface.
DHARMa residuals provide very useful diagnostics. Unfortunately, we cannot directly use the simulateResiduals() function to generate the simulated residuals. However, if we are willing to calculate some of the components ourselves, we can still obtain the simulated residuals from the fitted stan model.
We need to supply:
- simulated (predicted) responses associated with each observation.
- observed values
- fitted (predicted) responses (averaged) associated with each observation
starling_resids <- make_brms_dharma_res(starling_brm4, integerResponse = FALSE)
wrap_elements(~ testUniformity(starling_resids)) +
wrap_elements(~ plotResiduals(starling_resids, form = factor(rep(1, nrow(starling))))) +
wrap_elements(~ plotResiduals(starling_resids, quantreg = TRUE)) +
    wrap_elements(~ testDispersion(starling_resids))

Conclusions:
- the simulated residuals do not suggest any issues with the residuals
- there is no evidence of a lack of fit.
8 Partial effects plots
Note: uncertainty of error terms are not taken into account. Consider
setting `interval` to "prediction". This will call `posterior_predict()`
instead of `posterior_epred()`.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
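The messages above come from ggeffects; the code producing the plot is not shown in this rendering. A minimal sketch (the `show_data` argument is the current name for overlaying the raw observations):

```r
## conditional (partial) effects of SITUATION by MONTH, with data overlaid
starling_brm4 |>
    ggpredict(terms = c("SITUATION", "MONTH")) |>
    plot(show_data = TRUE)
```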
Partial.obs <- starling_brm4$data |>
mutate(
Pred = predict(starling_brm4)[, "Estimate"],
Resid = resid(starling_brm4)[, "Estimate"],
Obs = Pred + Resid
)
starling_brm4 |>
fitted_draws(newdata = starling, re_formula = NA) |>
median_hdci() |>
ggplot(aes(x = SITUATION, y = .value, color = MONTH)) +
geom_pointrange(aes(ymin = .lower, ymax = .upper)) +
geom_line() +
geom_point(
data = Partial.obs, aes(y = Obs, x = SITUATION, color = MONTH),
position = position_nudge(x = 0.1)
) +
geom_point(
data = starling, aes(y = MASS, x = SITUATION, color = MONTH), alpha = 0.2,
position = position_nudge(x = 0.05)
    )

Warning: `fitted_draws` and `add_fitted_draws` are deprecated as their names were confusing.
- Use [add_]epred_draws() to get the expectation of the posterior predictive.
- Use [add_]linpred_draws() to get the distribution of the linear predictor.
- For example, you used [add_]fitted_draws(..., scale = "response"), which
means you most likely want [add_]epred_draws(...).
NOTE: When updating to the new functions, note that the `model` parameter is now
named `object` and the `n` parameter is now named `ndraws`.
9 Model investigation
The summary() method generates simple summaries (posterior means, standard deviations and 95% credible intervals), along with Rhat and effective sample size diagnostics.
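That is:

```r
## brms summary of the fixed effects, variance components and sigma
starling_brm4 |> summary()
```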
Warning: There were 1 divergent transitions after warmup. Increasing
adapt_delta above 0.99 may help. See
http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
Family: gaussian
Links: mu = identity; sigma = identity
Formula: MASS ~ MONTH * SITUATION + (MONTH | BIRD)
Data: starling (Number of observations: 80)
Draws: 3 chains, each with iter = 10000; warmup = 5000; thin = 10;
total post-warmup draws = 1500
Multilevel Hyperparameters:
~BIRD (Number of levels: 40)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
sd(Intercept) 1.15 0.81 0.07 2.99 1.00 873
sd(MONTHJan) 2.14 1.33 0.11 5.02 1.00 1019
cor(Intercept,MONTHJan) -0.09 0.54 -0.94 0.91 1.00 1358
Tail_ESS
sd(Intercept) 376
sd(MONTHJan) 377
cor(Intercept,MONTHJan) 1419
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
Intercept 78.58 1.22 76.17 81.03 1.00 1582
MONTHJan 9.16 1.72 5.80 12.59 1.00 1456
SITUATIONnestMbox 0.63 1.78 -2.85 4.18 1.00 1509
SITUATIONother -3.33 1.78 -6.88 0.18 1.00 1544
SITUATIONtree 4.71 1.76 1.24 8.04 1.00 1471
MONTHJan:SITUATIONnestMbox 1.58 2.47 -3.18 6.35 1.00 1393
MONTHJan:SITUATIONother -0.43 2.47 -5.26 4.29 1.00 1533
MONTHJan:SITUATIONtree -1.82 2.48 -6.53 3.11 1.00 1565
Tail_ESS
Intercept 1456
MONTHJan 1413
SITUATIONnestMbox 1334
SITUATIONother 1457
SITUATIONtree 1309
MONTHJan:SITUATIONnestMbox 1421
MONTHJan:SITUATIONother 1459
MONTHJan:SITUATIONtree 1420
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 3.77 0.55 2.60 4.70 1.00 806 355
Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
Warning: There were 1 divergent transitions after warmup. Increasing
adapt_delta above 0.99 may help. See
http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
Conclusions:
- the estimated mass of starlings nesting inside in November is 78.58 g
- starlings that nest inside were found to increase their mass from November to January by 9.16 grams.
- starlings that nest in nest boxes are on average 0.63 grams heavier in November than those that nest inside.
- starlings that nest in other situations and trees are on average 3.33 grams lighter and 4.71 grams heavier respectively in November than those that nest inside.
- there is relatively little variation in Mass between individual birds compared to the variation in Mass within Birds.
# A draws_df: 500 iterations, 3 chains, and 100 variables
b_Intercept b_MONTHJan b_SITUATIONnestMbox b_SITUATIONother b_SITUATIONtree
1 77 8.5 3.471 -1.5 4.9
2 79 6.8 0.124 -3.7 6.0
3 78 12.6 2.076 -3.9 6.6
4 79 8.5 -0.576 -4.4 4.5
5 79 8.6 -0.460 -5.4 4.8
6 78 10.6 2.377 -3.9 6.3
7 79 10.8 0.482 -4.0 5.5
8 81 9.1 -4.118 -6.2 3.5
9 78 8.6 0.052 -2.6 3.8
10 77 10.6 0.945 -2.2 6.0
b_MONTHJan:SITUATIONnestMbox b_MONTHJan:SITUATIONother
1 1.70 0.82
2 4.08 2.94
3 -3.48 -3.72
4 3.69 -0.36
5 4.35 0.77
6 0.18 0.55
7 -1.38 -3.90
8 4.84 1.87
9 1.35 0.91
10 1.53 -1.39
b_MONTHJan:SITUATIONtree
1 -1.58
2 0.86
3 -5.23
4 -0.69
5 -1.79
6 -6.54
7 -4.81
8 -2.91
9 1.53
10 -4.18
# ... with 1490 more draws, and 92 more variables
# ... hidden reserved variables {'.chain', '.iteration', '.draw'}
# A tibble: 100 × 8
variable median lower upper Pl Pg rhat ess_bulk
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 b_Intercept 78.6 76.4 81.2 0 1 1.00 1582.
2 b_MONTHJan 9.17 5.98 12.7 0 1 1.000 1456.
3 b_SITUATIONnestMbox 0.641 -2.52 4.45 0.357 0.643 1.00 1509.
4 b_SITUATIONother -3.31 -6.94 0.102 0.971 0.0287 1.00 1544.
5 b_SITUATIONtree 4.76 1.31 8.11 0.00333 0.997 1.00 1470.
6 b_MONTHJan:SITUATIONnes… 1.61 -3.22 6.25 0.269 0.731 1.00 1393.
7 b_MONTHJan:SITUATIONoth… -0.502 -5.04 4.45 0.578 0.422 1.00 1533.
8 b_MONTHJan:SITUATIONtree -1.82 -6.74 2.79 0.767 0.233 0.999 1564.
9 sd_BIRD__Intercept 1.02 0.00189 2.68 0 1 1.00 871.
10 sd_BIRD__MONTHJan 2.06 0.00695 4.41 0 1 1.00 1016.
# ℹ 90 more rows
starling_brm4$fit |>
tidyMCMC(
estimate.method = "median",
conf.int = TRUE, conf.method = "HPDinterval",
rhat = TRUE, ess = TRUE
    )

# A tibble: 99 × 7
term estimate std.error conf.low conf.high rhat ess
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
1 b_Intercept 78.6 1.22 76.4 81.2 0.999 1569
2 b_MONTHJan 9.16 1.72 5.98 12.7 1.000 1454
3 b_SITUATIONnestMbox 0.629 1.78 -2.52 4.45 0.999 1502
4 b_SITUATIONother -3.33 1.78 -6.94 0.102 0.999 1540
5 b_SITUATIONtree 4.71 1.76 1.31 8.11 1.00 1478
6 b_MONTHJan:SITUATIONnestMb… 1.58 2.47 -3.22 6.25 1.00 1372
7 b_MONTHJan:SITUATIONother -0.432 2.47 -5.04 4.45 1.00 1525
8 b_MONTHJan:SITUATIONtree -1.82 2.48 -6.74 2.79 0.999 1559
9 sd_BIRD__Intercept 1.15 0.808 0.00189 2.68 1.00 675
10 sd_BIRD__MONTHJan 2.14 1.33 0.00695 4.41 1.00 881
# ℹ 89 more rows
Conclusions:
see above
[1] "b_Intercept" "b_MONTHJan"
[3] "b_SITUATIONnestMbox" "b_SITUATIONother"
[5] "b_SITUATIONtree" "b_MONTHJan:SITUATIONnestMbox"
[7] "b_MONTHJan:SITUATIONother" "b_MONTHJan:SITUATIONtree"
[9] "sd_BIRD__Intercept" "sd_BIRD__MONTHJan"
[11] "cor_BIRD__Intercept__MONTHJan" "sigma"
[13] "Intercept" "r_BIRD[inside1,Intercept]"
[15] "r_BIRD[inside10,Intercept]" "r_BIRD[inside2,Intercept]"
[17] "r_BIRD[inside3,Intercept]" "r_BIRD[inside4,Intercept]"
[19] "r_BIRD[inside5,Intercept]" "r_BIRD[inside6,Intercept]"
[21] "r_BIRD[inside7,Intercept]" "r_BIRD[inside8,Intercept]"
[23] "r_BIRD[inside9,Intercept]" "r_BIRD[nest-box1,Intercept]"
[25] "r_BIRD[nest-box10,Intercept]" "r_BIRD[nest-box2,Intercept]"
[27] "r_BIRD[nest-box3,Intercept]" "r_BIRD[nest-box4,Intercept]"
[29] "r_BIRD[nest-box5,Intercept]" "r_BIRD[nest-box6,Intercept]"
[31] "r_BIRD[nest-box7,Intercept]" "r_BIRD[nest-box8,Intercept]"
[33] "r_BIRD[nest-box9,Intercept]" "r_BIRD[other1,Intercept]"
[35] "r_BIRD[other10,Intercept]" "r_BIRD[other2,Intercept]"
[37] "r_BIRD[other3,Intercept]" "r_BIRD[other4,Intercept]"
[39] "r_BIRD[other5,Intercept]" "r_BIRD[other6,Intercept]"
[41] "r_BIRD[other7,Intercept]" "r_BIRD[other8,Intercept]"
[43] "r_BIRD[other9,Intercept]" "r_BIRD[tree1,Intercept]"
[45] "r_BIRD[tree10,Intercept]" "r_BIRD[tree2,Intercept]"
[47] "r_BIRD[tree3,Intercept]" "r_BIRD[tree4,Intercept]"
[49] "r_BIRD[tree5,Intercept]" "r_BIRD[tree6,Intercept]"
[51] "r_BIRD[tree7,Intercept]" "r_BIRD[tree8,Intercept]"
[53] "r_BIRD[tree9,Intercept]" "r_BIRD[inside1,MONTHJan]"
[55] "r_BIRD[inside10,MONTHJan]" "r_BIRD[inside2,MONTHJan]"
[57] "r_BIRD[inside3,MONTHJan]" "r_BIRD[inside4,MONTHJan]"
[59] "r_BIRD[inside5,MONTHJan]" "r_BIRD[inside6,MONTHJan]"
[61] "r_BIRD[inside7,MONTHJan]" "r_BIRD[inside8,MONTHJan]"
[63] "r_BIRD[inside9,MONTHJan]" "r_BIRD[nest-box1,MONTHJan]"
[65] "r_BIRD[nest-box10,MONTHJan]" "r_BIRD[nest-box2,MONTHJan]"
[67] "r_BIRD[nest-box3,MONTHJan]" "r_BIRD[nest-box4,MONTHJan]"
[69] "r_BIRD[nest-box5,MONTHJan]" "r_BIRD[nest-box6,MONTHJan]"
[71] "r_BIRD[nest-box7,MONTHJan]" "r_BIRD[nest-box8,MONTHJan]"
[73] "r_BIRD[nest-box9,MONTHJan]" "r_BIRD[other1,MONTHJan]"
[75] "r_BIRD[other10,MONTHJan]" "r_BIRD[other2,MONTHJan]"
[77] "r_BIRD[other3,MONTHJan]" "r_BIRD[other4,MONTHJan]"
[79] "r_BIRD[other5,MONTHJan]" "r_BIRD[other6,MONTHJan]"
[81] "r_BIRD[other7,MONTHJan]" "r_BIRD[other8,MONTHJan]"
[83] "r_BIRD[other9,MONTHJan]" "r_BIRD[tree1,MONTHJan]"
[85] "r_BIRD[tree10,MONTHJan]" "r_BIRD[tree2,MONTHJan]"
[87] "r_BIRD[tree3,MONTHJan]" "r_BIRD[tree4,MONTHJan]"
[89] "r_BIRD[tree5,MONTHJan]" "r_BIRD[tree6,MONTHJan]"
[91] "r_BIRD[tree7,MONTHJan]" "r_BIRD[tree8,MONTHJan]"
[93] "r_BIRD[tree9,MONTHJan]" "prior_Intercept"
[95] "prior_b" "prior_sigma"
[97] "prior_sd_BIRD" "prior_cor_BIRD"
[99] "lprior" "lp__"
[101] "accept_stat__" "stepsize__"
[103] "treedepth__" "n_leapfrog__"
[105] "divergent__" "energy__"
starling_draw <- starling_brm4 |>
gather_draws(`b.Intercept.*|b_SITUATION.*|b_MONTH.*`, regex = TRUE)
starling_draw

# A tibble: 12,000 × 5
# Groups: .variable [8]
.chain .iteration .draw .variable .value
<int> <int> <int> <chr> <dbl>
1 1 1 1 b_Intercept 77.4
2 1 2 2 b_Intercept 79.2
3 1 3 3 b_Intercept 77.9
4 1 4 4 b_Intercept 78.7
5 1 5 5 b_Intercept 79.1
6 1 6 6 b_Intercept 78.0
7 1 7 7 b_Intercept 79.2
8 1 8 8 b_Intercept 81.0
9 1 9 9 b_Intercept 78.3
10 1 10 10 b_Intercept 77.3
# ℹ 11,990 more rows
We can then summarise this
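For example, via tidybayes:

```r
## median and 95% highest density continuous interval per parameter
starling_draw |> median_hdci(.value)
```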
# A tibble: 8 × 7
.variable .value .lower .upper .width .point .interval
<chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
1 b_Intercept 78.6 76.2 81.1 0.95 median hdci
2 b_MONTHJan 9.17 6.03 12.8 0.95 median hdci
3 b_MONTHJan:SITUATIONnestMbox 1.61 -3.22 6.25 0.95 median hdci
4 b_MONTHJan:SITUATIONother -0.502 -5.04 4.45 0.95 median hdci
5 b_MONTHJan:SITUATIONtree -1.82 -6.74 2.79 0.95 median hdci
6 b_SITUATIONnestMbox 0.641 -2.80 4.25 0.95 median hdci
7 b_SITUATIONother -3.31 -6.94 0.102 0.95 median hdci
8 b_SITUATIONtree 4.76 1.31 8.11 0.95 median hdci
starling_brm3 |>
gather_draws(`b_Intercept.*|b_SITUATION.*|b_MONTH.*`, regex = TRUE) |>
ggplot() +
geom_vline(xintercept = 0, linetype = "dashed") +
stat_slab(aes(
x = .value, y = .variable,
fill = stat(ggdist::cut_cdf_qi(cdf,
.width = c(0.5, 0.8, 0.95),
labels = scales::percent_format()
))
), color = "black") +
    scale_fill_brewer("Interval", direction = -1, na.translate = FALSE)

Warning: `stat(ggdist::cut_cdf_qi(cdf, .width = c(0.5, 0.8, 0.95), labels =
scales::percent_format()))` was deprecated in ggplot2 3.4.0.
ℹ Please use `after_stat(ggdist::cut_cdf_qi(cdf, .width = c(0.5, 0.8, 0.95),
labels = scales::percent_format()))` instead.
starling_brm3 |>
gather_draws(`.Intercept.*|b_SITUATION.*|b_MONTH.*`, regex = TRUE) |>
ggplot() +
geom_vline(xintercept = 0, linetype = "dashed") +
stat_halfeye(aes(x = .value, y = .variable)) +
    theme_classic()

starling_brm4 |>
gather_draws(`^b_.*`, regex = TRUE) |>
filter(.variable != "b_Intercept") |>
ggplot() +
stat_halfeye(aes(x = .value, y = .variable)) +
    facet_wrap(~.variable, scales = "free")

starling_brm4 |>
gather_draws(`^b_.*`, regex = TRUE) |>
filter(.variable != "b_Intercept") |>
ggplot() +
stat_halfeye(aes(x = .value, y = .variable)) +
    geom_vline(xintercept = 0, linetype = "dashed")

starling_brm4 |>
gather_draws(`^b_.*`, regex = TRUE) |>
filter(.variable != "b_Intercept") |>
ggplot() +
geom_density_ridges(aes(x = .value, y = .variable), alpha = 0.4) +
    geom_vline(xintercept = 0, linetype = "dashed")

Picking joint bandwidth of 0.428
## Or in colour
starling_brm4 |>
gather_draws(`^b_.*`, regex = TRUE) |>
filter(.variable != "b_Intercept") |>
ggplot() +
geom_density_ridges_gradient(
aes(
x = (.value),
y = .variable,
fill = stat(x)
),
alpha = 0.4, colour = "white",
quantile_lines = TRUE,
quantiles = c(0.025, 0.975)
) +
    geom_vline(xintercept = 0, linetype = "dashed") +
    scale_x_continuous() +
    scale_fill_viridis_c(option = "C")

Picking joint bandwidth of 0.428
## Fractional scale
starling_brm4 |>
gather_draws(`^b_.*`, regex = TRUE) |>
filter(.variable != "b_Intercept") |>
ggplot() +
geom_density_ridges_gradient(
aes(
x = exp(.value),
y = .variable,
fill = stat(x)
),
alpha = 0.4, colour = "white",
quantile_lines = TRUE,
quantiles = c(0.025, 0.975)
) +
geom_vline(xintercept = 1, linetype = "dashed") +
scale_x_continuous(trans = scales::log2_trans()) +
    scale_fill_viridis_c(option = "C")

Picking joint bandwidth of 0.617
This is purely a graphical depiction of the posteriors.
# A tibble: 1,500 × 109
.chain .iteration .draw b_Intercept b_MONTHJan b_SITUATIONnestMbox
<int> <int> <int> <dbl> <dbl> <dbl>
1 1 1 1 77.4 8.54 3.47
2 1 2 2 79.2 6.84 0.124
3 1 3 3 77.9 12.6 2.08
4 1 4 4 78.7 8.53 -0.576
5 1 5 5 79.1 8.60 -0.460
6 1 6 6 78.0 10.6 2.38
7 1 7 7 79.2 10.8 0.482
8 1 8 8 81.0 9.14 -4.12
9 1 9 9 78.3 8.57 0.0517
10 1 10 10 77.3 10.6 0.945
# ℹ 1,490 more rows
# ℹ 103 more variables: b_SITUATIONother <dbl>, b_SITUATIONtree <dbl>,
# `b_MONTHJan:SITUATIONnestMbox` <dbl>, `b_MONTHJan:SITUATIONother` <dbl>,
# `b_MONTHJan:SITUATIONtree` <dbl>, sd_BIRD__Intercept <dbl>,
# sd_BIRD__MONTHJan <dbl>, cor_BIRD__Intercept__MONTHJan <dbl>, sigma <dbl>,
# Intercept <dbl>, `r_BIRD[inside1,Intercept]` <dbl>,
# `r_BIRD[inside10,Intercept]` <dbl>, `r_BIRD[inside2,Intercept]` <dbl>, …
# A tibble: 1,500 × 55
.chain .iteration .draw b_Intercept b_MONTHJan b_SITUATIONnestMbox
<int> <int> <int> <dbl> <dbl> <dbl>
1 1 1 1 77.4 8.54 3.47
2 1 2 2 79.2 6.84 0.124
3 1 3 3 77.9 12.6 2.08
4 1 4 4 78.7 8.53 -0.576
5 1 5 5 79.1 8.60 -0.460
6 1 6 6 78.0 10.6 2.38
7 1 7 7 79.2 10.8 0.482
8 1 8 8 81.0 9.14 -4.12
9 1 9 9 78.3 8.57 0.0517
10 1 10 10 77.3 10.6 0.945
# ℹ 1,490 more rows
# ℹ 49 more variables: b_SITUATIONother <dbl>, b_SITUATIONtree <dbl>,
# `b_MONTHJan:SITUATIONnestMbox` <dbl>, `b_MONTHJan:SITUATIONother` <dbl>,
# `b_MONTHJan:SITUATIONtree` <dbl>, sd_BIRD__Intercept <dbl>,
# cor_BIRD__Intercept__MONTHJan <dbl>, Intercept <dbl>,
# `r_BIRD[inside1,Intercept]` <dbl>, `r_BIRD[inside10,Intercept]` <dbl>,
# `r_BIRD[inside2,Intercept]` <dbl>, `r_BIRD[inside3,Intercept]` <dbl>, …
Warning: Method 'posterior_samples' is deprecated. Please see ?as_draws for
recommended alternatives.
# A tibble: 1,500 × 100
b_Intercept b_MONTHJan b_SITUATIONnestMbox b_SITUATIONother b_SITUATIONtree
<dbl> <dbl> <dbl> <dbl> <dbl>
1 77.4 8.54 3.47 -1.51 4.89
2 79.2 6.84 0.124 -3.65 6.02
3 77.9 12.6 2.08 -3.93 6.63
4 78.7 8.53 -0.576 -4.39 4.50
5 79.1 8.60 -0.460 -5.39 4.78
6 78.0 10.6 2.38 -3.88 6.31
7 79.2 10.8 0.482 -4.04 5.48
8 81.0 9.14 -4.12 -6.16 3.49
9 78.3 8.57 0.0517 -2.55 3.77
10 77.3 10.6 0.945 -2.19 6.04
# ℹ 1,490 more rows
# ℹ 95 more variables: `b_MONTHJan:SITUATIONnestMbox` <dbl>,
# `b_MONTHJan:SITUATIONother` <dbl>, `b_MONTHJan:SITUATIONtree` <dbl>,
# sd_BIRD__Intercept <dbl>, sd_BIRD__MONTHJan <dbl>,
# cor_BIRD__Intercept__MONTHJan <dbl>, sigma <dbl>, Intercept <dbl>,
# `r_BIRD[inside1,Intercept]` <dbl>, `r_BIRD[inside10,Intercept]` <dbl>,
# `r_BIRD[inside2,Intercept]` <dbl>, `r_BIRD[inside3,Intercept]` <dbl>, …
y ymin ymax .width .point .interval
1 0.6266656 0.5319229 0.6927965 0.95 median hdci
y ymin ymax .width .point .interval
1 0.6483664 0.5405546 0.7276725 0.95 median hdci
y ymin ymax .width .point .interval
1 0.6958443 0.5594972 0.8373179 0.95 median hdci
starling_brm4 |>
bayes_R2(re.form = ~ (MONTH + MONTH:SITUATION | BIRD), summary = FALSE) |>
    median_hdci()

          y      ymin      ymax .width .point .interval
1 0.6266656 0.5319229 0.6927965 0.95 median hdci
y ymin ymax .width .point .interval
1 0.6266656 0.5319229 0.6927965 0.95 median hdci
starling_brm4 |> modelsummary(
statistic = c("conf.low", "conf.high"),
shape = term ~ statistic,
exponentiate = FALSE
)

Warning:
`modelsummary` uses the `performance` package to extract goodness-of-fit
statistics from models of this class. You can specify the statistics you wish
to compute by supplying a `metrics` argument to `modelsummary`, which will then
push it forward to `performance`. Acceptable values are: "all", "common",
"none", or a character vector of metrics names. For example: `modelsummary(mod,
metrics = c("RMSE", "R2")` Note that some metrics are computationally
expensive. See `?performance::performance` for details.
This warning appears once per session.
| (1) | |||
|---|---|---|---|
| Est. | 2.5 % | 97.5 % | |
| b_Intercept | 78.575 | 76.168 | 81.033 |
| b_MONTHJan | 9.169 | 5.802 | 12.592 |
| b_SITUATIONnestMbox | 0.641 | -2.853 | 4.184 |
| b_SITUATIONother | -3.306 | -6.876 | 0.183 |
| b_SITUATIONtree | 4.758 | 1.236 | 8.043 |
| b_MONTHJan × SITUATIONnestMbox | 1.609 | -3.182 | 6.348 |
| b_MONTHJan × SITUATIONother | -0.502 | -5.262 | 4.295 |
| b_MONTHJan × SITUATIONtree | -1.819 | -6.534 | 3.108 |
| sd_BIRD__Intercept | 1.024 | 0.065 | 2.992 |
| sd_BIRD__MONTHJan | 2.062 | 0.105 | 5.025 |
| cor_BIRD__Intercept__MONTHJan | -0.160 | -0.943 | 0.914 |
| sigma | 3.812 | 2.597 | 4.701 |
| Num.Obs. | 80 | ||
| R2 | 0.696 | ||
| R2 Adj. | 0.587 | ||
| R2 Marg. | 0.627 | ||
| ELPD | -232.0 | ||
| ELPD s.e. | 5.0 | ||
| LOOIC | 464.1 | ||
| LOOIC s.e. | 10.1 | ||
| WAIC | 461.4 | ||
| RMSE | 3.17 | ||
| r2.adjusted.marginal | 0.409469102360111 | ||
10 Further investigations
Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
NOTE: Results may be misleading due to involvement in interactions
contrast estimate lower.HPD upper.HPD
inside - (nest-box) -1.42 -4.207 1.417
inside - other 3.55 0.758 6.316
inside - tree -3.84 -6.593 -0.855
(nest-box) - other 4.95 2.358 7.865
(nest-box) - tree -2.36 -5.168 0.362
other - tree -7.36 -10.101 -4.606
Results are averaged over the levels of: MONTH
Point estimate displayed: median
HPD interval probability: 0.95
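The marginal comparisons in this section come from emmeans; a sketch of the calls (averaging over the other factor, as the notes in the output indicate):

```r
## all pairwise comparisons of SITUATION, averaged over MONTH
starling_brm4 |>
    emmeans(~SITUATION) |>
    pairs()
## pairwise comparison of MONTH, averaged over SITUATION
starling_brm4 |>
    emmeans(~MONTH) |>
    pairs()
```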
Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
NOTE: Results may be misleading due to involvement in interactions
contrast estimate lower.HPD upper.HPD
Nov - Jan -9.01 -10.8 -7.05
Results are averaged over the levels of: SITUATION
Point estimate displayed: median
HPD interval probability: 0.95
Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
SITUATION = inside:
contrast estimate lower.HPD upper.HPD
Nov - Jan -9.17 -12.7 -5.98
SITUATION = nest-box:
contrast estimate lower.HPD upper.HPD
Nov - Jan -10.71 -14.3 -7.29
SITUATION = other:
contrast estimate lower.HPD upper.HPD
Nov - Jan -8.80 -12.2 -4.98
SITUATION = tree:
contrast estimate lower.HPD upper.HPD
Nov - Jan -7.35 -11.1 -4.02
Point estimate displayed: median
HPD interval probability: 0.95
Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
# A tibble: 4 × 8
contrast SITUATION .value .lower .upper .width .point .interval
<chr> <fct> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
1 Jan/Nov inside 11.6 7.24 16.2 0.95 median hdci
2 Jan/Nov nest-box 13.5 9.17 18.4 0.95 median hdci
3 Jan/Nov other 11.7 6.34 16.5 0.95 median hdci
4 Jan/Nov tree 8.84 4.60 13.6 0.95 median hdci
Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
# A tibble: 6 × 8
# Groups: SITUATION [1]
SITUATION .chain .iteration .draw Nov Jan Eff PEff
<fct> <int> <int> <int> <dbl> <dbl> <dbl> <dbl>
1 inside NA NA 1 77.4 85.9 8.54 11.0
2 inside NA NA 2 79.2 86.0 6.84 8.63
3 inside NA NA 3 77.9 90.5 12.6 16.2
4 inside NA NA 4 78.7 87.3 8.53 10.8
5 inside NA NA 5 79.1 87.7 8.60 10.9
6 inside NA NA 6 78.0 88.5 10.6 13.5
# A tibble: 4 × 7
SITUATION PEff .lower .upper .width .point .interval
<fct> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
1 inside 11.6 7.24 16.2 0.95 median hdci
2 nest-box 13.5 9.17 18.4 0.95 median hdci
3 other 11.7 6.34 16.5 0.95 median hdci
4 tree 8.84 4.60 13.6 0.95 median hdci
# A tibble: 4 × 3
SITUATION Prob `Prob>10`
<fct> <dbl> <dbl>
1 inside 1 0.763
2 nest-box 1 0.941
3 other 0.999 0.728
4 tree 1 0.304
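The percentage-effect summaries above can be reconstructed from posterior draws of the cell means: pivot the Nov and Jan draws side by side, compute the percentage change, then summarise. A sketch under the same assumption that the fitted model is `starling.brm`:

```r
library(tidybayes)
## Per-draw absolute (Eff) and percentage (PEff) mass gain per situation,
## then medians, intervals and exceedance probabilities
starling.brm |>
  emmeans(~MONTH | SITUATION) |>
  gather_emmeans_draws() |>
  ungroup() |>
  pivot_wider(names_from = MONTH, values_from = .value) |>
  mutate(Eff = Jan - Nov,                 # absolute gain (g)
         PEff = 100 * Eff / Nov) |>       # percentage gain
  group_by(SITUATION) |>
  summarise(median_hdci(PEff),            # median and 95% HPD interval
            Prob = mean(PEff > 0),        # P(any gain)
            `Prob>10` = mean(PEff > 10))  # P(gain exceeds 10%)
```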
Compare specific sets of SITUATION levels, both averaged over and within each MONTH
[1] "inside" "nest-box" "other" "tree"
NOTE: Results may be misleading due to involvement in interactions
contrast estimate lower.HPD upper.HPD
SITUATION.Nat vs Art 4.53 2.376 7.00
SITUATION.Tree vs I/NB 3.13 0.739 5.57
SITUATION.Tree vs NB 2.36 -0.362 5.17
Results are averaged over the levels of: MONTH
Point estimate displayed: median
HPD interval probability: 0.95
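Planned comparisons like these are driven by a custom contrast matrix. The weights below are inferred from the contrast labels and estimates above (levels ordered inside, nest-box, other, tree), so treat them as a sketch rather than the tutorial's exact code; `starling.brm` is again an illustrative model name:

```r
## Contrast weights over c(inside, nest-box, other, tree) -- inferred
cmat <- list(
  `Nat vs Art`   = c(-1/3, -1/3, -1/3, 1),  # tree vs the three artificial roosts
  `Tree vs I/NB` = c(-1/2, -1/2, 0, 1),     # tree vs inside/nest-box average
  `Tree vs NB`   = c(0, -1, 0, 1)           # tree vs nest-box
)
## Averaged over months
starling.brm |>
  emmeans(~SITUATION) |>
  contrast(method = cmat)
## Or separately within each month
starling.brm |>
  emmeans(~SITUATION | MONTH) |>
  contrast(method = cmat)
```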
NOTE: Results may be misleading due to involvement in interactions
# A tibble: 4,500 × 5
# Groups: contrast [3]
contrast .chain .iteration .draw .value
<chr> <int> <int> <int> <dbl>
1 SITUATION.Nat vs Art NA NA 1 3.03
2 SITUATION.Nat vs Art NA NA 2 6.46
3 SITUATION.Nat vs Art NA NA 3 5.83
4 SITUATION.Nat vs Art NA NA 4 5.25
5 SITUATION.Nat vs Art NA NA 5 4.98
6 SITUATION.Nat vs Art NA NA 6 3.42
7 SITUATION.Nat vs Art NA NA 7 5.14
8 SITUATION.Nat vs Art NA NA 8 4.35
9 SITUATION.Nat vs Art NA NA 9 4.98
10 SITUATION.Nat vs Art NA NA 10 4.34
# ℹ 4,490 more rows
MONTH = Nov:
contrast estimate lower.HPD upper.HPD
SITUATION.Nat vs Art 5.63 2.7219 8.38
SITUATION.Tree vs I/NB 4.41 1.4919 7.48
SITUATION.Tree vs NB 4.10 0.7234 7.78
MONTH = Jan:
contrast estimate lower.HPD upper.HPD
SITUATION.Nat vs Art 3.39 0.0497 6.70
SITUATION.Tree vs I/NB 1.77 -1.4540 5.36
SITUATION.Tree vs NB 0.72 -3.4886 4.39
Point estimate displayed: median
HPD interval probability: 0.95
# A tibble: 6 × 8
# Groups: contrast [1]
contrast .chain .iteration .draw Nov Jan Eff PEff
<fct> <int> <int> <int> <dbl> <dbl> <dbl> <dbl>
1 SITUATION.Nat vs Art NA NA 1 4.23 1.82 -2.41 -57.0
2 SITUATION.Nat vs Art NA NA 2 7.20 5.72 -1.48 -20.5
3 SITUATION.Nat vs Art NA NA 3 7.25 4.41 -2.84 -39.1
4 SITUATION.Nat vs Art NA NA 4 6.15 4.35 -1.80 -29.3
5 SITUATION.Nat vs Art NA NA 5 6.73 3.23 -3.50 -52.0
6 SITUATION.Nat vs Art NA NA 6 6.81 0.0327 -6.78 -99.5