Bayesian GLMM Part1

Author

Murray Logan

Published

09/09/2025

1 Preparations

Load the necessary libraries

library(tidyverse) # for data wrangling etc
library(rstanarm) # for fitting models in STAN
library(cmdstanr) # for cmdstan
library(brms) # for fitting models in STAN
library(standist) # for exploring distributions
library(HDInterval) # for HPD intervals
library(posterior) # for posterior draws
library(coda) # for diagnostics
library(bayesplot) # for diagnostics
library(ggmcmc) # for diagnostics
library(rstan) # for interfacing with STAN
library(DHARMa) # for residual diagnostics
library(emmeans) # for marginal means etc
library(broom) # for tidying outputs
library(broom.mixed) # for tidying MCMC outputs
library(tidybayes) # for more tidying outputs
library(ggeffects) # for partial plots
library(patchwork) # for multiple figures
library(bayestestR) # for ROPE
library(see) # for some plots
library(ggridges) # for ridge plots
library(easystats) # framework for stats, modelling and visualisation
library(modelsummary)
source("helperFunctions.R")

2 Scenario

A plant pathologist wanted to examine the effects of two different strengths of tobacco virus on the number of lesions on tobacco leaves. She knew from pilot studies that leaves were inherently very variable in response to the virus. In an attempt to account for this leaf to leaf variability, both treatments were applied to each leaf. Eight individual leaves were divided in half, with half of each leaf inoculated with weak strength virus and the other half inoculated with strong virus. So the leaves were blocks and each treatment was represented once in each block. A completely randomised design would have had 16 leaves, with 8 whole leaves randomly allocated to each treatment.

Figure 1: Tobacco plant
Table 1: Format of tobacco.csv data files
LEAF TREATMENT NUMBER
1 Strong 35.898
1 Weak 25.02
2 Strong 34.118
2 Weak 23.167
3 Strong 35.702
3 Weak 24.122
... ... ...
Table 2: Description of the variables in the tobacco data file
LEAF The blocking factor - Factor B
TREATMENT Categorical representation of the strength of the tobacco virus - main factor of interest Factor A
NUMBER Number of lesions on that part of the tobacco leaf - response variable

3 Read in the data

tobacco <- read_csv("../data/tobacco.csv", trim_ws = TRUE)
Rows: 16 Columns: 3
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): LEAF, TREATMENT
dbl (1): NUMBER

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(tobacco)
Rows: 16
Columns: 3
$ LEAF      <chr> "L1", "L1", "L2", "L2", "L3", "L3", "L4", "L4", "L5", "L5", …
$ TREATMENT <chr> "Strong", "Weak", "Strong", "Weak", "Strong", "Weak", "Stron…
$ NUMBER    <dbl> 35.89776, 25.01984, 34.11786, 23.16740, 35.70215, 24.12191, …
## Explore the first 6 rows of the data
head(tobacco)
# A tibble: 6 × 3
  LEAF  TREATMENT NUMBER
  <chr> <chr>      <dbl>
1 L1    Strong      35.9
2 L1    Weak        25.0
3 L2    Strong      34.1
4 L2    Weak        23.2
5 L3    Strong      35.7
6 L3    Weak        24.1
str(tobacco)
spc_tbl_ [16 × 3] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ LEAF     : chr [1:16] "L1" "L1" "L2" "L2" ...
 $ TREATMENT: chr [1:16] "Strong" "Weak" "Strong" "Weak" ...
 $ NUMBER   : num [1:16] 35.9 25 34.1 23.2 35.7 ...
 - attr(*, "spec")=
  .. cols(
  ..   LEAF = col_character(),
  ..   TREATMENT = col_character(),
  ..   NUMBER = col_double()
  .. )
 - attr(*, "problems")=<externalptr> 
tobacco |> datawizard::data_codebook()
tobacco (16 rows and 3 variables, 3 shown)

ID | Name      | Type      | Missings |         Values |         N
---+-----------+-----------+----------+----------------+----------
1  | LEAF      | character | 0 (0.0%) |             L1 | 2 (12.5%)
   |           |           |          |             L2 | 2 (12.5%)
   |           |           |          |             L3 | 2 (12.5%)
   |           |           |          |             L4 | 2 (12.5%)
   |           |           |          |             L5 | 2 (12.5%)
   |           |           |          |             L6 | 2 (12.5%)
   |           |           |          |             L7 | 2 (12.5%)
   |           |           |          |             L8 | 2 (12.5%)
---+-----------+-----------+----------+----------------+----------
2  | TREATMENT | character | 0 (0.0%) |         Strong | 8 (50.0%)
   |           |           |          |           Weak | 8 (50.0%)
---+-----------+-----------+----------+----------------+----------
3  | NUMBER    | numeric   | 0 (0.0%) | [20.57, 44.72] |        16
------------------------------------------------------------------
tobacco |> modelsummary::datasummary_skim()
Unique Missing Pct. Mean SD Min Median Max Histogram
NUMBER 16 0 31.0 6.5 20.6 33.1 44.7
N %
LEAF L1 2 12.5
L2 2 12.5
L3 2 12.5
L4 2 12.5
L5 2 12.5
L6 2 12.5
L7 2 12.5
L8 2 12.5
TREATMENT Strong 8 50.0
Weak 8 50.0
tobacco |> modelsummary::datasummary_skim(by = "TREATMENT")
TREATMENT Unique Missing Pct. Mean SD Min Median Max Histogram
NUMBER Strong 8 0 34.9 5.1 26.2 34.9 44.7
Weak 8 0 27.1 5.5 20.6 25.1 36.0
N %
LEAF L1 2 12.5
L2 2 12.5
L3 2 12.5
L4 2 12.5
L5 2 12.5
L6 2 12.5
L7 2 12.5
L8 2 12.5
TREATMENT Strong 8 50.0
Weak 8 50.0

4 Exploratory data analysis

To explore the assumptions of homogeneity of variance and normality, a boxplot of each Treatment level is appropriate.

## Boxplots per treatment level to assess normality (symmetry) and
## homogeneity of variance (similar box heights)
ggplot(tobacco, aes(y = NUMBER, x = TREATMENT)) +
  geom_boxplot()

Conclusions:

  • both normality and homogeneity of variance seem satisfied

It can also be useful to get a sense of the consistency across blocks (LEAF). That is, do all Leaves have a similar baseline level of lesion susceptibility and do they respond similarly to the treatment.

## Lesion counts per leaf, one line per treatment, to gauge the
## consistency of baselines and treatment responses across blocks.
## NOTE(review): LEAF is read in as character ("L1", "L2", ...), so
## as.numeric(LEAF) would yield NA — presumably LEAF was converted to
## a factor before plotting (cf. <fct> in later output); confirm.
ggplot(tobacco, aes(y = NUMBER, x = as.numeric(LEAF))) +
  geom_line(aes(linetype = TREATMENT))

## If we want to retain the original LEAF labels
ggplot(tobacco, aes(y = NUMBER, x = as.numeric(LEAF))) +
  geom_blank(aes(x = LEAF)) +   # establishes a discrete axis with LEAF labels
  geom_line(aes(linetype = TREATMENT))

Conclusions:

  • it is clear that some leaves are more susceptible to lesions (e.g. Leaf 7) than other leaves (e.g. Leaf 4)
  • most leaves (other than Leaf 4 and 6) have a similar response to the Treatments - that is most have higher number of lesions from the Strong Treatment than the Weak Treatment.

Given that there are only two levels of Treatment (Strong and Weak), it might be easier to visualise the differences in baselines and effect consistency by plotting as:

## One line per leaf joining its two treatment values — visualises
## varying baselines (intercepts) and effect consistency (slopes).
## NOTE(review): as.numeric(TREATMENT) assumes TREATMENT is a factor;
## it is read in as character, so a prior factor conversion is implied — confirm.
ggplot(tobacco, aes(y = NUMBER, x = TREATMENT, group = LEAF)) +
  geom_point() +
  geom_line(aes(x = as.numeric(TREATMENT)))

Conclusions:

  • this figure reiterates the points made earlier about the varying baselines and effect consistency.

The above figure also serves as a good way to visualise certain aspects of mixed effects models. When we fit a mixed effects model that includes a random blocking effect (in this case LEAF), we are indicating that we are allowing there to be a different intercept for each block (LEAF). In the current case, the intercept will represent the first Treatment level (Strong). So the random effect is specifying that the intercept can vary from Leaf to Leaf.

We can think of the model as having two tiers (a hierarchy), where the tiers of the hierarchy represent progressively smaller (typically) spatial scales. In the current example, the largest spatial units are the leaves (blocking factor). Within the leaves, there are the two Treatments (Strong and Weak) and within the Treatments are the individual observations.

From a frequentist perspective, this model might be referred to as a mixed effects model in which the TREATMENT is a ‘fixed’ effect and the LEAF is a ‘random’ effect. Such terminology is inconsistent in a Bayesian context since all parameters are ‘random’. Instead, these would be referred to as population-level and group-level effects respectively. Group-level effects are so called because they are effects that differ within each group (e.g. level of a blocking factor), whereas population effects are those that have been pooled across all groups.

Random intercepts model formula: \[ \begin{align} y_{i,j} &\sim{} \mathcal{N}(\mu_{i,j}, \sigma^2)\\ \mu_{i,j} &=\beta_0 + \bf{Z_j}\boldsymbol{\gamma_j} + \bf{X_i}\boldsymbol{\beta} \\ \beta_0 &\sim{} \mathcal{N}(35, 3)\\ \beta_1 &\sim{} \mathcal{N}(0, 10)\\ \boldsymbol{\gamma_j} &\sim{} \mathcal{N}(0, \sigma^2_l)\\ \sigma^2_l &\sim{} t(3, 0, 5)\\ \sigma^2 &\sim{} t(3, 0, 5)\ \end{align} \]

Random intercept/slope model formula: \[ \begin{align} y_{i,j} &\sim{} \mathcal{N}(\mu_{i,j}, \sigma^2)\\ \mu_{i,j} &=\beta_0 + \bf{Z_j}\boldsymbol{\gamma_j} + \bf{X_i}\boldsymbol{\beta} \\ \beta_0 &\sim{} \mathcal{N}(35, 3)\\ \beta_1 &\sim{} \mathcal{N}(0, 10)\\ \boldsymbol{\gamma_j} &\sim{} \mathcal{N}(0, \boldsymbol{\Sigma})\\ \boldsymbol{\Sigma} &= \boldsymbol{D}({\sigma_l})\boldsymbol{\Omega}\boldsymbol{D}({\sigma_l})\\ \boldsymbol{\Omega} &\sim{} LKJ(\zeta)\\ \sigma_l^2 &\sim{} \mathcal{t}(3,0,5)\\ \sigma^2 &\sim{} \mathcal{t}(3, 0, 5)\ \end{align} \]

where:

  • \(\bf{X}\) is the model matrix representing the overall intercept and effects of the treatment on the number of lesions.
  • \(\boldsymbol{\beta}\) is a vector of the population-level effects parameters to be estimated.
  • \(\boldsymbol{\gamma}\) is a vector of the group-level effect parameters
  • \(\bf{Z}\) represents a cell means model matrix for the random intercepts (and possibly random slopes) associated with leaves.
  • the population-level intercept (\(\beta_0\)) has a gaussian prior with location of 35 and scale of 3
  • the population-level effect (\(\beta_1\)) has a gaussian prior with location of 0 and scale of 10
  • the group-level effects are assumed to sum-to-zero and be drawn from a gaussian distribution with mean of 0 and covariance of \(\Sigma\)
  • \(\boldsymbol{\Sigma}\) is the variance-covariance matrix between the groups (individual leaves). It turns out that it is difficult to apply a prior on this covariance matrix, so instead, the covariance matrix is decomposed into a correlation matrix (\(\boldsymbol{\Omega}\)) and a vector of variances (\(\boldsymbol{\sigma_l^2}\)) which are the diagonals (\(\boldsymbol{D}\)) of the covariance matrix.
  • \(\boldsymbol{\Omega}\)

\[ \begin{align} \gamma &\sim{} N(0,\Sigma)\\ \Sigma &\rightarrow \Omega, \tau\\ \end{align} \]

where \(\Sigma\) is a covariance matrix.

It turns out that it is difficult to apply a prior on a covariance matrix, so instead, we decompose the covariance matrix into a correlation matrix and variance.

https://jrnold.github.io/bayesian_notes/appendix.html - Section 20.15.3 Covariance-Correlation Matrix Decomposition

  • Covariance matrix can be decomposed into a correlation matrix and a vector of variances

  • The variances can be further decomposed into the product of a simplex vector (which is a probability vector, non-negative and sums to 1) and the trace (product of the order of the matrix and the scale of the scale parameter, also the sum of its diagonal elements) of a matrix. Each element of the simplex vector represents the proportion of the trace that is attributable to the corresponding variable.

  • A prior on all the above is a decov (decomposition of covariance) function

  • The prior on the correlation matrix is called LKJ

  • density is proportional to the determinant of the correlation matrix raised to the power of the positive regularisation parameter minus one.

  • The prior on the simplex vector is a symmetric Dirichlet prior, which has a single (positive) concentration parameter (default of 1, implying the prior is jointly uniform over the space of simplex vectors of that size).

  • The positive scale parameter has a gamma prior (with default shape and scale of 1 - implying a unit-exponential distribution)

  • alternatively, the lkj prior can be used for covariance.

  • as with decov, it decomposes into correlation and variances, however the variances are not further decomposed into a simplex vector and trace.

  • instead the standard deviations (square roots of the variances) for each of the group specific parameters are given half student-t distributions with scale and df parameters specified through the scale (default 10) and df (default 1) arguments of the lkj function.

  • the lkj prior is similar, yet faster than decov

5 Fit the model

In rstanarm, the default priors are designed to be weakly informative. They are chosen to provide moderate regularisation (to help prevent overfitting) and help stabilise the computations.

## Fit a Bayesian random intercept model (LEAF as the blocking factor)
## using rstanarm's default weakly informative, autoscaled priors
tobacco_rstanarm <- stan_glmer(NUMBER ~ (1 | LEAF) + TREATMENT,
  data = tobacco,
  family = gaussian(),
  iter = 5000,    # total MCMC iterations per chain
  warmup = 2000,  # warmup iterations (discarded)
  chains = 3,
  thin = 5,       # retain every 5th draw
  refresh = 0     # suppress sampling progress output
)
tobacco_rstanarm |> prior_summary()
Priors for model 'tobacco_rstanarm' 
------
Intercept (after predictors centered)
  Specified prior:
    ~ normal(location = 31, scale = 2.5)
  Adjusted prior:
    ~ normal(location = 31, scale = 16)

Coefficients
  Specified prior:
    ~ normal(location = 0, scale = 2.5)
  Adjusted prior:
    ~ normal(location = 0, scale = 32)

Auxiliary (sigma)
  Specified prior:
    ~ exponential(rate = 1)
  Adjusted prior:
    ~ exponential(rate = 0.15)

Covariance
 ~ decov(reg. = 1, conc. = 1, shape = 1, scale = 1)
------
See help('prior_summary.stanreg') for more details

This tells us:

  • for the intercept (when the family is Gaussian), a normal prior with a mean of 31 and a standard deviation of 16 is used. The 2.5 is used for scaling all parameter standard deviations. The value of 31 is based on the mean of the response (mean(tobacco$NUMBER)) and the scaled standard deviation of 16 is based on multiplying the scaling factor by the standard deviation of the response.
2.5 * sd(tobacco$NUMBER)
[1] 16.35115
  • for the coefficients (in this case, just the difference between strong and weak inoculation), the default prior is a normal prior centered around 0 with a standard deviation of 2.5. This is then adjusted for the scale of the data by multiplying the 2.5 by the ratio of the standard deviation of the response to the standard deviation of the numerical dummy variables for the predictor (then rounded).
2.5 * sd(tobacco$NUMBER) / apply(model.matrix(~TREATMENT, tobacco), 2, sd)
  (Intercept) TREATMENTWeak 
          Inf      31.66386 
  • the auxiliary prior represents whatever additional prior is required for the nominated model. In the case of a Gaussian model, this will be \(\sigma\), for negative binomial, it will be the reciprocal of dispersion, for gamma, it will be shape, etc. By default in rstanarm, this is an exponential with a rate of 1 which is then adjusted by division by the standard deviation of the response.
1 / sd(tobacco$NUMBER)
[1] 0.1528945

Lets now run with priors only so that we can explore the range of values they allow in the posteriors.

## Sample from the priors only (prior_PD = TRUE ignores the likelihood)
## and plot the implied range of predictions against the raw data
tobacco_rstanarm1 <- update(tobacco_rstanarm, prior_PD = TRUE)
ggpredict(tobacco_rstanarm1) |> plot(show_data = TRUE)
Note: uncertainty of error terms are not taken into account. Consider
  setting `interval` to "prediction". This will call `posterior_predict()`
  instead of `posterior_epred()`.
Data points may overlap. Use the `jitter` argument to add some amount of
  random variation to the location of data points and avoid overplotting.

Conclusions:

  • we see that the range of predictions is fairly wide and the predicted means could range from a small negative number to a relatively large positive number.

The following link provides some guidance about defining priors. [https://github.com/stan-dev/stan/wiki/Prior-Choice-Recommendations]

When defining our own priors, we typically do not want them to be scaled.

If we wanted to define our own priors that were less vague, yet still not likely to bias the outcomes, we could try the following priors (mainly plucked out of thin air):

  • \(\beta_0\): normal centered at 35 with a standard deviation of 7
    • mean of 35: approximately the median of the response (median(tobacco$NUMBER) ≈ 33)
    • sd of 7: since mad(tobacco$NUMBER)
  • \(\beta_1\): normal centred at 0 with a standard deviation of 13
    • sd of 13: since sd(tobacco$NUMBER) / apply(model.matrix(~TREATMENT, tobacco), 2, sd)
  • \(\sigma\): exponential with rate of 0.15
    • since 1 / sd(tobacco$NUMBER)
  • \(\Sigma\): decov with:
    • regularization: the exponent for a LKJ prior on the correlation matrix. A value of 1 (default) implies a joint uniform prior
    • concentration: the concentration parameter for a symmetric Dirichlet distribution. A value of 1 (default) implies a joint uniform distribution
    • shape and scale: the shape and scale parameters for a gamma prior on the scale and scale parameters of the decov prior. A value of 1 for both (default) simplifies the gamma prior to a unit-exponential distribution.

I will also overlay the raw data for comparison.

## Refit with explicit, non-autoscaled priors, sampling from the priors
## only (prior_PD = TRUE) to inspect the prior predictive range
tobacco_rstanarm2 <- stan_glmer(NUMBER ~ (1 | LEAF) + TREATMENT,
  data = tobacco,
  family = gaussian(),
  prior_intercept = normal(35, 7, autoscale = FALSE),          # ~ median/mad of NUMBER
  prior = normal(0, 13, autoscale = FALSE),                    # vague effect prior
  prior_aux = rstanarm::exponential(0.15, autoscale = FALSE),  # ~ 1/sd(NUMBER)
  prior_covariance = decov(1, 1, 1, 1),                        # default decov prior
  prior_PD = TRUE,
  iter = 5000,
  warmup = 1000,
  chains = 3,
  thin = 5,
  refresh = 0
)
tobacco_rstanarm2 |>
  ggpredict() |>
  plot(show_data = TRUE)
Note: uncertainty of error terms are not taken into account. Consider
  setting `interval` to "prediction". This will call `posterior_predict()`
  instead of `posterior_epred()`.
Data points may overlap. Use the `jitter` argument to add some amount of
  random variation to the location of data points and avoid overplotting.

Now lets refit, conditioning on the data.

## Refit conditioned on the data (prior_PD = FALSE) and compare the
## posteriors against the priors for each parameter
tobacco_rstanarm3 <- update(tobacco_rstanarm2, prior_PD = FALSE)
posterior_vs_prior(tobacco_rstanarm3,
  color_by = "vs", group_by = TRUE,
  facet_args = list(scales = "free_y")
)

Drawing from prior...

Conclusions:

  • in each case, the prior is substantially wider than the posterior, suggesting that the posterior is not biased towards the prior.
ggemmeans(tobacco_rstanarm3, ~TREATMENT) |> plot(show_data = TRUE)
Data points may overlap. Use the `jitter` argument to add some amount of
  random variation to the location of data points and avoid overplotting.

ggpredict(tobacco_rstanarm3, ~TREATMENT) |> plot(show_data = TRUE)
Note: uncertainty of error terms are not taken into account. Consider
  setting `interval` to "prediction". This will call `posterior_predict()`
  instead of `posterior_epred()`.
Data points may overlap. Use the `jitter` argument to add some amount of
  random variation to the location of data points and avoid overplotting.

In brms, the default priors are designed to be weakly informative. They are chosen to provide moderate regularisation (to help prevent overfitting) and help stabilise the computations.

Unlike rstanarm, brms models must be compiled before they start sampling. For most models, the compilation of the stan code takes around 45 seconds.

## Define the brms model formula and inspect the default priors brms
## would use for it
tobacco_form <- bf(NUMBER ~ (1|LEAF) + TREATMENT,
                   family = gaussian()
                   )
options(width=100)   # widen console output so the prior table does not wrap
tobacco_form |> get_prior(data=tobacco)
                   prior     class          coef group resp dpar nlpar lb ub       source
                  (flat)         b                                                default
                  (flat)         b TREATMENTWeak                             (vectorized)
 student_t(3, 33.1, 6.5) Intercept                                                default
    student_t(3, 0, 6.5)        sd                                      0         default
    student_t(3, 0, 6.5)        sd                LEAF                  0    (vectorized)
    student_t(3, 0, 6.5)        sd     Intercept  LEAF                  0    (vectorized)
    student_t(3, 0, 6.5)     sigma                                      0         default
## tobacco_brm <- brm(tobacco_form,
##                   iter = 5000,
##                   warmup = 1000,
##                   chains = 3,
##                   thin = 5,
##                   refresh = 0)

The following link provides some guidance about defining priors. [https://github.com/stan-dev/stan/wiki/Prior-Choice-Recommendations]

When defining our own priors, we typically do not want them to be scaled.

If we wanted to define our own priors that were less vague, yet still not likely to bias the outcomes, we could try the following priors (mainly plucked out of thin air):

  • \(\beta_0\): normal centered at 35 with a standard deviation of 3
    • mean of 35: since with(tobacco, tapply(NUMBER, TREATMENT, FUN = median))
    • sd of 3: since with(tobacco, tapply(NUMBER, TREATMENT, FUN = mad))
  • \(\beta_1\): normal centred at 0 with a standard deviation of 10
    • sd of 10: since mad(tobacco$NUMBER) / apply(model.matrix(~TREATMENT, tobacco), 2, mad)
  • \(\sigma\): half-cauchy with parameters 0 and 5
    • since with(tobacco, tapply(NUMBER, TREATMENT, FUN = mad))
  • \(\sigma_j\): half-cauchy with parameters 0 and 3
    • since with(tobacco, tapply(NUMBER, TREATMENT, FUN = mad))
    • we want this prior to have most mass close to zero for the purpose of regularisation
  • \(\Sigma\): decov with:
    • regularization: the exponent for a LKJ prior on the correlation matrix. A value of 1 (default) implies a joint uniform prior
    • concentration: the concentration parameter for a symmetric Dirichlet distribution. A value of 1 (default) implies a joint uniform distribution
    • shape and scale: the shape and scale parameters for a gamma prior on the scale and scale parameters of the decov prior. A value of 1 for both (default) simplifies the gamma prior to a unit-exponential distribution.

Note, for hierarchical models, the model will tend to want to have a large \(\sigma\) in order to fit the data better. It is a good idea to regularise this tendency by applying a prior that has most mass around zero. Suitable candidates include:

  • half-t: as the degrees of freedom approach infinity, this will approach a half-normal
  • half-cauchy: this is essentially a half-t with 1 degree of freedom
  • exponential

I will also overlay the raw data for comparison.

tobacco |>
  group_by(TREATMENT) |>
  summarise(
    median(NUMBER),
    mad(NUMBER)
  )
# A tibble: 2 × 3
  TREATMENT `median(NUMBER)` `mad(NUMBER)`
  <fct>                <dbl>         <dbl>
1 Strong                34.9          2.68
2 Weak                  25.1          3.54
standist::visualize("normal(35,3)", xlim = c(-10, 100))

standist::visualize("normal(0, 10)", xlim = c(-20, 20))

standist::visualize(
  "student_t(3,0,10)",
  xlim = c(-30, 50)
)

## Weakly informative priors for the random intercept model
priors <- prior(normal(35, 3), class = "Intercept") +
  prior(normal(0, 10), class = "b") +
  prior(student_t(3, 0, 5), class = "sigma") +
  prior(student_t(3, 0, 5), class = "sd")
## Model formula: random intercept per LEAF, fixed TREATMENT effect
tobacco_form <- bf(NUMBER ~ (1 | LEAF) + TREATMENT,
  family = gaussian()
)
## Sample from the priors only to explore what values they permit
tobacco_brm2 <- brm(tobacco_form,
  data = tobacco,
  prior = priors,
  sample_prior = "only",   # ignore the likelihood; draw from priors
  iter = 5000,
  warmup = 2500,
  chains = 3, cores = 3,
  thin = 5,
  refresh = 0,
  backend = "rstan"
)
Compiling Stan program...
Start sampling

Note in the above model, the output may have included a warning message alerting us to the presence of divergent transitions. Divergent transitions are an indication that the sampler has encountered poor sampling conditions - the more divergent transitions, the more severe the issue.

Typically, divergent transitions are the result of either:

  • a miss-specified model
  • priors that permit the sampler to drift into unsupported areas
  • complex posterior “features” (with high degrees of curvature) for which the sampler was inadequately tuned during the warmup phase

One useful way to diagnose the cause of divergent transitions is to explore a parallel coordinates plot. Each parameter is on the x-axis and each line represents a single MCMC draw. Divergent transitions will be highlighted in red (by default). If lines pinch together at a particular parameter, then it points to that dimension as the cause of divergent transitions.

## Extract NUTS sampler diagnostics (including the divergence indicator)
tobacco_np <- nuts_params(tobacco_brm2)
tobacco_mcmc <- as.array(tobacco_brm2)
## Parallel coordinates plot: divergent draws are highlighted (red by default)
mcmc_parcoord(x = tobacco_mcmc, np = tobacco_np) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

## Same plot via the brmsfit method
tobacco_brm2 |> mcmc_parcoord(np = tobacco_np) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

## Restrict to population-level (b_) and group-level (r_) parameters
tobacco_brm2 |> mcmc_parcoord(regex_pars = "^b.*|^r.*") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Accordingly, these divergent transitions can be addressed by either:

  • reviewing the model structure
  • adopting tighter priors
  • increase the adaptive delta from the default of 0.8 to closer to 1. The adaptive delta defines the average acceptance probability that the sampler should aspire to during the warmup phase. Increasing the adaptive delta results in a smaller step size (and thus fewer divergences and more robust samples) however it will also result in slower sampling speeds.
## Prior predictive (conditional) effects overlaid with the raw data
tobacco_brm2 |>
  conditional_effects() |>
  plot(points = TRUE)

tobacco_brm2 |>
  ggpredict() |>
  plot(show_data = TRUE)
Note: uncertainty of error terms are not taken into account. Consider
  setting `interval` to "prediction". This will call `posterior_predict()`
  instead of `posterior_epred()`.
Data points may overlap. Use the `jitter` argument to add some amount of
  random variation to the location of data points and avoid overplotting.

## Refit conditioned on the data; sample_prior = "yes" retains prior
## draws for later prior-vs-posterior comparison. Tighter sampler
## settings (adapt_delta, max_treedepth) address divergent transitions.
tobacco_brm3 <- update(tobacco_brm2,
  sample_prior = "yes",
  control = list(adapt_delta = 0.99, max_treedepth = 20),
  refresh = 0, cores = 3
)
The desired updates require recompiling the model
Compiling Stan program...
Start sampling
tobacco_brm3 |>
  conditional_effects() |>
  plot(points = TRUE)

tobacco_brm3 |>
  ggpredict() |>
  plot(show_data = TRUE)
Note: uncertainty of error terms are not taken into account. Consider
  setting `interval` to "prediction". This will call `posterior_predict()`
  instead of `posterior_epred()`.
Data points may overlap. Use the `jitter` argument to add some amount of
  random variation to the location of data points and avoid overplotting.

tobacco_brm3 |> get_variables()
 [1] "b_Intercept"          "b_TREATMENTWeak"      "sd_LEAF__Intercept"  
 [4] "sigma"                "Intercept"            "r_LEAF[L1,Intercept]"
 [7] "r_LEAF[L2,Intercept]" "r_LEAF[L3,Intercept]" "r_LEAF[L4,Intercept]"
[10] "r_LEAF[L5,Intercept]" "r_LEAF[L6,Intercept]" "r_LEAF[L7,Intercept]"
[13] "r_LEAF[L8,Intercept]" "prior_Intercept"      "prior_b"             
[16] "prior_sigma"          "prior_sd_LEAF"        "lprior"              
[19] "lp__"                 "accept_stat__"        "stepsize__"          
[22] "treedepth__"          "n_leapfrog__"         "divergent__"         
[25] "energy__"            
## Plot the posterior (and prior) of the treatment effect via brms'
## hypothesis() interface
tobacco_brm3 |>
  hypothesis("TREATMENTWeak=0") |>
  plot()

## Helper (from helperFunctions.R) comparing priors and posteriors
tobacco_brm3 |> SUYR_prior_and_posterior()

While we are here, we might like to explore a random intercept/slope model

## Priors for the random intercept/slope model; the additional lkj
## prior is for the intercept-slope correlation matrix
priors <- prior(normal(35, 3), class = "Intercept") +
  prior(normal(0, 10), class = "b") +
  prior(student_t(3, 0, 5), class = "sigma") +
  prior(student_t(3, 0, 5), class = "sd") +
  prior(lkj_corr_cholesky(1), class = "cor")
## Random intercept AND random TREATMENT slope per LEAF
tobacco_form <- bf(NUMBER ~ (TREATMENT | LEAF) + TREATMENT,
  family = gaussian()
)

## Fit conditioned on the data; priors are also sampled for later
## prior-vs-posterior comparison
tobacco_brm4 <- brm(tobacco_form,
  data = tobacco,
  prior = priors,
  sample_prior = "yes",
  iter = 5000,
  warmup = 1000,
  chains = 3,
  thin = 5,
  refresh = 0,
  control = list(adapt_delta = 0.99),   # reduce divergent transitions
  backend = "rstan"
)
Compiling Stan program...
Start sampling
Warning: There were 37 divergent transitions after warmup. See
https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
to find out why this is a problem and how to eliminate them.
Warning: Examine the pairs() plot to diagnose sampling problems
Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
Running the chains for more iterations may help. See
https://mc-stan.org/misc/warnings.html#bulk-ess
Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
Running the chains for more iterations may help. See
https://mc-stan.org/misc/warnings.html#tail-ess
tobacco_brm4 |> get_variables()
 [1] "b_Intercept"                        "b_TREATMENTWeak"                   
 [3] "sd_LEAF__Intercept"                 "sd_LEAF__TREATMENTWeak"            
 [5] "cor_LEAF__Intercept__TREATMENTWeak" "sigma"                             
 [7] "Intercept"                          "r_LEAF[L1,Intercept]"              
 [9] "r_LEAF[L2,Intercept]"               "r_LEAF[L3,Intercept]"              
[11] "r_LEAF[L4,Intercept]"               "r_LEAF[L5,Intercept]"              
[13] "r_LEAF[L6,Intercept]"               "r_LEAF[L7,Intercept]"              
[15] "r_LEAF[L8,Intercept]"               "r_LEAF[L1,TREATMENTWeak]"          
[17] "r_LEAF[L2,TREATMENTWeak]"           "r_LEAF[L3,TREATMENTWeak]"          
[19] "r_LEAF[L4,TREATMENTWeak]"           "r_LEAF[L5,TREATMENTWeak]"          
[21] "r_LEAF[L6,TREATMENTWeak]"           "r_LEAF[L7,TREATMENTWeak]"          
[23] "r_LEAF[L8,TREATMENTWeak]"           "prior_Intercept"                   
[25] "prior_b"                            "prior_sigma"                       
[27] "prior_sd_LEAF"                      "prior_cor_LEAF"                    
[29] "lprior"                             "lp__"                              
[31] "accept_stat__"                      "stepsize__"                        
[33] "treedepth__"                        "n_leapfrog__"                      
[35] "divergent__"                        "energy__"                          
## Posterior (and prior) of the treatment effect for the random
## intercept/slope model
tobacco_brm4 |>
  hypothesis("TREATMENTWeak=0") |>
  plot()

## Helper (from helperFunctions.R) comparing priors and posteriors
tobacco_brm4 |> SUYR_prior_and_posterior()

## Compare prior and posterior distributions for the population-level
## parameters, variance components and the intercept-slope correlation.
## Uses as_draws_df() rather than the deprecated posterior_samples()
## (see ?as_draws for the recommended alternatives).
tobacco_brm4 |>
  as_draws_df() |>
  ## drop the log-posterior and the .chain/.iteration/.draw metadata
  ## columns added by as_draws_df()
  dplyr::select(-`lp__`, -starts_with(".")) |>
  pivot_longer(everything(), names_to = "key") |>
  filter(!str_detect(key, "^r")) |>   # exclude group-level (random) effects
  mutate(
    ## prior draws are named prior_*; everything else is posterior
    Type = ifelse(str_detect(key, "prior"), "Prior", "Posterior"),
    ## classify each parameter so priors and posteriors share a facet
    Class = case_when(
      str_detect(key, "(^b|^prior).*Intercept$") ~ "Intercept",
      str_detect(key, "b_TREATMENT|prior_b") ~ "TREATMENT",
      str_detect(key, "sd") ~ "sd",
      str_detect(key, "^cor|prior_cor") ~ "cor",
      str_detect(key, "sigma") ~ "sigma"
    ),
    Par = str_replace(key, "b_", "")
  ) |>
  ggplot(aes(x = Type, y = value, color = Par)) +
  stat_pointinterval(position = position_dodge()) +
  facet_wrap(~Class, scales = "free")

We can compare the two models using LOO

(l.1 <- tobacco_brm3 |> loo())
Warning: Found 7 observations with a pareto_k > 0.69 in model 'tobacco_brm3'.
We recommend to run more iterations to get at least about 2200 posterior draws
to improve LOO-CV approximation accuracy.

Computed from 1500 by 16 log-likelihood matrix.

         Estimate  SE
elpd_loo    -50.2 2.5
p_loo         6.6 1.4
looic       100.4 5.1
------
MCSE of elpd_loo is NA.
MCSE and ESS estimates assume MCMC draws (r_eff in [0.7, 1.0]).

Pareto k diagnostic values:
                          Count Pct.    Min. ESS
(-Inf, 0.69]   (good)     9     56.2%   325     
   (0.69, 1]   (bad)      7     43.8%   <NA>    
    (1, Inf)   (very bad) 0      0.0%   <NA>    
See help('pareto-k-diagnostic') for details.
(l.2 <- tobacco_brm4 |> loo())
Warning: Found 3 observations with a pareto_k > 0.7 in model 'tobacco_brm4'. We
recommend to set 'moment_match = TRUE' in order to perform moment matching for
problematic observations.

Computed from 2400 by 16 log-likelihood matrix.

         Estimate  SE
elpd_loo    -49.1 2.5
p_loo        10.0 1.8
looic        98.3 5.0
------
MCSE of elpd_loo is NA.
MCSE and ESS estimates assume MCMC draws (r_eff in [0.0, 0.1]).

Pareto k diagnostic values:
                         Count Pct.    Min. ESS
(-Inf, 0.7]   (good)     13    81.2%   10      
   (0.7, 1]   (bad)       3    18.8%   <NA>    
   (1, Inf)   (very bad)  0     0.0%   <NA>    
See help('pareto-k-diagnostic') for details.
loo_compare(l.1, l.2)
             elpd_diff se_diff
tobacco_brm4  0.0       0.0   
tobacco_brm3 -1.1       0.7   

Whilst the random intercept/slope model has a slightly smaller LOOIC, when we consider the standard error (or rather an interval of ±2.5×SE), there is no support for this more complex model over the simpler random intercept only model. Consequently, we will continue with the random intercept model.

6 MCMC sampling diagnostics

The bayesplot package offers a range of MCMC diagnostics as well as Posterior Predictive Checks (PPC), all of which have a convenient plot() interface. Let's start with the MCMC diagnostics.

See list of available diagnostics by name
available_mcmc()
bayesplot MCMC module:
  mcmc_acf
  mcmc_acf_bar
  mcmc_areas
  mcmc_areas_ridges
  mcmc_combo
  mcmc_dens
  mcmc_dens_chains
  mcmc_dens_overlay
  mcmc_hex
  mcmc_hist
  mcmc_hist_by_chain
  mcmc_intervals
  mcmc_neff
  mcmc_neff_hist
  mcmc_nuts_acceptance
  mcmc_nuts_divergence
  mcmc_nuts_energy
  mcmc_nuts_stepsize
  mcmc_nuts_treedepth
  mcmc_pairs
  mcmc_parcoord
  mcmc_rank_ecdf
  mcmc_rank_hist
  mcmc_rank_overlay
  mcmc_recover_hist
  mcmc_recover_intervals
  mcmc_recover_scatter
  mcmc_rhat
  mcmc_rhat_hist
  mcmc_scatter
  mcmc_trace
  mcmc_trace_highlight
  mcmc_violin

Of these, we will focus on:

  • trace: this plots the estimates of each parameter over the post-warmup length of each MCMC chain. Each chain is plotted in a different shade of blue, with each parameter in its own facet. Ideally, each trace should just look like noise without any discernible drift and each of the traces for a specific parameter should look the same (i.e, should not be displaced above or below any other trace for that parameter).
tobacco_brm3 |> mcmc_plot(type = "trace")
No divergences to plot.

The chains appear well mixed and very similar

  • acf_bar (autocorrelation function): plots the autocorrelation between successive MCMC sample lags for each parameter and each chain
tobacco_brm3 |> mcmc_plot(type = "acf_bar")

There is no evidence of autocorrelation in the MCMC samples

  • rhat_hist: Rhat is a scale reduction factor measure of convergence between the chains. The closer the values are to 1, the more the chains have converged. Values greater than 1.05 indicate a lack of convergence. There will be an Rhat value for each parameter estimated.
tobacco_brm3 |> mcmc_plot(type = "rhat_hist")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

All Rhat values are below 1.05, suggesting the chains have converged.

  • neff_hist (number of effective samples): the ratio of the number of effective samples (those not rejected by the sampler) to the number of samples provides an indication of the effectiveness (and efficiency) of the MCMC sampler. Ratios that are less than 0.5 for a parameter suggest that the sampler spent considerable time in difficult areas of the sampling domain and rejected more than half of the samples (replacing them with the previous effective sample).

    If the ratios are low, tightening the priors may help.

tobacco_brm2 |> mcmc_plot(type = "neff_hist")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Ratios all very high.

More diagnostics
tobacco_brm3 |> mcmc_plot(type = "combo")

tobacco_brm3 |> mcmc_plot(type = "violin")

The rstan package offers a range of MCMC diagnostics. Let's start with the MCMC diagnostics.

Of these, we will focus on:

  • stan_trace: this plots the estimates of each parameter over the post-warmup length of each MCMC chain. Each chain is plotted in a different colour, with each parameter in its own facet. Ideally, each trace should just look like noise without any discernible drift and each of the traces for a specific parameter should look the same (i.e, should not be displaced above or below any other trace for that parameter).
tobacco_brm3 |> get_variables()
 [1] "b_Intercept"          "b_TREATMENTWeak"      "sd_LEAF__Intercept"  
 [4] "sigma"                "Intercept"            "r_LEAF[L1,Intercept]"
 [7] "r_LEAF[L2,Intercept]" "r_LEAF[L3,Intercept]" "r_LEAF[L4,Intercept]"
[10] "r_LEAF[L5,Intercept]" "r_LEAF[L6,Intercept]" "r_LEAF[L7,Intercept]"
[13] "r_LEAF[L8,Intercept]" "prior_Intercept"      "prior_b"             
[16] "prior_sigma"          "prior_sd_LEAF"        "lprior"              
[19] "lp__"                 "accept_stat__"        "stepsize__"          
[22] "treedepth__"          "n_leapfrog__"         "divergent__"         
[25] "energy__"            
## Retain only the population-level effects (b_*), the residual standard
## deviation (sigma) and the group-level standard deviations (sd_*);
## non-matching names become NA and are removed by na.omit()
pars <- tobacco_brm3 |> get_variables()
pars <- str_extract(pars, "^b_.*|^sigma$|^sd.*") |> na.omit()

## Traceplots (per chain) for the retained parameters
tobacco_brm3$fit |> stan_trace(pars = pars)

The chains appear well mixed and very similar

  • stan_acf (autocorrelation function): plots the autocorrelation between successive MCMC sample lags for each parameter and each chain
tobacco_brm3$fit |>
  stan_ac(pars = pars)

There is no evidence of autocorrelation in the MCMC samples

  • stan_rhat: Rhat is a scale reduction factor measure of convergence between the chains. The closer the values are to 1, the more the chains have converged. Values greater than 1.05 indicate a lack of convergence. There will be an Rhat value for each parameter estimated.
tobacco_brm3$fit |> stan_rhat()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

All Rhat values are below 1.05, suggesting the chains have converged.

  • stan_ess (number of effective samples): the ratio of the number of effective samples (those not rejected by the sampler) to the number of samples provides an indication of the effectiveness (and efficiency) of the MCMC sampler. Ratios that are less than 0.5 for a parameter suggest that the sampler spent considerable time in difficult areas of the sampling domain and rejected more than half of the samples (replacing them with the previous effective sample).

    If the ratios are low, tightening the priors may help.

tobacco_brm2$fit |> stan_ess()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Ratios all very high.

tobacco_brm3$fit |>
  stan_dens(separate_chains = TRUE, pars = pars)

The ggmcmc package also has a set of MCMC diagnostic functions. Let's start with the MCMC diagnostics.

Of these, we will focus on:

  • ggs_traceplot: this plots the estimates of each parameter over the post-warmup length of each MCMC chain. Each chain is plotted in a different colour, with each parameter in its own facet. Ideally, each trace should just look like noise without any discernible drift and each of the traces for a specific parameter should look the same (i.e, should not be displaced above or below any other trace for that parameter).
tobacco_ggs <- tobacco_brm3 |> ggs(burnin = FALSE, inc_warmup = FALSE)
Warning in custom.sort(D$Parameter): NAs introduced by coercion
Warning in custom.sort(D$Parameter): NAs introduced by coercion
tobacco_ggs |> ggs_traceplot()

The chains appear well mixed and very similar

  • ggs_autocorrelation (autocorrelation function): plots the autocorrelation between successive MCMC sample lags for each parameter and each chain
ggs_autocorrelation(tobacco_ggs)

There is no evidence of autocorrelation in the MCMC samples

  • ggs_Rhat: Rhat is a scale reduction factor measure of convergence between the chains. The closer the values are to 1, the more the chains have converged. Values greater than 1.05 indicate a lack of convergence. There will be an Rhat value for each parameter estimated.
ggs_Rhat(tobacco_ggs)

All Rhat values are below 1.05, suggesting the chains have converged.

  • ggs_effective (number of effective samples): the ratio of the number of effective samples (those not rejected by the sampler) to the number of samples provides an indication of the effectiveness (and efficiency) of the MCMC sampler. Ratios that are less than 0.5 for a parameter suggest that the sampler spent considerable time in difficult areas of the sampling domain and rejected more than half of the samples (replacing them with the previous effective sample).

    If the ratios are low, tightening the priors may help.

ggs_effective(tobacco_ggs)
Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
dplyr 1.1.0.
ℹ Please use `reframe()` instead.
ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
  always returns an ungrouped data frame and adjust accordingly.
ℹ The deprecated feature was likely used in the ggmcmc package.
  Please report the issue at <https://github.com/xfim/ggmcmc/issues/>.

Ratios all very high.

More diagnostics
ggs_crosscorrelation(tobacco_ggs)

ggs_grb(tobacco_ggs)

Posterior predictive checks provide additional diagnostics about the fit of the model. Specifically, they provide a comparison between predictions drawn from the model and the observed data used to train the model.

See list of available diagnostics by name
available_ppc()
bayesplot PPC module:
  ppc_bars
  ppc_bars_grouped
  ppc_boxplot
  ppc_dens
  ppc_dens_overlay
  ppc_dens_overlay_grouped
  ppc_dots
  ppc_ecdf_overlay
  ppc_ecdf_overlay_grouped
  ppc_error_binned
  ppc_error_hist
  ppc_error_hist_grouped
  ppc_error_scatter
  ppc_error_scatter_avg
  ppc_error_scatter_avg_grouped
  ppc_error_scatter_avg_vs_x
  ppc_freqpoly
  ppc_freqpoly_grouped
  ppc_hist
  ppc_intervals
  ppc_intervals_grouped
  ppc_km_overlay
  ppc_km_overlay_grouped
  ppc_loo_intervals
  ppc_loo_pit_ecdf
  ppc_loo_pit_overlay
  ppc_loo_pit_qq
  ppc_loo_ribbon
  ppc_pit_ecdf
  ppc_pit_ecdf_grouped
  ppc_ribbon
  ppc_ribbon_grouped
  ppc_rootogram
  ppc_scatter
  ppc_scatter_avg
  ppc_scatter_avg_grouped
  ppc_stat
  ppc_stat_2d
  ppc_stat_freqpoly
  ppc_stat_freqpoly_grouped
  ppc_stat_grouped
  ppc_violin_grouped
  • dens_overlay: plots the density distribution of the observed data (black line) overlaid on top of density distributions generated from draws from the model (light blue; 100 draws in the code below). Ideally, these realisations should be roughly consistent with the observed data.
tobacco_brm3 |> pp_check(type = "dens_overlay", ndraws = 100)

The model draws appear to be consistent with the observed data.

  • error_scatter_avg: this plots the observed values against the average residuals. Similar to a residual plot, we do not want to see any patterns in this plot. Note, this is not really that useful for models that involve a binomial response
# tobacco_brm3 |> pp_check(type = 'error_scatter_avg')

This is not really interpretable

  • intervals: plots the observed data overlaid on top of posterior predictions associated with each level of the predictor. Ideally, the observed data should all fall within the predictive intervals.
tobacco_brm3 |> pp_check(group = "TREATMENT", type = "intervals")
Using all posterior draws for ppc type 'intervals' by default.
Warning: The following arguments were unrecognized and ignored: group

tobacco_brm3 |> pp_check(group = "TREATMENT", type = "intervals_grouped")
Using all posterior draws for ppc type 'intervals_grouped' by default.

tobacco_brm3 |> pp_check(group = "TREATMENT", type = "violin_grouped")
Using all posterior draws for ppc type 'violin_grouped' by default.

The shinystan package allows the full suite of MCMC diagnostics and posterior predictive checks to be accessed via a web interface.

# library(shinystan)
# launch_shinystan(tobacco_brm2)

DHARMa residuals provide very useful diagnostics. Unfortunately, we cannot directly use the simulateResiduals() function to generate the simulated residuals. However, if we are willing to calculate some of the components ourselves, we can still obtain the simulated residuals from the fitted stan model.

We need to supply:

  • simulated (predicted) responses associated with each observation.
  • observed values
  • fitted (predicted) responses (averaged) associated with each observation
## Posterior predictive draws: one row per draw, one column per observation
preds <- tobacco_brm4 |> posterior_predict(ndraws = 250, summary = FALSE)
## Build DHARMa residuals manually:
## - simulatedResponse must be observations x draws, hence the transpose
## - fittedPredictedResponse is the per-observation median prediction
## - integerResponse = FALSE because NUMBER is modelled as continuous (Gaussian)
tobacco_resids <- createDHARMa(
  simulatedResponse = t(preds),
  observedResponse = tobacco$NUMBER,
  fittedPredictedResponse = apply(preds, 2, median),
  integerResponse = FALSE
)
plot(tobacco_resids, quantreg = FALSE)

## Alternatively, build the DHARMa residuals with the helper from
## helperFunctions.R, then assemble four diagnostic panels with
## patchwork::wrap_elements() (uniformity, residuals vs a constant,
## residuals vs fitted, dispersion)
tobacco_resids <- make_brms_dharma_res(tobacco_brm3, integerResponse = FALSE)
wrap_elements(~ testUniformity(tobacco_resids)) +
  wrap_elements(~ plotResiduals(tobacco_resids, form = factor(rep(1, nrow(tobacco))))) +
  wrap_elements(~ plotResiduals(tobacco_resids, quantreg = FALSE)) +
  wrap_elements(~ testDispersion(tobacco_resids))

Conclusions:

  • the simulated residuals do not suggest any issues with the residuals
  • there is no evidence of a lack of fit.

7 Partial effects plots

tobacco_brm3 |>
  conditional_effects() |>
  plot(points = TRUE)

tobacco_brm3 |>
  ggpredict() |>
  plot(show_data = TRUE)
Note: uncertainty of error terms are not taken into account. Consider
  setting `interval` to "prediction". This will call `posterior_predict()`
  instead of `posterior_epred()`.
Data points may overlap. Use the `jitter` argument to add some amount of
  random variation to the location of data points and avoid overplotting.

tobacco_brm3 |>
  ggemmeans(~TREATMENT) |>
  plot(show_data = TRUE) +
  geom_point(data = tobacco, aes(y = NUMBER, x = as.numeric(TREATMENT)))
Data points may overlap. Use the `jitter` argument to add some amount of
  random variation to the location of data points and avoid overplotting.

## Partial observations: fitted values plus residuals, i.e. the observed
## data re-expressed relative to the fitted model (used as points on the
## partial effects plots below)
Partial.obs <- tobacco_brm3$data |>
  mutate(
    Pred = predict(tobacco_brm3)[, "Estimate"], # posterior prediction (point estimate)
    Resid = resid(tobacco_brm3)[, "Estimate"], # posterior residual (point estimate)
    Obs = Pred + Resid
  )

## Partial effects plot from posterior expectations.
## fitted_draws() is deprecated (confusing name); epred_draws() returns the
## expectation of the posterior predictive as the `.epred` column.
tobacco_brm3 |>
  epred_draws(newdata = tobacco) |>
  median_hdci() |> # median and highest-density continuous interval per group
  ggplot(aes(x = TREATMENT, y = .epred)) +
  geom_pointrange(aes(ymin = .lower, ymax = .upper)) +
  geom_line() +
  geom_point(
    data = Partial.obs, aes(y = Obs, x = TREATMENT), color = "red",
    position = position_nudge(x = 0.1)
  ) +
  geom_point(
    data = tobacco, aes(y = NUMBER, x = TREATMENT),
    position = position_nudge(x = 0.05)
  )
Warning: `fitted_draws` and `add_fitted_draws` are deprecated as their names were confusing.
- Use [add_]epred_draws() to get the expectation of the posterior predictive.
- Use [add_]linpred_draws() to get the distribution of the linear predictor.
- For example, you used [add_]fitted_draws(..., scale = "response"), which
  means you most likely want [add_]epred_draws(...).
NOTE: When updating to the new functions, note that the `model` parameter is now
  named `object` and the `n` parameter is now named `ndraws`.

tobacco_brm3 |>
  epred_draws(newdata = tobacco) |>
  ggplot() +
  geom_violin(data = tobacco, aes(y = NUMBER, x = TREATMENT), fill = "blue", alpha = 0.2) +
  geom_point(
    data = tobacco, aes(y = NUMBER, x = TREATMENT),
    position = position_jitter(width = 0.1, height = 0)
  ) +
  geom_violin(aes(y = .epred, x = TREATMENT), fill = "orange", alpha = 0.2) +
  theme_bw()

8 Model investigation

The summary() method generates simple summaries (mean, standard deviation as well as 10, 50 and 90 percentiles).

tobacco_brm3 |> summary()
 Family: gaussian 
  Links: mu = identity; sigma = identity 
Formula: NUMBER ~ (1 | LEAF) + TREATMENT 
   Data: tobacco (Number of observations: 16) 
  Draws: 3 chains, each with iter = 5000; warmup = 2500; thin = 5;
         total post-warmup draws = 1500

Multilevel Hyperparameters:
~LEAF (Number of levels: 8) 
              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept)     3.53      1.90     0.27     7.61 1.00      947     1250

Regression Coefficients:
              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept        35.73      1.87    32.12    39.78 1.00     1473     1457
TREATMENTWeak    -7.52      2.20   -11.73    -2.98 1.00     1244     1385

Further Distributional Parameters:
      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma     4.40      1.15     2.61     7.10 1.00     1207     1312

Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).

Conclusions:

  • the intercept indicates that the Strong treatment has an average of 35.73 lesions.
  • the Weak treatment has 7.52 fewer lesions.
  • the standard deviation of the intercepts across all Leaves is 3.53
  • the magnitude of the variability between Leaves (sd 3.53) is very similar to the variability within Leaves (sigma, 4.4).
## Custom posterior summaries: median, 95% HPD interval, number of draws,
## tail probabilities and the standard convergence diagnostics
tobacco_brm3 |>
  summarise_draws(
    median,
    ~ HDInterval::hdi(.x), # highest posterior density interval (lower/upper)
    N = ~ length(.x), # number of posterior draws
    Pl = ~ mean(.x < 0), # probability the parameter is negative
    Pg = ~ mean(.x > 0), # probability the parameter is positive
    rhat,
    ess_bulk,
    ess_tail
  )
# A tibble: 19 × 10
   variable    median    lower  upper     N     Pl    Pg  rhat ess_bulk ess_tail
   <chr>        <dbl>    <dbl>  <dbl> <dbl>  <dbl> <dbl> <dbl>    <dbl>    <dbl>
 1 b_Interce…  35.7    3.20e+1  39.5   1500 0      1     1.00     1473.    1457.
 2 b_TREATME…  -7.58  -1.15e+1  -2.86  1500 0.998  0.002 1.00     1244.    1385.
 3 sd_LEAF__…   3.45   5.68e-3   6.87  1500 0      1     1.00      947.    1250.
 4 sigma        4.26   2.39e+0   6.61  1500 0      1     0.999    1207.    1312.
 5 Intercept   31.9    2.90e+1  35.2   1500 0      1     1.00     1475.    1290.
 6 r_LEAF[L1…  -0.631 -5.21e+0   4.30  1500 0.638  0.362 0.998    1525.    1543.
 7 r_LEAF[L2…  -1.48  -7.33e+0   2.68  1500 0.753  0.247 1.00     1335.    1321.
 8 r_LEAF[L3…  -0.619 -5.92e+0   3.54  1500 0.653  0.347 1.00     1445.    1457.
 9 r_LEAF[L4…  -2.29  -8.18e+0   2.12  1500 0.82   0.18  1.00     1367.    1312.
10 r_LEAF[L5…  -1.31  -7.00e+0   2.98  1500 0.739  0.261 1.00     1131.    1393.
11 r_LEAF[L6…   1.87  -1.99e+0   7.54  1500 0.187  0.813 1.00     1336.    1538.
12 r_LEAF[L7…   3.91  -7.27e-1   9.60  1500 0.0767 0.923 0.999    1256.    1458.
13 r_LEAF[L8…  -2.55  -8.36e+0   2.05  1500 0.855  0.145 1.00     1221.    1384.
14 prior_Int…  34.9    2.89e+1  40.7   1500 0      1     1.000    1583.    1497.
15 prior_b      0.205 -1.99e+1  20.5   1500 0.493  0.507 1.000    1474.    1403.
16 prior_sig…   3.77   1.19e-2  15.4   1500 0      1     1.01     1550.    1338.
17 prior_sd_…   3.89   2.36e-4  14.5   1500 0      1     0.999    1485.    1501.
18 lprior     -10.8   -1.20e+1  -9.88  1500 1      0     1.000    1544.    1499.
19 lp__       -64.8   -7.30e+1 -58.2   1500 1      0     1.00      883.    1269.
## or if you want to exclude some parameters
tobacco_brm3 |>
  summarise_draws(
    median,
    ~ HDInterval::hdi(.x),
    rhat,
    ess_bulk,
    ess_tail
  ) |>
  filter(str_detect(variable, "prior|^r_|^lp__", negate = TRUE))
# A tibble: 5 × 7
  variable           median     lower upper  rhat ess_bulk ess_tail
  <chr>               <dbl>     <dbl> <dbl> <dbl>    <dbl>    <dbl>
1 b_Intercept         35.7   32.0     39.5  1.00     1473.    1457.
2 b_TREATMENTWeak     -7.58 -11.5     -2.86 1.00     1244.    1385.
3 sd_LEAF__Intercept   3.45   0.00568  6.87 1.00      947.    1250.
4 sigma                4.26   2.39     6.61 0.999    1207.    1312.
5 Intercept           31.9   29.0     35.2  1.00     1475.    1290.
tobacco_brm3 |> as_draws_df()
# A draws_df: 500 iterations, 3 chains, and 19 variables
   b_Intercept b_TREATMENTWeak sd_LEAF__Intercept sigma Intercept
1           35            -6.1               0.11   4.7        32
2           37            -9.1               2.79   5.4        33
3           34            -5.9               6.78   3.9        31
4           36            -6.8               0.53   6.2        33
5           38            -8.6               3.65   3.4        34
6           37            -8.8               7.99   4.3        32
7           34            -6.9               4.66   4.0        31
8           39           -13.8               5.01   5.4        32
9           33            -6.1               5.61   4.0        30
10          37            -8.1               8.05   2.9        33
   r_LEAF[L1,Intercept] r_LEAF[L2,Intercept] r_LEAF[L3,Intercept]
1                0.0324                0.228               -0.016
2               -1.8753               -0.886               -2.600
3               -1.8856               -0.994                7.809
4                0.0079               -0.555               -0.258
5               -4.3540               -4.230               -2.501
6               -5.9043               -0.017               -0.988
7                1.6895                0.859               -2.292
8                3.0922               -3.153                1.685
9               -1.3981               -0.232                1.927
10              -3.2394               -4.950               -2.209
# ... with 1490 more draws, and 11 more variables
# ... hidden reserved variables {'.chain', '.iteration', '.draw'}
tobacco_brm3 |>
  as_draws_df() |>
  dplyr::select(matches("^b_.*|^sigma$|^sd_.*")) |>
  summarise_draws(
    median,
    ~ HDInterval::hdi(.x),
    Pg = ~ mean(.x > 0),
    Pl = ~ mean(.x < 0),
    rhat,
    ess_bulk,
    ess_tail
  )
Warning: Dropping 'draws_df' class as required metadata was removed.
# A tibble: 4 × 9
  variable           median     lower upper    Pg    Pl  rhat ess_bulk ess_tail
  <chr>               <dbl>     <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
1 b_Intercept         35.7   32.0     39.5  1     0     1.00     1461.    1425.
2 b_TREATMENTWeak     -7.58 -11.5     -2.86 0.002 0.998 1.00     1218.    1366.
3 sd_LEAF__Intercept   3.45   0.00568  6.87 1     0     1.00      934.    1241.
4 sigma                4.26   2.39     6.61 1     0     1.000    1199.    1287.
## or if you want to exclude some parameters
tobacco_brm3 |>
  as_draws_df() |>
  summarise_draws(
    median,
    ~ HDInterval::hdi(.x),
    rhat,
    ess_bulk,
    ess_tail
  ) |>
  filter(str_detect(variable, "prior|^r_|^lp__", negate = TRUE))
# A tibble: 5 × 7
  variable           median     lower upper  rhat ess_bulk ess_tail
  <chr>               <dbl>     <dbl> <dbl> <dbl>    <dbl>    <dbl>
1 b_Intercept         35.7   32.0     39.5  1.00     1473.    1457.
2 b_TREATMENTWeak     -7.58 -11.5     -2.86 1.00     1244.    1385.
3 sd_LEAF__Intercept   3.45   0.00568  6.87 1.00      947.    1250.
4 sigma                4.26   2.39     6.61 0.999    1207.    1312.
5 Intercept           31.9   29.0     35.2  1.00     1475.    1290.
tobacco_brm3$fit |>
  tidyMCMC(
    estimate.method = "median",
    conf.int = TRUE, conf.method = "HPDinterval",
    rhat = TRUE, ess = TRUE
  )
# A tibble: 18 × 7
   term                 estimate std.error   conf.low conf.high  rhat   ess
   <chr>                   <dbl>     <dbl>      <dbl>     <dbl> <dbl> <int>
 1 b_Intercept            35.7       1.87   32.0          39.5  0.999  1476
 2 b_TREATMENTWeak        -7.52      2.20  -11.5          -2.86 0.998  1219
 3 sd_LEAF__Intercept      3.53      1.90    0.00568       6.87 0.999   981
 4 sigma                   4.40      1.15    2.39          6.61 0.999  1214
 5 Intercept              32.0       1.53   29.0          35.2  1.000  1440
 6 r_LEAF[L1,Intercept]   -0.836     2.31   -5.21          4.30 0.998  1509
 7 r_LEAF[L2,Intercept]   -1.77      2.57   -7.33          2.68 1.00   1361
 8 r_LEAF[L3,Intercept]   -0.955     2.33   -5.92          3.54 1.00   1440
 9 r_LEAF[L4,Intercept]   -2.57      2.76   -8.18          2.12 1.00   1341
10 r_LEAF[L5,Intercept]   -1.61      2.49   -7.00          2.98 1.00   1083
11 r_LEAF[L6,Intercept]    2.17      2.49   -1.99          7.54 0.999  1341
12 r_LEAF[L7,Intercept]    3.95      2.94   -0.727         9.60 0.999  1262
13 r_LEAF[L8,Intercept]   -2.85      2.77   -8.36          2.05 0.999  1218
14 prior_Intercept        34.9       3.09   28.9          40.7  0.999  1594
15 prior_b                 0.121    10.2   -19.9          20.5  1.000  1470
16 prior_sigma             5.31      5.88    0.0119       15.4  1.00   1393
17 prior_sd_LEAF           5.34      5.60    0.000236     14.5  0.999  1469
18 lprior                -10.9       0.586 -12.0          -9.88 1.000  1532

Conclusions:

see above

tobacco_brm3 |> get_variables()
 [1] "b_Intercept"          "b_TREATMENTWeak"      "sd_LEAF__Intercept"  
 [4] "sigma"                "Intercept"            "r_LEAF[L1,Intercept]"
 [7] "r_LEAF[L2,Intercept]" "r_LEAF[L3,Intercept]" "r_LEAF[L4,Intercept]"
[10] "r_LEAF[L5,Intercept]" "r_LEAF[L6,Intercept]" "r_LEAF[L7,Intercept]"
[13] "r_LEAF[L8,Intercept]" "prior_Intercept"      "prior_b"             
[16] "prior_sigma"          "prior_sd_LEAF"        "lprior"              
[19] "lp__"                 "accept_stat__"        "stepsize__"          
[22] "treedepth__"          "n_leapfrog__"         "divergent__"         
[25] "energy__"            
tobacco_draw <- tobacco_brm3 |>
  gather_draws(`b.Intercept.*|b_TREAT.*|sd_.*|sigma`, regex = TRUE)
tobacco_draw
# A tibble: 6,000 × 5
# Groups:   .variable [4]
   .chain .iteration .draw .variable   .value
    <int>      <int> <int> <chr>        <dbl>
 1      1          1     1 b_Intercept   34.6
 2      1          2     2 b_Intercept   37.2
 3      1          3     3 b_Intercept   34.0
 4      1          4     4 b_Intercept   36.5
 5      1          5     5 b_Intercept   37.9
 6      1          6     6 b_Intercept   36.7
 7      1          7     7 b_Intercept   34.3
 8      1          8     8 b_Intercept   39.1
 9      1          9     9 b_Intercept   33.4
10      1         10    10 b_Intercept   36.9
# ℹ 5,990 more rows

We can then summarise this

tobacco_draw |> median_hdci()
# A tibble: 4 × 7
  .variable          .value   .lower .upper .width .point .interval
  <chr>               <dbl>    <dbl>  <dbl>  <dbl> <chr>  <chr>    
1 b_Intercept         35.7   32.0     39.5    0.95 median hdci     
2 b_TREATMENTWeak     -7.58 -11.5     -2.86   0.95 median hdci     
3 sd_LEAF__Intercept   3.45   0.0206   6.93   0.95 median hdci     
4 sigma                4.26   2.47     6.70   0.95 median hdci     
## Slab plot of the fixed effects, shaded by credible-interval band.
## stat() was deprecated in ggplot2 3.4.0 (the rendered warning below
## confirms this); after_stat() is the current idiom.
tobacco_brm3 |>
  gather_draws(`b_Intercept.*|b_TREAT.*`, regex = TRUE) |>
  ggplot() +
  geom_vline(xintercept = 0, linetype = "dashed") +
  stat_slab(aes(
    x = .value, y = .variable,
    fill = after_stat(ggdist::cut_cdf_qi(cdf,
      .width = c(0.5, 0.8, 0.95),
      labels = scales::percent_format()
    ))
  ), color = "black") +
  scale_fill_brewer("Interval", direction = -1, na.translate = FALSE)
Warning: `stat(ggdist::cut_cdf_qi(cdf, .width = c(0.5, 0.8, 0.95), labels =
scales::percent_format()))` was deprecated in ggplot2 3.4.0.
ℹ Please use `after_stat(ggdist::cut_cdf_qi(cdf, .width = c(0.5, 0.8, 0.95),
  labels = scales::percent_format()))` instead.

tobacco_brm3 |>
  gather_draws(`.Intercept.*|.*TREAT.*`, regex = TRUE) |>
  ggplot() +
  stat_halfeye(aes(x = .value, y = .variable)) +
  facet_wrap(~.variable, scales = "free")

tobacco_brm3 |>
  gather_draws(`.Intercept.*|.*TREAT.*`, regex = TRUE) |>
  ggplot() +
  stat_halfeye(aes(x = .value, y = .variable)) +
  theme_classic()

tobacco_brm3$fit |> plot(type = "intervals")
'pars' not specified. Showing first 10 parameters by default.
ci_level: 0.8 (80% intervals)
outer_level: 0.95 (95% intervals)

tobacco_brm3 |>
  gather_draws(`^b_.*`, regex = TRUE) |>
  filter(.variable != "b_Intercept") |>
  ggplot() +
  stat_halfeye(aes(x = .value, y = .variable)) +
  facet_wrap(~.variable, scales = "free")

tobacco_brm3 |>
  gather_draws(`^b_.*`, regex = TRUE) |>
  filter(.variable != "b_Intercept") |>
  ggplot() +
  stat_halfeye(aes(x = .value, y = .variable)) +
  geom_vline(xintercept = 0, linetype = "dashed")

tobacco_brm3 |>
  gather_draws(`^b_.*`, regex = TRUE) |>
  filter(.variable != "b_Intercept") |>
  ggplot() +
  geom_density_ridges(aes(x = .value, y = .variable), alpha = 0.4) +
  geom_vline(xintercept = 0, linetype = "dashed")
Picking joint bandwidth of 0.426

## Or in colour
## Gradient-filled ridge plot on a (log2) exponentiated scale with 95%
## quantile lines. stat(x) was deprecated in ggplot2 3.4.0; use
## after_stat(x) instead.
tobacco_brm3 |>
  gather_draws(`^b_.*`, regex = TRUE) |>
  filter(.variable != "b_Intercept") |>
  ggplot() +
  geom_density_ridges_gradient(
    aes(
      x = exp(.value),
      y = .variable,
      fill = after_stat(x)
    ),
    alpha = 0.4, colour = "white",
    quantile_lines = TRUE,
    quantiles = c(0.025, 0.975)
  ) +
  geom_vline(xintercept = 1, linetype = "dashed") +
  scale_x_continuous(trans = scales::log2_trans()) +
  scale_fill_viridis_c(option = "C")
Picking joint bandwidth of 0.615

tobacco_brm3 |> tidy_draws()
# A tibble: 1,500 × 28
   .chain .iteration .draw b_Intercept b_TREATMENTWeak sd_LEAF__Intercept sigma
    <int>      <int> <int>       <dbl>           <dbl>              <dbl> <dbl>
 1      1          1     1        34.6           -6.10              0.111  4.71
 2      1          2     2        37.2           -9.13              2.79   5.37
 3      1          3     3        34.0           -5.90              6.78   3.88
 4      1          4     4        36.5           -6.76              0.532  6.22
 5      1          5     5        37.9           -8.55              3.65   3.38
 6      1          6     6        36.7           -8.78              7.99   4.31
 7      1          7     7        34.3           -6.86              4.66   4.04
 8      1          8     8        39.1          -13.8               5.01   5.36
 9      1          9     9        33.4           -6.14              5.61   4.03
10      1         10    10        36.9           -8.12              8.05   2.94
# ℹ 1,490 more rows
# ℹ 21 more variables: Intercept <dbl>, `r_LEAF[L1,Intercept]` <dbl>,
#   `r_LEAF[L2,Intercept]` <dbl>, `r_LEAF[L3,Intercept]` <dbl>,
#   `r_LEAF[L4,Intercept]` <dbl>, `r_LEAF[L5,Intercept]` <dbl>,
#   `r_LEAF[L6,Intercept]` <dbl>, `r_LEAF[L7,Intercept]` <dbl>,
#   `r_LEAF[L8,Intercept]` <dbl>, prior_Intercept <dbl>, prior_b <dbl>,
#   prior_sigma <dbl>, prior_sd_LEAF <dbl>, lprior <dbl>, lp__ <dbl>, …
tobacco_brm3 |> spread_draws(`.*Intercept.*|.*TREAT.*`, regex = TRUE)
# A tibble: 1,500 × 16
   .chain .iteration .draw b_Intercept b_TREATMENTWeak sd_LEAF__Intercept
    <int>      <int> <int>       <dbl>           <dbl>              <dbl>
 1      1          1     1        34.6           -6.10              0.111
 2      1          2     2        37.2           -9.13              2.79 
 3      1          3     3        34.0           -5.90              6.78 
 4      1          4     4        36.5           -6.76              0.532
 5      1          5     5        37.9           -8.55              3.65 
 6      1          6     6        36.7           -8.78              7.99 
 7      1          7     7        34.3           -6.86              4.66 
 8      1          8     8        39.1          -13.8               5.01 
 9      1          9     9        33.4           -6.14              5.61 
10      1         10    10        36.9           -8.12              8.05 
# ℹ 1,490 more rows
# ℹ 10 more variables: Intercept <dbl>, `r_LEAF[L1,Intercept]` <dbl>,
#   `r_LEAF[L2,Intercept]` <dbl>, `r_LEAF[L3,Intercept]` <dbl>,
#   `r_LEAF[L4,Intercept]` <dbl>, `r_LEAF[L5,Intercept]` <dbl>,
#   `r_LEAF[L6,Intercept]` <dbl>, `r_LEAF[L7,Intercept]` <dbl>,
#   `r_LEAF[L8,Intercept]` <dbl>, prior_Intercept <dbl>
## posterior_samples() is deprecated; as_draws_df() is the recommended
## replacement (it additionally carries the .chain/.iteration/.draw
## metadata columns)
tobacco_brm3 |>
  as_draws_df() |>
  as_tibble()
Warning: Method 'posterior_samples' is deprecated. Please see ?as_draws for
recommended alternatives.
# A tibble: 1,500 × 19
   b_Intercept b_TREATMENTWeak sd_LEAF__Intercept sigma Intercept
         <dbl>           <dbl>              <dbl> <dbl>     <dbl>
 1        34.6           -6.10              0.111  4.71      31.6
 2        37.2           -9.13              2.79   5.37      32.7
 3        34.0           -5.90              6.78   3.88      31.0
 4        36.5           -6.76              0.532  6.22      33.1
 5        37.9           -8.55              3.65   3.38      33.6
 6        36.7           -8.78              7.99   4.31      32.3
 7        34.3           -6.86              4.66   4.04      30.9
 8        39.1          -13.8               5.01   5.36      32.2
 9        33.4           -6.14              5.61   4.03      30.3
10        36.9           -8.12              8.05   2.94      32.8
# ℹ 1,490 more rows
# ℹ 14 more variables: `r_LEAF[L1,Intercept]` <dbl>,
#   `r_LEAF[L2,Intercept]` <dbl>, `r_LEAF[L3,Intercept]` <dbl>,
#   `r_LEAF[L4,Intercept]` <dbl>, `r_LEAF[L5,Intercept]` <dbl>,
#   `r_LEAF[L6,Intercept]` <dbl>, `r_LEAF[L7,Intercept]` <dbl>,
#   `r_LEAF[L8,Intercept]` <dbl>, prior_Intercept <dbl>, prior_b <dbl>,
#   prior_sigma <dbl>, prior_sd_LEAF <dbl>, lprior <dbl>, lp__ <dbl>
tobacco_brm3 |>
  bayes_R2(re.form = NA, summary = FALSE) |>
  median_hdci()
          y       ymin      ymax .width .point .interval
1 0.3686854 0.09345605 0.5697043   0.95 median      hdci
tobacco_brm3 |>
  bayes_R2(re.form = ~ (1 | LEAF), summary = FALSE) |>
  median_hdci()
          y      ymin     ymax .width .point .interval
1 0.6137915 0.2376727 0.833485   0.95 median      hdci
tobacco_brm3 |>
  bayes_R2(re.form = ~ (TREATMENT | LEAF), summary = FALSE) |>
  median_hdci()
          y       ymin      ymax .width .point .interval
1 0.3686854 0.09345605 0.5697043   0.95 median      hdci

Region of Practical Equivalence

0.1 * sd(tobacco$NUMBER)
[1] 0.6540458
tobacco_brm3 |> rope(range = c(-0.65, 0.65))
# Proportion of samples inside the ROPE [-0.65, 0.65]:

Parameter     | Inside ROPE
---------------------------
Intercept     |      0.00 %
TREATMENTWeak |      0.00 %
rope(tobacco_brm3, range = c(-0.65, 0.65)) |> plot()

## Or based on the percentage scale
tobacco_brm3 |>
  emmeans(~TREATMENT) |>
  gather_emmeans_draws() |>
  group_by(.draw) |>
  arrange(desc(TREATMENT)) |>
  summarise(Diff = 100 * (exp(diff(log(.value))) - 1)) |>
  rope(range = c(-10, 10))
Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
# Proportion of samples inside the ROPE [-10.00, 10.00]:

Parameter | Inside ROPE
-----------------------
.draw     |      0.00 %
Diff      |      0.14 %
tobacco_brm3 |> modelsummary(
  statistic = c("conf.low", "conf.high"),
  shape = term ~ statistic,
  exponentiate = TRUE
)
Warning: 
`modelsummary` uses the `performance` package to extract goodness-of-fit
statistics from models of this class. You can specify the statistics you wish
to compute by supplying a `metrics` argument to `modelsummary`, which will then
push it forward to `performance`. Acceptable values are: "all", "common",
"none", or a character vector of metrics names. For example: `modelsummary(mod,
metrics = c("RMSE", "R2")` Note that some metrics are computationally
expensive. See `?performance::performance` for details.
 This warning appears once per session.
(1)
Est. 2.5 % 97.5 %
b_Intercept 3.145628e+15 8.882737e+13 1.883089e+17
b_TREATMENTWeak 1.000000e-03 0.000000e+00 5.100000e-02
sd_LEAF__Intercept 3.138900e+01 1.316000e+00 2.009665e+03
sigma 7.084100e+01 1.354400e+01 1.215638e+03
Num.Obs. 16
R2 0.614
R2 Adj. 0.435
R2 Marg. 0.369
ICC 0.9
ELPD -50.2
ELPD s.e. 2.5
LOOIC 100.4
LOOIC s.e. 5.1
WAIC 98.6
RMSE 3.26
r2.adjusted.marginal -0.0364657762640077
tobacco_brm3 |> modelplot(exponentiate = TRUE)

9 Further investigations

tobacco_brm3 |>
  emmeans(~TREATMENT) |>
  gather_emmeans_draws() |>
  group_by(.draw) |>
  arrange(desc(TREATMENT)) |>
  summarise(Diff = 100 * (exp(diff(log(.value))) - 1)) |>
  summarise_draws(
    median,
    ~ HDInterval::hdi(.x),
    rhat,
    ess_bulk,
    ess_tail
  )
Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
# A tibble: 1 × 7
  variable median lower upper  rhat ess_bulk ess_tail
  <chr>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
1 Diff       26.8  8.68  44.3 1.000    1241.    1452.
## Or via gather and pivot
newdata <- tobacco_brm3 |>
  emmeans(~TREATMENT) |>
  gather_emmeans_draws() |>
  pivot_wider(names_from = TREATMENT, values_from = .value) |>
  mutate(
    Eff = Strong - Weak,
    PEff = 100 * Eff / Weak
  )
Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
newdata |> median_hdci(PEff)
# A tibble: 1 × 6
   PEff .lower .upper .width .point .interval
  <dbl>  <dbl>  <dbl>  <dbl> <chr>  <chr>    
1  26.8   8.96   44.7   0.95 median hdci     
newdata |> summarise(P = mean(PEff > 0))
# A tibble: 1 × 1
      P
  <dbl>
1 0.998
newdata |> summarise(P = mean(PEff > 20))
# A tibble: 1 × 1
      P
  <dbl>
1 0.803
newdata |>
  dplyr::select(-.chain, -.iteration) |>
  hypothesis("PEff>20")
Hypothesis Tests for class :
       Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob
1 (PEff)-(20) > 0     7.09      9.02    -6.88    21.23       4.08       0.8
  Star
1     
---
'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
'*': For one-sided hypotheses, the posterior probability exceeds 95%;
for two-sided hypotheses, the value tested against lies outside the 95%-CI.
Posterior probabilities of point hypotheses assume equal prior probabilities.
newdata <- tobacco_brm3 |>
  emmeans(~TREATMENT) |>
  as.data.frame()
Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
head(newdata)
 TREATMENT   emmean lower.HPD upper.HPD
 Strong    35.68479  32.00457  39.54700
 Weak      28.14294  24.52118  32.06291

Point estimate displayed: median 
HPD interval probability: 0.95 
ggplot(newdata, aes(y = emmean, x = TREATMENT)) +
  geom_pointrange(aes(ymin = lower.HPD, ymax = upper.HPD)) +
  theme_bw()

tobacco_brm3 |>
  emmeans(~TREATMENT) |>
  gather_emmeans_draws() |>
  ggplot() +
  geom_density_ridges(aes(x = .value, y = TREATMENT), alpha = 0.5, fill = "orange") +
  scale_x_continuous("Average number of lesions") +
  theme_bw()
Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
Picking joint bandwidth of 0.372

newdat <- tobacco |> tidyr::expand(TREATMENT)
newdata <- tobacco_brm3 |>
  brms::posterior_epred(newdat, re_formula = NA) |>
  as.data.frame() |>
  rename_with(~ as.character(newdat$TREATMENT)) |>
  mutate(
    Eff = Strong - Weak,
    PEff = 100 * Eff / Weak
  )
head(newdata)
    Strong     Weak      Eff     PEff
1 34.62696 28.52563 6.101326 21.38893
2 37.21866 28.08639 9.132267 32.51492
3 33.97138 28.07141 5.899965 21.01770
4 36.48700 29.72840 6.758597 22.73448
5 37.91998 29.36733 8.552642 29.12298
6 36.73228 27.95512 8.777153 31.39730
newdata |> median_hdci(PEff)
# A tibble: 1 × 6
   PEff .lower .upper .width .point .interval
  <dbl>  <dbl>  <dbl>  <dbl> <chr>  <chr>    
1  26.8   8.96   44.7   0.95 median hdci     
newdata |> summarise(P = mean(PEff > 0))
      P
1 0.998
newdata |> summarise(P = mean(PEff > 20))
          P
1 0.8033333
newdata <- tobacco_brm3 |>
  emmeans(~TREATMENT) |>
  pairs() |>
  gather_emmeans_draws()
Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
newdata |> median_hdci()
# A tibble: 1 × 7
  contrast      .value .lower .upper .width .point .interval
  <chr>          <dbl>  <dbl>  <dbl>  <dbl> <chr>  <chr>    
1 Strong - Weak   7.58   2.86   11.5   0.95 median hdci     
## Or on the percentage scale
newdata <- tobacco_brm3 |>
  emmeans(~TREATMENT) |>
  regrid(trans = "log") |>
  pairs() |>
  regrid() |>
  gather_emmeans_draws() |>
  mutate(.value = (.value - 1) * 100)
Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
newdata |> median_hdci()
# A tibble: 1 × 7
  contrast    .value .lower .upper .width .point .interval
  <chr>        <dbl>  <dbl>  <dbl>  <dbl> <chr>  <chr>    
1 Strong/Weak   26.8   8.96   44.7   0.95 median hdci     
newdata |> summarise(P = mean(.value > 0))
# A tibble: 1 × 2
  contrast        P
  <chr>       <dbl>
1 Strong/Weak 0.998
newdata |> summarise(P = mean(.value > 20))
# A tibble: 1 × 2
  contrast        P
  <chr>       <dbl>
1 Strong/Weak 0.803

10 References