Multilevel SEM using estimated level-specifc covariance matrices

Demonstration of conducting MSEM using level-specific covariance matrices without raw data

Author

Mark Lai

Published

June 2, 2024

In this note, I explore how to use estimated level-specific covariance matrices to perform SEM in each level, and contrast the results to simultanenous estimation in multilevel structural equation modeling (MSEM).

library(lavaan)

This is lavaan 0.6-17
lavaan is FREE software! Please report any bugs.

Example from https://lavaan.ugent.be/tutorial/multilevel.html

model <- "
    level: 1
        fw =~ y1 + y2 + y3
        fw ~ x1 + x2 + x3
    level: 2
        fb =~ y1 + y2 + y3
        fb ~ w1 + w2
"
fit <- sem(model = model, data = Demo.twolevel, cluster = "cluster")

Obtain Level-Specific Covariance Matrices

Using covariance matrices, based on Yuan & Bentler (2007). Maximum likelihood estimation is used.

# From saturated (h1) model
sampstat_h1 <- lavInspect(fit, what = "sampstat")
# Sample sizes
nobs_fit <- nobs(fit)
nclus_fit <- lavInspect(fit, what = "nclusters")

Between-level model

The sample size is $J$ where $J$ is the number of clusters.

# Saturated covariance matrix
sampstat_h1$cluster$cov

       y1     y2     y3     w1     w2
y1  0.992                            
y2  0.668  0.598                     
y3  0.548  0.391  0.469              
w1  0.125  0.119  0.036  0.870       
w2  0.086  0.057  0.130 -0.128  0.931

# Fit model
fit_lv2 <- sem("
    fb =~ y1 + y2 + y3
    fb ~ w1 + w2
",
    sample.nobs = nclus_fit,
    sample.cov = sampstat_h1$cluster$cov,
    sample.mean = sampstat_h1$cluster$mean
)
# Compare estimates
# 1. separate model (lv-2)
(par_sep_lv2 <- parameterEstimates(fit_lv2))

   lhs op rhs    est    se      z pvalue ci.lower ci.upper
1   fb =~  y1  1.000 0.000     NA     NA    1.000    1.000
2   fb =~  y2  0.717 0.034 21.020  0.000    0.650    0.784
3   fb =~  y3  0.587 0.034 17.524  0.000    0.522    0.653
4   fb  ~  w1  0.164 0.074  2.209  0.027    0.019    0.310
5   fb  ~  w2  0.128 0.072  1.786  0.074   -0.013    0.269
6   y1 ~~  y1  0.060 0.027  2.244  0.025    0.008    0.112
7   y2 ~~  y2  0.118 0.018  6.629  0.000    0.083    0.153
8   y3 ~~  y3  0.147 0.017  8.533  0.000    0.113    0.181
9   fb ~~  fb  0.894 0.098  9.079  0.000    0.701    1.087
10  w1 ~~  w1  0.866 0.000     NA     NA    0.866    0.866
11  w1 ~~  w2 -0.128 0.000     NA     NA   -0.128   -0.128
12  w2 ~~  w2  0.926 0.000     NA     NA    0.926    0.926
13  y1 ~1      0.022 0.069  0.316  0.752   -0.114    0.158
14  y2 ~1     -0.014 0.054 -0.268  0.789   -0.120    0.091
15  y3 ~1     -0.042 0.048 -0.869  0.385   -0.135    0.052
16  w1 ~1      0.052 0.000     NA     NA    0.052    0.052
17  w2 ~1     -0.091 0.000     NA     NA   -0.091   -0.091
18  fb ~1      0.000 0.000     NA     NA    0.000    0.000

# 2. simultaneous model
(par_simu_lv2 <- parameterEstimates(fit) |>
    subset(level == 2))

   lhs op rhs block level    est    se      z pvalue ci.lower ci.upper
24  fb =~  y1     2     2  1.000 0.000     NA     NA    1.000    1.000
25  fb =~  y2     2     2  0.717 0.052 13.824  0.000    0.615    0.818
26  fb =~  y3     2     2  0.587 0.048 12.329  0.000    0.494    0.680
27  fb  ~  w1     2     2  0.165 0.079  2.093  0.036    0.010    0.319
28  fb  ~  w2     2     2  0.131 0.076  1.715  0.086   -0.019    0.281
29  y1 ~~  y1     2     2  0.058 0.047  1.213  0.225   -0.035    0.151
30  y2 ~~  y2     2     2  0.120 0.031  3.825  0.000    0.059    0.182
31  y3 ~~  y3     2     2  0.149 0.028  5.319  0.000    0.094    0.204
32  fb ~~  fb     2     2  0.899 0.118  7.592  0.000    0.667    1.131
33  w1 ~~  w1     2     2  0.870 0.000     NA     NA    0.870    0.870
34  w1 ~~  w2     2     2 -0.128 0.000     NA     NA   -0.128   -0.128
35  w2 ~~  w2     2     2  0.931 0.000     NA     NA    0.931    0.931
36  y1 ~1         2     2  0.024 0.075  0.327  0.743   -0.122    0.171
37  y2 ~1         2     2 -0.016 0.060 -0.269  0.788   -0.134    0.101
38  y3 ~1         2     2 -0.042 0.054 -0.777  0.437   -0.148    0.064
39  w1 ~1         2     2  0.052 0.000     NA     NA    0.052    0.052
40  w2 ~1         2     2 -0.091 0.000     NA     NA   -0.091   -0.091
41  fb ~1         2     2  0.000 0.000     NA     NA    0.000    0.000

tab1 <- par_sep_lv2 |>
    subset(subset = op %in% c("~", "=~"))
tab2 <- par_simu_lv2 |>
    subset(subset = op %in% c("~", "=~"),
           select = -c(block, level))
rbind(tab1, tab2) |>
    base::`[`(c(1, 6, 2, 7, 3, 8, 4, 9, 5, 10), ) |>
    cbind(model = rep(c("Separate", "Simultaneous"), 5)) |>
    knitr::kable(digits = 2)

Table 1: Comparisons of between-level coefficients

	lhs	op	rhs	est	se	z	pvalue	ci.lower	ci.upper	model
1	fb	=~	y1	1.00	0.00	NA	NA	1.00	1.00	Separate
24	fb	=~	y1	1.00	0.00	NA	NA	1.00	1.00	Simultaneous
2	fb	=~	y2	0.72	0.03	21.02	0.00	0.65	0.78	Separate
25	fb	=~	y2	0.72	0.05	13.82	0.00	0.62	0.82	Simultaneous
3	fb	=~	y3	0.59	0.03	17.52	0.00	0.52	0.65	Separate
26	fb	=~	y3	0.59	0.05	12.33	0.00	0.49	0.68	Simultaneous
4	fb	~	w1	0.16	0.07	2.21	0.03	0.02	0.31	Separate
27	fb	~	w1	0.16	0.08	2.09	0.04	0.01	0.32	Simultaneous
5	fb	~	w2	0.13	0.07	1.79	0.07	-0.01	0.27	Separate
28	fb	~	w2	0.13	0.08	1.71	0.09	-0.02	0.28	Simultaneous

In this case, while the estimated coefficients are very close, using the between-level covariance matrix gives smaller standard errors than simultaneous MSEM. It is unclear whether using the between-level covariance matrix may result in underestimated standard errors.

Within-level model

The sample size is $N - J$ where $N$ is the number of observations and $J$ is the number of clusters.

# Saturated covariance matrix
sampstat_h1$within$cov

       y1     y2     y3     x1     x2     x3
y1  2.000                                   
y2  0.789  1.674                            
y3  0.749  0.564  1.557                     
x1  0.489  0.393  0.376  0.982              
x2  0.416  0.322  0.299  0.001  1.011       
x3  0.221  0.160  0.155 -0.006  0.008  1.045

# Fit model
fit_lv1 <- sem("
    fw =~ y1 + y2 + y3
    fw ~ x1 + x2 + x3
",
    sample.nobs = nobs_fit - nclus_fit,
    sample.cov = sampstat_h1$within$cov,
    sample.mean = sampstat_h1$within$mean
)
# Compare estimates
# 1. separate model (lv-1)
(par_sep_lv1 <- parameterEstimates(fit_lv1))

   lhs op rhs    est    se      z pvalue ci.lower ci.upper
1   fw =~  y1  1.000 0.000     NA     NA    1.000    1.000
2   fw =~  y2  0.773 0.035 22.287  0.000    0.705    0.841
3   fw =~  y3  0.734 0.033 22.033  0.000    0.669    0.799
4   fw  ~  x1  0.510 0.024 21.697  0.000    0.464    0.556
5   fw  ~  x2  0.407 0.022 18.165  0.000    0.363    0.451
6   fw  ~  x3  0.205 0.021  9.766  0.000    0.164    0.246
7   y1 ~~  y1  0.985 0.046 21.538  0.000    0.895    1.075
8   y2 ~~  y2  1.067 0.039 27.179  0.000    0.990    1.143
9   y3 ~~  y3  1.011 0.037 27.530  0.000    0.939    1.083
10  fw ~~  fw  0.547 0.040 13.557  0.000    0.467    0.626
11  x1 ~~  x1  0.982 0.000     NA     NA    0.982    0.982
12  x1 ~~  x2  0.001 0.000     NA     NA    0.001    0.001
13  x1 ~~  x3 -0.006 0.000     NA     NA   -0.006   -0.006
14  x2 ~~  x2  1.011 0.000     NA     NA    1.011    1.011
15  x2 ~~  x3  0.008 0.000     NA     NA    0.008    0.008
16  x3 ~~  x3  1.045 0.000     NA     NA    1.045    1.045
17  y1 ~1      0.002 0.026  0.079  0.937   -0.049    0.053
18  y2 ~1     -0.002 0.025 -0.068  0.945   -0.050    0.047
19  y3 ~1      0.000 0.024 -0.020  0.984   -0.047    0.046
20  x1 ~1     -0.007 0.000     NA     NA   -0.007   -0.007
21  x2 ~1     -0.003 0.000     NA     NA   -0.003   -0.003
22  x3 ~1      0.020 0.000     NA     NA    0.020    0.020
23  fw ~1      0.000 0.000     NA     NA    0.000    0.000

# 2. simultaneous model
(par_simu_lv1 <- parameterEstimates(fit) |>
    subset(level == 1))

   lhs op rhs block level    est    se      z pvalue ci.lower ci.upper
1   fw =~  y1     1     1  1.000 0.000     NA     NA    1.000    1.000
2   fw =~  y2     1     1  0.774 0.034 22.671      0    0.707    0.841
3   fw =~  y3     1     1  0.734 0.033 22.355      0    0.669    0.798
4   fw  ~  x1     1     1  0.510 0.023 22.037      0    0.465    0.555
5   fw  ~  x2     1     1  0.407 0.022 18.273      0    0.364    0.451
6   fw  ~  x3     1     1  0.205 0.021  9.740      0    0.164    0.246
7   y1 ~~  y1     1     1  0.986 0.046 21.591      0    0.896    1.075
8   y2 ~~  y2     1     1  1.066 0.039 27.271      0    0.990    1.143
9   y3 ~~  y3     1     1  1.011 0.037 27.662      0    0.939    1.083
10  fw ~~  fw     1     1  0.546 0.040 13.539      0    0.467    0.626
11  x1 ~~  x1     1     1  0.982 0.000     NA     NA    0.982    0.982
12  x1 ~~  x2     1     1  0.001 0.000     NA     NA    0.001    0.001
13  x1 ~~  x3     1     1 -0.006 0.000     NA     NA   -0.006   -0.006
14  x2 ~~  x2     1     1  1.011 0.000     NA     NA    1.011    1.011
15  x2 ~~  x3     1     1  0.008 0.000     NA     NA    0.008    0.008
16  x3 ~~  x3     1     1  1.045 0.000     NA     NA    1.045    1.045
17  y1 ~1         1     1  0.000 0.000     NA     NA    0.000    0.000
18  y2 ~1         1     1  0.000 0.000     NA     NA    0.000    0.000
19  y3 ~1         1     1  0.000 0.000     NA     NA    0.000    0.000
20  x1 ~1         1     1 -0.007 0.000     NA     NA   -0.007   -0.007
21  x2 ~1         1     1 -0.003 0.000     NA     NA   -0.003   -0.003
22  x3 ~1         1     1  0.020 0.000     NA     NA    0.020    0.020
23  fw ~1         1     1  0.000 0.000     NA     NA    0.000    0.000

tab1 <- par_sep_lv1 |>
    subset(subset = op %in% c("~", "=~"))
tab2 <- par_simu_lv1 |>
    subset(subset = op %in% c("~", "=~"),
           select = -c(block, level))
rbind(tab1, tab2) |>
    base::`[`(c(1, 7, 2, 8, 3, 9, 4, 10, 5, 11, 6, 12), ) |>
    cbind(model = rep(c("Separate", "Simultaneous"), 6)) |>
    knitr::kable(digits = 2)

Table 2: Comparisons of within-level coefficients

	lhs	op	rhs	est	se	z	pvalue	ci.lower	ci.upper	model
1	fw	=~	y1	1.00	0.00	NA	NA	1.00	1.00	Separate
7	fw	=~	y1	1.00	0.00	NA	NA	1.00	1.00	Simultaneous
2	fw	=~	y2	0.77	0.03	22.29	0	0.71	0.84	Separate
8	fw	=~	y2	0.77	0.03	22.67	0	0.71	0.84	Simultaneous
3	fw	=~	y3	0.73	0.03	22.03	0	0.67	0.80	Separate
9	fw	=~	y3	0.73	0.03	22.35	0	0.67	0.80	Simultaneous
4	fw	~	x1	0.51	0.02	21.70	0	0.46	0.56	Separate
10	fw	~	x1	0.51	0.02	22.04	0	0.46	0.56	Simultaneous
5	fw	~	x2	0.41	0.02	18.17	0	0.36	0.45	Separate
11	fw	~	x2	0.41	0.02	18.27	0	0.36	0.45	Simultaneous
6	fw	~	x3	0.21	0.02	9.77	0	0.16	0.25	Separate
12	fw	~	x3	0.20	0.02	9.74	0	0.16	0.25	Simultaneous

In this case, both the estimated coefficients and the standard errors are nearly identical in both approaches.