Introduction

Regression analysis using the individual participant data (IPD)

library(metasplines)
library(splines2)

# Estimate a linear regression model using an individual participant data (IPD):
res <- lm(
  Petal.Width ~
    Species +
    nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5)),
  data=iris)
summary(res)
#> 
#> Call:
#> lm(formula = Petal.Width ~ Species + nsk(Sepal.Length, Boundary.knots = c(4.5, 
#>     7.5), knots = c(5, 6, 6.5)), data = iris)
#> 
#> Residuals:
#>      Min       1Q   Median       3Q      Max 
#> -0.55321 -0.10548 -0.00895  0.10758  0.51982 
#> 
#> Coefficients:
#>                                                                        Estimate
#> (Intercept)                                                             0.18478
#> Speciesversicolor                                                       0.92487
#> Speciesvirginica                                                        1.52223
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))1  0.05965
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))2  0.21911
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))3  0.35234
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))4  0.38634
#>                                                                        Std. Error
#> (Intercept)                                                               0.05507
#> Speciesversicolor                                                         0.05720
#> Speciesvirginica                                                          0.06811
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))1    0.06102
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))2    0.08139
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))3    0.08618
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))4    0.09927
#>                                                                        t value
#> (Intercept)                                                              3.355
#> Speciesversicolor                                                       16.170
#> Speciesvirginica                                                        22.348
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))1   0.978
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))2   2.692
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))3   4.088
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))4   3.892
#>                                                                        Pr(>|t|)
#> (Intercept)                                                            0.001016
#> Speciesversicolor                                                       < 2e-16
#> Speciesvirginica                                                        < 2e-16
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))1 0.329970
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))2 0.007951
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))3 7.22e-05
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))4 0.000152
#>                                                                           
#> (Intercept)                                                            ** 
#> Speciesversicolor                                                      ***
#> Speciesvirginica                                                       ***
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))1    
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))2 ** 
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))3 ***
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))4 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 0.19 on 143 degrees of freedom
#> Multiple R-squared:  0.9404, Adjusted R-squared:  0.9379 
#> F-statistic: 375.9 on 6 and 143 DF,  p-value: < 2.2e-16

# How is the estimated correlation matrix like?
cor.m <- cov2cor(vcov(res))
colnames(cor.m) <- NULL
round(cor.m, 3)
#>                                                                          [,1]
#> (Intercept)                                                             1.000
#> Speciesversicolor                                                      -0.024
#> Speciesvirginica                                                       -0.041
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))1 -0.857
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))2 -0.604
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))3 -0.618
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))4 -0.524
#>                                                                          [,2]
#> (Intercept)                                                            -0.024
#> Speciesversicolor                                                       1.000
#> Speciesvirginica                                                        0.768
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))1 -0.112
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))2 -0.662
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))3 -0.615
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))4 -0.517
#>                                                                          [,3]
#> (Intercept)                                                            -0.041
#> Speciesversicolor                                                       0.768
#> Speciesvirginica                                                        1.000
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))1 -0.059
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))2 -0.622
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))3 -0.659
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))4 -0.649
#>                                                                          [,4]
#> (Intercept)                                                            -0.857
#> Speciesversicolor                                                      -0.112
#> Speciesvirginica                                                       -0.059
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))1  1.000
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))2  0.549
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))3  0.617
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))4  0.511
#>                                                                          [,5]
#> (Intercept)                                                            -0.604
#> Speciesversicolor                                                      -0.662
#> Speciesvirginica                                                       -0.622
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))1  0.549
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))2  1.000
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))3  0.886
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))4  0.757
#>                                                                          [,6]
#> (Intercept)                                                            -0.618
#> Speciesversicolor                                                      -0.615
#> Speciesvirginica                                                       -0.659
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))1  0.617
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))2  0.886
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))3  1.000
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))4  0.780
#>                                                                          [,7]
#> (Intercept)                                                            -0.524
#> Speciesversicolor                                                      -0.517
#> Speciesvirginica                                                       -0.649
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))1  0.511
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))2  0.757
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))3  0.780
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))4  1.000

Note that the selection of the left boundary knot (in this example 4.5) should be the same as the one used on the estimates obtained from literature. This is because the reference point (where the value of the spline is set to zero for identifiability) in splines is generally the left boundary knot. The spline parameter values correspond to the spline function values at the internal (5, 6 and 6.5) and right boundary (7.5) knots, when the nsk splines are used. Other spline families are not supported at the moment.

Pool the estimated spline parameters based on the IPD with literature-based (LB) estimates

# "Literature-based" (LB) estimates:
lb.df <- read.table(text=
"variable,     cov.value,  est,  est.var
Sepal.Length,  4.5,        0,     0   
Sepal.Length,  5,            0.15,  0.01 
Sepal.Length,  5.5,        0.25,  0.01 
Sepal.Length,  6,            0.4,   0.01 
Sepal.Length,  6.5,        0.5,   0.01 
Sepal.Length,  8,          0.25,  0.04 
", sep=",", header=TRUE)

lb.df
#>       variable cov.value  est est.var
#> 1 Sepal.Length       4.5 0.00    0.00
#> 2 Sepal.Length       5.0 0.15    0.01
#> 3 Sepal.Length       5.5 0.25    0.01
#> 4 Sepal.Length       6.0 0.40    0.01
#> 5 Sepal.Length       6.5 0.50    0.01
#> 6 Sepal.Length       8.0 0.25    0.04

# Output table with the point estimates and the estimated variances:
pool_splines(v="Sepal.Length", meta.df=lb.df, glm.res=res)
#> $pooled
#>       stat          V1          V2          V3          V4
#> 1   est.lb 0.140485675 0.390851632 0.503442966 0.375372517
#> 2  est.ipd 0.059649933 0.219106883 0.352341484 0.386343207
#> 3   var.lb 0.009617833 0.009609035 0.010122903 0.021873413
#> 4  var.ipd 0.003723764 0.006624838 0.007427437 0.009854545
#> 5 est.pool 0.082211943 0.289193741 0.416288778 0.382935765
#> 6 var.pool 0.002684427 0.003921326 0.004284089 0.006793773
#> 7   pval.Q 0.484026476 0.177675852 0.254044526 0.950889039
#> 
#> attr(,"idx.param")
#> [1] 4 5 6 7

# Full output containing also spline estimates and covariance matrix:
pooled <- pool_splines(v="Sepal.Length", meta.df=lb.df, glm.res=res, full.output = TRUE)
str(pooled)
#> List of 4
#>  $ pooled   :'data.frame':   7 obs. of  5 variables:
#>   ..$ stat: chr [1:7] "est.lb" "est.ipd" "var.lb" "var.ipd" ...
#>   ..$ V1  : num [1:7] 0.14049 0.05965 0.00962 0.00372 0.08221 ...
#>   ..$ V2  : num [1:7] 0.39085 0.21911 0.00961 0.00662 0.28919 ...
#>   ..$ V3  : num [1:7] 0.50344 0.35234 0.01012 0.00743 0.41629 ...
#>   ..$ V4  : num [1:7] 0.37537 0.38634 0.02187 0.00985 0.38294 ...
#>  $ log.hr   :'data.frame':   150 obs. of  5 variables:
#>   ..$ model    : chr [1:150] "ipd" "ipd" "ipd" "ipd" ...
#>   ..$ cov.value: num [1:150] 4.5 4.57 4.64 4.71 4.79 ...
#>   ..$ est      : num [1:150] 0 0.00865 0.01729 0.02589 0.03444 ...
#>   ..$ ci.low   : num [1:150] 0 -0.0125 -0.0245 -0.0355 -0.045 ...
#>   ..$ ci.upp   : num [1:150] 0 0.0298 0.0591 0.0873 0.1138 ...
#>  $ meta.df  :'data.frame':   6 obs. of  4 variables:
#>   ..$ variable : chr [1:6] "Sepal.Length" "Sepal.Length" "Sepal.Length" "Sepal.Length" ...
#>   ..$ cov.value: num [1:6] 4.5 5 5.5 6 6.5 8
#>   ..$ est      : num [1:6] 0 0.15 0.25 0.4 0.5 0.25
#>   ..$ est.var  : num [1:6] 0 0.01 0.01 0.01 0.01 0.04
#>  $ meta.vcov: num [1:4, 1:4] 0.00962 0.00528 0.00608 0.00741 0.00528 ...

The default output of the pool_splines function is a list with one element:

With the full.output = TRUE option, more output is included:

Plot the IPD, LB and pooled spline estimates

The LB estimates given in the lb.df data frame above are presented as green dots, and the corresponding 95% confidence intervals as vertical dashed lines.

The IPD, LB and pooled point estimates of the spline functions are the smooth curves, and the corresponding dashed lines are the 95% confidence intervals. Note that the LB spline estimates are close to those estimates reported in lb.df, but in the points which do not equal the knots of the spline, there are small differences. Note also, that as the left boundary knot is at 4.5, the values of the splines are zero, and the widths of the confidence intervals are zero.

Here we can see that the pooled spline estimate is between the IPD estimate and the LB estimate, and the confidence interval is narrower than the confidence intervals based on the IPD or literature.

plot(0, 0, type="n", xlim=range(pooled$log.hr$cov.value), ylim=c(-0.2, 0.7),
     xlab="Covariate value", ylab="")
col.v <- rainbow(3)
names(col.v) <- unique(pooled$log.hr$model)
for (i in unique(pooled$log.hr$model)) {
  with(subset(pooled$log.hr, model==i), {
    lines(cov.value, est, col=col.v[i], lwd=2)
    lines(cov.value, ci.low, col=col.v[i], lty=2)
    lines(cov.value, ci.upp, col=col.v[i], lty=2)
  })
}
with(lb.df, {
  points(cov.value, est, pch=15, col=col.v["lb"])
  for (j in 2:nrow(lb.df))
    lines(rep(cov.value[j], 2), est[j] + 1.96 * c(-1, 1) * sqrt(est.var[j]),
          col=col.v["lb"], lty=2)
})
legend("bottomleft", legend=names(col.v), col=col.v, lwd=c(2,2,2))