saeHB-Twofold-Beta

STEP 1 Load package and load the data

library(saeHB.TF.beta)
data("dataBeta")

STEP 2 Data Exploration

dataBeta$CV <- sqrt(dataBeta$vardir)/dataBeta$y
explore(y~X1+X2, CV = "CV", data = dataBeta, normality = TRUE)
#>                   y         X1         X2
#> Min.    0.002826007 0.04205953 0.02461368
#> 1st Qu. 0.677417203 0.34818477 0.20900053
#> Median  0.986658040 0.58338771 0.39409355
#> Mean    0.786269096 0.57240016 0.43864087
#> 3rd Qu. 0.999116512 0.85933934 0.73765722
#> Max.    1.000000000 0.99426978 0.96302423
#> NA      0.000000000 0.00000000 0.00000000
#> 
#> Normality test for y :
#> Decision: Data do NOT follow normal distribution, with p.value = 0 < 0.05

STEPS 3 Fitting Twofold HB Beta Model

model <- betaTF(y~X1+X2,area="codearea",weight="w",iter.mcmc = 10000, burn.in = 3000, iter.update = 5, thin = 10, data=dataBeta)

STEP 4 Check Convergence via Plot MCMC

Trace Plot, Density Plot, ACF Plot, R-Hat Plot

model$plot
#> [[1]]

#> 
#> [[2]]

#> 
#> [[3]]

#> 
#> [[4]]

STEP 5 Extract Result for Area

Area Mean Estimation

model$Est_area
#>         Mean         SD      2.5%       25%       50%       75%    97.5%
#> 1  1.2179303 0.08367189 1.0199331 1.1771017 1.2359316 1.2786094 1.321750
#> 2  0.9663316 0.10936126 0.7284934 0.9008660 0.9787452 1.0433653 1.146264
#> 3  1.2811164 0.10489759 1.0422264 1.2281014 1.3043557 1.3596630 1.415032
#> 4  0.9749039 0.06918723 0.8166622 0.9359717 0.9910931 1.0249401 1.068415
#> 5  0.9812609 0.16483636 0.6162753 0.8838426 0.9916185 1.0920329 1.273873
#> 6  1.4889919 0.13338164 1.1885749 1.4149461 1.5170100 1.5832349 1.669097
#> 7  0.8246541 0.17429089 0.5025987 0.7020238 0.8255332 0.9514103 1.143805
#> 8  0.8481751 0.15196815 0.5238050 0.7471580 0.8583737 0.9609586 1.120119
#> 9  0.7298027 0.18379549 0.3490142 0.6115407 0.7395244 0.8513946 1.077212
#> 10 0.7941518 0.06083836 0.6447614 0.7591267 0.8054407 0.8369791 0.880041

Area Random Effect

model$area_randeff
#>            f_mean
#> f[1]   0.32835454
#> f[2]  -0.01729205
#> f[3]   0.46235495
#> f[4]   0.78472625
#> f[5]  -0.36881912
#> f[6]   0.57163578
#> f[7]  -0.74469434
#> f[8]  -0.34217516
#> f[9]  -0.94902221
#> f[10]  0.36114471

Calculate Area Relative Standard Error (RSE) or CV and Mean Squared Error (MSE)

CV_area <- (model$Est_area$SD)/(model$Est_area$Mean)*100
MSE_area <- model$Est_area$SD^2
summary(cbind(CV_area,MSE_area))
#>     CV_area          MSE_area       
#>  Min.   : 6.870   Min.   :0.003701  
#>  1st Qu.: 7.793   1st Qu.:0.008002  
#>  Median :10.138   Median :0.014875  
#>  Mean   :13.113   Mean   :0.017067  
#>  3rd Qu.:17.637   3rd Qu.:0.026152  
#>  Max.   :25.184   Max.   :0.033781

STEP 6 Extract Result for Subarea

Subarea Mean Estimation

model$Est_area
#>         Mean         SD      2.5%       25%       50%       75%    97.5%
#> 1  1.2179303 0.08367189 1.0199331 1.1771017 1.2359316 1.2786094 1.321750
#> 2  0.9663316 0.10936126 0.7284934 0.9008660 0.9787452 1.0433653 1.146264
#> 3  1.2811164 0.10489759 1.0422264 1.2281014 1.3043557 1.3596630 1.415032
#> 4  0.9749039 0.06918723 0.8166622 0.9359717 0.9910931 1.0249401 1.068415
#> 5  0.9812609 0.16483636 0.6162753 0.8838426 0.9916185 1.0920329 1.273873
#> 6  1.4889919 0.13338164 1.1885749 1.4149461 1.5170100 1.5832349 1.669097
#> 7  0.8246541 0.17429089 0.5025987 0.7020238 0.8255332 0.9514103 1.143805
#> 8  0.8481751 0.15196815 0.5238050 0.7471580 0.8583737 0.9609586 1.120119
#> 9  0.7298027 0.18379549 0.3490142 0.6115407 0.7395244 0.8513946 1.077212
#> 10 0.7941518 0.06083836 0.6447614 0.7591267 0.8054407 0.8369791 0.880041

Subarea Random Effect

model$sub_randeff
#>             u_mean
#> u[1]   0.119715995
#> u[2]  -0.056201482
#> u[3]   0.227430412
#> u[4]   0.226823593
#> u[5]  -0.536328040
#> u[6]   0.301095852
#> u[7]   0.001988169
#> u[8]   0.168360702
#> u[9]   0.364382150
#> u[10]  0.525992674
#> u[11]  0.123937310
#> u[12]  0.211898157
#> u[13] -0.200098286
#> u[14]  0.617127849
#> u[15] -0.905695714
#> u[16]  0.037466149
#> u[17]  0.876489893
#> u[18] -0.388672505
#> u[19] -0.999258002
#> u[20]  0.289406088
#> u[21] -0.024721479
#> u[22]  0.288812727
#> u[23] -0.461070642
#> u[24] -0.288971985
#> u[25] -0.684000074
#> u[26] -0.608315501
#> u[27]  0.408009283
#> u[28]  0.501033723
#> u[29] -0.475810178
#> u[30]  0.341399077

Calculate Subarea Relative Standard Error (RSE) or CV and Mean Squared Error (MSE)

CV_sub <- (model$Est_sub$SD)/(model$Est_sub$Mean)*100
MSE_sub <- model$Est_sub$SD^2
summary(cbind(CV_sub,MSE_sub))
#>      CV_sub          MSE_sub        
#>  Min.   : 6.691   Min.   :0.003821  
#>  1st Qu.: 9.642   1st Qu.:0.007333  
#>  Median :15.050   Median :0.015436  
#>  Mean   :19.198   Mean   :0.018663  
#>  3rd Qu.:26.776   3rd Qu.:0.029237  
#>  Max.   :52.603   Max.   :0.048170

Extract Coefficient Estimation \(\beta\)

model$coefficient
#>           Mean        SD       2.5%       25%       50%       75%     97.5%
#> b[0] 0.8598961 0.2630190  0.3370801 0.6867451 0.8551453 1.0395515 1.3858284
#> b[1] 0.3071740 0.3494490 -0.3458376 0.0597489 0.3201630 0.5505867 0.9800302
#> b[2] 1.1810308 0.4042009  0.3863613 0.9234731 1.1893539 1.4589896 1.9547040

Extract Area Random Effect Variance \(\sigma_u^2\) and Subarea Random Effect Variance \(\sigma_v^2\)

model$refVar
#>       b_var     a_var
#> 1 0.9970455 0.9252804

STEP 7 Visualize The Result

library(ggplot2)

Save the output of Subarea estimation and the Direct Estimation (y)

df <- data.frame(
  area = seq_along(model$Est_sub$Mean),             
  direct = dataBeta$y,              
  mean_estimate = model$Est_sub$Mean
)

Area Mean Estimation

ggplot(df, aes(x = area)) +
  geom_point(aes(y = direct), size = 2, colour = "#388894", alpha = 0.6) +   # scatter points
  geom_point(aes(y = mean_estimate), size = 2, colour = "#2b707a") +   # scatter points
  geom_line(aes(y = direct), linewidth = 1, colour = "#388894", alpha = 0.6) +  # line connecting points
  geom_line(aes(y = mean_estimate), linewidth = 1, colour = "#2b707a") +  # line connecting points
  labs(
    title = "Scatter + Line Plot of Estimated Means",
    x = "Area Index",
    y = "Value"
  )

ggplot(df, aes(x = , direct, y = mean_estimate)) +
  geom_point( size = 2, colour = "#2b707a") +
   geom_abline(intercept = 0, slope = 1, color = "gray40", linetype = "dashed") +
  geom_smooth(method = "lm", color = "#2b707a", se = FALSE) +
  ylim(0, 1) +
  labs(
    title = "Scatter Plot of Direct vs Model-Based",
    x = "Direct",
    y = "Model Based"
  )

Combine the CV of direct estimation and CV from output

df_cv <- data.frame(
  direct = sqrt(dataBeta$vardir)/dataBeta$y*100,              
  cv_estimate = CV_sub
)
df_cv <- df_cv[order(df_cv$direct), ]
df_cv$area <- seq_along(dataBeta$y)

Relative Standard Error of Subarea Mean Estimation

ggplot(df_cv, aes(x = area)) +
  geom_point(aes(y = direct), size = 2, colour = "#388894", alpha = 0.5) +
  geom_point(aes(y = cv_estimate), size = 2, colour = "#2b707a") +
  ylim(0, 100) +
  labs(
    title = "Scatter Plot of Direct vs Model-Based CV",
    x = "Area",
    y = "CV (%)"
  )