## ------------------------------------------------------------------------------
## SPACox.R
##
## Functions:
##   GRAB.SPACox                  : Print brief method information.
##   checkControl.NullModel.SPACox: Validate and populate null-model controls.
##   fitNullModel.SPACox          : Fit SPACox null model.
##   checkControl.Marker.SPACox   : Validate marker-level controls.
##   setMarker.SPACox             : Initialize marker-level analysis objects.
##   mainMarker.SPACox            : Run marker-level SPACox tests.
## ------------------------------------------------------------------------------

#' Instruction of SPACox method
#'
#' SPACox is primarily intended for time-to-event traits in unrelated samples from large-scale 
#' biobanks. It uses the empirical cumulant generating function (CGF) to perform SPA-based 
#' single-variant association tests, enabling analysis with residuals from any null model.
#'
#' @return NULL
#'
#' @examples
#' PhenoFile <- system.file("extdata", "simuPHENO.txt", package = "GRAB")
#' GenoFile <- system.file("extdata", "simuPLINK.bed", package = "GRAB")
#' OutputFile <- file.path(tempdir(), "resultSPACox.txt")
#' PhenoData <- data.table::fread(PhenoFile, header = TRUE)
#'
#' # Step 1 option 1
#' obj.SPACox <- GRAB.NullModel(
#'   survival::Surv(SurvTime, SurvEvent) ~ AGE + GENDER,
#'   data = PhenoData,
#'   subjIDcol = "IID",
#'   method = "SPACox",
#'   traitType = "time-to-event"
#' )
#'
#' # Step 1 option 2
#' residuals <- survival::coxph(
#'   survival::Surv(SurvTime, SurvEvent) ~ AGE + GENDER,
#'   data = PhenoData,
#'   x = TRUE
#' )$residuals
#'
#' obj.SPACox <- GRAB.NullModel(
#'   residuals ~ AGE + GENDER,
#'   data = PhenoData,
#'   subjIDcol = "IID",
#'   method = "SPACox",
#'   traitType = "Residual"
#' )
#'
#' # Step 2
#' GRAB.Marker(obj.SPACox, GenoFile, OutputFile)
#'
#' head(data.table::fread(OutputFile))
#'
#' @details
#'
#' \strong{Additional Control Parameters for GRAB.NullModel()}:
#' \itemize{
#'   \item \code{range} (numeric vector, default: c(-100, 100)):
#'      Range for saddlepoint approximation grid. Must be symmetric (range\[2\] = -range\[1\]).
#'   \item \code{length.out} (integer, default: 10000): Number of grid points for saddlepoint approximation.
#' }
#'
#' \strong{Method-specific elements in the \code{SPACox_NULL_Model} object returned by \code{GRAB.NullModel()}:}:
#' \itemize{
#'   \item \code{mresid}: Martingale residuals (numeric or "Residual" class).
#'   \item \code{cumul}: Cumulative sums for variance estimation (matrix).
#'   \item \code{tX}: Transpose of design matrix (matrix).
#'   \item \code{yVec}: Event indicator (numeric or "Residual" class).
#'   \item \code{X.invXX}: Matrix for variance calculations (matrix).
#' }
#' 
#' \strong{Additional Control Parameters for GRAB.Marker()}:
#' \itemize{
#'   \item \code{pVal_covaAdj_Cutoff} (numeric, default: 5e-05): P-value cutoff for covariate adjustment.
#' }
#'
#' \strong{Output file columns}:
#' \describe{
#'   \item{Marker}{Marker identifier (rsID or CHR:POS:REF:ALT).}
#'   \item{Info}{Marker information in format CHR:POS:REF:ALT.}
#'   \item{AltFreq}{Alternative allele frequency in the sample.}
#'   \item{AltCounts}{Total count of alternative alleles.}
#'   \item{MissingRate}{Proportion of missing genotypes.}
#'   \item{Pvalue}{P-value from the score test.}
#'   \item{zScore}{Z-score from the score test.}
#' }
#'
#' @references
#' Bi et al. (2020). Fast and accurate method for genome-wide time-to-event data analysis and its 
#' application to UK Biobank. \doi{10.1016/j.ajhg.2020.06.003}
#'
GRAB.SPACox <- function() {
  .message("?GRAB.SPACox for instructions")
}


checkControl.NullModel.SPACox <- function(traitType, GenoFile, SparseGRMFile, control) {

  if (!traitType %in% c("time-to-event", "Residual")) {
    stop("For 'SPACox' method, only traitType of 'time-to-event' or 'Residual' is supported.")
  }

  if (!is.null(GenoFile)) {
    warning("Argument 'GenoFile' is ignored for method 'SPACox'.")
  }

  if (!is.null(SparseGRMFile)) {
    warning("Argument 'SparseGRMFile' is ignored for method 'SPACox'.")
  }

  default.control <- list(
    range = c(-100, 100),
    length.out = 10000
  )
  control <- updateControl(control, default.control)

  range <- control$range
  if (range[1] >= -50 || range[2] <= 50 || control$length.out <= 1000) {
    stop("We suggest setting argument 'control$range=c(-100,100)' and 'control$length.out=10000'.")
  }

  if (range[2] != -1 * range[1]) {
    stop("range[2] should be -1*range[1]")
  }

  return(list(control = control, optionGRM = NULL))
}


#' Fit SPACox null model from survival outcomes or residuals
#'
#' Computes martingale residuals (or uses provided residuals) and an empirical
#' cumulant generating function (CGF) for SPA-based single-variant tests.
#'
#' @param response Either a \code{survival::Surv} object (time-to-event) or a
#'   numeric residual vector with class \code{"Residual"}.
#' @param designMat Numeric design matrix (n x p) of covariates.
#' @param subjData Vector of subject IDs aligned with rows of \code{designMat}.
#' @param control List with fields such as \code{range} and \code{length.out}
#'   for the CGF grid.
#' @param ... Extra arguments passed to \code{survival::coxph} when
#'   \code{response} is \code{Surv}.
#'
#' @return A list of class \code{"SPACox_NULL_Model"} with elements:
#'   \describe{
#'     \item{N}{Number of subjects.}
#'     \item{mresid}{Martingale residuals (numeric vector).}
#'     \item{cumul}{CGF grid as a matrix with columns t, K0, K1, K2.}
#'     \item{tX}{Transpose of design matrix with intercept (p+1 x n).}
#'     \item{yVec}{Status/event indicator or residual-based response.}
#'     \item{X.invXX}{Projection helper: X %*% solve(t(X) %*% X).}
#'     \item{subjData}{Character vector of subject IDs.}
#'   }
#'
#' @keywords internal
fitNullModel.SPACox <- function(
  response,
  designMat,
  subjData,
  control,
  ...
) {

  if (!(inherits(response, "Surv") || inherits(response, "Residual"))) {
    stop("For SPAcox, the response variable should be of class 'Surv' or 'Residual'.")
  }

  if (inherits(response, "Surv")) {
    formula <- response ~ designMat
    obj.coxph <- survival::coxph(formula, x = TRUE, ...)

    y <- obj.coxph$y
    yVec <- y[, ncol(y)]

    mresid <- obj.coxph$residuals
    Cova <- designMat
  }

  if (inherits(response, "Residual")) {
    yVec <- mresid <- response
    Cova <- designMat
  }

  range <- control$range
  length.out <- control$length.out

  if (length(mresid) != length(subjData)) {
    stop("Please check the consistency between 'formula' and 'subjData'.")
  }

  ### Get the covariate matrix to adjust for genotype
  X <- cbind(1, Cova)
  X.invXX <- X %*% solve(t(X) %*% X)
  tX <- t(X)

  ### calculate empirical CGF for martingale residuals
  idx0 <- qcauchy(1:length.out / (length.out + 1))
  idx1 <- idx0 * max(range) / max(idx0)

  cumul <- NULL
  .message("Calculating empirical CGF for martingale residuals")
  c <- 0
  for (i in idx1) {
    c <- c + 1
    t <- i
    e_resid <- exp(mresid * t)
    M0 <- mean(e_resid)
    M1 <- mean(mresid * e_resid)
    M2 <- mean(mresid^2 * e_resid)
    K0 <- log(M0)
    K1 <- M1 / M0
    K2 <- (M0 * M2 - M1^2) / M0^2
    cumul <- rbind(cumul, c(t, K0, K1, K2))
    if (c %% 1000 == 0) .message("CGF progress: %d/%d", c, length.out)
  }

  re <- list(
    N = length(mresid),
    mresid = mresid,
    cumul = cumul,
    tX = tX,
    yVec = yVec,
    X.invXX = X.invXX,
    subjData = subjData
  )

  class(re) <- "SPACox_NULL_Model"
  return(re)
}


checkControl.Marker.SPACox <- function(control) {

  default.control <- list(
    pVal_covaAdj_Cutoff = 5e-05
  )
  control <- updateControl(control, default.control)

  if (!is.numeric(control$pVal_covaAdj_Cutoff) || control$pVal_covaAdj_Cutoff <= 0) {
    stop("control$pVal_covaAdj_Cutoff should be a numeric value > 0.")
  }

  return(control)
}


setMarker.SPACox <- function(objNull, control) {

  setSPACoxobjInCPP(
    t_cumul = objNull$cumul,                # matrix: Cumulative hazard matrix
    t_mresid = objNull$mresid,              # numeric vector: Martingale residuals
    t_XinvXX = objNull$X.invXX,             # matrix: (X'X)^(-1) for variance calculation
    t_tX = objNull$tX,                      # matrix: Transpose of design matrix X
    t_N = length(objNull$mresid),           # integer: Sample size
    t_pVal_covaAdj_Cutoff = control$pVal_covaAdj_Cutoff,  # numeric: P-value cutoff for covariate adjustment
    t_SPA_Cutoff = control$SPA_Cutoff       # numeric: P-value cutoff for SPA
  )
}


mainMarker.SPACox <- function(
  genoType,
  genoIndex
) {

  OutList <- mainMarkerInCPP(
    t_method = "SPACox",      # character: Statistical method name
    t_genoType = genoType,    # character: "PLINK" or "BGEN"
    t_genoIndex = genoIndex   # integer vector: Genotype indices to analyze
  )

  obj.mainMarker <- data.frame(
    Marker = OutList$markerVec, # marker IDs
    Info = OutList$infoVec, # marker information: CHR:POS:REF:ALT
    AltFreq = OutList$altFreqVec, # alternative allele frequencies
    AltCounts = OutList$altCountsVec, # alternative allele counts
    MissingRate = OutList$missingRateVec, # alternative allele counts
    Pvalue = OutList$pvalVec, # marker-level p-values
    zScore = OutList$zScore
  )

  return(obj.mainMarker)
}
