\name{crrstep}
\alias{crrstep}
\alias{print.crrstep}
\alias{summary.crrstep}
\alias{print.summary.crrstep}
\alias{coef.crrstep}
\alias{vcov.crrstep}
\alias{confint.crrstep}
\alias{logLik.crrstep}
\alias{AIC.crrstep}
\alias{BIC.crrstep}

\title{Stepwise Regression for Fine-Gray Competing Risks Model}

\description{
Performs forward or backward stepwise variable selection for the Fine-Gray subdistribution hazard model in competing risks analysis. Candidate models are compared using AIC, BIC, or BICcr as the selection criterion. BICcr is a variant of BIC for competing risks whose per-parameter penalty is \eqn{k = \log(d)}, where \eqn{d} is the number of events of interest.
}

\usage{
crrstep(formula, scope.min = ~1, etype, ..., subset, data, 
        direction = c("backward", "forward"), 
        criterion = c("AIC", "BICcr", "BIC"), 
        crr.object = FALSE, trace = TRUE, steps = 100)

\method{print}{crrstep}(x, digits = max(3L, getOption("digits") - 3L), ...)

\method{summary}{crrstep}(object, conf.level = 0.95, ...)

\method{print}{summary.crrstep}(x, digits = max(3L, getOption("digits") - 3L),
        signif.stars = getOption("show.signif.stars"), ...)

\method{coef}{crrstep}(object, ...)

\method{vcov}{crrstep}(object, ...)

\method{confint}{crrstep}(object, parm, level = 0.95, ...)

\method{logLik}{crrstep}(object, ...)

\method{AIC}{crrstep}(object, ..., k = 2)

\method{BIC}{crrstep}(object, ...)
}

\arguments{
  \item{formula}{A formula object where the left-hand side (LHS) is the failure time variable and the right-hand side (RHS) lists the candidate predictor terms (e.g., \code{time ~ x1 + x2 + x3}). Factors, interactions, and polynomial terms are supported.}
  
  \item{scope.min}{A one-sided formula specifying the minimum model (lower scope) for backward selection or the starting model for forward selection. Default is \code{~1} (null model).}
  
  \item{etype}{An integer variable denoting the event type for each observation. By default, 1 indicates the event of interest, 2 indicates a competing event, and 0 indicates censoring. These codes can be changed with the \code{failcode} and \code{cencode} arguments, which are passed on to \code{\link[cmprsk]{crr}}.}
  
  \item{\dots}{Additional arguments passed to \code{\link[cmprsk]{crr}}. Important arguments include:
    \describe{
      \item{\code{failcode}}{Integer value denoting the event of interest (default is 1).}
      \item{\code{cencode}}{Integer value denoting censoring (default is 0).}
    }
    For \code{print} and \code{summary} methods, additional arguments are passed to \code{\link{printCoefmat}}.
  }
  
  \item{subset}{An optional vector specifying a subset of observations to use in the analysis.}
  
  \item{data}{A data frame containing all variables referenced in the formula and \code{etype}. Only complete cases are used; rows with missing values in any predictor are automatically removed.}
  
  \item{direction}{Character string specifying the direction of stepwise search. Either \code{"backward"} (default) starting from the full model, or \code{"forward"} starting from \code{scope.min}.}
  
  \item{criterion}{Character string specifying the model selection criterion:
    \describe{
      \item{\code{"AIC"}}{Akaike Information Criterion (default): \eqn{-2 \log L + 2p}}
      \item{\code{"BIC"}}{Bayesian Information Criterion: \eqn{-2 \log L + p \log(n)}}
      \item{\code{"BICcr"}}{BIC for competing risks: \eqn{-2 \log L + p \log(d)}}
    }
    where \eqn{L} is the partial likelihood, \eqn{p} is the number of parameters, \eqn{n} is the sample size, and \eqn{d} is the number of events of interest.
  }
  
  \item{crr.object}{Logical. If \code{TRUE}, returns the \code{crr} object from the final model instead of a \code{crrstep} object. Default is \code{FALSE}.}
  
  \item{trace}{Logical. If \code{TRUE} (default), displays the stepwise model selection process including criterion values at each step.}
  
  \item{steps}{Maximum number of steps allowed in the stepwise selection. Default is 100.}
  
  \item{x}{An object of class \code{"crrstep"} or \code{"summary.crrstep"}.}
  
  \item{object}{An object of class \code{"crrstep"}.}
  
  \item{digits}{Integer specifying the number of significant digits to display.}
  
  \item{conf.level}{Numeric value specifying the confidence level for confidence intervals. Default is 0.95.}
  
  \item{signif.stars}{Logical. If \code{TRUE}, significance stars are printed alongside p-values.}
  
  \item{parm}{A specification of which parameters to compute confidence intervals for. Can be a vector of numbers (indices) or names. If missing, all parameters are included.}
  
  \item{level}{The confidence level for \code{confint}. Default is 0.95.}
  
  \item{k}{Numeric penalty per parameter used by \code{AIC}. Default is 2, which gives the classical AIC.}
}

\details{
This function performs stepwise model selection for the Fine-Gray competing risks regression model implemented in \code{\link[cmprsk]{crr}}. The implementation is based on the approach of \code{\link[MASS]{stepAIC}} from the \pkg{MASS} package, adapted for the competing risks setting.

The function properly handles:
\itemize{
  \item Factor variables (automatically expanded to dummy variables)
  \item Interaction terms (e.g., \code{x1:x2}, \code{x1*x2})
  \item Polynomial terms (e.g., \code{I(x^2)})
  \item Complex terms (e.g., \code{I(x^2):factor})
}

The BICcr criterion, proposed by Volinsky and Raftery (2000) and adapted for competing risks by Kuk and Varadhan (2013), uses the number of events of interest rather than the total sample size in the penalty term. This is appropriate because the effective sample size in survival analysis is determined by the number of events, not the total number of observations.

Missing values are handled by complete case analysis. Observations with missing values in any predictor variable are removed before model fitting, and a message is displayed if \code{trace = TRUE}.
}

\value{
If \code{crr.object = TRUE}, returns the \code{\link[cmprsk]{crr}} object from the final selected model.

Otherwise, returns an object of class \code{"crrstep"} with the following components:

  \item{call}{The matched function call.}
  \item{formula}{The formula for the final selected model.}
  \item{direction}{The direction of stepwise selection used.}
  \item{criterion}{The selection criterion used.}
  \item{criterion_value}{The value of the selection criterion for the final model.}
  \item{coefficients}{Named numeric vector of estimated coefficients.}
  \item{std.error}{Standard errors of the coefficients.}
  \item{zvalue}{Wald z-statistics (\code{coefficients / std.error}).}
  \item{pvalue}{Two-sided p-values based on the normal distribution.}
  \item{conf.int}{Matrix of confidence intervals for the coefficients.}
  \item{conf.level}{The confidence level used for \code{conf.int}.}
  \item{loglik}{Log partial likelihood of the final model.}
  \item{loglik_null}{Log partial likelihood of the null model.}
  \item{n}{Number of observations used.}
  \item{nevent}{Number of events of interest.}
  \item{converged}{Logical indicating whether the \code{crr} algorithm converged.}
  \item{is_null_model}{Logical indicating whether the final model is the null model.}
  \item{crr_fit}{The \code{crr} object from the final model.}

The \code{summary} method returns an object of class \code{"summary.crrstep"} containing:
  \item{call}{The matched function call.}
  \item{formula}{The formula for the final selected model.}
  \item{direction}{The direction of stepwise selection.}
  \item{criterion}{The selection criterion used.}
  \item{criterion_value}{The criterion value for the final model.}
  \item{coefficients}{Formatted coefficient table with estimates, subdistribution hazard ratios, standard errors, z-values, and p-values.}
  \item{conf.int}{Confidence intervals for the subdistribution hazard ratios.}
  \item{conf.level}{The confidence level used.}
  \item{loglik}{Log partial likelihood of the final model.}
  \item{loglik_null}{Log partial likelihood of the null model.}
  \item{lr_test}{List containing the likelihood ratio test statistic, degrees of freedom, and p-value.}
  \item{n}{Number of observations.}
  \item{nevent}{Number of events of interest.}
  \item{converged}{Logical indicating convergence.}
  \item{is_null_model}{Logical indicating if the null model was selected.}
}

\references{
Fine, J. P. and Gray, R. J. (1999). A proportional hazards model for the subdistribution of a competing risk. \emph{Journal of the American Statistical Association}, 94(446), 496--509.

Volinsky, C. T. and Raftery, A. E. (2000). Bayesian information criterion for censored survival models. \emph{Biometrics}, 56(1), 256--262.

Kuk, D. and Varadhan, R. (2013). Model selection in competing risks regression. \emph{Statistics in Medicine}, 32(18), 3077--3088.
}

\author{
Ravi Varadhan and Deborah Kuk
}

\seealso{
\code{\link[cmprsk]{crr}} for the underlying Fine-Gray model,
\code{\link[MASS]{stepAIC}} for stepwise selection in other model classes, and
\code{\link{AIC}} and \code{\link{BIC}} for the information criteria.
}

\examples{
# crr(), the Fine-Gray fitting routine, is provided by cmprsk
library(cmprsk)

# Build a reproducible toy data set with five continuous
# covariates (x1-x5) and two categorical ones (x6, x7)
set.seed(123)
n_obs <- 500
ftime <- rexp(n_obs)
fstatus <- sample(0:2, n_obs, replace = TRUE)
xmat <- matrix(runif(5 * n_obs), nrow = n_obs)
x6 <- as.factor(sample(3, size = n_obs, replace = TRUE))
x7 <- as.factor(sample(5, size = n_obs, replace = TRUE))
# cbind() keeps only the integer codes of x6 and x7, which is why
# the formulas below wrap these columns in as.factor()
xmat <- cbind(xmat, x6, x7)
colnames(xmat) <- paste0("x", 1:7)
mydata <- data.frame(xmat, ftime = ftime, fstatus = fstatus)

# Candidate model: all seven covariates, x6 and x7 entered as factors
formula1 <- ftime ~ x1 + x2 + x3 + x4 + x5 + as.factor(x6) + as.factor(x7)

# BIC-driven backward elimination; the selection path is traced
fit1 <- crrstep(formula1, etype = fstatus, data = mydata,
                direction = "backward", criterion = "BIC")
print(fit1)

# AIC-driven forward selection, treating event type 2 as the
# event of interest and suppressing the trace
fit2 <- crrstep(formula1, etype = fstatus, data = mydata,
                direction = "forward", criterion = "AIC",
                failcode = 2, trace = FALSE)
print(fit2)
summary(fit2)

# Accessor methods available for a crrstep fit
coef(fit2)
confint(fit2)
confint(fit2, level = 0.90)
AIC(fit2)
BIC(fit2)
logLik(fit2)
vcov(fit2)

# Candidate model containing continuous-by-factor interactions
formula2 <- ftime ~ x1 + x2 + x3 + x4 + x4:as.factor(x6) + x5:as.factor(x7)
fit3 <- crrstep(formula2, etype = fstatus, data = mydata,
                direction = "backward", criterion = "AIC",
                trace = FALSE)
print(fit3)
summary(fit3)

# Ask for the underlying crr fit of the selected model
# (instead of a crrstep object) for further analysis
fit_crr <- crrstep(formula1, etype = fstatus, data = mydata,
                   direction = "backward", criterion = "BICcr",
                   crr.object = TRUE, trace = FALSE)
}

\keyword{regression}
\keyword{survival}