% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/quantforesterror.R
\name{quantForestError}
\alias{quantForestError}
\alias{forestError}
\title{Quantify random forest prediction error}
\usage{
quantForestError(
  forest,
  X.train,
  X.test,
  Y.train = NULL,
  what = c("mspe", "bias", "interval", "p.error", "q.error"),
  alpha = 0.05,
  n.cores = 1
)
}
\arguments{
\item{forest}{The random forest object being used for prediction.}

\item{X.train}{A \code{matrix} or \code{data.frame} with the observations
that were used to train \code{forest}; each row should be an observation,
and each column should be a predictor variable.}

\item{X.test}{A \code{matrix} or \code{data.frame} with the observations to
be predicted; each row should be an observation, and each column should be
a predictor variable.}

\item{Y.train}{A vector of the responses of the observations that were used
to train \code{forest}. Required if \code{forest} was created using
\code{ranger}, but not if \code{forest} was created using \code{randomForest},
\code{randomForestSRC}, or \code{quantregForest}.}

\item{what}{A character vector indicating which estimates are desired.
Possible options are conditional mean squared prediction errors (\code{"mspe"}),
conditional biases (\code{"bias"}), conditional prediction intervals (\code{"interval"}),
conditional error distribution functions (\code{"p.error"}), and
conditional error quantile functions (\code{"q.error"}).}

\item{alpha}{A vector of type-I error rates desired for the conditional prediction
intervals; required if \code{"interval"} is included in \code{what}.}

\item{n.cores}{Number of cores to use (for parallel computation in \code{ranger}).}
}
\value{
A \code{data.frame} with one or more of the following columns, as described
  in the details section:

  \item{pred}{The random forest predictions of the test observations}
  \item{mspe}{The estimated conditional mean squared prediction errors of
  the random forest predictions}
  \item{bias}{The estimated conditional biases of the random forest
  predictions}
  \item{lower_alpha}{The estimated lower bounds of the conditional alpha-level
  prediction intervals for the test observations}
  \item{upper_alpha}{The estimated upper bounds of the conditional alpha-level
  prediction intervals for the test observations}

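  If \code{"p.error"} or \code{"q.error"} is included in \code{what}, a list
  is returned instead. Its first element, named \code{"estimates"}, is the
  \code{data.frame} described above, and its remaining elements are one or
  both of the following functions, as described in the details section: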

  \item{perror}{The estimated cumulative distribution functions of the
  conditional error distributions associated with the test predictions}
  \item{qerror}{The estimated quantile functions of the conditional error
  distributions associated with the test predictions}
}
\description{
Estimates the conditional mean squared prediction errors, conditional biases,
conditional prediction intervals, and conditional error distributions of
random forest predictions.
}
\details{
This function accepts regression random forests built using the \code{randomForest},
\code{ranger}, \code{randomForestSRC}, and \code{quantregForest} packages.
When training the random forest using \code{randomForest}, \code{ranger}, or
\code{quantregForest}, \code{keep.inbag} must be set to \code{TRUE}. When
training the random forest using \code{randomForestSRC}, \code{membership}
must be set to \code{TRUE}.

The predictions computed by \code{ranger} can be parallelized by setting the
value of \code{n.cores} to be greater than 1.

The random forest predictions are always returned in a \code{data.frame}. Additional
columns are included in the \code{data.frame} depending on the user's selections in
the argument \code{what}. In particular, including \code{"mspe"} in \code{what}
adds a column with the estimated conditional mean squared prediction
error of each test prediction; including \code{"bias"} in
\code{what} adds a column with the estimated conditional bias of each test
prediction; and including \code{"interval"} in \code{what}
adds columns with the estimated lower and upper bounds of the conditional
prediction intervals for each test prediction.

If \code{"p.error"} or \code{"q.error"} is included in \code{what}, then a
list will be returned as output. The first element of the list, named
\code{"estimates"}, is the \code{data.frame} described in the above paragraph. The
other one or two elements of the list are the estimated cumulative distribution
functions (\code{perror}) and/or the estimated quantile functions (\code{qerror})
of the conditional error distributions associated with the test predictions.
}
\examples{
# load the airquality data set
data(airquality)

# keep only the observations with no missing values in any column
# (complete.cases() checks the response as well as the predictors)
airquality <- airquality[complete.cases(airquality), ]

# get number of observations and the response column index
n <- nrow(airquality)
response.col <- 1

# split data into training and test sets; roughly 90 percent of the
# observations go to the training set, with the size rounded down
# explicitly instead of relying on sample() to truncate a fractional size
train.ind <- sample(seq_len(n), floor(n * 0.9), replace = FALSE)
Xtrain <- airquality[train.ind, -response.col]
Ytrain <- airquality[train.ind, response.col]
Xtest <- airquality[-train.ind, -response.col]
Ytest <- airquality[-train.ind, response.col]

# fit random forest to the training data; keep.inbag = TRUE is required
# for forests built with randomForest (see the details section)
rf <- randomForest::randomForest(Xtrain, Ytrain, nodesize = 5,
                                 ntree = 500,
                                 keep.inbag = TRUE)

# estimate conditional mean squared prediction errors,
# biases, prediction intervals, error distribution
# functions, and error quantile functions for the test
# observations (the default value of what requests all of them)
output <- quantForestError(rf, Xtrain, Xtest,
                           alpha = 0.05)

# estimate just the conditional mean squared prediction errors
# and prediction intervals for the test observations
output <- quantForestError(rf, Xtrain, Xtest,
                           what = c("mspe", "interval"),
                           alpha = 0.05)

# estimate just the conditional error distribution functions
# and error quantile functions for the test observations
output <- quantForestError(rf, Xtrain, Xtest,
                           what = c("p.error", "q.error"))
}
\seealso{
\code{\link{perror}}, \code{\link{qerror}}
}
\author{
Benjamin Lu \code{<b.lu@berkeley.edu>}; Johanna Hardin \code{<jo.hardin@pomona.edu>}
}
