% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/score-splitting.R
\name{split_cp}
\alias{split_cp}
\title{Split Samples And Predict Class Probability (Sample Membership)}
\usage{
split_cp(x_train, x_test, n_trees = 500L, response_name = "label")
}
\arguments{
\item{x_train}{Training (reference) sample.}

\item{x_test}{Test sample.}

\item{n_trees}{The number of trees in the random forest.}

\item{response_name}{The column name of the categorical outcome to predict.}
}
\value{
A named list or object of class \code{outlier.test} containing:
\itemize{
   \item \code{train}: vector of scores in training set
   \item \code{test}: vector of scores in test set
}
}
\description{
Predict class probability using a random forest from the \pkg{ranger}
package. The prefix \emph{cp} stands for class probability, which reflects
sample membership (training versus test set). This function is useful for
testing for dataset shift: classifier performance is used to mimic a test of
equal distributions.
}
\details{
\code{split_cp} fits a classifier to discriminate between training and test
sets. It splits the training and test sets into half samples: the first
halves are used for model fitting and the second halves for out-of-sample
predictions. As a result, estimating the p-value can take advantage of the
asymptotic null distribution.
}
\section{Notes}{

See the docs for \code{score_cp} for more information. \code{split_cp} uses
sample splitting (half samples), rather than out-of-bag predictions as in
\code{score_cp}, for inference. Rinaldo et al. (2019) discuss how sample
splitting can be used for valid inference (p-value estimation).
}

\examples{
\donttest{
library(dsos)
set.seed(12345)
data(iris)
setosa <- iris[1:50, 1:4] # Training sample: Species == 'setosa'
versicolor <- iris[51:100, 1:4] # Test sample: Species == 'versicolor'
outlier_scores <- split_cp(setosa, versicolor, response_name = "label")
str(outlier_scores)
}

}
\references{
Rinaldo, A., Wasserman, L., & G'Sell, M. (2019).
\emph{Bootstrapping and sample splitting for high-dimensional, assumption-lean inference}.
The Annals of Statistics, 47(6), 3438-3469.
}
\seealso{
\code{\link[=score_cp]{score_cp()}} for the variant that uses out-of-bag predictions rather than sample splitting.
}
\concept{splitting}
