% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/preprocessing.r
\name{pre_impute_knn}
\alias{pre_impute_knn}
\title{Nearest neighbors imputation}
\usage{
pre_impute_knn(data, k = 0.05, distance_matrix)
}
\arguments{
\item{data}{Fitting and testing data sets, as returned by
\code{\link{pre_split}}.}

\item{k}{Number of nearest neighbors to calculate mean from. Set to < 1 to
specify a fraction.}

\item{distance_matrix}{A matrix, \code{\link{dist}} object or
  \code{"auto"}. Note that \code{"auto"} will recalculate the distance
  matrix in each fold, which is only meaningful if the features of
  \code{x} vary between folds. Otherwise the recalculation is wasted
  computation.}
}
\description{
Nearest neighbor methods need a distance matrix of the dataset they work on.
When doing repeated model fittings on subsets of the entire dataset it is
unnecessary to recalculate it every time. Therefore, this function requires
the user to manually calculate it prior to resampling and supply it in a
wrapper function.
}
\details{
Features with fewer than \code{k} non-missing values will be removed
automatically.
}
\examples{
x <- iris[-5]
x[sample(nrow(x), 30), 3] <- NA
my.dist <- dist(x)
evaluate(modeling_procedure("lda"), x = x, y = iris$Species,
    pre_process = function(...){
        pre_split(...) \%>\% pre_impute_knn(k = 4, distance_matrix = my.dist)
    }
)
}
\author{
Christofer \enc{Bäcklin}{Backlin}
}

