% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/extract_clean_data.R
\name{extract_clean_data}
\alias{extract_clean_data}
\title{Extract final clean data using either absolute or best method generated outliers.}
\usage{
extract_clean_data(
  refdata,
  outliers,
  mode = "abs",
  var_col = NULL,
  threshold = NULL,
  warn = FALSE,
  verbose = FALSE,
  autothreshold = FALSE,
  pabs = 0.1,
  loess = FALSE,
  outlier_to_NA = FALSE,
  cutoff = 0.6
)
}
\arguments{
\item{refdata}{\code{dataframe}. The reference data for the species used in outlier detection.}

\item{outliers}{\code{string}. Output from the outlier detection process.}

\item{mode}{\code{character}. Either \code{abs} to use absolute outliers to filter data or \code{best} to use outliers from the best method.}

\item{var_col}{\code{string}. A parameter to be used if the \code{data} is a data frame and the user must indicate the column with species names.}

\item{threshold}{\code{numeric}. Value to consider whether the outlier is an absolute outlier or not.}

\item{warn}{\code{logical}. If \strong{FALSE}, warning on whether absolute outliers obtained at a low threshold is indicated. Default \strong{FALSE}.}

\item{verbose}{\code{logical}. Produces messages or not. Default \strong{FALSE}.}

\item{autothreshold}{\code{logical}. Identifies the threshold with mean number of absolute outliers. The search is limited to the range 0.51 to 1 since thresholds below 0.51
are deemed inappropriate for identifying absolute outliers. The autothreshold is used when \code{threshold} is set to \code{NULL}.}

\item{pabs}{\code{numeric}. Percentage of outliers allowed to be extracted from the data. If \code{best} is used to extract outliers and the \code{pabs} is exceeded,
the absolute outliers are removed instead. This is because some records in the best methods are repeated and they are likely to remove true values as outliers.}

\item{loess}{\code{logical}. Set to \code{TRUE} to use loess threshold optimization to extract clean data.}

\item{outlier_to_NA}{\code{logical}. If \code{TRUE} a clean dataset will have outliers replaced with NAs.
       This experimental parameter is used to output a dataframe when multiple variables of concern are considered
       during outlier detection.

% param multiple: TRUE for multiple species and FALSE for single species considered during outlier detection.
}

\item{cutoff}{\code{numeric}. Ranging from 0.5 to 0.8 indicating the cutoff to initiate the
LOESS model to optimize the identification of absolute outliers.}
}
\value{
Either a \code{list} or \code{dataframe} of cleaned records for multiple species.
}
\description{
Extract final clean data using either absolute or best method generated outliers.
}
\examples{
\donttest{
data(jdsdata)
data(efidata)
matchdata <- match_datasets(datasets = list(jds = jdsdata, efi = efidata),
                            lats = 'lat',
                            lons = 'lon',
                            species = c('speciesname','scientificName'),
                            country= c('JDS4_site_ID'),
                            date=c('sampling_date', 'Date'))


danube <- system.file('extdata/danube.shp.zip', package='specleanr')

db <- sf::st_read(danube, quiet=TRUE)


worldclim <- terra::rast(system.file('extdata/worldclim.tiff', package='specleanr'))

rdata <- pred_extract(data = matchdata,
                      raster= worldclim ,
                      lat = 'decimalLatitude',
                      lon= 'decimalLongitude',
                      colsp = 'species',
                      bbox = db,
                      minpts = 10,
                      list=TRUE,
                      merge=FALSE)


out_df <- multidetect(data = rdata, multiple = TRUE,
                      var = 'bio6',
                      output = 'outlier',
                      exclude = c('x','y'),
                      methods = c('zscore', 'adjbox','iqr', 'semiqr','hampel'))

# extract clean data using the absolute method for one species

extractabs <- extract_clean_data(refdata = rdata, outliers = out_df,
                                 mode = 'abs', threshold = 0.6,
                                 autothreshold = FALSE)

bestmout_bm <- extract_clean_data(refdata = rdata, outliers = out_df,
                                  mode = 'best', threshold = 0.6,
                                 autothreshold = FALSE)
}

}
\seealso{
\code{\link{search_threshold}}
}
