% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/classify_data.R
\name{classify_data}
\alias{classify_data}
\title{Extract final clean data using either absolute or best method generated outliers}
\usage{
classify_data(
  refdata,
  outliers,
  var_col = NULL,
  threshold = 0.1,
  warn = FALSE,
  verbose = TRUE,
  classify = "med",
  EIF = FALSE
)
}
\arguments{
\item{refdata}{\code{dataframe}. The reference data for the species used in outlier detection.}

\item{outliers}{\code{string}. Output from the outlier detection process.}

\item{var_col}{\code{string}. The column with species names, to be indicated by the user if \code{refdata} is a data frame.}

\item{threshold}{\code{numeric}. Value to consider whether the outlier is an absolute outlier or not.}

\item{warn}{\code{logical}. Whether to issue a warning when absolute outliers are obtained at a low threshold. Default \strong{FALSE}.}

\item{verbose}{\code{logical}. Whether to produce messages or not. Default \strong{TRUE}.}

\item{classify}{\code{string}. Categorize the data based on the classification of correlation coefficients described in \code{Akoglu 2018}. For
more information, see the details section.}

\item{EIF}{\code{logical}. Whether to calculate the empirical influence function for each value. Default \strong{FALSE}.}
}
\value{
Either a \code{list} or \code{dataframe} of cleaned records for multiple species.
}
\description{
Extract final clean data using either absolute or best method generated outliers.
}
\details{
Outlier cluster weights are based on the statistical classification of coefficients, mostly for correlation, described in \code{Akoglu 2018}.
They are classified according to three naming standards, namely Dancey & Reidy (Psychology), Quinnipiac University (Politics) and Chan YH (Medicine).
All classifications are implemented in the function and each affects the data clusters. The default is Chan YH (Medicine).
}
\examples{

\donttest{

data(jdsdata)
data(efidata)
matchdata <- match_datasets(datasets = list(jds = jdsdata, efi = efidata),
                            lats = 'lat',
                            lons = 'lon',
                            species = c('speciesname','scientificName'),
                            country= c('JDS4_site_ID'),
                            date=c('sampling_date', 'Date'))


danube <- system.file('extdata/danube.shp.zip', package='specleanr')

db <- sf::st_read(danube, quiet=TRUE)


worldclim <- terra::rast(system.file('extdata/worldclim.tiff', package='specleanr'))

rdata <- pred_extract(data = matchdata,
                      raster= worldclim ,
                      lat = 'decimalLatitude',
                      lon= 'decimalLongitude',
                      colsp = 'species',
                      bbox = db,
                      minpts = 10,
                      list=TRUE,
                      merge=FALSE)


out_df <- multidetect(data = rdata, multiple = TRUE,
                      var = 'bio6',
                      output = 'outlier',
                      exclude = c('x','y'),
                      methods = c('zscore', 'adjbox','iqr', 'semiqr','hampel'))

# Extract the clean data using the absolute method for one species

extractabs <- classify_data(refdata = rdata, outliers = out_df)
}

}
\references{
Akoglu, H. 2018. User’s guide to correlation coefficients. - Turk J Emerg Med 18: 91–93.
}
\seealso{
\code{\link{search_threshold}}
}
