% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/onehotencoding.R
\name{ohse}
\alias{ohse}
\title{One Hot Smart Encoding (Dummy Variables)}
\usage{
ohse(
  df,
  redundant = FALSE,
  drop = TRUE,
  ignore = NULL,
  dates = FALSE,
  holidays = FALSE,
  country = "Venezuela",
  currency_pair = NA,
  trim = 0,
  limit = 10,
  variance = 0.9,
  other_label = "OTHER",
  sep = "_",
  quiet = FALSE,
  ...
)
}
\arguments{
\item{df}{Dataframe}

\item{redundant}{Boolean. Should we keep redundant columns? i.e. If the
column only has two different values, should we keep both new columns?
If set to \code{NULL}, only binary variables will dump redundant columns.}

\item{drop}{Boolean. Drop automatically some useless features?}

\item{ignore}{Vector or character. Which columns should be ignored?}

\item{dates}{Boolean. Do you want the function to create more features
out of the date/time columns?}

\item{holidays}{Boolean. Include holidays as new columns?}

\item{country}{Character or vector. For which countries should the holidays
be included?}

\item{currency_pair}{Character. Which currency exchange do you
wish to get the history from? e.g. USD/COP, EUR/USD...}

\item{trim}{Integer. Trim names until the nth character}

\item{limit}{Integer. Limit one hot encoding to the n most frequent
values of each column. Set to \code{NA} to ignore argument.}

\item{variance}{Numeric. Drop columns with more than n variance.
Range: 0-1. For example: if a variable contains 91 unique
values out of 100 observations, this column will be suppressed if
value is set to 0.9.}

\item{other_label}{Character. Which text do you wish to replace
the filtered values with?}

\item{sep}{Character. Separator string.}

\item{quiet}{Boolean. Quiet all messages and summaries?}

\item{...}{Additional parameters}
}
\value{
data.frame in which all features are numerical by nature or
transformed with one hot encoding.
}
\description{
This function lets the user automatically transform a dataframe with
categorical columns into numerical ones using the one hot encoding technique.
}
\examples{
data(dft)
dft <- dft[, c(2, 3, 5, 9, 11)]

ohse(dft, limit = 3) \%>\% head(3)
ohse(dft, limit = 3, redundant = NULL) \%>\% head(3)

# Getting rid of columns with no (or too much) variance
dft$no_variance1 <- 0
dft$no_variance2 <- c("A", rep("B", nrow(dft) - 1))
dft$no_variance3 <- as.character(rnorm(nrow(dft)))
dft$no_variance4 <- c(rep("A", 20), round(rnorm(nrow(dft) - 20), 4))
ohse(dft, limit = 3) \%>\% head(3)
}
\seealso{
Other Data Wrangling: 
\code{\link{balance_data}()},
\code{\link{categ_reducer}()},
\code{\link{cleanText}()},
\code{\link{date_cuts}()},
\code{\link{date_feats}()},
\code{\link{formatNum}()},
\code{\link{holidays}()},
\code{\link{impute}()},
\code{\link{left}()},
\code{\link{normalize}()},
\code{\link{ohe_commas}()},
\code{\link{removenacols}()},
\code{\link{replaceall}()},
\code{\link{textFeats}()},
\code{\link{textTokenizer}()},
\code{\link{vector2text}()},
\code{\link{year_month}()}

Other Feature Engineering: 
\code{\link{date_feats}()},
\code{\link{holidays}()}

Other One Hot Encoding: 
\code{\link{date_feats}()},
\code{\link{holidays}()},
\code{\link{ohe_commas}()}
}
\concept{Data Wrangling}
\concept{Feature Engineering}
\concept{One Hot Encoding}
