% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/funKerasGeneric.R
\name{genericDataPrep}
\alias{genericDataPrep}
\title{Create an input pipeline using tfdatasets}
\usage{
genericDataPrep(
  data,
  batch_size = 32,
  minLevelSizeEmbedding = 100,
  embeddingDim = NULL
)
}
\arguments{
\item{data}{data. List (e.g., df$trainCensus, df$testGeneric, and df$valCensus data)}

\item{batch_size}{batch size. Default: 32}

\item{minLevelSizeEmbedding}{integer. Embedding will be used for
factor variables with more than \code{minLevelSizeEmbedding} levels. Default: \code{100}.}

\item{embeddingDim}{integer. Dimension used for embedding. Default: \code{floor(log(minLevelSizeEmbedding))}.}
}
\value{
a fitted \code{FeatureSpec} object and the hold-out testGeneric (=data$testGeneric).
This is returned as the following list.
\describe{
  \item{\code{train_ds_generic}}{train}
  \item{\code{val_ds_generic}}{validation}
  \item{\code{test_ds_generic}}{test}
  \item{\code{specGeneric_prep}}{feature spec object}
  \item{\code{testGeneric}}{data$testGeneric}
}
}
\description{
Create an input pipeline using tfdatasets
}
\examples{
\donttest{
### These examples require an activated Python environment as described in
### Bartz-Beielstein, T., Rehbach, F., Sen, A., and Zaefferer, M.:
### Surrogate Model Based Hyperparameter Tuning for Deep Learning with SPOT,
### June 2021. http://arxiv.org/abs/2105.14625.
PYTHON_RETICULATE <- FALSE
if(PYTHON_RETICULATE){
target <- "age"
batch_size <- 32
prop <- 2/3
cachedir <- "oml.cache"
dfCensus <- getDataCensus(target = target,
nobs = 1000, cachedir = cachedir, cache.only=FALSE)
data <- getGenericTrainValTestData(dfGeneric = dfCensus,
prop = prop)
specList <- genericDataPrep(data=data, batch_size = batch_size)
## Call iterator:
require(magrittr)
specList$train_ds_generic \%>\%
  reticulate::as_iterator() \%>\%
   reticulate::iter_next()
}
}

}
