% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/modelling.R
\name{prepare_data_for_modelling}
\alias{prepare_data_for_modelling}
\title{Prepare Data for Training a Model}
\usage{
prepare_data_for_modelling(env_data, params)
}
\arguments{
\item{env_data}{A data table in long format.
Must include the following columns:
\describe{
\item{Station}{Station identifier for the data.}
\item{Komponente}{The environmental component being measured
(e.g., temperature, NO2).}
\item{Wert}{The measured value of the component.}
\item{date}{Timestamp as a \code{POSIXct} object in \verb{YYYY-MM-DD HH:MM:SS} format.}
\item{Komponente_txt}{A textual description of the component.}
}}

\item{params}{A list of modelling parameters loaded from \code{params.yaml}.
Must include:
\describe{
\item{meteo_variables}{A vector of meteorological variable names.}
\item{target}{The name of the target variable.}
}}
}
\value{
A \code{data.table} in wide format, with columns:
\code{date}, one column per component, and temporal features
like \code{date_unix}, \code{day_julian}, \code{weekday}, and \code{hour}.
}
\description{
Prepares environmental data by filtering for relevant components,
converting the data to a wide format, and adding temporal features. This
function should be called before
\code{\link[ubair:split_data_counterfactual]{split_data_counterfactual()}}.
}
\examples{
env_data <- data.table::data.table(
  Station = c("StationA", "StationA", "StationA"),
  Komponente = c("NO2", "TMP", "NO2"),
  Wert = c(50, 20, 40),
  date = as.POSIXct(c("2023-01-01 10:00:00", "2023-01-01 11:00:00", "2023-01-02 12:00:00"))
)
params <- list(meteo_variables = c("TMP"), target = "NO2")
prepared_data <- prepare_data_for_modelling(env_data, params)
print(prepared_data)

}
