% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/policy_cmab_epsilon_greedy.R
\name{ContextualEpsilonGreedy}
\alias{ContextualEpsilonGreedy}
\title{Policy: ContextualEpsilonGreedy with unique linear models}
\description{
Policy: ContextualEpsilonGreedy with unique linear models
}
\section{Usage}{

\preformatted{
policy <- ContextualEpsilonGreedy(epsilon = 0.1)
}
}

\section{Arguments}{


\describe{
\item{\code{epsilon}}{
double, a positive real value (R+); presumably the exploration rate of the epsilon-greedy policy
}
}
}

\section{Parameters}{


\describe{
\item{\code{A}}{
\eqn{d \times d}{d x d} identity matrix
}
\item{\code{b}}{
a zero vector of length d
}
}
}

\section{Methods}{


\describe{
\item{\code{new(epsilon = 0.1)}}{Generates a new \code{ContextualEpsilonGreedy} object. Arguments are defined in the Arguments section above.}
}

\describe{
\item{\code{set_parameters()}}{
Each policy assigns the parameters it wants to keep track of to the list
\code{self$theta_to_arms}, which has to be defined in the body of \code{set_parameters()}.
The parameters defined here can later be accessed by arm index in the following way:
\code{theta[[index_of_arm]]$parameter_name}
}
}

\describe{
\item{\code{get_action(context)}}{
Here, the policy decides which arm to choose, based on the current values
of its parameters and, potentially, the current context.
}
}

\describe{
\item{\code{set_reward(reward, context)}}{
Here, the policy updates its parameter values
based on the reward received and, potentially, the current context.
}
}
}

\references{
refhere
}
\seealso{
Core contextual classes: \code{\link{Bandit}}, \code{\link{Policy}}, \code{\link{Simulator}},
\code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}}

Bandit subclass examples: \code{\link{BasicBernoulliBandit}}, \code{\link{ContextualLogitBandit}}, \code{\link{OfflineReplayEvaluatorBandit}}

Policy subclass examples: \code{\link{EpsilonGreedyPolicy}}, \code{\link{ContextualThompsonSamplingPolicy}}
}
