% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mHMM.R
\name{mHMM}
\alias{mHMM}
\title{Multilevel hidden Markov model using Bayesian estimation}
\usage{
mHMM(
  s_data,
  gen,
  xx = NULL,
  start_val,
  mcmc,
  return_path = FALSE,
  print_iter,
  show_progress = TRUE,
  gamma_hyp_prior = NULL,
  emiss_hyp_prior = NULL,
  gamma_sampler = NULL,
  emiss_sampler = NULL
)
}
\arguments{
\item{s_data}{A matrix containing the observations to be modeled, where the
rows represent the observations over time. In \code{s_data}, the first
column indicates subject id number. Hence, the id number is repeated over
rows equal to the number of observations for that subject. The subsequent
columns contain the dependent variable(s). Note that the dependent
variables have to be numeric, i.e., they cannot be a (set of) factor
variable(s). The total number of rows is equal to the sum over the number
of observations of each subject, and the number of columns is equal to the
number of dependent variables (\code{n_dep}) + 1. The number of
observations can vary over subjects.}

\item{gen}{List containing the following elements denoting the general model
properties:
\itemize{\item{\code{m}: numeric vector with length 1 denoting the number
of hidden states}
\item{\code{n_dep}: numeric vector with length 1 denoting the
number of dependent variables}
\item{\code{q_emiss}: numeric vector with length \code{n_dep} denoting the
number of observed categories for the categorical emission distribution
for each of the dependent variables.}}}

\item{xx}{An optional list of (level 2) covariates to predict the transition
  matrix and/or the emission probabilities. Level 2 covariate(s) means that
  there is one observation per subject of each covariate. The first element
  in the list \code{xx} is used to predict the transition matrix. Subsequent
  elements in the list are used to predict the emission distribution of (each
  of) the dependent variable(s). Each element in the list is a matrix, with
  the number of rows equal to the number of subjects. The first column of
  each matrix represents the intercept, that is, a column only consisting of
  ones. Subsequent columns correspond to covariates used to predict the
  transition matrix / emission distribution. See \emph{Details} for more
  information on the use of covariates.

  If \code{xx} is omitted completely, \code{xx} defaults to \code{NULL},
  signifying that no covariates are used. Specific elements in the list can also be left
  empty (i.e., set to \code{NULL}) to signify that either the transition
  probability matrix or a specific emission distribution is not predicted by
  covariates.}

\item{start_val}{List containing the start values for the transition
probability matrix gamma and the emission distribution(s). The first
element of the list contains a \code{m} by \code{m} matrix with the start
values for gamma. The subsequent elements contain \code{m} by
\code{q_emiss[k]} matrices for the start values for each of the \code{k} in
\code{n_dep} emission distribution(s). Note that \code{start_val} should not contain
nested lists (i.e., lists within lists).}

\item{mcmc}{List of Markov chain Monte Carlo (MCMC) arguments, containing the
following elements:
\itemize{\item{\code{J}: numeric vector with length 1 denoting the number
of iterations of the MCMC algorithm}
\item{\code{burn_in}: numeric vector with length 1 denoting the
burn-in period for the MCMC algorithm.}}}

\item{return_path}{A logical scalar. Should the sampled state sequence
obtained at each iteration and for each subject be returned by the function
(\code{return_path = TRUE}) or not (\code{return_path = FALSE}). Note that
the sampled state sequence is quite a large object, hence the default
setting is \code{return_path = FALSE}. Can be used for local decoding
purposes.}

\item{print_iter}{The argument \code{print_iter} is deprecated; please use
\code{show_progress} instead to show the progress of the algorithm.}

\item{show_progress}{A logical scalar. Should the function show a text
progress bar in the \code{R} console to represent the progress of the
algorithm (\code{show_progress = TRUE}) or not (\code{show_progress =
FALSE}). Defaults to \code{show_progress = TRUE}.}

\item{gamma_hyp_prior}{An optional object of class \code{mHMM_prior_gamma}
containing user specified parameter values for the hyper-prior distribution
on the transition probability matrix gamma, generated by the function
\code{\link{prior_gamma}}.}

\item{emiss_hyp_prior}{An optional object of the class
\code{mHMM_prior_emiss} containing user specified parameter values for the
hyper-prior distribution on the categorical emission distribution,
generated by the function \code{\link{prior_emiss_cat}}.}

\item{gamma_sampler}{An optional object of the class \code{mHMM_pdRW_gamma}
containing user specified settings for the proposal distribution of the
random walk (RW) Metropolis sampler on the subject level transition
probability matrix parameters, generated by the function
\code{\link{pd_RW_gamma}}.}

\item{emiss_sampler}{An optional object of the class \code{mHMM_pdRW_emiss}
containing user specified settings for the proposal distribution of the
random walk (RW) Metropolis sampler on the subject level emission
distribution(s) parameters, generated by the function
\code{\link{pd_RW_emiss_cat}}.}
}
\value{
\code{mHMM} returns an object of class \code{mHMM}, which has
  \code{print} and \code{summary} methods to see the results.
  The object contains the following components:
  \describe{
  \item{\code{PD_subj}}{A list containing one matrix per subject with the
  subject level parameter estimates and the log likelihood over the
  iterations of the hybrid Metropolis within Gibbs sampler. The iterations of
  the sampler are contained in the rows, and the columns contain the subject
  level (parameter) estimates of, in this order, the emission probabilities,
  the transition probabilities and the log likelihood.}
  \item{\code{gamma_prob_bar}}{A matrix containing the group level parameter
  estimates of the transition probabilities over the iterations of the hybrid
  Metropolis within Gibbs sampler. The iterations of the sampler are
  contained in the rows, and the columns contain the group level parameter
  estimates. If covariates were included in the analysis, the group level
  probabilities represent the predicted probability given that the covariate
  is at the average value for continuous covariates, or given that the
  covariate equals zero for dichotomous covariates.}
  \item{\code{gamma_int_bar}}{A matrix containing the group level intercepts
  of the Multinomial logistic regression modeling the transition
  probabilities over the iterations of the hybrid Metropolis within Gibbs
  sampler. The iterations of the sampler are contained in the rows, and the
  columns contain the group level intercepts.}
  \item{\code{gamma_cov_bar}}{A matrix containing the group level regression
  coefficients of the Multinomial logistic regression predicting the
  transition probabilities over the iterations of the hybrid Metropolis within
  Gibbs sampler. The iterations of the sampler are contained in the rows, and
  the columns contain the group level regression coefficients.}
  \item{\code{gamma_int_subj}}{A list containing one matrix per subject
  denoting the subject level intercepts of the Multinomial logistic
  regression modeling the transition probabilities over the iterations of the
  hybrid Metropolis within Gibbs sampler. The iterations of the sampler are
  contained in the rows, and the columns contain the subject level
  intercepts.}
  \item{\code{gamma_naccept}}{A matrix containing the number of accepted
  draws at the subject level RW Metropolis step for each set of parameters of
  the transition probabilities. The subjects are contained in the rows, and
  the columns contain the sets of parameters.}
  \item{\code{emiss_prob_bar}}{A list containing one matrix per dependent
  variable, denoting the group level emission probabilities of each dependent
  variable over the iterations of the hybrid Metropolis within Gibbs sampler.
  The iterations of the sampler are contained in the rows of the matrix, and
  the columns contain the group level emission probabilities. If covariates
  were included in the analysis, the group level probabilities represent the
  predicted probability given that the covariate is at the average value for
  continuous covariates, or given that the covariate equals zero for
  dichotomous covariates.}
  \item{\code{emiss_int_bar}}{A list containing one matrix per dependent
  variable, denoting the group level intercepts of each dependent variable of
  the Multinomial logistic regression modeling the probabilities of the
  emission distribution over the iterations of the hybrid Metropolis within
  Gibbs sampler. The iterations of the sampler are contained in the rows of
  the matrix, and the columns contain the group level intercepts.}
  \item{\code{emiss_cov_bar}}{A list containing one matrix per dependent
  variable, denoting the group level regression coefficients of the
  Multinomial logistic regression predicting the emission probabilities within
  each of the dependent variables over the iterations of the hybrid
  Metropolis within Gibbs sampler. The iterations of the sampler are
  contained in the rows of the matrix, and the columns contain the group
  level regression coefficients.}
  \item{\code{emiss_int_subj}}{A list containing one list per subject denoting
  the subject level intercepts of each dependent variable of the Multinomial
  logistic regression modeling the probabilities of the emission distribution
  over the iterations of the hybrid Metropolis within Gibbs sampler. Each
  lower level list contains one matrix per dependent variable, in which
  iterations of the sampler are contained in the rows, and the columns
  contain the subject level intercepts.}
  \item{\code{emiss_naccept}}{A list containing one matrix per dependent
  variable with the number of accepted draws at the subject level RW
  Metropolis step for each set of parameters of the emission distribution.
  The subjects are contained in the rows, and the columns of the matrix
  contain the sets of parameters.}
  \item{\code{input}}{Overview of used input specifications: the number of
  states \code{m}, the number of used dependent variables \code{n_dep}, the
  number of output categories for each of the dependent variables
  \code{q_emiss}, the number of iterations \code{J} and the specified burn-in
  period \code{burn_in} of the hybrid Metropolis within Gibbs sampler, the
  number of subjects \code{n_subj}, the observation length for each subject
  \code{n_vary}, and the column names of the dependent variables
  \code{dep_labels}.}
  \item{\code{sample_path}}{A list containing one matrix per subject with the
  sampled hidden state sequence over the hybrid Metropolis within Gibbs
  sampler. The time points of the dataset are contained in the rows, and the
  sampled paths over the iterations are contained in the columns. Only
  returned if \code{return_path = TRUE}. }
}
}
\description{
\code{mHMM} fits a multilevel (also known as mixed or random effects) hidden
Markov model (HMM) to intensive longitudinal data with categorical observations
of multiple subjects using Bayesian estimation, and creates an object of
class \code{mHMM}. By using a multilevel framework, we allow for heterogeneity in
the model parameters between subjects, while estimating one overall HMM. The
function includes the possibility to add covariates at level 2 (i.e., at the
subject level) and have varying observation lengths over subjects. For a
short description of the package see \link{mHMMbayes}. See
\code{vignette("tutorial-mhmm")} for an introduction to multilevel hidden
Markov models and the package, and see \code{vignette("estimation-mhmm")} for
an overview of the used estimation algorithms.
}
\details{
Covariates specified in \code{xx} can either be dichotomous or continuous
variables. Dichotomous variables have to be coded as 0/1 variables.
Categorical or factor variables cannot yet be used as predictor
covariates. The user can however break up the categorical variable into
multiple dummy variables (i.e., dichotomous variables), which can be used
simultaneously in the analysis. Continuous predictors are automatically
centered. That is, the mean value of the covariate is subtracted from all
values of the covariate such that the new mean equals zero. This is done such
that the presented probabilities in the output (i.e., for the population
transition probability matrix and population emission probabilities)
correspond to the predicted probabilities at the average value of the
covariate(s).
}
\examples{
###### Example on package example data, see ?nonverbal
\donttest{
# Specify the general model properties: the number of hidden states (m), the
# number of dependent variables (n_dep), and the number of observed
# categories of each dependent variable (q_emiss):
m <- 2
n_dep <- 4
q_emiss <- c(3, 2, 3, 2)

# Specify the starting values: an m by m transition probability matrix
# (start_TM) and, per dependent variable k, an m by q_emiss[k] matrix of
# emission probabilities (start_EM):
start_TM <- diag(.8, m)
start_TM[lower.tri(start_TM) | upper.tri(start_TM)] <- .2
start_EM <- list(matrix(c(0.05, 0.90, 0.05,
                          0.90, 0.05, 0.05), byrow = TRUE,
                        nrow = m, ncol = q_emiss[1]), # vocalizing patient
                 matrix(c(0.1, 0.9,
                          0.1, 0.9), byrow = TRUE, nrow = m,
                        ncol = q_emiss[2]), # looking patient
                 matrix(c(0.90, 0.05, 0.05,
                          0.05, 0.90, 0.05), byrow = TRUE,
                        nrow = m, ncol = q_emiss[3]), # vocalizing therapist
                 matrix(c(0.1, 0.9,
                          0.1, 0.9), byrow = TRUE, nrow = m,
                        ncol = q_emiss[4])) # looking therapist

# Run a model without covariate(s):
# Note that for reasons of running time, J is set at a ridiculously low value.
# One would typically use a number of iterations J of at least 1000,
# and a burn_in of 200.
out_2st <- mHMM(s_data = nonverbal,
                gen = list(m = m, n_dep = n_dep, q_emiss = q_emiss),
                start_val = c(list(start_TM), start_EM),
                mcmc = list(J = 11, burn_in = 5))

# Print and summarize the fitted model:
out_2st
summary(out_2st)

# Plot the posterior densities of the transition probabilities:
plot(out_2st, component = "gamma", col = c("darkslategray3", "goldenrod"))

# Run a model including a covariate (see ?nonverbal_cov) to predict the
# emission distribution for each of the 4 dependent variables. Each matrix in
# xx starts with an intercept column of ones; the first list element (for the
# transition matrix) holds the intercept only, so the transition matrix is
# not predicted by the covariate:

n_subj <- 10
xx_emiss <- rep(list(matrix(c(rep(1, n_subj),nonverbal_cov$std_CDI_change),
                            ncol = 2, nrow = n_subj)), n_dep)
xx <- c(list(matrix(1, ncol = 1, nrow = n_subj)), xx_emiss)
out_2st_c <- mHMM(s_data = nonverbal, xx = xx,
                  gen = list(m = m, n_dep = n_dep, q_emiss = q_emiss),
                  start_val = c(list(start_TM), start_EM),
                  mcmc = list(J = 11, burn_in = 5))

}
###### Example on simulated data
# Simulate data for 10 subjects with each 100 observations:
n_t <- 100
n <- 10
m <- 2
n_dep <- 1
q_emiss <- 3
gamma <- matrix(c(0.8, 0.2,
                  0.3, 0.7), ncol = m, byrow = TRUE)
emiss_distr <- list(matrix(c(0.5, 0.5, 0.0,
                        0.1, 0.1, 0.8), nrow = m, ncol = q_emiss, byrow = TRUE))
data1 <- sim_mHMM(n_t = n_t, n = n,
                  gen = list(m = m, n_dep = n_dep, q_emiss = q_emiss),
                  gamma = gamma, emiss_distr = emiss_distr,
                  var_gamma = .5, var_emiss = .5)

# The general model properties (m, n_dep, q_emiss) defined for the simulation
# are reused for the analysis; for the example, we use the simulation input
# (gamma and emiss_distr) as starting values.

# Run the model on the simulated data:
out_2st_sim <- mHMM(s_data = data1$obs,
                 gen = list(m = m, n_dep = n_dep, q_emiss = q_emiss),
                 start_val = c(list(gamma), emiss_distr),
                 mcmc = list(J = 11, burn_in = 5))


}
\references{
\insertRef{rabiner1989}{mHMMbayes}

\insertRef{scott2002}{mHMMbayes}

\insertRef{altman2007}{mHMMbayes}

\insertRef{rossi2012}{mHMMbayes}

\insertRef{zucchini2017}{mHMMbayes}
}
\seealso{
\code{\link{sim_mHMM}} for simulating multilevel hidden Markov data,
  \code{\link{vit_mHMM}} for obtaining the most likely hidden state sequence
  for each subject using the Viterbi algorithm, \code{\link{obtain_gamma}}
  and \code{\link{obtain_emiss}} for obtaining the transition or emission
  distribution probabilities of a fitted model at the group or subject level,
  and \code{\link{plot.mHMM}} for plotting the posterior densities of a
  fitted model.
}
