% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lmmSpline-method.R
\docType{methods}
\name{lmmSpline}
\alias{lmmSpline}
\title{Data-driven linear mixed effect model spline modelling}
\usage{
lmmSpline(data, time, sampleID, timePredict, deri, basis, knots, keepModels,numCores)
}
\arguments{
\item{data}{\code{data.frame} or \code{matrix} containing the samples as rows and features as columns}

\item{time}{\code{numeric} vector containing the sample time point information.}

\item{sampleID}{\code{character}, \code{numeric} or \code{factor} vector containing information about the unique identity of each sample}

\item{timePredict}{\code{numeric} vector containing the time points to be predicted.  By default set to the original time points observed in the experiment.}

\item{deri}{\code{logical} value. If \code{TRUE} returns the predicted derivative information on the observed time points. By default set to \code{FALSE}.}

\item{basis}{\code{character} string. What type of basis to use, matching one of \code{"cubic"}, \code{"p-spline"} or \code{"cubic p-spline"}. The \code{"cubic"} basis (\code{default}) is the cubic smoothing spline as defined by Verbyla \emph{et al.} 1999, the \code{"p-spline"} is the truncated p-spline basis as defined by Durban \emph{et al.} 2005.}

\item{knots}{Optional \code{integer}, the number of knots used for the \code{"p-spline"} or \code{"cubic p-spline"} basis calculation. If not supplied, the number of knots is calculated as proposed by Ruppert 2002. Not used for the \code{"cubic"} smoothing spline basis, as it uses the inner design points.}

\item{keepModels}{alternative \code{logical} value if you want to keep the model output. Default value is FALSE}

\item{numCores}{Alternative \code{numeric} value indicating the number of CPU cores to be used. Default value is automatically estimated.}
}
\description{
Function that models a linear or linear mixed model depending on the best fit. Alternatively, the function can return the derivative information of the fitted models
for the fixed (original) time points and a chosen \code{basis}.
}
\details{
The first model (\code{modelsUsed}=0) assumes the response is a straight line not affected by individual variation. 

Let \eqn{y_{ij}(t_{ij})} be the expression of a feature for individual (or biological replicate) \eqn{i} at time \eqn{t_{ij}}, where \eqn{i=1,2,...,n}, \eqn{j=1,2,...,m_i}, \eqn{n} is the sample size and \eqn{m_i} is the number of observations for individual \eqn{i} for the given feature. 
We fit a simple linear regression of expression \eqn{y_{ij}(t_{ij})} on time \eqn{t_{ij}}. 
The intercept \eqn{\beta_0} and slope \eqn{\beta_1} are estimated via ordinary least squares:
\eqn{y_{ij}(t_{ij})= \beta_0 + \beta_1 t_{ij} + \epsilon_{ij}}, where \eqn{\epsilon_{ij} \sim N(0,\sigma^2_{\epsilon})}.
The second model (\code{modelsUsed}=1) is nonlinear: the straight line in the regression is replaced with a curve modelled using, for example, a truncated line spline basis (\code{basis}="p-spline") as proposed by Durban \emph{et al.} 2005:

\deqn{y_{ij}(t_{ij})= f(t_{ij}) +\epsilon_{ij},} 

where \eqn{\epsilon_{ij} \sim N(0,\sigma_{\epsilon}^2)}.

The penalized spline is represented by \eqn{f}, which depends on a set of knot positions \eqn{\kappa_1,...,\kappa_K} in the range of \eqn{{t_{ij}}}, some unknown coefficients \eqn{u_k}, an intercept \eqn{\beta_0} and a slope \eqn{\beta_1}. The first term in the above equation can therefore be expanded as:
\deqn{f(t_{ij})= \beta_0+ \beta_1t_{ij}+\sum\limits_{k=1}^{K}u_k(t_{ij}-\kappa_k)_+,}
with \eqn{(t_{ij}-\kappa_k)_+=t_{ij}-\kappa_k} if \eqn{t_{ij}-\kappa_k > 0}, and \eqn{0} otherwise.

The choice of the number of knots \eqn{K} and their positions influences the flexibility of the curve. 
If the argument \code{knots} is missing, we use a method proposed by Ruppert 2002 to estimate the number of knots given the measured number of time points \eqn{T}, so that the knots \eqn{\kappa_1 \ldots \kappa_K} are placed at quantiles of the time interval of interest: 

\deqn{K= max(5,min(floor(\frac{T}{4}) , 40)).}

In order to account for individual variation, our third model (\code{modelsUsed}=2) adds a subject-specific random effect \eqn{U_i} to the mean response \eqn{f(t_{ij})}. 
Assuming \eqn{f(t_{ij})} to be a fixed (yet unknown) population curve, \eqn{U_i} is treated as a random realization of an underlying Gaussian process with zero-mean and variance \eqn{\sigma_U^2} and is independent from the random error \eqn{\epsilon_{ij}}:

\deqn{y_{ij}(t_{ij}) = f(t_{ij}) + U_i + \epsilon_{ij}}

with \eqn{U_{i} \sim N(0,\sigma_U^2)} and \eqn{\epsilon_{ij} \sim N(0,\sigma_{\epsilon}^2)}.
In the equation above, the individual curves are expected to be parallel to the mean curve as we assume the individual expression curves to be constant over time.
A simple extension to this model is to assume individual deviations are straight lines. The fourth model (\code{modelsUsed}=3) therefore fits individual-specific random intercepts \eqn{a_{i0}} and slopes \eqn{a_{i1}}:

 \deqn{y_{ij}(t_{ij}) = f(t_{ij}) + a_{i0} + a_{i1}t_{ij} + \epsilon_{ij}}
 
with \eqn{\epsilon_{ij} \sim N(0,\sigma_\epsilon^2)} and \eqn{(a_{i0},a_{i1})^T \sim N(0,\Sigma)}.
We assume independence between the random intercept and slope.
 \code{lmmSpline} returns an object of class \code{lmmspline} containing the following components:
 \describe{
\item{predSpline}{\code{data.frame} containing predicted values based on linear model object or linear mixed effect model object.}
\item{modelsUsed}{\code{numeric} vector indicating the model used to fit the data. 0 = linear model, 1 = linear mixed effect model spline (LMMS) with defined basis ('cubic' by default), 2 = LMMS with subject-specific random intercept, 3 = LMMS with subject-specific random intercept and slope.}
\item{model}{\code{list} of models used to model time profiles.}
\item{derivative}{\code{logical} value indicating if the predicted values are the derivative information.}
 }
}
\examples{
\dontrun{
data(kidneySimTimeGroup)
# running for samples in group 1
G1 <- which(kidneySimTimeGroup$group=="G1")
testLMMSpline<- lmmSpline(data=kidneySimTimeGroup$data[G1,],time=kidneySimTimeGroup$time[G1],
                 sampleID=kidneySimTimeGroup$sampleID[G1])
summary(testLMMSpline)
DerivTestLMMSplineTG<- lmmSpline(data=as.data.frame(kidneySimTimeGroup$data[G1,]),
                       time=kidneySimTimeGroup$time[G1],sampleID=kidneySimTimeGroup$sampleID[G1],
                       deri=TRUE,basis="p-spline")
summary(DerivTestLMMSplineTG)}
}
\references{
Durban, M., Harezlak, J., Wand, M. P., & Carroll, R. J. (2005). \emph{Simple fitting of subject-specific curves for longitudinal data.} Stat. Med., 24(8), 1153-67.

Ruppert, D. (2002). \emph{Selecting the number of knots for penalized splines.} J. Comp. Graph. Stat. 11, 735-757

Verbyla, A. P., Cullis, B. R., Kenward, M. G., & Welham, S. J. (1999). \emph{The analysis of designed experiments and longitudinal data by using smoothing splines.} Appl. Statist., 48(3), 269-311.

Straube J., Gorse A.-D., Huang B.E., Le Cao K.-A. (2015). \emph{A linear mixed model spline framework for analyzing time course 'omics' data.} PLoS ONE, 10(8), e0134540.
}
\seealso{
\code{\link{summary.lmmspline}}, \code{\link{plot.lmmspline}}, \code{\link{predict.lmmspline}}, \code{\link{deriv.lmmspline}}
}

