% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tcgsa_seq.R
\name{tcgsa_seq}
\alias{tcgsa_seq}
\title{Time-course Gene Set Analysis}
\usage{
tcgsa_seq(y, x, phi, weights_phi_condi = TRUE, genesets, indiv = NULL,
  Sigma_xi = diag(ncol(phi)), which_test = c("permutation",
  "asymptotic"), which_weights = c("loclin", "voom", "none"),
  n_perm = 1000, preprocessed = FALSE, doPlot = TRUE,
  gene_based_weights = TRUE, bw = "nrd", kernel = c("gaussian",
  "epanechnikov", "rectangular", "triangular", "biweight", "tricube",
  "cosine", "optcosine"), exact = FALSE, transform = FALSE,
  padjust_methods = c("BH", "BY", "holm", "hochberg", "hommel",
  "bonferroni"), lowess_span = 0.5, homogen_traj = FALSE,
  na.rm_tcgsaseq = TRUE, verbose = TRUE)
}
\arguments{
\item{y}{a numeric matrix of size \code{G x n} containing the raw RNA-seq counts or
preprocessed expressions from \code{n} samples for \code{G} genes.}

\item{x}{a numeric matrix of size \code{n x p} containing the model covariates from
\code{n} samples (design matrix). Usually, its first column is the intercept (full of
\code{1}s).}

\item{phi}{a numeric design matrix of size \code{n x K} containing the \code{K} variables
to be tested.}

\item{weights_phi_condi}{a logical flag indicating whether heteroscedasticity
weights computation should be conditional on both the variable(s) to be tested
\code{phi} and on covariate(s) \code{x}, or on \code{x} alone. Default is \code{TRUE}
in which case conditional means are estimated conditionally on both \code{x} and \code{phi}.}

\item{genesets}{either a vector of indices or subscripts that defines which rows of \code{y}
constitute the investigated gene set (when only 1 gene set is being tested).
Can also be a \code{list} of indices (or \code{rownames} of \code{y}) when several
gene sets are tested at once, such as the first element of a
\code{\link[GSA:GSA.read.gmt]{gmt}} object. If \code{NULL}, then gene-wise p-values are returned.}

\item{indiv}{a vector of length \code{n} containing the information for
attributing each sample to one of the studied individuals. Coerced
to be a \code{factor}. Default is \code{NULL} in which case each sample is considered
as coming from independent subjects.}

\item{Sigma_xi}{a matrix of size \code{K x K} containing the covariance matrix
of the \code{K} random effects. Only used if \code{homogen_traj} is \code{FALSE}.
Default assumes a diagonal correlation matrix, i.e. independence of random effects.}

\item{which_test}{a character string indicating which method to use to approximate
the variance component score test, either \code{"permutation"} or \code{"asymptotic"}.
Default is \code{"permutation"}.}

\item{which_weights}{a character string indicating which method to use to estimate
the mean-variance relationship weights. Possibilities are \code{"loclin"},
\code{"voom"} or \code{"none"} (in which case no weighting is performed).
Default is \code{"loclin"}.
See \code{\link{sp_weights}} and \code{\link{voom_weights}} for details.}

\item{n_perm}{the number of perturbations. Default is \code{1000}.}

\item{preprocessed}{a logical flag indicating whether the expression data have
already been preprocessed (e.g. log2 transformed). Default is \code{FALSE}, in
which case \code{y} is assumed to contain raw counts and is normalized into
log-counts per million.}

\item{doPlot}{a logical flag indicating whether the mean-variance plot should be drawn.
Default is \code{TRUE}.}

\item{gene_based_weights}{a logical flag used for \code{"loclin"} weights, indicating whether to estimate
weights at the gene-level, or rather at the observation-level. Default is \code{TRUE},
and weights are then estimated at the gene-level.}

\item{bw}{a character string indicating the smoothing bandwidth selection method to use. See
\code{\link[stats]{bandwidth}} for details. Possible values are \code{"ucv"}, \code{"SJ"},
\code{"bcv"}, \code{"nrd"} or \code{"nrd0"}. Default is \code{"nrd"}.}

\item{kernel}{a character string indicating which kernel should be used.
Possibilities are \code{"gaussian"}, \code{"epanechnikov"}, \code{"rectangular"},
\code{"triangular"}, \code{"biweight"}, \code{"tricube"}, \code{"cosine"},
\code{"optcosine"}. Default is \code{"gaussian"} (NB: \code{"tricube"} kernel
corresponds to the loess method).}

\item{exact}{a logical flag indicating whether the non-parametric weights accounting
for the mean-variance relationship should be computed exactly or extrapolated
from the interpolation of local regression of the mean against the
variance. Default is \code{FALSE}, which uses interpolation (faster computation).}

\item{transform}{a logical flag used for \code{"loclin"} weights, indicating whether values should be
transformed to uniform for the purpose of local linear smoothing. This may be helpful if tail
observations are sparse and the specified bandwidth gives suboptimal performance there.
Default is \code{FALSE}.}

\item{padjust_methods}{multiple testing correction method used if \code{genesets}
is a list. Default is \code{"BH"}, i.e. Benjamini-Hochberg procedure for controlling the FDR.
Other possibilities are: \code{"holm"}, \code{"hochberg"}, \code{"hommel"},
\code{"bonferroni"} or \code{"BY"} (for Benjamini-Yekutieli procedure).}

\item{lowess_span}{smoother span for the lowess function, between 0 and 1. This gives
the proportion of points in the plot which influence the smooth at each value.
Larger values give more smoothness. Only used if \code{which_weights} is \code{"voom"}.
Default is \code{0.5}.}

\item{homogen_traj}{a logical flag indicating whether trajectories should be considered homogeneous.
Default is \code{FALSE} in which case trajectories are not only tested for trend, but also for heterogeneity.}

\item{na.rm_tcgsaseq}{logical: should missing values in \code{y} (including
\code{NA} and \code{NaN}) be omitted from the calculations?
Default is \code{TRUE}.}

\item{verbose}{logical: should informative messages be printed during the
computation? Default is \code{TRUE}.}
}
\value{
A list with the following elements:\itemize{
  \item \code{which_test}: a character string carrying forward the value of the '\code{which_test}' argument
   indicating which test was performed (either "asymptotic" or "permutation").
  \item \code{preprocessed}: a logical flag carrying forward the value of the '\code{preprocessed}' argument
  indicating whether the expression data were already preprocessed, or were provided as raw counts and
  transformed into log-counts per million.
  \item \code{n_perm}: an integer carrying forward the value of the '\code{n_perm}' argument indicating
  the number of perturbations performed (\code{NA} if asymptotic test was performed).
  \item \code{genesets}: carrying forward the value of the '\code{genesets}' argument defining the gene sets
  of interest (\code{NULL} for gene-wise testing).
  \item \code{pvals}: computed p-values. A \code{data.frame} with one row for each gene set, or
  for each gene if \code{genesets} argument is \code{NULL}, and with 2 columns: the first one '\code{rawPval}'
  contains the raw p-values, the second one contains the FDR adjusted p-values and is either named
  '\code{adjPval}' (according to the '\code{padjust_methods}' argument) in the \code{asymptotic} case
  or '\code{FDR}' in the \code{permutation} case.
}
}
\description{
Wrapper function for performing gene set analysis of (potentially longitudinal) RNA-seq data
}
\examples{
# Simulate longitudinal expression data under the null hypothesis (no time
# effect on expression) and run the asymptotic test on them, both for
# gene sets and gene-wise.
nsims <- 2 # number of simulated datasets, kept small so the example runs quickly
res_quant <- list()
for(i in seq_len(nsims)){
n <- 2000 # number of simulated genes
nr <- 3 # number of repeated measurements per individual
r <- nr*20 # total number of samples, i.e. 20 individuals
# time covariate: the sequence 1..nr repeated once per individual.
# NB: this variable masks the name of the base transpose function, but the
# call t(...) below still resolves to that function because R skips
# non-function objects when looking up a name in call position.
t <- matrix(rep(1:nr, r/nr), ncol=1, nrow=r)
sigma <- 0.4
b0 <- 1

#under the null:
b1 <- 0

# common mean trajectory, then gene-level noise whose variance depends on the mean
y.tilde <- b0 + b1*t + rnorm(r, sd = sigma)
y <- t(matrix(rnorm(n*r, sd = sqrt(sigma*abs(y.tilde))), ncol=n, nrow=r) +
      matrix(rep(y.tilde, n), ncol=n, nrow=r))
# intercept-only design matrix
x <- matrix(1, ncol=1, nrow=r)

#run test
# gene set testing: 10 gene sets made of 10 consecutive genes each
res <- tcgsa_seq(y, x, phi=t, genesets=lapply(0:9, function(x){x*10+(1:10)}),
                        Sigma_xi=matrix(1), indiv=rep(1:(r/nr), each=nr), which_test="asymptotic",
                        which_weights="none", preprocessed=TRUE)
# gene-wise testing: genesets is NULL
res_genes <- tcgsa_seq(y, x, phi=cbind(t),
                      genesets=NULL,
                      Sigma_xi=diag(1), indiv=rep(1:(r/nr), each=nr), which_test="asymptotic",
                      which_weights="none", preprocessed=TRUE)
length(res_genes$pvals[, "rawPval"])
quantile(res_genes$pvals[, "rawPval"])
# store the raw gene-wise p-values of this simulated dataset
res_quant[[i]] <- res_genes$pvals[, "rawPval"]
}
# optional summaries of the simulated p-values:
#round(rowMeans(sapply(res_quant, quantile)), 3)
#plot(density(unlist(res_quant)))
#mean(unlist(res_quant)<0.05)

\dontrun{
# same gene-wise analysis with the permutation test, not run because it is slower
res_genes <- tcgsa_seq(y, x, phi=t, genesets=NULL,
                      Sigma_xi=matrix(1), indiv=rep(1:(r/nr), each=nr), which_test="permutation",
                      which_weights="none", preprocessed=TRUE, n_perm=1000)

mean(res_genes$pvals$rawPval < 0.05)
summary(res_genes$pvals$FDR)
}
}
\references{
Agniel D & Hejblum BP (2017). Variance component score test for
time-course gene set analysis of longitudinal RNA-seq data, \emph{Biostatistics},
18(4):589-604. \href{https://doi.org/10.1093/biostatistics/kxx005}{10.1093/biostatistics/kxx005}.
\href{https://arxiv.org/abs/1605.02351}{arXiv:1605.02351}.

Law, C. W., Chen, Y., Shi, W., & Smyth, G. K. (2014). voom: Precision
weights unlock linear model analysis tools for RNA-seq read counts. \emph{Genome
Biology}, 15(2), R29.
}
\seealso{
\code{\link{sp_weights}} \code{\link{vc_test_perm}} \code{\link{vc_test_asym}} \code{\link{p.adjust}}
}
