\encoding{utf8}
\name{tost.t}
\alias{Mean-equivalence t tests}
\alias{tost.t}
\concept{Mean-equivalence t tests}
\concept{Two One-Sided Tests}
\title{Mean-equivalence \emph{t} tests}
\description{
 \loadmathjax
 Performs two one-sided \emph{t} tests for mean equivalence.}
\usage{
tost.t(
  x, 
  y           = NULL, 
  mu          = NA, 
  by          = NULL, 
  eqv.type    = equivalence.types, 
  eqv.level   = 1, 
  upper       = NA,
  paired      = FALSE, 
  var.equal   = FALSE, 
  welch       = FALSE, 
  conf.level  = 0.95, 
  x.name      = "", 
  y.name      = "", 
  by.name     = "", 
  by.values   = NULL, 
  relevance   = TRUE)

equivalence.types
#c("delta", "epsilon")
}
\arguments{
 \item{x}{a (non-empty) numeric vector of data values.}
 \item{y}{an optional (non-empty) numeric vector of data values. Implies \code{by=NULL}.}
 \item{mu}{a number indicating the true value of the mean for a one-sample test. Implies \code{paired=FALSE}, and \code{y=NULL}.}
 \item{by}{an optional (non-empty) vector of group indicator values. Implies \code{y=NULL}.}
 \item{eqv.type}{defines whether the equivalence interval will be defined in terms of \mjeqn{\Delta}{Delta} or \mjeqn{\varepsilon}{epsilon} (\code{"delta"}, or \code{"epsilon"}). These options change the way that \code{eqv.level} is interpreted: when \code{"delta"} is specified, the \code{eqv.level} is measured in the units of the variable(s) being tested, and when \code{"epsilon"} is specified, the \code{eqv.level} is measured in units of the \emph{t} distribution; put another way \mjeqn{\varepsilon = \frac{\Delta}{\text{standard error}}}{epsilon = Delta/standard error}. The default is \code{"delta"}.\cr \cr Defining tolerance in terms of \mjeqn{\varepsilon}{epsilon} means that it is not possible to reject any test for mean equivalence's \mjeqn{\text{H}_{0}^{-}}{Ho} if \mjeqn{\varepsilon \le t_{\nu,\alpha}}{epsilon <= the critical value of \emph{t} for a given alpha and degrees of freedom}. Because \mjeqn{\varepsilon = \frac{\Delta}{\text{standard error}}}{epsilon = Delta/standard error}, we can see that it is not possible to reject any \mjeqn{\text{H}_{0}^{-}}{Ho} if \mjeqn{\Delta \le \text{standard error} \times t_{\nu,\alpha}}{Delta <= the product of the standard error and critical value of \emph{t} for a given alpha and degrees of freedom}. \code{tost.t} reports when either of these conditions obtains.}
 \item{eqv.level}{defines the equivalence threshold for the tests depending on whether \code{eqv.type} is \code{"delta"} or \code{"epsilon"} (see above). Researchers are responsible for choosing meaningful values of \mjeqn{\Delta}{Delta} or \mjeqn{\varepsilon}{epsilon}. The default value is 1, which should not automatically be assumed to be a meaningful value for any given research question.}
 \item{upper}{defines the upper equivalence threshold for the test, is assumed to be positive, and transforms the meaning of \code{eqv.level} to mean the \emph{lower} equivalence threshold for the test. Also, \code{eqv.level} is assumed to be a negative value. Taken together, these correspond to Schuirmann's (1987) asymmetric equivalence intervals. If \code{upper==abs(eqv.level)}, then \code{upper} will be ignored.}
 \item{paired}{a logical variable indicating whether you want a paired \emph{t} test. Requires \code{y} to be supplied.}
 \item{var.equal}{a logical variable indicating whether to treat the two samples as being drawn from populations with equal variances. If \code{var.equal=TRUE} the pooled variance is used with degrees of freedom \mjeqn{\nu=n_{x} + n_{y} - 2}{equal to the sum of both groups' sample sizes minus two}, otherwise Satterthwaite's (1946) approximation to the degrees of freedom is used (unless \code{welch=TRUE} is specified).}
 \item{welch}{a logical variable indicating whether \code{tost.t} should use Welch's (1947) approximation for the degrees of freedom in an unpaired \emph{t} test assuming unequal variances. Specifying \code{welch=TRUE} requires that \code{var.equal=FALSE}.}
 \item{conf.level}{confidence level of the interval, and complement of the test's nominal type I error rate \mjeqn{\alpha}{alpha}.}
 \item{x.name}{specifies how the first variable will be labeled in the output. The default value of \code{x.name} is \code{names(x)}, but if that is not present will use the variable name of \code{x}.}
 \item{y.name}{specifies how the second variable will be labeled in the output when \code{by=NULL}. The default value of \code{y.name} is \code{names(y)}, but if that is not present will use the variable name of \code{y}. If \code{by!=NULL}, then information in \code{names(x)}, \code{x}, \code{names(by)}, \code{by}, \code{x.name}, \code{y.name}, and \code{by.values} will be used to label the two groups depending on what information is present in these objects.}
 \item{by.name}{an optional string to customize the grouping variable name in the output. If \code{by.name=""}, \code{names(by)} or the name of the \code{by} variable will be used instead.}
 \item{by.values}{an optional two-element character vector of group names. If none are supplied, the names of the values of \code{names(by)} will be used if present, otherwise the raw values of the \code{by} variable will be used.}
 \item{relevance}{reports results and inference for combined tests for difference and for equivalence for a specific \code{conf.level}, \code{eqv.type}, \code{eqv.level}, and, if used, \code{upper}. See the Remarks section for more details on inference from combined tests.}
}
\details{\code{tost.t} tests for the equivalence of means within a symmetric equivalence interval defined by \code{eqv.type} and \code{eqv.level} using a two one-sided \emph{t} tests (TOST) approach (Schuirmann, 1987). Typically "positivist" null hypotheses are framed from an assumption of a lack of difference between two quantities, and reject this assumption only with sufficient evidence. When performing tests for equivalence, one frames a null hypothesis with the assumption that two quantities are different within an equivalence interval defined by some chosen level of tolerance.
 
With respect to an unpaired \emph{t} test, an equivalence null hypothesis takes one of the following two forms depending on whether equivalence is defined in terms of \mjeqn{\Delta}{Delta} (equivalence expressed in the same units as the \code{x} and \code{y} variables) or in terms of \mjeqn{\varepsilon}{epsilon} (equivalence expressed in the units of the \emph{t} distribution with the given degrees of freedom):

\emph{}\mjeqn{\phantom{22}\text{H}_{0}^{-}\text{: }|\mu_{x} - \mu_y| \ge \Delta}{&nbsp;&nbsp;Ho: |mu_x -- mu_y| >= Delta},\cr
\emph{}\mjeqn{\phantom{22}}{ }where the equivalence interval ranges from \mjeqn{\left(\mu_x - \mu_y\right) - \Delta}{(mu_x -- mu_y)-Delta} to \mjeqn{\left(\mu_x - \mu_y\right) + \Delta}{(mu_x -- mu_y)+Delta}. This translates directly into two one-sided null hypotheses:

\emph{}\mjeqn{\phantom{2222}\text{ H}_{01}^{-}\text{: }\mu_{x} - \mu_y \ge \Delta}{&nbsp;&nbsp;&nbsp;&nbsp;Ho1: mu_x -- mu_y >= Delta}, or\cr
\emph{}\mjeqn{\phantom{2222}\text{ H}_{02}^{-}\text{: }\mu_{x} - \mu_y \le -\Delta}{&nbsp;&nbsp;&nbsp;&nbsp;Ho2: mu_x -- mu_y <= --Delta}.

--OR--

\emph{}\mjeqn{\phantom{22}\text{H}_{0}^{-}\text{: }|T| \ge \varepsilon ,}{&nbsp;&nbsp;Ho: |T| >= epsilon,}\cr
\emph{}\mjeqn{\phantom{22}}{ }where the equivalence interval ranges from \mjeqn{-\varepsilon}{--epsilon} to \mjeqn{\varepsilon}{epsilon}. This also translates directly into two one-sided null hypotheses:

\emph{}\mjeqn{\phantom{2222}\text{H}_{01}^{-}\text{: }T \ge \varepsilon}{&nbsp;&nbsp;&nbsp;&nbsp;Ho1: T >= epsilon}; or\cr
\emph{}\mjeqn{\phantom{2222}\text{H}_{02}^{-}\text{: }T \le -\varepsilon}{&nbsp;&nbsp;&nbsp;&nbsp;Ho2: T <= --epsilon}.

When an asymmetric equivalence interval is defined using the \code{upper} option the general negativist null hypothesis becomes:

\emph{}\mjeqn{\phantom{22}\text{H}_{0}^{-}\text{: }\mu_{x} - \mu_y \le \Delta_{\text{lower}}}{&nbsp;&nbsp;Ho: mu_x -- mu_y <= Delta_lower}, or \mjeqn{\mu_{x} - \mu_y \ge \Delta_{\text{upper}}}{mu_x -- mu_y >= Delta_upper},\cr
\emph{}\mjeqn{\phantom{22}}{ }where the equivalence interval ranges from \mjeqn{\left(\mu_x - \mu_y\right) + \Delta_{\text{lower}}}{(mu_x -- mu_y) + Delta_lower} to \mjeqn{\left(\mu_x - \mu_y\right) + \Delta_{\text{upper}}}{(mu_x -- mu_y) + Delta_upper}. This also translates directly into two one-sided null hypotheses:

\emph{}\mjeqn{\phantom{2222}\text{H}_{01}^{-}\text{: }\mu_x - \mu_y \ge \Delta_{\text{upper}}}{&nbsp;&nbsp;&nbsp;&nbsp;Ho1: mu_x -- mu_y >= Delta_upper}; or\cr
\emph{}\mjeqn{\phantom{2222}\text{H}_{02}^{-}\text{: }\mu_x - \mu_y \le \Delta_{\text{lower}}}{&nbsp;&nbsp;&nbsp;&nbsp;Ho2: mu_x -- mu_y <= Delta_lower}.

--OR--

\emph{}\mjeqn{\phantom{22}\text{H}_{0}^{-}\text{: }T \le \varepsilon_{\text{lower}}}{&nbsp;&nbsp;Ho: T <= epsilon_lower}, or \mjeqn{T \ge \varepsilon_{\text{upper}}}{T >= epsilon_upper}, with:

\emph{}\mjeqn{\phantom{2222}\text{H}_{01}^{-}\text{: }T \ge \varepsilon_{\text{upper}}}{&nbsp;&nbsp;&nbsp;&nbsp;Ho1: T >= epsilon_upper}; or\cr
\emph{}\mjeqn{\phantom{2222}\text{H}_{02}^{-}\text{: }T \le \varepsilon_{\text{lower}}}{&nbsp;&nbsp;&nbsp;&nbsp;Ho2: T <= epsilon_lower}.\cr
 
NOTE: the appropriate level of \mjeqn{\alpha = (1 - }{alpha = (1 -- }\code{conf.level}\mjeqn{)}{)} is precisely the same as in the corresponding two-sided test for mean difference, so that, for example, if one wishes to make a type I error 1\% of the time, one simply conducts both of the one-sided tests of \mjeqn{\text{H}_{01}^{-}}{Ho1} and \mjeqn{\text{H}_{02}^{-}}{Ho2} by comparing the resulting p-value to 0.01 (Tryon and Lewis, 2008).

\subsection{Remarks}{As described by Tryon and Lewis (2008), when rejection decisions from both tests for difference (e.g., \mjeqn{\text{H}_{0}^{+}\text{: }\mu_{x}- \mu_{y} = 0}{positivist Ho: mu_x -- mu_y = 0}) and tests for equivalence (e.g., either \mjeqn{\text{H}_{0}^{-}\text{: }|\mu_{x}- \mu_{y}| \ge \Delta}{negativist Ho: |mu_x -- mu_y| >= Delta}, or \mjeqn{\text{H}_{0}^{-}\text{: }|T| \ge \varepsilon}{negativist Ho: |T| >= epsilon}) are combined, there are four possible interpretations for a given \mjeqn{\alpha}{alpha} and \mjeqn{\Delta}{Delta} or \mjeqn{\varepsilon}{epsilon}:

\enumerate{
\item One may reject \mjeqn{\text{H}_{0}^{+}}{the positivist Ho}, but fail to reject \mjeqn{\text{H}_{0}^{-}}{the negativist Ho}, and conclude that there is a \bold{relevant difference} in means at least as large as \mjeqn{\Delta}{Delta} or \mjeqn{\varepsilon}{epsilon}.

\item One may fail to reject \mjeqn{\text{H}_{0}^{+}}{the positivist Ho}, but reject \mjeqn{\text{H}_{0}^{-}}{the negativist Ho}, and conclude that there is \bold{equivalence} in means within the equivalence range (i.e. defined by \mjeqn{\Delta}{Delta} or \mjeqn{\varepsilon}{epsilon}).

\item One may reject both \mjeqn{\text{H}_{0}^{+}}{the positivist Ho} and \mjeqn{\text{H}_{0}^{-}}{the negativist Ho}, and conclude that there is a \bold{trivial difference} in means which lies within the equivalence range (i.e. defined by \mjeqn{\Delta}{Delta} or \mjeqn{\varepsilon}{epsilon}).

\item One may fail to reject both \mjeqn{\text{H}_{0}^{+}}{the positivist Ho} and \mjeqn{\text{H}_{0}^{-}}{the negativist Ho}, and draw an \bold{indeterminate} conclusion, because the data are underpowered to detect either difference or equivalence.
}
}
}
\value{
\code{tost.t} returns:
 \item{statistics}{a vector of the \emph{t} statistics for the two one-sided tests; if \code{relevance=TRUE}, these are followed by the value of the \emph{t} statistic for the positivist test for difference.}
 \item{p.values}{a vector of \emph{p} values for the \emph{t} tests.}
 \item{estimate}{a scalar or vector of the estimated mean or means, mean difference, or difference in means depending on whether it was a one-sample test, paired test, or a two-sample test.}
 \item{null.value}{the specified hypothesized value of the mean in a one-sample test, or 0 for a paired test or two-sample test.}
 \item{sterr}{the standard error used in the denominator of the \emph{t} statistic.}
 \item{sd}{a vector containing the sample standard deviations of the two variables or two groups in paired and unpaired tests; not returned for one-sample tests.}
 \item{sample_size}{a scalar (one-sample test) or vector (two-sample tests) containing the number of observations in the variable(s).}
 \item{parameter}{the degrees of freedom for the \emph{t} statistics.}
 \item{threshold}{the value of the equivalence/relevance threshold: if \code{is.na(upper)}, then returns the \code{eqv.level} argument. If \code{!is.na(upper)}, then returns a vector of (\code{eqv.level}, \code{upper}).}
 \item{conclusion}{a string containing the relevance test conclusion when \code{relevance=TRUE}.}
 }
\author{
Alexis Dinno (\email{alexis.dinno@pdx.edu})

Please contact me with any questions, bug reports or suggestions for improvement. Fixing bugs will be facilitated by sending along:
\enumerate{
\item a copy of the data (de-labeled or anonymized is fine),\cr
\item a copy of the command syntax used, and\cr
\item a copy of the exact output of the command.\cr
}
I am indebted to my winter 2013 and fall 2023 students for their inspiration. Much appreciation to Mick McVeety for troubleshooting the translation of my Stata \bold{tost} package to R.
\subsection{Suggested citation}{Dinno, A. 2025. \bold{tost.t}: Mean-equivalence \emph{t} tests. In: \bold{tost.suite} R software package.
}
}
\references{

Satterthwaite, F. E. (1946) \href{https://www.jstor.org/stable/3002019}{An approximate distribution of estimates of variance components}. \emph{Biometrics Bulletin}. \bold{2}, 110--114.

Schuirmann, D. A. (1987) \href{https://pubmed.ncbi.nlm.nih.gov/3450848/}{A comparison of the two one-sided tests procedure and the power approach for assessing the equivalence of average bioavailability}. \emph{Journal of Pharmacokinetics and Biopharmaceutics}. \bold{15}, 657--680.

Tryon, W. W., and Lewis, C. (2008) \href{https://pubmed.ncbi.nlm.nih.gov/18778155/}{An inferential confidence interval method of establishing statistical equivalence that corrects Tryon's (2001) reduction factor}. \emph{Psychological Methods}. \bold{13}, 272--277.

Welch, B. L. (1947) \href{https://www.jstor.org/stable/2332510}{The generalization of "Student's" problem when several different population variances are involved}. \emph{Biometrika}. \bold{34}, 28--35.
}
\seealso{
 \code{\link{t.test}}, \code{\link{tost.ti}}.
}
\examples{
require("webuse")

# Setup
webuse("auto")

# One-sample mean equivalence t test with asymmetric equivalence interval
tost.t(
  x=auto$mpg, 
  mu=20, 
  eqv.type="delta", 
  eqv.level=2.5, 
  upper=3, 
  relevance=FALSE)

# Setup
webuse("fuel")

# Two-sample paired relevance t test of means; equivalence interval is
#   +/- 1.5 sd beyond the critical value of T with df = 11 for alpha = 0.05
tost.t(
  x=fuel$mpg1, 
  y=fuel$mpg2, 
  paired=TRUE, 
  eqv.type="epsilon", 
  eqv.level=qt(p=.95,df=11)+1.5*sqrt(11/9), 
  conf.level=0.95,
  relevance=TRUE)

# Setup
webuse("fuel3")

# Two-group unpaired mean equivalence t test assuming equal variances
#   Notice warning about value of Delta!
tost.t(
  x=fuel3$mpg, 
  by=fuel3$treated, 
  eqv.type="delta", 
  eqv.level=1.5, 
  var.equal=TRUE,
  relevance=FALSE)

# Same example but customizing output labels
tost.t(
  x=fuel3$mpg, 
  by=fuel3$treated, 
  eqv.type="delta", 
  eqv.level=1.5, 
  var.equal=TRUE,
  by.name="Fuel",
  by.values=c("Treated", "Untreated"),
  relevance=FALSE)
}
\keyword{htest}
\keyword{stats}
