% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/compare_variables.R
\name{compare_variables}
\alias{compare_variables}
\alias{compare_vars}
\alias{s_compare}
\alias{s_compare.numeric}
\alias{s_compare.factor}
\alias{s_compare.character}
\alias{s_compare.logical}
\title{Compare variables between groups}
\usage{
compare_vars(
  lyt,
  vars,
  var_labels = vars,
  na_str = default_na_str(),
  nested = TRUE,
  ...,
  na_rm = TRUE,
  show_labels = "default",
  table_names = vars,
  section_div = NA_character_,
  .stats = c("n", "mean_sd", "count_fraction", "pval"),
  .stat_names = NULL,
  .formats = NULL,
  .labels = NULL,
  .indent_mods = NULL
)

s_compare(x, ...)

\method{s_compare}{numeric}(x, ...)

\method{s_compare}{factor}(x, ...)

\method{s_compare}{character}(x, ...)

\method{s_compare}{logical}(x, ...)
}
\arguments{
\item{lyt}{(\code{PreDataTableLayouts})\cr layout that analyses will be added to.}

\item{vars}{(\code{character})\cr variable names for the primary analysis variable to be iterated over.}

\item{var_labels}{(\code{character})\cr variable labels.}

\item{na_str}{(\code{string})\cr string used to replace all \code{NA} or empty values in the output.}

\item{nested}{(\code{flag})\cr whether this layout instruction should be applied within the existing layout structure
\emph{if possible} (\code{TRUE}, the default) or as a new top-level element (\code{FALSE}). Ignored if it would nest
a split underneath analyses, which is not allowed.}

\item{...}{additional arguments passed to \code{s_compare()}, including:
\itemize{
\item \code{denom}: (\code{string}) choice of denominator. Options are \code{c("n", "N_col", "N_row")}. For factor variables, can
only be \code{"n"} (number of values in this row and column intersection).
\item \code{.N_row}: (\code{numeric(1)}) Row-wise N (row group count) for the group of observations being analyzed (i.e. with no
column-based subsetting).
\item \code{.N_col}: (\code{numeric(1)}) Column-wise N (column count) for the full column being tabulated within.
\item \code{verbose}: (\code{flag}) Whether additional warnings and messages should be printed. Mainly used to print out
information about factor casting. Defaults to \code{TRUE}. Used for \code{character}/\code{factor} variables only.
}}

\item{na_rm}{(\code{flag})\cr whether \code{NA} values should be removed from \code{x} prior to analysis.}

\item{show_labels}{(\code{string})\cr label visibility: one of "default", "visible" and "hidden".}

\item{table_names}{(\code{character})\cr this can be customized in the case that the same \code{vars} are analyzed multiple
times, to avoid warnings from \code{rtables}.}

\item{section_div}{(\code{string})\cr string which should be repeated as a section divider after each group
defined by this split instruction, or \code{NA_character_} (the default) for no section divider.}

\item{.stats}{(\code{character})\cr statistics to select for the table.

Options for numeric variables are: \verb{'n', 'sum', 'mean', 'sd', 'se', 'mean_sd', 'mean_se', 'mean_ci', 'mean_sei', 'mean_sdi', 'mean_pval', 'median', 'mad', 'median_ci', 'quantiles', 'iqr', 'range', 'min', 'max', 'median_range', 'cv', 'geom_mean', 'geom_sd', 'geom_mean_sd', 'geom_mean_ci', 'geom_cv', 'median_ci_3d', 'mean_ci_3d', 'geom_mean_ci_3d', 'pval'}

Options for non-numeric variables are: \verb{'n', 'count', 'count_fraction', 'count_fraction_fixed_dp', 'fraction', 'n_blq', 'pval_counts'}}

\item{.stat_names}{(\code{character})\cr names of the statistics that are passed directly to name single statistics
(\code{.stats}). This option is visible when producing \code{\link[rtables:data.frame_export]{rtables::as_result_df()}} with \code{make_ard = TRUE}.}

\item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics. See Details in \code{analyze_vars} for more
information on the \code{"auto"} setting.}

\item{.labels}{(named \code{character})\cr labels for the statistics (without indent).}

\item{.indent_mods}{(named \code{integer})\cr indent modifiers for the labels. Each element of the vector
should be a name-value pair with name corresponding to a statistic specified in \code{.stats} and value the indentation
for that statistic's row label.}

\item{x}{(\code{numeric}, \code{factor}, \code{character}, or \code{logical})\cr vector of values we want to analyze.}
}
\value{
\itemize{
\item \code{compare_vars()} returns a layout object suitable for passing to further layouting functions,
or to \code{\link[rtables:build_table]{rtables::build_table()}}. Adding this function to an \code{rtable} layout will add formatted rows containing
the statistics from \code{s_compare()} to the table layout.
}

\itemize{
\item \code{s_compare()} returns output of \code{\link[=s_summary]{s_summary()}} and comparisons versus the reference group in the form of p-values.
}
}
\description{
\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}}

The analyze function \code{\link[=compare_vars]{compare_vars()}} creates a layout element to summarize and compare one or more variables, using
the S3 generic function \code{\link[=s_summary]{s_summary()}} to calculate a list of summary statistics. A list of all available statistics
for numeric variables can be viewed by running \code{get_stats("analyze_vars_numeric", add_pval = TRUE)} and for
non-numeric variables by running \code{get_stats("analyze_vars_counts", add_pval = TRUE)}. Use the \code{.stats} parameter to
specify the statistics to include in your output summary table.

Prior to using this function in your table layout you must use \code{\link[rtables:split_cols_by]{rtables::split_cols_by()}} to create a column
split on the variable to be used in comparisons, and specify a reference group via the \code{ref_group} parameter.
Comparisons can be performed for each group (column) against the specified reference group by including the p-value
statistic.
}
\section{Functions}{
\itemize{
\item \code{compare_vars()}: Layout-creating function which can take statistics function arguments
and additional format arguments. This function is a wrapper for \code{\link[rtables:analyze]{rtables::analyze()}}.

\item \code{s_compare()}: S3 generic function to produce a comparison summary.

\item \code{s_compare(numeric)}: Method for \code{numeric} class. This uses the standard t-test
to calculate the p-value.

\item \code{s_compare(factor)}: Method for \code{factor} class. This uses the chi-squared test
to calculate the p-value.

\item \code{s_compare(character)}: Method for \code{character} class. This makes an automatic
conversion to \code{factor} (with a warning) and then forwards to the method for factors.

\item \code{s_compare(logical)}: Method for \code{logical} class. A chi-squared test
is used. If missing values are not removed, then they are counted as \code{FALSE}.

}}
\note{
\itemize{
\item For factor variables, \code{denom} for factor proportions can only be \code{n} since the purpose is to compare proportions
between columns, therefore a row-based proportion would not make sense. Proportion based on \code{N_col} would
be difficult since we use counts for the chi-squared test statistic, therefore missing values should be accounted
for as explicit factor levels.
\item If factor variables contain \code{NA}, these \code{NA} values are excluded by default. To include \code{NA} values
set \code{na_rm = FALSE} and missing values will be displayed as an \code{NA} level. Alternatively, an explicit
factor level can be defined for \code{NA} values during pre-processing via \code{\link[=df_explicit_na]{df_explicit_na()}} - the
default \code{na_level} (\code{"<Missing>"}) will also be excluded when \code{na_rm} is set to \code{TRUE}.
\item For character variables, automatic conversion to factor does not guarantee that the table
will be generated correctly. In particular, for sparse tables this is very likely to fail.
Therefore it is always better to manually convert character variables to factors during pre-processing.
\item For \code{compare_vars()}, the column split must define a reference group via \code{ref_group} so that the comparison
is well defined.
}
}
\examples{
# `compare_vars()` in `rtables` pipelines

## Default output within a `rtables` pipeline.
lyt <- basic_table() \%>\%
  split_cols_by("ARMCD", ref_group = "ARM B") \%>\%
  compare_vars(c("AGE", "SEX"))
build_table(lyt, tern_ex_adsl)

## Select and format statistics output.
lyt <- basic_table() \%>\%
  split_cols_by("ARMCD", ref_group = "ARM C") \%>\%
  compare_vars(
    vars = "AGE",
    .stats = c("mean_sd", "pval"),
    .formats = c(mean_sd = "xx.x, xx.x"),
    .labels = c(mean_sd = "Mean, SD")
  )
build_table(lyt, df = tern_ex_adsl)

# `s_compare.numeric`

## Usual case where both this and the reference group vector have more than 1 value.
s_compare(rnorm(10, 5, 1), .ref_group = rnorm(5, -5, 1), .in_ref_col = FALSE)

## If one group has no more than 1 value, then the p-value is not calculated.
s_compare(rnorm(10, 5, 1), .ref_group = 1, .in_ref_col = FALSE)

## Empty numeric does not fail, it returns NA-filled items and no p-value.
s_compare(numeric(), .ref_group = numeric(), .in_ref_col = FALSE)

# `s_compare.factor`

## Basic usage:
x <- factor(c("a", "a", "b", "c", "a"))
y <- factor(c("a", "b", "c"))
s_compare(x = x, .ref_group = y, .in_ref_col = FALSE)

## Management of NA values.
x <- explicit_na(factor(c("a", "a", "b", "c", "a", NA, NA)))
y <- explicit_na(factor(c("a", "b", "c", NA)))
s_compare(x = x, .ref_group = y, .in_ref_col = FALSE, na_rm = TRUE)
s_compare(x = x, .ref_group = y, .in_ref_col = FALSE, na_rm = FALSE)

# `s_compare.character`

## Basic usage:
x <- c("a", "a", "b", "c", "a")
y <- c("a", "b", "c")
s_compare(x, .ref_group = y, .in_ref_col = FALSE, .var = "x", verbose = FALSE)

## Note that missing values handling can make a large difference:
x <- c("a", "a", "b", "c", "a", NA)
y <- c("a", "b", "c", rep(NA, 20))
s_compare(x,
  .ref_group = y, .in_ref_col = FALSE,
  .var = "x", verbose = FALSE
)
s_compare(x,
  .ref_group = y, .in_ref_col = FALSE, .var = "x",
  na_rm = FALSE, verbose = FALSE
)

# `s_compare.logical`

## Basic usage:
x <- c(TRUE, FALSE, TRUE, TRUE)
y <- c(FALSE, FALSE, TRUE)
s_compare(x, .ref_group = y, .in_ref_col = FALSE)

## Management of NA values.
x <- c(NA, TRUE, FALSE)
y <- c(NA, NA, NA, NA, FALSE)
s_compare(x, .ref_group = y, .in_ref_col = FALSE, na_rm = TRUE)
s_compare(x, .ref_group = y, .in_ref_col = FALSE, na_rm = FALSE)

}
\seealso{
\code{\link[=s_summary]{s_summary()}} which is used internally to compute a summary within \code{s_compare()}, and \code{\link[=a_summary]{a_summary()}}
which is used (with \code{compare = TRUE}) as the analysis function for \code{compare_vars()}.
}
