% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/missing_prop_matrix.R
\name{create_missingness_prop_matrix}
\alias{create_missingness_prop_matrix}
\title{Create Missingness Proportion Matrix}
\usage{
create_missingness_prop_matrix(
  data,
  index_col = NULL,
  cols_ignore = NULL,
  na_values = c(NA, NaN, Inf, -Inf),
  repeat_feature_names = character(0),
  loose = FALSE
)
}
\arguments{
\item{data}{Data frame or matrix containing the input data with potential missing values.}

\item{index_col}{Optional character scalar naming an index column. If supplied and present
in \code{data}, the column is excluded from the analysis; row names are preserved as-is.}

\item{cols_ignore}{Character vector of column names to exclude from the proportion matrix (optional).}

\item{na_values}{Vector of values to treat as missing in addition to standard missing values.
Defaults to \code{c(NA, NaN, Inf, -Inf)}.}

\item{repeat_feature_names}{Character vector of "base" feature names that have repeated timepoints.
Columns holding repeated measurements must be named \code{<feature>_<timepoint>}, where \code{<feature>}
is alphanumeric (and may include dots) and \code{<timepoint>} is an integer (e.g., \code{"CRP_1"}).
Defaults to \code{character(0)}.}

\item{loose}{Logical. If \code{TRUE}, any column whose name starts with a feature name from
\code{repeat_feature_names} is matched. Defaults to \code{FALSE}.}
}
\value{
A numeric matrix of dimension \code{nrow(data)} by \code{n_features}, where rows are
samples and columns are features (base names). Entries are per-sample missingness proportions in \verb{[0, 1]}.
The returned matrix has an attribute \code{"feature_columns_map"}: a named list mapping each
output feature to the source columns used to compute its proportion.
}
\description{
Creates a matrix in which each entry is the proportion of missing values for one
sample–feature combination, computed across that feature's timepoint columns. Each sample has
one proportion value per feature. Features may have repeated timepoints
(columns named like \code{feature_1}, \code{feature_2}, ...). This matrix can be used
with \code{cluster_on_missing_prop()} to group samples with similar missingness patterns.
}
\examples{
df <- data.frame(
  id = paste0("s", 1:4),
  CRP_1 = c(1.2, NA, 2.1, NaN),
  CRP_2 = c(NA, NA, 2.0, 1.9),
  IL6_1 = c(0.5, 0.7, Inf, 0.4),
  IL6_2 = c(0.6, -Inf, 0.8, 0.5),
  Albumin = c(3.9, 4.1, 4.0, NA)
)

m <- create_missingness_prop_matrix(
  data = df,
  index_col = "id",
  cols_ignore = NULL,
  repeat_feature_names = c("CRP", "IL6")
)

dim(m)         # 4 x 3 (CRP, IL6, Albumin)
# per-sample proportion missing across CRP_1 and CRP_2
m[ , "CRP"]    
attr(m, "feature_columns_map")

}
