% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/Silhouette.R, R/plotSilhouette.R,
%   R/summary.silhouette.R
\name{Silhouette}
\alias{Silhouette}
\alias{plot.Silhouette}
\alias{summary.Silhouette}
\title{Calculate Silhouette Widths, Summary, and Plot for Clustering Results}
\usage{
Silhouette(
  prox_matrix,
  proximity_type = c("dissimilarity", "similarity"),
  method = c("medoid", "pac"),
  prob_matrix = NULL,
  a = 2,
  sort = FALSE,
  print.summary = FALSE,
  clust_fun = NULL,
  ...
)

\method{plot}{Silhouette}(x, label = FALSE, summary.legend = TRUE, grayscale = FALSE, ...)

\method{summary}{Silhouette}(object, print.summary = TRUE, ...)
}
\arguments{
\item{prox_matrix}{A numeric matrix where rows represent observations and columns represent proximity measures (e.g., distances or similarities) to clusters. Typically, this is a membership or dissimilarity matrix from clustering results. If \code{clust_fun} is provided, \code{prox_matrix} should be the name of the matrix component as a string (e.g., \code{prox_matrix = "d"} when \code{clust_fun = \link[ppclust]{fcm}} from the \pkg{ppclust} package).}

\item{proximity_type}{Character string specifying the type of proximity measure in \code{prox_matrix}. Options are \code{"similarity"} (higher values indicate closer proximity) or \code{"dissimilarity"} (lower values indicate closer proximity). Defaults to \code{"dissimilarity"}.}

\item{method}{Character string specifying the silhouette calculation method. Options are \code{"pac"} (Probability of Alternative Cluster) or \code{"medoid"}. Defaults to \code{"medoid"}.}

\item{prob_matrix}{A numeric matrix where rows represent observations and columns represent cluster membership probabilities. If \code{clust_fun} is provided, \code{prob_matrix} should be the name of the matrix component as a string (e.g., \code{"u"} for \code{\link[ppclust]{fcm}}). When not \code{NULL}, fuzzy silhouette width is calculated. Defaults to \code{NULL} for crisp silhouette.}

\item{a}{Numeric value controlling the fuzzifier or weight scaling in fuzzy silhouette averaging. Higher values increase the emphasis on strong membership differences. Must be positive. Defaults to \code{2}.}

\item{sort}{Logical; if \code{TRUE}, sorts the output \code{widths} data frame by cluster and descending silhouette width. Defaults to \code{FALSE}.}

\item{print.summary}{Logical; if \code{TRUE}, prints a summary table of average silhouette widths and sizes for each cluster. Defaults to \code{FALSE} in \code{Silhouette()} and to \code{TRUE} in \code{summary()}.}

\item{clust_fun}{Optional S3 or S4 function object or function as character string specifying a clustering function that produces the proximity measure matrix. For example, \code{\link[ppclust]{fcm}} or \code{"fcm"}. If provided, \code{prox_matrix} must be the name of the matrix component in the clustering output (e.g., \code{"d"} for \code{\link[ppclust]{fcm}} when \code{proximity_type = "dissimilarity"}). Defaults to \code{NULL}.}

\item{...}{Additional arguments passed to \code{clust_fun}, such as \code{x} and \code{centers} for \code{\link[ppclust]{fcm}}.}

\item{x}{An object of class \code{"Silhouette"}, typically the output of the \code{\link{Silhouette}} or \code{\link{softSilhouette}} function. Also supports objects of classes \code{\link[factoextra]{eclust}}, \code{\link[factoextra]{hcut}}, \code{\link[cluster]{pam}}, \code{\link[cluster]{clara}}, \code{\link[cluster]{fanny}}, or \code{\link[cluster]{silhouette}} from the \pkg{cluster} and \pkg{factoextra} packages. For these classes, explicitly call \code{plotSilhouette()} to generate the plot.}

\item{label}{Logical; if \code{TRUE}, the x-axis is labeled with observation row indices from the input data and titled "Row Index". Defaults to \code{FALSE}.}

\item{summary.legend}{Logical; if \code{TRUE}, prints a summary of average silhouette widths and sizes for each cluster in the legend ("Cluster (Size): Width"). If \code{FALSE}, the legend shows only cluster numbers. Defaults to \code{TRUE}.}

\item{grayscale}{Logical; if \code{TRUE}, the plot uses a grayscale color palette for clusters. If \code{FALSE}, uses the default or specified color palette. Defaults to \code{FALSE}.}

\item{object}{An object of class \code{"Silhouette"}, typically the output of the \code{\link{Silhouette}} or \code{\link{softSilhouette}} function.}
}
\value{
A data frame of class \code{"Silhouette"} containing cluster assignments, nearest neighbor clusters, silhouette widths for each observation, and weights (for fuzzy clustering). The object includes the following attributes:
\describe{
\item{proximity_type}{The proximity type used (\code{"similarity"} or \code{"dissimilarity"}).}
\item{method}{The silhouette calculation method used (\code{"medoid"} or \code{"pac"}).}
}

Further, \code{summary()} returns a list containing:
\itemize{
\item \code{clus.avg.widths}: A named numeric vector of average silhouette widths per cluster.
\item \code{avg.width}: The overall average silhouette width.
\item \code{sil.sum}: A data frame with columns \code{cluster}, \code{size}, and \code{avg.sil.width} summarizing cluster sizes and average silhouette widths.
}
}
\description{
Computes the silhouette width for each observation based on clustering results, measuring how similar an observation is to its own cluster compared to its nearest neighbor cluster. The silhouette width ranges from -1 to 1, where higher values indicate better cluster cohesion and separation.
}
\details{
The \code{Silhouette} function implements the Simplified Silhouette method introduced by Van der Laan, Pollard, & Bryan (2003), which adapts and generalizes the classic silhouette method of Rousseeuw (1987).

Clustering quality is evaluated using a proximity matrix, denoted as
\eqn{\Delta = [\delta_{ik}]_{n \times K}} for dissimilarity measures or
\eqn{\Delta' = [\delta'_{ik}]_{n \times K}} for similarity measures.
Here, \eqn{i = 1, \ldots, n} indexes observations, and \eqn{k = 1, \ldots, K} indexes clusters.
\eqn{\delta_{ik}} represents the dissimilarity (e.g., distance) between observation \eqn{i} and cluster \eqn{k},
while \eqn{\delta'_{ik}} represents similarity values.

The silhouette width \eqn{S(x_i)} for observation \eqn{i} depends on the proximity type:

For \strong{dissimilarity} measures:
\deqn{
  S(x_i) = \frac{ \min_{k' \neq k} \delta_{ik'} - \delta_{ik} }{ N(x_i) }
}
For \strong{similarity} measures:
\deqn{
  S(x_i) = \frac{ \delta'_{ik} - \max_{k' \neq k} \delta'_{ik'} }{ N(x_i) }
}
where \eqn{k} denotes the cluster to which observation \eqn{i} is assigned and \eqn{N(x_i)} is a normalizing factor defined by the method.

\strong{Choice of method:}
The normalizer \eqn{N(x_i)} is selected according to the \code{method} argument. The method names reference their origins but may be used with any proximity matrix, not exclusively certain clustering algorithms:
\itemize{
\item For \code{medoid} (Van der Laan et al., 2003):
\itemize{
\item Dissimilarity: \eqn{\max(\delta_{ik}, \min_{k' \neq k} \delta_{ik'})}
\item Similarity:    \eqn{\max(\delta'_{ik}, \max_{k' \neq k} \delta'_{ik'})}
}
\item For \code{pac} (Raymaekers & Rousseeuw, 2022):
\itemize{
\item Dissimilarity: \eqn{\delta_{ik} + \min_{k' \neq k} \delta_{ik'}}
\item Similarity:    \eqn{\delta'_{ik} + \max_{k' \neq k} \delta'_{ik'}}
}
}

\strong{Note:}
The \code{"medoid"} and \code{"pac"} options reflect the normalization formula—not a requirement to use the PAM algorithm or posterior/ensemble methods—and are general scoring approaches. These methods can be applied to any suitable proximity matrix, including proximity, similarity, or dissimilarity matrices derived from \strong{classification algorithms}. This flexibility means silhouette indices may be computed to assess group separation when clusters or groups are formed from classification-derived proximities, not only from unsupervised clustering.

If \code{prob_matrix} is \code{NULL}, the \strong{crisp silhouette index} (\eqn{CS}) is:
\deqn{
  CS = \frac{1}{n} \sum_{i=1}^{n} S(x_i)
}
summarizing overall clustering quality.

If \code{prob_matrix} is provided, denoted as \eqn{\Gamma = [\gamma_{ik}]_{n \times K}},
with \eqn{\gamma_{ik}} representing the probability of observation \eqn{i} belonging to cluster \eqn{k},
the \strong{fuzzy silhouette index} (\eqn{FS}) is used:
\deqn{
  FS = \frac{ \sum_{i=1}^{n} \left( \gamma_{ik} - \max_{k' \neq k} \gamma_{ik'} \right)^{\alpha} S(x_i) }{ \sum_{i=1}^{n} \left( \gamma_{ik} - \max_{k' \neq k} \gamma_{ik'} \right)^{\alpha} }
}
where \eqn{\alpha} (the \code{a} argument) controls the emphasis on confident assignments.
}
\examples{
# Crisp silhouette for a k-means partition of the iris data set
data(iris)
out <- kmeans(iris[, -5], 3)
# The observation-to-centre distances come from proxy::dist(), so the guard
# must test for the proxy package (the only optional dependency used here).
if (requireNamespace("proxy", quietly = TRUE)) {
  library(proxy)
  # n x K dissimilarity matrix: one row per observation, one column per centre
  dist_mat <- proxy::dist(iris[, -5], out$centers)
  silh_out <- Silhouette(dist_mat, print.summary = TRUE)
  plot(silh_out)
} else {
  message("Install 'proxy': install.packages('proxy')")
}
\donttest{
# Scree plot of the overall silhouette width for 2 to 7 clusters
if (requireNamespace("ppclust", quietly = TRUE)) {
  library(ppclust)
  k_values <- 2:7
  avg_sil_width <- numeric(length(k_values))
  for (idx in seq_along(k_values)) {
    # Silhouette() runs ppclust::fcm() itself; "d" and "u" name the distance
    # and membership components of the fcm() result.
    out <- Silhouette(
      prox_matrix = "d",
      proximity_type = "dissimilarity",
      prob_matrix = "u",
      clust_fun = ppclust::fcm,
      x = iris[, 1:4],
      centers = k_values[idx],
      sort = TRUE
    )
    # Extract the overall average silhouette width without printing the table
    avg_sil_width[idx] <- summary(out, print.summary = FALSE)$avg.width
  }
  # Plot against k itself so the x-axis shows the actual number of clusters
  # (2 to 7) rather than the vector index (1 to 6).
  plot(k_values, avg_sil_width,
    type = "o",
    ylab = "Overall Silhouette Width",
    xlab = "Number of Clusters",
    main = "Scree Plot"
  )
} else {
  message("Install 'ppclust': install.packages('ppclust')")
}
}

}
\references{
Rousseeuw, P. J. (1987). Silhouettes: A graphical aid to the interpretation and validation of cluster analysis. \emph{Journal of Computational and Applied Mathematics}, 20, 53--65. \doi{10.1016/0377-0427(87)90125-7}

Van der Laan, M., Pollard, K., & Bryan, J. (2003). A new partitioning around medoids algorithm. \emph{Journal of Statistical Computation and Simulation}, 73(8), 575--584. \doi{10.1080/0094965031000136012}

Campello, R. J., & Hruschka, E. R. (2006). A fuzzy extension of the silhouette width criterion for cluster analysis. \emph{Fuzzy Sets and Systems}, 157(21), 2858--2875. \doi{10.1016/j.fss.2006.07.006}

Raymaekers, J., & Rousseeuw, P. J. (2022). Silhouettes and quasi residual plots for neural nets and tree-based classifiers. \emph{Journal of Computational and Graphical Statistics}, 31(4), 1332--1343. \doi{10.1080/10618600.2022.2050249}

Bhat Kapu, S., & Kiruthika. (2024). Some density-based silhouette diagnostics for soft clustering algorithms. \emph{Communications in Statistics: Case Studies, Data Analysis and Applications}, 10(3--4), 221--238. \doi{10.1080/23737484.2024.2408534}
}
\seealso{
\code{\link{softSilhouette}}, \code{\link{plotSilhouette}}
}
