% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/clustervalidation.R
\name{gplus_kproto}
\alias{gplus_kproto}
\title{Validating k Prototypes Clustering: Gplus index}
\usage{
gplus_kproto(object = NULL, data = NULL, k = NULL, dists = NULL,
  ...)
}
\arguments{
\item{object}{Object of class \code{kproto} resulting from a call with \code{kproto(..., keep.data=TRUE)}}

\item{data}{Original data; only required if \code{object == NULL}.}

\item{k}{Vector specifying the search range for optimum number of clusters; if \code{NULL} the range will be set as \code{2:sqrt(n)}. Only required if \code{object == NULL}.}

\item{dists}{for internal purposes only}

\item{...}{Further arguments passed to \code{\link[clustMixType]{kproto}}, like:
\itemize{
  \item \code{nstart}: If > 1 repetitive computations of \code{kproto} with random initializations are computed.
  \item \code{lambda}: Factor to trade off between Euclidean distance of numeric variables and simple matching coefficient between categorical variables.
  \item \code{verbose}: Logical whether information about the cluster procedure should be given. Caution: If \code{verbose=FALSE}, the reduction of the number of clusters is not mentioned.
}}
}
\value{
For computing the optimal number of clusters based on the Gplus index for k-Prototypes clustering the output contains:

\item{k_opt}{optimal number of clusters}

\item{indices}{calculated indices for \eqn{k=2,\ldots,k_{max}}{k=2,...,k_max}}

For computing the Gplus index-value for a given k-Prototypes clustering the output contains:

\item{index}{calculated index-value}
}
\description{
Calculating the Gplus index for a k-Prototypes clustering with k clusters or 
computing the optimal number of clusters based on the Gplus index for k-Prototypes clustering.
}
\details{
\deqn{Gplus = \frac{2 \cdot s(-)}{\frac{n(n-1)}{2} \cdot (\frac{n(n-1)}{2}-1)}} \cr 
Comparisons are made between all within-cluster dissimilarities and all between-cluster dissimilarities. 
\eqn{s(-)} is the number of discordant comparisons and a comparison is named discordant if a within-cluster 
dissimilarity is strictly greater than a between-cluster dissimilarity. \cr
The minimum value of the index is used to indicate the optimal number of clusters.
}
\examples{
# generate toy data with factors and numerics
# (four consecutive groups of n observations each, i.e. 4*n rows in total)

# n:   number of observations per group
# prb: probability of the dominant factor level within a group
# muk: absolute value of the group means of the numeric variables
n   <- 10
prb <- 0.99
muk <- 2.5 
# group labels 1..4 used to generate the data; not referenced again in this example
clusid <- rep(1:4, each = n)

# x1: first 2*n observations are mostly level A, last 2*n mostly level B
x1 <- sample(c("A","B"), 2*n, replace = TRUE, prob = c(prb, 1-prb))
x1 <- c(x1, sample(c("A","B"), 2*n, replace = TRUE, prob = c(1-prb, prb)))
x1 <- as.factor(x1)

# x2: second factor, drawn independently with the same scheme as x1
x2 <- sample(c("A","B"), 2*n, replace = TRUE, prob = c(prb, 1-prb))
x2 <- c(x2, sample(c("A","B"), 2*n, replace = TRUE, prob = c(1-prb, prb)))
x2 <- as.factor(x2)

# x3, x4: numeric variables whose mean alternates between -muk and muk
# across the four blocks of n observations
x3 <- c(rnorm(n, mean = -muk), rnorm(n, mean = muk), rnorm(n, mean = -muk), rnorm(n, mean = muk))
x4 <- c(rnorm(n, mean = -muk), rnorm(n, mean = muk), rnorm(n, mean = -muk), rnorm(n, mean = muk))

# mixed-type data set: two factors and two numeric variables
x <- data.frame(x1,x2,x3,x4)

# apply k prototypes with 4 clusters
kpres <- kproto(x, 4)

# calculate the Gplus index-value of the given clustering
gplus_value <- gplus_kproto(object = kpres)

# calculate the optimal number of clusters within the search range 3:5;
# nstart and verbose are passed on to kproto, and the result contains
# k_opt and indices as described in the Value section
k_opt <- gplus_kproto(data = x, k = 3:5, nstart = 5, verbose = FALSE)

}
\references{
\itemize{
    \item Charrad, M., Ghazzali, N., Boiteau, V., Niknafs, A. (2014): 
    NbClust: An R Package for Determining the Relevant Number of Clusters in a Data Set. 
    \href{http://www.jstatsoft.org/v61/i06/}{\emph{Journal of Statistical Software, Vol 61, Issue 6}}.
  }
}
\seealso{
Other clustervalidation indices: \code{\link[clustMixType]{cindex_kproto}},
\code{\link[clustMixType]{dunn_kproto}}, \code{\link[clustMixType]{gamma_kproto}},
\code{\link[clustMixType]{mcclain_kproto}}, \code{\link[clustMixType]{ptbiserial_kproto}},
\code{\link[clustMixType]{silhouette_kproto}}, \code{\link[clustMixType]{tau_kproto}}
}
\author{
Rabea Aschenbruck
}
