#' @title separability
#' @description Calculates a variety of univariate or multivariate separability metrics for two-class samples
#'           
#' @param x  X vector
#' @param y  Y vector
#' @param plot  plot separability (TRUE/FALSE)
#' @param cols  colours for plot (must be equal to number of classes)
#' @param clabs  labels for two classes
#' @param ...  additional arguments passed to plot
#'
#' @return A data.frame with the following separability metrics: 
#' @return   B  Bhattacharyya distance statistic 
#' @return  JM  Jeffries-Matusita distance statistic
#' @return   M  M-Statistic
#' @return   mdif  Absolute difference between the class means
#' @return   D  Divergence index
#' @return   TD  Transformed Divergence index
#'
#' @export
#' @note
#' M-Statistic (Kaufman & Remer 1994) - This is a measure of the difference of the distributional peaks. A large M-statistic indicates good separation between the two classes as within-class variance is 
#' minimized and between-class variance maximized (M <1 poor, M >1 good).
#'    
#' Bhattacharyya distance (Bhattacharyya 1943; Harold 2003) - Measures the similarity of two discrete or continuous probability distributions.   
#'    
#' Jeffries-Matusita distance (Bruzzone et al., 1995; Swain et al., 1971) - The J-M distance is a function of separability that directly relates to the
#' probability of how good a resultant classification will be. As computed here, 2 * (1 - exp(-B)), the J-M distance is asymptotic to 2, where values 
#' near 2 suggest complete separability (the square-root form used in some references is asymptotic to sqrt(2)).
#'    
#' Divergence and transformed Divergence (Du et al., 2004) - Maximum likelihood approach. Transformed divergence gives an exponentially 
#' decreasing weight to increasing distances between the classes.
#'
#' @author Jeffrey S. Evans  <jeffrey_evans<at>tnc.org>
#'
#' @references
#'    Anderson, M. J., & Clements, A. (2000) Resolving environmental disputes: a statistical method for choosing among competing cluster models. Ecological Applications 10(5):1341-1355
#' @references
#'    Bhattacharyya, A. (1943) On a measure of divergence between two statistical populations defined by their probability distributions. Bulletin of the Calcutta Mathematical Society 35:99-109
#' @references
#'    Bruzzone, L., F. Roli, S.B. Serpico (1995) An extension to multiclass cases of the Jeffreys-Matusita distance. IEEE Transactions on Pattern Analysis and Machine Intelligence 33:1318-1321
#' @references
#'    Du, H., C.I. Chang, H. Ren, F.M. D'Amico, J.O. Jensen (2004) New Hyperspectral Discrimination Measure for Spectral Characterization. Optical Engineering 43(8):1777-1786.
#' @references
#'    Kailath, T., (1967) The Divergence and Bhattacharyya measures in signal selection. IEEE Transactions on Communication Theory 15:52-60  
#' @references
#'    Kaufman Y., and L. Remer (1994) Detection of forests using mid-IR reflectance: An application for aerosol studies. IEEE T. Geosci.Remote. 32(3):672-683.
#' 
#' @examples 
#'    norm1 <- dnorm(seq(-20,20,length=5000),mean=0,sd=1) 
#'    norm2 <- dnorm(seq(-20,20,length=5000),mean=0.2,sd=2)                          
#'      separability(norm1, norm2) 
#'            
#'    s1 <- c (1362,1411,1457,1735,1621,1621,1791,1863,1863,1838)
#'    s2 <- c (1362,1411,1457,10030,1621,1621,1791,1863,1863,1838)
#'      separability(s1, s2, plot=TRUE) 
#'                                   
separability <- function(x, y, plot = FALSE, cols = c("red", "blue"), clabs = c("Class1", "Class2"), ...) {
    # Two-class separability metrics (Bhattacharyya, Jeffries-Matusita,
    # M-statistic, divergence, transformed divergence).
    #
    # x, y : one sample per class; a vector (univariate) or a matrix /
    #        data.frame with observations in rows and variables in columns.
    #        Both classes must have the same number of columns.
    # plot : draw overlaid class densities (univariate input only).
    # cols, clabs : fill colours and legend labels for the two classes.
    # ...  : forwarded to plot().
    #
    # Returns a data.frame with columns B, JM, M, mdif, D, TD. Univariate
    # input gives one row. Multivariate input gives one row per variable
    # (in column order): M and mdif are per-variable, while B, JM, D and TD
    # are joint statistics and are therefore repeated on every row.
    if (length(cols) > 2) 
        stop("TOO MANY COLORS")
    if (length(clabs) > 2) 
        stop("TOO MANY CLASS LABELS")

    x <- as.matrix(x)
    y <- as.matrix(y)
    if (ncol(x) != ncol(y)) 
        stop("x and y must have the same number of variables (columns)")
    nvar <- ncol(x)

    trace.of.matrix <- function(SquareMatrix) {
        sum(diag(SquareMatrix))
    }
    # Matrix inverse. With a single variable the scalar reciprocal is used so
    # that a zero variance yields Inf/NaN in the results (as it always did)
    # instead of solve() raising a "singular" error.
    invert <- function(SquareMatrix) {
        if (nvar == 1L) 1 / SquareMatrix else solve(SquareMatrix)
    }

    # Per-variable class means; names are dropped so they never leak into the
    # row names of the returned data.frame.
    mdif <- unname(colMeans(x)) - unname(colMeans(y))
    cov.x <- stats::cov(x)
    cov.y <- stats::cov(y)
    icov.x <- invert(cov.x)
    icov.y <- invert(cov.y)
    p <- (cov.x + cov.y) / 2

    # Bhattacharyya distance:
    #   1/8 * d' P^-1 d + 1/2 * ln(|P| / sqrt(|Sx| * |Sy|))
    bh.distance <- 0.125 * drop(t(mdif) %*% invert(p) %*% mdif) + 
        0.5 * log(det(p) / sqrt(det(cov.x) * det(cov.y)))
    jm.distance <- 2 * (1 - exp(-bh.distance))

    # M-statistic is a univariate measure, so it is evaluated per variable
    # from the diagonal (variances) of each class covariance matrix.
    m <- abs(mdif) / (sqrt(diag(cov.x)) + sqrt(diag(cov.y)))

    # Divergence:
    #   1/2 * tr[(Sx - Sy)(Sy^-1 - Sx^-1)] + 1/2 * tr[(Sx^-1 + Sy^-1) d d']
    dt1 <- 0.5 * trace.of.matrix((cov.x - cov.y) %*% (icov.y - icov.x))
    dt2 <- 0.5 * trace.of.matrix((icov.x + icov.y) %*% tcrossprod(mdif))
    divergence <- dt1 + dt2
    transformed.divergence <- 2 * (1 - exp(-(divergence / 8)))

    # as.logical() keeps truthy non-logical values such as plot = 1 working.
    if (isTRUE(as.logical(plot))) {
        if (nvar > 1L) {
            # A single density overlay is not meaningful for several variables.
            warning("plot is only supported for univariate input; no plot drawn", call. = FALSE)
        } else {
            # Same colour at 25% opacity so the overlapping densities stay visible.
            fill.colour <- function(colour) {
                rgb.unit <- as.vector(grDevices::col2rgb(colour) / 255)
                grDevices::rgb(rgb.unit[1], rgb.unit[2], rgb.unit[3], 1/4)
            }
            fill1 <- fill.colour(cols[1])
            fill2 <- fill.colour(cols[2])
            d1 <- stats::density(x)
            d2 <- stats::density(y)
            # Axis limits span both densities so neither polygon is clipped.
            plot(d1, type = "n", ylim = range(c(d1$y, d2$y)), xlim = range(c(d1$x, d2$x)), ...)
            graphics::polygon(d1, col = fill1)
            graphics::polygon(d2, col = fill2)
            # Class means: solid line for x, dashed line for y.
            graphics::abline(v = mean(x), lty = 1, col = "black")
            graphics::abline(v = mean(y), lty = 2, col = "black")
            graphics::legend("topright", legend = clabs, fill = c(fill1, fill2))
        }
    }

    data.frame(B = bh.distance, JM = jm.distance, M = m, mdif = abs(mdif), D = divergence, TD = transformed.divergence)
}
