# =============================================================================
# MODULE 2: INFORMATION-THEORETIC METRICS
# =============================================================================

#' Shannon Entropy
#'
#' Computes the Shannon entropy of a sample after discretizing it into an
#' equal-width histogram spanning the observed range. Non-finite values
#' (`NA`, `NaN`, `Inf`, `-Inf`) are dropped first. Because the bins are
#' derived from the sample range alone, the result does not depend on the
#' location or scale of `x`.
#'
#' @param x Numeric vector
#' @param n_bins Number of histogram bins (default 10). Must be a single
#'   finite number >= 1; fractional values are rounded up.
#' @return Entropy in nats, or NA_real_ if fewer than 3 finite values remain.
#'   A sample whose finite values are all identical has entropy 0.
#' @export
#' @examples
#' entropy(rnorm(100))
entropy <- function(x, n_bins = 10) {
  stopifnot(
    is.numeric(n_bins), length(n_bins) == 1L,
    is.finite(n_bins), n_bins >= 1
  )
  x <- x[is.finite(x)]
  if (length(x) < 3) return(NA_real_)

  rng <- range(x)
  # Degenerate sample: every observation shares a single bin, so the entropy
  # is exactly zero and no histogram needs to be built.
  if (rng[1] == rng[2]) return(0)

  # No absolute padding of the range: hist() counts both endpoints by default
  # (right-closed bins with include.lowest = TRUE), and a fixed epsilon would
  # swamp the bin width for data with a very small spread.
  breaks <- seq(rng[1], rng[2], length.out = ceiling(n_bins) + 1)
  counts <- graphics::hist(x, breaks = breaks, plot = FALSE)$counts

  # Empty bins contribute nothing (0 * log(0) is taken as 0).
  probs <- counts[counts > 0] / sum(counts)
  -sum(probs * log(probs))
}

#' Jensen-Shannon Divergence
#'
#' Symmetric information-theoretic divergence between two samples, with
#' additive (Laplace) smoothing. Both samples are binned on one shared
#' equal-width grid spanning their pooled range, so the result does not
#' depend on the location or scale of the data. Non-finite values are
#' dropped before binning.
#'
#' @param p First distribution (numeric vector)
#' @param q Second distribution (numeric vector)
#' @param n_bins Number of histogram bins (default 15). Must be a single
#'   finite number >= 1; fractional values are rounded up.
#' @param alpha Pseudo-count added to every bin before normalizing
#'   (default 1, i.e. Laplace smoothing). Must be a single finite number
#'   >= 0; `alpha = 0` disables smoothing.
#' @return JSD value (0 to ln(2)), or NA_real_ if either sample has fewer
#'   than 5 finite values
#' @export
#' @references
#' Lin J (1991). Divergence measures based on the Shannon entropy.
#' IEEE Transactions on Information Theory 37(1):145-151.
#' @examples
#' jensen_shannon(rnorm(50, 0, 1), rnorm(50, 2, 1))
jensen_shannon <- function(p, q, n_bins = 15, alpha = 1) {
  stopifnot(
    is.numeric(n_bins), length(n_bins) == 1L,
    is.finite(n_bins), n_bins >= 1,
    is.numeric(alpha), length(alpha) == 1L,
    is.finite(alpha), alpha >= 0
  )
  p <- p[is.finite(p)]
  q <- q[is.finite(q)]
  if (length(p) < 5 || length(q) < 5) return(NA_real_)

  # seq() rounds a fractional length.out up; use the same bin count in the
  # smoothing denominator so the probabilities always sum to one.
  n_cells <- as.integer(ceiling(n_bins))
  rng <- range(p, q)

  if (rng[1] < rng[2]) {
    # No absolute padding of the range: hist() counts both endpoints by
    # default, and a fixed epsilon would swamp the bin width for data with
    # a very small spread.
    breaks <- seq(rng[1], rng[2], length.out = n_cells + 1L)
    p_hist <- graphics::hist(p, breaks = breaks, plot = FALSE)$counts
    q_hist <- graphics::hist(q, breaks = breaks, plot = FALSE)$counts
  } else {
    # All pooled values are identical: both samples occupy one common bin.
    p_hist <- c(length(p), numeric(n_cells - 1L))
    q_hist <- c(length(q), numeric(n_cells - 1L))
  }

  p_prob <- (p_hist + alpha) / (sum(p_hist) + alpha * n_cells)
  q_prob <- (q_hist + alpha) / (sum(q_hist) + alpha * n_cells)
  m_prob <- (p_prob + q_prob) / 2

  # KL divergence from a distribution to the mixture; bins with zero mass
  # (possible only when alpha = 0) contribute nothing.
  kl_to_mixture <- function(d) {
    keep <- d > 0
    sum(d[keep] * log(d[keep] / m_prob[keep]))
  }

  (kl_to_mixture(p_prob) + kl_to_mixture(q_prob)) / 2
}
