# Copyright (C) 2014 Mohammad H. Ferdosi
#
# HSPhase is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# HSPhase program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http:#www.gnu.org/licenses/>.
#' Calus-style recursive clustering of individuals using an OH matrix
#'
#' Performs a recursive hierarchical clustering on an opposing-homozygotes (OH)
#' matrix to split individuals into two groups at each step (Ward clustering),
#' until within-group OH values fall below a threshold derived from allele
#' frequencies estimated from the genotype matrix.
#'
#' The function returns a two-column data frame containing individual IDs and an
#' assigned group code. Group codes are generated randomly (via \code{rnorm()})
#' and therefore are not stable across runs.
#'
#' @param oh A numeric matrix representing the opposing-homozygotes (OH) counts
#'   between individuals. Row and column names should be individual IDs. The
#'   matrix is expected to be square and symmetric.
#' @param genotype A numeric genotype matrix of dimension \eqn{n \times m}
#'   (individuals \eqn{\times} SNPs), coded as 0 (AA), 1 (AB), 2 (BB), and 9 for
#'   missing values (as used in \code{hsphase}).
#'
#' @details
#' The threshold \code{maxsnpnooh} is computed from per-SNP minor allele
#' frequencies (\code{.maf}) and then reduced by 10\%. The recursion proceeds as:
#' \enumerate{
#'   \item Compute pairwise distances from \code{oh} using \code{.fastdist} and
#'   convert to a \code{dist} object.
#'   \item Apply hierarchical clustering (\code{\link[stats]{hclust}} with
#'   \code{method = "ward.D"}).
#'   \item Cut the dendrogram into \code{k = 2} groups.
#'   \item For each group, compute the maximum within-group OH value; if it
#'   exceeds \code{maxsnpnooh} and group size is > 2, recurse into that subgroup.
#'   Otherwise, write group assignments to a temporary file and stop recursion.
#' }
#'
#' @return A \code{data.frame} with columns:
#' \describe{
#'   \item{id}{Individual ID (character).}
#'   \item{group}{An integer-like group code (generated randomly; not reproducible).}
#' }
#'
#' @section Side effects:
#' This function writes to and reads from a file named \code{"temp.txt"} in the
#' current working directory, and then deletes it.
#'
#' @seealso \code{\link[stats]{hclust}}, \code{\link[stats]{cutree}},
#'   \code{\link[stats]{as.dist}}
#'
#' @keywords internal
.prCalus <- function(oh, genotype)
{
	maf_geno <- function(x)
	{
		z <- length(which(x == 0))
		o <- length(which(x == 1))
		maf <- (z * 2 + o)/(sum(!is.na(x)) * 2)
		if (!is.na(maf)) 
			if (maf > 0.5) 
				maf <- 1 - maf
		maf
	}
	
	p <- apply(genotype, 2, .maf)
	p <- p[!is.na(p)]
	q <- 1 - p
	
	maxsnpnooh <- (sum(p^2 * q^2) + sum(2 * (p^2 * q^2)))/2
	maxsnpnooh <- maxsnpnooh - (.1 * maxsnpnooh)
	
	cat("id group \n", file = "temp.txt")
	rhsr_rc <- function(oh, maxsnpnooh = maxsnpnooh)
	{
		print("----")
		## d <- dist(oh, method = 'manhattan')
		d <- as.dist(.fastdist(oh))
		if (length(d) > 2)
		{
			fit <- hclust(d, method = "ward.D")
			groups <- cutree(fit, k = 2)
			a <- which(groups == 1)
			b <- which(groups == 2)
			
			
			if (length(a) > 2)
			{
				subohA <- oh[a, a]
				maxSubohA <- max(subohA[lower.tri(subohA)])
			} else
			{
				maxSubohA <- 0
			}
			
			if (length(b) > 2)
			{
				subohB <- oh[b, b]
				maxSubohB <- max(subohB[lower.tri(subohB)])
			} else
			{
				maxSubohB <- 0
			}
			
			if (maxSubohA > maxsnpnooh && length(a) > 2)
			{
				
				rhsr_rc(oh[a, a], maxsnpnooh)
			} else
			{
				
				utils::write.table(data.frame(names(a), round(abs(rnorm(1) * 10^5))), "temp.txt", append = TRUE, col.names = FALSE, row.names = FALSE)
			}
			if (maxSubohB > maxsnpnooh && length(b) > 2)
			{
				rhsr_rc(oh[b, b], maxsnpnooh)
			} else
			{
				utils::write.table(data.frame(names(b), round(abs(rnorm(1) * 10^6))), "temp.txt", append = TRUE, col.names = FALSE, row.names = FALSE)
			}
		} else
		{
			if (!is.integer(oh)) 
				utils::write.table(data.frame(rownames(oh), round(abs(rnorm(1) * 10^6))), "temp.txt", append = TRUE, col.names = FALSE, row.names = FALSE)
		}
	}
	result <- rhsr_rc(oh, maxsnpnooh)
	result <- utils:: read.table("temp.txt", header = TRUE)
	file.remove("temp.txt")
	result
} 
