% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/plotgg.R
\name{plotgg}
\alias{plotgg}
\title{Heatmap plot of affinity() output}
\usage{
plotgg(
  data,
  variable,
  legendlimit,
  col = NULL,
  show.value = NULL,
  value.digit = NULL,
  text.size = NULL,
  text.col = NULL,
  plot.margin = NULL,
  drop.empty = TRUE,
  sig.only = FALSE,
  ...
)
}
\arguments{
\item{data}{Output list returned by \code{\link{affinity}}.}

\item{variable}{Name of a numeric column in \code{data$all} to plot.}

\item{legendlimit}{Controls how the limits of the color scale are set: either
\code{"datarange"} or \code{"balanced"}. See Details.}

\item{col}{Color specification for the fill scale. For \code{alpha_mle}
(diverging), supply \code{c(low, high)} or \code{c(low, mid, high)}.
For all other variables (sequential), supply \code{c(low, high)}.
If \code{NULL}, defaults are used (including an auto-generated low color for
sequential scales).}

\item{show.value}{Logical; if \code{TRUE}, values are printed on tiles. If
\code{NULL}, values are printed automatically when the number of plotted
entities is \eqn{\le 20}.}

\item{value.digit}{Number of digits used when printing values; default is 2.}

\item{text.size}{Size of printed values; default is 2.5.}

\item{text.col}{Color of printed values on tiles (used when values are shown).}

\item{plot.margin}{Plot margin passed to \code{ggplot2::theme(plot.margin = ...)}.
Typically a \code{ggplot2::margin(t, r, b, l, unit)} object.}

\item{drop.empty}{Logical; if \code{TRUE} (default), entities whose values are
all \code{NA} for the selected \code{variable} are removed from the plot.
Set to \code{FALSE} to keep all entities.}

\item{sig.only}{Logical or numeric. If \code{FALSE} (default), all values are
plotted. If \code{TRUE}, tiles are masked to \code{NA} wherever
\code{p_value > 0.05}. If numeric, the value is used as the p-value cutoff
(e.g., \code{sig.only = 0.01}). Requires a \code{p_value} column in
\code{data$all}. When \code{variable = "p_value"}, p-values above the cutoff
are masked to \code{NA}.}

\item{...}{Additional arguments (currently unused).}
}
\value{
A heatmap plot generated with \code{ggplot2}.
}
\description{
This function works on the output of \code{\link{affinity}} and uses
\code{ggplot2::ggplot()} to generate a heatmap for numeric columns of the
\code{$all} dataframe, excluding interval columns (median interval and
confidence intervals) and the confidence level (which is constant across
pairs in a single run).
}
\details{
This function is a wrapper around \code{ggplot2} with carefully chosen
defaults to generate an interpretable heatmap of pairwise associations.
The plot shows the lower triangle of an \eqn{N \times N} matrix (diagonal
excluded), where both rows and columns represent the same set of entities.
The upper triangle is omitted because it is a mirror image of the lower
triangle.

By default (\code{drop.empty = TRUE}), entities whose values are entirely
\code{NA} for the selected \code{variable} are removed from both axes. This
avoids plotting empty rows and columns when an entity has no usable values
(e.g., due to degenerate distributions or missing data). Set
\code{drop.empty = FALSE} to retain all entities and reproduce the full grid,
including empty rows or columns.

If \code{sig.only} is enabled, values of the selected \code{variable} are
masked to \code{NA} wherever \code{p_value} exceeds the specified cutoff, so
only statistically significant tiles are shown. Use \code{sig.only = TRUE}
to apply the default cutoff (0.05), or supply a numeric cutoff (e.g.,
\code{sig.only = 0.01}). Requires a \code{p_value} column in \code{data$all}.
When \code{variable = "p_value"}, p-values above the cutoff are masked to
\code{NA}.

Legend titles are mapped to human-readable labels (some shown on two lines),
rather than using raw column names from \code{data$all}.

The plot can be requested using column names from the \code{$all} dataframe
returned by \code{affinity}. Additional \code{ggplot2} layers or theme
modifications can be added by appending them with \code{+}, as in standard
\code{ggplot2} usage.

The \code{legendlimit} argument controls how the color scale is defined.
For \code{alpha_mle}, the default midpoint is 0 (null expectation), and the
color scale can be either data-driven (\code{"datarange"}) or symmetrically
balanced around zero (\code{"balanced"}), using the maximum absolute value
observed.  For indices bounded in \eqn{[0,1]} (\code{p_value}, \code{jaccard},
\code{sorensen}, \code{simpson}), the balanced scale uses fixed limits
\eqn{[0,1]}. For \code{p_value}, the color mapping is reversed so smaller
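p-values appear more intense. For count-based variables
(\code{entity_1_count_mA}, \code{entity_2_count_mB}, \code{total_N}), no
natural midpoint exists; the color scale spans the observed range, and a
request for \code{"balanced"} is changed to \code{"datarange"}.
For \code{obs_cooccur_X} and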
\code{exp_cooccur}, a shared color scale is applied so the two plots are
visually comparable.

When \code{show.value = TRUE}, numeric values are printed on each tile using
\code{ggplot2::geom_text()}. If \code{show.value = NULL} (default), values are
printed automatically when the number of plotted entities is \eqn{\le 20}.
Rounding and text appearance are controlled by \code{value.digit},
\code{text.size}, and \code{text.col}.
}
\examples{
# Worked examples for plotgg(), based on the finches data.
# affinity() is run first; its output list is what plotgg() plots.
data(finches)
head(finches)

library(ggplot2)


# the remainder of the examples is wrapped in a donttest block
# to stay within CRAN's 5-second limit on example run time
# --------------------------------------------------------------

\donttest{

  # plotting various variables
  # ---------------------------------------------
  # compute alpha and other quantities for island-pair affinity (beta diversity)
  # the square matrices are not used for plotting
  myout <- affinity(data = finches, row.or.col = "col")
  # myout

  plotgg(data = myout, variable = "alpha_mle", legendlimit = "datarange")
  # in the example above, the null expectation of alpha_mle (= 0) is white, and
  # negative and positive values grade towards "#87beff" and "#fd6a6c", respectively.
  # the color spectrum is NOT stretched over the range of the data; it spans the
  # same extent on both sides of zero: -max(abs(valrange)) to max(abs(valrange)).
  # the legend, however, can show either the extent of the data ("datarange")
  # or the entire spectrum over which the color is applied ("balanced").
  plotgg(data = myout, variable = "alpha_mle", legendlimit = "balanced")
  # notice that the two plots above are identical except for the legend:
  # its range differs, but the color scale is the same.


  plotgg(data = myout, variable = "sorensen", legendlimit = "balanced")
  plotgg(data = myout, variable = "jaccard", legendlimit = "balanced")

  # in the case of observed and expected cooccurrences, one color scale is applied for both plots
  # so that the shades of color across plots can be visually compared
  plotgg(data = myout, variable = "exp_cooccur", legendlimit = "datarange")
  plotgg(data = myout, variable = "exp_cooccur", legendlimit = "balanced")
  plotgg(data = myout, variable = "obs_cooccur_X", legendlimit = "balanced")

  plotgg(data = myout, variable = "entity_1_count_mA", legendlimit = "datarange")
  plotgg(data = myout, variable = "entity_2_count_mB", legendlimit = "datarange")
  plotgg(data = myout, variable = "total_N", legendlimit = "datarange")
  # for "entity_1_count_mA", "entity_2_count_mB", "total_N",
  # if legendlimit is set to "balanced", it will be changed to "datarange"
  plotgg(data = myout, variable = "entity_2_count_mB", legendlimit = "balanced")


  # plot only statistically significant tiles (based on p_value)
  # -----------------------------------------------------------
  # sig.only = TRUE masks non-significant tiles (p_value > 0.05) to NA
  plotgg(data = myout, variable = "alpha_mle", legendlimit = "balanced", sig.only = TRUE)

  # you can also supply a stricter p-value cutoff (e.g., 0.01)
  plotgg(data = myout, variable = "alpha_mle", legendlimit = "balanced", sig.only = 0.01)


  # change color of the plot and text
  # ---------------------------------------------
  plotgg(data = myout, variable = "alpha_mle", legendlimit = "balanced")
  plotgg(data = myout, variable = "alpha_mle", legendlimit = "balanced",
         col = c('#99cc33', 'black', '#ff9933'), text.col = "white")
  plotgg(data = myout, variable = "alpha_mle", legendlimit = "balanced",
         col = c('#99cc33', '#ff9933'), text.col = "white")

  plotgg(data = myout, variable = "obs_cooccur_X", legendlimit = "balanced")
  plotgg(data = myout, variable = "obs_cooccur_X", legendlimit = "balanced",
         col = c('black', 'red'), text.col = "white")


  # change the characteristics of text printed in the plot
  # ------------------------------------------------------
  plotgg(data = myout, variable = "alpha_mle", legendlimit = "balanced")

  # change the number of digits; the default is 2
  plotgg(data = myout, variable = "alpha_mle", legendlimit = "balanced", value.digit = 3)

  # make the fonts bigger; the default is 2.5
  plotgg(data = myout, variable = "alpha_mle", legendlimit = "balanced", text.size = 3.5)

  # hide values from the plot
  plotgg(data = myout, variable = "alpha_mle", legendlimit = "balanced", show.value = FALSE)


  # increase or decrease margin
  # ---------------------------------------------
  # from here on, affinity is computed between rows instead of columns
  myout <- affinity(data = finches, row.or.col = "row")
  # myout

  plotgg(data = myout, variable = "alpha_mle", legendlimit = "balanced")
  plotgg(data = myout, variable = "alpha_mle", legendlimit = "balanced",
         plot.margin = ggplot2::margin(1,1,5,2, "cm"))


  # change angle of x-axis tick label; the default is 35 degrees
  # ------------------------------------------------------------
  plotgg(data = myout, variable = "alpha_mle", legendlimit = "balanced")
  plotgg(data = myout, variable = "alpha_mle", legendlimit = "balanced") +
    ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45))

  # to change to 90 degrees, adjust vjust
  # bad ->
  plotgg(data = myout, variable = "alpha_mle", legendlimit = "balanced") +
    ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90))
  # good ->
  plotgg(data = myout, variable = "alpha_mle", legendlimit = "balanced") +
    ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5))


  # additional elements in the plot
  # ----------------------------------
  # because the output is a ggplot object, you can append ggplot2 layers,
  # labels and theme settings with +

  # add plot title and change legend title
  # (myout was computed with row.or.col = "row", so the tiles are row pairs)
  plotgg(data = myout, variable = "alpha_mle", legendlimit = "balanced") +
    ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5)) +
    ggplot2::ggtitle("Affinity of species pairs measured with Alpha MLE") +
    ggplot2::labs(fill = 'My Personal\nTitle')



  # show/hide entities that are entirely empty (all-NA tiles)
  # --------------------------------------------------------
  # Here we create an artificial "empty" entity by setting one column to NA.
  # This guarantees that all pairwise comparisons involving that entity have no usable data,
  # so the corresponding tiles become NA for variables such as alpha_mle.

  finches2 <- as.matrix(finches)
  storage.mode(finches2) <- "numeric"
  finches2[, 3] <- NA_real_  # make entity 3 entirely missing (any column works)
  myout2 <- affinity(data = finches2, row.or.col = "col")

  # Default behavior: drop.empty = TRUE (empty entity removed from the axes)
  plotgg(data = myout2, variable = "alpha_mle", legendlimit = "balanced")

  # Keep empty entities (legacy/full grid): shows the empty row/column
  plotgg(data = myout2, variable = "alpha_mle", legendlimit = "balanced", drop.empty = FALSE)

  # keep empty entities even after masking (shows rows/columns with all-NA tiles)
  plotgg(data = myout2, variable = "alpha_mle", legendlimit = "balanced",
         sig.only = TRUE, drop.empty = FALSE)


  # automatic suppression of numeric values on tiles
  # -------------------------------------------------
  # By default, numeric values are printed on tiles only when the number of
  # plotted entities is reasonably small (<= 20). This avoids severe visual
  # clutter when the heatmap becomes large.

  finches_big <- finches

  # duplicate columns to artificially inflate the number of entities
  finches_big <- cbind(finches_big, finches_big[, 1:5])
  colnames(finches_big)[(ncol(finches) + 1):ncol(finches_big)] <-
    paste0(colnames(finches)[1:5], "_dup")

  myout_big <- affinity(data = finches_big, row.or.col = "col")

  # Numeric values are NOT printed because the number of entities exceeds 20
  plotgg(data = myout_big, variable = "alpha_mle", legendlimit = "balanced")

  # To force printing numeric values despite the large number of entities:
  plotgg(data = myout_big, variable = "alpha_mle", legendlimit = "balanced", show.value = TRUE)


} # end of the donttest block


}
\seealso{
\code{\link{affinity}}
}
\author{
Kumar Mainali
}
