##' @name autoplot.rpart
##' @export autoplot.rpart
##' @aliases autoplot.rpart
##' @method autoplot rpart
##' @title Plot a recursive partitioning tree using \code{ggplot2}
##' @include genSurv.R
##' @param object An object of class \code{rpart} as
##' returned by \code{rpart::rpart()}
##' @param ... Additional arguments (not implemented)
##' @param title Title for plot
##' @param titSize Title text size
##' @param uniform The default is to use a non-uniform spacing
##' \emph{proportional to the error in the fit}.
##' If \code{uniform=TRUE}, uniform vertical
##' spacing of the nodes is used. This may be less
##' cluttered when fitting a large plot onto a page.
##' @param minbranch This parameter is ignored if \code{uniform=TRUE}.
##' \cr
##' The usual tree shows branch lengths \emph{in proportion to improvement}.
##' \cr
##' Where improvement is
##' minimal, there may be insuffcient room for node labels.
##' \code{minbranch} sets the minimum length for a branch to \code{minbranch}
##' times the average branch length
##' @param compress If \code{compress=FALSE} (the default), the leaf
##' nodes will be at the horizontal plot
##' co-ordinates of '1:nleaves'.
##' \cr
##' If \code{compress=TRUE}, the tree is arranged in a more compact form.
##' (The compaction algorithm assumes \code{uniform=TRUE}).
##' The result is usually an improvement even when that is not the case.
##' @param nspace Applies only when \code{compress=TRUE}.
##' \cr
##' The amount of extra space between a node with children and a leaf,
##' as compared to the minimal space between leaves. The default is the
##' value of \code{branch}.
##' @param branch Controls the shape of the branches from parent to child node.
##' \cr Needs to be in the range \eqn{0-1}.  A value of \eqn{1} gives square
##' shouldered branches,
##' a value of \eqn{0} gives V shaped branches, with other values being intermediate.
##' @param all If \code{all="FALSE"} only terminal nodes
##' (leaves) will be labelled
##' @param nodeLabels These can be used to replace the names of the
##' default labels from the fit.
##' Should be given in the same order as those names
##' @param lineSize Line size connecting nodes
##' @param vArrows Add vertical arrows for descending lines
##' @param nodeSize Node text size
##' @param nodeColor Node text color
##' @param leaf If \code{leaf="fitR"} (the default) terminal nodes (leaves)
##' are labelled with the fitted response. If this is a \code{factor}, the
##' \code{labels} of the \code{factor} are used.
##' \cr
##' The following apply only when the object is fit with
##' \code{rpart(..., method="exp")}:
##' \itemize{
##'  \item If \code{leaf="en"}, the leaves are labelled with number of events and
##' number at risk.
##'  \item If \code{leaf="both"} labels show both fitted responses and
##' no. events/ no. at risk
##'  }
##' @param leafSize Leaf (terminal node) text size
##' @param leafColor Leaf text color
##' @param digits Number of significant digits for fitted response.
##' \cr
##' Default is \code{getOption("digits")}
##' @param yNU \bold{y} value for \bold{N}odes moved \bold{U}p.
##' Used to prevent text from overlapping.
##' This multiplier is applied to the difference
##' of the largest and smallest \eqn{y} values plotted.
##' May need to be increased if larger text sizes are
##' used in labelling nodes or node labels span > 1 line.
##' Typically is \eqn{< 0.1}.
##' @param yND \bold{y} value for \bold{N}odes moved \bold{D}own.
##' As above, but applies to text appearing
##' below the node.
##' @param yLD \bold{y} value for \bold{L}eaves moved \bold{D}own.
##' As above, but applies to text appearing
##' below the leaves.
##' @return A \code{list} with the following elmenents:
##' \describe{
##'  \item{plot}{A plot, as generated by \code{ggplot}}
##'  \item{segments}{A \code{data.table} with the co-ordinates used to plot the lines}
##'  \item{nodes}{A \code{data.table} with the co-ordinates used to plot the nodes.
##' \cr
##' Columns are labelled as follows:
##'  \describe{
##'  \item{x, y}{\eqn{x} and \eqn{y} co-ordinates}
##'  \item{node}{Node name}
##'  \item{n}{Number of observations (number at risk) for this node}
##'  \item{isP}{Is node a \bold{p}arent?}
##'  \item{lC, rC}{Left and right \bold{c}hildren of node}
##'  \item{isL}{Is node a leaf (terminal node)?}
##'  \item{resp}{Predicted \bold{resp}onse}
##'  \item{yNU, yND, yLD}{adjusted \eqn{y} values for nodes and leaves}
##'  }
##'  And where applicable:
##' \describe{
##'  \item{e}{Number of events}
##'  \item{en}{Number of events / number of observations}
##'  }}
##' }
##' @details The plot shows a division at each node. This is read as \emph{right=TRUE}.
##' \cr
##' Thus for a node reading \bold{x > 0.5} the line descending to the right is that where \bold{x > 0.5}.
##' @author Chris Dardis. Adapted from work by Andrie de Vries and Terry Therneau
##' @examples
##' data("cu.summary", package="rpart")
##' fit <- rpart(Price ~ Mileage + Type + Country, cu.summary)
##' autoplot(fit)
##' progstat <- factor(stagec$pgstat, levels = 0:1, labels = c("No", "Prog"))
##' cfit <- rpart(progstat ~ age + eet + g2 + grade + gleason + ploidy,
##'               data = stagec, method = 'class')
##' autoplot(cfit)
##' set.seed(1)
##' df1 <- genSurvDf(model=FALSE)
##' r1 <- rpart(Surv(t1, e) ~ ., data=df1, method="exp")
##' autoplot(r1, leaf="en", title="Nodes show events / no. at risk")
##' autoplot(r1, compress=TRUE, branch=0.5, nspace=0.1,
##'          title="Nodes show events / no. at risk")
##' ### oversize text; need to adjust y values for text to compensate
##' a1 <- autoplot(r1, compress=TRUE, digits=5,
##'                nodeSize=10, yNU=0.05, yND=0.03,
##'                leafSize=10, , yLD=0.08, nodeLabels=seq(17))$plot
##' ### can use expand_limits if text is cut off at margins
##' a1 + expand_limits(x=c(0.75, 7.25))
##'
autoplot.rpart <- function(object,
                           ...,
                           title="Recursive partitioning tree \n Terminal nodes show fitted response",
                           titSize=20,
                           uniform=FALSE,
                           minbranch = 0.3,
                           compress=FALSE,
                           nspace,
                           branch = 1,
                           all=TRUE,
                           nodeLabels=NULL,
                           lineSize=1,
                           vArrows=FALSE,
                           nodeSize=5,
                           nodeColor="darkblue",
                           leaf=c("fitR", "en", "both"),
                           leafSize=5,
                           leafColor="darkblue",
                           digits=NULL,
                           yNU=0.02,
                           yND=0.02,
                           yLD=0.02){
### prevent errors in R CMD check
    ind <- x <- y <- V5 <- e <- n <- resp <- NULL
     xend <- yend <- yM <- ym <- en <- respEN <- NULL
###
    if (!inherits(object, "rpart")) stop("Not a legitimate \"rpart\" object")
    if (nrow(object$frame) <= 1L) stop("Fit is not a tree, just a root")
    if (!missing(nspace) & !compress) warning("nspace not used unless compress=TRUE")
    if (missing(nspace)) nspace <- branch
    stopifnot(branch >= 0 & branch <= 1 & nspace >=0 & nspace <= 1)
### no compression
    if (!compress) nspace <- -1L
    parms <- list(uniform = uniform, branch = branch,
                  nspace = nspace, minbranch = minbranch)
### see below for .rpco function
    rp1 <- .rpco(object, parms)
### taken from rpart:::rpart.branch()
    node <- as.numeric(row.names(object$frame))
    is.left <- (node%%2L == 0L)
    node.left <- node[is.left]
    parent <- match(node.left/2L, node)
    sibling <- match(node.left + 1L, node)
    left.child <- match(node * 2L, node)
    right.child <- match(node * 2L + 1L, node)
###
    temp <- (rp1$x[sibling] - rp1$x[is.left]) * (1 - branch)/2
### Draw a series of horseshoes or V's, left son, up, down to right son
### NA's in the vector cause lines() to "lift the pen"
    xx <- rbind(rp1$x[is.left],
                rp1$x[is.left] + temp,
                rp1$x[sibling] - temp,
                rp1$x[sibling],
                NA)
    yy <- rbind(rp1$y[is.left],
                rp1$y[parent],
                rp1$y[parent],
                rp1$y[sibling],
                NA)
### get segments from xx, yy
    d1 <- data.table(
        stack(as.data.frame(xx)),
        stack(as.data.frame(yy))
        )[, ind := NULL]
### ind = index
    setnames(d1, c("x", "y", "ind"))
    d1[, c("xend", "yend") := d1[-1, list(c(x, NA), c(y, NA)) ] ]
    d1 <- d1[complete.cases(d1), ]
### text labels
### taken from rpart:::text.rpart
### (section for 'fancy' removed)
    col <- names(object$frame)
    ylevels <- attr(object, "ylevels")
    if (!is.null(ylevels <- attr(object, "ylevels")))
        col <- c(col, ylevels)
### data.table for plotting labels
    d2 <- data.table(matrix(NA, nrow=nrow(object$frame), ncol=8))
    d2[, c("V1", "V2") := as.data.frame(rp1)]
    if (is.null(nodeLabels)){
        d2[, "V3" := labels(object)]
    } else {
        if(length(nodeLabels) != nrow(object$frame)) stop("Need same no. labels as nodes")
        d2[, "V3" := nodeLabels]
    }
    d2[, "V4" := object$frame$n]
    d2[, "V5" := FALSE]
    d2[parent, "V5" := TRUE]
    d2[, "V6" := left.child]
    d2[, "V7" := right.child]
    d2[, "V8" := !V5]
    setnames(d2, c("x", "y", "node", "n", "isP", "lC", "rC", "isL") )
### 2nd value for y exists?
    y2e <- "yval2" %in% names(object$frame)
    if(y2e & object$method=="exp") {
        d2[, "resp" := object$frame$yval2[, 1L] ]
        d2[, "e" := object$frame$yval2[, 2L]]
        d2[, "en" := paste(e, "/", n, sep="")]
    } else {
        d2[, "resp" := object$frame$yval ]
    }
    if (is.null(digits)) digits <- getOption("digits")
    fmt1 <- paste0("%.", digits, "g")
    d2[, "resp" := sprintf(fmt1, resp)]
    if (!is.null(ylevels)) d2[, "resp" :=
                              as.character(factor(object$frame$yval,
                                                  labels=ylevels))]
### begin ggplot
    g1 <- ggplot(data=d1, aes(x=x, y=y)) +
         geom_segment(aes(xend=xend, yend=yend), size=lineSize)
    if(vArrows){
        d1[, yM := pmax(y, yend)]
        d1[, ym := pmin(y, yend)]
        g1 <- g1 +
            geom_segment(data=d1[d1$y!=d1$yend, ],
                         aes(x=x, y=yM, xend=xend, yend=ym),
                         size=lineSize, arrow=arrows())
    }
### add small differences to y to separate from lines
    dif1 <- diff(range(d2$y))
    d2[, "yNU" := y + yNU * dif1]
    d2[, "yND" := y - yND * dif1]
    d2[, "yLD" := y - yLD * dif1]
    if (all){
### label nodes (all)
        g1 <- g1 + geom_text(data=d2, aes(x=x, y=yNU, label=node),
                         size=nodeSize, col=nodeColor) +
### add n for nodes (not leaves)
                   geom_text(data=d2[!d2$isL], aes(x=x, y=yND,
                             label=paste("n=", n)),
                             size=nodeSize, col=nodeColor)
    } else {
### leaves only
        g1 <- g1 + geom_text(data=d2[d2$isL], aes(x=x, y=yNU, label=node),
                             size=nodeSize, col=nodeColor)
    }
### label leaves
    leaf <- match.arg(leaf)
    if(leaf=="fitR" | !y2e) {
        g1 <- g1 + geom_text(data=d2[d2$isL], aes(x=x, y=yLD, label=resp),
                             size=leafSize, col=leafColor)
    } else if (leaf=="en") {
        g1 <- g1 + geom_text(data=d2[d2$isL], aes(x=x, y=yLD, label=en),
                             size=leafSize, col=leafColor)
    } else {
    d2[, "respEN" := paste(resp, "\n", en)]
    g1 <- g1 + geom_text(data=d2[d2$isL], aes(x=x, y=yLD, label=respEN),
                             size=leafSize, col=leafColor)
}
    g1 <- g1 +
        ggtitle(title) +
        labs(x=NULL, y=NULL) +
        theme(plot.title = element_text(size=titSize),
              axis.ticks = element_blank(),
              axis.text = element_blank())
    ###
    return(list(plot=g1,
                segments=d1,
                nodes=d2))
}
###
###----------------------------------------
###
### copied from rpart:::rpartco
###----------------------------------------
###
.rpco <- function (tree, parms) {
    frame <- tree$frame
    node <- as.numeric(row.names(frame))
    depth <- floor(log(node, base = 2) + 1e-07)
    depth <- depth - min(depth)
###
    is.leaf <- (frame$var == "<leaf>")
    if (length(parms)) {
        uniform <- parms$uniform
        nspace <- parms$nspace
        minbranch <- parms$minbranch
    } else {
        uniform <- FALSE
        nspace <- -1
        minbranch <- 0.3
    }
if (uniform) {
        y <- (1 + max(depth) - depth)/max(depth, 4L)
    } else {
        y <- dev <- frame$dev
        temp <- split(seq(node), depth)
        parent <- match(node%/%2L, node)
        sibling <- match(ifelse(node%%2L,
                                node - 1L,
                                node + 1L),
                         node)
        for (i in temp[-1L]) {
            temp2 <- dev[parent[i]] - (dev[i] + dev[sibling[i]])
            y[i] <- y[parent[i]] - temp2
        }
        fudge <- minbranch * diff(range(y))/max(depth)
        for (i in temp[-1L]) {
            temp2 <- dev[parent[i]] - (dev[i] + dev[sibling[i]])
            haskids <- !(is.leaf[i] & is.leaf[sibling[i]])
            y[i] <- y[parent[i]] - ifelse(temp2 <= fudge & haskids,
                                          fudge,
                                          temp2)
        }
        y <- y/(max(y))
    }
    x <- double(length(node))
    x[is.leaf] <- seq(sum(is.leaf))
    left.child <- match(node * 2L, node)
    right.child <- match(node * 2L + 1L, node)
    temp <- split(seq(node)[!is.leaf], depth[!is.leaf])
    for (i in rev(temp)) x[i] <- 0.5 * (x[left.child[i]] + x[right.child[i]])
    if (nspace < 0) return(list(x = x, y = y))
    compress <- function(x, me, depth) {
        lson <- me + 1L
        if (is.leaf[lson])
            left <- list(left = x[lson],
                         right = x[lson],
                         depth = depth + 1L,
                         sons = lson)
        else {
            left <- compress(x, me + 1L, depth + 1L)
            x <- left$x
        }
        rson <- me + 1L + length(left$sons)
        if (is.leaf[rson])
            right <- list(left = x[rson],
                          right = x[rson],
                          depth = depth + 1L,
                          sons = rson)
        else {
            right <- compress(x, rson, depth + 1L)
            x <- right$x
        }
        maxd <- max(left$depth, right$depth) - depth
        mind <- min(left$depth, right$depth) - depth
        slide <- min(right$left[1L:mind] - left$right[1L:mind]) - 1L
        if (slide > 0) {
            x[right$sons] <- x[right$sons] - slide
            x[me] <- (x[right$sons[1L]] + x[left$sons[1L]])/2
        }
        else slide <- 0
        if (left$depth > right$depth) {
            templ <- left$left
            tempr <- left$right
            tempr[1L:mind] <- pmax(tempr[1L:mind],
                                   right$right - slide)
        }
        else {
            templ <- right$left - slide
            tempr <- right$right - slide
            templ[1L:mind] <- pmin(templ[1L:mind], left$left)
        }
        list(x = x,
             left = c(x[me] - nspace * (x[me] - x[lson]), templ),
             right = c(x[me] - nspace * (x[me] - x[rson]), tempr),
             depth = maxd + depth, sons = c(me, left$sons, right$sons))
    }
    x <- compress(x, 1L, 1L)$x
    return(list(x = x, y = y))
}
