#' Training the ENNreg model
#'
#'\code{ENNreg} trains the ENNreg model using batch or minibatch learning procedures.
#'
#' If \code{batch=TRUE}, function \code{harris} from package \code{evclust} is used for
#' optimization. Otherwise, the RMSprop minibatch learning algorithm is used. The three
#' parameters in list \code{options} are:
#' \describe{
#'   \item{maxiter}{Maximum number of iterations (default: 1000).}
#'   \item{rel.error}{Relative error for stopping criterion (default: 1e-4).}
#'   \item{print}{Number of iterations between two displays (default: 10).}
#'  }
#' Additional parameters for the RMSprop, used only if \code{batch=FALSE}, are contained in
#' list \code{opt.rmsprop}. They are:
#' \describe{
#'   \item{batch_size}{Minibatch size (default: 100).}
#'   \item{epsi}{Global learning rate (default: 0.001).}
#'   \item{rho}{Decay rate (default: 0.9).}
#'   \item{delta}{Small constant to stabilize division by small numbers (default: 1e-8).}
#'   \item{Dtmax}{The algorithm stops when the loss has not decreased in the last Dtmax
#'   iterations (default: 100).}
#'  }
#'
#' @param X Input matrix of size n x p, where n is the number of objects and p the number of
#' attributes.
#' @param y Vector of length n containing observations of the response variable.
#' @param init Initial model generated by \code{\link{ENNreg_init}} (default=NULL).
#' @param K Number of prototypes (default=NULL; must be supplied if initial model is not supplied).
#' @param batch If TRUE (default), batch learning is used; otherwise, minibatch learning
#' (RMSprop) is used.
#' @param nstart Number of random starts of the k-means algorithm (default: 100, used only if initial
#' model is not supplied).
#' @param c Multiplicative coefficient applied to scale parameter gamma (default: 1, used only if
#' initial model is not supplied).
#' @param lambda Parameter of the loss function (default=0.9)
#' @param xi Regularization coefficient penalizing precision (default=0).
#' @param rho Regularization coefficient shrinking the solution towards a linear model (default=0).
#' @param eps Parameter of the loss function (if NULL, set to 0.01 times the standard deviation of y).
#' @param nu Parameter of the loss function to avoid a division by zero (default=1e-16).
#' @param optimProto If TRUE (default), the initial prototypes are optimized.
#' @param verbose If TRUE (default) intermediate results are displayed.
#' @param options Parameters of the optimization procedure (see details).
#' @param opt.rmsprop Parameters of the RMSprop algorithm (see details).
#'
#' @return An object of class "ENNreg"  with the following components:
#' \describe{
#' \item{loss}{Value of the loss function.}
#' \item{param}{Parameter values.}
#' \item{K}{Number of prototypes.}
#' \item{pred}{Predictions on the training set (a list containing the prototype unit activations,
#' the output means, variances and precisions, as well as the lower and upper expectations).}
#' }
#' @export
#' @importFrom evclust harris
#' @importFrom stats sd dnorm
#'
#' @references
#'
#' Thierry Denoeux. An evidential neural network model for regression based on random fuzzy
#' numbers. In "Belief functions: Theory and applications (proc. of BELIEF 2022)", pages 57-66,
#' Springer, 2022.
#'
#' Thierry Denoeux. Quantifying prediction uncertainty in regression using random fuzzy sets: the ENNreg
#' model. TechRxiv preprint, 2023b
#'
#' @seealso \code{\link{predict.ENNreg}}, \code{\link{ENNreg_init}}, \code{\link{ENNreg_cv}},
#' \code{\link{ENNreg_holdout}}
#'
#' @examples # Boston dataset
#' \donttest{
#' library(MASS)
#' X<-as.matrix(scale(Boston[,1:13]))
#' y<-Boston[,14]
#' set.seed(220322)
#' n<-nrow(Boston)
#' ntrain<-round(0.7*n)
#' train <-sample(n,ntrain)
#' fit <- ENNreg(X[train,],y[train],K=30)
#' plot(y[train],fit$pred$mux,xlab="observed response",ylab="predicted response")
#' }
#'
ENNreg<-function(X,y,init=NULL,K=NULL,batch=TRUE,nstart=100,c=1,lambda=0.9,xi=0,rho=0,
                 eps=NULL,nu=1e-16,optimProto=TRUE,verbose=TRUE,
                 options=list(maxiter=1000,rel.error=1e-4,print=10),
                 opt.rmsprop=list(batch_size=100,epsi=0.001,rho=0.9,delta=1e-8,Dtmax=100)){
  # Complete a partially specified option list with default values, so that a call
  # such as options=list(maxiter=50) does not leave the other entries NULL (which
  # would silently shorten the option vector handed to the optimizer).
  fill_defaults<-function(user,defaults){
    nm<-names(user)
    nm<-nm[nzchar(nm)]
    if(length(nm)>0) defaults[nm]<-user[nm]
    defaults
  }
  options<-fill_defaults(options,list(maxiter=1000,rel.error=1e-4,print=10))
  opt.rmsprop<-fill_defaults(opt.rmsprop,
                             list(batch_size=100,epsi=0.001,rho=0.9,delta=1e-8,Dtmax=100))

  if(is.null(eps)) eps<-0.01*sd(y)
  # Either build an initial model from K prototypes, or take K from the supplied one.
  if(is.null(init)){
    if(is.null(K)) stop("Initial model or number of prototypes must be supplied")
    init<-ENNreg_init(X,y,K,nstart,c)
  } else {
    K<-init$K
  }
  X<-as.matrix(X)
  n<-length(y)
  p<-ncol(X)

  ####################### Optimization
  if(batch){
    # Option vector passed to harris: display flag, maxiter, rel.error, print
    opt.harris <- c(as.integer(verbose),options$maxiter,options$rel.error,
                    options$print)
    opt<-harris(foncgrad_RFS,init$param$psi,options=opt.harris,tr=FALSE,X=X,y=y,
                K=K,eps=eps,lambda=lambda, xi=xi,rho=rho,nu=nu,
                optimProto=optimProto)
  } else {
    opt<-rmsprop(init$param$psi,verbose=verbose,options=options,X=X,y=y,K=K,eps=eps,
                 lambda=lambda,xi=xi,rho=rho,nu=nu,optimProto=optimProto,
                 opt.rmsprop=opt.rmsprop)
  }

  ####################### Unpacking of the parameter vector
  # psi is laid out as consecutive segments:
  #   alpha (K), beta (K*p), sig (K), eta (K), gam (K), w (K*p)
  psi<-opt$par
  alpha<-psi[1:K]
  beta<-psi[(K+1):(K*p+K)]
  Beta<-matrix(beta,K,p)
  sig<-psi[(K*p+K+1):(K*p+2*K)]
  sig2<-sig^2
  eta<-psi[(K*p+2*K+1):(K*p+3*K)]
  gam<-psi[(K*p+3*K+1):(K*p+4*K)]
  w<-psi[(K*p+4*K+1):(2*K*p+4*K)]
  W<-matrix(w,K,p)
  h<-eta^2
  ####################### Propagation
  d<-matrix(0,n,K)
  a<-matrix(0,n,K)
  for(k in seq_len(K)){
    # Squared Euclidean distance of every input to row k of W, then activation
    d[,k] <- rowSums((X - matrix(W[k,],n,p,byrow=TRUE))^2)
    a[,k] <- exp(- gam[k]^2 * d[,k])
  }
  H<-matrix(h,n,K,byrow=TRUE)
  hx<-rowSums(a*H)
  mu<-X%*%t(Beta)+matrix(alpha,n,K,byrow=TRUE) # size (n,K)
  mux<-rowSums(mu*a*H)/hx
  sig2x<-rowSums(matrix(sig2,n,K,byrow=TRUE)*a^2*H^2)/hx^2
  # Half-width of the interval [Einf, Esup] around mux
  half<-sqrt(pi/(2*hx))
  # Esup belongs inside pred, next to Einf (it was previously placed at the top
  # level of the returned list because of a misplaced closing parenthesis).
  fit<-list(loss=opt$value,
            param=list(alpha=alpha,Beta=Beta,sig=sig,h=h,gam=gam,W=W,psi=psi),
            K=K,
            pred=list(s=a,mux=mux,sig2x=sig2x,hx=hx,Einf=mux-half,Esup=mux+half))
  class(fit)<-"ENNreg"
  return(fit)
}
