#' Workhorse for simulation studies
#'
#'  Generates data according to all provided 
#'  constellations in \code{dataGrid} and applies
#'  all provided constellations in \code{procGrid}
#'  to them.
#'
#'
#'@param dataGrid  a \code{data.frame} where the first column
#'  is a character vector with function names. The other
#'  columns contain parameters for the functions specified
#'  in the first column. Parameters with NA are ignored.
#'@param procGrid  similar as \code{dataGrid} the first
#'  column must contain function names. The other columns 
#'  contain parameters for the functions specified in the
#'  first column. The data generated according to 
#'  \code{dataGrid} will always be passed to the first
#'  unspecified argument of the functions sepcified in the first 
#'  column of \code{procGrid}.
#'@param replications  number of replications for the simulation
#'@param discardGeneratedData  if \code{TRUE} the generated 
#'  data is deleted after all function constellations in 
#'  \code{procGrid} have been applied. Otherwise, ALL
#'  generated data sets will be part of the returned object.
#'@param progress if \code{TRUE} a progress bar is shown in the console.
#'@param post.proc  functions to summarize the results over
#'  the replications, e.g. mean, sd.
#'@param ncpus  a cluster of \code{ncpus} workers (R-processes)
#'  is created on the local machine to conduct the
#'  simulation. If \code{ncpus}
#'  equals one no cluster is created and the simulation
#'  is conducted by the current R-process.
#'@param cluster  a cluster generated by the \code{parallel}
#'  package that will be used to conduct the simulation.
#'  If \code{cluster} is specified, then \code{ncpus} will
#'  be ignored.
#'@param clusterSeed  if the simulation is done in parallel
#'  manner, then the ‘combined multiple-recursive generator’ from L'Ecuyer (1999) 
#'  is used to generate random numbers. Thus \code{clusterSeed} must be a (signed) integer 
#'  vector of length 6. The 6 elements of the seed are internally regarded as 
#'  32-bit unsigned integers. Neither the first three nor the last three 
#'  should be all zero, and they are limited to less than 4294967087 and 
#'  4294944443 respectively.
#'@param clusterLibraries  a character vector specifying
#'  the packages that should be loaded by the workers.
#'@param clusterGlobalObjects  a character vector specifying
#'  the names of R objects in the global environment that should
#'  be exported to the global environment of every worker.
#'@param fallback must be missing or a character specfying a file. Every time
#'  when the data generation function is changed, the results so far obtained
#'  are saved in the file specified by \code{fallback}.
#'@param envir  must be provided if the functions specified
#'  in \code{dataGrid} or \code{procGrid} are not part
#'  of the global environment.
#'@return  The returned object is a list of the class
#'  \code{evalGrid}, where the fourth element is a list of lists named 
#'  \code{simulation}. \code{simulation[[i]][[r]]} contains:
#'  \item{data}{the data set that was generated by the
#'  \code{i}th constellation (\code{i}th row) of \code{dataGrid}
#'  in the \code{r}th replication}
#'  \item{results}{a list containing \code{nrow(procGrid)} 
#'  objects. The \code{j}th object is the returned value
#'  of the function specified by the \code{j}th constellation
#'  (\code{j}th row) of \code{procGrid} applied to the data
#'  set contained in \code{data}}
#'@note  If \code{cluster} is provided by the user the
#'  function \code{evalGrids} will NOT stop the cluster.
#'  This has to be done by the user. Conducting parallel
#'  simulations by specifing \code{ncpus} will interally
#'  create a cluster and stop it after the simulation
#'  is done.
#'@author  Marsel Scheer
#'@seealso  \code{\link{as.data.frame.evalGrid}}
#'@examples
#'
#'rng = function(data, ...) {
#'ret = range(data)
#'names(ret) = c("min", "max")
#'ret
#'}
#'
#'# call runif(n=1), runif(n=2), runif(n=3)
#'# and range on the three "datasets"
#'# generated by runif(n=1), runif(n=2), runif(n=3)
#'eg = evalGrids(
#'  expandGrid(fun="runif", n=1:3),
#'  expandGrid(proc="rng"),
#'  rep=10
#')
#'eg
#'
#'# summarizing the results in a data.frame
#'as.data.frame(eg)
#'
#'# we now generate data for a regression
#'# and fit different regression models
#'
#'# not that we use SD and not sd (the
#'# reason for this is the cast() call below)
#'regData = function(n, SD){
#'  data.frame(
#'    x=seq(0,1,length=n),
#'    y=rnorm(n, sd=SD))
#'}
#'
#'eg = evalGrids(
#'  expandGrid(fun="regData", n=20, SD=1:2),
#'  expandGrid(proc="lm", formula=c("y~x", "y~I(x^2)")),
#'  replications=2)
#'
#'# can not be converted to data.frame, because
#'# an object of class "lm" can not converted to
#'# a data.frame
#'try(as.data.frame(eg))
#'
#'# for the data.frame we just extract the r.squared
#'# from the fitted model
#'as.data.frame(eg, value.fun=function(fit) c(rsq=summary(fit)$r.squared))
#'
#'# for the data.frame we just extract the coefficients
#'# from the fitted model
#'df = as.data.frame(eg, value.fun=coef)
#'
#'# since we have done 2 replication we can calculate
#'# sum summary statistics
#'require(reshape)
#'df$replication=NULL
#'mdf = melt(df, id=1:7, na.rm=TRUE)
#'cast(mdf, ... ~ ., c(mean, length, sd))
#'
#'# note if the data.frame would contain the column
#'# named "sd" instead of "SD" the cast will generate
#'# an error
#'names(df)[5] = "sd"
#'mdf = melt(df, id=1:7, na.rm=TRUE)
#'try(cast(mdf, ... ~ ., c(mean, length, sd)))
#'
#'
#'# extracting the summary of the fitted.model
#'as.data.frame(eg, value.fun=function(x) {
#'  ret = coef(summary(x))
#'  data.frame(valueName = rownames(ret), ret, check.names=FALSE)
#'})
#'
#'
#'
#'# we now compare to methods for
#'# calculating quantiles
#'
#'# the functions and parameters
#'# that generate the data
#'N = c(10, 50, 100)
#'library(plyr)
#'dg = rbind.fill(
#'  expandGrid(fun="rbeta", n=N, shape1=4, shape2=4),
#'  expandGrid(fun="rnorm", n=N))
#'  
#'# definition of the two quantile methods
#'emp.q = function(data, probs) c(quantile(data, probs=probs))
#'nor.q = function(data, probs) {
#'  ret = qnorm(probs, mean=mean(data), sd=sd(data))
#'  names(ret) = names(quantile(1, probs=probs))
#'  ret
#'}
#'
#'# the functions and parameters that are
#'# applied to the generate data
#'pg = rbind.fill(expandGrid(proc=c("emp.q", "nor.q"), probs=c(0.01, 0.025, 0.05)))
#'
#'# generate data and apply quantile methods
#'set.seed(1234)
#'eg = evalGrids(dg, pg, replication=50, progress=TRUE)
#'
#'# convert the results to a data.frame
#'df = as.data.frame(eg)
#'df$replication=NULL
#'mdf = melt(df, id=1:8, na.rm=TRUE)
#'
#'# calculate, print and plot summary statistics 
#'require(ggplot2)
#'print(a <- arrange(cast(mdf, ... ~ ., c(mean, sd)), n))
#'ggplot(a, aes(x=fun, y=mean, color=proc)) + geom_point(size=I(3)) + facet_grid(probs ~ n)
#'@importFrom reshape funstofun melt melt.data.frame cast
#'@export
evalGrids <-
  function(dataGrid, procGrid=expandGrid(proc="length"), replications = 1, 
           discardGeneratedData=FALSE, progress=FALSE, post.proc=NULL, 
           ncpus = 1L, cluster=NULL, clusterSeed=rep(12345,6),
           clusterLibraries=NULL,
           clusterGlobalObjects=NULL,           
           fallback=NULL,
           envir=globalenv()) {

    mc = match.call()
    if (!is.null(post.proc)){      
      if (length(post.proc) == 1) {
        postFun = post.proc
      } else {
        postFun = do.call(funstofun, as.list(match.call()$post.proc[-1]))    
      }
    }
    
    
    df = dataGrid
    if (is.data.frame(dataGrid)){
      df = lapply(1:nrow(dataGrid), function(i){
        j=i
        fun = get(dataGrid[j,1], envir=envir)
        para = as.list(dataGrid[j,-c(1, which(is.na(dataGrid[j,]))), drop=FALSE])
        function() do.call(fun, para)
      })
    }
    
    pf = procGrid
    if (is.data.frame(procGrid)){
      pf = lapply(1:nrow(procGrid), function(i){
        j=i
        fun = get(procGrid[j,1], envir=envir)
        para = as.list(procGrid[j,-c(1, which(is.na(procGrid[j,]))), drop=FALSE])        
        function(x) {para[[length(para)+1]]=x; do.call(fun, para)}
      })
    }
    #   createID = function(id){
    #     if(!is.element(".ID", ls(globalenv())))
    #       assign(".ID", id, envir=globalenv())
    #   }
    #     
    #   createID(0)
    sim.fun = function(fc){
      withOutData = function(dummy){
        list(data=NULL, results=lapply(pf, function(f){f(fc())}))
      }
      withData = function(dummy){
        data = fc()
        list(data=data, results=lapply(pf, function(f){f(data)}))      
      }
      if (discardGeneratedData){
        if (!is.null(cluster)){
          ret = parLapply(cluster, 1:replications, withOutData)
        } else {
          ret = lapply(1:replications, withOutData)
        }
      } else {
        if (!is.null(cluster)){
          ret = parLapply(cluster, 1:replications, withData)
        } else {
          ret = lapply(1:replications, withData)
        }
      }
      
      if (!is.null(post.proc)){
        ret = llply(1:nrow(procGrid), function(j) {
          ret = ldply(ret, function(rep) rep$results[[j]])
          idx = which(sapply(1:ncol(ret), function(i) all(is.numeric(ret[,i]))))
          mdf = melt(ret, measure.vars=idx)
          cast(mdf, ... ~ variable, postFun)
          })       
        ret = list(list(results=ret))
      }
      if(!is.null(fallback)){
        if(!is.null(cluster)){
          rs = clusterCall(cluster, function() .Random.seed)
        } else {
          rs = .Random.seed
        }
        attr(ret, ".Random.seed") = rs
      }
      ret
    }
    
    
    if (!is.null(cluster) && ncpus > 1)
      warning("cluster provided. Ignore argument ncpus.")
    
    if (is.null(cluster) && ncpus > 1){
      require(parallel)
      RNGkind("L'Ecuyer-CMRG")
      cluster = makeCluster(rep("localhost", ncpus), type="PSOCK")  
    }
    
    
    if(!is.null(cluster)){      
      if (!is.null(clusterGlobalObjects)){  
        clusterExport(cl=cluster, varlist=clusterGlobalObjects)
      }
      if (!is.null(clusterLibraries)){      
        for( L in clusterLibraries){
          eval(call("clusterEvalQ", cl=cluster, expr=call("library", L)))
        }
      }
      #     parSapply(cluster, seq_len(ncpus), createID)
      clusterExport(cl=cluster, varlist=c("df", "pf"))
      clusterSetRNGStream(cluster, iseed=clusterSeed)
    }     
    t1 = Sys.time()
    if(progress){
      pb = progress_text()
      #pb$init(replications)
      #simulation = replicate(replications, {pb$step(); sim.fun()})
      pb$init(nrow(dataGrid))
      if (!is.null(fallback)){
        simulation = vector("list", length(df))
        cnt = 1
        print("With fallback!")
        for(i in 1:length(df)){
          pb$step()
          simulation[[cnt]] = sim.fun(df[[i]])
          fallBackObj = list(call=mc, dataGrid=dataGrid, procGrid=procGrid, simulation=simulation)
          class(fallBackObj) = "evalGrid"
          save(fallBackObj, file=paste(fallback, "Rdata", sep="."))          
          cnt = cnt + 1          
        }
      } else {
        simulation = lapply(df, function(fc) {pb$step(); sim.fun(fc)})  
      }      
      pb$term()
    } else {
      if (!is.null(fallback)){
        simulation = vector("list", length(df))
        cnt = 1
        print("With fallback!")
        for(i in 1:length(df)){
          simulation[[cnt]] = sim.fun(df[[i]])
          fallBackObj = list(call=mc, dataGrid=dataGrid, procGrid=procGrid, simulation=simulation)
          class(fallBackObj) = "evalGrid"
          save(fallBackObj, file=paste(fallback, "Rdata", sep="."))          
          cnt = cnt + 1          
        }
      } else {
        simulation = lapply(df, sim.fun)
      }      
    }
    t2 = Sys.time()

    if (ncpus > 1)
      stopCluster(cluster)
    
    est.reps.per.hour = as.integer(replications/as.numeric(difftime(t2, t1, units="hour")))
    
    print(paste("Estimated replications per hour: ", est.reps.per.hour))
        
    #  if(is.null(dim(simulation)))
    #   simulation = matrix(simulation, nrow=1)
    
    ret = list(call=mc, dataGrid=dataGrid, procGrid=procGrid, simulation=simulation, 
               post.proc = post.proc,
               est.reps.per.hour=est.reps.per.hour,
               sessionInfo=sessionInfo())
    class(ret) = "evalGrid"
    ret
  }
