#################################################
# Tree Growing Process                         ##
#################################################

growTree <- function(model=NULL, mydata=NULL,
                      control=NULL, invariance=NULL, meta=NULL,
                      edgelabel=NULL, depth=0, constraints=NULL, ...)
{
  if(is.null(mydata)) {
    stop("There was no data for growing the tree")
  }
  
  if (is.null(meta)) {
    warning("SEM tree could not determine model variables and covariates in the dataset.");
    return(NULL);
  }
  
  if (control$verbose) {
    message("Growing level ",depth);
  }
  
  if (control$report.level>0) {
	  report(paste("Growing tree level",depth), 0)
  }
  
  
  
  # Node null settings in testing for significant splitting
  node <- list()
  node$left_child <- NULL
  node$right_child <- NULL
  node$caption <- "TERMINAL"
  node$N <- dim(mydata)[1]
  
  # -- sample columns in case of SEM forest usage --
  fulldata <- mydata
  fullmeta <- meta
  if (control$mtry > 0) {
    
    # get names of model variables before sampling
    model.names <- names(mydata)[meta$model.ids]
    covariate.names <- names(mydata)[meta$covariate.ids]
    #perform sampling
    mydata <- sampleColumns(mydata, names(mydata)[meta$covariate.ids], control$mtry)
    # get new model ids after sampling by name
    meta$model.ids <- sapply(model.names, function(x) {which(x==names(mydata))})
    names(meta$model.ids) <- NULL
    meta$covariate.ids <- unlist(lapply(covariate.names, function(x) {which(x==names(mydata))}))
    
    node$colnames <- colnames(mydata)
    if (control$verbose) {
      message("Sampled: ",paste(node$colnames))
    }
  }
  
  node$p.values.valid <- control$method  == "naive" | control$method=="fair"
  
  node$lr <- NA
  node$edge_label <- edgelabel
  
  # estimate model once with complete mydata
  if(control$sem.prog == 'OpenMx'){
    full.model <- mxAddNewModelData(model,mydata,name="INITIALIZED MODEL")
    node$model <- try(mxRun(full.model, suppressWarnings=T, silent=T))
  }
  if(control$sem.prog == 'lavaan'){
    if (control$verbose) {message("Checking lavaan model now.")}
    #node$model <- try(suppressWarnings(lavaan(parTable(model),data=mydata,model.type=model@Options$model.type,missing="fiml")),silent=T)
    node$model <- try(suppressWarnings(eval(parse(text=paste(model@Options$model.type,'(parTable(model),data=mydata,missing=\'',model@Options$missing,'\')',sep="")))),silent=T)
    #browser()
  }
  
   if (class(node$model)=="try-error")
   {
     message("Model had a run error.")
	   node$term.reason <-  node$model[[1]]
	   node$model <- NULL;
	   return(node);
   }
  
  if (is.null(node$model)) {
    node$term.reason <- "Model was NULL! Model could not be estimated."; 
    return(node);
  }
  
  # list of point estimates, std.dev, and names of all freely estimated parameters
  ###########################################################
  ###               OPENMX USED HERE                      ###
  ###########################################################
  if(control$sem.prog == 'OpenMx'){
	  
	  # wicked style using triple-colon
    #node$params <- OpenMx:::summary.MxModel(node$model)$parameters[,5];
    #names(node$params) <- OpenMx:::summary.MxModel(node$model)$parameters[,1];
    #node$params_sd <- OpenMx:::summary.MxModel(node$model)$parameters[,6];
    #node$param_names <- OpenMx:::summary.MxModel(node$model)$parameters[,1];
	
    # some export/namespace problem here with the generic
    # getS3method("summary","MxModel") gets me the right fun
    msm <- getS3method("summary","MxModel")
    
    node$params <- msm(node$model)$parameters[,5];
    names(node$params) <- msm(node$model)$parameters[,1];
    node$params_sd <- msm(node$model)$parameters[,6];
    node$param_names <- msm(node$model)$parameters[,1];
  }
  ###########################################################
  ###               lavaan USED HERE                      ###
  ###########################################################
  if(control$sem.prog == 'lavaan'){
    #read in estimated parameters
    #browser()
    parameters <- data.frame(lavaan::parameterEstimates(node$model))[!is.na(data.frame(lavaan::parameterEstimates(node$model))[,"z"]),]
    node$params <- lavaan::coef(node$model) # put parameters into node 
    names(node$params) <- names(lavaan::coef(node$model)) # parameter names are stored as well
    for(i in 1:nrow(parameters)){ # if any labels are missing (some labels provided), then put default labels in the label col.
      if(!is.null(parameters$label)){
        if(parameters$label[i]==""){parameters$label[i]<-paste(parameters$lhs[i],parameters$op[i],parameters$rhs[i],sep="")}
      }
    } # if all labels are missing make a label column
    if(is.null(parameters$label)){
      label <- paste(parameters$lhs,parameters$op,parameters$rhs,sep="")
      parameters<- cbind(parameters,label)
    } 
    # store the SE of the estimates
    se <- rep(NA,length(unique(parameters$se)))
    for(i in 1:length(unique(parameters$label))){
      for(j in 1:nrow(parameters)){
          if(unique(parameters$label)[i]==parameters$label[j]){se[i]<-parameters$se[j]}
      }
    }
    node$params_sd <- se
    node$param_names <- names(lavaan::coef(node$model))
  }

  # df
  
  if (!is.null(constraints$focus.parameters)) {
    # df's are equal to number of focus parameters
    node$df <- length(constraints$focus.parameters)
  } else {
    # focus.parameters=NULL is as if all parameters were focus parameters, that is,
    # df == num. parameters
    node$df <- length(node$param_names)
  }
  

  
  # add unique node id via global variable
  node$node_id <- getGlobal("global.node.id")
#  assign("global.node.id", global.node.id+1 ,envir = getSemtreeNamespace())
  setGlobal("global.node.id", node$node_id+1)
  
  # determine whether we should skip splitting
  # 1. minimum cases in node
  if (!is.na(control$min.N)) {
    if (node$N <= 2*control$min.N) {
      if(control$verbose){
        message("Minimum user defined N for leaf node.")
      }
      node$term.reason <- "Minimum number of cases in leaf node" 
      return(node);
    }
  }
  # 2. maximum depth for tree reached
  if (!is.na(control$max.depth)){
    if (depth >= control$max.depth) {
      if(control$verbose){
        message("Max user defined tree depth reached.")
      }
      node$term.reason <- "Maximum depth reached in leaf node" 
      return(node);		
    }
  }
  
  # determine best split
  result <- NULL
  # 1. unbalanced selection method
  if (control$method == "naive") {
    result <- tryCatch(
      ################################################
      naiveSplit(model, mydata, control, invariance, meta, constraints=constraints, ...)	
      ################################################
      ,
      error = function(e) { cat(paste("Error occured!",e,sep="\n")); return(NULL); }
    );
  } 
  # 2a. split half data to determine best split then use hold out set to compare one split per covariate
  else if (control$method == "fair") {
    control$fair3Step <- FALSE
    result <- tryCatch(
      ################################################
      fairSplit(model, mydata, control, invariance, meta, constraints=constraints, ...)
      ################################################
      ,
      error = function(e) { cat(paste("Error occured!",e,sep="\n")); return(NULL); }
    );		
  }
  # 2b. split half data to determine best split then use hold out set to compare one split per covariate, with step 3 all splits retest
  else if (control$method == "fair3") {
    control$fair3Step <- TRUE
    result <- tryCatch(
      ################################################ 
      fairSplit(model, mydata, control, invariance, meta, constraints=constraints, ...)
      ################################################
      ,
      error = function(e) { cat(paste("Error occured!",e,sep="\n")); return(NULL); }
    );		
  }
  # 3. Traditional cross validation for covariate split selection
  else if (control$method == "cv") {
    result <- tryCatch(
      ################################################
      crossvalidatedSplit(model, mydata, control, invariance, meta, constraints=constraints, ...)
      ################################################
      ,
      error = function(e) { cat(paste("Error occured!",e,sep="\n")); return(NULL); }
    );		
    node$p.values.valid <- FALSE	
  }
  # 4. Experimental
  #else if (control$method == "exp") {
  #  result <- tryCatch(
  #    ################################################
  #    experimentalSplit(model, mydata, control, invariance, meta, ...)
      ################################################
  #    ,
  #    error = function(e) { cat(paste("Error occured!",e,sep="\n")); return(NULL); }
  #  );  	
  #  node$p.values.valid <- FALSE	    
  #}

  # return values in result are:
  # LL.max		: numeric, log likelihood ratio of best split
  # split.max 	: numeric, value to split best column on 
  # col.max		: index of best column
  # cov.name	: name of best candidate
  
  # store the value of the selected test statistic
  node$lr <- result$LL.max
  node$result <- result
  
  # if no split found, exit node without continuing growth
  if (is.null(result$LL.max)) {
    if (control$verbose) {
      message("Best Likelihood Ratio was NULL. Stop splitting")
    }
    return(node);
  }
  
  if (control$verbose) {
    message("Best LR ",round(result$LL.max,7)," : ",result$name.max," at covariate column ",
            result$col.max,"\n");
  }
  
  # ---------	determine whether to continue splitting	--------------
  if (class(control$custom.stopping.rule)=="function") {
    stopping.rule <- control$custom.stopping.rule
  } else {
    stopping.rule <- stoppingRuleDefault
  }
  # stoppingRuleDefault() is a function that gets inputs node, result, control
  srule <- stopping.rule(node, result, control)
  # browser()
  if (class(srule)=="list") {
    node <- srule$node
    cont.split <- !(srule$stop.rule)
  } else {
    cont.split <- !srule 
    node$p.values.valid <- FALSE
  }
  # restore mydata here if (mtry was > 0)	-- for semforests
  if (control$mtry > 0) {
    
    # also need to remap col.max to original data!
    col.max.name <- names(mydata)[result$col.max]
    result$col.max <- which(names(fulldata)==col.max.name)
    
    # restore data
    mydata <- fulldata
    meta <- fullmeta
  }
  
  
  
  if  ((!is.null(cont.split)) && (!is.na(cont.split)) && (cont.split)) {
    if (control$report.level > 10) {
      report("Stop splitting based on stopping rule.", 1)
    }
    
    # store the split name (covariate name and split value) RHS is yes branch
    if(result$type.max==1) {
      # unordered factor collating and splitting
      lvl <- (control$method == "fair")
      result1 <- recodeAllSubsets(mydata[,result$col.max],colnames(mydata)[result$col.max],
                                  growbool=T, use.levels=lvl)
      
      
      test2 <- rep(NA, nrow(mydata))
      if(!is.na(result1$num_sets) & !is.null(result1$num_sets)){
        for(j in 1:result1$num_sets) {
          test1 <- rep(NA, nrow(mydata))
          for(i in 1:nrow(mydata)) {
            if(isTRUE(result1$columns[i,j])) {test1[i] <- 1}
            else if(!is.na(result1$columns[i,j])){test1[i] <- 0}
            else{test1[i]<-NA}
          }
          test1 <- as.factor(test1)
          test2 <- data.frame(test2, test1)
        }
      }
      test2 <- test2[,-1]
      
      
      named <- colnames(result1$columns)[result$split.max]
      node$caption <- paste(colnames(result1$columns)[result$split.max])
      node$rule = list(variable=result$col.max, relation="%in%", value=c(result1$values), name = result$name.max)
      
      if(result1$num_sets==1) {
        sub1 <- subset (mydata, as.numeric(test2) == 2)
        sub2 <- subset (mydata, as.numeric(test2) == 1)
      }
      else {
        sub1 <- subset (mydata, as.numeric(test2[[result$split.max]]) == 2)  
        sub2 <- subset (mydata, as.numeric(test2[[result$split.max]]) == 1)
      }
      
    }
    else if (result$type.max==2){
      # ordered factor splitting of data
      node$caption <- paste(result$name.max,">=", result$split.max,sep=" ")
      node$rule = list(variable=result$col.max, relation=">=", value=c(result$split.max), name = result$name.max)
      sub1 <- subset( mydata, as.numeric(as.character(mydata[, (result$col.max)])) >result$split.max)
      sub2 <- subset( mydata, as.numeric(as.character(mydata[, (result$col.max)]))<=result$split.max)
    }
    else {
      # continuous variables splitting
      node$caption <- paste(result$name.max,">=", result$split.max,sep=" ")
      node$rule = list(variable=result$col.max, relation=">=", value=c(result$split.max), name = result$name.max)
      sub1 <- subset( mydata, as.numeric(mydata[, (result$col.max)]) >result$split.max)
      sub2 <- subset( mydata, as.numeric(mydata[, (result$col.max)])<=result$split.max)
    }
    
    ##########################################################
    ## NEW CODE TO INCLUDE CASES MISSING ON SPLITTING VARIABLE
    class(node) <- "semtree"
    if(control$use.all& (nrow(mydata)>(nrow(sub1)+nrow(sub2)))){
      if(control$verbose){message("Missing on splitting variable: ",result$name.max)}
      completeSplits <- calculateBestSubsets(model, mydata, sub1, sub2, result)
      sub1 <- completeSplits$sub1
      sub2 <- completeSplits$sub2
    }
    ##########################################################
    
    # result1 - RHS; result2 - LHS
    result2 <- growTree( model, sub2, control, invariance, meta, edgelabel=0, depth=depth+1, constraints)
    result1 <- growTree( model, sub1, control, invariance, meta, edgelabel=1, depth=depth+1, constraints)
    
    node$left_child <- result2
    node$right_child <- result1
    
    return(node);
    
  } else {
    # if cont.split is F or NA or NULL then return node without further splitting
    return(node);
  }  
}	
