% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/bootstrap_MRF.R
\name{bootstrap_MRF}
\alias{bootstrap_MRF}
\title{Bootstrap observations to estimate MRF parameter coefficients}
\usage{
bootstrap_MRF(data, n_bootstraps, sample_seed, symmetrise, n_nodes, n_cores,
  n_covariates, family, sample_prop, spatial = FALSE, coords = NULL)
}
\arguments{
\item{data}{Dataframe. The input data where the \code{n_nodes}
left-most variables are variables that are to be represented by nodes in the graph.
Note that \code{NA}s are allowed for covariates. If present, these missing values
will be imputed from the distribution \code{rnorm(mean = 0, sd = 1)}, which assumes that
all covariates are scaled and centred (i.e. by using the function
\code{\link[base]{scale}} or similar)}

\item{n_bootstraps}{Positive integer. Represents the total number of bootstrap samples
to test. Default is \code{100}.}

\item{sample_seed}{Numeric. Used as the seed value for generating bootstrap replicates, allowing
users to generate replicated datasets on different systems. Default is a random seed}

\item{symmetrise}{The method to use for symmetrising corresponding parameter estimates
(which are taken from separate regressions). Options are \code{min} (take the coefficient with the
smallest absolute value), \code{max} (take the coefficient with the largest absolute value)
or \code{mean} (take the mean of the two coefficients). Default is \code{mean}}

\item{n_nodes}{Positive integer. The index of the last column in \code{data}
which is represented by a node in the final graph. Columns with index
greater than \code{n_nodes} are taken as covariates. Default is the number of
columns in \code{data}, corresponding to no additional covariates}

\item{n_cores}{Integer. The number of cores to spread the job across using
\code{\link[parallel]{makePSOCKcluster}}. Default is 1 (no parallelisation)}

\item{n_covariates}{Positive integer. The number of covariates in \code{data},
before cross-multiplication. Default is \code{ncol(data) - n_nodes}}

\item{family}{The response type. Responses can be quantitative continuous (\code{family = "gaussian"}),
non-negative counts (\code{family = "poisson"}) or binomial 1s and 0s (\code{family = "binomial"})}

\item{sample_prop}{Positive probability value indicating the proportion of rows to sample from
\code{data} in each bootstrap iteration. Default is no subsampling (\code{sample_prop == 1})}

\item{spatial}{Logical. If \code{TRUE}, spatial MRF / CRF models are bootstrapped using
\code{\link{MRFcov_spatial}}. Note, GPS coordinates must be supplied as \code{coords} for spatial
models to be run.
Spatial regression splines built from \code{coords} will be included in each node-wise regression as covariates.
This ensures that resulting node interaction parameters are estimated after accounting for
possible spatial autocorrelation. Note that interpretation of spatial autocorrelation is difficult,
and so it is recommended to compare predictive capacities of spatial and non-spatial CRFs through
the \code{\link{predict_MRF}} function}

\item{coords}{A two-column \code{dataframe} (with \code{nrow(coords) == nrow(data)})
representing the spatial coordinates of each observation in \code{data}. Ideally, these
coordinates will represent Latitude and Longitude GPS points for each observation.}
}
\value{
A \code{list} containing:
\itemize{
  \item \code{direct_coef_means}: \code{dataframe} containing mean coefficient values taken from all
  bootstrapped models across the iterations
  \item \code{direct_coef_upper90} and \code{direct_coef_lower90}: \code{dataframe}s
  containing coefficient 95 percent and 5 percent quantiles taken from all
  bootstrapped models across the iterations
  \item \code{indirect_coef_mean}: \code{list} of symmetric matrices
  (one matrix for each covariate) containing mean effects of covariates
  on pairwise interactions
  \item \code{mean_key_coefs}: \code{list} of matrices of length \code{n_nodes}
  containing mean covariate coefficient values and their relative importances
  (using the formula \code{x^2 / sum (x^2)})
  taken from all bootstrapped models across iterations. Only coefficients
  with mean relative importances \code{>0.01} are returned. Note, relative importances are only
  useful if all covariates are on a similar scale.
  \item \code{mod_type}: A character stating the type of model that was fit
  (used in other functions)
  \item \code{mod_family}: A character stating the family of model that was fit
   (used in other functions)
   \item \code{poiss_sc_factors}: A vector of the square-root mean scaling factors
   used to standardise \code{poisson} variables (only returned if \code{family = "poisson"})
   }
}
\description{
This function runs \code{\link{MRFcov}} models multiple times to capture uncertainty
in parameter estimates. The dataset is shuffled and missing
values (if found) are imputed in each bootstrap iteration.
}
\details{
\code{MRFcov} models are fit via cross-validation using
\code{\link[glmnet]{cv.glmnet}}. For each model, the \code{data} is bootstrapped
by shuffling row observations and fitting models to a subset of observations,
using \code{\link[dplyr]{sample_n}},
to account for uncertainty in parameter estimates.
Parameter estimates from the set of bootstrapped models are summarised
to present means and 90 percent confidence intervals (as 5 and 95 percent quantiles).
}
\examples{
\donttest{
data("Bird.parasites")

# Perform 100 bootstrap replicates in total
bootedCRF <- bootstrap_MRF(data = Bird.parasites,
                          n_nodes = 4,
                          family = 'binomial',
                          n_cores = 3)


# Using spatial coordinates for a spatial CRF
Latitude <- sample(seq(120, 140, length.out = 100), nrow(Bird.parasites), TRUE)
Longitude <- sample(seq(-19, -22, length.out = 100), nrow(Bird.parasites), TRUE)
coords <- data.frame(Latitude = Latitude, Longitude = Longitude)
bootedSpatial <- bootstrap_MRF(data = Bird.parasites, n_nodes = 4,
                             family = 'binomial',
                             spatial = TRUE,
                             coords = coords,
                             n_cores = 3)}
}
\seealso{
\code{\link{MRFcov}}, \code{\link{MRFcov_spatial}},
\code{\link[glmnet]{cv.glmnet}}
}
