% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/MutationProfiling.R
\name{expectedMutations}
\alias{expectedMutations}
\title{Calculate expected mutation frequencies}
\usage{
expectedMutations(
  db,
  sequenceColumn = "sequence_alignment",
  germlineColumn = "germline_alignment",
  targetingModel = HH_S5F,
  regionDefinition = NULL,
  mutationDefinition = NULL,
  nproc = 1,
  cloneColumn = "clone_id",
  juncLengthColumn = "junction_length"
)
}
\arguments{
\item{db}{\code{data.frame} containing sequence data.}

\item{sequenceColumn}{\code{character} name of the column containing input 
sequences.}

\item{germlineColumn}{\code{character} name of the column containing 
the germline or reference sequence.}

\item{targetingModel}{\link{TargetingModel} object. Default is \link{HH_S5F}.}

\item{regionDefinition}{\link{RegionDefinition} object defining the regions
and boundaries of the Ig sequences. To use region definitions,
sequences in \code{sequenceColumn} and \code{germlineColumn}
must be aligned following the IMGT schema.}

\item{mutationDefinition}{\link{MutationDefinition} object defining replacement
and silent mutation criteria. If \code{NULL} then 
replacement and silent mutations are determined by exact 
amino acid identity.}

\item{nproc}{\code{numeric} number of cores to distribute the operation
over. If the cluster has already been set, call the function with 
\code{nproc} = 0 to avoid resetting or reinitializing it. Default is 
\code{nproc} = 1.}

\item{cloneColumn}{\code{character} name of the column in \code{db} 
containing clone identifiers.}

\item{juncLengthColumn}{\code{character} name of the column in \code{db} 
containing junction lengths.}
}
\value{
A modified \code{db} \code{data.frame} with expected mutation frequencies 
          for each region defined in \code{regionDefinition}.
         
          The column names are dynamically created based on the regions in  
          \code{regionDefinition}. For example, when using the \link{IMGT_V}
          definition, which defines positions for CDR and FWR, the following columns are
          added:  
          \itemize{
            \item  \code{mu_expected_cdr_r}:  expected frequency of replacement mutations 
                                           in CDR1 and CDR2 of the V-segment.
            \item  \code{mu_expected_cdr_s}:  expected frequency of silent mutations in 
                                           CDR1 and CDR2 of the V-segment.
            \item  \code{mu_expected_fwr_r}:  expected frequency of replacement mutations 
                                           in FWR1, FWR2 and FWR3 of the V-segment.
            \item  \code{mu_expected_fwr_s}:  expected frequency of silent mutations in 
                                           FWR1, FWR2 and FWR3 of the V-segment.
          }
}
\description{
\code{expectedMutations} calculates the expected mutation frequencies for each 
sequence in the input \code{data.frame}.
}
\details{
Only the parts of the sequences defined in \code{regionDefinition} are analyzed. 
For example, when using the \link{IMGT_V} definition, mutations in
positions beyond 312 will be ignored.
}
\examples{
# Subset example data
data(ExampleDb, package="alakazam")
db <- subset(ExampleDb, c_call \%in\% c("IGHA", "IGHG") & sample_id == "+7d")
set.seed(112)
db <- dplyr::slice_sample(db, n=100)
# Calculate expected mutations over V region
db_exp <- expectedMutations(db,
                            sequenceColumn="sequence_alignment",
                            germlineColumn="germline_alignment_d_mask",
                            regionDefinition=IMGT_V,
                            nproc=1)

# Calculate expected mutations over V region using hydropathy mutation definitions
db_exp <- expectedMutations(db,
                           sequenceColumn="sequence_alignment",
                           germlineColumn="germline_alignment_d_mask",
                           regionDefinition=IMGT_V,
                           mutationDefinition=HYDROPATHY_MUTATIONS,
                           nproc=1)

}
\seealso{
\link{calcExpectedMutations} is called by this function to calculate the expected 
mutation frequencies. See \link{observedMutations} for getting observed 
mutation counts. See \link{IMGT_SCHEMES} for a set of predefined 
\link{RegionDefinition} objects.
}
