\name{read.msa}
\alias{read.msa}
\title{Reading an MSA Object}
\usage{read.msa(filename, format=c(guess.format.msa(filename), "FASTA")[1],
    alphabet=NULL, features=NULL, do.4d=FALSE, ordered=ifelse(do.4d ||
    !is.null(features), FALSE, TRUE), tuple.size=(if (do.4d) 3 else
    NULL), do.cats=NULL, refseq=NULL, offset=0, seqnames=NULL,
    discard.seqnames=NULL, pointer.only=FALSE)}
\description{Reads an MSA from a file.}
\note{If the input is in "MAF" format and features is specified, the
resulting alignment will be stripped of gaps in the reference (1st)
sequence.}
\note{If the features argument is an object stored in C, its values will
be changed by this function!}
\value{an MSA object.}
\keyword{msa}
\keyword{FASTA}
\keyword{MAF}
\keyword{PHYLIP}
\keyword{SS}
\seealso{\code{\link{msa}}, \code{\link{read.feat}}}
\author{Melissa J. Hubisz and Adam Siepel}
\arguments{\item{filename}{The name of the input file containing an alignment.}
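\item{format}{Input file format: one of "FASTA", "MAF", "SS", "PHYLIP",
or "MPM".  By default the format is guessed using
\code{guess.format.msa(filename)}; if given explicitly, it must be
correctly specified.}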
\item{alphabet}{the alphabet of non-missing-data characters in the
alignment.  Determined automatically from the alignment if not given.}
\item{features}{An object of type \code{feat}.  If provided, the return
value will only
contain portions of the alignment which fall within a feature.
The alignment will not be ordered.
The loaded regions can be further constrained with the do.4d or
do.cats options.  Note that if this object is passed as a pointer to a
structure stored in C, the values will be altered by this function!}
\item{do.4d}{Logical.  If \code{TRUE}, the return value will contain only
the columns corresponding to four-fold degenerate sites.  Requires
features to be specified.}
\item{ordered}{Logical.  If \code{FALSE}, the MSA object may not retain
the original column order.}
\item{tuple.size}{Integer.  If given, and if pointer.only is \code{TRUE},
MSA will be stored in sufficient statistics format, where each tuple
contains tuple.size consecutive columns of the alignment.}
\item{do.cats}{Character vector.  If given, and if features is specified,
then only the types of features named here will be represented in the
returned alignment.}
\item{refseq}{Character string specifying a FASTA format file with a
reference sequence.  If given, the reference sequence will be
"filled in" wherever missing from the alignment.}
\item{offset}{An integer giving offset of reference sequence from
beginning of chromosome.  Not used for MAF format.}
\item{seqnames}{A character vector.  If provided, discard any sequence
in the msa that is not named here.  This is only implemented efficiently
for MAF input files, but in this case, the reference sequence must be
named.}
\item{discard.seqnames}{A character vector.  If provided, discard
sequences named here.  This is only implemented efficiently for MAF
input files, but in this case, the reference sequence must NOT be
discarded.}
\item{pointer.only}{If \code{TRUE}, MSA will be stored by reference as
an external pointer to an object created by C code, rather than
directly in R memory.  This improves performance and may be necessary
for large alignments, but reduces functionality.  See
\code{\link{msa}} for more details on MSA object storage options.}}
\examples{
exampleArchive <- system.file("extdata", "examples.zip", package="rphast")
files <- c("ENr334.maf", "ENr334.fa", "gencode.ENr334.gff")
unzip(exampleArchive, files)

# Read a fasta file, ENr334.fa
# this file represents a 4-way alignment of the encode region
# ENr334 starting from hg18 chr6 position 41405894
idx.offset <- 41405894
m1 <- read.msa("ENr334.fa", offset=idx.offset)
m1

# Now read in only a subset represented in a feature file
f <- read.feat("gencode.ENr334.gff")
f$seqname <- "hg18"  # need to tweak source name to match name in alignment
m1 <- read.msa("ENr334.fa", features=f, offset=idx.offset)

# Can also subset on certain features
do.cats <- c("CDS", "5'flank", "3'flank")
m1 <- read.msa("ENr334.fa", features=f, offset=idx.offset,
               do.cats=do.cats)

# Can read MAFs similarly, but don't need offset because
# MAF file is annotated with coordinates
m2 <- read.msa("ENr334.maf", features=f, do.cats=do.cats)
# Also, note that when features is given and the file is
# in MAF format, the first sequence is automatically
# stripped of gaps
ncol.msa(m1)
ncol.msa(m2)
ncol.msa(m1, "hg18")

unlink(files) # clean up
}
