% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/scan_negbin.R
\name{scan_negbin}
\alias{scan_negbin}
\title{Calculate the negative binomial scan statistic}
\usage{
scan_negbin(table, zones, n_mcsim = 0, version = "ordinary")
}
\arguments{
\item{table}{A \code{data.table} with columns \code{location, duration, mu,
theta, count}. The \code{location} column should consist of integers that 
are unique to each location. The \code{duration} column should also 
consist of integers, starting at 1 for the most recent time period and 
increasing in reverse chronological order. 

A negative binomial distribution parametrized by \eqn{\mu} and 
\eqn{\theta} (columns \code{mu} and \code{theta} respectively) has 
expected value \eqn{\mu} and variance \eqn{\mu+\mu^2/\theta}. The 
parameter \eqn{\theta} is referred to as the \code{size} in 
\code{\link[stats]{NegBinomial}}, and \code{theta} in 
\code{\link[MASS]{negative.binomial}}.}

\item{zones}{A \code{set} of zones, each zone itself a 
set containing one or more locations of those found in \code{table}.}

\item{n_mcsim}{A non-negative integer; the number of replicate scan 
statistics to generate in order to calculate a p-value.}

\item{version}{Which version of the negative binomial score scan statistic to 
calculate: either "ordinary" (default) or "increasing". See details.}
}
\value{
An object of class \code{scanstatistics}. It has the following 
   fields:
   \describe{
    \item{observed}{A \code{data.table} containing the value of the 
                    statistic calculated for each zone-duration combination,
                    for the observed data. The scan statistic is the maximum
                    value of these calculated statistics.}
    \item{replicated}{A numeric vector of length \code{n_mcsim} containing 
                      the values of the scan statistics calculated by Monte
                      Carlo simulation.}
    \item{mlc}{A \code{data.table} containing the zone, duration, and 
               scan statistic value of the most likely cluster (MLC).}
    \item{pvalue}{The p-value calculated from Monte Carlo replications.}
    \item{distribution}{The assumed distribution of the data; "negative 
                        binomial" in this case.}
    \item{type}{The type of scan statistic; "Expectation-based" in this 
                case.}
    \item{zones}{The set of zones that was passed to the function as input.}
    \item{n_locations}{The number of locations in the data.}
    \item{n_zones}{The number of zones.}
    \item{max_duration}{The maximum outbreak/event/anomaly duration 
                        considered.}
   }
}
\description{
Calculate the expectation-based negative binomial scan statistic by supplying 
a \code{data.table} of observed counts and pre-computed distribution 
parameters for each location and time. A p-value for the observed scan
statistic can be obtained by Monte Carlo simulation.
}
\details{
For the expectation-based negative binomial scan statistic (Tango
   et al., 2011), the null hypothesis of no anomaly holds that the count 
   observed at each location \eqn{i} and duration \eqn{t} (the number of time 
   periods before present) has a negative binomial distribution with expected 
   value \eqn{\mu_{it}} and dispersion parameter \eqn{\theta_{it}}:
   \deqn{
     H_0 : Y_{it} \sim \textrm{NegBin}(\mu_{it}, \theta_{it}).
   }
   This holds for all locations \eqn{i = 1, \ldots, m} and all durations 
   \eqn{t = 1, \ldots, T}, with \eqn{T} being the maximum duration considered.
   The alternative hypothesis depends on the version used: if \code{version
   == "ordinary"}, then the alternative hypothesis states that there is a 
   space-time window \eqn{W} consisting of a spatial zone \eqn{Z \subset \{1, 
   \ldots, m\}} and a time window \eqn{D \subseteq \{1, \ldots, T\}} such 
   that the counts in this window have their expected values inflated by a 
   factor \eqn{q_W > 1} compared to the null hypothesis:
   \deqn{
   H_1 : Y_{it} \sim \textrm{NegBin}(q_W \mu_{it}, \theta_{it}), 
         ~~(i,t) \in W.
   }
   If \code{version == "increasing"}, \eqn{q_W} is instead increasing over
   time (decreasing with \code{duration}).
   For locations and durations outside of this window, counts are assumed to
   be distributed as under the null hypothesis. The sets \eqn{Z} considered 
   are those specified in the argument \code{zones}, while the maximum 
   duration \eqn{T} is taken as the maximum value in the column 
   \code{duration} of the input \code{table}. For each space-time window
   \eqn{W} considered, a score statistic is computed using the score function
   and Fisher information under the null hypothesis of no anomaly.
   The scan statistic is calculated as the maximum of these quantities over 
   all space-time windows. Point estimates of the parameters \eqn{\mu_{it}} 
   and \eqn{\theta_{it}} must be specified in the columns \code{mu} and 
   \code{theta} of the argument \code{table} before this function is called.
}
\examples{
# Simple example
set.seed(1)
table <- scanstatistics:::create_table(list(location = 1:4, duration = 1:4),
                                        keys = c("location", "duration"))
table[, mu := 3 * location]
table[, theta := 2]
table[, count := rnbinom(.N, mu = mu, size = theta)]
table[location \%in\% c(1, 4) & duration < 3, 
      count :=  rnbinom(.N, mu = 2 * mu, size = theta)]
zones <- scanstatistics:::powerset_zones(4)
result1 <- scan_negbin(table, zones, 100, "ordinary")
result2 <- scan_negbin(table, zones, 100, "increasing")
}
\references{
Tango, T., Takahashi, K. & Kohriyama, K. (2011), \emph{A space-time scan 
   statistic for detecting emerging outbreaks}, Biometrics 67(1), 106--115.
}
\concept{
negative binomial negbin nbinom scanstatistic
}

