\name{estimate.rds}
\alias{estimate.rds}
\alias{generate.rds.control}
\alias{var.theta}
\alias{estimate.rds.two.stage}

\title{ Degree distribution estimation in respondent driven samples }
\usage{
  	estimate.rds(sampled.degree.vector, Sij, method="BFGS", initial.values, arc=FALSE, control=generate.rds.control(), all.solutions=FALSE)
     
    var.theta(sampled.degree.vector, Sij, Njs, theta, beta)
    
    generate.rds.control(maxit=2000)
    
    estimate.rds.two.stage(sampled.degree.vector, Sij, method="BFGS", initial.values, arc=FALSE, control=generate.rds.control(), all.solutions=FALSE)

}


\arguments{
  \item{sampled.degree.vector}{An integer vector of the degree of each individual sampled (including zeroes).}

  \item{Sij}{An integer matrix of the counts of individuals sampled with rank i up to sampling period j. Row names are required and represent degrees.}
  
  \item{method}{Optimization method passed to \code{\link{optim}}.}
  
  \item{initial.values}{List of initialization values. See \emph{details} and \emph{examples}.}
  
   \item{arc}{Deprecated. }
   
   \item{control}{List of control parameters. Generated by \code{generate.rds.control}. }
  
  \item{theta}{ Numeric value of \eqn{\theta}.  }
  
  \item{Njs}{ Numeric vector of the frequencies of degree \eqn{j}. }
  
  \item{beta}{ Numeric value of \eqn{\beta} }
  
  \item{maxit}{Maximum number of \code{\link{optim}} iterations. Used by \code{generate.rds.control} to control the behaviour of \code{\link{optim}}.}
  
  \item{all.solutions}{Logical. Should all local maxima be returned, or just one?}

}


\description{
    ML estimation of population size and degree distribution in respondent driven samples. 
}


\value{
\code{estimate.rds} returns a list with the estimates for each set of starting values.
Each component includes:
	\item{beta}{See in \emph{details}}
	\item{theta}{See in \emph{details}}
	\item{initial.values}{Values used to initiate the optimization.}
	\item{Nj}{See in \emph{details}}
	\item{iterations}{The number of \code{\link{optim}} iterations until convergence.}
	\item{likelihood.optimum}{The (log) likelihood at convergence.}
	\item{call}{The function call.}	
  
\code{var.theta} returns an estimate of the asymptotic variance of \eqn{\theta}.

\code{estimate.rds.two.stage}  will not return \eqn{\theta} and \eqn{\beta} but rather a vector of \eqn{\beta_j} for all observed degrees.  

\code{generate.rds.control} returns a list of control values needed by \code{estimate.rds}. At present this includes only the number of \code{\link{optim}} iterations.
}


\details{
\code{estimate.rds} performs maximum likelihood estimation of the population
  size, degree distribution and "coefficient of discoverability" as described in the reference.   
  The method assumes the probability of sampling an individual with degree i at time j is given by: 
  \deqn{\beta I[j] (N[i]-S[i,j]) i^\theta }
  where   \eqn{\beta} is the base rate of sampling, \eqn{\theta} is the "coefficient of discoverability", \eqn{I[j]} is the number of individuals sampled up to time \eqn{j},
   \eqn{N[i]} is the number of individuals with degree \eqn{i} in the population, and \eqn{S[i,j]} is the number of individuals with degree \eqn{i} sampled up to time \eqn{j}.
   The function \code{estimate.rds} has to be given some initialization values. By default, it is given a grid of \eqn{\theta} values, but it can also be given a list including other parameter values (as in the example).
   \code{estimate.rds.two.stage} works like \code{estimate.rds} but does not assume \eqn{\beta_j=\beta * j^ \theta }; instead, it tries to estimate \eqn{\beta_j} directly.
   It does so in two stages: in the first it calls \code{estimate.rds}, and once the optimal values have been found, it relaxes the \eqn{\beta_j=\beta * j^ \theta } assumption and optimizes for \eqn{\beta_j}.
   }
   
   
\examples{
\dontrun{
#### Commented due to long execution time.####

#### Estimating assuming beta_j=beta * j^theta:
data(simulation)
temp.data<- unlist(data3[1,7000:7500])
(rds.result<- estimate.rds(sampled.degree.vector=temp.data , Sij=make.Sij(temp.data), initial.values=list(theta=c(1,2))))
plot(rds.result$Nj, type='h', xlab='Degree', ylab=expression(N[j]), main='Estimated Degree Distribution')	
var.theta(temp.data, Njs=rds.result$Nj, theta=rds.result$theta, beta=rds.result$beta)

#### Example of the two-stage estimation of beta_j ####
estimation3<- estimate.rds.two.stage(sampled.degree.vector=temp.data, Sij = make.Sij(temp.data), 
											initial.values=list(theta=c(0.5,1,1.5)),
											control=generate.rds.control(maxit=5))
plot(estimation3$beta_js~estimation3$observed_js)
lines(lowess(estimation3$beta_js~estimation3$observed_js), col='red')


### Example using a full set of initialization values:
Njs<- c(100,100,500,1000); names(Njs)<- c("1","50","100","1000"); Njs<- as.table(Njs)
theta<- 1
beta<- 1e-10
tail(degree.sampled.vec<- generate.sample(theta, Njs, beta, sample.length=1e5))
str(rds.result<- estimate.rds(degree.sampled.vec, Sij = make.Sij(degree.sampled.vec), 				
				initial.values = list(theta=c(theta), beta=c(beta), Njs=list(Njs))))		
	}
}


\author{
  Jonathan D. Rosenblatt \email{john.ros@gmail.com}
}


\references{
	Respondent Driven Sampling as an Epidemic Process
	Berchenko Y., Rosenblatt J.D., White R.G., Frost S.D.W. (Submitted)
}


