% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ddml_plm.R
\name{ddml_plm}
\alias{ddml_plm}
\title{Estimator for the Partially Linear Model}
\usage{
ddml_plm(
  y,
  D,
  X,
  learners,
  learners_DX = learners,
  sample_folds = 2,
  ensemble_type = "nnls",
  shortstack = FALSE,
  cv_folds = 5,
  custom_ensemble_weights = NULL,
  custom_ensemble_weights_DX = custom_ensemble_weights,
  subsamples = NULL,
  cv_subsamples_list = NULL,
  silent = FALSE
)
}
\arguments{
\item{y}{The outcome variable.}

\item{D}{A matrix of endogenous variables.}

\item{X}{A (sparse) matrix of control variables.}

\item{learners}{May take one of two forms, depending on whether a single
learner or stacking with multiple learners is used for estimation of the
conditional expectation functions.
If a single learner is used, \code{learners} is a list with two named
elements:
\itemize{
\item{\code{what} The base learner function. The function must be
such that it predicts a named input \code{y} using a named input
\code{X}.}
\item{\code{args} Optional arguments to be passed to \code{what}.}
}
If stacking with multiple learners is used, \code{learners} is a list of
lists, each containing three named elements:
\itemize{
\item{\code{fun} The base learner function. The function must be
such that it predicts a named input \code{y} using a named input
\code{X}.}
\item{\code{args} Optional arguments to be passed to \code{fun}.}
\item{\code{assign_X} An optional vector of column indices
corresponding to control variables in \code{X} that are passed to
the base learner.}
}
Omission of the \code{args} element results in default arguments being
used in \code{fun}. Omission of \code{assign_X} results in inclusion of
all variables in \code{X}.}

\item{learners_DX}{Optional argument to allow for different estimators of
\eqn{E[D|X]}. Setup is identical to \code{learners}.}

\item{sample_folds}{Number of cross-fitting folds.}

\item{ensemble_type}{Ensemble method to combine base learners into final
estimate of the conditional expectation functions. Possible values are:
\itemize{
\item{\code{"nnls"} Non-negative least squares.}
\item{\code{"nnls1"} Non-negative least squares with the constraint
that all weights sum to one.}
\item{\code{"singlebest"} Select base learner with minimum MSPE.}
\item{\code{"ols"} Ordinary least squares.}
\item{\code{"average"} Simple average over base learners.}
}
Multiple ensemble types may be passed as a vector of strings.}

\item{shortstack}{Boolean to use short-stacking.}

\item{cv_folds}{Number of folds used for cross-validation in ensemble
construction.}

\item{custom_ensemble_weights}{A numerical matrix with user-specified
ensemble weights. Each column corresponds to a custom ensemble
specification, each row corresponds to a base learner in \code{learners}
(in chronological order). Optional column names are used to name the
estimation results corresponding to the custom ensemble specification.}

\item{custom_ensemble_weights_DX}{Optional argument to allow for different
custom ensemble weights for \code{learners_DX}. Setup is identical to
\code{custom_ensemble_weights}. Note: \code{custom_ensemble_weights} and
\code{custom_ensemble_weights_DX} must have the same number of columns.}

\item{subsamples}{List of vectors with sample indices for cross-fitting.}

\item{cv_subsamples_list}{List of lists, each corresponding to a subsample
containing vectors with subsample indices for cross-validation.}

\item{silent}{Boolean to silence estimation updates.}
}
\value{
\code{ddml_plm} returns an object of S3 class
\code{ddml_plm}. An object of class \code{ddml_plm} is a list containing
the following components:
\describe{
\item{\code{coef}}{A vector with the \eqn{\theta_0} estimates.}
\item{\code{weights}}{A list of matrices, providing the weight
assigned to each base learner (in chronological order) by the
ensemble procedure.}
\item{\code{mspe}}{A list of matrices, providing the MSPE of each
base learner (in chronological order) computed by the
cross-validation step in the ensemble construction.}
\item{\code{ols_fit}}{Object of class \code{lm} from the second
stage regression of \eqn{Y - \hat{E}[Y|X]} on
\eqn{D - \hat{E}[D|X]}.}
\item{\code{learners},\code{learners_DX},\code{subsamples},
\code{cv_subsamples_list},\code{ensemble_type}}{Pass-through of
selected user-provided arguments. See above.}
}
}
\description{
Estimator for the partially linear model.
}
\details{
\code{ddml_plm} provides a double/debiased machine learning
estimator for the parameter of interest \eqn{\theta_0} in the partially
linear model given by

\eqn{Y = \theta_0D + g_0(X) + U,}

where \eqn{(Y, D, X, U)} is a random vector such that
\eqn{E[Cov(U, D\vert X)] = 0} and \eqn{E[Var(D\vert X)] \neq 0}, and
\eqn{g_0} is an unknown nuisance function.
}
\examples{
# Construct variables from the included Angrist & Evans (1998) data
y = AE98[, "worked"]
D = AE98[, "morekids"]
X = AE98[, c("age","agefst","black","hisp","othrace","educ")]

# Estimate the partially linear model using a single base learner, ridge.
plm_fit <- ddml_plm(y, D, X,
                    learners = list(what = mdl_glmnet,
                                    args = list(alpha = 0)),
                    sample_folds = 2,
                    silent = TRUE)
summary(plm_fit)

# Estimate the partially linear model using short-stacking with base learners
#     ols, lasso, and ridge. We can also use custom_ensemble_weights
#     to estimate the PLM coefficient using every individual base learner.
weights_everylearner <- diag(1, 3)
colnames(weights_everylearner) <- c("mdl:ols", "mdl:lasso", "mdl:ridge")
plm_fit <- ddml_plm(y, D, X,
                    learners = list(list(fun = ols),
                                    list(fun = mdl_glmnet),
                                    list(fun = mdl_glmnet,
                                         args = list(alpha = 0))),
                    ensemble_type = 'nnls',
                    custom_ensemble_weights = weights_everylearner,
                    shortstack = TRUE,
                    sample_folds = 2,
                    silent = TRUE)
summary(plm_fit)
}
\references{
Ahrens A, Hansen C B, Schaffer M E, Wiemann T (2023). "ddml: Double/debiased
machine learning in Stata." \url{https://arxiv.org/abs/2301.09397}

Chernozhukov V, Chetverikov D, Demirer M, Duflo E, Hansen C B, Newey W,
Robins J (2018). "Double/debiased machine learning for treatment and
structural parameters." The Econometrics Journal, 21(1), C1-C68.

Wolpert D H (1992). "Stacked generalization." Neural Networks, 5(2), 241-259.
}
\seealso{
\code{\link[=summary.ddml_plm]{summary.ddml_plm()}}

Other ddml: 
\code{\link{ddml_ate}()},
\code{\link{ddml_fpliv}()},
\code{\link{ddml_late}()},
\code{\link{ddml_pliv}()}
}
\concept{ddml}
