% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/bijectors.R
\name{tfb_batch_normalization}
\alias{tfb_batch_normalization}
\title{Computes \code{Y = g(X)} s.t. \code{X = g^-1(Y) = (Y - mean(Y)) / std(Y)}}
\usage{
tfb_batch_normalization(batchnorm_layer = NULL, training = TRUE,
  validate_args = FALSE, name = "batch_normalization")
}
\arguments{
\item{batchnorm_layer}{\code{tf$layers$BatchNormalization} layer object. If NULL, defaults to
\code{tf$layers$BatchNormalization(gamma_constraint = function(x) tf$nn$relu(x) + 1e-6)}.
This ensures positivity of the scale variable.}

\item{training}{If TRUE, updates running-average statistics during call to inverse().}

\item{validate_args}{Logical, default FALSE. Whether to validate input with asserts. If validate_args is
FALSE, and the inputs are invalid, correct behavior is not guaranteed.}

\item{name}{name prefixed to Ops created by this class.}
}
\value{
a bijector instance.
}
\description{
Applies Batch Normalization (Ioffe and Szegedy, 2015) to samples from a
data distribution. This can be used to stabilize training of normalizing
flows (Papamakarios et al., 2017; Dinh et al., 2017).
}
\details{
When training Deep Neural Networks (DNNs), it is common practice to
normalize or whiten features by shifting them to have zero mean and
scaling them to have unit variance.

The \code{inverse()} method of the BatchNormalization bijector, which is used in
the log-likelihood computation of data samples, implements the normalization
procedure (shift-and-scale) using the mean and standard deviation of the
current minibatch.

Conversely, the \code{forward()} method of the bijector de-normalizes samples (e.g.
\code{X*std(Y) + mean(Y)}) with the running-average mean and standard deviation
computed at training-time. De-normalization is useful for sampling.

During training time, BatchNormalization.inverse and BatchNormalization.forward are not
guaranteed to be inverses of each other because \code{inverse(y)} uses statistics of the current minibatch,
while \code{forward(x)} uses running-average statistics accumulated from training.
In other words, \code{tfb_batch_normalization()$inverse(tfb_batch_normalization()$forward(...))} and
\code{tfb_batch_normalization()$forward(tfb_batch_normalization()$inverse(...))} will be identical when
training=FALSE but may be different when training=TRUE.
}
\section{References}{

\itemize{
\item \href{https://arxiv.org/abs/1502.03167}{Sergey Ioffe and Christian Szegedy. Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift. In International Conference on Machine Learning, 2015.}
\item \href{https://arxiv.org/abs/1605.08803}{Laurent Dinh, Jascha Sohl-Dickstein, and Samy Bengio. Density Estimation using Real NVP. In International Conference on Learning Representations, 2017.}
\item \href{https://arxiv.org/abs/1705.07057}{George Papamakarios, Theo Pavlakou, and Iain Murray. Masked Autoregressive Flow for Density Estimation. In Neural Information Processing Systems, 2017.}
}
}

\seealso{
For usage examples see \code{\link[=tfb_forward]{tfb_forward()}}, \code{\link[=tfb_inverse]{tfb_inverse()}}, \code{\link[=tfb_inverse_log_det_jacobian]{tfb_inverse_log_det_jacobian()}}.

Other bijectors: \code{\link{tfb_absolute_value}},
  \code{\link{tfb_affine_linear_operator}},
  \code{\link{tfb_affine_scalar}},
  \code{\link{tfb_affine}}, \code{\link{tfb_blockwise}},
  \code{\link{tfb_chain}},
  \code{\link{tfb_cholesky_outer_product}},
  \code{\link{tfb_cholesky_to_inv_cholesky}},
  \code{\link{tfb_correlation_cholesky}},
  \code{\link{tfb_cumsum}},
  \code{\link{tfb_discrete_cosine_transform}},
  \code{\link{tfb_expm1}}, \code{\link{tfb_exp}},
  \code{\link{tfb_ffjord}},
  \code{\link{tfb_fill_scale_tri_l}},
  \code{\link{tfb_fill_triangular}},
  \code{\link{tfb_gumbel_cdf}}, \code{\link{tfb_gumbel}},
  \code{\link{tfb_identity}}, \code{\link{tfb_inline}},
  \code{\link{tfb_invert}},
  \code{\link{tfb_iterated_sigmoid_centered}},
  \code{\link{tfb_kumaraswamy_cdf}},
  \code{\link{tfb_kumaraswamy}},
  \code{\link{tfb_masked_autoregressive_default_template}},
  \code{\link{tfb_masked_autoregressive_flow}},
  \code{\link{tfb_masked_dense}},
  \code{\link{tfb_matrix_inverse_tri_l}},
  \code{\link{tfb_matvec_lu}},
  \code{\link{tfb_normal_cdf}}, \code{\link{tfb_ordered}},
  \code{\link{tfb_pad}}, \code{\link{tfb_permute}},
  \code{\link{tfb_power_transform}},
  \code{\link{tfb_rational_quadratic_spline}},
  \code{\link{tfb_real_nvp_default_template}},
  \code{\link{tfb_real_nvp}}, \code{\link{tfb_reciprocal}},
  \code{\link{tfb_reshape}},
  \code{\link{tfb_scale_matvec_diag}},
  \code{\link{tfb_scale_matvec_linear_operator}},
  \code{\link{tfb_scale_matvec_lu}},
  \code{\link{tfb_scale_matvec_tri_l}},
  \code{\link{tfb_scale_tri_l}}, \code{\link{tfb_scale}},
  \code{\link{tfb_shift}}, \code{\link{tfb_sigmoid}},
  \code{\link{tfb_sinh_arcsinh}},
  \code{\link{tfb_softmax_centered}},
  \code{\link{tfb_softplus}}, \code{\link{tfb_softsign}},
  \code{\link{tfb_square}}, \code{\link{tfb_tanh}},
  \code{\link{tfb_transform_diagonal}},
  \code{\link{tfb_transpose}},
  \code{\link{tfb_weibull_cdf}}, \code{\link{tfb_weibull}}
}
\concept{bijectors}
