% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/edf.R
\name{edf}
\alias{edf}
\title{Grouped empirical cumulative distribution function applied to data}
\usage{
edf(x, g = NULL, wt = NULL)
}
\arguments{
\item{x}{Numeric vector.}

\item{g}{Vector of group IDs (for example numeric or character).}

\item{wt}{Frequency weights.}
}
\value{
A numeric vector the same length as \code{x}.
}
\description{
Like \code{dplyr::cume_dist(x)} and \code{ecdf(x)(x)},
but with added grouping and weighting functionality.\cr
You can calculate the empirical distribution of \code{x} from
aggregated data by supplying frequency weights.
No expansion occurs, which makes this function extremely efficient
for this type of data. Plotting is a common application.
}
\examples{
library(timeplyr)
library(dplyr)
library(ggplot2)
\dontshow{
.n_dt_threads <- data.table::getDTthreads()
.n_collapse_threads <- collapse::get_collapse()$nthreads
data.table::setDTthreads(threads = 2L)
collapse::set_collapse(nthreads = 1L)
}
set.seed(9123812)
x <- sample(seq(-10, 10, 0.5), size = 10^2, replace = TRUE)
plot(sort(edf(x)))
# Compare against the base R and dplyr equivalents
all.equal(edf(x), ecdf(x)(x))
all.equal(edf(x), cume_dist(x))

# Manual ECDF plot using only aggregate data
y <- rnorm(100, 10)
grid <- time_span(y, time_by = 0.1, time_floor = TRUE)
counts <- time_countv(y, time_by = 0.1, time_floor = TRUE)
# Stored under a name other than edf so the function itself is not masked
edf_wt <- edf(grid, wt = counts)
# Trivial here as this is the same
all.equal(unname(cumsum(counts)/sum(counts)), edf_wt)

# Full ecdf of y, built from the raw values
tibble(y) \%>\%
  ggplot(aes(x = y)) +
  stat_ecdf()
# Approximation using aggregate only data
tibble(grid, edf = edf_wt) \%>\%
  ggplot(aes(x = grid, y = edf)) +
  geom_step()

# Grouped example
g <- sample(letters[1:3], size = 10^2, replace = TRUE)

# Reference result: dplyr cume_dist() computed within each group
edf1 <- tibble(x, g) \%>\%
  mutate(edf = cume_dist(x),
         .by = g) \%>\%
  pull(edf)
# Same calculation done directly through the g argument
edf2 <- edf(x, g = g)
all.equal(edf1, edf2)
\dontshow{
data.table::setDTthreads(threads = .n_dt_threads)
collapse::set_collapse(nthreads = .n_collapse_threads)
}
}
