% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/random_sample_imputation.R
\name{impute_with_random_samples}
\alias{impute_with_random_samples}
\title{Random Sample Imputation}
\usage{
impute_with_random_samples(sc, sdf, column = NULL)
}
\arguments{
\item{sc}{A Spark connection}

\item{sdf}{A Spark DataFrame}

\item{column}{Name(s) of the column(s) to impute, given as a character vector. If \code{NULL} (the default), all columns will be imputed}
}
\value{
The Spark DataFrame with missing values imputed
}
\description{
This function imputes missing values in a Spark DataFrame using random samples from the observed values.
}
\examples{
# This example is not executed since it needs additional software (Apache Spark)
\dontrun{
# Load the required packages
library(sparklyr)
library(dplyr)

# Connect to Spark
# Assumes that you have already installed Spark with sparklyr::spark_install()
sc <- spark_connect(master = "local")

# Create sample data with missing values in different columns
sample_data <- data.frame(
  age = c(25, NA, 35, 28, NA, 45),
  salary = c(50000, 60000, NA, 55000, 80000, NA),
  department = c("Sales", NA, "IT", "Sales", "HR", "IT"),
  rating = c(4.2, 3.8, NA, 4.5, 3.9, NA)
)

# Copy to Spark DataFrame
sdf <- copy_to(sc, sample_data, "sample_data")

# Impute the selected columns using random samples from the observed values
imputed_sdf <- impute_with_random_samples(
  sc = sc,
  sdf = sdf,
  column = c("age", "salary", "department", "rating")
)

# View results
imputed_sdf \%>\% collect()

# Clean up
spark_disconnect(sc)
}
}
