% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/data-sim.R
\name{sim_dgp_ewald}
\alias{sim_dgp_ewald}
\title{Simulate data as in Ewald et al. (2024)}
\usage{
sim_dgp_ewald(n = 500)
}
\arguments{
\item{n}{(\code{integer(1)}) Number of samples to create.}
}
\value{
A regression task (\link[mlr3:TaskRegr]{mlr3::TaskRegr}) with a \link[data.table:data.table]{data.table} backend.
}
\description{
Reproduces the data generating process from Ewald et al. (2024) for benchmarking
feature importance methods. Includes correlated features and interaction effects.
}
\details{
\strong{Mathematical Model:}
\deqn{X_1, X_3, X_5 \sim \text{Uniform}(0,1)}
\deqn{X_2 = X_1 + \varepsilon_2, \quad \varepsilon_2 \sim N(0, \sqrt{0.001})}
\deqn{X_4 = X_3 + \varepsilon_4, \quad \varepsilon_4 \sim N(0, \sqrt{0.1})}
\deqn{Y = X_4 + X_5 + X_4 \cdot X_5 + \varepsilon, \quad \varepsilon \sim N(0, \sqrt{0.1})}

\strong{Feature Properties:}
\itemize{
\item X1, X3, X5: Drawn independently from Uniform(0,1)
\item X2: Nearly perfect copy of X1 (correlation approximately 0.99)
\item X4: Noisy copy of X3 (correlation approximately 0.67)
\item Y depends on X4, X5, and their interaction
}
}
\examples{
sim_dgp_ewald(100)

}
\references{
Ewald F, Bothmann L, Wright M, Bischl B, Casalicchio G, König G (2024).
\dQuote{A Guide to Feature Importance Methods for Scientific Inference.}
In Longo L, Lapuschkin S, Seifert C (eds.), \emph{Explainable Artificial Intelligence}, 440--464.
ISBN 978-3-031-63797-1, \doi{10.1007/978-3-031-63797-1_22}.
}
\seealso{
Other simulation: 
\code{\link{sim_dgp_scenarios}}
}
\concept{simulation}
