% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/exp_stats.R
\name{exp_stats}
\alias{exp_stats}
\alias{summary.exp_df}
\title{Summarize experience study records}
\usage{
exp_stats(
  .data,
  target_status = attr(.data, "target_status"),
  expected,
  col_exposure = "exposure",
  col_status = "status",
  wt = NULL,
  credibility = FALSE,
  conf_level = 0.95,
  cred_r = 0.05,
  conf_int = FALSE,
  control_vars,
  control_distinct_max = 25L
)

\method{summary}{exp_df}(object, ...)
}
\arguments{
\item{.data}{A data frame with exposure-level records, ideally of type
\code{exposed_df}}

\item{target_status}{A character vector of target status values}

\item{expected}{A character vector containing column names in \code{.data}
with expected values}

\item{col_exposure}{Name of the column in \code{.data} containing exposures}

\item{col_status}{Name of the column in \code{.data} containing the policy status}

\item{wt}{Optional. Length 1 character vector. Name of the column in
\code{.data} containing weights to use in the calculation of claims,
exposures, partial credibility, and confidence intervals.}

\item{credibility}{If \code{TRUE}, the output will include partial credibility
weights and credibility-weighted termination rates.}

\item{conf_level}{Confidence level used for the Limited Fluctuation
credibility method and confidence intervals}

\item{cred_r}{Error tolerance under the Limited Fluctuation credibility
method}

\item{conf_int}{If \code{TRUE}, the output will include confidence intervals
around the observed termination rates and any actual-to-expected ratios.}

\item{control_vars}{\code{".none"} or a character vector containing column names
in \code{.data} to use as control variables}

\item{control_distinct_max}{Maximum number of unique values allowed for
control variables}

\item{object}{An \code{exp_df} object}

\item{...}{Groups to retain after \code{summary()} is called}
}
\value{
A tibble with class \code{exp_df}, \code{tbl_df}, \code{tbl},
and \code{data.frame}. The results include columns for any grouping variables,
claims, exposures, and observed termination rates (\code{q_obs}).
\itemize{
\item If any values are passed to \code{expected} or \code{control_vars}, additional
columns are added for expected termination rates and actual-to-expected
(A/E) ratios. A/E ratios are prefixed by \code{ae_}.
\item If \code{credibility} is set to \code{TRUE}, additional columns are added
for partial credibility and credibility-weighted termination rates
(assuming values are passed to \code{expected}). Credibility-weighted termination
rates are prefixed by \code{adj_}.
\item If \code{conf_int} is set to \code{TRUE}, additional columns are added for lower and
upper confidence interval limits around the observed termination rates and
any actual-to-expected ratios. Additionally, if \code{credibility} is \code{TRUE} and
expected values are passed to \code{expected}, the output will contain confidence
intervals around credibility-weighted termination rates. Confidence interval
columns include the name of the original output column suffixed by either
\verb{_lower} or \verb{_upper}.
\item If a value is passed to \code{wt}, additional columns are created containing
the sum of weights (\code{.weight}), the sum of squared weights
(\code{.weight_sq}), and the number of records (\code{.weight_n}).
}
}
\description{
Create a summary data frame of termination experience for a
given target status.
}
\details{
If \code{.data} is grouped, the resulting data frame will contain
one row per group.

If \code{target_status} isn't provided, \code{\link[=exp_stats]{exp_stats()}} will use the same
target status from \code{.data} if it has the class \code{exposed_df}.
Otherwise, all status values except the first level will be assumed.
This will produce a warning message.
}
\section{Expected values}{
The \code{expected} argument is optional. If provided, this argument must
be a character vector with values corresponding to column names in \code{.data}
containing expected experience. More than one expected basis can be provided.
}

\section{Control variables}{
The \code{control_vars} argument is optional. If provided, this argument must
be \code{".none"} (more on this below) or a character vector with values
corresponding to column names in \code{.data}. Control variables are used to
estimate the impact of any grouping variables on observed experience
\emph{after accounting for} the impact of control variables.

Mechanically, when values are passed to \code{control_vars}, a separate call
is made to \code{\link[=exp_stats]{exp_stats()}} using the control variables as grouping variables.
This is used to derive a new expected values basis called \code{control}, which is
both added to \code{.data} and appended to the \code{expected} argument. In the final
output, a column called \code{ae_control} shows the relative impact of any
grouping variables after accounting for the control variables.

\strong{About \code{".none"}}: If \code{".none"} is passed to \code{control_vars}, a single
aggregate termination rate is calculated for the entire data set and used to
compute \code{control} and \code{ae_control}.

The \code{control_distinct_max} argument places an upper limit on the number of
unique values that a control variable is allowed to have. This limit exists
to prevent an excessive number of groups on continuous or high-cardinality
features.

It should be noted that usage of control variables is a rough approximation
and not a substitute for rigorous statistical models. The impact of control
variables is calculated in isolation and does not consider other features or
possible confounding variables. As such, control variables are most useful
for exploratory data analysis.
}

\section{Credibility}{
If \code{credibility} is set to \code{TRUE}, the output will contain a
\code{credibility} column equal to the partial credibility estimate under
the Limited Fluctuation credibility method (also known as Classical
Credibility) assuming a binomial distribution of claims.
}

\section{Confidence intervals}{
If \code{conf_int} is set to \code{TRUE}, the output will contain lower and upper
confidence interval limits for the observed termination rate and any
actual-to-expected ratios. The confidence level is dictated
by \code{conf_level}. If no weighting variable is passed to \code{wt}, confidence
intervals will be constructed assuming a binomial distribution of claims.
Otherwise, confidence intervals will be calculated assuming that the
aggregate claims distribution is normal with a mean equal to observed claims
and a variance equal to:

\code{Var(S) = E(N) * Var(X) + E(X)^2 * Var(N)},

where \code{S} is the aggregate claim random variable, \code{X} is the weighting
variable assumed to follow a normal distribution, and \code{N} is a binomial
random variable for the number of claims.

If \code{credibility} is \code{TRUE} and expected values are passed to \code{expected},
the output will also contain confidence intervals for any
credibility-weighted termination rates.
}

\section{\code{summary()} Method}{
Applying \code{summary()} to an \code{exp_df} object will re-summarize the
data while retaining any grouping variables passed to the "dots"
(\code{...}).
}

\examples{
toy_census |> expose("2022-12-31", target_status = "Surrender") |>
    exp_stats()

exp_res <- census_dat |>
           expose("2019-12-31", target_status = "Surrender") |>
           group_by(pol_yr, inc_guar) |>
           exp_stats(control_vars = "product")

exp_res
summary(exp_res)
summary(exp_res, inc_guar)

}
\references{
Herzog, Thomas (1999). Introduction to Credibility Theory
}
