#' @title exphaz function
#'
#' @description Calculate the expected hazard and survival.
#'
#' @param formula a formula object of the \code{Surv} function with the
#' response on the left of a \code{~} operator and the terms on the right. The
#' response must be a survival object as returned by the \code{Surv}
#' function (\code{time} in first and \code{status} in second).
#' @note \code{Time} is OBLIGATORY in YEARS.
#'
#'
#' @param data a data frame in which to interpret the variables named in the
#' formula
#'
#' @param ratetable a rate table stratified by \code{age}, \code{sex},
#' \code{year} (if missing, ratedata is used)
#'
#' @param rmap A named list mapping ratetable dimensions (e.g., \code{age}, \code{sex}, \code{year}, and any extras like \code{dept}, \code{EDI}) to column names in \code{data}.
#'
#' @param ratedata a data frame of the hazards mortality in general population.
#'
#' @param only_ehazard a boolean argument (by default, \code{only_ehazard=TRUE}).
#' If \code{TRUE}, the cumulative population hazard is not provided.
#'
#' @param subset an expression indicating which subset of the rows in data
#' should be used in the fit. All observations are included by default
#'
#' @param na.action a missing data filter function. The default is na.fail,
#' which returns an error if any missing values are found. An alternative is
#' na.exclude, which deletes observations that contain one or more missing
#' values.
#'
#'
#' @param scale a numeric argument specifying by default \code{scale = 365.2425}
#' (or using the value corresponding to \code{attributes(ratetable)$cutpoints[[1]][2]}, often equal
#'  to 365.25) if the user wants to extract a yearly hazard rate, or \code{scale = 1} if he
#'  wants to extract a daily hazard rate from a ratetable containing daily hazard rates for
#'  a matched subject from the population, defined as \code{-log(1-q)/365.25}
#'  where \code{q} is the \code{1-}year probability of death.
#'
#' @return An object of class \code{list} containing the following components:
#'
#'
#' \item{ehazard}{expected hazard calculated from the matching \code{ratetable}.}
#'
#' \item{ehazardInt}{cumulative expected hazard calculated from the matching \code{ratetable}. if \code{only_ehazard=TRUE}, this quantity is not provided.}
#'
#' \item{dateDiag}{date of diagnosis}
#'
#' @references Goungounga JA, Touraine C, Graff\'eo N, Giorgi R;
#' CENSUR working survival group. Correcting for misclassification
#' and selection effects in estimating net survival in clinical trials.
#' BMC Med Res Methodol. 2019 May 16;19(1):104.
#' doi: 10.1186/s12874-019-0747-3. PMID: 31096911; PMCID: PMC6524224.
#' (\href{https://pubmed.ncbi.nlm.nih.gov/31096911/}{PubMed})
#'
#' Therneau, T. M., Grambsch, P. M., Therneau, T. M., & Grambsch, P. M. (2000).
#' Expected survival. Modeling survival data: extending the Cox model, 261-287.
#'
#' @examples
#' \donttest{
#' library(survival)
#' library(survexp.fr)
#' library(xhaz)
#' fit.haz <- exphaz(
#'                 formula = Surv(obs_time_year, event) ~ 1,
#'                 data = dataCancer,
#'                 ratetable = survexp.fr, only_ehazard = TRUE,
#'                 rmap = list(age = 'age', sex = 'sexx', year = 'year_date')
#' )
#' }
#' @export
exphaz <- function(formula = formula(data),
                 data = sys.parent(),
                 ratetable, rmap = list(age = NULL, sex = NULL, year = NULL),
                 ratedata = sys.parent(),
                 only_ehazard = TRUE,
                 subset,
                 na.action,
                 scale = 365.2425) {
  Call <- match.call()

  mf <- match.call(expand.dots = FALSE)
  m_idx <- match(c("formula","data","subset","na.action"), names(mf), nomatch = 0)
  mf <- mf[c(1, m_idx)]
  mf[[1L]] <- quote(stats::model.frame)

  Terms <- if (missing(data)) stats::terms(formula) else stats::terms(formula, data = data)
  mf$formula <- Terms
  m <- eval(mf, parent.frame())

  Y <- stats::model.extract(m, "response")

  if (!inherits(Y, "Surv"))
    stop("Response must be a survival::Surv object.", call. = FALSE)

  if (ncol(Y) == 2L) {
    time  <- Y[,1]
  } else {
    time  <- Y[,2] - Y[,1]
  }

  mrn <- rownames(m)
  drn <- rownames(data)
  if (is.null(drn)) {
    drn <- as.character(seq_len(nrow(data)))
    rownames(data) <- drn
  }
  idx <- match(mrn, drn)
  if (anyNA(idx)) {
    stop("Internal alignment failed: could not match model.frame rows back to `data`.\n",
         "Check that rownames(data) are unique and unchanged.", call. = FALSE)
  }
  data_used <- data[idx, , drop = FALSE]


  if (length(time) != nrow(data_used)) {
    stop("Internal alignment failed: length(time) = ", length(time),
         " but nrow(data_used) = ", nrow(data_used),
         ". Check NA/subset handling.", call. = FALSE)
  }


  if (is.null(rmap$age) || is.null(rmap$sex) || is.null(rmap$year))
    stop("`rmap` must at least include age, sex, year.", call. = FALSE)
  ageDiag <- data_used[[ rmap$age ]]

  res <- exphaz_years(
    ageDiag   = ageDiag,
    time      = time,
    data      = data_used,
    rmap      = rmap,
    ratetable = ratetable,
    ratedata  = ratedata,
    scale     = scale,
    pophaz    = "rescaled",
    only_ehazard = only_ehazard,
    coerce_year_to_date = TRUE,
    max_age             = 115L,
    clamp_to_table      = TRUE,
    unknown_dim         = "warn"
  )

  if (isTRUE(only_ehazard)) {
    return(list(ehazard = res$ehazard,
                dateDiag = res$dateDiag))
  } else {
    return(list(ehazard    = res$ehazard,
                ehazardInt = res$ehazardInt,
                dateDiag   = res$dateDiag))
  }
}
