## ----include = FALSE----------------------------------------------------------
# Global knitr chunk options: collapse = TRUE and a "#>" prefix on output lines
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----setup--------------------------------------------------------------------
library(quickOutlier)

## ----univariate---------------------------------------------------------------
# Toy data set: ten revenue readings, one of which (500) is far from the rest
df <- data.frame(
  id = seq_len(10),
  revenue = c(10, 12, 11, 10, 12, 11, 13, 10, 500, 11)
)

# Run detect_outliers() on the revenue column with the IQR method and
# print whatever it returns
outliers <- detect_outliers(
  df,
  column = "revenue",
  method = "iqr"
)
print(outliers)

## ----plot, fig.width=6, fig.height=4------------------------------------------
# Plot the revenue column of the example data, using the IQR method
plot_outliers(
  df,
  column = "revenue",
  method = "iqr"
)

## ----treat--------------------------------------------------------------------
# Treat (cap) outlying revenue values using limits derived from the IQR
df_clean <- treat_outliers(
  df,
  column = "revenue",
  method = "iqr"
)

# Show the treated column; the 500 is expected to be replaced by the
# upper bound
print(df_clean$revenue)

## ----multivariate-------------------------------------------------------------
# Fix the RNG seed so the simulated data (and therefore the printed result)
# are identical on every build of this vignette
set.seed(123)

# Generate data: y is a noisy linear function of x (y = 2 * x + noise)
df_multi <- data.frame(x = rnorm(50))
df_multi$y <- df_multi$x * 2 + rnorm(50, sd = 0.5)

# Add an anomaly: an unremarkable x paired with a y far from the 2 * x trend
anomaly <- data.frame(x = 0, y = 10)
df_multi <- rbind(df_multi, anomaly)

# Detect using Mahalanobis Distance
detect_multivariate(df_multi, columns = c("x", "y"))

## ----lof----------------------------------------------------------------------
# Fix the RNG seed so the example is reproducible across builds
set.seed(42)

# Create a cluster of 50 standard-normal points plus one distant
# point at (10, 10)
df_density <- data.frame(
  x = c(rnorm(50), 10),
  y = c(rnorm(50), 10)
)

# Run LOF detection
detect_density(df_density, k = 5)

## ----scan---------------------------------------------------------------------
# Run scan_data() on the whole example data frame (no column selected),
# using the IQR method
scan_data(
  df,
  method = "iqr"
)

