Cauchy-distribution / combined_power_tests.R
combined_power_tests.R
Raw
# Load required libraries
library(stats4)
library(VGAM)
library(stargazer)

# Define Maxwell Distribution functions
rcauchy <- function(n, location=0, scale=1) {
  u <- runif(n, min=0, max=1)
  X <- location + scale * tan(pi * (u - 0.5))
  return(X)
}

pcauchy <- function(X, location=0, scale=1) {
  return(1/pi * atan((X - location) / scale) + 0.5)
}

dcauchy <- function(X, location=0, scale=1) {
  return(1 / (pi * scale * (1 + ((X - location) / scale)^2)))
}

# Parameter estimation of Cauchy
CauchyPar <- function(X) {
  n <- length(X)
  X <-  sort(X)
  if (n %% 2 == 0) {
    mu_hat <- (X[n/2] + X[n/2 + 1]) / 2
  } else {
    mu_hat <- X[(n + 1) / 2]
  }
  q75 <- quantile(X, 0.75)
  q25 <- quantile(X, 0.25)
  sigma_hat <- 0.5 * (q75 - q25)
  return(list(mu_hat=mu_hat, sigma_hat=sigma_hat))
}

# Define Simulation Function
SimFromDist <- function(DistNum, n) {
  if (DistNum == 1) { X <- rcauchy(n, 0, 1); nm <- "C(0,1)" }
  else if (DistNum == 2) { X <- rt(n, 3); nm <- "t(3)" }
  else if (DistNum == 3) { X <- rnorm(n, 0, 1); nm <- "N(0,1)" }
  else if (DistNum == 4) { X <- rgamma(n, 2, 1); nm <- "Gamma(2,1)" }
  else if (DistNum == 5) { X <- rlaplace(n, 0, 1); nm <- "Lap(0,1)" }
  else if (DistNum == 6) { X <- rbeta(n, 2, 2); nm <- "Beta(2,2)" }
  else if (DistNum == 7) { X <- runif(n, 0, 1); nm <- "Uni(0,1)" }
  ans <- list(X = X, nm = nm)
  return(ans)
}

# Existing tests for power comparison
# ~~~~~~~~~~~~~~~~~~ (1) Kolmogorov-Smirnov ~~~~~~~~~~~~~~~~~~
KSn <- function(X) {
  n <- length(X)
  FH <- pcauchy(X, 0, 1)
  KSp <- max((1:n) / n - FH)
  KSm <- max(FH - (0:(n - 1)) / n)
  out <- max(KSp, KSm)
  return(out)
}

# ~~~~~~~~~~~~~~~~~~~ (2) Cramér-von Mises ~~~~~~~~~~~~~~~~~~~
CVn <- function(X) {
  n <- length(X)
  FH <- pcauchy(X, 0, 1)
  out <- sum((FH - (2 * (1:n) - 1) / (2 * n))^2) + 1 / (12 * n)
  return(out)
}

# ~~~~~~~~~~~~~~~~~~~ (3) Anderson-Darling ~~~~~~~~~~~~~~~~~~~
ADn <- function(X) {
  n <- length(X)
  z <- pcauchy(X, 0, 1)
  T1 <- 2 * (1:n) - 1
  T2 <- log(z) + log(1 - z[n:1])
  out <- -n - mean(T1 * T2)
  return(out)
}

# ~~~~~~~~~~~~~~ (4) Modified Anderson-Darling ~~~~~~~~~~~~~~~
MAn <- function(X) {
  n <- length(X)
  z <- pcauchy(X, 0, 1)
  S1 <- sum(z)
  T1 <- 2 - (2 * (1:n) - 1) / n
  T2 <- log(1 - z)
  S2 <- sum(T1 * T2)
  out <- n / 2 - 2 * S1 - S2
  return(out)
}

# ~~~~~~~~~~~~~~~~~~~~~~ (5) Zhang's A ~~~~~~~~~~~~~~~~~~~~~~~
ZAn <- function(X, epsilon=1e-4) {
  n <- length(X)
  X[X == min(X)] <- X[X == min(X)] + epsilon
  j <- 1:n
  FH <- pcauchy(X, 0, 1)
  out <- -sum(log(FH) / (n - j + 0.5) + log(1 - FH) / (j - 0.5))
  return(out)
}

# ~~~~~~~~~~~~~~~~~~~~~~ (6) Zhang's B ~~~~~~~~~~~~~~~~~~~~~~~
ZBn <- function(X, epsilon=1e-4) {
  n <- length(X)
  X[X == min(X)] <- X[X == min(X)] + epsilon
  j <- 1:n
  FH <- pcauchy(X, 0, 1)
  arg <- (1 / FH - 1) / ((n - 0.5) / (j - 0.75) - 1)
  out <- sum((log(arg))^2)
  return(out)
}

# ~~~~~~~~~~~~~~~~~~~~~~ (7) Zhang's K ~~~~~~~~~~~~~~~~~~~~~~~
ZKn <- function(X) {
  n <- length(X)
  j <- 1:n
  FH <- pcauchy(X, 0, 1)
  T1 <- n * (j - 0.5) / (n - j + 0.5)^2 * log((j - 0.5) / (n * FH))
  T2 <- n / (n - j + 0.5) * log((n - j + 0.5) / (n * (1 - FH)))
  out <- 2 * sum(T1 + T2)
  return(out)
}

# ~~~~~~~~~~~~~~~~~~~~~~ (8) Gurtler and Henze (Cha fun) ~~~~~~~~~~~~~~~~~~~
Dn <- function(X) {
  n <- length(X)  # Sample size
  j <- 1:n  # Index vector for elements in X
  k <- 1:n  # Index vector for elements in X
  T1 <- (2 / n) * sum(sum(5 / (25 + outer(X, X, "-")^2)))
  T2 <- sum(6 / (36 + X^2))
  T3 <- (2 * n) / 7
  out <- T1 - 4 * T2 + T3
  return(out)  # Return the computed statistic
}

# ~~~~~~~~~~~~~~~~~~~ (9) Kullback-Leibler ~~~~~~~~~~~~~~~~~~~
HVm <- function(X, m) {
  n <- length(X)
  j <- 1:n
  ind1 <- pmin(j + m, n)
  ind2 <- pmax(j - m, 1)
  out <- mean(log(n / (2 * m) * (X[ind1] - X[ind2])))
  return(out)
}

KLn <- function(X, m) {
  n <- length(X)
  out <- exp(-HVm(X, m) - mean(log(dcauchy(X, 0, 1))))
  return(out)
}

# ~~~~~~~~~~~~~~~~~~~~~~ (10) Bowman ~~~~~~~~~~~~~~~~~~~~~~
fhHat <- function(X) {
  n <- length(X)
  s <- sd(X)
  h <- 1.06 * s * n^(-1 / 5)
  out <- rep(0, n)
  for (j in 1:n) {
    out[j] <- mean(dnorm((X - X[j]) / h)) / h
  }
  return(out)
}

HBn <- function(X) {
  fH <- fhHat(X)
  out <- -mean(log(fH))
  return(out)
}

# ~~~~~~~~~~~~~~~~~~~~~~ (11) Van (1992)  ~~~~~~~~~~~~~~~~~~~~~~
HEn <- function(X, m) {
  n <- length(X)
  X  <- sort(X)  # Ensure the sample is sorted
  j <- 1:n
  X1 <- X[1]
  Xn <- X[n]
  x_bar <- mean(X)
  s_squared <- var(X)
  k <- 4  # Assuming k is 4
  a <- x_bar - k * sqrt(s_squared)
  b <- x_bar + k * sqrt(s_squared)
  Y <- numeric(n)
  for (i in 1:n) {
    if (i <= m) {
      Y[i] <- a + ((i - 1) / m) * (X1 - a)
    } else if (i <= n - m) {
      Y[i] <- X[i]
    } else {
      Y[i] <- b - ((n - i) / m) * (b - Xn)
    }
  }
  d <- numeric(n)
  for (i in 1:n) {
    if (i <= m) {
      d[i] <- 1 + ((i + 1) / m) - (i / m^2)
    } else if (i <= n - m) {
      d[i] <- 2
    } else {
      d[i] <- 1 + ((n - i) / (m + 1))
    }
  }
  ind1 <- pmin(j + m, n)
  ind2 <- pmax(j - m, 1)
  out <- mean(log(n / (d[(m + 1):(n - m)] * m) * (Y[ind1] - Y[ind2])))
  return(out)
}

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#~~~~~~~~~~~~~~~~~~~~~~~  Function to calculate power statistics ~~~~~~~~~~~~~
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

powersWS <- function(TS, alpha) {
  TS_X <- TS[, 1]
  TS_S <- TS[, 2]
  
  # Clean TS_S of NA values
  TS_S_clean <- na.omit(TS_S)
  
  if (length(TS_S_clean) == 0) {
    stop("TS_S contains only NA values.")
  }
  #cv <- quantile(TS_S_clean,  1-alpha , na.rm = TRUE)            # one sided test
   cv1 <- quantile(TS_S_clean,  alpha/2 , na.rm = TRUE)
   cv2 <- quantile(TS_S_clean, 1 - (alpha/2) , na.rm = TRUE)
   out <-mean(as.integer(TS_X< cv1))+mean(as.integer(TS_X> cv2))  # two sided test
   #out <- mean(TS_X > cv)
  return(out)
}

# Parameters for Simulation
sample_sizes <- c( 20, 40, 60, 80, 100)
Range <- 1:7
MC <- 10000
alpha <- 0.05

meth <- "MLE"
ntests0 <- 11  # Updated number of tests

set.seed(1234)

# Store results in a data frame
Results_table <- data.frame()

for (DistNum in Range) {
  dist_name <- switch(DistNum,
                      "1" = "Cauchy",
                      "2" = "t",
                      "3" = "Normal",
                      "4" = "Gamma",
                      "5" = "Laplace",
                      "6" = "Beta",
                      "7" = "Uniform")
  
  for (n in sample_sizes) {
    m <- n / 2    # Window size 
    p <- numeric(ntests0)
    TS_KS <- matrix(0, MC, 2)
    TS_CV <- matrix(0, MC, 2)
    TS_AD <- matrix(0, MC, 2)
     TS_MA <- matrix(0, MC, 2)
    TS_ZA <- matrix(0, MC, 2)
    TS_ZB <- matrix(0, MC, 2)
    TS_ZK <- matrix(0, MC, 2)
    TS_D  <- matrix(0, MC, 2)
    TS_KL <- matrix(0, MC, 2)
    TS_HE <- matrix(0, MC, 2)
    
    for (j in 1:MC) {
      simdata <- SimFromDist(DistNum, n)
      X <- sort(simdata$X)
      if (meth == "MLE") {
        parmH <- CauchyPar(X)
        mu_hat <- parmH$mu_hat
        sigma_hat <- parmH$sigma_hat
      } else {
        mu_hat <- 0
        sigma_hat <- 1
      }
      
      Xstar <- sort(rcauchy(n, mu_hat, sigma_hat))
      if (meth == "MLE") {
        parmHS <- CauchyPar(Xstar)
        mu_hat_star <- parmHS$mu_hat
        sigma_hat_star <- parmHS$sigma_hat
      } else {
        mu_hat_star <- 0
        sigma_hat_star <- 1
      }
      
      TS_KS[j, ] <- c(KSn(X), KSn(Xstar))
      TS_CV[j, ] <- c(CVn(X), CVn(Xstar))
      TS_AD[j, ] <- c(ADn(X), ADn(Xstar))
      TS_MA[j, ] <- c(MAn(X), MAn(Xstar))
      TS_ZA[j, ] <- c(ZAn(X), ZAn(Xstar))
      TS_ZB[j, ] <- c(ZBn(X), ZBn(Xstar))
      TS_ZK[j, ] <- c(ZKn(X), ZKn(Xstar))
      TS_D[j, ]  <- c(Dn(X), Dn(Xstar))
      TS_KL[j, ] <- c(KLn(X, m), KLn(Xstar, m))
      TS_HE[j, ] <- c(HEn(X, m), HEn(Xstar, m))
    }
    
    p[1]  <- powersWS(TS_KS, alpha)
    p[2]  <- powersWS(TS_CV, alpha)
    p[3]  <- powersWS(TS_AD, alpha)
     p[4]  <- powersWS(TS_MA, alpha)
    p[5]  <- powersWS(TS_ZA, alpha)
    p[6]  <- powersWS(TS_ZB, alpha)
    p[7]  <- powersWS(TS_ZK, alpha)
    p[8]  <- powersWS(TS_D, alpha)
    p[9]  <- powersWS(TS_KL, alpha)
    p[10] <-  powersWS(TS_HE, alpha)
    
    # Append to Results_table
    dist_results <- data.frame(
      Distribution = dist_name,
      n = n,
      TS_KS = p[1],
      TS_CV = p[2],
      TS_AD = p[3],
       TS_MA = p[4],
      TS_ZA = p[5],
      TS_ZB = p[6],
      TS_ZK = p[7],
      TS_D  = p[8],
      TS_KL = p[9],
      TS_HE = p[10]
    )
    Results_table <- rbind(Results_table, dist_results)
  }
}

# Print the final table
print(Results_table)

# Optionally save the table to a CSV file
# write.csv(Results_table, file = "Results_GOF_Maxwell_Distributions.csv", row.names = FALSE)