# Load required libraries
library(stats4)
library(VGAM)
library(stargazer)
# Define Maxwell Distribution functions
rcauchy <- function(n, location=0, scale=1) {
u <- runif(n, min=0, max=1)
X <- location + scale * tan(pi * (u - 0.5))
return(X)
}
pcauchy <- function(X, location=0, scale=1) {
return(1/pi * atan((X - location) / scale) + 0.5)
}
dcauchy <- function(X, location=0, scale=1) {
return(1 / (pi * scale * (1 + ((X - location) / scale)^2)))
}
# Parameter estimation of Cauchy
CauchyPar <- function(X) {
n <- length(X)
X <- sort(X)
if (n %% 2 == 0) {
mu_hat <- (X[n/2] + X[n/2 + 1]) / 2
} else {
mu_hat <- X[(n + 1) / 2]
}
q75 <- quantile(X, 0.75)
q25 <- quantile(X, 0.25)
sigma_hat <- 0.5 * (q75 - q25)
return(list(mu_hat=mu_hat, sigma_hat=sigma_hat))
}
# Define Simulation Function
SimFromDist <- function(DistNum, n) {
if (DistNum == 1) { X <- rcauchy(n, 0, 1); nm <- "C(0,1)" }
else if (DistNum == 2) { X <- rt(n, 3); nm <- "t(3)" }
else if (DistNum == 3) { X <- rnorm(n, 0, 1); nm <- "N(0,1)" }
else if (DistNum == 4) { X <- rgamma(n, 2, 1); nm <- "Gamma(2,1)" }
else if (DistNum == 5) { X <- rlaplace(n, 0, 1); nm <- "Lap(0,1)" }
else if (DistNum == 6) { X <- rbeta(n, 2, 2); nm <- "Beta(2,2)" }
else if (DistNum == 7) { X <- runif(n, 0, 1); nm <- "Uni(0,1)" }
ans <- list(X = X, nm = nm)
return(ans)
}
# Existing tests for power comparison
# ~~~~~~~~~~~~~~~~~~ (1) Kolmogorov-Smirnov ~~~~~~~~~~~~~~~~~~
KSn <- function(X) {
n <- length(X)
FH <- pcauchy(X, 0, 1)
KSp <- max((1:n) / n - FH)
KSm <- max(FH - (0:(n - 1)) / n)
out <- max(KSp, KSm)
return(out)
}
# ~~~~~~~~~~~~~~~~~~~ (2) Cramér-von Mises ~~~~~~~~~~~~~~~~~~~
CVn <- function(X) {
n <- length(X)
FH <- pcauchy(X, 0, 1)
out <- sum((FH - (2 * (1:n) - 1) / (2 * n))^2) + 1 / (12 * n)
return(out)
}
# ~~~~~~~~~~~~~~~~~~~ (3) Anderson-Darling ~~~~~~~~~~~~~~~~~~~
ADn <- function(X) {
n <- length(X)
z <- pcauchy(X, 0, 1)
T1 <- 2 * (1:n) - 1
T2 <- log(z) + log(1 - z[n:1])
out <- -n - mean(T1 * T2)
return(out)
}
# ~~~~~~~~~~~~~~ (4) Modified Anderson-Darling ~~~~~~~~~~~~~~~
MAn <- function(X) {
n <- length(X)
z <- pcauchy(X, 0, 1)
S1 <- sum(z)
T1 <- 2 - (2 * (1:n) - 1) / n
T2 <- log(1 - z)
S2 <- sum(T1 * T2)
out <- n / 2 - 2 * S1 - S2
return(out)
}
# ~~~~~~~~~~~~~~~~~~~~~~ (5) Zhang's A ~~~~~~~~~~~~~~~~~~~~~~~
ZAn <- function(X, epsilon=1e-4) {
n <- length(X)
X[X == min(X)] <- X[X == min(X)] + epsilon
j <- 1:n
FH <- pcauchy(X, 0, 1)
out <- -sum(log(FH) / (n - j + 0.5) + log(1 - FH) / (j - 0.5))
return(out)
}
# ~~~~~~~~~~~~~~~~~~~~~~ (6) Zhang's B ~~~~~~~~~~~~~~~~~~~~~~~
ZBn <- function(X, epsilon=1e-4) {
n <- length(X)
X[X == min(X)] <- X[X == min(X)] + epsilon
j <- 1:n
FH <- pcauchy(X, 0, 1)
arg <- (1 / FH - 1) / ((n - 0.5) / (j - 0.75) - 1)
out <- sum((log(arg))^2)
return(out)
}
# ~~~~~~~~~~~~~~~~~~~~~~ (7) Zhang's K ~~~~~~~~~~~~~~~~~~~~~~~
ZKn <- function(X) {
n <- length(X)
j <- 1:n
FH <- pcauchy(X, 0, 1)
T1 <- n * (j - 0.5) / (n - j + 0.5)^2 * log((j - 0.5) / (n * FH))
T2 <- n / (n - j + 0.5) * log((n - j + 0.5) / (n * (1 - FH)))
out <- 2 * sum(T1 + T2)
return(out)
}
# ~~~~~~~~~~~~~~~~~~~~~~ (8) Gurtler and Henze (Cha fun) ~~~~~~~~~~~~~~~~~~~
Dn <- function(X) {
n <- length(X) # Sample size
j <- 1:n # Index vector for elements in X
k <- 1:n # Index vector for elements in X
T1 <- (2 / n) * sum(sum(5 / (25 + outer(X, X, "-")^2)))
T2 <- sum(6 / (36 + X^2))
T3 <- (2 * n) / 7
out <- T1 - 4 * T2 + T3
return(out) # Return the computed statistic
}
# ~~~~~~~~~~~~~~~~~~~ (9) Kullback-Leibler ~~~~~~~~~~~~~~~~~~~
HVm <- function(X, m) {
n <- length(X)
j <- 1:n
ind1 <- pmin(j + m, n)
ind2 <- pmax(j - m, 1)
out <- mean(log(n / (2 * m) * (X[ind1] - X[ind2])))
return(out)
}
KLn <- function(X, m) {
n <- length(X)
out <- exp(-HVm(X, m) - mean(log(dcauchy(X, 0, 1))))
return(out)
}
# ~~~~~~~~~~~~~~~~~~~~~~ (10) Bowman ~~~~~~~~~~~~~~~~~~~~~~
fhHat <- function(X) {
n <- length(X)
s <- sd(X)
h <- 1.06 * s * n^(-1 / 5)
out <- rep(0, n)
for (j in 1:n) {
out[j] <- mean(dnorm((X - X[j]) / h)) / h
}
return(out)
}
HBn <- function(X) {
fH <- fhHat(X)
out <- -mean(log(fH))
return(out)
}
# ~~~~~~~~~~~~~~~~~~~~~~ (11) Van (1992) ~~~~~~~~~~~~~~~~~~~~~~
HEn <- function(X, m) {
n <- length(X)
X <- sort(X) # Ensure the sample is sorted
j <- 1:n
X1 <- X[1]
Xn <- X[n]
x_bar <- mean(X)
s_squared <- var(X)
k <- 4 # Assuming k is 4
a <- x_bar - k * sqrt(s_squared)
b <- x_bar + k * sqrt(s_squared)
Y <- numeric(n)
for (i in 1:n) {
if (i <= m) {
Y[i] <- a + ((i - 1) / m) * (X1 - a)
} else if (i <= n - m) {
Y[i] <- X[i]
} else {
Y[i] <- b - ((n - i) / m) * (b - Xn)
}
}
d <- numeric(n)
for (i in 1:n) {
if (i <= m) {
d[i] <- 1 + ((i + 1) / m) - (i / m^2)
} else if (i <= n - m) {
d[i] <- 2
} else {
d[i] <- 1 + ((n - i) / (m + 1))
}
}
ind1 <- pmin(j + m, n)
ind2 <- pmax(j - m, 1)
out <- mean(log(n / (d[(m + 1):(n - m)] * m) * (Y[ind1] - Y[ind2])))
return(out)
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#~~~~~~~~~~~~~~~~~~~~~~~ Function to calculate power statistics ~~~~~~~~~~~~~
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
powersWS <- function(TS, alpha) {
TS_X <- TS[, 1]
TS_S <- TS[, 2]
# Clean TS_S of NA values
TS_S_clean <- na.omit(TS_S)
if (length(TS_S_clean) == 0) {
stop("TS_S contains only NA values.")
}
#cv <- quantile(TS_S_clean, 1-alpha , na.rm = TRUE) # one sided test
cv1 <- quantile(TS_S_clean, alpha/2 , na.rm = TRUE)
cv2 <- quantile(TS_S_clean, 1 - (alpha/2) , na.rm = TRUE)
out <-mean(as.integer(TS_X< cv1))+mean(as.integer(TS_X> cv2)) # two sided test
#out <- mean(TS_X > cv)
return(out)
}
# Parameters for Simulation
sample_sizes <- c( 20, 40, 60, 80, 100)
Range <- 1:7
MC <- 10000
alpha <- 0.05
meth <- "MLE"
ntests0 <- 11 # Updated number of tests
set.seed(1234)
# Store results in a data frame
Results_table <- data.frame()
for (DistNum in Range) {
dist_name <- switch(DistNum,
"1" = "Cauchy",
"2" = "t",
"3" = "Normal",
"4" = "Gamma",
"5" = "Laplace",
"6" = "Beta",
"7" = "Uniform")
for (n in sample_sizes) {
m <- n / 2 # Window size
p <- numeric(ntests0)
TS_KS <- matrix(0, MC, 2)
TS_CV <- matrix(0, MC, 2)
TS_AD <- matrix(0, MC, 2)
TS_MA <- matrix(0, MC, 2)
TS_ZA <- matrix(0, MC, 2)
TS_ZB <- matrix(0, MC, 2)
TS_ZK <- matrix(0, MC, 2)
TS_D <- matrix(0, MC, 2)
TS_KL <- matrix(0, MC, 2)
TS_HE <- matrix(0, MC, 2)
for (j in 1:MC) {
simdata <- SimFromDist(DistNum, n)
X <- sort(simdata$X)
if (meth == "MLE") {
parmH <- CauchyPar(X)
mu_hat <- parmH$mu_hat
sigma_hat <- parmH$sigma_hat
} else {
mu_hat <- 0
sigma_hat <- 1
}
Xstar <- sort(rcauchy(n, mu_hat, sigma_hat))
if (meth == "MLE") {
parmHS <- CauchyPar(Xstar)
mu_hat_star <- parmHS$mu_hat
sigma_hat_star <- parmHS$sigma_hat
} else {
mu_hat_star <- 0
sigma_hat_star <- 1
}
TS_KS[j, ] <- c(KSn(X), KSn(Xstar))
TS_CV[j, ] <- c(CVn(X), CVn(Xstar))
TS_AD[j, ] <- c(ADn(X), ADn(Xstar))
TS_MA[j, ] <- c(MAn(X), MAn(Xstar))
TS_ZA[j, ] <- c(ZAn(X), ZAn(Xstar))
TS_ZB[j, ] <- c(ZBn(X), ZBn(Xstar))
TS_ZK[j, ] <- c(ZKn(X), ZKn(Xstar))
TS_D[j, ] <- c(Dn(X), Dn(Xstar))
TS_KL[j, ] <- c(KLn(X, m), KLn(Xstar, m))
TS_HE[j, ] <- c(HEn(X, m), HEn(Xstar, m))
}
p[1] <- powersWS(TS_KS, alpha)
p[2] <- powersWS(TS_CV, alpha)
p[3] <- powersWS(TS_AD, alpha)
p[4] <- powersWS(TS_MA, alpha)
p[5] <- powersWS(TS_ZA, alpha)
p[6] <- powersWS(TS_ZB, alpha)
p[7] <- powersWS(TS_ZK, alpha)
p[8] <- powersWS(TS_D, alpha)
p[9] <- powersWS(TS_KL, alpha)
p[10] <- powersWS(TS_HE, alpha)
# Append to Results_table
dist_results <- data.frame(
Distribution = dist_name,
n = n,
TS_KS = p[1],
TS_CV = p[2],
TS_AD = p[3],
TS_MA = p[4],
TS_ZA = p[5],
TS_ZB = p[6],
TS_ZK = p[7],
TS_D = p[8],
TS_KL = p[9],
TS_HE = p[10]
)
Results_table <- rbind(Results_table, dist_results)
}
}
# Print the final table
print(Results_table)
# Optionally save the table to a CSV file
# write.csv(Results_table, file = "Results_GOF_Maxwell_Distributions.csv", row.names = FALSE)