# [R] ks.test()

franck allaire franck151 at hotmail.com
Thu Aug 28 13:50:14 CEST 2003

```Dear All

I am trying to replicate a numerical application (not computed on R) from an
article. Using, ks.test() I computed the exact D value shown in the article
but the p-values I obtain are quite different from the one shown in the
article.

The tests are performed on a sample of 37 values (please see "[0] DATA"
below) for truncated Exponential, Pareto and truncated LogNormal (please see
"[1] FUNCTIONS" below). You can find below the commands I use to compute the
tests ("[2] COMMANDS") and the p-values presented in the article.

Can anyone explain the difference between these two set of p-values? Maybe
the explanation is linked to my second question: Can anyone confirm me that
p-values calculated in the Kolmogrov-Smirnov g-o-f are independent from the
family of the distribution assumed in H0?

Anderson-Darling test is also performed in the article. would anyone have
functions to calculate pvalues for exponential, lognormal, weibull, etc?

Franck Allaire
R 1.6.2

###############
# [0] DATA # truncation point is 30 i.e. all values are above 30
###############
39.7
582
439.9
47.1
83.9
41.2
893.1
192
106.2
216.7
1243.4
52.8
351.6
36.2
431.5
57.3
1602.1
822.2
260.1
58.7
6299.9
814.9
137.2
203.8
1263.5
53.7
1313
118.4
167.8
70.1
503.7
64.8
529.9
87.8
2465.4
317.9
2753.9

###############
# [1] FUNCTIONS
###############

#EXP
exptfit_function(x,b, plot.it = F, lty = 1){

if (mode(x) != "numeric")
stop("need numeric data")
x <- x[!is.na(x)]
x <- sort(x)
lambda <- length(x)/sum(x-b)
if(plot.it) { lines(x, dexpt(x, lambda = lambda, b = b), lty =
lty,col="red")}
result <- list(lambda = lambda, b = b)
class(result) <- "expt"
result}

dexpt<-function(x,lambda,b) dexp(x-b,rate=lambda)
pexpt<-function(x,lambda,b) pexp(x-b,rate=lambda)
qexpt<-function(p,lambda,b) qexp(p,rate=lambda)+b
rexpt<-function(n,lambda,b) rexp(n,rate=lambda)+b

#PARETO
paretofit<-function(x,b, plot.it = F, lty = 1){

x <- sort(x)
alpha <- length(x)/sum(log(x/b))
if(plot.it) { lines(x, dpareto(x, alpha = alpha, b = b), lty =
lty,col="red")}
result <- list(alpha = alpha, b = b)
class(result) <- "pareto"
result}

dpareto<-function(x, alpha,b) ifelse(x<b,0,alpha*(b^alpha)/(x^(alpha+1)))
ppareto<-function(x, alpha,b) ifelse(x<b,0,1-(b/x)^alpha)
qpareto<-function(p, alpha,b) b*exp(-log(1-p)/alpha)
rpareto<-function(n, alpha,b) qpareto(runif(n),alpha=alpha,b=b)

#LN
lnormtfit_function(x,b, plot.it = F, lty = 1){

if (mode(x) != "numeric")
stop("need numeric data")
x <- x[!is.na(x)]
x <- sort(x)
y <- log(x-b)
n <- length(y)
mu <- mean(y)
sigma <- sd(y)
if(plot.it) { lines(x, dlnormt(x, meanlog=mu,sdlog=sigma,b=b), lty
= lty,col="red")}
result <- list(mu=mu,sigma=sigma,b=b)
class(result) <- "lnormt"
result}

dlnormt<-function(x,mu,sigma,b) dlnorm(x-b,meanlog=mu,sdlog=sigma)
plnormt<-function(x,mu,sigma,b) plnorm(x-b,meanlog=mu,sdlog=sigma)
qlnormt<-function(p,mu,sigma,b) qlnorm(p,meanlog=mu,sdlog=sigma)+b
rlnormt<-function(n,mu,sigma,b) rlnorm(n,meanlog=mu,sdlog=sigma)+b

###############
# [2] COMMANDS
###############

# EXP
tmp <- exptfit(data,b=30)
ks.test(data,"pexpt",lambda=tmp\$lambda,b=tmp\$b)
# Article: D= 0.2599 pvalue<<0.005

# PARETO
tmp <- paretofit(data,b=30)
ks.test(data, "ppareto", alpha=tmp\$alpha,b=tmp\$b)
# Article: D=0.14586 pvalue=0.16

# LN
tmp <- lnormtfit(data,b=30)
ks.test(data,"plnormt",mu=tmp\$mu,sigma=tmp\$sigma,b=tmp\$b)
# Article: D=0.07939 pvalue>>0.15

```