[Rd] rhyper has too high variance (PR#7314)

oehl_list at gmx.de oehl_list at gmx.de
Tue Oct 26 13:38:21 CEST 2004


Dear all,

it looks like rhyper() gives wrong results compared to theory and compared
to sample() and rghyper(SuppDists).

Best regards


Jens



K <- 100
J <- 60
N <- K+J
p <- K/N
n <- 50

nn <- 100000

urn <- rep(0:1, c(J,K))
x <- sapply(1:nn, function(i){
	sum(sample(urn, n))
})
y <- rhyper(nn, K,J, n)

require(SuppDists)
z <- rghyper(nn, a=K, k=n, N=N)  


# hypergeometric mean and variance
p*n
p*(1-p)*n*(N-n)/(N-1)

# check we have parametrized ghyper correctly
sghyper(a=K, k=n, N=N)[c("Mean", "Variance")]

var(x)
var(y)  # wrong
var(z)

version



> K <- 100
> J <- 60
> N <- K+J
> p <- K/N
> n <- 50
> 
> nn <- 100000
> 
> urn <- rep(0:1, c(J,K))
> x <- sapply(1:nn, function(i){
+ sum(sample(urn, n))
+ })
> y <- rhyper(nn, K,J, n)
> 
> require(SuppDists)
[1] TRUE
> z <- rghyper(nn, a=K, k=n, N=N)  
> 
> 
> # hypergeometric mean and variance
> p*n
[1] 31.25
> p*(1-p)*n*(N-n)/(N-1)
[1] 8.107311
> 
> # check we have parametrized ghyper correctly
> sghyper(a=K, k=n, N=N)[c("Mean", "Variance")]
$Mean
[1] 31.25

$Variance
[1] 8.107311

> 
> var(x)
[1] 8.060095
> var(y)  # wrong
[1] 8.61289
> var(z)
[1] 8.150472
> 
> version
         _              
platform i386-pc-mingw32
arch     i386           
os       mingw32        
system   i386, mingw32  
status                  
major    2              
minor    0.0            
year     2004           
month    10             
day      04             
language R






--



More information about the R-devel mailing list