[R] Using R to illustrate the Central Limit Theorem

Sat Apr 23 14:05:46 CEST 2005

(Ted Harding) wrote:

>On 21-Apr-05 Bill.Venables at csiro.au wrote:
>  
>
>>Here's a bit of a refinement on Ted's first suggestion.
>>[ corrected from runif(M*k), N, k) to runif(N*k), N, k) ]
>> 
>> N <- 10000
>> graphics.off()
>> par(mfrow = c(1,2), pty = "s")
>> for(k in 1:20) {
>>    m <- (rowMeans(matrix(runif(N*k), N, k)) - 0.5)*sqrt(12*k)
>>    hist(m, breaks = "FD", xlim = c(-4,4), main = k,
>>            prob = TRUE, ylim = c(0,0.5), col = "lemonchiffon")
>>    pu <- par("usr")[1:2]
>>    x <- seq(pu[1], pu[2], len = 500)
>>    lines(x, dnorm(x), col = "red")
>>    qqnorm(m, ylim = c(-4,4), xlim = c(-4,4), pch = ".", col = "blue")
>>    abline(0, 1, col = "red")
>>    Sys.sleep(1)
>>  }
>>    
>>
>
>Very nice! (I can better keep up with it mentally, though, with
>Sys.sleep(2) or Sys.sleep(3), which moght be better for classroom
>demo).
>
>One thing occurred to me, watching it: people might say "Yes,
>we can see how the distribution -> Normal, nice and smooth,
>especially in the tails and side-arms; but the peaks always look
>a bit rough."
>
>Which could be the cue for introducing "SD(ni) = sqrt(E[ni])",
>and the following hack of the above code seems to show this OK
>in the "rootograms":
>
>N <- 10000
>graphics.off()
>par(mfrow = c(1,2), pty = "s")
>for(k in 1:20) {
>   m <- (rowMeans(matrix(runif(N*k), N, k)) - 0.5)*sqrt(12*k)
>   hm <- hist(m, breaks = "FD", xlim = c(-4,4), main = k, plot=FALSE,
>           prob = TRUE, ylim = c(0,0.5), col = "lemonchiffon")
>   hm$counts<-sqrt(hm$counts) ; 
>   plot(hm,xlim = c(-4,4),main = k,ylab="sqrt(Frequency)")
>   pu <- par("usr")[1:2]
>   x <- seq(pu[1], pu[2], len = 500)
>   lines(x, sqrt(N*dnorm(x)*(hm$breaks[2]-hm$breaks[1])), col = "red")
>   qqnorm(m, ylim = c(-4,4), xlim = c(-4,4), pch = ".", col = "blue")
>   abline(0, 1, col = "red")
>   Sys.sleep(2)
>}
>
>(and also shows clearly how the tails of the sample move outwards
>into the tails of the Normal, as in fact you expect from the finite
>range of mean(runif(k)), especially initially: very visible for
>k up to about 5, and not really settled down for k<10).
>
>Next stop: Hanging rootograms!
>
>Best wishes,
>Ted.
>
>
>--------------------------------------------------------------------
>E-Mail: (Ted Harding) <Ted.Harding at nessie.mcc.ac.uk>
>Fax-to-email: +44 (0)870 094 0861
>Date: 22-Apr-05                                       Time: 13:10:19
>------------------------------ XFMail ------------------------------
>
>______________________________________________
>R-help at stat.math.ethz.ch mailing list
>https://stat.ethz.ch/mailman/listinfo/r-help
>PLEASE do read the posting guide! http://www.R-project.org/posting-guide.html
>  
>
Hello,

here is another idea to illustrate the Central Limit Theorem which is 
based on our package "distr".

# Illustration of the Central Limit Theorem
# using package "distr"
require(distr)
CLT <- function(Distr, n, sleep = 1){
# Distr: object of class "AbscontDistribution"
# n: iterations
# sleep: time to sleep
    graphics.off()
    par(mfrow = c(1,2))

    # expectation of Distr
    fun1 <- function(x, Distr){x*d(Distr)(x)}
    E <- try(integrate(fun1, lower = q(Distr)(0), upper = q(Distr)(1), 
Distr = Distr)$value,
             silent = TRUE)
    if(!is.numeric(E))
        E <- try(integrate(fun1, lower = q(Distr)(distr::TruncQuantile),
                           upper = q(Distr)(1-distr::TruncQuantile), 
Distr = Distr)$value,
                 silent = TRUE)
    # standard deviation of Distr
    fun2 <- function(x, Distr){x^2*d(Distr)(x)}
    E2 <- try(integrate(fun2, lower = q(Distr)(0), upper = q(Distr)(1), 
Distr = Distr)$value,
              silent = TRUE)
    if(!is.numeric(E2))
        E2 <- try(integrate(fun2, lower = q(Distr)(distr::TruncQuantile),
                            upper = q(Distr)(1-distr::TruncQuantile), 
Distr = Distr)$value,
                  silent = TRUE)
    std <- sqrt(E2 - E^2)

    Sn <- 0
    N <- Norm()
    for(k in 1:n) {
        Sn <- Sn + Distr
        Tn <- (Sn - k*E)/(std*sqrt(k))

        x <- seq(-5,5,0.01)
        dTn <- d(Tn)(x)
        ymax <- max(1/sqrt(2*pi), dTn)
        plot(x, d(Tn)(x), ylim = c(0, ymax), type = "l", ylab = 
"densities", main = k, lwd = 4)
        lines(x, d(N)(x), col = "orange", lwd = 2)
        plot(x, p(Tn)(x), ylim = c(0, 1), type = "l", ylab = "cdfs", 
main = k, lwd = 4)
        lines(x, p(N)(x), col = "orange", lwd = 2)
        Sys.sleep(sleep)
    }
}

# some examples
distroptions("DefaultNrFFTGridPointsExponent", 13)
CLT(Distr = Unif(), n = 20, sleep = 0)
CLT(Distr = Exp(), n = 20, sleep = 0)
CLT(Distr = Chisq(), n = 20, sleep = 0)
CLT(Distr = Td(df = 5), n = 20, sleep = 0)
CLT(Distr = Beta(), n = 20, sleep = 0)
distroptions("DefaultNrFFTGridPointsExponent", 14)
CLT(Distr = Lnorm(), n = 20, sleep = 0)