# -----------------------------------------------------------------
# Alle Beispiele aus dem NDK-Skript Varianzanalyse IIa
#
# Autor: H.-R. Roth
# Version vom 17.4.2002
# 
# -----------------------------------------------------------------
#
# 
# -----------------------------------------------------------------
#  Beispiel 2.1 (BIB)
# 
# -----------------------------------------------------------------
Zeit <- cbind(
         c(73,NA,73,75), c(74,75,75,NA), c(NA,67,68,72), c(71,72,NA,75))

d21 <- data.frame(
     Zeit  = as.vector(Zeit),
     Block = gl(n=4,k=4,length=16, labels=c("B1","B2","B3","B4")),
     Kata  = gl(n=4,k=1,length=16, labels=c("Kat1","Kat2","Kat3","Kat4")))
d21 <- subset(d21, subset=!is.na(Zeit))

par(mfrow=c(1,1))
y <- d21$Zeit + 0.2*(runif(12)-0.5)
plot.default(d21$Kata, y, xlab="", ylab="Reaktionszeit",xaxt="n") 
axis(1,at=1:4,labels=levels(d21$Kata))


##  Type I SS (von Reihenfolge abhngig)
##  ------------------------------------
formula1 <- Zeit ~ Block + Kata
formula2 <- Zeit ~ Kata + Block

fit1 <- aov(formula1, data=d21); anova(fit1)

fit2 <- aov(formula2, data=d21); anova(fit2)

##  Type III SS (von Reihenfolge unabh.)
##  ------------------------------------
drop1(fit2, .~. , test="F")


##  unbereinigte Behandlungsmittelwerte 
##  -----------------------------------
##  Der Output von model.tables(fit2, type="means", se=T) ist von
##  der Reihenfolge der Faktoren in der Formel abhngig!!!
tapply(d21$Zeit,d21$Kata,mean)


##  geschtzte Effekte, Version 1
##  (Koeff. der 1. Stufe wird gleich 0 gesetzt)
##  -------------------------------------------
options(contrasts = c(factor="contr.treatment", ordered="contr.poly"))

dummy.coef(aov(formula1, data=d21))

##  geschtzte Effekte, Version 2
##  (Summe der Koeff. ist gleich null)
##  ----------------------------------
options(contrasts = c(factor="contr.sum", ordered="contr.poly"))

dummy.coef(aov(formula1, data=d21))

##  Regressions-Output
##  ------------------
summary.lm(fit1)

##  Residuenanalyse
##  ---------------
par(mfrow=c(1,2))
tap <- "Tukey-Anscombe Plot"
plot(fit1$fitted, fit1$residuals, main=tap, xlab="angepasste Werte", 
     ylab="Residuen")
abline(h=0,lty=2)
qqnorm(fit1$residuals, ylab="Residuen"); qqline(fit1$residuals)
par(mfrow=c(1,1))


# 
# -----------------------------------------------------------------
#  Beispiel 3.1 (Split Plot)
#
# -----------------------------------------------------------------
y <- cbind(
c(11.8, 7.5, 9.7, 6.4, 8.3, 8.4,11.8, 8.5, 9.2,10.6,11.4, 7.2),
c(15.6,10.8,10.3,14.7,16.2,11.2,14.0,11.5, 9.9,10.8, 4.8, 9.8),
c( 9.7, 8.8,12.5, 9.4, 5.4,12.9,11.2, 7.8,12.1,15.7, 7.6, 9.4),
c(13.2,11.3,11.0,10.7,16.5,11.1,10.8, 8.5,12.5,14.3,15.9, 7.5),
c( 7.0, 9.1, 7.1, 6.3, 5.7, 8.4, 6.1, 8.8, 3.3, 6.9, 1.0, 2.6),
c(12.6,15.4,14.2,11.3,12.6,12.3,14.4,14.1,10.2,11.6,10.4,12.2))

# Die Stufen haben etwas andere Namen als im Skript.
d31 <- data.frame(
     Ertrag = as.vector(y),
     Datum=gl(3,24,length=72,labels=c("D1","D2","D3")),
     Sorte=gl(6, 4,length=72,labels=c("S1","S2","S3","S4","S5","S6")),
     Block=gl(4, 1,length=72,labels=c("B1","B2","B3","B4")))

##  graphische Darstellung der Daten
##  --------------------------------
x  <- as.numeric(d31$Datum) + seq(-.3,.2,0.1)[d31$Sorte]
plot(x, d31$Ertrag, xlab="Abdeckdatum", ylab="Ertrag", xlim=c(0.5,4),
                     pch=c(1,2,3,4,5,6)[d31$Sorte],xaxt="n",cex=0.8)
axis(1,at=1:3,labels=levels(d31$Datum)) 
legend(3.5,10,levels(d31$Sorte),pch=c(1,2,3,4,5,6))

interaction.plot(d31$Datum, d31$Sorte ,d31$Ertrag, main="Interaction Plot",
                 xlab="Abdeckdatum", ylab="Ertrag", trace.label="Sorte")

##  vorlufige Analyse: ANOVA fr feste Effekte
##  -------------------------------------------
fit  <- aov(Ertrag ~ Datum*Sorte + Block + Datum:Block, data=d31)
(su  <- anova(fit))

model.tables(fit,type="means",se=F)

# Analyse des Main Plot Faktors
# (berechnet nach der Formel auf Seite 23 oben)
# ---------------------------------------------
means.mplot <- tapply(d31$Ertrag,d31$Datum,mean)
mse.mplot   <- su$"Mean Sq"[5]
df.mplot    <- su$"Df"[5]
n.mplot     <- table(d31$Datum)
var.mplot   <- mse.mplot/n.mplot

# herkmmliche Kontraste der Mittelwerte
# --------------------------------------
# (sind hier nicht besonders sinnvoll wegen der sign. Interaktion)
con <- matrix(c(
   1,-1, 0,
   1, 0,-1,
   0, 1,-1),3,3)

# Differenzen zwischen D1 und D2 etc.
(estcon <- means.mplot%*%con)

# empirische Varianz und se der Kontraste
# (analog zur Formel (1.12) aus dem Va-1 Skript mit "Fehler 1"
#  berechnet).
D <- diag(1/n.mplot)
var.estcon <- mse.mplot*t(con)%*%D%*%con
(se.estcon  <- sqrt(diag(var.estcon)))

# entspr. t- und p-Werte (ohne Alpha-Korrektur)
(t.estcon <- estcon / se.estcon)

(p.estcon <- 2*(1-pt(abs(t.estcon),df.mplot)))


## Main Plot Analyse mit aggregierten Daten
## ----------------------------------------
mp <- aggregate(d31$Ertrag,list(d31$Block,d31$Datum), mean)
dimnames(mp)[[2]] <- c("Block","Datum","Ertrag")

summary( aov(Ertrag ~ Block + Datum, data=mp))

## Analyse mit lme()
## -----------------
library(grid)
library(lattice)
library(nlme)
fitm <- lme(Ertrag ~ Datum*Sorte, data=d31, random = ~1 | Block/Datum)

# feste Effekte
# -------------
anova(fitm)

# zufllige Effekte
# -----------------
intervals(fitm, which="var-cov")


# Residuenanalyse
# ---------------
tap <- "Tukey-Anscombe Plot"
plot.lme(fitm, main=tap)
qqnorm.lme(fitm)

## einige Kontraste (Vorsicht auf Reihenfolge)
## -------------------------------------------
options(contrasts = c("contr.sum", "contr.poly"))   # nicht vergessen!!!

fitD <- lme(Ertrag ~ -1 + Datum*Sorte, data=d31, random = ~1 | Block/Datum)

anova(fitD, L =c(-1,1,0))

anova(fitD, L =c(-1,0,1))

anova(fitD, L =c(0,-1,1))

fitS <- lme(Ertrag ~ -1 + Sorte*Datum, data=d31, random = ~1 | Block/Datum)

anova(fitS, L =c(-1,1,0,0,0,0))

anova(fitS, L =c(0,0,-1,0,0,1))


# 
# -----------------------------------------------------------------
#  Beispiel 4.1 (2^3 Versuch)
# 
# -----------------------------------------------------------------
d41   <- expand.grid(A = c(-1,+1), B = c(-1, +1), C = c(-1,+1))

d41$Y <- c(297,300,106,131,177,178,76,109)

##  diverse Modelle
##  ---------------
formula0 <- Y ~ A*B*C
formula1 <- Y ~ (A+B+C)^2
formula2 <- Y ~ (A+B+C)^2 - A:C

##  Effekte mit Regression
##  ----------------------

# alle Effekte
fit <- lm(formula0, data=d41); summary(fit)

(Effekte  <- 2*fit$coefficients[2:8])

# ohne Interaktion ABC
summary( lm(formula1, data=d41))

##  Normal plot der geschtzen Effekte
##  ----------------------------------
p  <- ppoints(length(Effekte))
y  <- qnorm(p)
x  <- sort(Effekte)
plot(x, y, xlab="Effekt", main="Normal plot")
text(x, y, labels=names(x), pos=4, cex=0.7)

##  Half-normal plot der geschtzen Effekte
##  ---------------------------------------
p  <- 0.5 + ppoints(length(Effekte))/2
y  <- qnorm(p)
x  <- sort(abs(Effekte))
plot(x, y, xlab="absoluter Effekt", main="Half-normal plot")
text(x, y, labels=names(x),pos=4, cex=0.7)
#   Gerade im Half-normal plot
t.medy <- median(y)
t.medx <- median(x)
abline(a=0,b=t.medy/t.medx)

##  Pareto chart der geschtzen Effekte
##  -----------------------------------
x  <- sort(abs(Effekte))
par(las=1)
barplot(x, names.arg=names(x), space=0.8, horiz=T, main="Pareto chart",
        xlab="abs(Effekt)",xlim=c(-1,150))
par(las=0)

##  ANOVA vorbereiten
##  -----------------
d41$A <- factor(d41$A, labels=c("A-","A+"))
d41$B <- factor(d41$B, labels=c("B-","B+"))
d41$C <- factor(d41$C, labels=c("C-","C+"))

##  alle Interaction plots
##  ----------------------
par(mfrow=c(2,2))
interaction.plot(d41$A, d41$B, d41$Y, xlab="", ylab="Y", trace.label="")
interaction.plot(d41$A, d41$C, d41$Y, xlab="", ylab="Y", trace.label="")
interaction.plot(d41$B, d41$C, d41$Y, xlab="", ylab="Y", trace.label="")
par(mfrow=c(1,1))

##  "Sum"-Kontrast (empfohlen)
##  --------------------------
options(contrasts=c("contr.sum","contr.poly"))

fit1 <- aov(formula1, data=d41); anova(fit1)
dummy.coef(fit1)

fit2 <- aov(formula2, data=d41); anova(fit2)
dummy.coef(fit2)

##  "Treatment"-Kontrast (nicht empfohlen)
##  --------------------------------------
options(contrasts=c("contr.treatment","contr.poly"))

dummy.coef( aov(formula1, data=d41))
dummy.coef( aov(formula2, data=d41))


# 
# -----------------------------------------------------------------
#  Beispiel 4.2 (2^(5-1) Versuch)
# 
# -----------------------------------------------------------------
d42 <- data.frame(
       Y = c(4.8, 5.0, 5.8, 2.2, 4.6, 4.2, 3.0, 5.2,
             2.9, 2.2, 8.4, 6.6, 5.3, 2.7, 7.0, 8.9),
       A = rep(c(-1,+1),8),
       B = rep(c(-1,-1,+1,+1),4),
       C = rep(c(-1,-1,-1,-1,+1,+1,+1,+1),2),
       D = c(rep(-1,8),rep(+1,8)))

d42$E <- -d42$A*d42$B*d42$C*d42$D

d42$AC <- d42$A*d42$C
d42$DE <- d42$D*d42$E

##  diverse Modelle
##  ---------------
formula0  <- Y ~ A*B*C*D*E
formula1  <- Y ~ A+B+C+D+E + D:B + D:E + A:C
formula2  <- Y ~ B*D + AC + DE


##  alle Effekte mit Regression
##  ---------------------------
fit  <- lm(formula0, data=d42); summary(fit)

(Effekte  <- 2*fit$coefficients[2:16])

##  Normal plot der geschtzen Effekte
##  ----------------------------------
p  <- ppoints(length(Effekte))
y  <- qnorm(p)
x  <- sort(Effekte)
plot(x, y, xlab="Effekt", main="Normal plot")
text(x, y, labels=names(x), pos=4, cex=0.7)

##  Half-normal plot der geschtzen Effekte
##  ---------------------------------------
p <- 0.5 + ppoints(length(Effekte))/2
y <- qnorm(p)
x <- sort(abs(Effekte))
plot(x, y, xlab="absoluter Effekt", main="Half-normal plot")
text(x, y, labels=names(x),pos=4, cex=0.7)
#   Gerade im Half-normal plot
t.medy <- median(y)
t.medx <- median(x)
abline(a=0,b=t.medy/t.medx)

##  Pareto chart der geschtzen Effekte
##  -----------------------------------
x <- sort(abs(Effekte))
par(las=1)
barplot(x, names.arg=names(x), space=0.8, horiz=T, main="Pareto chart", 
xlab="abs(Effekt)")
par(las=0)

##  wichtige Effekte mit Regression
##  -------------------------------
summary( lm(formula2, data=d42))

##  ANOVA vorbereiten
##  -----------------
d42$A <- factor(d42$A, labels=c("A-","A+"))
d42$B <- factor(d42$B, labels=c("B-","B+"))
d42$C <- factor(d42$C, labels=c("C-","C+"))
d42$D <- factor(d42$D, labels=c("D-","D+"))
d42$E <- factor(d42$E, labels=c("E-","E+"))

##  wichtige Interaction plots
##  --------------------------
par(mfrow=c(2,2))
interaction.plot(d42$A, d42$C, d42$Y, xlab="", ylab="Y", trace.label="")
interaction.plot(d42$B, d42$D, d42$Y, xlab="", ylab="Y", trace.label="")
interaction.plot(d42$D, d42$E, d42$Y, xlab="", ylab="Y", trace.label="")
par(mfrow=c(1,1))

##  "Sum"-Kontrast (empfohlen)
##  --------------------------
options(contrasts=c("contr.sum","contr.poly"))

fit1 <- aov(formula1, data=d42); anova(fit1)

fit2 <- aov(formula2, data=d42); anova(fit2)
dummy.coef(fit2)


# 
# -----------------------------------------------------------------
#  Beispiel Antikrper-Produktion, Kapitel 5
# 
# -----------------------------------------------------------------
mp1  <- c(-1,+1)
d51  <- expand.grid(CelNum = mp1, VolPrs = mp1, Prime1 = mp1, RadDos = mp1)

d51$Growth <- d51$CelNum * d51$VolPrs * d51$Prime1
d51$Prime2 <- d51$VolPrs * d51$Prime1 * d51$RadDos

d51$Ausbeute <- c(70,150,34,32,137.5,56,123,225,50,2.7,1.2,12,90,2.1,4,15)

anova(lm(Ausbeute ~ (CelNum + VolPrs + Prime1 + RadDos + Growth + Prime2)^2,
         data=d51))


#  Daten von Tabelle 5.6: Central Composite Design
#  -----------------------------------------------
a      <- sqrt(2)
RadDos <- c(100, 300, 100, 300,  59, 341, 200, 200, 200, 200, 200)
Prime1 <- c(  7,   7,  21,  21,  14,  14,   4,  24,  14,  14,  14)
x1     <- c( -1,  +1,  -1,  +1,  -a,  +a,   0,   0,   0,   0,   0)
x2     <- c( -1,  -1,  +1,  +1,   0,   0,  -a,  +a,   0,   0,   0)
y      <- c(207, 306, 257, 570, 100, 513, 315, 154, 630, 528, 609)

d52  <- data.frame(RadDos,Prime1,x1,x2,y)

summary( lm(y ~ (RadDos + Prime1)^2 + I(RadDos^2) + I(Prime1^2) , data=d52))


#  Darstellung der Wirkungsflche
#  ------------------------------
x <-  seq(100,300, length=51)
y <-  seq(  7, 22, length=21)

f <- function(x,y)
      {
      -608.44 +5.237*x +77*y + 0.0764*x*y -0.01265*x*x -3.243*y*y
      }
z <- outer(x, y, f)

par(bg = "white")
persp(x, y, z, theta = 30, phi = 10, expand=0.5, col="lightblue",
      xlab="RadDos", ylab="Prime1", zlab="Y", ticktype="detailed")

contour(x, y, z, xlab="RadDose", ylab="Prime1")

filled.contour(x, y, z, xlab="RadDose", ylab="Prime1")

# Daten aus Tabelle 5.8: steilster Anstieg
# ----------------------------------------
d53 <-subset(d51,subset=(Growth==+1) & Prime2==-1)

summary( lm(Ausbeute ~ (Prime1 + RadDos)^2 , data=d53))

# ------ THE END --------------------------------------------------
p  <- ppoints(length(Effekte))
y  <- qnorm(p)
x  <- sort(Effekte)
plot(x, y, xlab="Effekt", main="Normal plot")
text(x, y, labels=names(x), pos=4, cex=0.7)
#t.y <- quantile(y,probs=c(0.25,.75))
t.y <- qnorm(c(3/8, 1-3/8))
t.x <- quantile(x,probs=c(3/8,1-3/8))
t.X <- cbind(c(1,1),t.x)
abline(solve(t.X,t.y))

# Siehe R-Code von qqline. In R braucht man die Funktion Solve nicht
# sondern verwendet das Steigungsdreieck.

library(MASS)
library(lqs)
r.res <- rlm(y ~ x,)# method = c("M"))
abline(coef(r.res))


p  <- 0.5 + ppoints(length(Effekte))/2
y  <- qnorm(p)
x  <- sort(abs(Effekte))
plot(x, y, xlab="absoluter Effekt", main="Half-normal plot")
text(x, y, labels=names(x),pos=4, cex=0.7)

t.y <- median(y)
t.x <- median(x)
t.x;t.y
abline(a=0,b=t.y/t.x)
