#1234567890123456789012345678901234567890123456789012345678901234567890123456789
#Ruth Meili

# Aufgabe 1 der Serie 3:

# Teilaufgabe a)
# Daten einlesen:
d.air<-read.table("http://stat.ethz.ch/Teaching/Datasets/NDK/ozon.newyork.dat",header=T)
d.ozone<-d.air[,1]   # Ozonwerte
d.logozone<-log(d.ozone)  # Logarithmierte Ozonwerte

# Dichteschtzer und Boxplots:
par(mfrow=c(2,2))
plot(density(d.ozone))
plot(density(d.logozone))
boxplot(d.ozone)
boxplot(d.logozone)

# Kurtosis und Skewness Funktionen:
kurt<-function(x)
{
        xbar <- mean(x)
        n <- length(x)
        s <- sqrt(var(x)) * sqrt((n - 1)/n)
        num <- sum((x - xbar)^4)/n
        den <- s^4
        return(num/den - 3)
}
skew<-function(x)
{
        xbar <- mean(x)
        n <- length(x)
        s <- sqrt(var(x)) * sqrt((n - 1)/n)
        num <- sum((x - xbar)^3)/n
        den <- s^3
        return(num/den)
}

cat("-------------------------------------------------------------","\n")
print("skewness and kurtosis for d.ozone:")
print(c(skew(d.ozone),kurt(d.ozone)))
cat("-------------------------------------------------------------","\n")
print("skewness and kurtosis for log d.ozone:")
print(c(skew(d.logozone),kurt(d.logozone)))
cat("-------------------------------------------------------------","\n")
print("skewness and kurtosis for log ozone without 17:")
print(c(skew(d.logozone[-17]),kurt(d.logozone[-17])))
cat("-------------------------------------------------------------","\n")

#Teilaufgabe b)
# Scatterplots
d.air<-cbind(d.air,d.logozone)
pairs(d.air,panel=panel.smooth)

# Teilaufgabe c)
d.air<-d.air[-17,] # Ausreisser entfernen

# Lineares Model mit ozone als Zielvariable:
lm.oz<-lm(formula = ozone ~ radiation+temperature+wind,data=d.air)
res.lm.oz<-lm.oz$residuals
fit.lm.oz<-lm.oz$fitted.values
print(summary(lm.oz))

# Lineares Model mit Log ozone als Zielvariable:

lm.loz<-lm(formula = d.logozone ~ radiation+temperature+wind,data=d.air)
res.lm.loz<-lm.loz$residuals
fit.lm.loz<-lm.loz$fitted.values
print(summary(lm.loz))
# Die drei Variablen sind klar signifikant

# Teilaufgabe d)
library(mgcv)
# Additives Model mit ozone als Zielvariable:
am.oz<-gam(formula = ozone ~ s(radiation)+s(temperature)+s(wind),data=d.air)
res.am.oz<-am.oz$residuals
fit.am.oz<-am.oz$fitted.values
print(summary(am.oz))

# Additives Model mit d.logozone als Zielvariable:
am.loz<-gam(formula = d.logozone ~ s(radiation)+s(temperature)+s(wind),data=d.air)
res.am.loz<-am.loz$residuals
fit.am.loz<-am.loz$fitted.values
print(summary(am.loz))
# Die drei Variablen sind klar signifikant


# Graphische Darstellung von c) und d)

# Residuen versus Fitted values (R^2 wird auch berechnet)
par(mfrow=c(2,2))
plot(fit.lm.oz,res.lm.oz) # Tukey-Anscomb-Plot
abline(h=0,lty=2)
r2<-cor(fit.lm.oz,res.lm.oz+fit.lm.oz)^2
title(paste("Linear without transformation R2=",round(r2,2)))
plot(fit.lm.loz,res.lm.loz)
abline(h=0,lty=2)
r2<-cor(fit.lm.loz,res.lm.loz+fit.lm.loz)^2
title(paste("Linear with transformation R2=",round(r2,2)))
plot(fit.am.oz,res.am.oz)
abline(h=0,lty=2)
r2<-cor(fit.am.oz,res.am.oz+fit.am.oz)^2
title(paste("Additive without transformation R2=",round(r2,2)))
plot(fit.am.loz,res.am.loz)
abline(h=0,lty=2)
r2<-cor(fit.am.loz,res.am.loz+fit.am.loz)^2
title(paste("Additive with transformation R2=",round(r2,2)))
# Die R2 sind hher beim "Additiven Modell".
# Auch die Residuen sehen besser aus beim "additiven Modell".

#------------------------------------------------------------------------

# Absolut-Residuen versus Fitted values 
# (Spearman Korrelation wird auch berechnet)
par(mfrow=c(2,2))
plot(fit.lm.oz,abs(res.lm.oz))
abline(h=0,lty=2)
r<-cor(rank(fit.lm.oz),rank(abs(res.lm.oz)))
title(paste("Linear without transformation r=",round(r,2)))
plot(fit.lm.loz,abs(res.lm.loz))
abline(h=0,lty=2)
r<-cor(rank(fit.lm.loz),rank(abs(res.lm.loz)))
title(paste("Linear with transformation r=",round(r,2)))
plot(fit.am.oz,abs(res.am.oz))
abline(h=0,lty=2)
r<-cor(rank(fit.am.oz),rank(abs(res.am.oz)))
title(paste("Additive without transformation r=",round(r,2)))
plot(fit.am.loz,abs(res.am.loz))
abline(h=0,lty=2)
r<-cor(rank(fit.am.loz),rank(abs(res.am.loz)))
title(paste("Additive with transformation r=",round(r,2)))
# Konstanz der Varianz ist besser fr die "additiven Modelle", aber nicht
# unbedingt fr die Log-Transformation.

# Teilaufgabe e)
# Man kann diese Aufgabe entweder mit ozone oder logozone als Zielvariable 
# durchfhren

# Zuerst die 8 Modelle mit ozone:
am.oz0<-gam(formula = ozone ~ s(radiation)+s(temperature)+s(wind),data=d.air)
r2<-cor(am.oz0$fitted.values,am.oz0$fitted.values+am.oz0$residuals)^2
print(summary(am.oz0))
cat("R2=",round(r2,3),"\n")
cat("-------------------------------------------------------------","\n")
am.oz1<-gam(formula = ozone ~ radiation+s(temperature)+s(wind),data=d.air)
r2<-cor(am.oz1$fitted.values,am.oz1$fitted.values+am.oz1$residuals)^2
print(summary(am.oz1))
cat("R2=",round(r2,3),"\n")
cat("-------------------------------------------------------------","\n")
am.oz2<-gam(formula = ozone ~ s(radiation)+temperature+s(wind),data=d.air)
r2<-cor(am.oz2$fitted.values,am.oz2$fitted.values+am.oz2$residuals)^2
print(summary(am.oz2))
cat("R2=",round(r2,3),"\n")
cat("-------------------------------------------------------------","\n")
am.oz3<-gam(formula = ozone ~ s(radiation)+s(temperature)+wind,data=d.air)
r2<-cor(am.oz3$fitted.values,am.oz3$fitted.values+am.oz3$residuals)^2
print(summary(am.oz3))
cat("R2=",round(r2,3),"\n")
cat("-------------------------------------------------------------","\n")
am.oz4<-gam(formula = ozone ~ radiation+temperature+s(wind),data=d.air)
r2<-cor(am.oz4$fitted.values,am.oz4$fitted.values+am.oz4$residuals)^2
print(summary(am.oz4))
cat("R2=",round(r2,3),"\n")
cat("-------------------------------------------------------------","\n")
am.oz5<-gam(formula = ozone ~ radiation+s(temperature)+wind,data=d.air)
r2<-cor(am.oz5$fitted.values,am.oz5$fitted.values+am.oz5$residuals)^2
print(summary(am.oz5))
cat("R2=",round(r2,3),"\n")
cat("-------------------------------------------------------------","\n")
am.oz6<-gam(formula = ozone ~ s(radiation)+temperature+wind,data=d.air)
r2<-cor(am.oz6$fitted.values,am.oz6$fitted.values+am.oz6$residuals)^2
print(summary(am.oz6))
cat("R2=",round(r2,3),"\n")
cat("-------------------------------------------------------------","\n")
am.oz7<-gam(formula = ozone ~ radiation+temperature+wind,data=d.air)
drop1(am.oz7)
r2<-cor(am.oz7$fitted.values,am.oz7$fitted.values+am.oz7$residuals)^2
print(summary(am.oz7))
cat("R2=",round(r2,3),"\n")
cat("-------------------------------------------------------------","\n")


# Dasselbe mit logozone als Zielvariable:

am.loz0<-gam(formula = d.logozone ~ s(radiation)+s(temperature)+s(wind),data=d.air)
r2<-cor(am.loz0$fitted.values,am.loz0$fitted.values+am.loz0$residuals)^2
print(summary(am.loz0))
cat("R2=",round(r2,3),"\n")
cat("-------------------------------------------------------------","\n")
am.loz1<-gam(formula = d.logozone ~ radiation+s(temperature)+s(wind),data=d.air)
r2<-cor(am.loz1$fitted.values,am.loz1$fitted.values+am.loz1$residuals)^2
print(summary(am.loz1))
cat("R2=",round(r2,3),"\n")
cat("-------------------------------------------------------------","\n")
am.loz2<-gam(formula = d.logozone ~ s(radiation)+temperature+s(wind),data=d.air)
r2<-cor(am.loz2$fitted.values,am.loz2$fitted.values+am.loz2$residuals)^2
print(summary(am.loz2))
cat("R2=",round(r2,3),"\n")
cat("-------------------------------------------------------------","\n")
am.loz3<-gam(formula = d.logozone ~ s(radiation)+s(temperature)+wind,data=d.air)
r2<-cor(am.loz3$fitted.values,am.loz3$fitted.values+am.loz3$residuals)^2
print(summary(am.loz3))
cat("R2=",round(r2,3),"\n")
cat("-------------------------------------------------------------","\n")
am.loz4<-gam(formula = d.logozone ~ radiation+temperature+s(wind),data=d.air)
r2<-cor(am.loz4$fitted.values,am.loz4$fitted.values+am.loz4$residuals)^2
print(summary(am.loz4))
cat("R2=",round(r2,3),"\n")
cat("-------------------------------------------------------------","\n")
am.loz5<-gam(formula = d.logozone ~ radiation+s(temperature)+wind,data=d.air)
r2<-cor(am.loz5$fitted.values,am.loz5$fitted.values+am.loz5$residuals)^2
print(summary(am.loz5))
cat("R2=",round(r2,3),"\n")
cat("-------------------------------------------------------------","\n")
am.loz6<-gam(formula = d.logozone ~ s(radiation)+temperature+wind,data=d.air)
r2<-cor(am.loz6$fitted.values,am.loz6$fitted.values+am.loz6$residuals)^2
print(summary(am.loz6))
cat("R2=",round(r2,3),"\n")
cat("-------------------------------------------------------------","\n")
am.loz7<-gam(formula = d.logozone ~ radiation+temperature+wind,data=d.air)
r2<-cor(am.loz7$fitted.values,am.loz7$fitted.values+am.loz7$residuals)^2
print(summary(am.loz7))
cat("R2=",round(r2,3),"\n")
cat("-------------------------------------------------------------","\n")


#1234567890123456789012345678901234567890123456789012345678901234567890123456789
#Ruth Meili

# Aufgabe 2 von Serie 3

# Teilaufgabe a)
# dAten einlesen
cantmp<-read.table("http://stat.ethz.ch/Teaching/Datasets/NDK/temperatur2.dat")
 n <-dim(cantmp)[1]
 ind <-seq(.5,11.5,1)
 lab <-   c("St. John_s",    "Charlottetown", "Halifax" ,
                   "Sydney",        "Yarmouth",      "Fredericton",
                   "Arvida",        "Montreal",      "Quebec City",
                   "Schefferville", "Sherbrooke",    "Kapuskasing",
                   "London",        "Ottawa",        "Thunder Bay",
                   "Toronto",       "Churchill",     "The Pas",
                   "Winnipeg",      "Prince Albert", "Regina",
                   "Beaverlodge",   "Calgary",       "Edmonton",
                   "Kamloops",      "Prince George", "Prince Rupert",
                   "Vancouver",     "Victoria",      "Dawson",
                   "Whitehorse",    "Frobisher Bay", "Inuvik",
                   "Resolute",      "Yellowknife")
 
 row.names(cantmp) <- lab
# Graphische Darstelllung der Daten:
plot(0,0,xlim=c(0,12),ylim=c(min(cantmp),max(cantmp)),type="n",xlab="month",ylab="temperature")
for(i in 1:n) points(ind,cantmp[i,],pch=i)
for(i in 1:n) lines(ind,cantmp[i,],lty=2)

# Teilaufgabe b)
grid<-seq(0,12,0.1) # Definition des feineren Gitters
N<-length(grid)

# Kurvenglttung  mit glkerns:
library(lokern) 
fit<-matrix(0,ncol=N,nrow=n)
for(i in 1:n) fit[i,]<-glkerns(ind,cantmp[i,],x.out=grid)$est
par(mfrow=c(1,1))
plot(0,0,xlim=c(0,12),ylim=c(min(cantmp),max(cantmp)),type="n",xlab="month",ylab="temperature")
for(i in 1:n) lines(grid,fit[i,],lty=i)

# Hauptkomponentenanalyse mit princomp()
library(mva)
t.hk <- princomp(fit,cor=F)
str(t.hk$loadings)
t.hk$loadings[,1]
# Darstellung der Loading-Funktionen:
par(mfrow=c(2,2))
plot(grid,t.hk$loadings[,1])
plot(grid,t.hk$loadings[,2])
plot(grid,t.hk$loadings[,3])
plot(grid,t.hk$loadings[,4])

var.expl <- t.hk$sdev[1:4]^2/sum(t.hk$sdev^2) # Erklrte Varianz
screeplot(t.hk,npcs=4) # Plots der erklrten Varianz
#--------------------------------------------------------------------
# Alternativ:
# Haupt-Komponenten berechnen als Eigenvektoren der Kovarianz-Matrix:
v<-eigen(var(fit))[[1]] # Eigenwerte
P<-eigen(var(fit))[[2]] # Eigenvektoren
# Darstellung der Loading-Funktionen:
par(mfrow=c(2,2))
plot(grid,P[,1])# Erster Eigenvektor
abline(h=0)
plot(grid,P[,2])# zweiter Eigenvektor
abline(h=0)
plot(grid,P[,3])# dritter Eigenvektor
abline(h=0)
plot(grid,P[,4]) # Vierter Eigenvektor
abline(h=0)
locator(1)
# Varianz-Anteil fuer die ersten 4 Haupt-Komponenten:
cat("-------------------------------------------------------","\n")
cat("Percentage of variance of principal components:","\n")
print(round(v[1:4]/sum(v),3))


# Teilaufgabe d)

# Varimax rotation:
Q<-varimax(P[,1:4])[[1]]
# analog in der princomp-Variante:
Q2 <- varimax(-t.hk$loadings[,1:4])[[1]]
        #Vorzeichen sind bei princomp- und Handberechnung umgekehrt.

for(i in 1:4) if(sum(Q[,i])<0) Q[,i]<--Q[,i]

# Loading-Funktionen fuer Varimax:
par(mfrow=c(2,2))
plot(grid,Q[,1])
abline(h=0)
plot(grid,Q[,2])
abline(h=0)
plot(grid,Q[,3])
abline(h=0)
plot(grid,Q[,4])
abline(h=0)
locator(1)

# Teilaufgabe e)

# Daten zentrieren:
fitcent<-scale(fit,scale=F)

# Scores bezueglich Hauptkomponenten:
Z<-fitcent%*%P
# Alternativ mit princomp-Methode:
Z2 <- fitcent%*%t.hk$loadings
# Scores bezueglich varimax:
V<-fitcent%*%Q
# Alternativ mit princomp-Methode:
V2 <- fitcent%*%Q2

# Varianz von Varimax Komponenten:
cat("Percentage of variance of rotated principal components:","\n")
print(round(diag(var(V))/sum(v),3))
barplot(round(diag(var(V))/sum(v),3),xlab="Varianzkomponenten",ylab="Varianzanteil")
# Plots der Scores bezueglich den ersten zwei Hauptkomponenten:
par(mfrow=c(1,1))
plot(Z[,1],Z[,2],type="n")
text(Z[,1],Z[,2],row.names(cantmp))

# Alternativ: Biplot
biplot(t.hk,choices=1:2,scale=1,xlabs=rownames(cantmp),expand=0)
# expand=0 bedeutet, dass die Pfeile der Variablen nicht gezeichnet werden

# Plots der Scores bezglich der ersten und dritten Varimax-Komponenten:
par(mfrow=c(1,1))
plot(V[,1],V[,3],type="n")
text(V[,1],V[,3],row.names(cantmp))
