#1234567890123456789012345678901234567890123456789012345678901234567890123456789
#Ruth Meili
## Aufgabe 1 zu rg2e-1, NDK7

d.geyser <- read.table("http://stat.ethz.ch/Teaching/Datasets/NDK/geyser.dat", header=TRUE)
summary(d.geyser)

par(mfrow=c(2,2))
boxplot(d.geyser$x,main="Boxplot: Geysir-Daten")
hist(d.geyser$x,breaks=10)
hist(d.geyser$x,breaks=20)
hist(d.geyser$x,breaks=30)
str(d.geyser)
## a)
par(mfrow=c(1,1))
plot(density(d.geyser$x, bw = 0.35), type='l')
rug(d.geyser$x)

par(mfrow=c(3,2))
for(h in c(1:3,5,7,10)/50) {
  plot(density(d.geyser$x, bw= h), xlab='', ylab='',
       main = paste("bandwidth h =", format(h)), type='l', xlim = c(1.25, 5.5), ylim = c(0,1.2))
  rug(d.geyser$x)
}

## b)
t.R <- diff(quantile(d.geyser$x, probs=c(0.25,0.75)))/1.34 ## 1.710075
sqrt(var(d.geyser$x))  ## 1.141371

t.h <- 0.9 * sqrt(var(d.geyser$x))/ length(d.geyser$x)^(1/5) ##0.334777
plot(density(d.geyser$x, , bw = t.h), type='l')



#1234567890123456789012345678901234567890123456789012345678901234567890123456789
#Ruth Meili
## Aufgabe 2 zu rg2e-1, NDK7
################################################################################

# a):
# mit sd=0.2 als "wenig rauschen" und sd=0.5 als "viel Rauschen" 

# Initialisierung der Simulation
set.seed(155)

# Grenzen des Definitionsbereichs
 a<-0
 b<-7

# Parameter der Stichprobe 
n1<-50
n2<-100
sd1<-0.2
sd2<-0.5

f.reg<-function(x){dnorm(x,3,0.5)+dnorm(x,5,0.2)}
x.n1<-seq(a,b,len=n1)
Y1.n1<-f.reg(x.n1)
Y2.n1<-f.reg(x.n1)+rnorm(n1,mean=0,sd=sd1)
Y3.n1<-f.reg(x.n1)+rnorm(n1,mean=0,sd=sd2)

x.n2<-seq(a,b,len=n2)
Y1.n2<-f.reg(x.n2)
Y2.n2<-f.reg(x.n2)+rnorm(n2,mean=0,sd=sd1)
Y3.n2<-f.reg(x.n2)+rnorm(n2,mean=0,sd=sd2)

par(mfrow=c(2,3))
plot(x.n1,Y1.n1,main="kein Rauschen")
plot(x.n1,Y2.n1,main="Rauschen mit sd=0.2")
plot(x.n1,Y3.n1,main="Rauschen mit sd=0.5")
plot(x.n2,Y1.n2,main="keine Rauschen")
plot(x.n2,Y2.n2,main="Rauschen mit sd=0.2")
plot(x.n2,Y3.n2,main="Rauschen mit sd=0.5")
###############################################################################

# Loesung fuer b)

# Polynom 1. Grades
coef1.Y1.n1<-lsfit(x.n1,Y1.n1)$coef
coef1.Y2.n1<-lsfit(x.n1,Y2.n1)$coef
coef1.Y3.n1<-lsfit(x.n1,Y3.n1)$coef
coef1.Y1.n2<-lsfit(x.n2,Y1.n2)$coef
coef1.Y2.n2<-lsfit(x.n2,Y2.n2)$coef
coef1.Y3.n2<-lsfit(x.n2,Y3.n2)$coef

t.grid<-seq(a,b,0.01)

fit1.Y1.n1<-coef1.Y1.n1[1]+coef1.Y1.n1[2]*t.grid
fit1.Y2.n1<-coef1.Y2.n1[1]+coef1.Y2.n1[2]*t.grid
fit1.Y3.n1<-coef1.Y3.n1[1]+coef1.Y3.n1[2]*t.grid
fit1.Y1.n2<-coef1.Y1.n2[1]+coef1.Y1.n2[2]*t.grid
fit1.Y2.n2<-coef1.Y2.n2[1]+coef1.Y2.n2[2]*t.grid
fit1.Y3.n2<-coef1.Y3.n2[1]+coef1.Y3.n2[2]*t.grid

# Polynom 3. Grades
coef3.Y1.n1<-lsfit(cbind(x.n1,x.n1^2,x.n1^3),Y1.n1)$coef
coef3.Y2.n1<-lsfit(cbind(x.n1,x.n1^2,x.n1^3),Y2.n1)$coef
coef3.Y3.n1<-lsfit(cbind(x.n1,x.n1^2,x.n1^3),Y3.n1)$coef
coef3.Y1.n2<-lsfit(cbind(x.n2,x.n2^2,x.n2^3),Y1.n2)$coef
coef3.Y2.n2<-lsfit(cbind(x.n2,x.n2^2,x.n2^3),Y2.n2)$coef
coef3.Y3.n2<-lsfit(cbind(x.n2,x.n2^2,x.n2^3),Y3.n2)$coef

fit3.Y1.n1<-coef3.Y1.n1[1]+coef3.Y1.n1[2]*t.grid+coef3.Y1.n1[3]*t.grid^2+coef3.Y1.n1[4]*t.grid^3
fit3.Y2.n1<-coef3.Y2.n1[1]+coef3.Y2.n1[2]*t.grid+coef3.Y2.n1[3]*t.grid^2+coef3.Y2.n1[4]*t.grid^3
fit3.Y3.n1<-coef3.Y3.n1[1]+coef3.Y3.n1[2]*t.grid+coef3.Y3.n1[3]*t.grid^2+coef3.Y3.n1[4]*t.grid^3
fit3.Y1.n2<-coef3.Y1.n2[1]+coef3.Y1.n2[2]*t.grid+coef3.Y1.n2[3]*t.grid^2+coef3.Y1.n2[4]*t.grid^3
fit3.Y2.n2<-coef3.Y2.n2[1]+coef3.Y2.n2[2]*t.grid+coef3.Y2.n2[3]*t.grid^2+coef3.Y2.n2[4]*t.grid^3
fit3.Y3.n2<-coef3.Y3.n2[1]+coef3.Y3.n2[2]*t.grid+coef3.Y3.n2[3]*t.grid^2+coef3.Y3.n2[4]*t.grid^3

# ISE
t.scale<-(b-a)/(length(t.grid)-1)

ise1.Y1.n1<-sum((fit1.Y1.n1-f.reg(t.grid))^2)*t.scale
ise1.Y2.n1<-sum((fit1.Y2.n1-f.reg(t.grid))^2)*t.scale
ise1.Y3.n1<-sum((fit1.Y3.n1-f.reg(t.grid))^2)*t.scale
ise1.Y1.n2<-sum((fit1.Y1.n2-f.reg(t.grid))^2)*t.scale
ise1.Y2.n2<-sum((fit1.Y2.n2-f.reg(t.grid))^2)*t.scale
ise1.Y3.n2<-sum((fit1.Y3.n2-f.reg(t.grid))^2)*t.scale

ise3.Y1.n1<-sum((fit3.Y1.n1-f.reg(t.grid))^2)*t.scale
ise3.Y2.n1<-sum((fit3.Y2.n1-f.reg(t.grid))^2)*t.scale
ise3.Y3.n1<-sum((fit3.Y3.n1-f.reg(t.grid))^2)*t.scale
ise3.Y1.n2<-sum((fit3.Y1.n2-f.reg(t.grid))^2)*t.scale
ise3.Y2.n2<-sum((fit3.Y2.n2-f.reg(t.grid))^2)*t.scale
ise3.Y3.n2<-sum((fit3.Y3.n2-f.reg(t.grid))^2)*t.scale

##############################################################################

# c)
library(lokern)
fitnp.Y1.n1<-glkerns(x.n1,Y1.n1,x.out=t.grid)$est
fitnp.Y2.n1<-glkerns(x.n1,Y2.n1,x.out=t.grid)$est
fitnp.Y3.n1<-glkerns(x.n1,Y3.n1,x.out=t.grid)$est
fitnp.Y1.n2<-glkerns(x.n2,Y1.n2,x.out=t.grid)$est
fitnp.Y2.n2<-glkerns(x.n2,Y2.n2,x.out=t.grid)$est
fitnp.Y3.n2<-glkerns(x.n2,Y3.n2,x.out=t.grid)$est

(isenp.Y1.n1<-sum((fitnp.Y1.n1-f.reg(t.grid))^2)*t.scale)
(isenp.Y2.n1<-sum((fitnp.Y2.n1-f.reg(t.grid))^2)*t.scale)
(isenp.Y3.n1<-sum((fitnp.Y3.n1-f.reg(t.grid))^2)*t.scale)
(isenp.Y1.n2<-sum((fitnp.Y1.n2-f.reg(t.grid))^2)*t.scale)
(isenp.Y2.n2<-sum((fitnp.Y2.n2-f.reg(t.grid))^2)*t.scale)
(isenp.Y3.n2<-sum((fitnp.Y3.n2-f.reg(t.grid))^2)*t.scale)

###############################################################################

# Grafische Darstellung von a)-c):

par(mfrow=c(2,3))
par(mfrow=c(3,2))
ymin<-min(Y1.n1,Y2.n1,Y3.n1,Y1.n2,Y2.n2,Y3.n2)
ymax<-max(Y1.n1,Y2.n1,Y3.n1,Y1.n2,Y2.n2,Y3.n2)

plot(x.n1,Y1.n1,ylim=c(ymin,ymax))
lines(t.grid,f.reg(t.grid),col=1)
lines(t.grid,fit1.Y1.n1,col=2)
lines(t.grid,fit3.Y1.n1,col=3)
lines(t.grid,fitnp.Y1.n1,col=4)
title("kleine Stichprobe, ohne Rauschen")
legend(0,2.5,legend=c("wahre Kurve","Polynom 1. Grades","Polynom 3. Grades","Nichtparametrische Anpassung"),col = c(1,2,3,4))

plot(x.n2,Y1.n2,ylim=c(ymin,ymax))
lines(t.grid,f.reg(t.grid),col=1)
lines(t.grid,fit1.Y1.n2,col=2)
lines(t.grid,fit3.Y1.n2,col=3)
lines(t.grid,fitnp.Y1.n2,col=4)
title("grosse Stichprobe, ohne Rauschen")
legend(0,2.5,legend=c("wahre Kurve","Polynom 1. Grades","Polynom 3. Grades","Nichtparametrische Anpassung"),col = c(1,2,3,4))

plot(x.n1,Y2.n1,ylim=c(ymin,ymax))
lines(t.grid,f.reg(t.grid),col=1)
lines(t.grid,fit1.Y2.n1,col=2)
lines(t.grid,fit3.Y2.n1,col=3)
lines(t.grid,fitnp.Y2.n1,col=4)
title("kleine Stichprobe, schwaches Rauschen")
legend(0,2.5,legend=c("wahre Kurve","Polynom 1. Grades","Polynom 3. Grades","Nichtparametrische Anpassung"),col = c(1,2,3,4))

plot(x.n2,Y2.n2,ylim=c(ymin,ymax))
lines(t.grid,f.reg(t.grid),col=1)
lines(t.grid,fit1.Y2.n2,col=2)
lines(t.grid,fit3.Y2.n2,col=3)
lines(t.grid,fitnp.Y2.n2,col=4)
title("grosse Stichprobe, schwaches Rauschen")
legend(0,2.5,legend=c("wahre Kurve","Polynom 1. Grades","Polynom 3. Grades","Nichtparametrische Anpassung"),col = c(1,2,3,4))

plot(x.n1,Y3.n1,ylim=c(ymin,ymax))
lines(t.grid,f.reg(t.grid),col=1)
lines(t.grid,fit1.Y2.n1,col=2)
lines(t.grid,fit3.Y2.n1,col=3)
lines(t.grid,fitnp.Y2.n1,col=4)
title("kleine Stichprobe, starkes  Rauschen")
legend(0,2.5,legend=c("wahre Kurve","Polynom 1. Grades","Polynom 3. Grades","Nichtparametrische Anpassung"),col = c(1,2,3,4))

plot(x.n2,Y3.n2,ylim=c(ymin,ymax))
lines(t.grid,f.reg(t.grid),col=1)
lines(t.grid,fit1.Y2.n2,col=2)
lines(t.grid,fit3.Y2.n2,col=3)
lines(t.grid,fitnp.Y2.n2,col=4)
title("grosse Stichprobe, starkes Rauschen")
legend(0,2.5,legend=c("wahre Kurve","Polynom 1. Grades","Polynom 3. Grades","Nichtparametrische Anpassung"),col = c(1,2,3,4))

###############################################################################

# d)
x0<-c(3,5)
(bias1.n1<-coef1.Y1.n1[1]+coef1.Y1.n1[2]*x0-f.reg(x0))
(bias3.n1<-coef3.Y1.n1[1]+coef3.Y1.n1[2]*x0+coef3.Y1.n1[3]*x0^2+coef3.Y1.n1[4]*x0^3-f.reg(x0))
(biasnp.n1<-glkerns(x.n1,Y1.n1,x.out=x0)$est-f.reg(x0))


bias1.n2<-coef1.Y1.n2[1]+coef1.Y1.n2[2]*x0-f.reg(x0)
bias3.n2<-coef3.Y1.n2[1]+coef3.Y1.n2[2]*x0+coef3.Y1.n2[3]*x0^2+coef3.Y1.n2[4]*x0^3-f.reg(x0)
biasnp.n2<-glkerns(x.n2,Y1.n2,x.out=x0)$est-f.reg(x0)


#############################################################################

# e)

# Wiederholung von a)-d) mit folgenden Parameter fuer die Regression-Funktion:

f.reg<-function(x){exp(-x)}
a<-0
b<-7
# Oder mit
f.reg<-function(x){exp(x)/(1+exp(x))}
a<--7
b<-7

# f)
source("ftp://stat.ethz.ch/NDK/Source-NDK-7/rg2e.R")
f.compare.ise(n=50,sd=0.1,nof=1,b=7,dpol=3,N=10)

