# Chapter 10: Linear models

# 10.1 Polynomial regression

# Quadratic regression of pemax on height, followed by a plot of the fitted
# curve with confidence and prediction bands.
# NOTE(review): cystfibr presumably ships with the ISwR package, which must
# already be loaded -- confirm.
data(cystfibr)
# Columns are attached (rather than passed via data=) because the formulas
# here, and the Cook's distance example further down this script, refer to
# them by bare name.
attach(cystfibr)

# One fit serves both the printed summary and the predictions.
lm.pemax.hq <- lm(pemax ~ height + I(height^2))
summary(lm.pemax.hq)

# Grid of heights at which the fitted curve and its bands are evaluated.
pred.frame <- data.frame(height = seq(from = 110, to = 180, by = 2))

# predict() returns a matrix with columns fit, lwr, upr for each interval type.
pp <- predict(lm.pemax.hq, newdata = pred.frame, interval = "prediction")
pc <- predict(lm.pemax.hq, newdata = pred.frame, interval = "confidence")
pp

# Scatter plot with the fit (solid) and the two bands overlaid:
# prediction limits dashed (lty 2), confidence limits dotted (lty 3).
plot(height, pemax, ylim = c(0, 200))
matlines(pred.frame$height, pp, lty = c(1, 2, 2), col = "black")
matlines(pred.frame$height, pc, lty = c(1, 3, 3), col = "black")

# 10.2 Regression through the origin

# Simulate 20 points scattered around a line through the origin with slope 2
# and Gaussian noise (sd 0.3). No seed is set, so every run draws new data.
x <- runif(20)
y <- 2 * x + rnorm(20, mean = 0, sd = 0.3)

# Coefficient tables: first with an intercept, then forced through the origin.
summary(lm(y ~ x))
summary(lm(y ~ x - 1))

# ANOVA tables for the same two specifications.
anova(lm(y ~ x))
anova(lm(y ~ x - 1))

# 10.3 Design matrices and dummy variables

# Print the design matrices that model formulas expand to: one with two
# numeric covariates (cystfibr) and one with a grouping variable
# (red.cell.folate; ventilation is presumably a factor -- confirm).
data(cystfibr)
# cystfibr is already attached by the polynomial regression section above.
# Attaching it again would put a second copy on the search path and print
# masking messages, so attach only when it is not there yet.
if (!"cystfibr" %in% search()) {
  attach(cystfibr)
}
model.matrix(pemax ~ height + weight)

data(red.cell.folate)
if (!"red.cell.folate" %in% search()) {
  attach(red.cell.folate)
}
model.matrix(folate ~ ventilation)

# 10.4 Linearity over groups

# Test whether the group effect on trypsin can be described by a straight
# line in the group number.
# NOTE(review): grp is presumably the numeric group code and grpf the same
# grouping stored as a factor -- confirm against the dataset documentation.
data(fake.trypsin)
attach(fake.trypsin)
summary(fake.trypsin)

# One-way ANOVA (grpf) versus simple linear regression (grp).
anova(lm(trypsin ~ grpf))
anova(lm(trypsin ~ grp))

# The linear model is nested in the one-way model, so comparing the two
# fits tests for deviations from linearity.
model1 <- lm(trypsin ~ grp)
model2 <- lm(trypsin ~ grpf)
anova(model1, model2)
# Same decomposition in a single sequential table.
anova(lm(trypsin ~ grp + grpf))

# Raw data by group, with group means (crosses, dashed) and the fitted
# regression line on top.
xbar.trypsin <- tapply(trypsin, grpf, mean)
# method= must be named: with a formula as first argument, the second
# positional argument of stripchart() is `data`, not `method`.
stripchart(trypsin ~ grp, method = "jitter", jitter = 0.1,
           vertical = TRUE, pch = 20)
lines(1:6, xbar.trypsin, type = "b", pch = 4, cex = 2, lty = 2)
abline(lm(trypsin ~ grp))

# Linearity analysis from summary statistics only: group sizes, group means
# and group standard deviations for six groups.
n <- c(32, 137, 38, 44, 16, 4)
tryp.mean <- c(128, 152, 194, 207, 215, 218)
tryp.sd <- c(50.9, 58.5, 49.3, 66.3, 60, 14)
gr <- 1:6

# Weighted fit to the six group means. The model is saturated, so the table
# has zero residual degrees of freedom and R cannot form F tests itself
# (it warns about a perfect fit); only the mean squares are used below.
tryp.anova <- anova(lm(tryp.mean ~ gr + factor(gr), weights = n))
tryp.anova

# Pooled within-group variance and its degrees of freedom, reconstructed
# from the group standard deviations; this is the F-test denominator.
sum(tryp.sd^2 * (n - 1))
resid.df <- sum(n - 1)
resid.df
pooled.var <- sum(tryp.sd^2 * (n - 1)) / resid.df
pooled.var

# F tests computed from the table instead of retyping rounded printed values.
# lower.tail = FALSE avoids the cancellation in 1 - pf() for tiny p-values.
F.lin <- tryp.anova["gr", "Mean Sq"] / pooled.var
F.lin # F statistic for gr
pf(F.lin, tryp.anova["gr", "Df"], resid.df, lower.tail = FALSE) # p-value
F.dev <- tryp.anova["factor(gr)", "Mean Sq"] / pooled.var
F.dev # F statistic for factor(gr)
pf(F.dev, tryp.anova["factor(gr)", "Df"], resid.df, lower.tail = FALSE) # p-value

# 10.6 Two-way ANOVA with replication

# Two-way analysis of variance with replication on the coking data.
data(coking)
attach(coking)

# ANOVA table for both main effects and the width:temp interaction.
anova(lm(time ~ width * temp))

# Table of cell means: one row per width value, one column per temp value.
tapply(X = time, INDEX = list(width, temp), FUN = mean)

# 10.7 Analysis of covariance

# Analysis of covariance: diameter against concentration, with and without
# glucose, on the hellung data.
data(hellung)
hellung
summary(hellung)
# Recode glucose as a labelled factor; labels are assigned to the sorted
# distinct codes, so the smaller code becomes "Yes".
hellung$glucose <- factor(hellung$glucose, labels = c("Yes", "No"))
summary(hellung)
attach(hellung)

# Raw scale: plotting symbol 1 for the first factor level, 2 for the second.
plot(conc, diameter, pch = as.numeric(glucose))
# The legend is anchored at fixed user coordinates so the script runs
# unattended, without overwriting graphics::locator() with a stub.
legend(4e5, 26, legend = c("glucose", "no glucose"), pch = 1:2)

# Same data with a log x axis, then with both axes logged.
plot(conc, diameter, pch = as.numeric(glucose), log = "x")
plot(conc, diameter, pch = as.numeric(glucose), log = "xy")

# Separate log-log regressions per glucose group, drawn on the log-log plot.
tethym.gluc <- hellung[glucose == "Yes", ]
tethym.nogluc <- hellung[glucose == "No", ]
lm.nogluc <- lm(log10(diameter) ~ log10(conc), data = tethym.nogluc)
lm.gluc <- lm(log10(diameter) ~ log10(conc), data = tethym.gluc)
abline(lm.nogluc)
abline(lm.gluc)

# Per-group summaries reuse the fits above (the printed call is identical),
# followed by the joint models with and without an interaction term.
summary(lm.gluc)
summary(lm.nogluc)
summary(lm(log10(diameter) ~ log10(conc) * glucose))
summary(lm(log10(diameter) ~ log10(conc) + glucose))

# F test comparing the residual variances of the two separate regressions.
var.test(lm.gluc, lm.nogluc)

# Sequential ANOVA tables; the two additive fits differ only in term order,
# which changes the sequential sums of squares.
anova(lm(log10(diameter) ~ log10(conc) * glucose))
anova(lm(log10(diameter) ~ glucose + log10(conc)))
anova(lm(log10(diameter) ~ log10(conc) + glucose))
# Group comparison that ignores concentration altogether.
t.test(log10(diameter) ~ glucose)

# 10.8 Diagnostics

# Regression diagnostics for short.velocity against blood.glucose.
data(thuesen)
attach(thuesen)
# With na.exclude, residuals and influence measures are padded with NA for
# dropped rows, so they stay aligned with the original observations.
# This setting is global and remains in force for the rest of the script.
options(na.action = "na.exclude")
lm.velo <- lm(short.velocity ~ blood.glucose)

# Built-in diagnostic plots on a 2x2 grid, then back to a single panel.
par(mfrow = c(2, 2), mex = 0.6)
plot(lm.velo)
par(mfrow = c(1, 1), mex = 1)

# Individual influence measures, again on a 2x2 grid.
par(mfrow = c(2, 2), mex = 0.6)
plot(rstandard(lm.velo))
plot(rstudent(lm.velo))
plot(dffits(lm.velo), type = "l")
matplot(dfbetas(lm.velo), type = "l", col = "black")
# Square root of Cook's distance is added to the dfbetas panel as a thick line.
lines(sqrt(cooks.distance(lm.velo)), lwd = 2)
par(mfrow = c(1, 1), mex = 1)

# Refit with observation 13 left out.
summary(lm(short.velocity ~ blood.glucose, subset = -13))
# Cook's distances for pemax ~ height + weight on cystfibr, rescaled so the
# most influential point has value 1, then shown as grey shading.
cookd <- cooks.distance(lm(pemax ~ height + weight, data = cystfibr))
cookd <- cookd / max(cookd)
# Square root spreads out the small values; darker fill = more influence.
cook.colors <- gray(1 - sqrt(cookd))
with(cystfibr, {
  plot(height, weight, bg = cook.colors, pch = 21, cex = 1.5)
  # Outline every point so the near-white ones stay visible.
  points(height, weight, pch = 1, cex = 1.5)
})

# Studentized residuals of the log-log birth weight model on secher, shown
# on the ad/bpd plane: grey level encodes magnitude, triangle direction
# encodes sign.
data(secher)
attach(secher)
rst <- rstudent(lm(log10(bwt) ~ log10(ad) + log10(bpd)))
range(rst)
# Scale by the largest absolute residual rather than a constant read off the
# range() output, so gray() always receives levels inside [0, 1].
rst <- rst / max(abs(rst))
plot(ad, bpd, log = "xy", bg = gray(1 - abs(rst)),
     pch = ifelse(rst > 0, 24, 25), cex = 1.5)
