kid.score 学生成绩
mom.hs 母亲是否有高中学历
mom.iq 母亲智商

    kid.score mom.hs mom.iq
1      91.18      0 121.50
2      70.00      0 102.00
3      80.00      0 107.00
4      93.31      1 110.00
5      98.00      1 120.00
6      99.00      1 123.00
7      92.88      1 126.24
8      95.87      1 114.56
9      93.73      1 100.00
10     80.70      0 108.72
11     95.44      1 128.00
12     92.03      1 110.00
13     90.00      0 129.00
14     95.01      1 120.40
15     94.59      1  97.04
16     92.45      0 120.00
17     88.00      0 100.00
18     83.50      0  99.00
19     98.00      1 112.00
20     86.50      0 100.20
21     90.75      0 102.88
22     91.60      0 104.20
23     94.16      1 120.40
24     96.40      1 122.40


# Load the data set from the CSV file in the working directory.
data1 <- read.csv(file = "chapter 3 data1.csv")

# Copy each column into its own top-level vector so that the model formulas
# and plotting calls can refer to the variables directly by name:
#   kid.score - the child's test score
#   mom.hs    - whether the mother has a high-school education (0/1)
#   mom.iq    - the mother's IQ
mom.iq <- data1$mom.iq
mom.hs <- data1$mom.hs
kid.score <- data1$kid.score

# Linear regression of the child's score on both mother-level predictors.
fit.3 <- lm(formula = kid.score ~ mom.hs + mom.iq)


Call:
lm(formula = kid.score ~ mom.hs + mom.iq)

Residuals:
     Min       1Q   Median       3Q      Max 
-16.9100  -2.9430   0.7737   4.6099   8.9200 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)  
(Intercept) 75.74834   31.49762   2.405   0.0255 *
mom.hs       0.14823    0.18918   0.784   0.4420  
mom.iq      -0.07502    0.05608  -1.338   0.1953  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 6.734 on 21 degrees of freedom
Multiple R-squared:  0.09141,	Adjusted R-squared:  0.004876 
F-statistic: 1.056 on 2 and 21 DF,  p-value: 0.3655



# Simple regression: child's test score on mother's IQ only.
fit.2 <- lm(formula = kid.score ~ mom.iq)

# Scatter plot of the raw observations.
plot(
  x = mom.iq,
  y = kid.score,
  xlab = "Mother IQ score",
  ylab = "Child test score"
)

# Overlay the fitted regression line of fit.2 (intercept + slope * x).
curve(coef(fit.2)[[1]] + coef(fit.2)[[2]] * x, add = TRUE)
# The matrix form below works as well:
# curve(cbind(1, x) %*% coef(fit.2), add = TRUE)


# Two-predictor model: mother's high-school indicator plus mother's IQ.
fit.3 <- lm(formula = kid.score ~ mom.hs + mom.iq)

# Point colour per observation: black where mom.hs equals 1, gray otherwise.
colors <- ifelse(mom.hs == 1, yes = "black", no = "gray")

# Scatter plot with the points coloured by the mother's high-school status.
plot(
  x = mom.iq,
  y = kid.score,
  xlab = "Mother IQ score",
  ylab = "Child test score",
  col = colors,
  pch = 20
)
# Equivalent construction, drawing the two groups separately:
# plot(mom.iq, kid.score, xlab = "Mother IQ score",
#      ylab = "Child test score", type = "n")
# points(mom.iq[mom.hs == 1], kid.score[mom.hs == 1], pch = 20, col = "black")
# points(mom.iq[mom.hs == 0], kid.score[mom.hs == 0], pch = 20, col = "gray")

# One fitted line per group: each row of the design matrix is
# (1, mom.hs, x), multiplied by the coefficient vector of fit.3.
curve(cbind(1, 1, x) %*% coef(fit.3), col = "black", add = TRUE)
curve(cbind(1, 0, x) %*% coef(fit.3), col = "gray", add = TRUE)



#' Simulate regression coefficients from their estimated sampling distribution
#'
#' Draws `n.sim` coefficient vectors from a multivariate normal distribution
#' centred on the fitted estimates, with the estimated covariance matrix of
#' the coefficients. Drawing the coefficients jointly matters: estimates such
#' as intercept and slope are usually strongly correlated, and sampling each
#' one independently from N(estimate, std. error) greatly overstates the
#' spread of the implied regression lines.
#'
#' Note: the residual standard deviation is treated as known (plugged in via
#' `vcov()`); its own uncertainty is not simulated.
#'
#' @param input A fitted model for which `summary(input)$coefficients` and
#'   `vcov(input)` are available (e.g. the result of `lm()`).
#' @param n.sim Number of simulation draws (default 1000).
#' @return A numeric matrix with `n.sim` rows and one column per estimated
#'   coefficient, in the same order as the coefficient table.
sim <- function(input, n.sim = 1000) {
  coef_table <- summary(input)$coefficients
  beta_hat <- coef_table[, 1]
  n_coef <- length(beta_hat)

  # complete = FALSE leaves out aliased (NA) coefficients so the covariance
  # matrix lines up with the rows of the coefficient table.
  v_beta <- vcov(input, complete = FALSE)
  stopifnot(nrow(v_beta) == n_coef, ncol(v_beta) == n_coef)

  # chol() returns the upper-triangular R with t(R) %*% R == v_beta, so every
  # row of z %*% R is a draw from N(0, v_beta).
  z <- matrix(rnorm(n.sim * n_coef), nrow = n.sim, ncol = n_coef)
  beta <- sweep(z %*% chol(v_beta), MARGIN = 2, STATS = beta_hat, FUN = "+")

  colnames(beta) <- rownames(coef_table)
  beta
}
# Simulated coefficient draws for fit.2: one draw per row, with the intercept
# in column 1 and the mom.iq slope in column 2.
fit.2.sim <- sim(fit.2)

# Scatter plot of the raw data.
plot(mom.iq, kid.score, xlab = "Mother IQ score", ylab = "Child test score")

# Show the uncertainty of the fit: one gray line for each of the first ten
# simulation draws.
for (i in seq_len(10)) {
  curve(fit.2.sim[i, 1] + fit.2.sim[i, 2] * x, add = TRUE, col = "gray")
}

# The point-estimate regression line goes on top in black, drawn once after
# the loop has finished.
curve(coef(fit.2)[1] + coef(fit.2)[2] * x, add = TRUE, col = "black")

# Fit the two-predictor model, keep its point estimates, and simulate
# coefficient vectors (one draw per row of beta.sim).
fit.3 <- lm(kid.score ~ mom.hs + mom.iq)
beta.hat <- coef(fit.3)
beta.sim <- sim(fit.3)

# Panel 1: child score against mother's IQ, with mom.hs held at its sample
# mean. Ten gray lines from the simulation draws show the uncertainty.
plot(mom.iq, kid.score, xlab = "Mother IQ score", ylab = "Child test score")
for (i in seq_len(10)) {
  curve(cbind(1, mean(mom.hs), x) %*% beta.sim[i, ], lwd = .5,
        col = "gray", add = TRUE)
}
# Point-estimate line in black, drawn once after the loop.
curve(cbind(1, mean(mom.hs), x) %*% beta.hat, col = "black", add = TRUE)

# Panel 2: child score against the high-school indicator, with mom.iq held
# at its sample mean.
plot(mom.hs, kid.score, xlab = "Mother completed high school",
     ylab = "Child test score")
for (i in seq_len(10)) {
  curve(cbind(1, x, mean(mom.iq)) %*% beta.sim[i, ], lwd = .5,
        col = "gray", add = TRUE)
}
# Point-estimate line in black, drawn once after the loop.
curve(cbind(1, x, mean(mom.iq)) %*% beta.hat, col = "black", add = TRUE)

这些灰色拟合线非常分散:描述母亲 IQ 对孩子测试成绩(test score)影响的拟合直线,其斜率有正有负。虽然多数斜率为负,但由此可以看出 mom.iq 系数的估计很不确定,这一影响是可疑的。



> x.new <- data.frame (mom.hs=1, mom.iq=100)
> x.new
  mom.hs mom.iq
1      1    100
> predict (fit.3, x.new, interval="prediction", level=0.95)
       fit      lwr      upr
1 92.62915 81.96868 103.2896

