Small Log to Illustrate Factors and Linear/GL Models in Splus Using Them ============================================== 4/23/04 We will use the Bass data again as an example. > Basstmp <- Bass[,c(1,3:5,12)] tmpvec <- character(53) tmpvec[Basstmp$Alk < 15] <- "A" tmpvec[Basstmp$Alk >= 15 & Basstmp$Alk < 40] <- "B" tmpvec[Basstmp$Alk >= 40] <- "C" Basstmp$Alk <- factor(tmpvec) Basstmp$pH <- factor(Basstmp$pH) > levels(Basstmp$Alk) [1] "A" "B" "C" > contrasts(Basstmp$Alk) B C A 0 0 B 1 0 C 0 1 > Basstmp$pH <- C(Basstmp$pH, helmert) > contrasts(Basstmp$pH) [,1] [,2] [,3] [,4] [,5] 4 -1 -1 -1 -1 -1 5 1 -1 -1 -1 -1 6 0 2 -1 -1 -1 7 0 0 3 -1 -1 8 0 0 0 4 -1 9 0 0 0 0 5 > lmBass.tmp <- lm(logAvM ~ . , data=Basstmp)> lmBass.tmp Call: lm(formula = logAvM ~ ., data = Basstmp) Coefficients: (Intercept) AlkB AlkC pH1 pH2 pH3 -0.8350372 -0.3258071 -1.329827 -0.07429764 -0.06790021 -0.05839478 pH4 pH5 Calc agedat 0.01472259 -0.02283154 0.000791893 0.5687784 Degrees of freedom: 53 total; 43 residual Residual standard error: 0.6283208 > dim(model.matrix(lmBass.tmp)) [1] 53 10 > Basstmp[c(1,15),] logAvM Alk pH Calc agedat Alligator 0.2070142 A 6 3.0 1 Farm13 -2.9957323 C 8 86.5 0 > model.matrix(lmBass.tmp)[c(1,15),] (Intercept) AlkB AlkC pH1 pH2 pH3 pH4 pH5 Calc agedat Alligator 1 0 0 0 2 -1 -1 -1 3.0 1 Farm13 1 0 1 0 0 0 4 -1 86.5 0 > lmBass.tmp2 <- update(lmBass.tmp, formula = .~. + Alk:agedat) > dimnames(model.matrix(lmBass.tmp2))[[2]] [1] "(Intercept)" "AlkB" "AlkC" "pH1" "pH2" [6] "pH3" "pH4" "pH5" "Calc" "agedat" [11] "AlkBagedat" "AlkCagedat" > lmBass.tmp2$coef (Intercept) AlkB AlkC pH1 pH2 pH3 -0.3083213 -0.2735762 -2.847379 -0.0608514 -0.02846815 -0.04441668 pH4 pH5 Calc agedat AlkBagedat AlkCagedat 0.06340573 0.09410899 0.003151322 0.07044184 -0.0210463 1.480321 > newfac <- interaction(Basstmp$Alk, factor(Basstmp$agedat)) > lmBass.tmp3 <- update(lmBass.tmp, formula = .~ pH + Calc + newfac) > lmBass.tmp3 Call: lm(formula = logAvM ~ pH + Calc + newfac, data = Basstmp) Coefficients: (Intercept) pH1 pH2 pH3 pH4 pH5 -0.3083213 -0.0608514 -0.02846815 -0.04441668 0.06340573 0.09410899 Calc newfacB.0 newfacC.0 newfacA.1 newfacB.1 newfacC.1 0.003151322 -0.2735762 -2.847379 0.07044184 -0.2241807 -1.296616 Degrees of freedom: 53 total; 41 residual Residual standard error: 0.5790415 > lmBass.tmp2 Call: lm(formula = logAvM ~ Alk + pH + Calc + agedat + Alk:agedat, data = Basstmp) Coefficients: (Intercept) AlkB AlkC pH1 pH2 pH3 -0.3083213 -0.2735762 -2.847379 -0.0608514 -0.02846815 -0.04441668 pH4 pH5 Calc agedat AlkBagedat AlkCagedat 0.06340573 0.09410899 0.003151322 0.07044184 -0.0210463 1.480321 Degrees of freedom: 53 total; 41 residual Residual standard error: 0.5790415 > levels(newfac) [1] "A.0" "B.0" "C.0" "A.1" "B.1" "C.1" > contrasts(newfac) B.0 C.0 A.1 B.1 C.1 A.0 0 0 0 0 0 B.0 1 0 0 0 0 C.0 0 1 0 0 0 A.1 0 0 1 0 0 B.1 0 0 0 1 0 C.1 0 0 0 0 1 > names(summary(lmBass.tmp2)) [1] "call" "terms" "residuals" "coefficients" "sigma" [6] "df" "r.squared" "fstatistic" "cov.unscaled" "correlation" > diag(summary(lmBass.tmp2)$cov.unsc) [1] 0.3274954841 1.4949280661 1.5379438476 0.0934975568 0.0232323935 [6] 0.0167495396 0.0206899671 0.0232150644 0.0001052244 0.2962471445 [11] 1.3917615352 0.7504732289 > diag(solve(t(model.matrix(lmBass.tmp2)) %*% model.matrix(lmBass.tmp2))) [1] 0.3274954841 1.4949280661 1.5379438476 0.0934975568 0.0232323935 [6] 0.0167495396 0.0206899671 0.0232150644 0.0001052244 0.2962471445 [11] 1.3917615352 0.7504732289 ### So for actual covariance matrix of estimated coefficients, ### multiply the cov.unscaled matrix by $sigma^2 . ### That could then be used to find confidence intervals and ### hypothesis tests for significant "contrasts".