Today we explore various type of ANOVA models and their implementation. First let’s read in some data and plot a histogram:
bumpus<-read.csv("Data/bumpus.csv",header=T)
bumpus.data<-log(as.matrix(bumpus[,(5:13)])) # matrix of log-linear measurements
sex<-as.factor(bumpus[,2])
surv<-as.factor(bumpus[,4])
SexBySurv<-as.factor(paste(sex,surv))
TotalLength<-bumpus.data[,1]
## Multi-group histogram
sexdata<-split(TotalLength,sex)
hist(sexdata$m,col="blue",freq=TRUE)
hist(sexdata$f,col="red",freq=TRUE,add=TRUE)
#ANOTHER WAY, SHOWING OVERLAP
hist(sexdata$m, col=rgb(0,0,0,1),freq=TRUE)
hist(sexdata$f, col=rgb(0.8,0.8,0.8,0.5), freq=TRUE,add=T)
A single-factor ANOVA fits a bivariate model of the form Y~X, where variation in Y is explained by variation among groups as coded in X. Here are several implementations, including two parametric and one permutational. Additionally, pairwise comparisons are shown using standard and permutation approaches:
# Some components of the linear model
model1<-lm(TotalLength~sex)
anova(model1) #Statistical summary
## Analysis of Variance Table
##
## Response: TotalLength
## Df Sum Sq Mean Sq F value Pr(>F)
## sex 1 0.007460 0.0074597 16.667 7.617e-05 ***
## Residuals 134 0.059975 0.0004476
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coef(model1) #for ANOVA, these are means gp1 and dev for mean gp 2
## (Intercept) sexm
## 5.06220426 0.01542669
predict(model1) #Predicted values
## 1 2 3 4 5 6 7 8
## 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631
## 9 10 11 12 13 14 15 16
## 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631
## 17 18 19 20 21 22 23 24
## 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631
## 25 26 27 28 29 30 31 32
## 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631
## 33 34 35 36 37 38 39 40
## 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631
## 41 42 43 44 45 46 47 48
## 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631
## 49 50 51 52 53 54 55 56
## 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631
## 57 58 59 60 61 62 63 64
## 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631
## 65 66 67 68 69 70 71 72
## 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631
## 73 74 75 76 77 78 79 80
## 5.062204 5.077631 5.062204 5.077631 5.062204 5.077631 5.062204 5.077631
## 81 82 83 84 85 86 87 88
## 5.062204 5.077631 5.062204 5.077631 5.062204 5.077631 5.062204 5.077631
## 89 90 91 92 93 94 95 96
## 5.062204 5.077631 5.062204 5.077631 5.062204 5.077631 5.062204 5.077631
## 97 98 99 100 101 102 103 104
## 5.062204 5.077631 5.062204 5.077631 5.062204 5.077631 5.062204 5.062204
## 105 106 107 108 109 110 111 112
## 5.062204 5.062204 5.062204 5.062204 5.062204 5.062204 5.062204 5.062204
## 113 114 115 116 117 118 119 120
## 5.062204 5.062204 5.062204 5.062204 5.062204 5.062204 5.062204 5.062204
## 121 122 123 124 125 126 127 128
## 5.062204 5.062204 5.062204 5.062204 5.062204 5.062204 5.062204 5.062204
## 129 130 131 132 133 134 135 136
## 5.062204 5.062204 5.062204 5.062204 5.062204 5.062204 5.062204 5.062204
resid(model1) #Residuals
## 1 2 3 4 5
## -0.0406783460 0.0283145255 -0.0024571332 -0.0024571332 -0.0342058315
## 6 7 8 9 10
## 0.0037734166 -0.0406783460 0.0099653868 -0.0277749411 0.0161192524
## 11 12 13 14 15
## 0.0037734166 0.0099653868 -0.0213851430 0.0161192524 -0.0087267462
## 16 17 18 19 20
## 0.0037734166 -0.0150359154 -0.0024571332 -0.0150359154 0.0099653868
## 21 22 23 24 25
## -0.0024571332 -0.0024571332 0.0037734166 0.0099653868 0.0037734166
## 26 27 28 29 30
## 0.0099653868 -0.0024571332 0.0283145255 -0.0087267462 0.0037734166
## 31 32 33 34 35
## -0.0150359154 0.0037734166 -0.0087267462 0.0099653868 0.0222354794
## 36 37 38 39 40
## 0.0343568400 -0.0150359154 -0.0087267462 -0.0024571332 0.0099653868
## 41 42 43 44 45
## -0.0277749411 0.0037734166 0.0161192524 0.0343568400 -0.0277749411
## 46 47 48 49 50
## 0.0283145255 0.0283145255 0.0343568400 -0.0024571332 -0.0024571332
## 51 52 53 54 55
## -0.0150359154 -0.0277749411 -0.0024571332 -0.0150359154 -0.0213851430
## 56 57 58 59 60
## 0.0343568400 -0.0087267462 0.0283145255 -0.0024571332 -0.0213851430
## 61 62 63 64 65
## -0.0150359154 0.0222354794 0.0037734166 0.0343568400 -0.0024571332
## 66 67 68 69 70
## 0.0403628640 -0.0024571332 0.0037734166 -0.0471930270 0.0343568400
## 71 72 73 74 75
## -0.0277749411 0.0037734166 -0.0187791388 -0.0277749411 -0.0123482485
## 76 77 78 79 80
## 0.0161192524 0.0129695595 0.0161192524 -0.0383237349 -0.0024571332
## 81 82 83 84 85
## 0.0129695595 -0.0277749411 -0.0187791388 0.0099653868 -0.0059584504
## 86 87 88 89 90
## 0.0161192524 0.0437412182 0.0222354794 -0.0317663344 0.0161192524
## 91 92 93 94 95
## 0.0253920795 -0.0024571332 0.0253920795 -0.0024571332 0.0066999465
## 96 97 98 99 100
## -0.0150359154 0.0066999465 -0.0150359154 -0.0187791388 -0.0150359154
## 101 102 103 104 105
## 0.0253920795 -0.0342058315 -0.0383237349 -0.0123482485 -0.0252516533
## 106 107 108 109 110
## 0.0066999465 -0.0317663344 -0.0187791388 -0.0317663344 0.0315459451
## 111 112 113 114 115
## -0.0187791388 0.0315459451 -0.0123482485 0.0315459451 -0.0059584504
## 116 117 118 119 120
## 0.0066999465 -0.0187791388 0.0192001092 -0.0187791388 0.0376621721
## 121 122 123 124 125
## 0.0003907773 0.0253920795 -0.0317663344 0.0003907773 0.0129695595
## 126 127 128 129 130
## 0.0253920795 0.0192001092 0.0376621721 -0.0059584504 -0.0059584504
## 131 132 133 134 135
## -0.0123482485 0.0003907773 -0.0317663344 -0.0187791388 0.0315459451
## 136
## 0.0066999465
anova(lm(TotalLength~sex))
## Analysis of Variance Table
##
## Response: TotalLength
## Df Sum Sq Mean Sq F value Pr(>F)
## sex 1 0.007460 0.0074597 16.667 7.617e-05 ***
## Residuals 134 0.059975 0.0004476
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(sex,TotalLength,ylab="Total Length")
summary(aov(TotalLength~sex)) #another way to do univariate ANOVA
## Df Sum Sq Mean Sq F value Pr(>F)
## sex 1 0.00746 0.007460 16.67 7.62e-05 ***
## Residuals 134 0.05998 0.000448
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##### ANOVA via residual randomization
library(RRPP)
mydat <- rrpp.data.frame(TotalLength = TotalLength, sex = sex, surv=surv, SexBySurv = SexBySurv)
res <- lm.rrpp(TotalLength~sex, print.progress = FALSE, data = mydat)
anova(res) #note: summary components are part of the object: see res$ANOVA$ res$coef, etc.
##
## Analysis of Variance, using Residual Randomization
## Permutation procedure: Randomization of null model residuals
## Number of permutations: 1000
## Estimation method: Ordinary Least Squares
## Sums of Squares and Cross-products: Type I
## Effect sizes (Z) based on F distributions
##
## Df SS MS Rsq F Z Pr(>F)
## sex 1 0.007460 0.0074597 0.11062 16.667 2.9944 0.001 **
## Residuals 134 0.059975 0.0004476 0.88938
## Total 135 0.067435
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Call: lm.rrpp(f1 = TotalLength ~ sex, data = mydat, print.progress = FALSE)
coef(res)
## [,1]
## (Intercept) 5.06220426
## sexm 0.01542669
There are several important components to note here. First, the parameters, SS, MS, F-values are identical to those obtained using standard parametric methods. So RRPP estimates identical parameters and summary values when compared to ‘standard’ methods. Note though that there are additional elements in the ANOVA table. Specifically, each term in the model has an R-squared value, and an empirically-derived effect size (Z-score). These can assist in interpreting the strength of signal across factors in the model. These become particularly important with multivariate response data.
Programming note: with RRPP, it is generally advisable to create an rrpp.dataframe (see above). R functions generally first search for data in a supplied data frame and if none are found, then search the global environment, which increases the probability of an error. The easiest way to assure there are no errors is to provide an rrpp.data.frame for lm.rrpp, thus eliminating some common problems with local versus global environmental variables.
anova(lm(TotalLength~SexBySurv)) ### Single-Factor ANOVA (4 groups)
## Analysis of Variance Table
##
## Response: TotalLength
## Df Sum Sq Mean Sq F value Pr(>F)
## SexBySurv 3 0.014515 0.0048382 12.068 4.957e-07 ***
## Residuals 132 0.052920 0.0004009
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model2 <- lm.rrpp(TotalLength~SexBySurv, print.progress = FALSE, data = mydat) #via RRPP
anova(model2)
##
## Analysis of Variance, using Residual Randomization
## Permutation procedure: Randomization of null model residuals
## Number of permutations: 1000
## Estimation method: Ordinary Least Squares
## Sums of Squares and Cross-products: Type I
## Effect sizes (Z) based on F distributions
##
## Df SS MS Rsq F Z Pr(>F)
## SexBySurv 3 0.014515 0.0048382 0.21524 12.068 4.6318 0.001 **
## Residuals 132 0.052920 0.0004009 0.78476
## Total 135 0.067435
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Call: lm.rrpp(f1 = TotalLength ~ SexBySurv, data = mydat, print.progress = FALSE)
#post-hoc tests
pairwise.t.test(TotalLength, SexBySurv, p.adj = "none") #standard
##
## Pairwise comparisons using t tests with pooled SD
##
## data: TotalLength and SexBySurv
##
## f FALSE f TRUE m FALSE
## f TRUE 0.259 - -
## m FALSE 1.2e-05 3.5e-07 -
## m TRUE 0.260 0.024 9.1e-05
##
## P value adjustment method: none
#pairwise comparisons via RRPP
posthoc <- pairwise(fit = model2,groups = SexBySurv, print.progress = FALSE)
summary(posthoc)
##
## Pairwise comparisons
##
## Groups: f FALSE f TRUE m FALSE m TRUE
##
## RRPP: 1000 permutations
##
## LS means:
## Vectors hidden (use show.vectors = TRUE to view)
##
## Pairwise distances between means, plus statistics
## d UCL (95%) Z Pr > d
## f FALSE:f TRUE 0.006555326 0.011860377 0.5780316 0.314
## f FALSE:m FALSE 0.022936308 0.010849405 3.2605314 0.001
## f FALSE:m TRUE 0.005333238 0.010790696 0.5578098 0.320
## f TRUE:m FALSE 0.029491634 0.011995000 3.5218171 0.001
## f TRUE:m TRUE 0.011888564 0.011446375 1.6578467 0.041
## m FALSE:m TRUE 0.017603070 0.009860329 2.7340975 0.001
Factorial ANOVA is the case where variation in Y is explained by several independent factors: \(Y=X_1\beta_1+X_2\beta_2+\dots\). The interaction between factors may also be of interest. Pairwise comparisons are also implemented:
anova(lm(TotalLength~sex+surv+sex:surv))
## Analysis of Variance Table
##
## Response: TotalLength
## Df Sum Sq Mean Sq F value Pr(>F)
## sex 1 0.007460 0.0074597 18.6069 3.122e-05 ***
## surv 1 0.006121 0.0061212 15.2683 0.0001483 ***
## sex:surv 1 0.000934 0.0009337 2.3289 0.1293803
## Residuals 132 0.052920 0.0004009
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
###SHORTHAND for FULL Factorial
anova(lm(TotalLength~sex*surv))
## Analysis of Variance Table
##
## Response: TotalLength
## Df Sum Sq Mean Sq F value Pr(>F)
## sex 1 0.007460 0.0074597 18.6069 3.122e-05 ***
## surv 1 0.006121 0.0061212 15.2683 0.0001483 ***
## sex:surv 1 0.000934 0.0009337 2.3289 0.1293803
## Residuals 132 0.052920 0.0004009
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model3 <- lm.rrpp(TotalLength~sex*surv, print.progress = FALSE, data = mydat)
anova(model3)
##
## Analysis of Variance, using Residual Randomization
## Permutation procedure: Randomization of null model residuals
## Number of permutations: 1000
## Estimation method: Ordinary Least Squares
## Sums of Squares and Cross-products: Type I
## Effect sizes (Z) based on F distributions
##
## Df SS MS Rsq F Z Pr(>F)
## sex 1 0.007460 0.0074597 0.11062 18.6069 3.1191 0.001 **
## surv 1 0.006121 0.0061212 0.09077 15.2683 2.9171 0.001 **
## sex:surv 1 0.000934 0.0009337 0.01385 2.3289 1.2044 0.115
## Residuals 132 0.052920 0.0004009 0.78476
## Total 135 0.067435
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Call: lm.rrpp(f1 = TotalLength ~ sex * surv, data = mydat, print.progress = FALSE)
With factorial models, another function argument comes into play: full versus residual randomization. This option changes the reduced model against which full models are compared. Full randomization (FRPP) compares each term versus Y~1, whereas RRPP performs a set of sequential comparisons as described in lecture (where we learned that RRPP is the most general and universal permutation procedure).
anova(lm.rrpp(TotalLength~sex*surv, print.progress = FALSE, RRPP = TRUE, data = mydat))
##
## Analysis of Variance, using Residual Randomization
## Permutation procedure: Randomization of null model residuals
## Number of permutations: 1000
## Estimation method: Ordinary Least Squares
## Sums of Squares and Cross-products: Type I
## Effect sizes (Z) based on F distributions
##
## Df SS MS Rsq F Z Pr(>F)
## sex 1 0.007460 0.0074597 0.11062 18.6069 3.1191 0.001 **
## surv 1 0.006121 0.0061212 0.09077 15.2683 2.9171 0.001 **
## sex:surv 1 0.000934 0.0009337 0.01385 2.3289 1.2044 0.115
## Residuals 132 0.052920 0.0004009 0.78476
## Total 135 0.067435
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Call: lm.rrpp(f1 = TotalLength ~ sex * surv, RRPP = TRUE, data = mydat,
## print.progress = FALSE)
anova(lm.rrpp(TotalLength~sex*surv, print.progress = FALSE,RRPP = FALSE, data = mydat))
##
## Analysis of Variance, using Residual Randomization
## Permutation procedure: Randomization of raw values (residuals of mean)
## Number of permutations: 1000
## Estimation method: Ordinary Least Squares
## Sums of Squares and Cross-products: Type I
## Effect sizes (Z) based on F distributions
##
## Df SS MS Rsq F Z Pr(>F)
## sex 1 0.007460 0.0074597 0.11062 18.6069 3.1191 0.001 **
## surv 1 0.006121 0.0061212 0.09077 15.2683 2.9051 0.001 **
## sex:surv 1 0.000934 0.0009337 0.01385 2.3289 1.2070 0.112
## Residuals 132 0.052920 0.0004009 0.78476
## Total 135 0.067435
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Call: lm.rrpp(f1 = TotalLength ~ sex * surv, RRPP = FALSE, data = mydat,
## print.progress = FALSE)
For pairwise comparisons, one can also specify FRPP or RRPP. In this particular example, this does make a difference.
model.r <- lm.rrpp(TotalLength~sex+surv, print.progress = FALSE, data = mydat)
model.n <- lm.rrpp(TotalLength~1, print.progress = FALSE, data = mydat)
###NOTE: pairwise comparison interpretations can change when using different reduced models
summary(pairwise(model3, groups = SexBySurv, print.progress = FALSE)) # Default: against H_r: Y=sex+surv
##
## Pairwise comparisons
##
## Groups: f FALSE f TRUE m FALSE m TRUE
##
## RRPP: 1000 permutations
##
## LS means:
## Vectors hidden (use show.vectors = TRUE to view)
##
## Pairwise distances between means, plus statistics
## d UCL (95%) Z Pr > d
## f FALSE:f TRUE 0.006555326 0.02250545 -1.3225379 0.899
## f FALSE:m FALSE 0.022936308 0.02543711 1.1669058 0.129
## f FALSE:m TRUE 0.005333238 0.01121912 0.3242748 0.391
## f TRUE:m FALSE 0.029491634 0.04030517 -0.2736234 0.609
## f TRUE:m TRUE 0.011888564 0.02600557 -1.0458391 0.854
## m FALSE:m TRUE 0.017603070 0.02139632 0.8689545 0.190
summary(pairwise(model3, fit.null = model.r,groups = SexBySurv, print.progress = FALSE)) #against H_r: Y=sex+surv
##
## Pairwise comparisons
##
## Groups: f FALSE f TRUE m FALSE m TRUE
##
## RRPP: 1000 permutations
##
## LS means:
## Vectors hidden (use show.vectors = TRUE to view)
##
## Pairwise distances between means, plus statistics
## d UCL (95%) Z Pr > d
## f FALSE:f TRUE 0.006555326 0.02250545 -1.3225379 0.899
## f FALSE:m FALSE 0.022936308 0.02543711 1.1669058 0.129
## f FALSE:m TRUE 0.005333238 0.01121912 0.3242748 0.391
## f TRUE:m FALSE 0.029491634 0.04030517 -0.2736234 0.609
## f TRUE:m TRUE 0.011888564 0.02600557 -1.0458391 0.854
## m FALSE:m TRUE 0.017603070 0.02139632 0.8689545 0.190
summary(pairwise(model3, fit.null = model.n,groups = SexBySurv, print.progress = FALSE)) #against H_r: Y~1
##
## Pairwise comparisons
##
## Groups: f FALSE f TRUE m FALSE m TRUE
##
## RRPP: 1000 permutations
##
## LS means:
## Vectors hidden (use show.vectors = TRUE to view)
##
## Pairwise distances between means, plus statistics
## d UCL (95%) Z Pr > d
## f FALSE:f TRUE 0.006555326 0.011860377 0.5780316 0.314
## f FALSE:m FALSE 0.022936308 0.010849405 3.2605314 0.001
## f FALSE:m TRUE 0.005333238 0.010790696 0.5578098 0.320
## f TRUE:m FALSE 0.029491634 0.011995000 3.5218171 0.001
## f TRUE:m TRUE 0.011888564 0.011446375 1.6578467 0.041
## m FALSE:m TRUE 0.017603070 0.009860329 2.7340975 0.001
One can also have a factor which is nested within a main effect. These are implemented using the following notation:
anova(lm(TotalLength~sex/surv)) #survival nested within sex
## Analysis of Variance Table
##
## Response: TotalLength
## Df Sum Sq Mean Sq F value Pr(>F)
## sex 1 0.007460 0.0074597 18.6069 3.122e-05 ***
## sex:surv 2 0.007055 0.0035275 8.7986 0.0002588 ***
## Residuals 132 0.052920 0.0004009
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#NOTE: F-tests not correct: all are based on MSE; but sex should be tested against nested effect!
Note however, that the standard F-test is not correct for the main effect (sex in this case). The reason is that all factors are tested agains the global residual error (MSE). Thus, one can either ‘by hand’ re-calculate post-hoc, write a function, or use the ‘aov’ function combined with some by-hand calculations. A simpler alternative is to use RRPP:
model.nested <- lm.rrpp(TotalLength~sex/surv, effect.type = "F", print.progress = FALSE, data = mydat)
anova(model.nested) #unadjusted
##
## Analysis of Variance, using Residual Randomization
## Permutation procedure: Randomization of null model residuals
## Number of permutations: 1000
## Estimation method: Ordinary Least Squares
## Sums of Squares and Cross-products: Type I
## Effect sizes (Z) based on F distributions
##
## Df SS MS Rsq F Z Pr(>F)
## sex 1 0.007460 0.0074597 0.11062 18.6069 3.1191 0.001 **
## sex:surv 2 0.007055 0.0035275 0.10462 8.7986 3.2187 0.001 **
## Residuals 132 0.052920 0.0004009 0.78476
## Total 135 0.067435
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Call: lm.rrpp(f1 = TotalLength ~ sex/surv, data = mydat, print.progress = FALSE,
## effect.type = "F")
anova(model.nested, error = c("sex:surv", "Residuals")) # Correct MSE specification
##
## Analysis of Variance, using Residual Randomization
## Permutation procedure: Randomization of null model residuals
## Number of permutations: 1000
## Estimation method: Ordinary Least Squares
## Sums of Squares and Cross-products: Type I
## Effect sizes (Z) based on F distributions
##
## Df SS MS Rsq F Z Pr(>F)
## sex 1 0.007460 0.0074597 0.11062 2.1147 0.4929 0.309
## sex:surv 2 0.007055 0.0035275 0.10462 8.7986 3.2187 0.001 **
## Residuals 132 0.052920 0.0004009 0.78476
## Total 135 0.067435
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Call: lm.rrpp(f1 = TotalLength ~ sex/surv, data = mydat, print.progress = FALSE,
## effect.type = "F")
#inspect F for sex relative to MS-sex and MS-sex:surv
Linear mixed models contain both fixed and random effects. They are especially useful for accounting for: random variation across subject groups, in repeated measures or hierarchical designs, or other situations where the observations are not independent. Thus, they are enormously useful in the biological sciences.
Statistically, line mixed models are evaluated using REML (reduced maximum likelihood). Thus, in R, a different set of functions are used; these are found in the lme4 package.
Here is a simple example. In this case, the data are from of a sleep deprivation study. We are looking at reaction time as a function of days, subject by subject:
library(lme4)
## Loading required package: Matrix
library(lattice)
xyplot(Reaction ~ Days | Subject, sleepstudy, type = c("g","p","r"),
index = function(x,y) coef(lm(y ~ x))[1],
xlab = "Days of sleep deprivation",
ylab = "Average reaction time (ms)", aspect = "xy")
As can be seen, reaction time increases with day, but the relationship differs by subject (the slopes and intercepts in particular). This suggests that a model with random slopes and intercepts is appropriate.
Let’s run an analysis evaluating reaction time as a function of days, while accounting for the random variation in slopes and intercepts among subjects: Reaction~Days (Days|Subject).
fm1 <- lmer(Reaction ~ Days + (Days|Subject), sleepstudy)
anova(fm1)
## Analysis of Variance Table
## npar Sum Sq Mean Sq F value
## Days 1 30031 30031 45.853
summary(fm1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Reaction ~ Days + (Days | Subject)
## Data: sleepstudy
##
## REML criterion at convergence: 1743.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.9536 -0.4634 0.0231 0.4634 5.1793
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Subject (Intercept) 612.10 24.741
## Days 35.07 5.922 0.07
## Residual 654.94 25.592
## Number of obs: 180, groups: Subject, 18
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 251.405 6.825 36.838
## Days 10.467 1.546 6.771
##
## Correlation of Fixed Effects:
## (Intr)
## Days -0.138
For illustration purposes, let’s now fit just a random intercepts model, and compare this with linear model alternatives:
fm2 <- lmer(Reaction ~ Days + (1|Subject), data = sleepstudy)
summary(fm2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Reaction ~ Days + (1 | Subject)
## Data: sleepstudy
##
## REML criterion at convergence: 1786.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2257 -0.5529 0.0109 0.5188 4.2506
##
## Random effects:
## Groups Name Variance Std.Dev.
## Subject (Intercept) 1378.2 37.12
## Residual 960.5 30.99
## Number of obs: 180, groups: Subject, 18
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 251.4051 9.7467 25.79
## Days 10.4673 0.8042 13.02
##
## Correlation of Fixed Effects:
## (Intr)
## Days -0.371
anova(fm2)
## Analysis of Variance Table
## npar Sum Sq Mean Sq F value
## Days 1 162703 162703 169.4
anova(lm(Reaction~Days*Subject, data= sleepstudy))
## Analysis of Variance Table
##
## Response: Reaction
## Df Sum Sq Mean Sq F value Pr(>F)
## Days 1 162703 162703 248.4234 < 2.2e-16 ***
## Subject 17 250618 14742 22.5093 < 2.2e-16 ***
## Days:Subject 17 60322 3548 5.4178 3.272e-09 ***
## Residuals 144 94312 655
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(lm(Reaction~Days, data= sleepstudy))
## Analysis of Variance Table
##
## Response: Reaction
## Df Sum Sq Mean Sq F value Pr(>F)
## Days 1 162703 162703 71.464 9.894e-15 ***
## Residuals 178 405252 2277
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Notice that the SSE for Days is the same across all models, but the F-values are not. The reason is that each model differs in how the remaining variation is accounted for. This is important! One must carefully consider what is the appropriate model regarding fixed and random effects, based on the types of expected variation in the study (see lecture).
Statistically, there are multiple ways to obtain sums-of-squares for a linear model. Type I SS are sequential SS; type II are conditional SS, and type III are marginal SS. As discussed in class, there are times when one of these may be more appropriate than another, and one may wish to specify a SS type for a particular anlaysis. In R, type I SS are the default. However, using RRPP, one can adjust this quite easily.
anova(lm.rrpp(TotalLength~sex*surv, SS.type="I", print.progress = FALSE, data = mydat)) #sequential SS
##
## Analysis of Variance, using Residual Randomization
## Permutation procedure: Randomization of null model residuals
## Number of permutations: 1000
## Estimation method: Ordinary Least Squares
## Sums of Squares and Cross-products: Type I
## Effect sizes (Z) based on F distributions
##
## Df SS MS Rsq F Z Pr(>F)
## sex 1 0.007460 0.0074597 0.11062 18.6069 3.1191 0.001 **
## surv 1 0.006121 0.0061212 0.09077 15.2683 2.9171 0.001 **
## sex:surv 1 0.000934 0.0009337 0.01385 2.3289 1.2044 0.115
## Residuals 132 0.052920 0.0004009 0.78476
## Total 135 0.067435
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Call: lm.rrpp(f1 = TotalLength ~ sex * surv, SS.type = "I", data = mydat,
## print.progress = FALSE)
anova(lm.rrpp(TotalLength~sex*surv, SS.type="II", print.progress = FALSE, data = mydat)) #conditional SS
##
## Analysis of Variance, using Residual Randomization
## Permutation procedure: Randomization of null model residuals
## Number of permutations: 1000
## Estimation method: Ordinary Least Squares
## Sums of Squares and Cross-products: Type II
## Effect sizes (Z) based on F distributions
##
## Df SS MS Rsq F Z Pr(>F)
## sex 1 0.009454 0.0094544 0.14020 23.5822 3.5668 0.001 **
## surv 1 0.006121 0.0061212 0.09077 15.2683 2.9171 0.001 **
## sex:surv 1 0.000934 0.0009337 0.01385 2.3289 1.2044 0.115
## Residuals 132 0.052920 0.0004009 0.78476
## Total 135 0.067435
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Call: lm.rrpp(f1 = TotalLength ~ sex * surv, SS.type = "II", data = mydat,
## print.progress = FALSE)
anova(lm.rrpp(TotalLength~sex*surv, SS.type="III", print.progress = FALSE, data = mydat)) #marginal SS
##
## Analysis of Variance, using Residual Randomization
## Permutation procedure: Randomization of null model residuals
## Number of permutations: 1000
## Estimation method: Ordinary Least Squares
## Sums of Squares and Cross-products: Type III
## Effect sizes (Z) based on F distributions
##
## Df SS MS Rsq F Z Pr(>F)
## sex 1 0.008286 0.0082857 0.12287 20.6671 3.5449 0.001 **
## surv 1 0.000516 0.0005157 0.00765 1.2862 0.7710 0.241
## sex:surv 1 0.000934 0.0009337 0.01385 2.3289 1.2044 0.115
## Residuals 132 0.052920 0.0004009 0.78476
## Total 135 0.067435
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Call: lm.rrpp(f1 = TotalLength ~ sex * surv, SS.type = "III", data = mydat,
## print.progress = FALSE)
###7: A Permutation Test for ANOVA Here is code for performing a permutation test to evaluate ANOVA. It is written in ‘loop’ style, so it is more readable (in a few weeks we’ll discuss other implementations). Note that ALL functions in the package RRPP above use permutation approaches.
F.obs<-anova(lm(TotalLength~sex))[1,4] #Find Test value and save
permute<-999
F.rand.vec<-array(NA,(permute+1))
F.rand.vec[permute+1]<-F.obs
for(i in 1:permute){
###Shuffle Data
y.rand<-sample(TotalLength) #SHUFFLE FIRST COL
F.rand.vec[i]<-anova(lm(y.rand~sex))[1,4]
}
P.Ftest<-rank(F.rand.vec[permute+1])/(permute+1)
F.obs
## [1] 16.66688
P.Ftest
## [1] 0.001
hist(F.rand.vec,40,freq=T,col="gray")
segments(F.obs, 0, F.obs, 50) ##Plot Observed value