ANOVA Models

Motivation

Today we explore various type of ANOVA models and their implementation. First let’s read in some data and plot a histogram:

bumpus<-read.csv("Data/bumpus.csv",header=T)
bumpus.data<-log(as.matrix(bumpus[,(5:13)])) # matrix of log-linear measurements
sex<-as.factor(bumpus[,2])
surv<-as.factor(bumpus[,4])
SexBySurv<-as.factor(paste(sex,surv))
 TotalLength<-bumpus.data[,1]
 
 ## Multi-group histogram
sexdata<-split(TotalLength,sex)
hist(sexdata$m,col="blue",freq=TRUE)
hist(sexdata$f,col="red",freq=TRUE,add=TRUE)

#ANOTHER WAY, SHOWING OVERLAP

hist(sexdata$m, col=rgb(0,0,0,1),freq=TRUE)
hist(sexdata$f, col=rgb(0.8,0.8,0.8,0.5), freq=TRUE,add=T)

1: Single-Factor ANOVA

A single-factor ANOVA fits a bivariate model of the form Y~X, where variation in Y is explained by variation among groups as coded in X. Here are several implementations, including two parametric and one permutational. Additionally, pairwise comparisons are shown using standard and permutation approaches:

#  Some components of the linear model
 model1<-lm(TotalLength~sex)
 anova(model1)      #Statistical summary

## Analysis of Variance Table
## 
## Response: TotalLength
##            Df   Sum Sq   Mean Sq F value    Pr(>F)    
## sex         1 0.007460 0.0074597  16.667 7.617e-05 ***
## Residuals 134 0.059975 0.0004476                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

 coef(model1)   #for ANOVA, these are means gp1 and dev for mean gp 2

## (Intercept)        sexm 
##  5.06220426  0.01542669

 predict(model1)        #Predicted values

##        1        2        3        4        5        6        7        8 
## 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 
##        9       10       11       12       13       14       15       16 
## 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 
##       17       18       19       20       21       22       23       24 
## 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 
##       25       26       27       28       29       30       31       32 
## 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 
##       33       34       35       36       37       38       39       40 
## 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 
##       41       42       43       44       45       46       47       48 
## 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 
##       49       50       51       52       53       54       55       56 
## 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 
##       57       58       59       60       61       62       63       64 
## 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 
##       65       66       67       68       69       70       71       72 
## 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 5.077631 
##       73       74       75       76       77       78       79       80 
## 5.062204 5.077631 5.062204 5.077631 5.062204 5.077631 5.062204 5.077631 
##       81       82       83       84       85       86       87       88 
## 5.062204 5.077631 5.062204 5.077631 5.062204 5.077631 5.062204 5.077631 
##       89       90       91       92       93       94       95       96 
## 5.062204 5.077631 5.062204 5.077631 5.062204 5.077631 5.062204 5.077631 
##       97       98       99      100      101      102      103      104 
## 5.062204 5.077631 5.062204 5.077631 5.062204 5.077631 5.062204 5.062204 
##      105      106      107      108      109      110      111      112 
## 5.062204 5.062204 5.062204 5.062204 5.062204 5.062204 5.062204 5.062204 
##      113      114      115      116      117      118      119      120 
## 5.062204 5.062204 5.062204 5.062204 5.062204 5.062204 5.062204 5.062204 
##      121      122      123      124      125      126      127      128 
## 5.062204 5.062204 5.062204 5.062204 5.062204 5.062204 5.062204 5.062204 
##      129      130      131      132      133      134      135      136 
## 5.062204 5.062204 5.062204 5.062204 5.062204 5.062204 5.062204 5.062204

 resid(model1)      #Residuals

##             1             2             3             4             5 
## -0.0406783460  0.0283145255 -0.0024571332 -0.0024571332 -0.0342058315 
##             6             7             8             9            10 
##  0.0037734166 -0.0406783460  0.0099653868 -0.0277749411  0.0161192524 
##            11            12            13            14            15 
##  0.0037734166  0.0099653868 -0.0213851430  0.0161192524 -0.0087267462 
##            16            17            18            19            20 
##  0.0037734166 -0.0150359154 -0.0024571332 -0.0150359154  0.0099653868 
##            21            22            23            24            25 
## -0.0024571332 -0.0024571332  0.0037734166  0.0099653868  0.0037734166 
##            26            27            28            29            30 
##  0.0099653868 -0.0024571332  0.0283145255 -0.0087267462  0.0037734166 
##            31            32            33            34            35 
## -0.0150359154  0.0037734166 -0.0087267462  0.0099653868  0.0222354794 
##            36            37            38            39            40 
##  0.0343568400 -0.0150359154 -0.0087267462 -0.0024571332  0.0099653868 
##            41            42            43            44            45 
## -0.0277749411  0.0037734166  0.0161192524  0.0343568400 -0.0277749411 
##            46            47            48            49            50 
##  0.0283145255  0.0283145255  0.0343568400 -0.0024571332 -0.0024571332 
##            51            52            53            54            55 
## -0.0150359154 -0.0277749411 -0.0024571332 -0.0150359154 -0.0213851430 
##            56            57            58            59            60 
##  0.0343568400 -0.0087267462  0.0283145255 -0.0024571332 -0.0213851430 
##            61            62            63            64            65 
## -0.0150359154  0.0222354794  0.0037734166  0.0343568400 -0.0024571332 
##            66            67            68            69            70 
##  0.0403628640 -0.0024571332  0.0037734166 -0.0471930270  0.0343568400 
##            71            72            73            74            75 
## -0.0277749411  0.0037734166 -0.0187791388 -0.0277749411 -0.0123482485 
##            76            77            78            79            80 
##  0.0161192524  0.0129695595  0.0161192524 -0.0383237349 -0.0024571332 
##            81            82            83            84            85 
##  0.0129695595 -0.0277749411 -0.0187791388  0.0099653868 -0.0059584504 
##            86            87            88            89            90 
##  0.0161192524  0.0437412182  0.0222354794 -0.0317663344  0.0161192524 
##            91            92            93            94            95 
##  0.0253920795 -0.0024571332  0.0253920795 -0.0024571332  0.0066999465 
##            96            97            98            99           100 
## -0.0150359154  0.0066999465 -0.0150359154 -0.0187791388 -0.0150359154 
##           101           102           103           104           105 
##  0.0253920795 -0.0342058315 -0.0383237349 -0.0123482485 -0.0252516533 
##           106           107           108           109           110 
##  0.0066999465 -0.0317663344 -0.0187791388 -0.0317663344  0.0315459451 
##           111           112           113           114           115 
## -0.0187791388  0.0315459451 -0.0123482485  0.0315459451 -0.0059584504 
##           116           117           118           119           120 
##  0.0066999465 -0.0187791388  0.0192001092 -0.0187791388  0.0376621721 
##           121           122           123           124           125 
##  0.0003907773  0.0253920795 -0.0317663344  0.0003907773  0.0129695595 
##           126           127           128           129           130 
##  0.0253920795  0.0192001092  0.0376621721 -0.0059584504 -0.0059584504 
##           131           132           133           134           135 
## -0.0123482485  0.0003907773 -0.0317663344 -0.0187791388  0.0315459451 
##           136 
##  0.0066999465

 anova(lm(TotalLength~sex))

## Analysis of Variance Table
## 
## Response: TotalLength
##            Df   Sum Sq   Mean Sq F value    Pr(>F)    
## sex         1 0.007460 0.0074597  16.667 7.617e-05 ***
## Residuals 134 0.059975 0.0004476                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

plot(sex,TotalLength,ylab="Total Length")
  summary(aov(TotalLength~sex))    #another way to do univariate ANOVA

##              Df  Sum Sq  Mean Sq F value   Pr(>F)    
## sex           1 0.00746 0.007460   16.67 7.62e-05 ***
## Residuals   134 0.05998 0.000448                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

##### ANOVA via residual randomization
library(RRPP)

mydat <- rrpp.data.frame(TotalLength = TotalLength, sex = sex, surv=surv, SexBySurv = SexBySurv)  
  
res <- lm.rrpp(TotalLength~sex, print.progress = FALSE, data = mydat)
anova(res)     #note: summary components are part of the object: see res$ANOVA$ res$coef, etc.

## 
## Analysis of Variance, using Residual Randomization
## Permutation procedure: Randomization of null model residuals 
## Number of permutations: 1000 
## Estimation method: Ordinary Least Squares 
## Sums of Squares and Cross-products: Type I 
## Effect sizes (Z) based on F distributions
## 
##            Df       SS        MS     Rsq      F      Z Pr(>F)   
## sex         1 0.007460 0.0074597 0.11062 16.667 2.9944  0.001 **
## Residuals 134 0.059975 0.0004476 0.88938                        
## Total     135 0.067435                                          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Call: lm.rrpp(f1 = TotalLength ~ sex, data = mydat, print.progress = FALSE)

coef(res)

##                   [,1]
## (Intercept) 5.06220426
## sexm        0.01542669

There are several important components to note here. First, the parameters, SS, MS, F-values are identical to those obtained using standard parametric methods. So RRPP estimates identical parameters and summary values when compared to ‘standard’ methods. Note though that there are additional elements in the ANOVA table. Specifically, each term in the model has an R-squared value, and an empirically-derived effect size (Z-score). These can assist in interpreting the strength of signal across factors in the model. These become particularly important with multivariate response data.

Programming note: with RRPP, it is generally advisable to create an rrpp.dataframe (see above). R functions generally first search for data in a supplied data frame and if none are found, then search the global environment, which increases the probability of an error. The easiest way to assure there are no errors is to provide an rrpp.data.frame for lm.rrpp, thus eliminating some common problems with local versus global environmental variables.

anova(lm(TotalLength~SexBySurv)) ### Single-Factor ANOVA (4 groups)

## Analysis of Variance Table
## 
## Response: TotalLength
##            Df   Sum Sq   Mean Sq F value    Pr(>F)    
## SexBySurv   3 0.014515 0.0048382  12.068 4.957e-07 ***
## Residuals 132 0.052920 0.0004009                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

model2 <- lm.rrpp(TotalLength~SexBySurv, print.progress = FALSE, data = mydat) #via RRPP
anova(model2)

## 
## Analysis of Variance, using Residual Randomization
## Permutation procedure: Randomization of null model residuals 
## Number of permutations: 1000 
## Estimation method: Ordinary Least Squares 
## Sums of Squares and Cross-products: Type I 
## Effect sizes (Z) based on F distributions
## 
##            Df       SS        MS     Rsq      F      Z Pr(>F)   
## SexBySurv   3 0.014515 0.0048382 0.21524 12.068 4.6318  0.001 **
## Residuals 132 0.052920 0.0004009 0.78476                        
## Total     135 0.067435                                          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Call: lm.rrpp(f1 = TotalLength ~ SexBySurv, data = mydat, print.progress = FALSE)

#post-hoc tests
pairwise.t.test(TotalLength, SexBySurv, p.adj = "none")  #standard

## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  TotalLength and SexBySurv 
## 
##         f FALSE f TRUE  m FALSE
## f TRUE  0.259   -       -      
## m FALSE 1.2e-05 3.5e-07 -      
## m TRUE  0.260   0.024   9.1e-05
## 
## P value adjustment method: none

#pairwise comparisons via RRPP
posthoc <- pairwise(fit = model2,groups = SexBySurv, print.progress = FALSE)
summary(posthoc)

## 
## Pairwise comparisons
## 
## Groups: f FALSE f TRUE m FALSE m TRUE 
## 
## RRPP: 1000 permutations
## 
## LS means:
## Vectors hidden (use show.vectors = TRUE to view)
## 
## Pairwise distances between means, plus statistics
##                           d   UCL (95%)         Z Pr > d
## f FALSE:f TRUE  0.006555326 0.011860377 0.5780316  0.314
## f FALSE:m FALSE 0.022936308 0.010849405 3.2605314  0.001
## f FALSE:m TRUE  0.005333238 0.010790696 0.5578098  0.320
## f TRUE:m FALSE  0.029491634 0.011995000 3.5218171  0.001
## f TRUE:m TRUE   0.011888564 0.011446375 1.6578467  0.041
## m FALSE:m TRUE  0.017603070 0.009860329 2.7340975  0.001

3: Factorial ANOVA

Factorial ANOVA is the case where variation in Y is explained by several independent factors: \(Y=X_1\beta_1+X_2\beta_2+\dots\). The interaction between factors may also be of interest. Pairwise comparisons are also implemented:

anova(lm(TotalLength~sex+surv+sex:surv))

## Analysis of Variance Table
## 
## Response: TotalLength
##            Df   Sum Sq   Mean Sq F value    Pr(>F)    
## sex         1 0.007460 0.0074597 18.6069 3.122e-05 ***
## surv        1 0.006121 0.0061212 15.2683 0.0001483 ***
## sex:surv    1 0.000934 0.0009337  2.3289 0.1293803    
## Residuals 132 0.052920 0.0004009                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

###SHORTHAND for FULL Factorial
anova(lm(TotalLength~sex*surv))

## Analysis of Variance Table
## 
## Response: TotalLength
##            Df   Sum Sq   Mean Sq F value    Pr(>F)    
## sex         1 0.007460 0.0074597 18.6069 3.122e-05 ***
## surv        1 0.006121 0.0061212 15.2683 0.0001483 ***
## sex:surv    1 0.000934 0.0009337  2.3289 0.1293803    
## Residuals 132 0.052920 0.0004009                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

model3 <- lm.rrpp(TotalLength~sex*surv, print.progress = FALSE, data = mydat)
anova(model3)

## 
## Analysis of Variance, using Residual Randomization
## Permutation procedure: Randomization of null model residuals 
## Number of permutations: 1000 
## Estimation method: Ordinary Least Squares 
## Sums of Squares and Cross-products: Type I 
## Effect sizes (Z) based on F distributions
## 
##            Df       SS        MS     Rsq       F      Z Pr(>F)   
## sex         1 0.007460 0.0074597 0.11062 18.6069 3.1191  0.001 **
## surv        1 0.006121 0.0061212 0.09077 15.2683 2.9171  0.001 **
## sex:surv    1 0.000934 0.0009337 0.01385  2.3289 1.2044  0.115   
## Residuals 132 0.052920 0.0004009 0.78476                         
## Total     135 0.067435                                           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Call: lm.rrpp(f1 = TotalLength ~ sex * surv, data = mydat, print.progress = FALSE)

With factorial models, another function argument comes into play: full versus residual randomization. This option changes the reduced model against which full models are compared. Full randomization (FRPP) compares each term versus Y~1, whereas RRPP performs a set of sequential comparisons as described in lecture (where we learned that RRPP is the most general and universal permutation procedure).

anova(lm.rrpp(TotalLength~sex*surv, print.progress = FALSE, RRPP = TRUE, data = mydat))

## 
## Analysis of Variance, using Residual Randomization
## Permutation procedure: Randomization of null model residuals 
## Number of permutations: 1000 
## Estimation method: Ordinary Least Squares 
## Sums of Squares and Cross-products: Type I 
## Effect sizes (Z) based on F distributions
## 
##            Df       SS        MS     Rsq       F      Z Pr(>F)   
## sex         1 0.007460 0.0074597 0.11062 18.6069 3.1191  0.001 **
## surv        1 0.006121 0.0061212 0.09077 15.2683 2.9171  0.001 **
## sex:surv    1 0.000934 0.0009337 0.01385  2.3289 1.2044  0.115   
## Residuals 132 0.052920 0.0004009 0.78476                         
## Total     135 0.067435                                           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Call: lm.rrpp(f1 = TotalLength ~ sex * surv, RRPP = TRUE, data = mydat,  
##     print.progress = FALSE)

anova(lm.rrpp(TotalLength~sex*surv, print.progress = FALSE,RRPP = FALSE, data = mydat))

## 
## Analysis of Variance, using Residual Randomization
## Permutation procedure: Randomization of raw values (residuals of mean) 
## Number of permutations: 1000 
## Estimation method: Ordinary Least Squares 
## Sums of Squares and Cross-products: Type I 
## Effect sizes (Z) based on F distributions
## 
##            Df       SS        MS     Rsq       F      Z Pr(>F)   
## sex         1 0.007460 0.0074597 0.11062 18.6069 3.1191  0.001 **
## surv        1 0.006121 0.0061212 0.09077 15.2683 2.9051  0.001 **
## sex:surv    1 0.000934 0.0009337 0.01385  2.3289 1.2070  0.112   
## Residuals 132 0.052920 0.0004009 0.78476                         
## Total     135 0.067435                                           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Call: lm.rrpp(f1 = TotalLength ~ sex * surv, RRPP = FALSE, data = mydat,  
##     print.progress = FALSE)

For pairwise comparisons, one can also specify FRPP or RRPP. In this particular example, this does make a difference.

model.r <- lm.rrpp(TotalLength~sex+surv, print.progress = FALSE, data = mydat)
model.n <- lm.rrpp(TotalLength~1, print.progress = FALSE, data = mydat)

###NOTE: pairwise comparison interpretations can change when using different reduced models
summary(pairwise(model3, groups = SexBySurv, print.progress = FALSE))  #  Default: against H_r: Y=sex+surv

## 
## Pairwise comparisons
## 
## Groups: f FALSE f TRUE m FALSE m TRUE 
## 
## RRPP: 1000 permutations
## 
## LS means:
## Vectors hidden (use show.vectors = TRUE to view)
## 
## Pairwise distances between means, plus statistics
##                           d  UCL (95%)          Z Pr > d
## f FALSE:f TRUE  0.006555326 0.02250545 -1.3225379  0.899
## f FALSE:m FALSE 0.022936308 0.02543711  1.1669058  0.129
## f FALSE:m TRUE  0.005333238 0.01121912  0.3242748  0.391
## f TRUE:m FALSE  0.029491634 0.04030517 -0.2736234  0.609
## f TRUE:m TRUE   0.011888564 0.02600557 -1.0458391  0.854
## m FALSE:m TRUE  0.017603070 0.02139632  0.8689545  0.190

summary(pairwise(model3, fit.null = model.r,groups = SexBySurv, print.progress = FALSE))  #against H_r: Y=sex+surv

## 
## Pairwise comparisons
## 
## Groups: f FALSE f TRUE m FALSE m TRUE 
## 
## RRPP: 1000 permutations
## 
## LS means:
## Vectors hidden (use show.vectors = TRUE to view)
## 
## Pairwise distances between means, plus statistics
##                           d  UCL (95%)          Z Pr > d
## f FALSE:f TRUE  0.006555326 0.02250545 -1.3225379  0.899
## f FALSE:m FALSE 0.022936308 0.02543711  1.1669058  0.129
## f FALSE:m TRUE  0.005333238 0.01121912  0.3242748  0.391
## f TRUE:m FALSE  0.029491634 0.04030517 -0.2736234  0.609
## f TRUE:m TRUE   0.011888564 0.02600557 -1.0458391  0.854
## m FALSE:m TRUE  0.017603070 0.02139632  0.8689545  0.190

summary(pairwise(model3, fit.null = model.n,groups = SexBySurv, print.progress = FALSE))  #against H_r: Y~1

## 
## Pairwise comparisons
## 
## Groups: f FALSE f TRUE m FALSE m TRUE 
## 
## RRPP: 1000 permutations
## 
## LS means:
## Vectors hidden (use show.vectors = TRUE to view)
## 
## Pairwise distances between means, plus statistics
##                           d   UCL (95%)         Z Pr > d
## f FALSE:f TRUE  0.006555326 0.011860377 0.5780316  0.314
## f FALSE:m FALSE 0.022936308 0.010849405 3.2605314  0.001
## f FALSE:m TRUE  0.005333238 0.010790696 0.5578098  0.320
## f TRUE:m FALSE  0.029491634 0.011995000 3.5218171  0.001
## f TRUE:m TRUE   0.011888564 0.011446375 1.6578467  0.041
## m FALSE:m TRUE  0.017603070 0.009860329 2.7340975  0.001

4: Nested ANOVA

One can also have a factor which is nested within a main effect. These are implemented using the following notation:

anova(lm(TotalLength~sex/surv)) #survival nested within sex

## Analysis of Variance Table
## 
## Response: TotalLength
##            Df   Sum Sq   Mean Sq F value    Pr(>F)    
## sex         1 0.007460 0.0074597 18.6069 3.122e-05 ***
## sex:surv    2 0.007055 0.0035275  8.7986 0.0002588 ***
## Residuals 132 0.052920 0.0004009                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

  #NOTE: F-tests not correct: all are based on MSE; but sex should be tested against nested effect!

Note however, that the standard F-test is not correct for the main effect (sex in this case). The reason is that all factors are tested agains the global residual error (MSE). Thus, one can either ‘by hand’ re-calculate post-hoc, write a function, or use the ‘aov’ function combined with some by-hand calculations. A simpler alternative is to use RRPP:

model.nested <- lm.rrpp(TotalLength~sex/surv, effect.type = "F", print.progress = FALSE, data = mydat)
anova(model.nested)  #unadjusted

## 
## Analysis of Variance, using Residual Randomization
## Permutation procedure: Randomization of null model residuals 
## Number of permutations: 1000 
## Estimation method: Ordinary Least Squares 
## Sums of Squares and Cross-products: Type I 
## Effect sizes (Z) based on F distributions
## 
##            Df       SS        MS     Rsq       F      Z Pr(>F)   
## sex         1 0.007460 0.0074597 0.11062 18.6069 3.1191  0.001 **
## sex:surv    2 0.007055 0.0035275 0.10462  8.7986 3.2187  0.001 **
## Residuals 132 0.052920 0.0004009 0.78476                         
## Total     135 0.067435                                           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Call: lm.rrpp(f1 = TotalLength ~ sex/surv, data = mydat, print.progress = FALSE,  
##     effect.type = "F")

anova(model.nested, error = c("sex:surv", "Residuals"))  #  Correct MSE specification

## 
## Analysis of Variance, using Residual Randomization
## Permutation procedure: Randomization of null model residuals 
## Number of permutations: 1000 
## Estimation method: Ordinary Least Squares 
## Sums of Squares and Cross-products: Type I 
## Effect sizes (Z) based on F distributions
## 
##            Df       SS        MS     Rsq      F      Z Pr(>F)   
## sex         1 0.007460 0.0074597 0.11062 2.1147 0.4929  0.309   
## sex:surv    2 0.007055 0.0035275 0.10462 8.7986 3.2187  0.001 **
## Residuals 132 0.052920 0.0004009 0.78476                        
## Total     135 0.067435                                          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Call: lm.rrpp(f1 = TotalLength ~ sex/surv, data = mydat, print.progress = FALSE,  
##     effect.type = "F")

   #inspect F for sex relative to MS-sex and MS-sex:surv

5: Mixed Models

Linear mixed models contain both fixed and random effects. They are especially useful for accounting for: random variation across subject groups, in repeated measures or hierarchical designs, or other situations where the observations are not independent. Thus, they are enormously useful in the biological sciences.

Statistically, line mixed models are evaluated using REML (reduced maximum likelihood). Thus, in R, a different set of functions are used; these are found in the lme4 package.

Here is a simple example. In this case, the data are from of a sleep deprivation study. We are looking at reaction time as a function of days, subject by subject:

library(lme4)

## Loading required package: Matrix

library(lattice)
  
xyplot(Reaction ~ Days | Subject, sleepstudy, type = c("g","p","r"),
       index = function(x,y) coef(lm(y ~ x))[1],
       xlab = "Days of sleep deprivation",
       ylab = "Average reaction time (ms)", aspect = "xy")

As can be seen, reaction time increases with day, but the relationship differs by subject (the slopes and intercepts in particular). This suggests that a model with random slopes and intercepts is appropriate.

Let’s run an analysis evaluating reaction time as a function of days, while accounting for the random variation in slopes and intercepts among subjects: Reaction~Days (Days|Subject).

fm1 <- lmer(Reaction ~ Days + (Days|Subject), sleepstudy)
anova(fm1)

## Analysis of Variance Table
##      npar Sum Sq Mean Sq F value
## Days    1  30031   30031  45.853

summary(fm1)

## Linear mixed model fit by REML ['lmerMod']
## Formula: Reaction ~ Days + (Days | Subject)
##    Data: sleepstudy
## 
## REML criterion at convergence: 1743.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.9536 -0.4634  0.0231  0.4634  5.1793 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  Subject  (Intercept) 612.10   24.741       
##           Days         35.07    5.922   0.07
##  Residual             654.94   25.592       
## Number of obs: 180, groups:  Subject, 18
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  251.405      6.825  36.838
## Days          10.467      1.546   6.771
## 
## Correlation of Fixed Effects:
##      (Intr)
## Days -0.138

For illustration purposes, let’s now fit just a random intercepts model, and compare this with linear model alternatives:

fm2 <- lmer(Reaction ~ Days + (1|Subject), data = sleepstudy) 
summary(fm2)

## Linear mixed model fit by REML ['lmerMod']
## Formula: Reaction ~ Days + (1 | Subject)
##    Data: sleepstudy
## 
## REML criterion at convergence: 1786.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.2257 -0.5529  0.0109  0.5188  4.2506 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Subject  (Intercept) 1378.2   37.12   
##  Residual              960.5   30.99   
## Number of obs: 180, groups:  Subject, 18
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) 251.4051     9.7467   25.79
## Days         10.4673     0.8042   13.02
## 
## Correlation of Fixed Effects:
##      (Intr)
## Days -0.371

anova(fm2)

## Analysis of Variance Table
##      npar Sum Sq Mean Sq F value
## Days    1 162703  162703   169.4

anova(lm(Reaction~Days*Subject, data= sleepstudy))

## Analysis of Variance Table
## 
## Response: Reaction
##               Df Sum Sq Mean Sq  F value    Pr(>F)    
## Days           1 162703  162703 248.4234 < 2.2e-16 ***
## Subject       17 250618   14742  22.5093 < 2.2e-16 ***
## Days:Subject  17  60322    3548   5.4178 3.272e-09 ***
## Residuals    144  94312     655                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

anova(lm(Reaction~Days, data= sleepstudy))

## Analysis of Variance Table
## 
## Response: Reaction
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## Days        1 162703  162703  71.464 9.894e-15 ***
## Residuals 178 405252    2277                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Notice that the SSE for Days is the same across all models, but the F-values are not. The reason is that each model differs in how the remaining variation is accounted for. This is important! One must carefully consider what is the appropriate model regarding fixed and random effects, based on the types of expected variation in the study (see lecture).

6: Type I, type II, and type III SS

Statistically, there are multiple ways to obtain sums-of-squares for a linear model. Type I SS are sequential SS; type II are conditional SS, and type III are marginal SS. As discussed in class, there are times when one of these may be more appropriate than another, and one may wish to specify a SS type for a particular anlaysis. In R, type I SS are the default. However, using RRPP, one can adjust this quite easily.

anova(lm.rrpp(TotalLength~sex*surv, SS.type="I", print.progress = FALSE, data = mydat))  #sequential SS

## 
## Analysis of Variance, using Residual Randomization
## Permutation procedure: Randomization of null model residuals 
## Number of permutations: 1000 
## Estimation method: Ordinary Least Squares 
## Sums of Squares and Cross-products: Type I 
## Effect sizes (Z) based on F distributions
## 
##            Df       SS        MS     Rsq       F      Z Pr(>F)   
## sex         1 0.007460 0.0074597 0.11062 18.6069 3.1191  0.001 **
## surv        1 0.006121 0.0061212 0.09077 15.2683 2.9171  0.001 **
## sex:surv    1 0.000934 0.0009337 0.01385  2.3289 1.2044  0.115   
## Residuals 132 0.052920 0.0004009 0.78476                         
## Total     135 0.067435                                           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Call: lm.rrpp(f1 = TotalLength ~ sex * surv, SS.type = "I", data = mydat,  
##     print.progress = FALSE)

anova(lm.rrpp(TotalLength~sex*surv, SS.type="II", print.progress = FALSE, data = mydat)) #conditional SS

## 
## Analysis of Variance, using Residual Randomization
## Permutation procedure: Randomization of null model residuals 
## Number of permutations: 1000 
## Estimation method: Ordinary Least Squares 
## Sums of Squares and Cross-products: Type II 
## Effect sizes (Z) based on F distributions
## 
##            Df       SS        MS     Rsq       F      Z Pr(>F)   
## sex         1 0.009454 0.0094544 0.14020 23.5822 3.5668  0.001 **
## surv        1 0.006121 0.0061212 0.09077 15.2683 2.9171  0.001 **
## sex:surv    1 0.000934 0.0009337 0.01385  2.3289 1.2044  0.115   
## Residuals 132 0.052920 0.0004009 0.78476                         
## Total     135 0.067435                                           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Call: lm.rrpp(f1 = TotalLength ~ sex * surv, SS.type = "II", data = mydat,  
##     print.progress = FALSE)

anova(lm.rrpp(TotalLength~sex*surv, SS.type="III", print.progress = FALSE, data = mydat)) #marginal SS

## 
## Analysis of Variance, using Residual Randomization
## Permutation procedure: Randomization of null model residuals 
## Number of permutations: 1000 
## Estimation method: Ordinary Least Squares 
## Sums of Squares and Cross-products: Type III 
## Effect sizes (Z) based on F distributions
## 
##            Df       SS        MS     Rsq       F      Z Pr(>F)   
## sex         1 0.008286 0.0082857 0.12287 20.6671 3.5449  0.001 **
## surv        1 0.000516 0.0005157 0.00765  1.2862 0.7710  0.241   
## sex:surv    1 0.000934 0.0009337 0.01385  2.3289 1.2044  0.115   
## Residuals 132 0.052920 0.0004009 0.78476                         
## Total     135 0.067435                                           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Call: lm.rrpp(f1 = TotalLength ~ sex * surv, SS.type = "III", data = mydat,  
##     print.progress = FALSE)

###7: A Permutation Test for ANOVA Here is code for performing a permutation test to evaluate ANOVA. It is written in ‘loop’ style, so it is more readable (in a few weeks we’ll discuss other implementations). Note that ALL functions in the package RRPP above use permutation approaches.

F.obs<-anova(lm(TotalLength~sex))[1,4]  #Find Test value and save
permute<-999
F.rand.vec<-array(NA,(permute+1))
F.rand.vec[permute+1]<-F.obs
for(i in 1:permute){
  ###Shuffle Data
    y.rand<-sample(TotalLength)  #SHUFFLE FIRST COL
    F.rand.vec[i]<-anova(lm(y.rand~sex))[1,4]  
}  
P.Ftest<-rank(F.rand.vec[permute+1])/(permute+1)
F.obs

## [1] 16.66688

P.Ftest

## [1] 0.001

hist(F.rand.vec,40,freq=T,col="gray")
segments(F.obs, 0, F.obs, 50)  ##Plot Observed value