Today we explore multivariate implementations of linear models (anova, regression, etc.). First however, let’s read in some data and examine a few of its properties:
library(knitr)
library(RRPP)
library(mvabund)
library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.5-7
bumpus <- read.csv("Data/bumpus.csv",header=T)
bumpus.data <- log(as.matrix(bumpus[,(5:13)])) # matrix of linear measurements
sex <- as.factor(bumpus[,2])
surv <- as.factor(bumpus[,4])
SexBySurv <- paste(sex,surv)
TotalLength <- bumpus.data[,1]
Y <- bumpus.data[,-1]
mydat <- rrpp.data.frame(Y=Y,sex=sex,surv=surv,TotalLength=TotalLength,SexBySurv = SexBySurv)
Let’s do some simple exploratory descriptions of the data.
#Describe the data
(100*apply(bumpus.data,2,sd)) /apply(bumpus.data,2,mean) # CV: Coefficient of Variation
## TL AE WT BHL HL FL TTL SW
## 0.4406466 0.4112603 1.7600940 0.6465824 1.0884703 1.1748841 1.0818650 0.9140832
## SKL
## 1.5486045
cor.bumpus<-cor(bumpus.data)
cor.bumpus
## TL AE WT BHL HL FL TTL
## TL 1.0000000 0.6932777 0.5867330 0.4710276 0.4859102 0.4441295 0.3784330
## AE 0.6932777 1.0000000 0.5739660 0.5035656 0.6794433 0.5778571 0.5336809
## WT 0.5867330 0.5739660 1.0000000 0.5243470 0.5201937 0.4432527 0.4548922
## BHL 0.4710276 0.5035656 0.5243470 1.0000000 0.6234217 0.6163699 0.5853149
## HL 0.4859102 0.6794433 0.5201937 0.6234217 1.0000000 0.8215338 0.7486924
## FL 0.4441295 0.5778571 0.4432527 0.6163699 0.8215338 1.0000000 0.8107807
## TTL 0.3784330 0.5336809 0.4548922 0.5853149 0.7486924 0.8107807 1.0000000
## SW 0.4381097 0.4364807 0.4702211 0.5347691 0.5121106 0.5229477 0.4609773
## SKL 0.5052307 0.5864533 0.5192465 0.4944828 0.5522638 0.4571827 0.3887796
## SW SKL
## TL 0.4381097 0.5052307
## AE 0.4364807 0.5864533
## WT 0.4702211 0.5192465
## BHL 0.5347691 0.4944828
## HL 0.5121106 0.5522638
## FL 0.5229477 0.4571827
## TTL 0.4609773 0.3887796
## SW 1.0000000 0.3894656
## SKL 0.3894656 1.0000000
pairs(bumpus.data)
vcv.bumpus<-var(bumpus.data)
vcv.bumpus
## TL AE WT BHL HL
## TL 0.0004995189 0.0003505933 0.0007473663 0.0002349790 0.0003454281
## AE 0.0003505933 0.0005119657 0.0007401567 0.0002543216 0.0004889894
## WT 0.0007473663 0.0007401567 0.0032481351 0.0006670257 0.0009429918
## BHL 0.0002349790 0.0002543216 0.0006670257 0.0004982111 0.0004426031
## HL 0.0003454281 0.0004889894 0.0009429918 0.0004426031 0.0010116995
## FL 0.0003377309 0.0004448629 0.0008595154 0.0004680943 0.0008890710
## TTL 0.0003074033 0.0004388798 0.0009422562 0.0004748318 0.0008655112
## SW 0.0002441446 0.0002462486 0.0006682014 0.0002976194 0.0004061418
## SKL 0.0005349521 0.0006286416 0.0014019736 0.0005228861 0.0008321882
## FL TTL SW SKL
## TL 0.0003377309 0.0003074033 0.0002441446 0.0005349521
## AE 0.0004448629 0.0004388798 0.0002462486 0.0006286416
## WT 0.0008595154 0.0009422562 0.0006682014 0.0014019736
## BHL 0.0004680943 0.0004748318 0.0002976194 0.0005228861
## HL 0.0008890710 0.0008655112 0.0004061418 0.0008321882
## FL 0.0011576319 0.0010026103 0.0004436410 0.0007369266
## TTL 0.0010026103 0.0013209518 0.0004177450 0.0006694162
## SW 0.0004436410 0.0004177450 0.0006216936 0.0004600516
## SKL 0.0007369266 0.0006694162 0.0004600516 0.0022443908
var(scale(bumpus.data))
## TL AE WT BHL HL FL TTL
## TL 1.0000000 0.6932777 0.5867330 0.4710276 0.4859102 0.4441295 0.3784330
## AE 0.6932777 1.0000000 0.5739660 0.5035656 0.6794433 0.5778571 0.5336809
## WT 0.5867330 0.5739660 1.0000000 0.5243470 0.5201937 0.4432527 0.4548922
## BHL 0.4710276 0.5035656 0.5243470 1.0000000 0.6234217 0.6163699 0.5853149
## HL 0.4859102 0.6794433 0.5201937 0.6234217 1.0000000 0.8215338 0.7486924
## FL 0.4441295 0.5778571 0.4432527 0.6163699 0.8215338 1.0000000 0.8107807
## TTL 0.3784330 0.5336809 0.4548922 0.5853149 0.7486924 0.8107807 1.0000000
## SW 0.4381097 0.4364807 0.4702211 0.5347691 0.5121106 0.5229477 0.4609773
## SKL 0.5052307 0.5864533 0.5192465 0.4944828 0.5522638 0.4571827 0.3887796
## SW SKL
## TL 0.4381097 0.5052307
## AE 0.4364807 0.5864533
## WT 0.4702211 0.5192465
## BHL 0.5347691 0.4944828
## HL 0.5121106 0.5522638
## FL 0.5229477 0.4571827
## TTL 0.4609773 0.3887796
## SW 1.0000000 0.3894656
## SKL 0.3894656 1.0000000
Multivariate anova may be accompished in the base package of R using ‘lm’. However, one must specify that it is a manova; otherwise it is assumed that a series of independent anovas is desired. Alternatively, MANOVA via RRPP may be performed in the RRPP package.
model1 <- lm(bumpus.data~sex)
summary(model1) #yields a set of univariate analyses
## Response TL :
##
## Call:
## lm(formula = TL ~ sex)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.047193 -0.015036 -0.002457 0.016119 0.043741
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.062204 0.003022 1674.957 < 2e-16 ***
## sexm 0.015427 0.003779 4.083 7.62e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02116 on 134 degrees of freedom
## Multiple R-squared: 0.1106, Adjusted R-squared: 0.104
## F-statistic: 16.67 on 1 and 134 DF, p-value: 7.617e-05
##
##
## Response AE :
##
## Call:
## lm(formula = AE ~ sex)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.047855 -0.013544 -0.001324 0.015324 0.043495
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.485934 0.002756 1990.568 < 2e-16 ***
## sexm 0.024778 0.003446 7.191 4.09e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01929 on 134 degrees of freedom
## Multiple R-squared: 0.2784, Adjusted R-squared: 0.2731
## F-statistic: 51.71 on 1 and 134 DF, p-value: 4.089e-11
##
##
## Response WT :
##
## Call:
## lm(formula = WT ~ sex)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.104927 -0.040364 -0.002589 0.036700 0.199309
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.218418 0.007893 407.771 < 2e-16 ***
## sexm 0.030662 0.009868 3.107 0.00231 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05525 on 134 degrees of freedom
## Multiple R-squared: 0.06721, Adjusted R-squared: 0.06025
## F-statistic: 9.655 on 1 and 134 DF, p-value: 0.002306
##
##
## Response BHL :
##
## Call:
## lm(formula = BHL ~ sex)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.059680 -0.013926 0.001607 0.015340 0.060176
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.448380 0.003175 1086.008 <2e-16 ***
## sexm 0.005808 0.003970 1.463 0.146
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02223 on 134 degrees of freedom
## Multiple R-squared: 0.01572, Adjusted R-squared: 0.008377
## F-statistic: 2.14 on 1 and 134 DF, p-value: 0.1458
##
##
## Response HL :
##
## Call:
## lm(formula = HL ~ sex)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.108253 -0.018507 0.003394 0.020429 0.070796
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.915492 0.004503 647.458 <2e-16 ***
## sexm 0.010478 0.005630 1.861 0.0649 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03152 on 134 degrees of freedom
## Multiple R-squared: 0.0252, Adjusted R-squared: 0.01792
## F-statistic: 3.464 on 1 and 134 DF, p-value: 0.06492
##
##
## Response FL :
##
## Call:
## lm(formula = FL ~ sex)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.088257 -0.015131 -0.000352 0.027031 0.075104
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.894376 0.004876 593.629 <2e-16 ***
## sexm 0.002451 0.006096 0.402 0.688
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03413 on 134 degrees of freedom
## Multiple R-squared: 0.001205, Adjusted R-squared: -0.006248
## F-statistic: 0.1617 on 1 and 134 DF, p-value: 0.6882
##
##
## Response TTL :
##
## Call:
## lm(formula = TTL ~ sex)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.112349 -0.018086 0.000696 0.024309 0.083726
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.362005 0.005204 646.01 <2e-16 ***
## sexm -0.003967 0.006507 -0.61 0.543
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03643 on 134 degrees of freedom
## Multiple R-squared: 0.002767, Adjusted R-squared: -0.004675
## F-statistic: 0.3718 on 1 and 134 DF, p-value: 0.5431
##
##
## Response SW :
##
## Call:
## lm(formula = SW ~ sex)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.086408 -0.018190 -0.000752 0.015316 0.063325
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.725137 0.003564 764.588 <2e-16 ***
## sexm 0.004065 0.004456 0.912 0.363
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02495 on 134 degrees of freedom
## Multiple R-squared: 0.00617, Adjusted R-squared: -0.001246
## F-statistic: 0.8319 on 1 and 134 DF, p-value: 0.3634
##
##
## Response SKL :
##
## Call:
## lm(formula = SKL ~ sex)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.109789 -0.026668 -0.001033 0.031965 0.106245
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.035292 0.006283 483.108 < 2e-16 ***
## sexm 0.037383 0.007855 4.759 4.97e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04398 on 134 degrees of freedom
## Multiple R-squared: 0.1446, Adjusted R-squared: 0.1382
## F-statistic: 22.65 on 1 and 134 DF, p-value: 4.971e-06
summary(manova(model1)) #does multivariate test (using Pillai's)
## Df Pillai approx F num Df den Df Pr(>F)
## sex 1 0.46652 12.243 9 126 9.166e-14 ***
## Residuals 134
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(manova(model1),test="Wilks") #does multivariate test (using Wilks)
## Df Wilks approx F num Df den Df Pr(>F)
## sex 1 0.53348 12.243 9 126 9.166e-14 ***
## Residuals 134
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##### MANOVA via RRPP
model.rrpp <- lm.rrpp(Y~sex,data = mydat, print.progress = FALSE)
anova(model.rrpp)
##
## Analysis of Variance, using Residual Randomization
## Permutation procedure: Randomization of null model residuals
## Number of permutations: 1000
## Estimation method: Ordinary Least Squares
## Sums of Squares and Cross-products: Type I
## Effect sizes (Z) based on F distributions
##
## Df SS MS Rsq F Z Pr(>F)
## sex 1 0.09822 0.098219 0.06854 9.8604 3.336 0.001 **
## Residuals 134 1.33476 0.009961 0.93146
## Total 135 1.43298
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Call: lm.rrpp(f1 = Y ~ sex, data = mydat, print.progress = FALSE)
plot(model.rrpp, type = "PC", pch=21, bg = sex) #PC PLOT!
legend("topright", levels(sex), pch = 21, pt.bg = 1:4)
One may ask why permutation-based approaches are necessary. The following is a simple example for high-dimensional data, showing that when standard, parametric-based methods break down, RRPP can still evaluate statistical models.
##When parametric methods break down
Ynew <- matrix(rnorm(10000), nrow=100) #100 x 100 matrix: N=p
gp <- gl(2,50)
summary(manova(lm(Ynew~gp))) #parametric algebra cannot be completed
## Error in summary.manova(manova(lm(Ynew ~ gp))): residuals have rank 98 < 100
anova(lm.rrpp(Ynew~gp,print.progress = FALSE)) #NO problem with RRPP!
##
## Analysis of Variance, using Residual Randomization
## Permutation procedure: Randomization of null model residuals
## Number of permutations: 1000
## Estimation method: Ordinary Least Squares
## Sums of Squares and Cross-products: Type I
## Effect sizes (Z) based on F distributions
##
## Df SS MS Rsq F Z Pr(>F)
## gp 1 123.0 123.016 0.01254 1.2447 1.6447 0.043 *
## Residuals 98 9685.5 98.832 0.98746
## Total 99 9808.5
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Call: lm.rrpp(f1 = Ynew ~ gp, print.progress = FALSE)
Factorial manova may be accomplished in a similar manner. Alternatively, one may evaluate MANOVA via RRPP. Here one has the option of permuting the raw data or residuals from the reduced models. Simulations have shown that RRPP (reduced residual permutation procedure) displays higher statistical power for factorial designs and is thus preferred. It also permutes the correct exchangeable units under each null hypothesis (see discussion in lecture).
Finally, we have an example of performing pairwise comparisons. These are based on Euclidean distances between group means. Recall that univariate pairwise comparisons are based on the numerator of the T-test in some manner (typically |Y1-Y2|). This is a univariate representation of Euclidean distance, so the approach here is the multivariate equivalent.
#Factorial MANOVA
model2<-lm(bumpus.data~sex*surv)
summary(manova(model2))
## Df Pillai approx F num Df den Df Pr(>F)
## sex 1 0.47143 12.2882 9 124 9.520e-14 ***
## surv 1 0.34256 7.1788 9 124 2.442e-08 ***
## sex:surv 1 0.09718 1.4831 9 124 0.1613
## Residuals 132
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Factorial MANOVA via RRPP
model2.rrpp <- lm.rrpp(Y~sex*surv,data = mydat, print.progress = FALSE)
anova(model2.rrpp)
##
## Analysis of Variance, using Residual Randomization
## Permutation procedure: Randomization of null model residuals
## Number of permutations: 1000
## Estimation method: Ordinary Least Squares
## Sums of Squares and Cross-products: Type I
## Effect sizes (Z) based on F distributions
##
## Df SS MS Rsq F Z Pr(>F)
## sex 1 0.09822 0.098219 0.06854 10.0366 3.3622 0.001 **
## surv 1 0.03995 0.039948 0.02788 4.0821 2.0450 0.017 *
## sex:surv 1 0.00305 0.003052 0.00213 0.3118 -0.9125 0.808
## Residuals 132 1.29176 0.009786 0.90145
## Total 135 1.43298
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Call: lm.rrpp(f1 = Y ~ sex * surv, data = mydat, print.progress = FALSE)
groups <- interaction(mydat$sex, mydat$surv)
plot(model2.rrpp, type = "PC", pch=21, bg = groups)
legend("topright", levels(groups), pch = 21, pt.bg = 1:4)
#Also in vegan (FRPP only)
adonis(formula = Y~sex*surv, method = "euclidean" )
##
## Call:
## adonis(formula = Y ~ sex * surv, method = "euclidean")
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## sex 1 0.09822 0.098219 10.0366 0.06854 0.001 ***
## surv 1 0.03995 0.039948 4.0821 0.02788 0.018 *
## sex:surv 1 0.00305 0.003052 0.3118 0.00213 0.846
## Residuals 132 1.29176 0.009786 0.90145
## Total 135 1.43298 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Pairwise comparisons
PW <- pairwise(model2.rrpp,groups = SexBySurv, print.progress = FALSE)
summary(PW, test.type = "dist") #No pairwise groups significant AFTER accounting for main effects!
##
## Pairwise comparisons
##
## Groups: f FALSE f TRUE m FALSE m TRUE
##
## RRPP: 1000 permutations
##
## LS means:
## Vectors hidden (use show.vectors = TRUE to view)
##
## Pairwise distances between means, plus statistics
## d UCL (95%) Z Pr > d
## f FALSE:f TRUE 0.03129167 0.06256436 -1.1053593 0.877
## f FALSE:m FALSE 0.05292134 0.08605751 -0.4842815 0.669
## f FALSE:m TRUE 0.05395092 0.08147123 -0.2851959 0.607
## f TRUE:m FALSE 0.07766871 0.10635991 -0.1143882 0.537
## f TRUE:m TRUE 0.06388601 0.08724825 0.3222765 0.388
## m FALSE:m TRUE 0.03848096 0.05488069 -0.2152641 0.600
model.null <- lm.rrpp(Y~1,data=mydat, print.progress = FALSE)
summary(pairwise(model2.rrpp, model.null, groups = SexBySurv, print.progress = FALSE),test.type = "dist")
##
## Pairwise comparisons
##
## Groups: f FALSE f TRUE m FALSE m TRUE
##
## RRPP: 1000 permutations
##
## LS means:
## Vectors hidden (use show.vectors = TRUE to view)
##
## Pairwise distances between means, plus statistics
## d UCL (95%) Z Pr > d
## f FALSE:f TRUE 0.03129167 0.04919726 0.5399611 0.282
## f FALSE:m FALSE 0.05292134 0.04254589 2.1786819 0.014
## f FALSE:m TRUE 0.05395092 0.04073769 2.3689910 0.007
## f TRUE:m FALSE 0.07766871 0.04720942 2.9792883 0.001
## f TRUE:m TRUE 0.06388601 0.04332194 2.6774899 0.004
## m FALSE:m TRUE 0.03848096 0.03849811 1.6970478 0.051
#Male vs. female seem to be biggest differences
Multivariate regression is accomplished in a similar manner, with the exception that the independent variable (X) is continuous.
It is a challenge to visualize the multivariate regression, as the Y-axis of this plot is multivariate and can be highly dimensional. Several visualization representations can be used to summarize trends in a multivariate-Y versus X. One approach from the RRPP package are illustrated here.
summary(manova(Y~TotalLength))
## Df Pillai approx F num Df den Df Pr(>F)
## TotalLength 1 0.55629 19.903 8 127 < 2.2e-16 ***
## Residuals 134
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model.reg <- lm.rrpp(Y~TotalLength, data = mydat, print.progress = FALSE)
anova(model.reg)
##
## Analysis of Variance, using Residual Randomization
## Permutation procedure: Randomization of null model residuals
## Number of permutations: 1000
## Estimation method: Ordinary Least Squares
## Sums of Squares and Cross-products: Type I
## Effect sizes (Z) based on F distributions
##
## Df SS MS Rsq F Z Pr(>F)
## TotalLength 1 0.38116 0.38116 0.26599 48.559 5.6649 0.001 **
## Residuals 134 1.05182 0.00785 0.73401
## Total 135 1.43298
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Call: lm.rrpp(f1 = Y ~ TotalLength, data = mydat, print.progress = FALSE)
### Visualizing multivariate regression
plot(model.reg, type = "regression", reg.type = "RegScore",
predictor = mydat$TotalLength, pch=19)
Multivariate analysis of covariance contains both continuous and categorical X-variables. Here one must first test for homogeneity in Y~X slopes across groups. If these are not statistically distinguishable, a common slope model may be used to compare group means while accounting for a common slope.
On the other hand, if the slopes are different, one must perform a pairwise comparison of slopes test to determine how the slopes differ (rather than the group mean test).
As with multivariate regression, visualizing these trends is challenging. RRPP has several options (derived from the literature): two of which are shown here.
#MANCOVA
summary(manova(lm(Y~TotalLength*sex*surv)))
## Df Pillai approx F num Df den Df Pr(>F)
## TotalLength 1 0.63862 26.7287 8 121 < 2.2e-16 ***
## sex 1 0.41791 10.8590 8 121 1.924e-11 ***
## surv 1 0.26227 5.3771 8 121 8.593e-06 ***
## TotalLength:sex 1 0.09667 1.6186 8 121 0.1263
## TotalLength:surv 1 0.02795 0.4348 8 121 0.8981
## sex:surv 1 0.09295 1.5499 8 121 0.1471
## TotalLength:sex:surv 1 0.01526 0.2343 8 121 0.9838
## Residuals 128
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(manova(lm(Y~TotalLength+sex*surv))) # FIT COMMON SLOPE
## Df Pillai approx F num Df den Df Pr(>F)
## TotalLength 1 0.62878 26.2545 8 124 < 2.2e-16 ***
## sex 1 0.40635 10.6098 8 124 2.859e-11 ***
## surv 1 0.26117 5.4791 8 124 6.315e-06 ***
## sex:surv 1 0.08125 1.3708 8 124 0.2157
## Residuals 131
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#MANCOVA via RRPP
model.mancova <- lm.rrpp(Y~TotalLength*sex*surv, data =mydat, print.progress = FALSE)
anova(model.mancova)
##
## Analysis of Variance, using Residual Randomization
## Permutation procedure: Randomization of null model residuals
## Number of permutations: 1000
## Estimation method: Ordinary Least Squares
## Sums of Squares and Cross-products: Type I
## Effect sizes (Z) based on F distributions
##
## Df SS MS Rsq F Z Pr(>F)
## TotalLength 1 0.38116 0.38116 0.26599 53.0290 5.8033 0.001 **
## sex 1 0.03336 0.03336 0.02328 4.6406 2.4586 0.010 *
## surv 1 0.06762 0.06762 0.04719 9.4075 3.5580 0.001 **
## TotalLength:sex 1 0.00554 0.00554 0.00386 0.7702 0.0070 0.505
## TotalLength:surv 1 0.00449 0.00449 0.00313 0.6246 -0.2631 0.596
## sex:surv 1 0.01942 0.01942 0.01355 2.7015 1.7321 0.042 *
## TotalLength:sex:surv 1 0.00137 0.00137 0.00096 0.1905 -1.8356 0.970
## Residuals 128 0.92003 0.00719 0.64204
## Total 135 1.43298
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Call: lm.rrpp(f1 = Y ~ TotalLength * sex * surv, data = mydat, print.progress = FALSE)
#Pairwise comparisons of slopes: NOTE: Used here for illustrative purposes only!
# Interaction should show evidence justifying this analysis
PW.mancova <- pairwise(model.mancova, groups = SexBySurv, covariate = TotalLength, print.progress = FALSE)
summary(PW.mancova, test.type = "VC", angle.type = "deg")
##
## Pairwise comparisons
##
## Groups: f FALSE f TRUE m FALSE m TRUE
##
## RRPP: 1000 permutations
##
## Slopes (vectors of variate change per one unit of covariate
## change, by group):
## Vectors hidden (use show.vectors = TRUE to view)
##
## Pairwise statistics based on slopes vector correlations (r)
## and angles, acos(r)
## The null hypothesis is that r = 1 (parallel vectors).
## This null hypothesis is better treated as the angle
## between vectors = 0
## r angle UCL (95%) Z Pr > angle
## f FALSE:f TRUE 0.9900078 8.106449 39.18516 -2.1209205 0.983
## f FALSE:m FALSE 0.9925400 7.002909 25.00201 -2.2438546 0.981
## f FALSE:m TRUE 0.9475137 18.645708 35.35471 -0.5622322 0.713
## f TRUE:m FALSE 0.9784336 11.920935 40.77940 -1.5571648 0.946
## f TRUE:m TRUE 0.9465036 18.825875 41.25513 -0.4911424 0.698
## m FALSE:m TRUE 0.9587392 16.516249 31.89484 -0.2167071 0.592
### Visualizing MANCOVA
plot(model.mancova, type = "regression", reg.type = "RegScore",
predictor = mydat$TotalLength, pch=19, col = as.numeric(groups))
plot(model.mancova, type = "regression", reg.type = "PredLine",
predictor = mydat$TotalLength, pch=19,
col = as.numeric(groups))
Here we perform a series of analyses on a high-dimensional dataset, containing 54 specimens and 112 variables describing the body shapes of fish. Several RRPP analyses are shown, which remind us of the strength of these permutation-based methods.
First, a MANOVA via RRPP, including pairwise comparisons and graphical visualization of results.
## Df SS MS Rsq F Z Pr(>F)
## Pop 1 0.008993 0.0089927 0.15964 16.076 3.9997 0.001 **
## Sex 1 0.015917 0.0159169 0.28255 28.453 4.1103 0.001 **
## Pop:Sex 1 0.003453 0.0034532 0.06130 6.173 3.7015 0.001 **
## Residuals 50 0.027970 0.0005594 0.49651
## Total 53 0.056333
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Pairwise comparisons
##
## Groups: F.Marsh M.Marsh F.Sinkhole M.Sinkhole
##
## RRPP: 1000 permutations
##
## LS means:
## Vectors hidden (use show.vectors = TRUE to view)
##
## Pairwise distances between means, plus statistics
## d UCL (95%) Z Pr > d
## F.Marsh:M.Marsh 0.04611590 0.04297378 2.3159417 0.007
## F.Marsh:F.Sinkhole 0.03302552 0.03312516 1.6124629 0.055
## F.Marsh:M.Sinkhole 0.03881514 0.04633821 -0.5421145 0.699
## M.Marsh:F.Sinkhole 0.04605211 0.05506197 -0.2523753 0.597
## M.Marsh:M.Sinkhole 0.02802087 0.03343901 0.1854026 0.420
## F.Sinkhole:M.Sinkhole 0.02568508 0.04364031 -2.2111968 0.993
Next we perform MANCOVA to include body size as a covariate. Here, pairwise comparisons are of multivariate slopes. Visualization of these high-dimensional allometry trends are found via approaches by Drake & Klingenberg (2008) and Adams & Nistri (2010).
## Df SS MS Rsq F Z Pr(>F)
## logSize 1 0.014019 0.0140193 0.24886 29.2078 5.0041 0.001 **
## Pop 1 0.010247 0.0102472 0.18190 21.3491 4.7949 0.001 **
## Sex 1 0.005028 0.0050284 0.08926 10.4763 4.5388 0.001 **
## logSize:Pop 1 0.001653 0.0016528 0.02934 3.4434 2.6259 0.004 **
## logSize:Sex 1 0.001450 0.0014498 0.02574 3.0206 2.2721 0.014 *
## Pop:Sex 1 0.001370 0.0013696 0.02431 2.8534 2.2866 0.010 *
## logSize:Pop:Sex 1 0.000486 0.0004865 0.00864 1.0135 0.3337 0.382
## Residuals 46 0.022079 0.0004800 0.39194
## Total 53 0.056333
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Pairwise comparisons
##
## Groups: F.Marsh M.Marsh F.Sinkhole M.Sinkhole
##
## RRPP: 1000 permutations
##
## Slopes (vectors of variate change per one unit of covariate
## change, by group):
## Vectors hidden (use show.vectors = TRUE to view)
##
## Pairwise statistics based on slopes vector correlations (r)
## and angles, acos(r)
## The null hypothesis is that r = 1 (parallel vectors).
## This null hypothesis is better treated as the angle
## between vectors = 0
## r angle UCL (95%) Z Pr > angle
## F.Marsh:M.Marsh 0.6139591 52.12367 95.40553 -1.0051685 0.844
## F.Marsh:F.Sinkhole 0.6318267 50.81498 88.71850 -0.8268874 0.782
## F.Marsh:M.Sinkhole 0.4461018 63.50614 116.45215 -1.3459215 0.904
## M.Marsh:F.Sinkhole 0.7175129 44.15048 77.31288 -1.4269023 0.926
## M.Marsh:M.Sinkhole 0.5629975 55.73665 74.08894 0.5136053 0.316
## F.Sinkhole:M.Sinkhole 0.4627734 62.43378 81.64708 0.4261192 0.342
If one does not have multivariate normal data (ie, not multivariate continuous), MANOVA via RRPP may still be utilized to evaluate such hypotheses. Here a distance matrix is used as the input Y-data. NOTE: one must use an appropriate distance measure for the data type under investigation (see lecture).
mydat$Ydist <- dist(Y)
mancova.dist <- lm.rrpp (Ydist~TotalLength*sex*surv, data =mydat, print.progress = FALSE)
anova(mancova.dist)
##
## Analysis of Variance, using Residual Randomization
## Permutation procedure: Randomization of null model residuals
## Number of permutations: 1000
## Estimation method: Ordinary Least Squares
## Sums of Squares and Cross-products: Type I
## Effect sizes (Z) based on F distributions
##
## Df SS MS Rsq F Z Pr(>F)
## TotalLength 1 0.38116 0.38116 0.26599 53.0290 5.8033 0.001 **
## sex 1 0.03336 0.03336 0.02328 4.6406 2.4586 0.010 *
## surv 1 0.06762 0.06762 0.04719 9.4075 3.5580 0.001 **
## TotalLength:sex 1 0.00554 0.00554 0.00386 0.7702 0.0070 0.505
## TotalLength:surv 1 0.00449 0.00449 0.00313 0.6246 -0.2631 0.596
## sex:surv 1 0.01942 0.01942 0.01355 2.7015 1.7321 0.042 *
## TotalLength:sex:surv 1 0.00137 0.00137 0.00096 0.1905 -1.8356 0.970
## Residuals 128 0.92003 0.00719 0.64204
## Total 135 1.43298
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Call: lm.rrpp(f1 = Ydist ~ TotalLength * sex * surv, data = mydat,
## print.progress = FALSE)
A common scenario in community ecology is to have a data matrix comprised of species abundances across sites. Typically, each species is a column (variable), and the rows are sites. One goal is to determine whether community richness patterns differ across sites in a particular manner (e.g., islands vs. continents, or across some ecological gradient). Effectively, these are ANOVA and regression-type hypotheses, but the response data are in the form of counts, not continuous variables. There is a large literature on what to do in such cases, and several implementations for analysis are possible.
One school uses matrices of log(abundance+1) or Bray-Curtis distances analyzed using linear models (LM) via permutational MANOVA (e.g., Anderson).
Another approach is to use generalized linear models (GLM with poisson or other link-function) on each species separately, followed by evaluating \(\small\sum{F}\)-ratio found across species (e.g., Wharton).
There are strengths and weaknesses to both approaches, and both have advantages. GLM can be accommodating to specific data types but LM is more robust to departures from assumptions. Importantly, it is suggested that residual resampling (i.e., RRPP!) cures a lot of LM/GLM ills. Future theoretical work on RRPP will investigate this more thoroughly.
Here we show a simple example using each of these 3 options:
data("spider") #species abundance data across localities
dim(spider$abund)
## [1] 28 12
# transformation of raw data (log+1)
spid <- data.frame(spider$x)
spid$abund <- as.matrix(spider$abund)
spid$logY <- as.matrix(log(spider$abund + 1)) # log-transform
### 1: MANOVA via RRPP on log(Y)
fitLM <- lm.rrpp(logY ~ soil.dry + bare.sand + fallen.leaves + moss
+ herb.layer + reflection,
data = spid, iter = 999, SS.type = "II",
print.progress = FALSE)
anova(fitLM)
##
## Analysis of Variance, using Residual Randomization
## Permutation procedure: Randomization of null model residuals
## Number of permutations: 1000
## Estimation method: Ordinary Least Squares
## Sums of Squares and Cross-products: Type II
## Effect sizes (Z) based on F distributions
##
## Df SS MS Rsq F Z Pr(>F)
## soil.dry 1 46.08 46.076 0.083788 6.8128 3.03685 0.001 **
## bare.sand 1 10.23 10.227 0.018597 1.5121 0.95185 0.166
## fallen.leaves 1 29.44 29.435 0.053528 4.3523 2.29271 0.009 **
## moss 1 19.50 19.498 0.035457 2.8830 1.75908 0.042 *
## herb.layer 1 13.05 13.053 0.023737 1.9301 1.27281 0.102
## reflection 1 11.36 11.359 0.020656 1.6795 1.10126 0.135
## Residuals 21 142.03 6.763 0.258270
## Total 27 549.91
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Call: lm.rrpp(f1 = logY ~ soil.dry + bare.sand + fallen.leaves + moss +
## herb.layer + reflection, iter = 999, SS.type = "II", data = spid,
## print.progress = FALSE)
pca <- plot(fitLM, type = "PC")
text(pca$PC.points, rownames(pca$PC.points), pos = 1, cex = 0.5)
### 2: MANOVA via RRPP on Bray-Curtis distance (ala Anderson)
Db <- vegdist(spid$abund, method="bray")
fitD <- lm.rrpp(Db ~ soil.dry + bare.sand + fallen.leaves + moss
+ herb.layer + reflection,
data = spid, iter = 999, SS.type = "II",
print.progress = FALSE)
anova(fitD) #pretty similar
##
## Analysis of Variance, using Residual Randomization
## Permutation procedure: Randomization of null model residuals
## Number of permutations: 1000
## Estimation method: Ordinary Least Squares
## Sums of Squares and Cross-products: Type II
## Effect sizes (Z) based on F distributions
##
## Df SS MS Rsq F Z Pr(>F)
## soil.dry 1 0.8891 0.88908 0.07082 4.2743 3.5080 0.001 **
## bare.sand 1 0.4640 0.46404 0.03697 2.2309 2.1061 0.016 *
## fallen.leaves 1 0.5586 0.55865 0.04450 2.6857 2.3120 0.008 **
## moss 1 0.5680 0.56800 0.04525 2.7307 2.2711 0.016 *
## herb.layer 1 0.3315 0.33150 0.02641 1.5937 1.3121 0.100 .
## reflection 1 0.2399 0.23994 0.01911 1.1535 0.5277 0.280
## Residuals 21 4.3681 0.20801 0.34797
## Total 27 12.5532
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Call: lm.rrpp(f1 = Db ~ soil.dry + bare.sand + fallen.leaves + moss +
## herb.layer + reflection, iter = 999, SS.type = "II", data = spid,
## print.progress = FALSE)
### 3: Generalized linear models: mvAbund (ala Wharton)
spiddat <- mvabund(spider$abund)
X <- spider$x
#To fit a log-linear model assuming counts are poisson:
#NOTE: mvabund fits generlized LM to each Y-variable separately, then sums F-values
glm.spid <- manyglm(spiddat~X, family="poisson")
summary(glm.spid) #not the same as above
##
## Test statistics:
## wald value Pr(>wald)
## (Intercept) 17.941 0.000999 ***
## Xsoil.dry 20.142 0.000999 ***
## Xbare.sand 8.057 0.087912 .
## Xfallen.leaves 11.414 0.003996 **
## Xmoss 13.831 0.002997 **
## Xherb.layer 17.671 0.000999 ***
## Xreflection 12.519 0.004995 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Test statistic: 51.38, p-value: 0.000999
## Arguments:
## Test statistics calculated assuming response assumed to be uncorrelated
## P-value calculated using 1000 resampling iterations via pit.trap resampling (to account for correlation in testing).