Today’s lab has two objectives. First we will remind ourselves of matrix algebra computations in R, and derive the linear model from it. Next we will code multiple implementations of LM in R, and compare their computational performance. First some basic matrix operations (reviewed from week 1):
bumpus<-read.csv("Data/bumpus.csv",header=T)
bumpus.data<-as.matrix(bumpus[,(5:13)]) #note: 'as.matrix' forces data type
cor(bumpus.data) #correlation matrix
## TL AE WT BHL HL FL TTL
## TL 1.0000000 0.6909709 0.5838648 0.4694466 0.4846190 0.4447051 0.3776146
## AE 0.6909709 1.0000000 0.5686500 0.4990738 0.6779536 0.5782836 0.5316798
## WT 0.5838648 0.5686500 1.0000000 0.5192088 0.5188943 0.4441451 0.4544589
## BHL 0.4694466 0.4990738 0.5192088 1.0000000 0.6229937 0.6164174 0.5843728
## HL 0.4846190 0.6779536 0.5188943 0.6229937 1.0000000 0.8205803 0.7460385
## FL 0.4447051 0.5782836 0.4441451 0.6164174 0.8205803 1.0000000 0.8092996
## TTL 0.3776146 0.5316798 0.4544589 0.5843728 0.7460385 0.8092996 1.0000000
## SW 0.4355363 0.4338913 0.4714846 0.5347534 0.5120226 0.5212480 0.4586951
## SKL 0.5008898 0.5801525 0.5126353 0.4903663 0.5486094 0.4536862 0.3840558
## SW SKL
## TL 0.4355363 0.5008898
## AE 0.4338913 0.5801525
## WT 0.4714846 0.5126353
## BHL 0.5347534 0.4903663
## HL 0.5120226 0.5486094
## FL 0.5212480 0.4536862
## TTL 0.4586951 0.3840558
## SW 1.0000000 0.3840089
## SKL 0.3840089 1.0000000
vcv.bumpus<-var(bumpus.data) #covariance matrix
var(bumpus.data)
## TL AE WT BHL HL FL TTL
## TL 12.6795207 13.5840959 3.0670370 1.1741068 1.0115508 0.9698705 1.3915631
## AE 13.5840959 30.4816993 4.6314815 1.9353268 2.1940952 1.9554680 3.0378926
## WT 3.0670370 4.6314815 2.1762593 0.5379815 0.4487145 0.4013012 0.6938292
## BHL 1.1741068 1.9353268 0.5379815 0.4933328 0.2565013 0.2651766 0.4247786
## HL 1.0115508 2.1940952 0.4487145 0.2565013 0.3436147 0.2946099 0.4525847
## FL 0.9698705 1.9554680 0.4013012 0.2651766 0.2946099 0.3751289 0.5129822
## TTL 1.3915631 3.0378926 0.6938292 0.4247786 0.4525847 0.5129822 1.0710392
## SW 0.5907160 0.9124383 0.2649267 0.1430627 0.1143215 0.1216011 0.1808130
## SKL 1.7962310 3.2257474 0.7616096 0.3468640 0.3238678 0.2798430 0.4002816
## SW SKL
## TL 0.5907160 1.7962310
## AE 0.9124383 3.2257474
## WT 0.2649267 0.7616096
## BHL 0.1430627 0.3468640
## HL 0.1143215 0.3238678
## FL 0.1216011 0.2798430
## TTL 0.1808130 0.4002816
## SW 0.1450794 0.1473034
## SKL 0.1473034 1.0142317
# Elementary matrix algebra
a<-matrix(c(1,0,4,2,-1,1),nrow=3)
b<-matrix(c(1,-1,2,1,1,0),nrow=2)
a
## [,1] [,2]
## [1,] 1 2
## [2,] 0 -1
## [3,] 4 1
b
## [,1] [,2] [,3]
## [1,] 1 2 1
## [2,] -1 1 0
c<-t(a) #matrix transpose
a
## [,1] [,2]
## [1,] 1 2
## [2,] 0 -1
## [3,] 4 1
c
## [,1] [,2] [,3]
## [1,] 1 0 4
## [2,] 2 -1 1
d<-matrix(c(2,3,1,4),nrow=2)
det(d) #matrix determinant
## [1] 5
exp(determinant(d)$modulus) #another way
## [1] 5
## attr(,"logarithm")
## [1] TRUE
2*a #scalar multiplication
## [,1] [,2]
## [1,] 2 4
## [2,] 0 -2
## [3,] 8 2
#Matrix addition and subtraction #NOTE: dimensions must agree
b+c
## [,1] [,2] [,3]
## [1,] 2 2 5
## [2,] 1 0 1
b-c
## [,1] [,2] [,3]
## [1,] 0 2 -3
## [2,] -3 2 -1
#elementwise multiplication (Hadamard product)
c
## [,1] [,2] [,3]
## [1,] 1 0 4
## [2,] 2 -1 1
b
## [,1] [,2] [,3]
## [1,] 1 2 1
## [2,] -1 1 0
c*b
## [,1] [,2] [,3]
## [1,] 1 0 4
## [2,] -2 -1 0
diag(5) #identity matrix with 5 rows/cols
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1 0 0 0 0
## [2,] 0 1 0 0 0
## [3,] 0 0 1 0 0
## [4,] 0 0 0 1 0
## [5,] 0 0 0 0 1
# matrix multiplication
a%*%b ## %*% is symbol for matrix multiplication
## [,1] [,2] [,3]
## [1,] -1 4 1
## [2,] 1 -1 0
## [3,] 3 9 4
b%*%a ## matrix order matters
## [,1] [,2]
## [1,] 5 1
## [2,] -1 -3
# matrix inversion
d
## [,1] [,2]
## [1,] 2 1
## [2,] 3 4
solve(d)
## [,1] [,2]
## [1,] 0.8 -0.2
## [2,] -0.6 0.4
solve(vcv.bumpus)
## TL AE WT BHL HL FL
## TL 0.17682481 -0.05977137 -0.10157980 -0.07938958 0.1440015 -0.11052836
## AE -0.05977137 0.09423566 -0.02716889 0.03820688 -0.3213803 0.01161768
## WT -0.10157980 -0.02716889 0.89242823 -0.26790315 -0.2008471 0.38530152
## BHL -0.07938958 0.03820688 -0.26790315 4.21820525 -0.7300801 -0.73369062
## HL 0.14400150 -0.32138035 -0.20084712 -0.73008011 12.3145520 -5.38537340
## FL -0.11052836 0.01161768 0.38530152 -0.73369062 -5.3853734 11.75236989
## TTL 0.06914392 -0.03202389 -0.20482705 -0.45464361 -1.1073817 -2.93456590
## SW -0.14535562 0.02540986 -0.48403467 -1.42381878 -0.5085213 -1.53005436
## SKL -0.04129415 -0.07815621 -0.20325013 -0.40056242 -0.7678904 -0.02221795
## TTL SW SKL
## TL 0.06914392 -0.14535562 -0.04129415
## AE -0.03202389 0.02540986 -0.07815621
## WT -0.20482705 -0.48403467 -0.20325013
## BHL -0.45464361 -1.42381878 -0.40056242
## HL -1.10738172 -0.50852128 -0.76789039
## FL -2.93456590 -1.53005436 -0.02221795
## TTL 3.01685639 0.05773773 0.25296602
## SW 0.05773773 11.27314186 -0.04848164
## SKL 0.25296602 -0.04848164 1.75583187
### example of redundant variables causing singularities
a <- rnorm(10)
b <-rnorm(10)
c <- a+b
cov.dat <- cov(cbind(a,b,c))
solve(cov.dat)
## Error in solve.default(cov.dat): system is computationally singular: reciprocal condition number = 1.10775e-17
# Generalized Inverse
library(MASS)
ginv(vcv.bumpus) #one way of dealing with singular covariance matrices (when P>N, or when there are redundancies in the data)
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 0.17682481 -0.05977137 -0.10157980 -0.07938958 0.1440015 -0.11052836
## [2,] -0.05977137 0.09423566 -0.02716889 0.03820688 -0.3213803 0.01161768
## [3,] -0.10157980 -0.02716889 0.89242823 -0.26790315 -0.2008471 0.38530152
## [4,] -0.07938958 0.03820688 -0.26790315 4.21820525 -0.7300801 -0.73369062
## [5,] 0.14400150 -0.32138035 -0.20084712 -0.73008011 12.3145520 -5.38537340
## [6,] -0.11052836 0.01161768 0.38530152 -0.73369062 -5.3853734 11.75236989
## [7,] 0.06914392 -0.03202389 -0.20482705 -0.45464361 -1.1073817 -2.93456590
## [8,] -0.14535562 0.02540986 -0.48403467 -1.42381878 -0.5085213 -1.53005436
## [9,] -0.04129415 -0.07815621 -0.20325013 -0.40056242 -0.7678904 -0.02221795
## [,7] [,8] [,9]
## [1,] 0.06914392 -0.14535562 -0.04129415
## [2,] -0.03202389 0.02540986 -0.07815621
## [3,] -0.20482705 -0.48403467 -0.20325013
## [4,] -0.45464361 -1.42381878 -0.40056242
## [5,] -1.10738172 -0.50852128 -0.76789039
## [6,] -2.93456590 -1.53005436 -0.02221795
## [7,] 3.01685639 0.05773773 0.25296602
## [8,] 0.05773773 11.27314186 -0.04848164
## [9,] 0.25296602 -0.04848164 1.75583187
### Matrix decomposition
#eigen-analysis
eigen(vcv.bumpus) #decomposition of square-symmetric matrices
## eigen() decomposition
## $values
## [1] 39.81095374 5.41931081 1.56831442 0.88729297 0.59244459 0.23716043
## [7] 0.12349428 0.08429277 0.05664207
##
## $vectors
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] -0.46421751 0.86423430 0.1715483 -0.08310303 0.018084648 0.026025763
## [2,] -0.85587725 -0.48496835 0.1585133 0.06700653 0.041936781 -0.014974720
## [3,] -0.14925589 0.11742918 -0.7477391 0.58472942 0.236729456 0.064403818
## [4,] -0.06078428 0.02343696 -0.2539225 -0.21072835 -0.091077596 -0.890352786
## [5,] -0.06392372 -0.02953856 -0.1804244 -0.23164163 -0.033910269 -0.020098616
## [6,] -0.05821513 -0.01637147 -0.2024073 -0.34341107 0.025530449 0.004005409
## [7,] -0.08979713 -0.04432935 -0.4206564 -0.65296219 0.212956971 0.368029461
## [8,] -0.02895361 0.01724848 -0.1026446 -0.06261810 -0.005179982 -0.176945163
## [9,] -0.09811776 0.01275622 -0.2466677 0.02047989 -0.941487692 0.187209000
## [,7] [,8] [,9]
## [1,] -0.003055381 0.012467160 -0.009853393
## [2,] -0.020661587 -0.009236065 0.012974169
## [3,] 0.020688399 0.038471990 0.028531844
## [4,] -0.257259487 0.139602551 0.013355882
## [5,] 0.586155553 0.295196611 -0.690331220
## [6,] 0.569855554 0.115640453 0.706163706
## [7,] -0.435446521 -0.090096639 -0.090180534
## [8,] 0.263011538 -0.932439620 -0.120216865
## [9,] -0.076491068 -0.026748739 0.030547343
#singular-value decomposition
svd(vcv.bumpus) #Same results as 'eigen'
## $d
## [1] 39.81095374 5.41931081 1.56831442 0.88729297 0.59244459 0.23716043
## [7] 0.12349428 0.08429277 0.05664207
##
## $u
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] -0.46421751 0.86423430 0.1715483 -0.08310303 -0.018084648 -0.026025763
## [2,] -0.85587725 -0.48496835 0.1585133 0.06700653 -0.041936781 0.014974720
## [3,] -0.14925589 0.11742918 -0.7477391 0.58472942 -0.236729456 -0.064403818
## [4,] -0.06078428 0.02343696 -0.2539225 -0.21072835 0.091077596 0.890352786
## [5,] -0.06392372 -0.02953856 -0.1804244 -0.23164163 0.033910269 0.020098616
## [6,] -0.05821513 -0.01637147 -0.2024073 -0.34341107 -0.025530449 -0.004005409
## [7,] -0.08979713 -0.04432935 -0.4206564 -0.65296219 -0.212956971 -0.368029461
## [8,] -0.02895361 0.01724848 -0.1026446 -0.06261810 0.005179982 0.176945163
## [9,] -0.09811776 0.01275622 -0.2466677 0.02047989 0.941487692 -0.187209000
## [,7] [,8] [,9]
## [1,] 0.003055381 -0.012467160 0.009853393
## [2,] 0.020661587 0.009236065 -0.012974169
## [3,] -0.020688399 -0.038471990 -0.028531844
## [4,] 0.257259487 -0.139602551 -0.013355882
## [5,] -0.586155553 -0.295196611 0.690331220
## [6,] -0.569855554 -0.115640453 -0.706163706
## [7,] 0.435446521 0.090096639 0.090180534
## [8,] -0.263011538 0.932439620 0.120216865
## [9,] 0.076491068 0.026748739 -0.030547343
##
## $v
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] -0.46421751 0.86423430 0.1715483 -0.08310303 -0.018084648 -0.026025763
## [2,] -0.85587725 -0.48496835 0.1585133 0.06700653 -0.041936781 0.014974720
## [3,] -0.14925589 0.11742918 -0.7477391 0.58472942 -0.236729456 -0.064403818
## [4,] -0.06078428 0.02343696 -0.2539225 -0.21072835 0.091077596 0.890352786
## [5,] -0.06392372 -0.02953856 -0.1804244 -0.23164163 0.033910269 0.020098616
## [6,] -0.05821513 -0.01637147 -0.2024073 -0.34341107 -0.025530449 -0.004005409
## [7,] -0.08979713 -0.04432935 -0.4206564 -0.65296219 -0.212956971 -0.368029461
## [8,] -0.02895361 0.01724848 -0.1026446 -0.06261810 0.005179982 0.176945163
## [9,] -0.09811776 0.01275622 -0.2466677 0.02047989 0.941487692 -0.187209000
## [,7] [,8] [,9]
## [1,] 0.003055381 -0.012467160 0.009853393
## [2,] 0.020661587 0.009236065 -0.012974169
## [3,] -0.020688399 -0.038471990 -0.028531844
## [4,] 0.257259487 -0.139602551 -0.013355882
## [5,] -0.586155553 -0.295196611 0.690331220
## [6,] -0.569855554 -0.115640453 -0.706163706
## [7,] 0.435446521 0.090096639 0.090180534
## [8,] -0.263011538 0.932439620 0.120216865
## [9,] 0.076491068 0.026748739 -0.030547343
#svd(bumpus.data) #Can also decompose rectangular matrices (NOT run)
Here we use matrix algebra to accomplish linear regression and ANOVA. The steps require fitting a full and a reduced model to obtain parameters, fitted values, residuals, and SS. We show that these are identical to the values obtained using ‘lm’: thus they are equivalent ‘under the hood.’
#Some data
x<-matrix(1,10)
x2<-matrix(c(1,1,1,1,1,0,0,0,0,0))
xnew<-cbind(x,x2)
y<-matrix(c(5,4,4,4,3,7,5,6,6,6))
yreg<-matrix(c(1,3,4,6,8,7,9,11,10,13))
xreg<-matrix(c(1,2,3,4,5,6,7,8,9,10))
xnewreg<-cbind(x,xreg)
#ANOVA
model1<-lm(y~x2)
model.matrix(model1)
## (Intercept) x2
## 1 1 1
## 2 1 1
## 3 1 1
## 4 1 1
## 5 1 1
## 6 1 0
## 7 1 0
## 8 1 0
## 9 1 0
## 10 1 0
## attr(,"assign")
## [1] 0 1
b<-solve(t(xnew)%*%xnew)%*%t(xnew)%*%y
summary(model1)
##
## Call:
## lm(formula = y ~ x2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1 0 0 0 1
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.0000 0.3162 18.974 6.16e-08 ***
## x2 -2.0000 0.4472 -4.472 0.00208 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7071 on 8 degrees of freedom
## Multiple R-squared: 0.7143, Adjusted R-squared: 0.6786
## F-statistic: 20 on 1 and 8 DF, p-value: 0.002077
b
## [,1]
## [1,] 6
## [2,] -2
predict(model1)
## 1 2 3 4 5 6 7 8 9 10
## 4 4 4 4 4 6 6 6 6 6
yhat<-xnew%*%b
yhat
## [,1]
## [1,] 4
## [2,] 4
## [3,] 4
## [4,] 4
## [5,] 4
## [6,] 6
## [7,] 6
## [8,] 6
## [9,] 6
## [10,] 6
SSFull<-t(y-yhat)%*%(y-yhat)
bred<-solve(t(x)%*%x)%*%t(x)%*%y
yhatred<-x%*%bred
yhatred
## [,1]
## [1,] 5
## [2,] 5
## [3,] 5
## [4,] 5
## [5,] 5
## [6,] 5
## [7,] 5
## [8,] 5
## [9,] 5
## [10,] 5
SSRed<-t(y-yhatred)%*%(y-yhatred) #Is SSTot
SSModel<-SSRed-SSFull
anova(model1)
## Analysis of Variance Table
##
## Response: y
## Df Sum Sq Mean Sq F value Pr(>F)
## x2 1 10 10.0 20 0.002077 **
## Residuals 8 4 0.5
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
SSModel
## [,1]
## [1,] 10
SSFull
## [,1]
## [1,] 4
SSRed
## [,1]
## [1,] 14
#Regression
model2<-lm(yreg~xreg)
breg<-solve(t(xnewreg)%*%xnewreg)%*%t(xnewreg)%*%yreg
summary(model2)
##
## Call:
## lm(formula = yreg ~ xreg)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.44242 -0.60152 0.01212 0.55000 1.40606
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.53333 0.61043 0.874 0.408
## xreg 1.21212 0.09838 12.321 1.75e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8936 on 8 degrees of freedom
## Multiple R-squared: 0.9499, Adjusted R-squared: 0.9437
## F-statistic: 151.8 on 1 and 8 DF, p-value: 1.753e-06
breg
## [,1]
## [1,] 0.5333333
## [2,] 1.2121212
predict(model2)
## 1 2 3 4 5 6 7 8
## 1.745455 2.957576 4.169697 5.381818 6.593939 7.806061 9.018182 10.230303
## 9 10
## 11.442424 12.654545
yhatreg<-xnewreg%*%breg
yhatreg
## [,1]
## [1,] 1.745455
## [2,] 2.957576
## [3,] 4.169697
## [4,] 5.381818
## [5,] 6.593939
## [6,] 7.806061
## [7,] 9.018182
## [8,] 10.230303
## [9,] 11.442424
## [10,] 12.654545
SSFullreg<-t(yreg-yhatreg)%*%(yreg-yhatreg)
bredreg<-solve(t(x)%*%x)%*%t(x)%*%yreg
yhatredreg<-x%*%bredreg
yhatredreg
## [,1]
## [1,] 7.2
## [2,] 7.2
## [3,] 7.2
## [4,] 7.2
## [5,] 7.2
## [6,] 7.2
## [7,] 7.2
## [8,] 7.2
## [9,] 7.2
## [10,] 7.2
SSRedreg<-t(yreg-yhatredreg)%*%(yreg-yhatredreg) #Is SSTot
SSModelreg<-SSRedreg-SSFullreg
anova(model2)
## Analysis of Variance Table
##
## Response: yreg
## Df Sum Sq Mean Sq F value Pr(>F)
## xreg 1 121.212 121.212 151.8 1.753e-06 ***
## Residuals 8 6.388 0.798
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
SSModelreg
## [,1]
## [1,] 121.2121
SSFullreg
## [,1]
## [1,] 6.387879
SSRedreg
## [,1]
## [1,] 127.6
In R there are often multiple ways to implement the same analysis. One question is how they compare computationally. Linear models serve as an example. Below are six (yes 6) different ways to implement a regression. We then compare them for different sized data (varying both N-objects and P-variables):
library(microbenchmark)
x<-matrix(rnorm(10000),ncol=2)
xf<-cbind(1,x)
y<-matrix(rnorm(nrow(x)))
lm(y~x) #Common method
##
## Call:
## lm(formula = y ~ x)
##
## Coefficients:
## (Intercept) x1 x2
## -0.006864 -0.007409 0.009480
solve(t(xf)%*%xf)%*%t(xf)%*%y
## [,1]
## [1,] -0.006863857
## [2,] -0.007408796
## [3,] 0.009479634
crossprod(solve(crossprod(xf)),crossprod(xf,y))
## [,1]
## [1,] -0.006863857
## [2,] -0.007408796
## [3,] 0.009479634
lm.fit(xf,y)$coefficients
## x1 x2 x3
## -0.006863857 -0.007408796 0.009479634
.lm.fit(xf,y)$coefficients ### NOTE: a very low-level function (cannot use in packages submitted to CRAN)
## [1] -0.006863857 -0.007408796 0.009479634
qr.coef(qr(xf),y)
## [,1]
## [1,] -0.006863857
## [2,] -0.007408796
## [3,] 0.009479634
microbenchmark(
lm(y~x),
solve(t(xf)%*%xf)%*%t(xf)%*%y,
crossprod(solve(crossprod(xf)),crossprod(xf,y)),
lm.fit(xf,y),.lm.fit(xf,y),
qr.coef(qr(xf),y)
)
## Unit: microseconds
## expr min lq mean
## lm(y ~ x) 975.999 1282.6655 1932.4458
## solve(t(xf) %*% xf) %*% t(xf) %*% y 192.000 243.6925 386.1826
## crossprod(solve(crossprod(xf)), crossprod(xf, y)) 90.257 110.1540 151.8771
## lm.fit(xf, y) 288.821 403.0770 874.2231
## .lm.fit(xf, y) 198.564 228.1030 318.4451
## qr.coef(qr(xf), y) 191.590 261.9485 674.4202
## median uq max neval cld
## 1705.6395 2170.4600 9885.941 100 c
## 344.4105 439.3845 1985.230 100 ab
## 126.9740 150.5640 641.230 100 a
## 494.1535 654.1535 27566.747 100 b
## 277.5385 363.6925 1304.205 100 ab
## 330.2565 514.2570 19389.113 100 ab
###NOTE that the best implementation can change with the size of the data matrix
#Large X univ. Y
x<-matrix(rnorm(10000),ncol=50)
xf<-cbind(1,x)
y<-matrix(rnorm(nrow(x)))
microbenchmark(
lm(y~x),
solve(t(xf)%*%xf)%*%t(xf)%*%y,
crossprod(solve(crossprod(xf)),crossprod(xf,y)),
lm.fit(xf,y),.lm.fit(xf,y),
qr.coef(qr(xf),y)
)
## Unit: microseconds
## expr min lq mean
## lm(y ~ x) 1020.307 1255.3840 1734.7476
## solve(t(xf) %*% xf) %*% t(xf) %*% y 735.179 822.5630 1132.2906
## crossprod(solve(crossprod(xf)), crossprod(xf, y)) 393.846 443.0770 536.3896
## lm.fit(xf, y) 470.564 529.4355 651.6428
## .lm.fit(xf, y) 425.436 456.0000 562.3012
## qr.coef(qr(xf), y) 447.180 518.3585 738.5966
## median uq max neval cld
## 1564.9220 1935.9985 6239.174 100 d
## 954.8715 1270.7680 4534.150 100 c
## 481.2305 606.7685 1705.435 100 a
## 588.1020 711.1790 1312.410 100 ab
## 492.9225 613.3335 1699.281 100 ab
## 609.6405 758.5635 6146.457 100 b
##Large Y univ. X
y<-matrix(rnorm(10000),ncol=100)
x<-matrix(rnorm(nrow(y)))
xf<-cbind(1,x)
microbenchmark(
lm(y~x),
solve(t(xf)%*%xf)%*%t(xf)%*%y,
crossprod(solve(crossprod(xf)),crossprod(xf,y)),
lm.fit(xf,y),.lm.fit(xf,y),
qr.coef(qr(xf),y)
)
## Unit: microseconds
## expr min lq mean
## lm(y ~ x) 745.025 872.205 1247.5357
## solve(t(xf) %*% xf) %*% t(xf) %*% y 49.641 60.308 82.1540
## crossprod(solve(crossprod(xf)), crossprod(xf, y)) 38.564 45.744 74.3345
## lm.fit(xf, y) 157.129 186.666 311.8359
## .lm.fit(xf, y) 118.564 128.205 172.4350
## qr.coef(qr(xf), y) 85.743 108.923 166.4084
## median uq max neval cld
## 1003.4870 1355.2815 5920.406 100 c
## 72.2055 98.4615 282.666 100 a
## 58.4620 81.4360 468.513 100 a
## 248.0005 312.6150 3359.587 100 b
## 147.6920 187.8970 578.872 100 a
## 133.7435 182.5640 438.564 100 a
#large Y and X
y<-matrix(rnorm(20000),ncol=100)
x<-matrix(rnorm(10000),ncol=50)
xf<-cbind(1,x)
microbenchmark(
lm(y~x),
solve(t(xf)%*%xf)%*%t(xf)%*%y,
crossprod(solve(crossprod(xf)),crossprod(xf,y)),
lm.fit(xf,y),.lm.fit(xf,y),
qr.coef(qr(xf),y)
)
## Unit: milliseconds
## expr min lq mean
## lm(y ~ x) 3.993433 4.215791 5.077473
## solve(t(xf) %*% xf) %*% t(xf) %*% y 1.258666 1.299897 1.648947
## crossprod(solve(crossprod(xf)), crossprod(xf, y)) 1.453948 1.501537 1.712368
## lm.fit(xf, y) 3.182767 3.265023 3.787197
## .lm.fit(xf, y) 3.118357 3.174357 3.556219
## qr.coef(qr(xf), y) 1.812102 1.865024 3.081651
## median uq max neval cld
## 4.638150 5.697432 8.556301 100 c
## 1.416204 1.712409 3.817433 100 a
## 1.609024 1.876921 2.549332 100 a
## 3.522664 3.952818 8.202250 100 bc
## 3.352408 3.764305 6.132508 100 bc
## 2.000204 2.253947 96.022896 100 ab