Today’s lab has two objectives. First we will remind ourselves of matrix algebra computations in R, and derive the linear model from it. Next we will code multiple implementations of LM in R, and compare their computational performance. First some basic matrix operations (reviewed from week 1):
bumpus<-read.csv("Data/bumpus.csv",header=T)
bumpus.data<-as.matrix(bumpus[,(5:13)]) #note: 'as.matrix' forces data type
cor(bumpus.data) #correlation matrix
## TL AE WT BHL HL FL TTL
## TL 1.0000000 0.6909709 0.5838648 0.4694466 0.4846190 0.4447051 0.3776146
## AE 0.6909709 1.0000000 0.5686500 0.4990738 0.6779536 0.5782836 0.5316798
## WT 0.5838648 0.5686500 1.0000000 0.5192088 0.5188943 0.4441451 0.4544589
## BHL 0.4694466 0.4990738 0.5192088 1.0000000 0.6229937 0.6164174 0.5843728
## HL 0.4846190 0.6779536 0.5188943 0.6229937 1.0000000 0.8205803 0.7460385
## FL 0.4447051 0.5782836 0.4441451 0.6164174 0.8205803 1.0000000 0.8092996
## TTL 0.3776146 0.5316798 0.4544589 0.5843728 0.7460385 0.8092996 1.0000000
## SW 0.4355363 0.4338913 0.4714846 0.5347534 0.5120226 0.5212480 0.4586951
## SKL 0.5008898 0.5801525 0.5126353 0.4903663 0.5486094 0.4536862 0.3840558
## SW SKL
## TL 0.4355363 0.5008898
## AE 0.4338913 0.5801525
## WT 0.4714846 0.5126353
## BHL 0.5347534 0.4903663
## HL 0.5120226 0.5486094
## FL 0.5212480 0.4536862
## TTL 0.4586951 0.3840558
## SW 1.0000000 0.3840089
## SKL 0.3840089 1.0000000
vcv.bumpus<-var(bumpus.data) #covariance matrix
var(bumpus.data)
## TL AE WT BHL HL FL TTL
## TL 12.6795207 13.5840959 3.0670370 1.1741068 1.0115508 0.9698705 1.3915631
## AE 13.5840959 30.4816993 4.6314815 1.9353268 2.1940952 1.9554680 3.0378926
## WT 3.0670370 4.6314815 2.1762593 0.5379815 0.4487145 0.4013012 0.6938292
## BHL 1.1741068 1.9353268 0.5379815 0.4933328 0.2565013 0.2651766 0.4247786
## HL 1.0115508 2.1940952 0.4487145 0.2565013 0.3436147 0.2946099 0.4525847
## FL 0.9698705 1.9554680 0.4013012 0.2651766 0.2946099 0.3751289 0.5129822
## TTL 1.3915631 3.0378926 0.6938292 0.4247786 0.4525847 0.5129822 1.0710392
## SW 0.5907160 0.9124383 0.2649267 0.1430627 0.1143215 0.1216011 0.1808130
## SKL 1.7962310 3.2257474 0.7616096 0.3468640 0.3238678 0.2798430 0.4002816
## SW SKL
## TL 0.5907160 1.7962310
## AE 0.9124383 3.2257474
## WT 0.2649267 0.7616096
## BHL 0.1430627 0.3468640
## HL 0.1143215 0.3238678
## FL 0.1216011 0.2798430
## TTL 0.1808130 0.4002816
## SW 0.1450794 0.1473034
## SKL 0.1473034 1.0142317
# Elementary matrix algebra
a<-matrix(c(1,0,4,2,-1,1),nrow=3)
b<-matrix(c(1,-1,2,1,1,0),nrow=2)
a
## [,1] [,2]
## [1,] 1 2
## [2,] 0 -1
## [3,] 4 1
b
## [,1] [,2] [,3]
## [1,] 1 2 1
## [2,] -1 1 0
c<-t(a) #matrix transpose
a
## [,1] [,2]
## [1,] 1 2
## [2,] 0 -1
## [3,] 4 1
c
## [,1] [,2] [,3]
## [1,] 1 0 4
## [2,] 2 -1 1
d<-matrix(c(2,3,1,4),nrow=2)
det(d) #matrix determinant
## [1] 5
exp(determinant(d)$modulus) #another way
## [1] 5
## attr(,"logarithm")
## [1] TRUE
2*a #scalar multiplication
## [,1] [,2]
## [1,] 2 4
## [2,] 0 -2
## [3,] 8 2
#Matrix addition and subtraction #NOTE: dimensions must agree
b+c
## [,1] [,2] [,3]
## [1,] 2 2 5
## [2,] 1 0 1
b-c
## [,1] [,2] [,3]
## [1,] 0 2 -3
## [2,] -3 2 -1
#elementwise multiplication (Hadamard product)
c
## [,1] [,2] [,3]
## [1,] 1 0 4
## [2,] 2 -1 1
b
## [,1] [,2] [,3]
## [1,] 1 2 1
## [2,] -1 1 0
c*b
## [,1] [,2] [,3]
## [1,] 1 0 4
## [2,] -2 -1 0
diag(5) #identity matrix with 5 rows/cols
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1 0 0 0 0
## [2,] 0 1 0 0 0
## [3,] 0 0 1 0 0
## [4,] 0 0 0 1 0
## [5,] 0 0 0 0 1
# matrix multiplication
a%*%b ## %*% is symbol for matrix multiplication
## [,1] [,2] [,3]
## [1,] -1 4 1
## [2,] 1 -1 0
## [3,] 3 9 4
b%*%a ## matrix order matters
## [,1] [,2]
## [1,] 5 1
## [2,] -1 -3
# matrix inversion
d
## [,1] [,2]
## [1,] 2 1
## [2,] 3 4
solve(d)
## [,1] [,2]
## [1,] 0.8 -0.2
## [2,] -0.6 0.4
solve(vcv.bumpus)
## TL AE WT BHL HL FL
## TL 0.17682481 -0.05977137 -0.10157980 -0.07938958 0.1440015 -0.11052836
## AE -0.05977137 0.09423566 -0.02716889 0.03820688 -0.3213803 0.01161768
## WT -0.10157980 -0.02716889 0.89242823 -0.26790315 -0.2008471 0.38530152
## BHL -0.07938958 0.03820688 -0.26790315 4.21820525 -0.7300801 -0.73369062
## HL 0.14400150 -0.32138035 -0.20084712 -0.73008011 12.3145520 -5.38537340
## FL -0.11052836 0.01161768 0.38530152 -0.73369062 -5.3853734 11.75236989
## TTL 0.06914392 -0.03202389 -0.20482705 -0.45464361 -1.1073817 -2.93456590
## SW -0.14535562 0.02540986 -0.48403467 -1.42381878 -0.5085213 -1.53005436
## SKL -0.04129415 -0.07815621 -0.20325013 -0.40056242 -0.7678904 -0.02221795
## TTL SW SKL
## TL 0.06914392 -0.14535562 -0.04129415
## AE -0.03202389 0.02540986 -0.07815621
## WT -0.20482705 -0.48403467 -0.20325013
## BHL -0.45464361 -1.42381878 -0.40056242
## HL -1.10738172 -0.50852128 -0.76789039
## FL -2.93456590 -1.53005436 -0.02221795
## TTL 3.01685639 0.05773773 0.25296602
## SW 0.05773773 11.27314186 -0.04848164
## SKL 0.25296602 -0.04848164 1.75583187
### example of redundant variables causing singularities
a <- rnorm(10)
b <-rnorm(10)
c <- a+b
cov.dat <- cov(cbind(a,b,c))
solve(cov.dat)
## Error in solve.default(cov.dat): system is computationally singular: reciprocal condition number = 1.10775e-17
# Generalized Inverse
library(MASS)
ginv(vcv.bumpus) #one way of dealing with singular covariance matrices (when P>N, or when there are redundancies in the data)
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 0.17682481 -0.05977137 -0.10157980 -0.07938958 0.1440015 -0.11052836
## [2,] -0.05977137 0.09423566 -0.02716889 0.03820688 -0.3213803 0.01161768
## [3,] -0.10157980 -0.02716889 0.89242823 -0.26790315 -0.2008471 0.38530152
## [4,] -0.07938958 0.03820688 -0.26790315 4.21820525 -0.7300801 -0.73369062
## [5,] 0.14400150 -0.32138035 -0.20084712 -0.73008011 12.3145520 -5.38537340
## [6,] -0.11052836 0.01161768 0.38530152 -0.73369062 -5.3853734 11.75236989
## [7,] 0.06914392 -0.03202389 -0.20482705 -0.45464361 -1.1073817 -2.93456590
## [8,] -0.14535562 0.02540986 -0.48403467 -1.42381878 -0.5085213 -1.53005436
## [9,] -0.04129415 -0.07815621 -0.20325013 -0.40056242 -0.7678904 -0.02221795
## [,7] [,8] [,9]
## [1,] 0.06914392 -0.14535562 -0.04129415
## [2,] -0.03202389 0.02540986 -0.07815621
## [3,] -0.20482705 -0.48403467 -0.20325013
## [4,] -0.45464361 -1.42381878 -0.40056242
## [5,] -1.10738172 -0.50852128 -0.76789039
## [6,] -2.93456590 -1.53005436 -0.02221795
## [7,] 3.01685639 0.05773773 0.25296602
## [8,] 0.05773773 11.27314186 -0.04848164
## [9,] 0.25296602 -0.04848164 1.75583187
### Matrix decomposition
#eigen-analysis
eigen(vcv.bumpus) #decomposition of square-symmetric matrices
## eigen() decomposition
## $values
## [1] 39.81095374 5.41931081 1.56831442 0.88729297 0.59244459 0.23716043
## [7] 0.12349428 0.08429277 0.05664207
##
## $vectors
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] -0.46421751 0.86423430 0.1715483 -0.08310303 0.018084648 0.026025763
## [2,] -0.85587725 -0.48496835 0.1585133 0.06700653 0.041936781 -0.014974720
## [3,] -0.14925589 0.11742918 -0.7477391 0.58472942 0.236729456 0.064403818
## [4,] -0.06078428 0.02343696 -0.2539225 -0.21072835 -0.091077596 -0.890352786
## [5,] -0.06392372 -0.02953856 -0.1804244 -0.23164163 -0.033910269 -0.020098616
## [6,] -0.05821513 -0.01637147 -0.2024073 -0.34341107 0.025530449 0.004005409
## [7,] -0.08979713 -0.04432935 -0.4206564 -0.65296219 0.212956971 0.368029461
## [8,] -0.02895361 0.01724848 -0.1026446 -0.06261810 -0.005179982 -0.176945163
## [9,] -0.09811776 0.01275622 -0.2466677 0.02047989 -0.941487692 0.187209000
## [,7] [,8] [,9]
## [1,] -0.003055381 0.012467160 -0.009853393
## [2,] -0.020661587 -0.009236065 0.012974169
## [3,] 0.020688399 0.038471990 0.028531844
## [4,] -0.257259487 0.139602551 0.013355882
## [5,] 0.586155553 0.295196611 -0.690331220
## [6,] 0.569855554 0.115640453 0.706163706
## [7,] -0.435446521 -0.090096639 -0.090180534
## [8,] 0.263011538 -0.932439620 -0.120216865
## [9,] -0.076491068 -0.026748739 0.030547343
#singular-value decomposition
svd(vcv.bumpus) #Same results as 'eigen'
## $d
## [1] 39.81095374 5.41931081 1.56831442 0.88729297 0.59244459 0.23716043
## [7] 0.12349428 0.08429277 0.05664207
##
## $u
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] -0.46421751 0.86423430 0.1715483 -0.08310303 -0.018084648 -0.026025763
## [2,] -0.85587725 -0.48496835 0.1585133 0.06700653 -0.041936781 0.014974720
## [3,] -0.14925589 0.11742918 -0.7477391 0.58472942 -0.236729456 -0.064403818
## [4,] -0.06078428 0.02343696 -0.2539225 -0.21072835 0.091077596 0.890352786
## [5,] -0.06392372 -0.02953856 -0.1804244 -0.23164163 0.033910269 0.020098616
## [6,] -0.05821513 -0.01637147 -0.2024073 -0.34341107 -0.025530449 -0.004005409
## [7,] -0.08979713 -0.04432935 -0.4206564 -0.65296219 -0.212956971 -0.368029461
## [8,] -0.02895361 0.01724848 -0.1026446 -0.06261810 0.005179982 0.176945163
## [9,] -0.09811776 0.01275622 -0.2466677 0.02047989 0.941487692 -0.187209000
## [,7] [,8] [,9]
## [1,] 0.003055381 -0.012467160 0.009853393
## [2,] 0.020661587 0.009236065 -0.012974169
## [3,] -0.020688399 -0.038471990 -0.028531844
## [4,] 0.257259487 -0.139602551 -0.013355882
## [5,] -0.586155553 -0.295196611 0.690331220
## [6,] -0.569855554 -0.115640453 -0.706163706
## [7,] 0.435446521 0.090096639 0.090180534
## [8,] -0.263011538 0.932439620 0.120216865
## [9,] 0.076491068 0.026748739 -0.030547343
##
## $v
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] -0.46421751 0.86423430 0.1715483 -0.08310303 -0.018084648 -0.026025763
## [2,] -0.85587725 -0.48496835 0.1585133 0.06700653 -0.041936781 0.014974720
## [3,] -0.14925589 0.11742918 -0.7477391 0.58472942 -0.236729456 -0.064403818
## [4,] -0.06078428 0.02343696 -0.2539225 -0.21072835 0.091077596 0.890352786
## [5,] -0.06392372 -0.02953856 -0.1804244 -0.23164163 0.033910269 0.020098616
## [6,] -0.05821513 -0.01637147 -0.2024073 -0.34341107 -0.025530449 -0.004005409
## [7,] -0.08979713 -0.04432935 -0.4206564 -0.65296219 -0.212956971 -0.368029461
## [8,] -0.02895361 0.01724848 -0.1026446 -0.06261810 0.005179982 0.176945163
## [9,] -0.09811776 0.01275622 -0.2466677 0.02047989 0.941487692 -0.187209000
## [,7] [,8] [,9]
## [1,] 0.003055381 -0.012467160 0.009853393
## [2,] 0.020661587 0.009236065 -0.012974169
## [3,] -0.020688399 -0.038471990 -0.028531844
## [4,] 0.257259487 -0.139602551 -0.013355882
## [5,] -0.586155553 -0.295196611 0.690331220
## [6,] -0.569855554 -0.115640453 -0.706163706
## [7,] 0.435446521 0.090096639 0.090180534
## [8,] -0.263011538 0.932439620 0.120216865
## [9,] 0.076491068 0.026748739 -0.030547343
#svd(bumpus.data) #Can also decompose rectangular matrices (NOT run)
Here we use matrix algebra to accomplish linear regression and ANOVA. The steps require fitting a full and a reduced model to obtain parameters, fitted values, residuals, and SS. We show that these are identical to the values obtained using ‘lm’: thus they are equivalent ‘under the hood.’
#Some data
x<-matrix(1,10)
x2<-matrix(c(1,1,1,1,1,0,0,0,0,0))
xnew<-cbind(x,x2)
y<-matrix(c(5,4,4,4,3,7,5,6,6,6))
yreg<-matrix(c(1,3,4,6,8,7,9,11,10,13))
xreg<-matrix(c(1,2,3,4,5,6,7,8,9,10))
xnewreg<-cbind(x,xreg)
#ANOVA
model1<-lm(y~x2)
model.matrix(model1)
## (Intercept) x2
## 1 1 1
## 2 1 1
## 3 1 1
## 4 1 1
## 5 1 1
## 6 1 0
## 7 1 0
## 8 1 0
## 9 1 0
## 10 1 0
## attr(,"assign")
## [1] 0 1
b<-solve(t(xnew)%*%xnew)%*%t(xnew)%*%y
summary(model1)
##
## Call:
## lm(formula = y ~ x2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1 0 0 0 1
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.0000 0.3162 18.974 6.16e-08 ***
## x2 -2.0000 0.4472 -4.472 0.00208 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7071 on 8 degrees of freedom
## Multiple R-squared: 0.7143, Adjusted R-squared: 0.6786
## F-statistic: 20 on 1 and 8 DF, p-value: 0.002077
b
## [,1]
## [1,] 6
## [2,] -2
predict(model1)
## 1 2 3 4 5 6 7 8 9 10
## 4 4 4 4 4 6 6 6 6 6
yhat<-xnew%*%b
yhat
## [,1]
## [1,] 4
## [2,] 4
## [3,] 4
## [4,] 4
## [5,] 4
## [6,] 6
## [7,] 6
## [8,] 6
## [9,] 6
## [10,] 6
SSFull<-t(y-yhat)%*%(y-yhat)
bred<-solve(t(x)%*%x)%*%t(x)%*%y
yhatred<-x%*%bred
yhatred
## [,1]
## [1,] 5
## [2,] 5
## [3,] 5
## [4,] 5
## [5,] 5
## [6,] 5
## [7,] 5
## [8,] 5
## [9,] 5
## [10,] 5
SSRed<-t(y-yhatred)%*%(y-yhatred) #Is SSTot
SSModel<-SSRed-SSFull
anova(model1)
## Analysis of Variance Table
##
## Response: y
## Df Sum Sq Mean Sq F value Pr(>F)
## x2 1 10 10.0 20 0.002077 **
## Residuals 8 4 0.5
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
SSModel
## [,1]
## [1,] 10
SSFull
## [,1]
## [1,] 4
SSRed
## [,1]
## [1,] 14
#Regression
model2<-lm(yreg~xreg)
breg<-solve(t(xnewreg)%*%xnewreg)%*%t(xnewreg)%*%yreg
summary(model2)
##
## Call:
## lm(formula = yreg ~ xreg)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.44242 -0.60152 0.01212 0.55000 1.40606
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.53333 0.61043 0.874 0.408
## xreg 1.21212 0.09838 12.321 1.75e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8936 on 8 degrees of freedom
## Multiple R-squared: 0.9499, Adjusted R-squared: 0.9437
## F-statistic: 151.8 on 1 and 8 DF, p-value: 1.753e-06
breg
## [,1]
## [1,] 0.5333333
## [2,] 1.2121212
predict(model2)
## 1 2 3 4 5 6 7 8
## 1.745455 2.957576 4.169697 5.381818 6.593939 7.806061 9.018182 10.230303
## 9 10
## 11.442424 12.654545
yhatreg<-xnewreg%*%breg
yhatreg
## [,1]
## [1,] 1.745455
## [2,] 2.957576
## [3,] 4.169697
## [4,] 5.381818
## [5,] 6.593939
## [6,] 7.806061
## [7,] 9.018182
## [8,] 10.230303
## [9,] 11.442424
## [10,] 12.654545
SSFullreg<-t(yreg-yhatreg)%*%(yreg-yhatreg)
bredreg<-solve(t(x)%*%x)%*%t(x)%*%yreg
yhatredreg<-x%*%bredreg
yhatredreg
## [,1]
## [1,] 7.2
## [2,] 7.2
## [3,] 7.2
## [4,] 7.2
## [5,] 7.2
## [6,] 7.2
## [7,] 7.2
## [8,] 7.2
## [9,] 7.2
## [10,] 7.2
SSRedreg<-t(yreg-yhatredreg)%*%(yreg-yhatredreg) #Is SSTot
SSModelreg<-SSRedreg-SSFullreg
anova(model2)
## Analysis of Variance Table
##
## Response: yreg
## Df Sum Sq Mean Sq F value Pr(>F)
## xreg 1 121.212 121.212 151.8 1.753e-06 ***
## Residuals 8 6.388 0.798
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
SSModelreg
## [,1]
## [1,] 121.2121
SSFullreg
## [,1]
## [1,] 6.387879
SSRedreg
## [,1]
## [1,] 127.6
In R there are often multiple ways to implement the same analysis. One question is how they compare computationally. Linear models serve as an example. Below are six (yes 6) different ways to implement a regression. We then compare them for different sized data (varying both N-objects and P-variables):
library(microbenchmark)
x<-matrix(rnorm(10000),ncol=2)
xf<-cbind(1,x)
y<-matrix(rnorm(nrow(x)))
lm(y~x) #Common method
##
## Call:
## lm(formula = y ~ x)
##
## Coefficients:
## (Intercept) x1 x2
## -0.006864 -0.007409 0.009480
solve(t(xf)%*%xf)%*%t(xf)%*%y
## [,1]
## [1,] -0.006863857
## [2,] -0.007408796
## [3,] 0.009479634
crossprod(solve(crossprod(xf)),crossprod(xf,y))
## [,1]
## [1,] -0.006863857
## [2,] -0.007408796
## [3,] 0.009479634
lm.fit(xf,y)$coefficients
## x1 x2 x3
## -0.006863857 -0.007408796 0.009479634
.lm.fit(xf,y)$coefficients ### NOTE: a very low-level function (cannot use in packages submitted to CRAN)
## [1] -0.006863857 -0.007408796 0.009479634
qr.coef(qr(xf),y)
## [,1]
## [1,] -0.006863857
## [2,] -0.007408796
## [3,] 0.009479634
microbenchmark(
lm(y~x),
solve(t(xf)%*%xf)%*%t(xf)%*%y,
crossprod(solve(crossprod(xf)),crossprod(xf,y)),
lm.fit(xf,y),.lm.fit(xf,y),
qr.coef(qr(xf),y)
)
## Unit: microseconds
## expr min lq mean
## lm(y ~ x) 975.999 1282.6655 1932.4458
## solve(t(xf) %*% xf) %*% t(xf) %*% y 192.000 243.6925 386.1826
## crossprod(solve(crossprod(xf)), crossprod(xf, y)) 90.257 110.1540 151.8771
## lm.fit(xf, y) 288.821 403.0770 874.2231
## .lm.fit(xf, y) 198.564 228.1030 318.4451
## qr.coef(qr(xf), y) 191.590 261.9485 674.4202
## median uq max neval cld
## 1705.6395 2170.4600 9885.941 100 c
## 344.4105 439.3845 1985.230 100 ab
## 126.9740 150.5640 641.230 100 a
## 494.1535 654.1535 27566.747 100 b
## 277.5385 363.6925 1304.205 100 ab
## 330.2565 514.2570 19389.113 100 ab
###NOTE that the best implementation can change with the size of the data matrix
#Large X univ. Y
x<-matrix(rnorm(10000),ncol=50)
xf<-cbind(1,x)
y<-matrix(rnorm(nrow(x)))
microbenchmark(
lm(y~x),
solve(t(xf)%*%xf)%*%t(xf)%*%y,
crossprod(solve(crossprod(xf)),crossprod(xf,y)),
lm.fit(xf,y),.lm.fit(xf,y),
qr.coef(qr(xf),y)
)
## Unit: microseconds
## expr min lq mean
## lm(y ~ x) 1020.307 1255.3840 1734.7476
## solve(t(xf) %*% xf) %*% t(xf) %*% y 735.179 822.5630 1132.2906
## crossprod(solve(crossprod(xf)), crossprod(xf, y)) 393.846 443.0770 536.3896
## lm.fit(xf, y) 470.564 529.4355 651.6428
## .lm.fit(xf, y) 425.436 456.0000 562.3012
## qr.coef(qr(xf), y) 447.180 518.3585 738.5966
## median uq max neval cld
## 1564.9220 1935.9985 6239.174 100 d
## 954.8715 1270.7680 4534.150 100 c
## 481.2305 606.7685 1705.435 100 a
## 588.1020 711.1790 1312.410 100 ab
## 492.9225 613.3335 1699.281 100 ab
## 609.6405 758.5635 6146.457 100 b
##Large Y univ. X
y<-matrix(rnorm(10000),ncol=100)
x<-matrix(rnorm(nrow(y)))
xf<-cbind(1,x)
microbenchmark(
lm(y~x),
solve(t(xf)%*%xf)%*%t(xf)%*%y,
crossprod(solve(crossprod(xf)),crossprod(xf,y)),
lm.fit(xf,y),.lm.fit(xf,y),
qr.coef(qr(xf),y)
)
## Unit: microseconds
## expr min lq mean
## lm(y ~ x) 745.025 872.205 1247.5357
## solve(t(xf) %*% xf) %*% t(xf) %*% y 49.641 60.308 82.1540
## crossprod(solve(crossprod(xf)), crossprod(xf, y)) 38.564 45.744 74.3345
## lm.fit(xf, y) 157.129 186.666 311.8359
## .lm.fit(xf, y) 118.564 128.205 172.4350
## qr.coef(qr(xf), y) 85.743 108.923 166.4084
## median uq max neval cld
## 1003.4870 1355.2815 5920.406 100 c
## 72.2055 98.4615 282.666 100 a
## 58.4620 81.4360 468.513 100 a
## 248.0005 312.6150 3359.587 100 b
## 147.6920 187.8970 578.872 100 a
## 133.7435 182.5640 438.564 100 a
#large Y and X
y<-matrix(rnorm(20000),ncol=100)
x<-matrix(rnorm(10000),ncol=50)
xf<-cbind(1,x)
microbenchmark(
lm(y~x),
solve(t(xf)%*%xf)%*%t(xf)%*%y,
crossprod(solve(crossprod(xf)),crossprod(xf,y)),
lm.fit(xf,y),.lm.fit(xf,y),
qr.coef(qr(xf),y)
)
## Unit: milliseconds
## expr min lq mean
## lm(y ~ x) 3.993433 4.215791 5.077473
## solve(t(xf) %*% xf) %*% t(xf) %*% y 1.258666 1.299897 1.648947
## crossprod(solve(crossprod(xf)), crossprod(xf, y)) 1.453948 1.501537 1.712368
## lm.fit(xf, y) 3.182767 3.265023 3.787197
## .lm.fit(xf, y) 3.118357 3.174357 3.556219
## qr.coef(qr(xf), y) 1.812102 1.865024 3.081651
## median uq max neval cld
## 4.638150 5.697432 8.556301 100 c
## 1.416204 1.712409 3.817433 100 a
## 1.609024 1.876921 2.549332 100 a
## 3.522664 3.952818 8.202250 100 bc
## 3.352408 3.764305 6.132508 100 bc
## 2.000204 2.253947 96.022896 100 ab