## Regression "by hand" data(mtcars) View(mtcars) ## model is: mpg ~ wt + hp ## make y y <- mtcars$mpg ## make X: is this enough? X0 <- mtcars[ , c("wt","hp")] ## see first 6 rows head(X0) ## display 3rd row mtcars[3, ] mtcars["Datsun 710",] ## subset rows and columns together mtcars["Fiat 128", c("hp","wt")] ## X must also contain an intercept! X <- as.matrix(cbind(1, mtcars[ , c("wt","hp")])) head(X) ## beta.hat OLS beta.hat <- solve(t(X) %*% X) %*% t(X) %*% y beta.hat ## canned routine: is the result the same? summary(lm(mpg ~ wt + hp, data=mtcars)) ## estimate SE.beta ## need the residuals: y.hat <- X %*% beta.hat u.hat <- y - y.hat ## now estimate the errors' variance using the ## (corrected) residual variance: s2 <- t(u.hat) %*% u.hat / (32 - 3) ## check: sqrt(s2) ## must be the same as "Residual standard error:" ## from the model summary above ## now make Cov(beta.hat) cov.b <- as.numeric(s2) * solve(t(X)%*%X) ## it is a 3x3 matrix cov.b ## from here I get the SE(beta.hat): se.beta <- sqrt(diag(cov.b)) ## check: they must be the same as in the ## above model summary (up to rounding) se.beta ## and from these I calculate the t-statistics ## (check again vs. the model summary) tstats <- beta.hat/se.beta ## ... and the p.values: 2 * pt(abs(tstats), df=32-3, lower.tail=FALSE)