## Regression "by hand"

data(mtcars)
View(mtcars)

## model is: mpg ~ wt + hp

## make y
y <- mtcars$mpg

## make X: is this enough?
X0 <- mtcars[ , c("wt","hp")]

## see first 6 rows
head(X0)

## display 3rd row
mtcars[3, ]
mtcars["Datsun 710",]

## subset rows and columns together
mtcars["Fiat 128", c("hp","wt")]

## X must also contain an intercept!
X <- as.matrix(cbind(1, mtcars[ , c("wt","hp")]))

head(X)

## beta.hat OLS
beta.hat <- solve(t(X) %*% X) %*% t(X) %*% y
beta.hat

## canned routine: is the result the same?
summary(lm(mpg ~ wt + hp, data=mtcars))

## estimate SE.beta

## need the residuals:
y.hat <- X %*% beta.hat
u.hat <- y - y.hat

## now estimate the errors' variance using the 
## (corrected) residual variance:
s2 <- t(u.hat) %*% u.hat / (32 - 3)

## check: 
sqrt(s2)
## must be the same as "Residual standard error:"
## from the model summary above

## now make Cov(beta.hat)
cov.b <- as.numeric(s2) * solve(t(X)%*%X)

## it is a 3x3 matrix
cov.b

## from here I get the SE(beta.hat):
se.beta <- sqrt(diag(cov.b))

## check: they must be the same as in the 
## above model summary (up to rounding)
se.beta

## and from these I calculate the t-statistics
## (check again vs. the model summary)
tstats <- beta.hat/se.beta

## ... and the p.values:
2 * pt(abs(tstats), df=32-3, lower.tail=FALSE)