[R] R's basic lm() and summary.lm functions

Sat May 11 21:02:57 CEST 2013

coeftest with sandwich is indeed powerful and probably faster.  I see
one drawback: it requires at least three more packages: lmtest,
sandwich, and in turn zoo, which do not come with standard R.  I also
wonder whether I committed a bug in my own code, or whether there is
another parameter I need to specify in sandwich.  my standard error
estimates are very similar but not the same.

if anyone wants to use my code, I am dropping a revised version in
below.  it also uses a better way to document the function inline.
eval(parse(text=(docs[["lm"]][["examples"]]))) is nice.  you get what
you pay for.

more generally, I am still wondering why we have an lm and a
summary.lm function, rather than just one function and one resulting
object for parsimony, given that the summary.lm function is fast and
does not increase the object size.

----
Ivo Welch (ivo.welch at gmail.com)

docs[["lm"]] <- c(
  author='ivo.welch at gmail.com',
  date='2013',
  description='add newey-west and standardized coefficients to lm(),
and return the summary.lm',
  usage='lm(..., newey.west=0, stdcoefs=TRUE)',
  arguments='',
  details='
    Note that when y or x have different observations, the
coef(lm(y~x))*sd(x)/sd(y)
    calculations are different from a scale(y) ~ scale(x) regression.

    Note that the NeweyWest statistics I get from the sandwich package
are different.
    could be my bug, or could be my problem specifying an additional parameter.
    Here is my code

      library(sandwich)
      library(lmtest)
      print(coeftest(lmo, vcov = NeweyWest, lag=0, prewhite=FALSE))
',
  seealso='stats:::lm, stats:::summary.lm',
  examples='
    x <- rnorm(12); y <- rnorm(12); z <- rnorm(12); x[2] <-NA;
    lm( y ~ x + z )
',
  test='eval(parse(text=(docs[["lm"]][["examples"]])))',
  changes= '',
  version='0.91'
)

################################################################

lm <- function (..., x = FALSE, newey.west=(0), stdcoefs=TRUE) {

  ## R is painfully error-tolerant. I prefer reasonable and immediate
error warnings.
  stopifnot( (is.vector(newey.west))&(length(newey.west)==1)|(is.numeric(newey.west))
)
  stopifnot( (is.vector(stdcoefs))&(length(stdcoefs)==1)|(is.logical(stdcoefs))
)
  stopifnot( (is.vector(x))&(length(x)==1)|(is.logical(x)) )
  ## I wish I could check lm()'s argument, but I cannot.

  lmo <- stats:::lm(..., x=TRUE)
  ## note that both the x matrix and the residuals from the model have
their NA's omitted by default

  if (newey.west>=0) {
    resids <- residuals(lmo)
    diagband.matrix <- function(m, ar.terms) {
      nomit <- m - ar.terms - 1
      mm <- matrix(TRUE, nrow = m, ncol = m)
      mm[1:nomit, (ncol(mm) - nomit + 1):ncol(mm)] <-
(lower.tri(matrix(TRUE, nrow = nomit, ncol = nomit)))
      mm[(ncol(mm) - nomit + 1):ncol(mm), 1:nomit] <-
(upper.tri(matrix(TRUE, nrow = nomit, ncol = nomit)))
      mm
    }
    invx <- chol2inv(chol(crossprod(lmo$x)))
    invx.x <- invx %*% t(lmo$x)
    if (newey.west==0)
      resid.matrix <- diag(resids^2)
    else {
      full <- resids %*% t(resids)
      maskmatrix <- diagband.matrix(length(resids), newey.west)
      resid.matrix <- full * maskmatrix
    }
    vmat <- invx.x %*% resid.matrix %*% t(invx.x)

    nw <- newey.west  ## the number of AR terms
    nw.se <- sqrt(diag(vmat))  ## the standard errors
  }

  if (stdcoefs) {
    xsd <- apply( lmo$x, 2, sd)
    ysd <- sd( lmo$model[,1] )
    stdcoefs.v <- lmo$coefficients*xsd/ysd
  }

  full.x.matrix <- if (x) lmo$x else NULL
  lmo <- stats:::summary.lm(lmo)  ## add the summary.lm object
  if (x) lmo$x <- full.x.matrix

  if (stdcoefs) {
    lmo$coefficients <- cbind(lmo$coefficients, stdcoefs.v )
    colnames(lmo$coefficients)[ncol(lmo$coefficients)] <- "stdcoefs"
  }

  if (newey.west>=0) {
    lmo$coefficients <- cbind(lmo$coefficients, nw.se)
    colnames(lmo$coefficients)[ncol(lmo$coefficients)] <-
paste0("se.nw(",newey.west,")")
    lmo$coefficients <- cbind(lmo$coefficients, lmo$coefficients[,1]/nw.se)
    colnames(lmo$coefficients)[ncol(lmo$coefficients)] <-
paste0("T.nw(",newey.west,")")
  }

  lmo

}

attr(lm, "original") <- stats:::lm