[R] Function ar() in package stats: AIC procedure and time series length
Andreas Klein
klein82517 at yahoo.de
Sun Mar 10 21:45:27 CET 2013
Dear R users,
I took a closer look to the AIC-procedure for determining the order m of an VAR(m) process of the function ar() in package stats.
From the original code (source: http://cran.r-project.org/src/base/R-2/R-2.15.3.tar.gz -> R-2.15.3\src\library\stats\R\ar.R -> lines 95 to 98)
[...]
cal.aic <- function() { # omits mean params, that is constant adj
det <- abs(prod(diag(qr(EA)$qr)))
return(n.used * log(det) + 2 * m * nser * nser)
}
[...]
With:
nser: number of time series
n.used: number of observations in one of nser time series
m: number of parameters in one single equation out of nser equations of the VAR(m) model
This function computes "n.used * log(det)..." but in my oppinion it has to be "nser * n.used * log(det)...". Do I understand here something completely wrong?
#Example to show that it makes a difference (I use the original code but shorten it a bit to the multivariate case):
#Read in functions var.yw.aic.original() and var.yw.aic.modified()
var.yw.aic.original <- function (x) {
x <- as.matrix(x)
xm <- colMeans(x)
x <- sweep(x, 2L, xm, check.margin=FALSE)
n.used <- nrow(x)
nser <- ncol(x)
order.max <- floor(10 * log10(n.used))
xacf <- acf(x, type="covariance", lag.max=order.max, plot=FALSE, demean=TRUE)$acf
snames <- colnames(x)
A <- B <- array(0, dim = c(order.max + 1L, nser, nser))
A[1L, , ] <- B[1L, , ] <- diag(nser)
EA <- EB <- xacf[1L, , , drop = TRUE]
xaic <- numeric(order.max + 1L)
solve.yw <- function(m) {
betaA <- betaB <- 0
for (i in 0L:m) {
betaA <- betaA + A[i + 1L, , ] %*% xacf[m + 2L - i, , ]
betaB <- betaB + B[i + 1L, , ] %*% t(xacf[m + 2L - i, , ])
}
KA <- -t(qr.solve(t(EB), t(betaA)))
KB <- -t(qr.solve(t(EA), t(betaB)))
EB <<- (diag(nser) - KB %*% KA) %*% EB
EA <<- (diag(nser) - KA %*% KB) %*% EA
Aold <- A
Bold <- B
for (i in seq_len(m + 1L)) {
A[i + 1L, , ] <<- Aold[i + 1L, , ] + KA %*% Bold[m + 2L - i, , ]
B[i + 1L, , ] <<- Bold[i + 1L, , ] + KB %*% Aold[m + 2L - i, , ]
}
}
cal.aic <- function() {
det <- abs(prod(diag(qr(EA)$qr)))
return(n.used * log(det) + 2 * m * nser * nser)
}
cal.resid <- function() {
resid <- array(0, dim = c(n.used - order, nser))
for (i in 0L:order) resid <- resid + x[(order - i + 1L):(n.used - i), , drop = FALSE] %*% t(ar[i + 1L, , ])
return(rbind(matrix(NA, order, nser), resid))
}
order <- 0L
for (m in 0L:order.max) {
xaic[m + 1L] <- cal.aic()
if(xaic[m + 1L] == min(xaic[seq_len(m + 1L)])) {
ar <- A
order <- m
var.pred <- EA * n.used/(n.used - nser * (m + 1L))
}
if(m < order.max) solve.yw(m)
}
xaic <- xaic - min(xaic)
names(xaic) <- 0L:order.max
resid <- cal.resid()
if(order) {
ar <- -ar[2L:(order + 1L), , , drop = FALSE]
dimnames(ar) <- list(seq_len(order), snames, snames)
} else ar <- array(0, dim=c(0L, nser, nser), dimnames=list(NULL, snames, snames))
colnames(resid) <- colnames(x)
order
}
var.yw.aic.modified <- function (x) {
x <- as.matrix(x)
xm <- colMeans(x)
x <- sweep(x, 2L, xm, check.margin=FALSE)
n.used <- nrow(x)
nser <- ncol(x)
order.max <- floor(10 * log10(n.used))
xacf <- acf(x, type="covariance", lag.max=order.max, plot=FALSE, demean=TRUE)$acf
snames <- colnames(x)
A <- B <- array(0, dim = c(order.max + 1L, nser, nser))
A[1L, , ] <- B[1L, , ] <- diag(nser)
EA <- EB <- xacf[1L, , , drop = TRUE]
xaic <- numeric(order.max + 1L)
solve.yw <- function(m) {
betaA <- betaB <- 0
for (i in 0L:m) {
betaA <- betaA + A[i + 1L, , ] %*% xacf[m + 2L - i, , ]
betaB <- betaB + B[i + 1L, , ] %*% t(xacf[m + 2L - i, , ])
}
KA <- -t(qr.solve(t(EB), t(betaA)))
KB <- -t(qr.solve(t(EA), t(betaB)))
EB <<- (diag(nser) - KB %*% KA) %*% EB
EA <<- (diag(nser) - KA %*% KB) %*% EA
Aold <- A
Bold <- B
for (i in seq_len(m + 1L)) {
A[i + 1L, , ] <<- Aold[i + 1L, , ] + KA %*% Bold[m + 2L - i, , ]
B[i + 1L, , ] <<- Bold[i + 1L, , ] + KB %*% Aold[m + 2L - i, , ]
}
}
cal.aic <- function() {
det <- abs(prod(diag(qr(EA)$qr)))
return(nser * n.used * log(det) + 2 * m * nser * nser)
}
cal.resid <- function() {
resid <- array(0, dim = c(n.used - order, nser))
for (i in 0L:order) resid <- resid + x[(order - i + 1L):(n.used - i), , drop = FALSE] %*% t(ar[i + 1L, , ])
return(rbind(matrix(NA, order, nser), resid))
}
order <- 0L
for (m in 0L:order.max) {
xaic[m + 1L] <- cal.aic()
if(xaic[m + 1L] == min(xaic[seq_len(m + 1L)])) {
ar <- A
order <- m
var.pred <- EA * n.used/(n.used - nser * (m + 1L))
}
if(m < order.max) solve.yw(m)
}
xaic <- xaic - min(xaic)
names(xaic) <- 0L:order.max
resid <- cal.resid()
if(order) {
ar <- -ar[2L:(order + 1L), , , drop = FALSE]
dimnames(ar) <- list(seq_len(order), snames, snames)
} else ar <- array(0, dim=c(0L, nser, nser), dimnames=list(NULL, snames, snames))
colnames(resid) <- colnames(x)
order
}
#Data for the example
x <- c(-0.00658, 0.00802, 0.00642, 0.00412, -0.00618, 0.00462, -0.00168, -0.00148, -0.00108, -0.00608, -0.00058, -0.00318, -0.00528, -0.00218, -0.00688, 0.00022, 0.00472, -0.00028, 0.00362, 0.00122, 0.00852, -0.00118)
y <- c(-0.0046, 0.0018, 0.0088, 0.0051, -0.0020, 0.0018, 0.0083, -0.0012, 0.0096, -0.0014, -0.0035, -0.0107, -0.0052, -0.0027, -0.0095, 0.0008, 0.0070, -0.0018, 0.0031, -0.0019, 0.0097, -0.0022)
#Calculations
ar(cbind(x, y))$order #m=0
var.yw.aic.original(cbind(x, y)) #m=0
var.yw.aic.modified(cbind(x, y)) #m=13
Is my understanding of the function calc.aic() inside ar() wrong? I hope someone can help me to understand why ar() uses "n.used" instead of "nser * n.used"?
Best
Andreas
More information about the R-help
mailing list