[R] Change log(J) to log(J+1) to stop log(0) from occurring in harModel
cursethiscure
caolan.harvey6 at mail.dcu.ie
Thu Jul 19 18:15:26 CEST 2012
I think the code is part of the RTAQ package but is not included in it, as I
obtained it from
https://r-forge.r-project.org/scm/viewvc.php/pkg/RTAQ/R/HAR_model.R?view=markup&root=blotter&sortby=author&pathrev=1028.
It is not my code and I make no claim to other's good work, and apologize if
I should even be posting it I am not sure, but in the transform function it
allows to change the model to `log` or `sqrt`, but when then model is
changed to log and the model used is either "HARRVJ" or "HARRVCJ" it will
return the following error:
x = harModel(dat, periods = c(1,5,22), periodsJ=c(1), RVest =
c("RCov","RBPCov"),
+ type="HARRVJ", h=22, transform="log") ; # Estimate ....
[TRUNCATED]
Error in lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
NA/NaN/Inf in foreign function call (arg 1)
which is due to it taking the log(0), the actual model should take log(J +
1) in case of a 0 value for the J in the time series, but unfortunately I do
not know how to rectify this. I was wondering if any one could tell me how
how I can achieve this as I am very naive with R still.
# Get the matrix for estimation of linear model
maxp = max(periods,periodsJ); #max number of aggregation levels
if(!is.null(leverage)){ maxp = max(maxp,leverage) }
n = length(RM1); #Number of Days
# Aggregate RV:
RVmatrix1 = aggRV(RM1,periods);
if( nest==2 ){ RVmatrix2 = aggRV(RM2,periods); } # In case a jumprobust
estimator is supplied
# Aggregate and subselect y:
y = aggY(RM1,h,maxp);
# Only keep useful parts:
x1 = RVmatrix1[(maxp:(n-h)),];
if( nest==2 ){ x2 = RVmatrix2[(maxp:(n-h)),]; } # In case a jumprobust
estimator is supplied
# Jumps:
if(type!="HARRV"){ # If model type is as such that you need jump component
J = pmax( RM1 - RM2,0 ); # Jump contributions should be positive
J = aggJ(J,periodsJ);
}
if( !is.null(leverage) ){
if( sum(data<0) == 0 ){ warning("You cannot use leverage variables in
the model in case your input consists of Realized Measures") }
# Get close-to-close returns
e = apply.daily(data,sum); #Sum logreturns daily
# Get the rmins:
rmintemp = pmin(e,0);
# Aggregate everything:
rmin = aggRV(rmintemp,periods=leverage,type="Rmin");
# Select:
rmin = rmin[(maxp:(n-h)),];
}else{ rmin = matrix(ncol=0,nrow=dim(x1)[1]) }
###############################
# Estimate the model parameters, according to type of model :
# First model type: traditional HAR-RV:
if( type == "HARRV" ){
if(!is.null(transform)){ y = Ftransform(y); x1 = Ftransform(x1) }
x1 = cbind(x1,rmin);
model = estimhar(y=y,x=x1);
model$transform = transform; model$h = h; model$type = "HARRV";
model$dates = alldates[(maxp+h):n];
class(model) = c("harModel","lm");
return( model )
} #End HAR-RV if cond
if( type == "HARRVJ" ){
J = J[(maxp:(n-h)),];
x = cbind(x1,J); # bind jumps to RV data
if(!is.null(transform)){ y = Ftransform(y); x = Ftransform(x); }
x = cbind(x,rmin);
model = estimhar(y=y,x=x);
model$transform = transform; model$h = h; model$type = "HARRVJ";
model$dates = alldates[(maxp+h):n];
class(model) = c("harModel","lm");
return( model )
}#End HAR-RV-J if cond
if( type == "HARRVCJ" ){
# Are the jumps significant? if not set to zero:
if( jumptest=="ABDJumptest" ){
TQ = apply.daily(data, TQfun);
J = J[,1];
teststats = ABDJumptest(RV=RM1,BPV=RM2,TQ=TQ );
}else{ jtest = match.fun(jumptest); teststats = jtest(data,...) }
Jindicators = teststats > qnorm(1-alpha);
J[!Jindicators] = 0;
# Get continuus components if necessary RV measures if necessary:
Cmatrix = matrix( nrow = dim(RVmatrix1)[1], ncol = 1 );
Cmatrix[Jindicators,] = RVmatrix2[Jindicators,1]; #Fill with
robust one in case of jump
Cmatrix[(!Jindicators)] = RVmatrix1[(!Jindicators),1]; #Fill with
non-robust one in case of no-jump
# Aggregate again:
Cmatrix <- aggRV(Cmatrix,periods,type="C");
Jmatrix <- aggJ(J,periodsJ);
# subset again:
Cmatrix <- Cmatrix[(maxp:(n-h)),];
Jmatrix <- Jmatrix[(maxp:(n-h)),];
x = cbind(Cmatrix,Jmatrix); # bind jumps to RV data
if(!is.null(transform)){ y = Ftransform(y); x = Ftransform(x); }
x = cbind(x,rmin);
model = estimhar( y=y, x=x );
model$transform = transform; model$h = h; model$type = "HARRVCJ";
model$dates = alldates[(maxp+h):n];
class(model) = c("harModel","lm");
return(model)
}
} #End function harModel
#################################################################
estimhar = function(y, x){ #Potentially add stuff here
colnames(y)="y";
output = lm( formula(y~x), data=cbind(y,x));
}
# Help function to get nicely formatted formula's for print/summary
methods..
getHarmodelformula = function(x){
modelnames = colnames(x$model$x);
if(!is.null(x$transform)){
modelnames = paste(x$transform,"(",modelnames,")",sep=""); } #Added
visual tingie for plotting transformed RV
betas = paste("beta",(1:length(modelnames)),"",sep="")
betas2 = paste(" + ",betas,"*")
rightside = paste(betas2, modelnames,collapse="");
h = x$h;
left = paste("RV",h,sep="");
if(!is.null(x$transform)){ left = paste(x$transform,"(",left,")",sep="" )
}
modeldescription = paste(left,"= beta0",rightside);
return(list(modeldescription,betas))
}
aggRV <- function(RM1,periods,type="RV"){
n = length(RM1);
nperiods = length(periods);
RVmatrix1 = matrix(nrow=n,ncol=nperiods);
for(i in 1:nperiods){
if(periods[i]==1){ RVmatrix1[,i] = RM1;
}else{ RVmatrix1[(periods[i]:n),i] =
rollmean(x=RM1,k=periods[i],align="left") }
} #end loop over periods for standard RV estimator
colnames(RVmatrix1) = paste(type,periods,sep="");
return(RVmatrix1);
}
aggJ <- function( J, periodsJ ){
n = length(J);
nperiods = length(periodsJ);
JM = matrix(nrow=n,ncol=nperiods);
for(i in 1:nperiods){
if(periodsJ[i]==1){ JM[,i] = J;
}else{ JM[(periodsJ[i]:n),i] = rollmean( x=J, k=periodsJ[i],
align="left") }
} # End loop over periods for standard RV estimator
colnames(JM) = paste("J",periodsJ,sep="");
return(JM)
}
aggY = function(RM1,h,maxp){
n = length(RM1);
if( h == 1 ){ y = RM1[(maxp+1):n]; }
if( h != 1 ){
y = matrix( nrow=length(RM1), ncol=1 ); colnames(y) = "y";
y[(h:n),] = rollmean(x=RM1,k=h,align="left");
y = matrix(y[((maxp+h):n),],ncol=1); y=as.data.frame(y) }
return(y);
}
#########################################################################
# Print method for harmodel:
print.harModel = function(x, digits = max(3, getOption("digits") - 3), ...){
formula = getHarmodelformula(x); modeldescription = formula[[1]]; betas =
formula[[2]];
cat("\nModel:\n", paste(modeldescription, sep = "\n", collapse = "\n"),
"\n\n", sep = "")
coefs = coef(x);
names(coefs) = c("beta0",betas)
if (length(coef(x))){
cat("Coefficients:\n")
print.default(format(coefs, digits = digits), print.gap = 2,quote =
FALSE);
cat("\n\n");
Rs = summary(x)[c("r.squared", "adj.r.squared")]
zz = c(Rs$r.squared,Rs$adj.r.squared);
names(zz) = c("r.squared","adj.r.squared")
print.default((format(zz,digits=digits) ),print.gap = 2,quote=FALSE)
}
else cat("No coefficients\n")
cat("\n")
invisible(x)
}
summary.harModel = function(object, correlation = FALSE, symbolic.cor =
FALSE,...){
x=object;
dd = summary.lm(x);
formula = getHarmodelformula(x); modeldescription = formula[[1]]; betas =
formula[[2]];
dd$call = modeldescription;
rownames(dd$coefficients) = c("beta0",betas);
return(dd)
}
plot.harModel = function(x, which = c(1L:3L, 5L), caption = list("Residuals
vs Fitted",
"Normal
Q-Q", "Scale-Location", "Cook's distance", "Residuals vs Leverage",
expression("Cook's dist vs Leverage " * h[ii]/(1 - h[ii]))),
panel = if (add.smooth) panel.smooth else points,
sub.caption = NULL,
main = "", ask = prod(par("mfcol")) < length(which)
&& dev.interactive(),
..., id.n = 3, labels.id = names(residuals(x)),
cex.id = 0.75,
qqline = TRUE, cook.levels = c(0.5, 1), add.smooth
= getOption("add.smooth"),
label.pos = c(4, 2), cex.caption = 1){
observed = x$model$y;
fitted = x$fitted.values;
dates = x$dates;
dates = as.POSIXct(dates);
observed = xts(observed, order.by=dates);
fitted = xts(fitted, order.by=dates);
type = x$type;
g_range = range(fitted,observed)
g_range[1] = 0.95*g_range[1]; g_range[2]= 1.05 * g_range[2];
#ind = seq(1,length(fitted),length.out=5);
title = paste("Observed and forecasted RV based on HAR Model:",type);
plot.zoo(observed,col="red",lwd=2,main=title,
ylim=g_range,xlab="Time",ylab="Realized Volatility");
# axis(1,time(b)[ind], format(time(b)[ind],), las=2, cex.axis=0.8); not
used anymore
# axis(2);
lines(fitted,col="blue",lwd=2);
legend("topleft", c("Observed RV","Forecasted RV"), cex=1.1,
col=c("red","blue"),lty=1, lwd=2, bty="n");
}
I have tried changing
if( type == "HARRVJ" ){
J = J[(maxp:(n-h)),];
*x = cbind(x1,J); * # bind jumps to RV data
if(!is.null(transform)){ y = Ftransform(y); x = Ftransform(x); }
x = cbind(x,rmin);
model = estimhar(y=y,x=x);
model$transform = transform; model$h = h; model$type = "HARRVJ";
model$dates = alldates[(maxp+h):n];
class(model) = c("harModel","lm");
return( model )
}#End HAR-RV-J if cond
to
if( type == "HARRVJ" ){
J = J[(maxp:(n-h)),];
* x = cbind(x1,J+1);* # bind jumps to RV data
if(!is.null(transform)){ y = Ftransform(y); x = Ftransform(x); }
x = cbind(x,rmin);
model = estimhar(y=y,x=x);
model$transform = transform; model$h = h; model$type = "HARRVJ";
model$dates = alldates[(maxp+h):n];
class(model) = c("harModel","lm");
return( model )
}#End HAR-RV-J if cond
# and this
if( type == "HARRVCJ" ){
# Are the jumps significant? if not set to zero:
if( jumptest=="ABDJumptest" ){
TQ = apply.daily(data, TQfun);
J = J[,1];
teststats = ABDJumptest(RV=RM1,BPV=RM2,TQ=TQ );
}else{ jtest = match.fun(jumptest); teststats = jtest(data,...) }
Jindicators = teststats > qnorm(1-alpha);
* J[!Jindicators] = 0;*
# to
if( type == "HARRVCJ" ){
# Are the jumps significant? if not set to zero:
if( jumptest=="ABDJumptest" ){
TQ = apply.daily(data, TQfun);
J = J[,1];
teststats = ABDJumptest(RV=RM1,BPV=RM2,TQ=TQ );
}else{ jtest = match.fun(jumptest); teststats = jtest(data,...) }
Jindicators = teststats > qnorm(1-alpha);
*J[!Jindicators] = 1;*
but it returns to large of scores when the regressions are run. Note I would
change it back to how it is for `transform=NULL` and `transform="sqrt" as
they compute fine for this.
I don't think the log tests can work without the change though.
--
View this message in context: http://r.789695.n4.nabble.com/Change-log-J-to-log-J-1-to-stop-log-0-from-occurring-in-harModel-tp4637072.html
Sent from the R help mailing list archive at Nabble.com.
More information about the R-help
mailing list