[R] Change log(J) to log(J+1) to stop log(0) from occurring in harModel

Thu Jul 19 18:15:26 CEST 2012

I think the code is part of the RTAQ package but is not included in it, as I
obtained it from
https://r-forge.r-project.org/scm/viewvc.php/pkg/RTAQ/R/HAR_model.R?view=markup&root=blotter&sortby=author&pathrev=1028. 

It is not my code and I make no claim to other's good work, and apologize if
I should even be posting it I am not sure, but in the transform function it
allows to change the model to `log` or `sqrt`, but when then model is
changed to log and the model used is either "HARRVJ" or "HARRVCJ" it will
return the following error: 

x = harModel(dat, periods = c(1,5,22), periodsJ=c(1), RVest =
c("RCov","RBPCov"), 
+              type="HARRVJ", h=22, transform="log") ; # Estimate ....
[TRUNCATED] 
Error in lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) : 
  NA/NaN/Inf in foreign function call (arg 1) 

 which is due to it taking the log(0), the actual model should take log(J +
1) in case of a 0 value for the J in the time series, but unfortunately I do
not know how to rectify this. I was wondering if any one could tell me how
how I can achieve this as I am very naive with R still. 

  # Get the matrix for estimation of linear model 
  maxp      = max(periods,periodsJ); #max number of aggregation levels 
  if(!is.null(leverage)){ maxp = max(maxp,leverage) } 
  n         = length(RM1);  #Number of Days 

  # Aggregate RV: 
  RVmatrix1 = aggRV(RM1,periods); 
  if( nest==2 ){ RVmatrix2 = aggRV(RM2,periods); }  # In case a jumprobust
estimator is supplied 

  # Aggregate and subselect y: 
  y = aggY(RM1,h,maxp); 

  # Only keep useful parts: 
  x1 = RVmatrix1[(maxp:(n-h)),]; 
  if( nest==2 ){ x2 = RVmatrix2[(maxp:(n-h)),]; } # In case a jumprobust
estimator is supplied 

  # Jumps: 
  if(type!="HARRV"){ # If model type is as such that you need jump component 
    J = pmax( RM1 - RM2,0 ); # Jump contributions should be positive 
    J = aggJ(J,periodsJ);         
  } 

  if( !is.null(leverage) ){ 
    if( sum(data<0) == 0 ){ warning("You cannot use leverage variables in
the model in case your input consists of Realized Measures") } 
    # Get close-to-close returns 
    e = apply.daily(data,sum); #Sum logreturns daily     
    # Get the rmins: 
    rmintemp = pmin(e,0);     
    # Aggregate everything: 
    rmin = aggRV(rmintemp,periods=leverage,type="Rmin"); 
    # Select: 
    rmin = rmin[(maxp:(n-h)),]; 
  }else{ rmin = matrix(ncol=0,nrow=dim(x1)[1]) } 

  ############################### 
  # Estimate the model parameters, according to type of model : 
  # First model type: traditional HAR-RV: 
  if( type == "HARRV" ){ 
    if(!is.null(transform)){ y = Ftransform(y); x1 = Ftransform(x1) } 
    x1 = cbind(x1,rmin); 
    model     = estimhar(y=y,x=x1); 
    model$transform = transform; model$h = h; model$type = "HARRV";
model$dates = alldates[(maxp+h):n]; 
    class(model) = c("harModel","lm"); 
    return( model ) 
  } #End HAR-RV if cond 

  if( type == "HARRVJ" ){     
    J = J[(maxp:(n-h)),]; 
    x = cbind(x1,J);              # bind jumps to RV data 
    if(!is.null(transform)){ y = Ftransform(y); x = Ftransform(x); }       
    x = cbind(x,rmin); 
    model = estimhar(y=y,x=x); 
    model$transform = transform; model$h = h; model$type = "HARRVJ";
model$dates = alldates[(maxp+h):n]; 
    class(model) = c("harModel","lm"); 
    return( model )     
  }#End HAR-RV-J if cond 

  if( type == "HARRVCJ" ){ 
    # Are the jumps significant? if not set to zero: 
    if( jumptest=="ABDJumptest" ){ 
      TQ = apply.daily(data, TQfun); 
      J = J[,1]; 
      teststats    = ABDJumptest(RV=RM1,BPV=RM2,TQ=TQ ); 
    }else{ jtest = match.fun(jumptest); teststats = jtest(data,...) }   
    Jindicators  = teststats > qnorm(1-alpha); 
    J[!Jindicators] = 0; 
    # Get continuus components if necessary RV measures if necessary: 
    Cmatrix = matrix( nrow = dim(RVmatrix1)[1], ncol = 1 ); 
    Cmatrix[Jindicators,]    = RVmatrix2[Jindicators,1];      #Fill with
robust one in case of jump 
    Cmatrix[(!Jindicators)]  = RVmatrix1[(!Jindicators),1];   #Fill with
non-robust one in case of no-jump   
    # Aggregate again: 
    Cmatrix <- aggRV(Cmatrix,periods,type="C"); 
    Jmatrix <- aggJ(J,periodsJ); 
    # subset again: 
    Cmatrix <- Cmatrix[(maxp:(n-h)),]; 
    Jmatrix <- Jmatrix[(maxp:(n-h)),];             
    x = cbind(Cmatrix,Jmatrix);               # bind jumps to RV data       
    if(!is.null(transform)){ y = Ftransform(y); x = Ftransform(x); }   
    x = cbind(x,rmin); 
    model = estimhar( y=y, x=x ); 
    model$transform = transform; model$h = h; model$type = "HARRVCJ";
model$dates = alldates[(maxp+h):n];       
    class(model) = c("harModel","lm"); 
    return(model) 
  } 

} #End function harModel 
################################################################# 
estimhar = function(y, x){ #Potentially add stuff here 
  colnames(y)="y"; 
    output = lm( formula(y~x), data=cbind(y,x)); 
} 

# Help function to get nicely formatted formula's for print/summary
methods.. 
getHarmodelformula = function(x){ 
  modelnames = colnames(x$model$x); 
  if(!is.null(x$transform)){ 

    modelnames = paste(x$transform,"(",modelnames,")",sep=""); } #Added
visual tingie for plotting transformed RV 
  betas      = paste("beta",(1:length(modelnames)),"",sep="") 
  betas2     = paste(" + ",betas,"*") 
  rightside  = paste(betas2, modelnames,collapse=""); 
  h = x$h; 
  left = paste("RV",h,sep=""); 
  if(!is.null(x$transform)){  left = paste(x$transform,"(",left,")",sep="" )
} 
  modeldescription = paste(left,"= beta0",rightside); 
  return(list(modeldescription,betas))   
} 

aggRV <- function(RM1,periods,type="RV"){ 
  n = length(RM1); 
  nperiods = length(periods); 
  RVmatrix1 = matrix(nrow=n,ncol=nperiods); 
  for(i in 1:nperiods){ 
    if(periods[i]==1){ RVmatrix1[,i] = RM1; 
    }else{ RVmatrix1[(periods[i]:n),i] =
rollmean(x=RM1,k=periods[i],align="left")  } 
  } #end loop over periods for standard RV estimator 
  colnames(RVmatrix1) = paste(type,periods,sep=""); 
  return(RVmatrix1); 
} 

aggJ <- function( J, periodsJ ){ 
  n = length(J); 
  nperiods = length(periodsJ); 
  JM = matrix(nrow=n,ncol=nperiods); 
  for(i in 1:nperiods){ 
    if(periodsJ[i]==1){ JM[,i] = J; 
    }else{ JM[(periodsJ[i]:n),i] = rollmean( x=J, k=periodsJ[i],
align="left")  } 
  } # End loop over periods for standard RV estimator 
  colnames(JM) = paste("J",periodsJ,sep=""); 
  return(JM) 
} 

aggY = function(RM1,h,maxp){ 
  n         = length(RM1); 
  if( h == 1 ){  y  = RM1[(maxp+1):n]; } 
  if( h != 1 ){ 
    y = matrix( nrow=length(RM1), ncol=1 ); colnames(y) = "y"; 
    y[(h:n),] = rollmean(x=RM1,k=h,align="left"); 
    y = matrix(y[((maxp+h):n),],ncol=1); y=as.data.frame(y) }   
  return(y); 
} 

######################################################################### 
# Print method for harmodel:   
print.harModel = function(x, digits = max(3, getOption("digits") - 3), ...){ 
  formula = getHarmodelformula(x); modeldescription = formula[[1]]; betas =
formula[[2]]; 

  cat("\nModel:\n", paste(modeldescription, sep = "\n", collapse = "\n"), 
      "\n\n", sep = "") 

  coefs = coef(x); 
  names(coefs)  = c("beta0",betas) 

  if (length(coef(x))){ 
    cat("Coefficients:\n") 
    print.default(format(coefs, digits = digits), print.gap = 2,quote =
FALSE); 
    cat("\n\n"); 
    Rs = summary(x)[c("r.squared", "adj.r.squared")] 
    zz = c(Rs$r.squared,Rs$adj.r.squared); 
    names(zz) = c("r.squared","adj.r.squared") 
    print.default((format(zz,digits=digits) ),print.gap = 2,quote=FALSE) 
  } 
  else cat("No coefficients\n") 
  cat("\n") 
  invisible(x) 
} 

summary.harModel = function(object, correlation = FALSE, symbolic.cor =
FALSE,...){ 
  x=object; 
  dd = summary.lm(x); 
  formula = getHarmodelformula(x); modeldescription = formula[[1]]; betas =
formula[[2]]; 
  dd$call = modeldescription; 
  rownames(dd$coefficients) = c("beta0",betas); 
  return(dd) 
} 

plot.harModel = function(x, which = c(1L:3L, 5L), caption = list("Residuals
vs Fitted", 
                                                                 "Normal
Q-Q", "Scale-Location", "Cook's distance", "Residuals vs Leverage", 

expression("Cook's dist vs Leverage  " * h[ii]/(1 - h[ii]))), 
                         panel = if (add.smooth) panel.smooth else points,
sub.caption = NULL, 
                         main = "", ask = prod(par("mfcol")) < length(which)
&& dev.interactive(), 
                         ..., id.n = 3, labels.id = names(residuals(x)),
cex.id = 0.75, 
                         qqline = TRUE, cook.levels = c(0.5, 1), add.smooth
= getOption("add.smooth"), 
                         label.pos = c(4, 2), cex.caption = 1){ 
  observed = x$model$y; 
  fitted   = x$fitted.values; 
  dates    = x$dates; 
  dates    = as.POSIXct(dates); 
  observed = xts(observed, order.by=dates); 
  fitted   = xts(fitted, order.by=dates); 
  type     = x$type; 

  g_range = range(fitted,observed) 
  g_range[1] = 0.95*g_range[1]; g_range[2]= 1.05 * g_range[2]; 
  #ind = seq(1,length(fitted),length.out=5); 
  title = paste("Observed and forecasted RV based on HAR Model:",type); 
  plot.zoo(observed,col="red",lwd=2,main=title,
ylim=g_range,xlab="Time",ylab="Realized Volatility"); 
  #  axis(1,time(b)[ind], format(time(b)[ind],), las=2, cex.axis=0.8); not
used anymore 
  #  axis(2); 
  lines(fitted,col="blue",lwd=2); 
  legend("topleft", c("Observed RV","Forecasted RV"), cex=1.1,
col=c("red","blue"),lty=1, lwd=2, bty="n"); 
} 

 I have tried changing 

  if( type == "HARRVJ" ){     
    J = J[(maxp:(n-h)),]; 
    *x = cbind(x1,J); *             # bind jumps to RV data 
    if(!is.null(transform)){ y = Ftransform(y); x = Ftransform(x); }       
    x = cbind(x,rmin); 
    model = estimhar(y=y,x=x); 
    model$transform = transform; model$h = h; model$type = "HARRVJ";
model$dates = alldates[(maxp+h):n]; 
    class(model) = c("harModel","lm"); 
    return( model )     
  }#End HAR-RV-J if cond 

to 

if( type == "HARRVJ" ){     
    J = J[(maxp:(n-h)),]; 
   * x = cbind(x1,J+1);*              # bind jumps to RV data 
    if(!is.null(transform)){ y = Ftransform(y); x = Ftransform(x); }       
    x = cbind(x,rmin); 
    model = estimhar(y=y,x=x); 
    model$transform = transform; model$h = h; model$type = "HARRVJ";
model$dates = alldates[(maxp+h):n]; 
    class(model) = c("harModel","lm"); 
    return( model )     
  }#End HAR-RV-J if cond 

# and this 

 if( type == "HARRVCJ" ){ 
    # Are the jumps significant? if not set to zero: 
    if( jumptest=="ABDJumptest" ){ 
      TQ = apply.daily(data, TQfun); 
      J = J[,1]; 
      teststats    = ABDJumptest(RV=RM1,BPV=RM2,TQ=TQ ); 
    }else{ jtest = match.fun(jumptest); teststats = jtest(data,...) }   
    Jindicators  = teststats > qnorm(1-alpha); 
   * J[!Jindicators] = 0;*

# to 

if( type == "HARRVCJ" ){ 
    # Are the jumps significant? if not set to zero: 
    if( jumptest=="ABDJumptest" ){ 
      TQ = apply.daily(data, TQfun); 
      J = J[,1]; 
      teststats    = ABDJumptest(RV=RM1,BPV=RM2,TQ=TQ ); 
    }else{ jtest = match.fun(jumptest); teststats = jtest(data,...) }   
    Jindicators  = teststats > qnorm(1-alpha); 
    *J[!Jindicators] = 1;*

but it returns to large of scores when the regressions are run. Note I would
change it back to how it is for `transform=NULL` and `transform="sqrt" as
they compute fine for this. 

I don't think the log tests can work without the change though.

--
View this message in context: http://r.789695.n4.nabble.com/Change-log-J-to-log-J-1-to-stop-log-0-from-occurring-in-harModel-tp4637072.html
Sent from the R help mailing list archive at Nabble.com.