[Rd] Potential clue for Bug 16975 - lme fixed sigma - inconsistent REML estimation

Tue Mar 7 03:13:02 CET 2017

Dear list,

I was trying to create a VarClass for nlme to work with Fay-Herriot
(FH) models. The idea was to create a modification of VarComb that
instead of multiplying the variance functions made their sum (I called
it varSum). After some fails etc... I found that the I was not getting
the expected results because I needed to make sigma fixed. Trying to
find how to make sigma fixed I run into this bug (with uconfirmed
status but listed) report

https://bugs.r-project.org/bugzilla/show_bug.cgi?id=16975

The ideas they propose make unnecessary the use of the variance
function I was thinking of, and I left my idea aside for a couple
days. I recently tried the variance function I mentioned before and I
got estimates that were consistent with those provided by the packages
sae and metafor and with the s-plus version of nlme.

The way of fitting a FH model proposed by Maciej in the bug report is
different to the way I fitted the model using the additive variance
function varSum. Both formulations lead to a similar distribution of
the response. However, Maciej's formulation adds the unknown variance
component as a random effect and the formulation with the additive
varClass treat this variance as a variance of an error component.

Results using varSum are the same as those provided by packages sae
and metafor, while the alternative proposed by Maciej does not fit
with those two packages, which is the subject of the bug mentioned
above. Considering this and the fact that the bug is active I thought
that this example (below) could be helpful and provide some clues to
figure out what is the problem with the bug.

I'm not sure about what way would be better to fit models like the FH
model. I find Maciej's solution more flexible for several things than
the route I was taking, but the reported bug made me loose some
confidence. I believe that information about the bug 16975 can be very
interesting. I hope the example below can help or provide some clues.

Thanks

Paco Mauro

# TODO: Add comment
#
# Author: Paco
###############################################################################
sessionInfo()
#R version 3.3.2 (2016-10-31)
#Platform: x86_64-w64-mingw32/x64 (64-bit)
#Running under: Windows 10 x64 (build 14393)
#
#locale:
#  [1] LC_COLLATE=English_United States.1252
#[2] LC_CTYPE=English_United States.1252
#[3] LC_MONETARY=English_United States.1252
#[4] LC_NUMERIC=C
#[5] LC_TIME=English_United States.1252
#
#attached base packages:
#  [1] stats     graphics  grDevices utils     datasets  methods   base
#
#other attached packages:
#  [1] sae_1.1      MASS_7.3-45  nlme_3.1-131 rj_2.0.5-2
#
#loaded via a namespace (and not attached):
#  [1] tools_3.3.2     grid_3.3.2      lattice_0.20-34

#Package: nlme
#Version: 3.1-131
library(nlme)
library(sae)
####* varSum, a modification of varComb to make the combination
additive instead of multiplicative
varSum <-
  ## constructor for the varSum class
  function(...)
{
 val <- list(...)
 if (!all(unlist(lapply(val, inherits, "varFunc")))) {
  stop("all arguments to 'varSum' must be of class \"varFunc\".")
 }
 if (is.null(names(val))) {
  names(val) <- LETTERS[seq_along(val)]
 }
 class(val) <- c("varSum","varComb", "varFunc")
 val
}
varWeights.varSum <-
  function(object)
{
 apply(as.data.frame(lapply(object, varWeights)), 1, function(x){

    1/sqrt(sum((1/x)^2))

   })
}
Initialize.varSum <-
  function(object, data, ...)
{
 val <- lapply(object, Initialize, data)
 attr(val, "plen") <- unlist(lapply(val, function(el) length(coef(el))))
 class(val) <- c("varSum","varComb", "varFunc")
 val
}
logLik.varSum <-
  function(object, ...)
{
 lls <- lapply(object, logLik)

 lls2 <- apply(as.data.frame(lapply(object, varWeights)), 1, function(x){

    1/sqrt(sum((1/x)^2))

   })

 val <- sum(log(lls2))
 attr(val, "df") <- sum(unlist(lapply(lls, attr, "df")))
 class(val) <- "logLik"
 val
}

####* The methods from here to the example are just copies of the
varComb methods with different names
coef.varSum <-
  function(object, unconstrained = TRUE, allCoef = FALSE, ...)
{
 unlist(lapply(object, coef, unconstrained, allCoef))
}

"coef<-.varSum" <-
  function(object, ..., value)
{
 plen <- attr(object, "plen")
 if ((len <- sum(plen)) > 0) {  # varying parameters
  if (length(value) != len) {
   stop("cannot change parameter length of initialized \"varSum\" object")
  }
  start <- 0
  for (i in seq_along(object)) {
   if (plen[i] > 0) {
    coef(object[[i]]) <- value[start + (1:plen[i])]
    start <- start + plen[i]
   }
  }
 }
 object
}
formula.varSum <-
  function(x, ...) lapply(x, formula)
needUpdate.varSum <-
  function(object) any(unlist(lapply(object, needUpdate)))

print.varSum <-
  function(x, ...)
{
 cat("Sum of:\n")
 lapply(x, print)
 invisible(x)
}

print.summary.varSum <-
  function(x, ...)
{
 cat(attr(x, "structName"),"\n")
 lapply(x, print, FALSE)
 invisible(x)
}

summary.varSum <-
  function(object, structName = "Sum of variance functions:", ...)
{
 object[] <- lapply(object, summary)
 attr(object, "structName") <- structName
 class(object) <- c("summary.varSum", class(object))
 object
}

update.varSum <-
  function(object, data, ...)
{
 object[] <- lapply(object, update, data)
 object
}

###############################################################################
# Example bug report 16975
###############################################################################
data(milk)
milk$var<-milk$SD^2
#Fay-Herriot model using sae library
attach(milk)
resultREML <- eblupFH(yi ~ as.factor(MajorArea), SD^2)
resultREML
detach(milk)
# $fit
# $fit$method
# [1] "REML"
#
# $fit$convergence
# [1] TRUE
#
# $fit$iterations
# [1] 4
#
# $fit$estcoef
# beta  std.error    tvalue       pvalue
# (Intercept)            0.9681890 0.06936208 13.958476 2.793443e-44
# as.factor(MajorArea)2  0.1327801 0.10300072  1.289119 1.973569e-01
# as.factor(MajorArea)3  0.2269462 0.09232981  2.457995 1.397151e-02
# as.factor(MajorArea)4 -0.2413011 0.08161707 -2.956503 3.111496e-03
#
# $fit$refvar
# [1] 0.01855022
#
# $fit$goodness
# loglike        AIC        BIC        KIC       AICc      AICb1      AICb2
# 12.677478 -15.354956  -6.548956 -10.354956         NA         NA         NA
# KICc      KICb1      KICb2 nBootstrap
# NA         NA         NA   0.000000

#Bug 16975 report fitting of FH model proposed by Maciej Beresewicz
FH<-lme(yi ~ as.factor(MajorArea),random=~1|as.factor(SmallArea),
  data=milk,weights=varFixed(~var),
  control=lmeControl(sigma = 1,tolerance = 1e-4))
FH
# Linear mixed-effects model fit by REML
# Data: milk
# Log-restricted-likelihood: 10.34588
# Fixed: yi ~ as.factor(MajorArea)
# (Intercept) as.factor(MajorArea)2 as.factor(MajorArea)3
# 0.9680768             0.1316132             0.2269008
# as.factor(MajorArea)4
# -0.2415905
#
# Random effects:
#   Formula: ~1 | as.factor(SmallArea)
# (Intercept) Residual
# StdDev:   0.1332918        1
#
# Variance function:
#   Structure: fixed weights
# Formula: ~var
# Number of Observations: 43
# Number of Groups: 43
#Fay-Herriot model fitted using the variance function varSum defined above
 #A columns of zeros is added to tweak the varConstPower component
milk$zero<-0
FH2<-gls(yi ~ as.factor(MajorArea),data=milk,
  weights=varSum(varConstPower(1,1,~zero,fixed=list(power=1)),varFixed(~var)),
  control=lmeControl(sigma = 1))
FH2
# Generalized least squares fit by REML
# Model: yi ~ as.factor(MajorArea)
# Data: milk
# Log-restricted-likelihood: 5.165619
#
# Coefficients:
#   (Intercept) as.factor(MajorArea)2 as.factor(MajorArea)3
# 0.9681890             0.1327803             0.2269462
# as.factor(MajorArea)4
# -0.2413010
#
# Sum of variance functions:
#   Structure: Constant plus power of variance covariate
# Formula: ~zero
# Parameter estimates:
#   const     power
# 0.1361995 1.0000000
# Variance function:
#   Structure: fixed weights
# Formula: ~var
# Degrees of freedom: 43 total; 39 residual
# Residual standard error: 1