[R] turning R expressions into functions?

Mon Jul 2 16:07:56 CEST 2012

Dear Dirk,

On Sat, Jun 30, 2012 at 01:28:13PM -0500, Dirk Eddelbuettel wrote:
> And also look at the existing benchmark packages 'rbenchmark' and
> 'microbenchmark':

Many thanks for pointing out these packages, I wasn't aware of these.

>    R> library(microbenchmark)
>    R> x <- 5; microbenchmark( 1/x, x^-1 )
>    Unit: nanoseconds
>      expr min    lq median    uq  max
>    1  1/x 296 322.5    341 364.0 6298
>    2 x^-1 516 548.5    570 591.5 5422

My own code (current version attached, comments would be very welcome)
is much more "chatty":

>    R> source("timeit.R")
>    R> x <- 5; TimeIt(1/x, x^-1)
>    tuning ...
>    measuring 10*1466753 samples for each expression ...
>      |======================================================================| 100%
>    
>    execution time comparison:
>    1/x    (0.000571 ± 1.48e-05) ms/call
>    x^-1   (0.000864 ± 9.69e-06) ms/call
>    CI for difference: [-0.00031, -0.000275] ms/call
>    
>    '1/x' is about 33.9% faster (p=2.75e-11)

One of the things I would love to add to my package would be the
ability to compare more than two expressions in one call.  But
unfortunately, I haven't found out so far whether (and if so, how) it
is possible to extract the elements of a "..." object without
evaluating them.

Many thanks,
Jochen
-- 
http://seehuhn.de/
-------------- next part --------------
# timeit.R - pairwise comparison for the execution time of R expressions
#
# Copyright (c) 2012  Jochen Voss  <voss at seehuhn.de>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# ----------------------------------------------------------------------
#
# This file provides the R command 'TimeIt' to compare the execution
# time of two R expressions.

FuncIt <- function(k, expr) {
  # Return a function which executes an expression k times.
  #
  # Args:
  #   k: The number of times 'expr' is executed.
  #   expr: An R expression.
  #
  # Returns:
  #   An R function, executing 'expr' in a loop.
  k <- as.numeric(k)
  expr <- eval.parent(substitute(expr))
  fn <- eval(substitute(function() { for (funcit.i in 1:k) { expr } }))
  return(fn)
}

TuneIt <- function(expr, max.seconds=1) {
  # Determine the approximate cost of calling an R expression in a
  # loop.  This function tries loops of different length and the uses
  # linear interpolation to get the result.
  #
  # Args:
  #   expr: The R expression to test.
  #   max.seconds: How much time (approximately) to use for
  #                measurements, in seconds.  This should be much
  #                larger than the resolution of 'system.time'.
  #                Default is 1.
  #
  # Returns:
  #   A vector 'x' of length 2, such that the execution time for 'k'
  #   iterations is approximately 'x[0] + k * x[1]'.
  kk <- c()
  tt <- c()
  k <- 1
  repeat {
    f <- FuncIt(k, expr)
    t <- system.time(f())[1]
    kk <- c(kk, k)
    tt <- c(tt, t)
    if (t > max.seconds / 3) break
    k <- 2 * k
  }
  if (k > 1) {
    fit <- lm(tt ~ kk)
    return(coefficients(fit))
  } else {
    return(c(0, tt))
  }
  return(k)
}

TimeIt <- function(ex1, ex2, total.time=30, verbose=T) {
  # Compare the execution time of two R expressions.
  #
  # Args:
  #   ex1: The first R expression to evaluate
  #   ex2: The second R expression to evaluate
  #   total.time: How much time (approximately) to spend on
  #     measuring, in seconds.  Longer times lead to more
  #     accurate measurements and allow to detect smaller
  #     differences in run time.  Default is 30 seconds.
  #
  # Returns:
  #   An object of class 'TimeIt', summarising the difference in
  #   execution time of the two expressions.
  start <- proc.time()[1]

  ex1 <- substitute(ex1)
  ex2 <- substitute(ex2)

  if (verbose) {
    cat("tuning ...\n")
  }
  # Use at most 20% or 10 seconds (whatever is smaller) of our time
  # budget for tuning.
  tune.time <- min(.2*total.time, 10)
  c1 <- TuneIt(ex1, tune.time / 2)
  c2 <- TuneIt(ex2, tune.time / 2)
  mid <- proc.time()[1] - start
  total.time <- total.time - mid

  block.min <- 1
  block.target <- total.time^(1/4) * block.min^(3/4)
  c <- c1 + c2
  block.k <- max(round((block.target - c[1]) / c[2]), 1)
  f1 <- FuncIt(block.k, ex1)
  f2 <- FuncIt(block.k, ex2)
  ex1.time <- c1[1] + block.k * c1[2]
  ex2.time <- c2[1] + block.k * c2[2]
  pair.time <- ex1.time + ex2.time
  n <- max(round(total.time / pair.time), 2)

  if (verbose) {
    cat("measuring ", n, "*", block.k,
        " samples for each expression ...\n", sep="")
    flush.console()
    progress <- txtProgressBar(min=0, max=mid+n*pair.time, style=3)
  }
  X1 <- c()
  X2 <- c()
  for (i in 1:n) {
    if (verbose) {
      setTxtProgressBar(progress, mid + (i-1)*pair.time)
    }
    X1 <- c(X1, system.time(f1())[1])
    if (verbose) {
      setTxtProgressBar(progress, mid + (i-1)*pair.time + ex1.time)
    }
    X2 <- c(X2, system.time(f2())[1])
  }
  X1 <- X1 / block.k
  X2 <- X2 / block.k
  if (verbose) {
    setTxtProgressBar(progress, mid + n*pair.time)
    close(progress)
    cat("\n")
  }

  name1 <- deparse(ex1)
  name2 <- deparse(ex2)
  l <- max(nchar(name1), nchar(name2))
  pad1 <- rep(" ", l - nchar(name1))
  pad2 <- rep(" ", l - nchar(name2))

  test <- t.test(X1, X2, paired=T)

  res <- list(name=c(name1,name2),
              name.padded=c(paste(name1, pad1, sep=" "),
                            paste(name2, pad2, sep=" ")),
              mean=c(mean(X1),mean(X2)),
              sd=c(sd(X1),sd(X2)),
              ci=test$conf.int,
              p.value=test$p.value)
  class(res) <- "TimeIt"
  return(res)
}

print.TimeIt <- function(x, ...) {
  if (min(x$mean[1], x$mean[2]) >= 0.1) {
    q = 1
    unit = "seconds/call"
  } else {
    q = 1000
    unit = "ms/call"
  }

  old <- options(digits=3)
  cat("execution time comparison:\n")
  cat(x$name.padded[1], "  (", x$mean[1]*q, " ± ", x$sd[1]*q, ") ",
      unit, "\n", sep="")
  cat(x$name.padded[2], "  (", x$mean[2]*q, " ± ", x$sd[2]*q, ") ",
      unit, "\n", sep="")

  cat("CI for difference: [", x$ci[1]*q, ", ", x$ci[2]*q, "] ",
      unit, "\n", sep="")
  cat("\n")
  if (x$ci[1] > 0) {
    cat("'", x$name[2], "' is about ", 100*(x$mean[1]-x$mean[2])/x$mean[1],
        "% faster (p=", x$p.value, ")\n", sep="")
  } else if (x$ci[2] < 0) {
    cat("'", x$name[1], "' is about ", 100*(x$mean[2]-x$mean[1])/x$mean[2],
        "% faster (p=", x$p.value, ")\n", sep="")
  } else {
    cat("difference is less than ",
        50 * (x$ci[2]-x$ci[1]) / mean(x$mean[1], x$mean[2]),
        "% (p=", x$p.value, ")\n", sep="")
  }
  cat("\n")
  options(old)
}