[Rd] Moderating consequences of garbage collection when in C

Martin Morgan mtmorgan at fhcrc.org
Wed Oct 5 05:54:19 CEST 2011


Allocating many small objects triggers numerous garbage collections as R 
grows its memory, seriously degrading performance. The specific use case 
is creating a STRSXP of several million elements of 60-100 characters 
each. A simplified illustration is below; it understates the effect 
because, in contrast to an R session with several packages loaded, there 
is initially little to garbage collect.

A simple solution is to provide a mechanism for the C programmer to 
request sufficient memory in advance; the patch below adds a function, 
R_gc_needed, for this purpose. R_gc_needed might also be re-used at two 
other locations in memory.c (lines 2221 and 2361) and could be exposed 
at the R level via a new argument, with default 0, to gc().

%> time R --vanilla -e "dyn.load('gc.so');  x = .Call('doit', 1000000, 
FALSE)"
 > dyn.load('gc.so');  x = .Call('doit', 1000000, FALSE)

real    0m9.865s
user    0m9.697s
sys     0m0.146s

%> time R --vanilla -e "dyn.load('gc.so');  x = .Call('doit', 1000000, 
TRUE)"
 > dyn.load('gc.so');  x = .Call('doit', 1000000, TRUE)

real    0m6.952s
user    0m6.802s
sys     0m0.132s

This is the test code

#include <stdio.h>
#include "Rdefines.h"

/*
 * Build a character vector (STRSXP) of `len` strings, where element i is
 * the decimal value of i right-aligned in a 78-character field.
 *
 * len    - coerced with asInteger(); the number of strings to create.
 * needed - coerced with asLogical(); when true, R_gc_needed() is called
 *          once up front with an estimate of the string storage, before
 *          any element is created.
 *
 * Returns the newly allocated STRSXP.
 *
 * Each string is formatted into a single reusable stack buffer instead of
 * being heap-allocated with Calloc().  The earlier version never released
 * those allocations with Free(), leaking roughly 80 * n bytes per call;
 * with a stack buffer nothing needs cleaning up, even if allocVector() or
 * mkChar() signals an R error part-way through.
 */
SEXP doit(SEXP len, SEXP needed)
{
     /* 78 formatted characters plus the terminating NUL fit in 80 bytes. */
     enum { STR_BUF_LEN = 80 };
     char buf[STR_BUF_LEN];
     int i, n = asInteger(len);
     SEXP ans;

     /* Compute the request in size_t: `80 * n` in int arithmetic overflows
      * for large n.  Skip the request when there is nothing to allocate
      * (n <= 0, which also covers a negative value from a bad `len`). */
     if (asLogical(needed) && n > 0) {
         R_gc_needed((size_t) STR_BUF_LEN * (size_t) n);
     }

     PROTECT(ans = allocVector(STRSXP, n));
     for (i = 0; i < n; ++i) {
         /* mkChar() builds the R-level string from buf, so buf can be
          * overwritten on the next iteration. */
         snprintf(buf, sizeof buf, "%78d", i);
         SET_STRING_ELT(ans, i, mkChar(buf));
     }
     UNPROTECT(1);

     return ans;
}

and a patch

Index: src/include/R_ext/Memory.h
===================================================================
--- src/include/R_ext/Memory.h  (revision 57169)
+++ src/include/R_ext/Memory.h  (working copy)
@@ -36,6 +36,7 @@
  void   vmaxset(const void *);

  void   R_gc(void);
+void   R_gc_needed(size_t);

  char*  R_alloc(size_t, int);
  char*  S_alloc(long, int);
Index: src/main/memory.c
===================================================================
--- src/main/memory.c   (revision 57169)
+++ src/main/memory.c   (working copy)
@@ -2503,6 +2503,17 @@
      R_gc_internal(0);
  }

+void R_gc_needed(R_size_t size_needed)
+{
+    if (FORCE_GC || NO_FREE_NODES() || VHEAP_FREE() < size_needed) {
+       R_gc_internal(size_needed);
+       if (NO_FREE_NODES())
+           mem_err_cons();
+       if (VHEAP_FREE() < size_needed)
+           mem_err_heap(0);
+    }
+}
+
  static void R_gc_full(R_size_t size_needed)
  {
      num_old_gens_to_collect = NUM_OLD_GENERATIONS;


-- 
Computational Biology
Fred Hutchinson Cancer Research Center
1100 Fairview Ave. N. PO Box 19024 Seattle, WA 98109

Location: M1-B861
Telephone: 206 667-2793



More information about the R-devel mailing list