[R] C Interface in R

ivo welch ivo.welch at anderson.ucla.edu
Wed Jul 17 20:14:09 CEST 2013


Dear R Users---

I spent some time implementing the AS75 WLS regression algorithm in C
in order to learn the R interface to C.  I did not find an easy, brief
C+R example of an algorithm that used persistent storage across calls.
 they probably exist, but I could not find a nice brief template for
me to start from, for learning purposes.  thus, I thought I would
share my own learning template example, which you can see in
http://R.ivo-welch.info/ .  as I will probably find small bugs, I will
update the file.  for the google cache of the r-help mailing list, I
am also enclosing the example below.  best to be ignored.

/iaw

----
Ivo Welch (ivo.welch at gmail.com)




--- the as75.R file, at least as of july 2013

###  see end of file for documentation

debug <- 0

if (debug) cat(rep("\n",10))  ## visual separation

dyn.load("Ras75.so")   # created with R CMD SHLIB -o Ras75.so Ras75.c

list.of.names <-  c("np.no.nrbar", "d", "rbar", "thetab", "ss.et")

################################################################
as75.new <- function(k) {
    stopifnot(k>1)  ## we need more than 1 variable to be interesting
    nrbar <- (k-1)*k/2
    object <- list( "np.no.nrbar"= c(k,0,nrbar), "d"= rep(0, k),
                   "rbar"= rep(0, nrbar), "thetab"= rep(0, k),
"ss.et"= rep(0, 2) );
    stopifnot( names(object) == list.of.names )
    class(object) <- "as75"
    object
}

################################################################
as75.include <- function(as75obj, weight, y, x ) {
    stopifnot(class(as75obj)=="as75")
    stopifnot( is.numeric(weight) & (length(weight)==1))
    stopifnot( is.numeric(y) & (length(y)==1))
    stopifnot( is.numeric(x) & (length(x)==as75obj[["np"]]))

    retobj <- .C("Ras75include",
                 ## start is persistent structure objects
                 as.integer( as75obj[[ "np.no.nrbar" ]]),
                 as.double( as75obj[[ "d" ]]),
                 as.double( as75obj[[ "rbar" ]]),
                 as.double( as75obj[[ "thetab" ]]),
                 as.double( as75obj[[ "ss.et" ]]),
                 ## rest are inputs
                 as.double(weight),
                 as.double(y),
                 as.double(x)
                 )

    retobj <- retobj[1:length(list.of.names)]  ## we no longer need
the last inputs, and the old
inputs are now obsolete

    names(retobj) <- list.of.names
    class(retobj) <- "as75"
    retobj
}

################################################################
as75.calcresults <- function(as75obj) {
    stopifnot(class(as75obj)=="as75")
    results.coefs <- rep(0, (as75obj[["np.no.nrbar"]])[1] )  ## allocate storage
    results.rsq <- c(-1,-1)
    results.N <- c(0)
    retobj <- .C("Ras75regress",
                 as.integer( as75obj[[ "np.no.nrbar" ]]),
                 as.double( as75obj[[ "d" ]]),
                 as.double( as75obj[[ "rbar" ]]),
                 as.double( as75obj[[ "thetab" ]]),
                 as.double( as75obj[[ "ss.et" ]]),
                 ## this is an output
                 coefs=as.double(results.coefs),
                 rsq=as.double(results.rsq),
                 N=as.integer(results.N)
                 )

    names(retobj) <- c(list.of.names, "coefs", "rsq", "N")
    class(retobj) <- "as75"
    retobj
}


################################################################
### The main program
################################################################

x2 <- c(3,5,31,11)
x3 <- c(-1,2,0,2)
y <- c(2,1,20,15)

as75o <- as75.new(3)   ## create storage
if (debug) { cat("[0] post new\n"); str(as75o) }

for (i in 1:length(y)) {
    as75o <- as75.include(as75o, weight=1,  y=y[i], x=c( 1.0,  x2[i],
x3[i] ))  ## include an obs
    if (debug) { cat("\n[",i,"] include y=", y[i], "on (1", x2[i],
x3[i], ")\n"); str(as75o) }
}

as75o <- as75.calcresults(as75o)  ## this will add a few elements to the list
cat("\n\n[5] Results:  Coefs=", as75o[["coefs"]], "  R^2=",
as75o[["rsq"]],  "  N=", as75o[["N"]]
, "\n" )

cat("\n\n The contents of the structure, initialized in R but
calculated in C, are\n")
str(as75o)




#################################################################################################
###############################
c(title='R AS75 in C',
  author='ivo.welch at gmail.com',
  date='2013',
  description='implements the WLS AS75 algorithm in C for R',
  usage='outside R:
     $ R CMD SHLIB Ras75.c   ## this creates Ras75.so (the shared
library $ R ...
inside R:
     > source("as75.R")
',
  arguments='',
  details='Example of C interface of R with a structure

 AS75 is an implementation of a sequential regression algorithm by WM
   Gentlemen, published in Applied Statistics 1974.  It makes it possible
   to include or remove observations and feed an infinite number of
   observations with R.  For more information, look it up.

 However, the main purpose here is to provides an example
   with multiple structures and use of the C interface in R.
   It may be quite clumsy, because this is my first use of
   this interface.

',
  seealso='',
  examples='',
  test= '',
  changes= '',
  version='0.0 --- july 2013')




------------ and the .C file Ras75.c

#include <R.h>
#include <Rmath.h>
#include <Rinternals.h>

#define iszero(x) (fabs(x)<1e-8)

/****************************************************************
 * R as75 interface.  AS75 is an implementation of a sequential
 *   regression algorithm by WM Gentlemen, published in Applied
 *   Statistics 1974.  It makes it possible to include or remove
 *   observations and feed an infinite number of observations
 *   with R.  For more information, look it up.
 *
 * However, the main purpose here is to provides an example with
 * multiple structures and use of the C interface in R.  It may
 * be quite clumsy, because this is my first use of this
 * interface.
 *
 * Use as follows:
 *
 *    $ R CMD SHLIB Ras75.c
 *      ## this creates Ras75.so (the shared library
 *    $ R
 *    ...
 *    > source("as75.R")
 *
 ****************************************************************/



/****************************************************************
 * Ras75include includes one observation in the regression.  All
 * variables passed into C by R can be modified.  In the R
 * interface to C, there are no return values afaik.
 ****************************************************************/

void Ras75include( int *npnonrbar, double *d, double *rbar, double
*thetab, double *sset,
  const double *w_in, const double *y_in, const double *x_in ) {


  // the input variables will be changed, so we need to copy them first
  const int np= npnonrbar[0]; const int nrbar= npnonrbar[2];

  double w= (*w_in);
  double y= (*y_in);
  double x[ np ];
  for (int i=0; i< np; ++i) x[i]= x_in[i];

  // announce your presence
  const int debug=0;
  if (debug) printf("\n[Ras75.c w=%.4lf y=%.4lf x=%.4lf %.4lf
%.4lf]\n", w, y, 1.0, x[1], x[2]);

  // this is useless.  R does not allow passing in NaN
  // if (isnan(y)) return; for (int i=0; i<(np); ++i) if (isnan(x[i])) return;

  if (iszero(w)) return;

  sset[1] += w * y * y;
  ++(npnonrbar[1]); // this is numobs

  for (int i1 = 0; i1 < np; ++i1) {
    if (iszero(w)) return;  // this is a necessary test.  weight will
be changed in loop
    if (iszero(x[i1])) continue;

    const double xi = x[i1];
    const double di = d[i1];
    const double dpi = di + w * xi * xi;
    const double cbar = di / dpi;
    const double sbar = w * xi / dpi;
    w = cbar * w;
    d[i1] = dpi;

    if (i1 <= np ) {
      int nextr = i1 * ( np  +  np  - (i1+1)) / 2;
      for (int k = (i1+1); k < np; ++k) {
        const double xk = x[k]; // temporary storage
        x[k] = xk - xi * rbar[nextr];
        rbar[nextr] = cbar * rbar[nextr] + sbar * xk;
        ++nextr;
      }
    }
    const double xk = y;
    y = xk - xi * thetab[i1];
    thetab[i1] = cbar * thetab[i1] + sbar * xk;
  }
  sset[0] += w * y * y;
}


/****************************************************************
 * Ras75regress calculates the results, primarily the
 * coefficient vector and the R^2.  In the R interface to C,
 * there are no return values afaik.
 ****************************************************************/

void Ras75regress( const int *npnonrbar, const double *d,
  const double *rbar, const double *thetab, const double *sset,
  double *coef, double *rsq, int *N ) {

  const int np= npnonrbar[0]; const int no= npnonrbar[1];

  for (int j=0; j < (np); ++j) {
    const int npmj= (np) - j;
    coef[npmj-1] = thetab[npmj-1];
    int nextr = (npmj - 1) * ( (np)  +  (np)  - npmj) / 2;
    for (int k = npmj; k < (np) ; ++k) {
      coef[npmj-1] -= rbar[nextr] * coef[k];
      ++nextr;
    }
  }
  rsq[0] = 1.0- sset[0]/sset[1];
  rsq[1] = 1.0- (1-sset[0]/sset[1])*((no)-1)/((no)-(np));
  *N = no;
}



More information about the R-help mailing list