[Rd] Updated rawConnection() patch

dhinds@sonic.net dhinds at sonic.net
Sun Sep 18 22:18:39 CEST 2005


Here's an update of my rawConnection() implementation.  In addition to
providing a raw version of textConnection(), this fixes two existing
issues with textConnection(): one is that the current textConnection()
implementation carries around unprotected SEXP pointers, the other is
a performance problem due to prolific copying of the output buffer as
output is accumulated line by line.

This new version uses a separate buffer for connection output, which
is extended in larger chunks, so that resize operations are less
frequent.  And the buffer is hidden behind an active binding, so that
the user can't corrupt it.

My original need for this is largely addressed by Brian Ripley's
recent extension of readBin/writeBin to operate on raw vectors as well
as connections, in the latest development tree.  But I think having a
raw version of textConnection is still a bit more orthogonal and
flexible, and requires very little code.

-- Dave


--- ./src/include/Internal.h.orig	2005-08-29 17:47:27.000000000 -0700
+++ ./src/include/Internal.h	2005-09-18 00:32:08.196336200 -0700
@@ -525,6 +525,7 @@
 SEXP do_pushbacklength(SEXP, SEXP, SEXP, SEXP);
 SEXP do_clearpushback(SEXP, SEXP, SEXP, SEXP);
 SEXP do_textconnection(SEXP, SEXP, SEXP, SEXP);
+SEXP do_graboutput(SEXP, SEXP, SEXP, SEXP);
 SEXP do_getallconnections(SEXP, SEXP, SEXP, SEXP);
 SEXP do_sumconnection(SEXP, SEXP, SEXP, SEXP);
 SEXP do_download(SEXP, SEXP, SEXP, SEXP);
--- ./src/include/Rconnections.h.orig	2005-08-03 08:50:36.000000000 -0700
+++ ./src/include/Rconnections.h	2005-09-17 23:56:01.875475000 -0700
@@ -94,8 +94,7 @@
 
 typedef struct outtextconn {
     int len;  /* number of lines */
-    SEXP namesymbol;
-    SEXP data;
+    SEXP namesymbol, data, venv;
     char *lastline;
     int lastlinelength; /* buffer size */
 } *Routtextconn;
--- ./src/library/base/man/rawconnections.Rd.orig	2005-09-18 11:37:18.004405000 -0700
+++ ./src/library/base/man/rawconnections.Rd	2005-09-18 11:37:00.535655300 -0700
@@ -0,0 +1,71 @@
+\name{rawConnection}
+\alias{rawConnection}
+\title{Raw Connections}
+\description{
+  Input and output raw connections.
+}
+\usage{
+rawConnection(object, open = "r", local = FALSE)
+}
+\arguments{
+  \item{object}{raw or character.  A description of the connection. 
+    For an input this is an \R raw vector object, and for an output
+    connection the name for the \R raw vector to receive the
+    output.
+  }
+  \item{open}{character.  Either \code{"rb"} (or equivalently \code{""})
+    for an input connection or \code{"wb"} or \code{"ab"} for an output
+    connection.}
+  \item{local}{logical.  Used only for output connections.  If \code{TRUE},
+    output is assigned to a variable in the calling environment.  Otherwise
+    the global environment is used.}
+}
+\details{
+  An input raw connection is opened and the raw vector is copied
+  at time the connection object is created, and \code{close}
+  destroys the copy.
+
+  An output raw connection is opened and creates an \R raw vector of
+  the given name in the user's workspace or in the calling
+  environment, depending on the value of the \code{local} argument.
+  This object will at all times hold the accumulated output to the
+  connection.
+
+  Opening a raw connection with \code{mode = "ab"} will attempt to
+  append to an existing raw vector with the given name in the user's
+  workspace or the calling environment.  If none is found (even if an
+  object exists of the right name but the wrong type) a new raw vector
+  wil be created, with a warning.
+
+  You cannot \code{seek} on a raw connection, and \code{seek} will
+  always return zero as the position.
+}
+
+\value{
+  A binary-mode connection object of class \code{"rawConnection"}
+  which inherits from class \code{"connection"}.
+}
+
+\seealso{
+  \code{\link{connections}}, \code{\link{showConnections}},
+  \code{\link{readBin}}, \code{\link{writeBin}},
+  \code{\link{textConnection}}.
+}
+
+\examples{
+zz <- rawConnection("foo", "wb")
+writeBin(1:2, zz)
+writeBin(1:8, zz, size=1)
+writeBin(pi, zz, size=4)
+close(zz)
+foo
+
+zz <- rawConnection(foo)
+readBin(zz, "integer", n=2)
+sprintf("\%04x", readBin(zz, "integer", n=2, size=2))
+sprintf("\%08x", readBin(zz, "integer", endian="swap"))
+readBin(zz, "numeric", n=1, size=4)
+close(zz)
+}
+\keyword{file}
+\keyword{connection}
--- ./src/library/base/man/textconnections.Rd.orig	2005-09-03 13:55:48.274305900 -0700
+++ ./src/library/base/man/textconnections.Rd	2005-09-18 11:37:03.457530300 -0700
@@ -45,16 +45,11 @@
 }
 
 \value{
-  A connection object of class \code{"textConnection"} which inherits
-  from class \code{"connection"}.
+  A text-mode connection object of class \code{"textConnection"} which
+  inherits from class \code{"connection"}.
 }
 
 \note{
-  As output text connections keep the character vector up to date
-  line-by-line, they are relatively expensive to use, and it is often
-  better to use an anonymous \code{\link{file}()} connection to collect
-  output.
-
   On platforms where \code{vsnprintf} does not return the needed length
   of output (e.g., Windows) there is a 100,000 character limit on the
   length of line for output connections: longer lines will be truncated
@@ -69,7 +64,8 @@
 
 \seealso{
   \code{\link{connections}}, \code{\link{showConnections}},
-  \code{\link{pushBack}}, \code{\link{capture.output}}.
+  \code{\link{pushBack}}, \code{\link{capture.output}},
+  \code{\link{rawConnection}}.
 }
 
 \examples{
--- ./src/library/base/R/connections.R.orig	2005-09-18 12:03:25.854437000 -0700
+++ ./src/library/base/R/connections.R	2005-09-18 11:18:50.479582300 -0700
@@ -84,9 +84,32 @@
     .Internal(socketConnection(host, port, server, blocking, open, encoding))
 
 textConnection <- function(object, open = "r", local = FALSE) {
+    if (!(open %in% c("","r","a","w")))
+        stop('unsupported mode')
     if (local) env <- parent.frame()
     else env <- .GlobalEnv
-    .Internal(textConnection(deparse(substitute(object)), object, open, env))
+    con <- .Internal(textConnection(deparse(substitute(object)),
+                                    object, open, env))
+    if (open %in% c("a", "w")) {
+        suppressWarnings(rm(list=object, envir=env))
+        makeActiveBinding(object, function(v) .Internal(grabOutput(con)), env)
+    }
+    con
+}
+
+rawConnection <- function(object, open = "rb", local = FALSE) {
+    if (open == "") open <- "rb"
+    if (!(open %in% c("rb","ab","wb")))
+        stop("unsupported mode")
+    if (local) env <- parent.frame()
+    else env <- .GlobalEnv
+    con <- .Internal(textConnection(deparse(substitute(object)),
+                                    object, open, env))
+    if (open %in% c("ab", "wb")) {
+        suppressWarnings(rm(list=object, envir=env))
+        makeActiveBinding(object, function(v) .Internal(grabOutput(con)), env)
+    }
+    con
 }
 
 seek <- function(con, ...)
--- ./src/main/connections.c.orig	2005-08-29 17:47:35.000000000 -0700
+++ ./src/main/connections.c	2005-09-18 11:54:49.752647400 -0700
@@ -59,7 +59,6 @@
 #define NSINKS 21
 
 static Rconnection Connections[NCONNECTIONS];
-static SEXP OutTextData;
 
 static int R_SinkNumber;
 static int SinkCons[NSINKS], SinkConsClose[NSINKS], R_SinkSplit[NSINKS];
@@ -76,16 +75,6 @@
     return i;
 }
 
-static int ConnIndex(Rconnection con)
-{
-    int i;
-    for(i = 0; i < NCONNECTIONS; i++)
-	if(Connections[i] == con) break;
-    if(i >= NCONNECTIONS)
-	error(_("connection not found"));
-    return i;
-}
-
 /* internal, not the same as R function getConnection */
 Rconnection getConnection(int n)
 {
@@ -1678,7 +1667,7 @@
     return ans;
 }
 
-/* ------------------- text connections --------------------- */
+/* ------------------- text and raw connections --------------------- */
 
 /* read a R character vector into a buffer */
 static void text_init(Rconnection con, SEXP text)
@@ -1702,6 +1691,22 @@
     this->cur = this->save = 0;
 }
 
+/* read a R raw vector into a buffer */
+static void raw_init(Rconnection con, SEXP raw)
+{
+    int nbytes = length(raw);
+    Rtextconn this = (Rtextconn)con->private;
+
+    this->data = (char *) malloc(nbytes);
+    if(!this->data) {
+	free(this); free(con->description); free(con->class); free(con);
+	error(_("cannot allocate memory for raw connection"));
+    }
+    memcpy(this->data, RAW(raw), nbytes);
+    this->nchars = nbytes;
+    this->cur = this->save = 0;
+}
+
 static Rboolean text_open(Rconnection con)
 {
     con->save = -1000;
@@ -1736,57 +1741,79 @@
 
 static double text_seek(Rconnection con, double where, int origin, int rw)
 {
-    if(where >= 0) error(_("seek is not relevant for text connection"));
+    if(where >= 0) error(_("seek is not relevant for this connection"));
     return 0; /* if just asking, always at the beginning */
 }
 
-static Rconnection newtext(char *description, SEXP text)
+static size_t raw_read(void *ptr, size_t size, size_t nitems,
+		       Rconnection con)
+{
+    Rtextconn this = (Rtextconn)con->private;
+    if (this->cur + size*nitems > this->nchars)
+	nitems = (this->nchars - this->cur)/size;
+    memcpy(ptr, this->data+this->cur, size*nitems);
+    this->cur += size*nitems;
+    return nitems;
+}
+
+static Rconnection newtext(char *description, SEXP data)
 {
     Rconnection new;
+    int isText = isString(data);
     new = (Rconnection) malloc(sizeof(struct Rconn));
-    if(!new) error(_("allocation of text connection failed"));
-    new->class = (char *) malloc(strlen("textConnection") + 1);
-    if(!new->class) {
-	free(new);
-	error(_("allocation of text connection failed"));
-    }
-    strcpy(new->class, "textConnection");
+    if(!new) goto f1;
+    new->class = (char *) malloc(strlen("xxxxConnection") + 1);
+    if(!new->class) goto f2;
+    sprintf(new->class, "%sConnection", isText ? "text" : "raw");
     new->description = (char *) malloc(strlen(description) + 1);
-    if(!new->description) {
-	free(new->class); free(new);
-	error(_("allocation of text connection failed"));
-    }
+    if(!new->description) goto f3;
     init_con(new, description, "r");
     new->isopen = TRUE;
     new->canwrite = FALSE;
     new->open = &text_open;
     new->close = &text_close;
     new->destroy = &text_destroy;
-    new->fgetc = &text_fgetc;
     new->seek = &text_seek;
     new->private = (void*) malloc(sizeof(struct textconn));
-    if(!new->private) {
-	free(new->description); free(new->class); free(new);
-	error(_("allocation of text connection failed"));
+    if(!new->private) goto f4;
+    new->text = isText;
+    if (new->text) {
+	new->fgetc = &text_fgetc;
+	text_init(new, data);
+    } else {
+	new->read = &raw_read;
+	raw_init(new, data);
     }
-    text_init(new, text);
     return new;
+
+f4: free(new->description);
+f3: free(new->class);
+f2: free(new);
+f1: error(_("allocation of %s connection failed"),
+	  isText ? "text" : "raw");
 }
 
 static void outtext_close(Rconnection con)
 {
     Routtextconn this = (Routtextconn)con->private;
-    SEXP tmp;
-    int idx = ConnIndex(con);
+    SEXP tmp, rm;
 
     if(strlen(this->lastline) > 0) {
-	PROTECT(tmp = lengthgets(this->data, ++this->len));
-	SET_STRING_ELT(tmp, this->len - 1, mkChar(this->lastline));
-	defineVar(this->namesymbol, tmp, VECTOR_ELT(OutTextData, idx));
-	this->data = tmp;
+	PROTECT(tmp = lengthgets(this->data, this->len+1));
+	SET_STRING_ELT(tmp, this->len, mkChar(this->lastline));
+    } else {
+	PROTECT(tmp = lengthgets(this->data, this->len));
+    }
+    /* remove current binding, then install result */
+    if (R_BindingIsActive(this->namesymbol, this->venv)) {
+	PROTECT(rm = lang2(install("rm"), this->namesymbol));
+	eval(rm, this->venv);
 	UNPROTECT(1);
     }
-    SET_VECTOR_ELT(OutTextData, idx, R_NilValue);
+    defineVar(this->namesymbol, tmp, this->venv);
+    UNPROTECT(1);
+    R_ReleaseObject(this->venv);
+    R_ReleaseObject(this->data);
 }
 
 static void outtext_destroy(Rconnection con)
@@ -1795,6 +1822,17 @@
     free(this->lastline); free(this);
 }
 
+static void outtext_grow(Routtextconn this, int need)
+{
+    SEXP tmp = this->data;
+    int len = length(tmp);
+    if (this->len + need > len) {
+	this->data = lengthgets(tmp, len + need + (len>>3) + 16);
+	R_PreserveObject(this->data);
+	R_ReleaseObject(tmp);
+    }
+}
+
 #define LAST_LINE_LEN 256
 
 static int text_vfprintf(Rconnection con, const char *format, va_list ap)
@@ -1803,7 +1841,6 @@
     char buf[BUFSIZE], *b = buf, *p, *q, *vmax = vmaxget();
     int res = 0, usedRalloc = FALSE, buffree,
 	already = strlen(this->lastline);
-    SEXP tmp;
 
     if(already >= BUFSIZE) {
 	/* This will fail so just call vsnprintf to get the length of
@@ -1841,13 +1878,9 @@
     for(p = b; ; p = q+1) {
 	q = Rf_strchr(p, '\n');
 	if(q) {
-	    int idx = ConnIndex(con);
 	    *q = '\0';
-	    PROTECT(tmp = lengthgets(this->data, ++this->len));
-	    SET_STRING_ELT(tmp, this->len - 1, mkChar(p));
-	    defineVar(this->namesymbol, tmp, VECTOR_ELT(OutTextData, idx));
-	    this->data = tmp;
-	    UNPROTECT(1);
+	    outtext_grow(this, 1);
+	    SET_STRING_ELT(this->data, this->len++, mkChar(p));
 	} else {
 	    /* retain the last line */
 	    if(strlen(p) >= this->lastlinelength) {
@@ -1864,75 +1897,94 @@
     return res;
 }
 
-static void outtext_init(Rconnection con, char *mode, int idx)
+static size_t raw_write(const void *ptr, size_t size, size_t nitems,
+			Rconnection con)
+{
+    Routtextconn this = (Routtextconn)con->private;
+    outtext_grow(this, size*nitems);
+    memcpy(RAW(this->data)+this->len, ptr, size*nitems);
+    this->len += size*nitems;
+    return nitems;
+}
+
+static void outtext_init(Rconnection con, SEXP venv, char *mode)
 {
     Routtextconn this = (Routtextconn)con->private;
     SEXP val;
+    int st = (con->text ? STRSXP : RAWSXP);
 
     this->namesymbol = install(con->description);
-    if(strcmp(mode, "w") == 0) {
+    R_PreserveObject(this->venv = venv);
+    if(strncmp(mode, "w", 1) == 0) {
 	/* create variable pointed to by con->description */
-	PROTECT(val = allocVector(STRSXP, 0));
-	defineVar(this->namesymbol, val, VECTOR_ELT(OutTextData, idx));
-	UNPROTECT(1);
+	val = allocVector(st, 0);
     } else {
 	/* take over existing variable */
-	val = findVar1(this->namesymbol, VECTOR_ELT(OutTextData, idx),
-		       STRSXP, FALSE);
+	val = findVar1(this->namesymbol, venv, st, FALSE);
 	if(val == R_UnboundValue) {
-	    warning(_("text connection: appending to a non-existent char vector"));
-	    PROTECT(val = allocVector(STRSXP, 0));
-	    defineVar(this->namesymbol, val, VECTOR_ELT(OutTextData, idx));
-	    UNPROTECT(1);
+	    warning(_("%s connection: appending to a non-existent vector"),
+		    con->text ? "text" : "raw");
+	    val = allocVector(st, 0);
 	}
     }
-    this->len = LENGTH(val);
-    this->data = val;
+    R_PreserveObject(this->data = val);
+    this->len = length(val);
     this->lastline[0] = '\0';
     this->lastlinelength = LAST_LINE_LEN;
 }
 
 
-static Rconnection newouttext(char *description, SEXP sfile, char *mode,
-			      int idx)
+static Rconnection newouttext(char *description, SEXP venv,
+			      SEXP sfile, char *mode)
 {
+    int isText = (mode[1] != 'b');
     Rconnection new;
     void *tmp;
 
     new = (Rconnection) malloc(sizeof(struct Rconn));
-    if(!new) error(_("allocation of text connection failed"));
-    new->class = (char *) malloc(strlen("textConnection") + 1);
-    if(!new->class) {
-	free(new);
-	error(_("allocation of text connection failed"));
-    }
-    strcpy(new->class, "textConnection");
+    if(!new) goto f1;
+    new->class = (char *) malloc(strlen("xxxxConnection") + 1);
+    if(!new->class) goto f2;
+    sprintf(new->class, "%sConnection", isText ? "text" : "raw");
     new->description = (char *) malloc(strlen(description) + 1);
-    if(!new->description) {
-	free(new->class); free(new);
-	error(_("allocation of text connection failed"));
-    }
+    if(!new->description) goto f3;
     init_con(new, description, mode);
+    new->text = isText;
     new->isopen = TRUE;
     new->canread = FALSE;
     new->open = &text_open;
     new->close = &outtext_close;
     new->destroy = &outtext_destroy;
-    new->vfprintf = &text_vfprintf;
     new->seek = &text_seek;
     new->private = (void*) malloc(sizeof(struct outtextconn));
-    if(!new->private) {
-	free(new->description); free(new->class); free(new);
-	error(_("allocation of text connection failed"));
-    }
+    if(!new->private) goto f4;
     ((Routtextconn)new->private)->lastline = tmp = malloc(LAST_LINE_LEN);
-    if(!tmp) {
-	free(new->private);
-	free(new->description); free(new->class); free(new);
-	error(_("allocation of text connection failed"));
+    if(!tmp) goto f5;
+    if (isText) {
+	new->vfprintf = &text_vfprintf;
+    } else {
+	new->write = &raw_write;
     }
-    outtext_init(new, mode, idx);
+    outtext_init(new, venv, mode);
     return new;
+
+f5: free(new->private);
+f4: free(new->description);
+f3: free(new->class);
+f2: free(new);
+f1: error(_("allocation of %s connection failed"),
+	  isText ? "text" : "raw");
+}
+
+SEXP do_graboutput(SEXP call, SEXP op, SEXP args, SEXP env)
+{
+    Routtextconn this;
+    checkArity(op, args);
+    if(!inherits(CAR(args), "textConnection") &&
+       !inherits(CAR(args), "rawConnection"))
+	errorcall(call, _("not a text or raw connection"));
+    this = getConnection(asInteger(CAR(args)))->private;
+    return lengthgets(this->data, this->len);
 }
 
 SEXP do_textconnection(SEXP call, SEXP op, SEXP args, SEXP env)
@@ -1948,26 +2000,24 @@
 	error(_("invalid '%s' argument"), "description");
     desc = CHAR(STRING_ELT(sfile, 0));
     stext = CADR(args);
-    if(!isString(stext))
-	error(_("invalid '%s' argument"), "text");
     sopen = CADDR(args);
     if(!isString(sopen) || length(sopen) != 1)
-    error(_("invalid '%s' argument"), "open");
+	error(_("invalid '%s' argument"), "open");
     open = CHAR(STRING_ELT(sopen, 0));
     venv = CADDDR(args);
     if (!isEnvironment(venv) && venv != R_BaseEnv)
 	error(_("invalid '%s' argument"), "environment");
     ncon = NextConnection();
-    if(!strlen(open) || strncmp(open, "r", 1) == 0)
+    if(!strlen(open) || (open[0] == 'r')) {
+ 	int isText = (!strlen(open) || (open[1] != 'b'));
+ 	if (TYPEOF(stext) != (isText ? STRSXP : RAWSXP))
+ 	    error(_("invalid '%s' argument"), "object");
 	con = Connections[ncon] = newtext(desc, stext);
-    else if (strncmp(open, "w", 1) == 0 || strncmp(open, "a", 1) == 0) {
-	if (OutTextData == NULL) {
-	    OutTextData = allocVector(VECSXP, NCONNECTIONS);
-	    R_PreserveObject(OutTextData);
-	}
-	SET_VECTOR_ELT(OutTextData, ncon, venv);
+    } else if ((open[0] == 'w') || (open[0] == 'a')) {
+	if (!isString(stext))
+	    error(_("invalid '%s' argument"), "object");
 	con = Connections[ncon] =
-	    newouttext(CHAR(STRING_ELT(stext, 0)), sfile, open, ncon);
+	    newouttext(CHAR(STRING_ELT(stext, 0)), venv, sfile, open);
     }
     else
 	errorcall(call, _("unsupported mode"));
@@ -1976,7 +2026,7 @@
     PROTECT(ans = allocVector(INTSXP, 1));
     INTEGER(ans)[0] = ncon;
     PROTECT(class = allocVector(STRSXP, 2));
-    SET_STRING_ELT(class, 0, mkChar("textConnection"));
+    SET_STRING_ELT(class, 0, mkChar(con->class));
     SET_STRING_ELT(class, 1, mkChar("connection"));
     classgets(ans, class);
     UNPROTECT(2);
--- ./src/main/names.c.orig	2005-08-29 17:47:35.000000000 -0700
+++ ./src/main/names.c	2005-09-18 00:29:48.089651300 -0700
@@ -870,6 +870,7 @@
 {"clearPushBack",do_clearpushback,0,  11,     1,      {PP_FUNCALL, PREC_FN,	0}},
 {"pushBackLength",do_pushbacklength,0,  11,     1,      {PP_FUNCALL, PREC_FN,	0}},
 {"textConnection",do_textconnection,0,	11,     4,      {PP_FUNCALL, PREC_FN,	0}},
+{"grabOutput",do_graboutput,0,	11,     1,      {PP_FUNCALL, PREC_FN,	0}},
 {"socketConnection",do_sockconn,0,	11,     6,      {PP_FUNCALL, PREC_FN,	0}},
 {"sockSelect",do_sockselect,0,	11,     3,      {PP_FUNCALL, PREC_FN,	0}},
 {"getAllConnections",do_getallconnections,0,11, 0,      {PP_FUNCALL, PREC_FN,	0}},



More information about the R-devel mailing list