[R] pretty report

Gavin Kelly gavinpaulkelly at gmail.com
Thu Jun 14 11:39:03 CEST 2007


At 5:01 PM -0400 6/12/07, Weiwei Shi wrote:
>Dear Listers:
>
>I have a couple of data frames to report and each corresponds to
>different condtions, e.g. conditions=c(10, 15, 20, 25). In this
>examples, four data frames need to be exported in a "pretty" report.
>
>I knew Perl has some module for exporting data to Excel and after
>googling, I found R does not.

Weiwei,

If you (or the users who are opening your reports) are going to using
a version of excel that supports the new Office XML formats, you can
write multi-sheeted workbooks as below: simply give spreadsheetML a
named list of dataframes.  You can add attributes to the components to
add things such as comments, subheadings that span multiple columns,
hyperlinks and named data-ranges.

If you can't guarantee that the opener won't have a modern Excel (I
don't believe Mac versions are yet at this stage), then you will need
to have a windows box to open the file, and save as 'proper' excel.
Below is a visual basic macro I have set up in a watched directory to
do this on the fly.  I use the program "filenotify" to watch the
directory.

If any of the package developers want to incorporate this function,
then please do get in touch.  It's probably not worth a package of
it's own, but I think the ability to have multi-sheeted excel books,
with the extra bits of formatting mentioned above might be useful.
I'ts fairly straightforward to add extra styling (colours, typefaces,
etc).

Regards - Gavin

###  The R function, and a demo
spreadsheetML <- function(dat, fname, style=NULL) {
  if (is.data.frame(dat))
    dat <- list(Sheet1=dat)
  if (is.null(names(dat)))
    names(dat) <- paste("Sheet",1:length(dat), sep="")
  names(dat)[names(dat)==""] <- paste("Sheet",1:length(dat),
sep="")[names(dat)==""]
  x <- xmlOutputDOM("Workbook", nameSpace="ss",
                    nsURI=list(
                      o="urn:schemas-microsoft-com:office:office",
                      x="urn:schemas-microsoft-com:office:excel",
                      ss="urn:schemas-microsoft-com:office:spreadsheet",
                      html="http://www.w3.org/TR/REC-html40"))
  if (!is.null(style))
    x$addNode(style)
### Annotate any named Ranges
  if (any(!is.null(lapply(dat, attr, "range")))) {
    x$addTag("Names", close=FALSE)
    for (sheet in names(dat)) {
      rngs <- attr(dat[[sheet]],"range")
      offset <- ifelse(is.null(attr(dat[[sheet]],"subhead")), 1, 2)
      for (i in names(rngs)) {
        refersTo <- sprintf("=%s!R%iC%i:R%iC%i",
                            sheet,
                            rngs[[i]]$rowStart+offset,
                            rngs[[i]]$colStart,
                            rngs[[i]]$rowEnd+offset,
                            rngs[[i]]$colEnd)
        x$addTag("NamedRange", attrs=c("ss:Name"=i,
                                 "ss:RefersTo"=refersTo))
      }
    }
    x$closeTag() #Names
  }
  for (sheet in 1:length(dat)) {
    ## For each dataframe, construct a worksheet
    x$addTag("Worksheet", attrs=c("ss:Name"=names(dat)[[sheet]]), close=FALSE)
    x$addTag("Table",close=FALSE)
    x$addTag("Row", close=FALSE)
    ## If there's a subheader, expand it, and remove entries from
relevant header
    headRow <- colnames(dat[[sheet]])
    if (!is.null(subhead <- attr(dat[[sheet]],"subhead"))) {
      subHeadRow <- rep("", length(headRow))
      for (i in names(subhead)) {
        start <- match(i, headRow)
        subHeadRow[start:(start+length(subhead[[i]])-1)] <-
          subhead[[i]]
        headRow[(start+1):(start+length(subhead[[i]])-1)] <- ""
      }
    }
    ## Create Header Row, with comments
    for (i in headRow) {
      x$addTag("Cell", close=FALSE)
      x$addTag("Data",i , attrs=c("ss:Type"="String"))
      if (!is.null(comment <- attr(dat[[sheet]],"xlComment")[[i]])) {
        if (is.character(comment)) {
          x$addTag("Comment", attrs=c("ss:Author"="BaBS"), close=FALSE)
          x$addTag("Data", comment)
          x$closeTag() #Comment
        }
      }
      x$closeTag() # Header entry
    }
    x$closeTag() # Header Row
    ## Create Sub-Header row, with comments
    if (!is.null(subhead)) {
      x$addTag("Row", close=FALSE)
      for (i in 1:length(subHeadRow)) {
        x$addTag("Cell", close=FALSE)
        x$addTag("Data",subHeadRow[i] , attrs=c("ss:Type"="String"))
        if (is.list(comment <- attr(dat[[sheet]],"xlComment")[[headRow[i]]])) {
          if (!is.null(comment <- comment[[subHeadRow[i]]])) {
            x$addTag("Comment", attrs=c("ss:Author"="BaBS"), close=FALSE)
            x$addTag("Data", comment)
            x$closeTag() #Comment
          }
        }
        x$closeTag()
      }
      x$closeTag() # subHeader Row
    }
    coltypes <- rep("String", ncol(dat[[sheet]]))
    coltypes[sapply(dat[[sheet]], is.numeric)] <- "Number"
    href <- attributes(dat[[sheet]])$href
    ## Enter the data row-wise
    for (i in 1:nrow(dat[[sheet]])) {
      x$addTag("Row", close=FALSE)
      for (j in 1:ncol(dat[[sheet]])) {
        ## Go through the row, expanding any hyperlinks
        cellAttr <- NULL
        if (!is.na(ind <- match(colnames(dat[[sheet]])[j], names(href))))
          cellAttr <- c("ss:Href"=gsub(" ", dat[[sheet]][i,j], href[ind]))
        x$addTag("Cell", attrs=cellAttr, close=FALSE)
        x$addTag("Data", as.character(dat[[sheet]][i,j]),
attrs=c("ss:Type"=coltypes[j]))
        x$closeTag()
      }
      x$closeTag() # data row
    }
    x$closeTag() # table
    x$closeTag() # Worksheet
  }
  x$closeTag() # Workbook
  con = file(fname, "w")
  saveXML(x$value(), file=con, prefix="<?xml
version=\"1.0\"?>\n<?mso-application progid=\"Excel.Sheet\"?>\n")
  close(con)
  x$reset()
}

### Example Usage
library(XML)
dat <- list(a=data.frame(A=1:10, B=LETTERS[1:10], b=letters[1:10]),
            b=data.frame(a=1:10, b=factor(LETTERS[1:2])))
attr(dat$a, "range") <- list(data=list(rowStart=1,
                               rowEnd=nrow(dat$a),
                               colStart=1,
                               colEnd=ncol(dat$a)))
attr(dat$a, "subhead") <- list(B=c("Upper","Lower"))
attr(dat$a, "xlComment") <- list(A="Hello",
                               B=list(Upper="World"))
attr(dat$b, "href") <- list(a="http://www.google.co.uk/search?q= ")
#save as .xml if using the vba script
spreadsheetML(dat, "tmp.xls")

###  Prototype script to saveas xml to xls
Dim appExcel
Dim strSource
Dim wbSource
Dim ArgObj
Set ArgObj = WScript.Arguments
Dim objRegExpr
Set objRegExpr = New regexp

objRegExpr.Pattern = ".*\.xml$"
objRegExpr.Global = True
objRegExpr.IgnoreCase = True
strSource = ArgObj(0)

if (objRegExpr.Test(strSource)) Then
  Set appExcel = CreateObject("Excel.Application")
  appExcel.DisplayAlerts = False
  Set wbSource = appExcel.Workbooks.Open(strSource)
  wbSource.SaveAs "c:\converted\tmp.xls", 1
  wbSource.Close False
  Set wbSource = Nothing
  appExcel.Quit
  Set appExcel = Nothing
End If


-- 
Gavin Kelly
Senior Statistician, Bioinformatics & Biostatistics Group
Cancer Research UK



More information about the R-help mailing list