[R] Is it R or I?

Anantha Prasad/NE/USDAFS aprasad at fs.fed.us
Fri Sep 29 23:02:27 CEST 2000


Salutations:
I have been trying to translate a S-PLUS/ArcInfo (GIS software) application
that I wrote on a SGI (IRIX) platform to  public domain R and GrassGIS on
a Linux platform. I am almost on the verge of abandoning it as I find R to
be
rather unstable, slow and frustrating.
I enclose a section of my code for R experts to examine
hoping that they'll point out that all the above three are due to my
ignorance rather than any deficiencies in R.
Thanks much to Prof. Ripley and Dr. Peter Wolf for earlier comments.

Platform:
Linux (Red Hat 6.2), 400 MHz Pentium II with 192 MB of RAM.
R version: 1.1.1 (August 15, 2000)

The foll. function (abridged from the original) is passed from a tcl/tk
applicaiton,
thru perl....

I get the foll. error (I have indicated in the comments on code where
things go wrong)....

The datafile fia802.dat has 2121 rows and 30 variables...

> R --vsize 50M --nsize 6M --no-restore
R> X11()
R>library(tcltk)
R>source("eda_lmtst.rf")
R>f.eda.lm(
dataset="fia802.dat",
pickall="ALFISOL + AVGT + BD + CEC + CLAY +
ERODFAC +INCEPTSL + JANT + JARPPET + JULT + MAXELV + MAYSEPT + MINELV +
MOLLISOL + OM + PERM + PET + PH + PPT + PROD + ROCKDEP + ROCKFRAG + SLOPE +
SPODOSOL + TAWC + TEXCOARS + TEXFINE + ULTISOL",
pickedlist="BD + CEC + CLAY + ERODFAC + INCEPTSL",
resp="iv802",
idvar="FIPS")

Reading in data
DONE Reading in data
Lm Model
Done building model
Plotting the model output
DONE-Plotting the model output
Before tcltk
Segmentation fault (core dumped)

NOTE: Sometimes the program runs (very slowly) without segmentation
fault...
File -> eda_lmtst.rf
NOTE: pickall -> is all the predictor variables in file
pickedlist -> is the subset of pickall that is used by the model
resp -> is the response variable
idvar -> is the Identifying variable

"f.eda.lm"<-
function(dataset="", pickall="", pickedlist="", resp="",idvar="")
{
   options(width=200, length=10000)

  # Create the main data object
  cat("Reading in data", "\n")
  xd <- read.table(dataset, header=T,as.is=T)
  xd <- read.table(dataset,header=T, as.is=T,
row.names=as.character(1:nrow(xd)))
  cat("DONE Reading in data", "\n")

  attach(xd)
   xd <- xd[, -c(charmatch(paste(resp), names(xd)))]

     assign(resp, get(resp))
     ndf <- as.data.frame(get(resp))
     names(ndf) <- "addcolx"
     xd <- data.frame(xd, ndf)
     names(xd)[charmatch("addcolx", names(xd))] <- resp

  cat("Lm Model", "\n")
  lmexp <- formula(paste(resp, " ~ ", pickedlist,sep=""))
  lmexpall <- formula(paste(resp, " ~ .", sep=""))


     assign("xd", xd, env=.GlobalEnv)

     tmpzx <- lm(lmexp, data=xd)
     # NOTE: THE STEP() TAKES FOREVER IN R

     # tmpzx2 <-lm(lmexpall,data=xd)
     # tmpzxall <- step(tmpzx2, trace=0)

  cat ("Done building model", "\n")

  # Get predictions

    xp <- predict(tmpzx,se.fit=T,interval=c("confidence"))
    xpnd <- xp
    xpndfit.df <- as.data.frame(xpnd$fit)
    xpnd.o <- data.frame(act=get(resp),fit.act=xp$fit)
    mapres.opact <- data.frame(act=get(resp))
    mapres.opprd <- data.frame(fit.act=xp$fit)
    write.table(xpnd.o,"lmtst.dat",quote=F,row.names=F)
    mapres.op <- data.frame(idvar=get(idvar), act=mapres.opact,
prd=mapres.opprd)
    # Output for mapping response variable
    write.table(mapres.op,"lm_mapres.txt",quote=F,row.names=F)

  # Output summary statistics
  xs <- summary(tmpzx)
  # xsall <- summary(tmpzxall)
  sink("lmsumm.txt")
  cat("\n")
  print(xs)
  cat("\n")
  print("******** STEPWISE REGRESSION *********")
  print ("Not done")
  sink()

  # Plot model output
  par(mfrow=c(2,2))
  cat("Plotting the model output","\n")
  # NOTE: THE LAST PLOT TAKES FOREVER
  plot(tmpzx,which=1:4)
  dev.print(png, "lmplot.png", width=640,height=640)
  par(mfrow=c(1,1))
  cat("DONE-Plotting the model output","\n")

  # TCLTK INTERFACE TO CHOOSE TYPE OF PLOT AND IDENTIFY OUTLIERS
  # NOTE THAT I WANT THE MAIN DIALOG (ACTUALVSPREDICTED AND RESIDUALPLOT)
  # TO PERSIST BETWEEN OUTLIER IDENTIFICATION...SO THAT THE USER CAN
  # KEEP PLOTTING AND IDENTIFYING TILL HE HITS THE EXIT BUTTON.
    xlabel <- paste(resp, "(Actual)",sep="")
    ylabel <- paste(resp, "(Predicted)", sep="")
    xdnc.act <- get(resp)
    xdnc.max <- max(xdnc.act)
    xpnd.max <- max(xpndfit.df$fit)
    xdnc.min <- min(xdnc.act)
    xpnd.min <- min(xpndfit.df$fit)
    if(xpnd.max > xdnc.max) max.lim <- xpnd.max
    else max.lim <- xdnc.max
    if(xpnd.min < xdnc.min) min.lim <- xpnd.min
    else min.lim <- xdnc.min

    outidap <- as.null()
    outidrs <- as.null()
    cat("Before tcltk","\n")
    tt <- tktoplevel()
    tktitle(tt) <- "Diagnostics"
    label.widget <- tklabel(tt, text="Choose type of plot!")

    idnfyplot <- function() {
         outi <- identify(idf.x, idf.y, label=get(idvar))
       if(flag == 1) {
       outidap <- outi
       assign("outidap", outidap, env=.GlobalEnv)
       }
       if(flag == 2) {
       outidrs <- outi
       assign("outidrs", outidrs, env=.GlobalEnv)
       }
       dev.print(png, paste(opfr,".png",sep=""), width=640,height=640)
    }

    actvsprd <- function() {
      plot(xdnc.act, xpndfit.df$fit, xlim=c(min.lim, max.lim),
ylim=c(min.lim,max.lim),xlab=xlabel, ylab=ylabel)
      abline(0,1)
      abline(lsfit(xdnc.act, xpndfit.df$fit), lty=2)
      opfr <- opf2
      idf.x <- xdnc.act
      idf.y <- xpndfit.df$fit
      flag <- 1
      assign("flag", flag, env=.GlobalEnv)
      assign("idf.x", idf.x, env=.GlobalEnv)
      assign("idf.y",idf.y,env=.GlobalEnv)
      assign("opfr",opfr,env=.GlobalEnv)
      # define second toplevel widget
      tt2a <- tktoplevel()
      tktitle(tt2a) <- "IdentifyOutliers"
      but.lab2 <- tklabel(tt2a, text="Left button\n to identify,\n middle
to quit",foreground="maroon")
      but.wid22 <- tkbutton(tt2a, text="Identify Outliers",
                              command=idnfyplot)
      but.wid23 <- tkbutton(tt2a, text="DONE", command=function(){
                                                     tclvar$done2<-"T"
                                                     tkdestroy(tt2a)
                                                   })
      tkpack(but.lab2, but.wid22, but.wid23)
      # wait until DONE is pressed
      tclvar$done2 <- "F"
      tkwait.variable("done2")
    }

    residplot <- function() {
      plot(tmpzx$fit, tmpzx$residuals)
      abline(h=0)
      opfr <- opf2b
      idf.x <- tmpzx$fit
      idf.y <- tmpzx$residuals
      flag <- 2
      assign("flag", flag, env=.GlobalEnv)
      assign("idf.x", idf.x, env=.GlobalEnv)
      assign("idf.y",idf.y,env=.GlobalEnv)
      assign("opfr",opfr,env=.GlobalEnv)
      # define second toplevel widget
      tt2b <- tktoplevel()
      tktitle(tt2b) <- "IdentifyOutliers"
      but.lab2 <- tklabel(tt2b, text="Left button\n to identify,\n middle
to quit",foreground="maroon")
      but.wid22 <- tkbutton(tt2b, text="Identify Outliers",
                               command=idnfyplot)
      but.wid23 <- tkbutton(tt2b, text="DONE", command=function(){
                                                      tclvar$done3<-"T"
                                                      tkdestroy(tt2b)
                                                    })
      tkpack(but.lab2, but.wid22, but.wid23)
      # wait until DONE is pressed
      tclvar$done3 <- "F"
      tkwait.variable("done3")
    }

    tclvar$choice <- 99
    # THE PROGRAM STARTS DISK SWAPPING HERE...AND FINALLY DOES A
    # SEGMENTATION FAULT...SOMETIMES....
    rbut.wid1 <- tkradiobutton(tt, text="ActualVsPredicted", value=0,
    variable="choice", command=actvsprd)
    rbut.wid2 <- tkradiobutton(tt, text="ResidualPlot", value=1,
    variable="choice", command=residplot)
    but.plot <- tkbutton(tt, text="Exit", command=function(){
                                                         tclvar$done <- "T"
                                                         tkdestroy(tt)
                                                       })
    tkpack(label.widget, rbut.wid1, rbut.wid2, but.plot)
    # wait until Exit is pressed
    tclvar$choice <- "99"
    tkwait.variable("done")
  # End Tcl/tk section

  # Plot more diagnostics..
  par(mfrow=c(1,2))
  par(pty="s")
  hist(tmpzx$residuals)
  qqnorm(tmpzx$residuals)
  qqline(tmpzx$residuals)
  par(mfrow=c(1,1))
  dev.print(png, "lmdiag.png", width=640,height=640)

  # I HAVE A PROBLEM HERE...OUTIDAP AND OUTIDRS ARE GENERATED BY IDENTIFY()
  # EVEN THOUGH I HAVE ASSIGNED IT TO .GLOBALENV, IT REPORTS NULL HERE..
  # WHY??

  cat(outidap, "\n")
  cat(outidrs, "\n")
  outid <- append(outidap, outidrs)
  cat(outid, "\n")
  if(idvar != "NULL") {
     idvardf <- data.frame(get(idvar))
     names(idvardf) <- idvar
     outidf <- as.data.frame(idvardf[outid, paste(idvar)])
     cat(dim(outidf), "\n")
     write.table(outidf, "lmoutliers.txt", quote=F,row.names=F)
    }
  detach("xd")
  rm(xd)
  rm(xdn)
  rm(xdnc)
}


***********************************************************
Mr. Anantha Prasad, Ecologist/GIS Specialist
USDA Forest Service, 359 Main Rd.
Delaware OHIO 43015    USA
Ph: 740-368-0103  Email: aprasad at fs.fed.us
Web: http://www.fs.fed.us/ne/delaware/index.html
Don't Miss Climate Change Tree Atlas at:
http://www.fs.fed.us/ne/delaware/atlas/index.html
******************************************************************

-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-
r-help mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html
Send "info", "help", or "[un]subscribe"
(in the "body", not the subject !)  To: r-help-request at stat.math.ethz.ch
_._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._



More information about the R-help mailing list