[R] Memory allocation failed: Copying Node

ppatel3026 pratik.patel at us.rothschild.com
Wed Jun 25 16:50:50 CEST 2008


The following code fails with a "Memory allocation failed: Copying Node"
error after parsing several thousand files. I have included the main code
(below) and the functions it uses (after the main code).

I am not sure which lines trigger the node copying that results in the
memory failure. Please advise.

#Beginning of Code
        # Main batch loop: for every row of `newFile`, make sure the filing is
        # available locally (downloading it by FTP when missing or empty), parse
        # its embedded XML section and append the non-derivative transactions to
        # `outputFile`. Files that cannot be handled are logged through
        # unProcessedFiles() and skipped, so one bad file never stops the batch.
        #
        # Relies on objects defined outside this block: `newFile`, `outputFile`,
        # the `%+%` operator, xValHelper(), xmlMalformed2(), processTransaction()
        # and unProcessedFiles().
        for (i in seq_len(nrow(newFile))) {
          # Force a garbage collection every 3000 files.
          if (i %% 3000 == 0) gc()

          fname <- as.character(newFile$"File Name"[i])
          # The local file name is the 4th "/"-separated component of the path.
          # NOTE: `file` and `xml` are deliberately left as top-level variables;
          # processTransaction() reads `file`, and xValAll()/xParent() read `xml`
          # (xValHelper() is not shown here and presumably does too).
          file <- strsplit(fname, "/")[[1]][4]
          filein <- "C:\\foldername\\" %+% file

          # Download when the local copy is missing or empty. The size check
          # avoids reading the whole file just to test for emptiness.
          if (!file.exists(filein) || file.info(filein)$size == 0) {
            ftp <- paste("ftp://servername/", fname, sep = "")
            try(download.file(url = ftp, destfile = filein))
          }

          # A failed download used to crash the loop in readLines(); record the
          # file as unprocessed and carry on instead.
          if (!file.exists(filein)) {
            unProcessedFiles(filein)
            next
          }
          # Empty files are skipped without being logged.
          if (file.info(filein)$size == 0) {
            next
          }

          # xmlMalformed2() locates the <XML> section itself, so it is not
          # located a second time here; doing that outside tryCatch() aborted
          # the whole batch whenever a file had no XML markers.
          xml <- tryCatch(
            xmlMalformed2(filein),
            error = function(err) unProcessedFiles(filein)
          )
          if (is.null(xml)) next

          processed <- FALSE
          owner <- tryCatch(
            data.frame(
              datadate = xValHelper("periodOfReport"),
              CIK = xValHelper("issuerCik"),
              conm = xValHelper("issuerName"),
              tic = xValHelper("issuerTradingSymbol")
            ),
            error = function(err) unProcessedFiles(filein)
          )
          if (is.null(owner)) next

          nodes <- getNodeSet(xml, "//nonDerivativeTransaction")
          if (xmlSize(nodes) > 0) {
            processed <- tryCatch(
              processTransaction(owner, nodes, outputFile),
              error = function(err) unProcessedFiles(filein)
            )
            if (is.null(processed)) next
          }
        }
#End of Code


#List of Functions
# Repair a filing whose markup was broken by hard line wraps and parse the
# XML section it contains.
#
# Steps:
#   1. Read the whole file as one string.
#   2. Remove CR/LF characters that fall inside a tag (<...>).
#   3. Repair &quot; and &amp; entities that were split by a CR/LF, then
#      replace &quot; with a double quote and &amp; with the word "and".
#   4. Parse the lines strictly between the first two lines matching
#      "</*XML" (the opening and closing XML markers).
#
# filename: path of the file to read.
# Returns (invisibly) the document produced by xmlTreeParse() with internal
# nodes. Signals an error when the XML section cannot be located or is empty;
# callers are expected to wrap the call in tryCatch().
xmlMalformed2 <- function(filename) {
  quotes <- c("&\r\nquot;", "&q\r\nuot;", "&qu\r\not;", "&quo\r\nt;",
              "&quot\r\n;")
  amp <- c("&\r\namp;", "&a\r\nmp;", "&am\r\np;", "&amp\r\n;")

  charStream <- readChar(filename, file.info(filename)$size)
  charStreamNew <- gsubfn("<[^>]*>", ~ gsub("[\r\n]", "", x), charStream)

  # gsub() rather than sub(): every broken entity must be repaired, not just
  # the first of each kind. fixed = TRUE because these are literal strings.
  for (k in quotes) {
    charStreamNew <- gsub(k, "&quot;", charStreamNew, fixed = TRUE)
  }
  for (v in amp) {
    charStreamNew <- gsub(v, "&amp;", charStreamNew, fixed = TRUE)
  }
  charStreamNew <- gsub("&quot;", "\"", charStreamNew, fixed = TRUE)
  charStreamNew <- gsub("&amp;", "and", charStreamNew, fixed = TRUE)

  # Split the repaired text into lines; close the connection explicitly so
  # that no connection is left open after each file.
  con <- textConnection(charStreamNew)
  on.exit(close(con), add = TRUE)
  xmlVec <- readLines(con)

  xmlInDoc <- grep("</*XML", xmlVec)
  if (length(xmlInDoc) < 2) {
    stop("No complete <XML> ... </XML> section found in ", filename)
  }
  if (xmlInDoc[2] - xmlInDoc[1] < 2) {
    # Without this guard seq() would count downwards and select the marker
    # lines themselves.
    stop("Empty <XML> section in ", filename)
  }

  xmlLines <- xmlVec[seq(xmlInDoc[1] + 1, xmlInDoc[2] - 1)]
  invisible(xmlTreeParse(xmlLines, useInternalNodes = TRUE))
}

# Build one output row per transaction node and append the rows to `outFile`.
#
# rptOwner: data frame describing the reporting issuer.
# nodes:    transaction nodes, as returned by getNodeSet().
# outFile:  path of the tab-separated output file; rows are appended.
#
# NOTE: the last output column is taken from the variable `file`, which is
# not an argument. It is expected to hold the name of the file the data was
# read from and must be set by the caller before this function is called.
#
# Returns TRUE after the rows have been written.
processTransaction <- function(rptOwner, nodes, outFile) {
  # Look the shares up once. A single value is unwrapped with [[1]] before it
  # goes into the data frame.
  shares <- xValHelperSpecial(nodes, "transactionShares")
  if (length(shares) == 1) {
    shares <- shares[[1]]
  }

  transaction <- data.frame(
    transdate = xValHelperSpecial(nodes, "transactionDate"),
    securityTitle = xValHelperSpecial(nodes, "securityTitle"),
    transactionShares = shares
  )

  out <- merge(rptOwner, transaction, all.x = TRUE)
  # `file` is the variable holding the name of the file the data was read from.
  output <- cbind(out, file)
  write.table(output, file = outFile, append = TRUE, sep = "\t", eol = "\n",
              quote = FALSE, col.names = FALSE, row.names = FALSE)
  TRUE
}

# Record a file that could not be processed by appending its name to a log.
#
# filename:  name (or character vector of names) to record, one per line.
# errorFile: path of the log file; entries are appended. Defaults to the
#            location that was previously hard-coded.
#
# Always returns NULL (invisibly), so the function can be used directly as a
# tryCatch() error handler whose result signals "skip this file".
unProcessedFiles <- function(filename, errorFile = "C:/errorFile.txt") {
  write.table(filename, file = errorFile, append = TRUE, sep = "\t",
              eol = "\n", quote = FALSE, col.names = FALSE,
              row.names = FALSE)
  invisible(NULL)
}

#xValHelperSpecial and xValHelper are pretty similar, hence the code for
#xValHelper is omitted
# Extract the text of the first `xtag` element found under each node.
#
# node: a collection of XML nodes (e.g. the result of getNodeSet()); it is
#       indexed with [[n]] for n = 1 .. xmlSize(node).
# xtag: name of the element to look up below each node.
#
# For every node the first matching element's value is taken, line feeds are
# removed and leading/trailing blanks are trimmed. A node without a matching
# element (or whose match yields no text) contributes the string "NA", so the
# result always has exactly one entry per node.
#
# Returns NULL when there are no nodes, a single character string for one
# node, and a one-column character matrix (one row per node) otherwise.
xValHelperSpecial <- function(node, xtag) {
  nobs <- xmlSize(node)
  if (nobs == 0) {
    return(NULL)
  }

  # ".//" keeps the search relative to the node being inspected; a leading
  # "//" is an absolute path that starts from the document root.
  query <- ".//" %+% xtag

  values <- vapply(seq_len(nobs), function(n) {
    hits <- xpathApply(node[[n]], query, xmlValue)
    if (length(hits) == 0 || length(hits[[1]]) == 0) {
      return("NA")
    }
    # Drop line feeds first, then trim leading and trailing blanks.
    gsub("(^ +)|( +$)", "", gsub("\n", "", hits[[1]]))
  }, character(1))

  if (nobs == 1) values else matrix(values, ncol = 1)
}

# Values of every `xtag` element in the document held by the variable `xml`
# (which is not an argument and must be set before the call).
xValAll <- function(xtag) {
  query <- "//" %+% xtag
  xpathApply(xml, query, xmlValue)
}

# Name of the element two levels above the first `xtag` element found in the
# document held by the variable `xml` (which is not an argument and must be
# set before the call).
xParent <- function(xtag) {
  query <- paste("//" %+% xtag, "/../..")
  matches <- xpathApply(xml, query)
  xmlName(matches[[1]])
}

#End of Functions
             
-- 
View this message in context: http://www.nabble.com/Memory-allocation-failed%3A-Copying-Node-tp18114389p18114389.html
Sent from the R help mailing list archive at Nabble.com.



More information about the R-help mailing list