[Rd] read.table leaves out data when reading multiple-line records (PR#4955)

joehl at gmx.de joehl at gmx.de
Wed Nov 5 16:14:13 MET 2003



Dear all,

I discovered that read.table (RW1.8.0) leaves out data when reading
multiple-line records.

Replication code is at the end of this message.

Best regards


Jens Oehlschlägel


> filename <- "c:/tmp/c2.csv"
> 
> data <- data.frame(a=c("c", "e\nnewline"), b=c("d", '"quoted simpleline"'))
> 
> #look at the data
> write.table(data, sep=",", row.names=FALSE)
"a","b"
"c","d"
"e
newline","\"quoted simpleline\""
> 
> # write it out
> write.table(data, sep=",", row.names=FALSE, file=filename)
> 
> # reading it in a line is missing
> read.csv(filename)
           a                     b
1 e\nnewline \\quoted simpleline\\
> 
> fc <- file(filename, open="r")
> 
> # the problem seems to be
> # readTableHead erroneously counts 3 lines as 4
> lines <- .Internal(readTableHead(fc, 4, "", TRUE))
> lines
[1] "\"a\",\"b\""                             "\"c\",\"d\""                             "\"e"
[4] "newline\",\"\\\"quoted simpleline\\\"\""
> 
> # double pushback is fine
> pushBack(c(lines,lines), fc)
> 
> # but nlines tells us we had 4 lines, which in fact are only 3
> nlines <- length(lines)
> nlines
[1] 4
> 
> # and the first scan eats up more than the first pushback
> scan(fc, what="string", sep=",", nlines=nlines)
Read 8 items
[1] "a"                     "b"                     "c"                     "d"                     "e\nnewline"
[6] "\\quoted simpleline\\" "a"                     "b"
> 
> # thus the real scan misses data
> scan(fc, what="string", sep=",")
Read 4 items
[1] "c"                     "d"                     "e\nnewline"            "\\quoted simpleline\\"
> 
> close(fc)
> 
> version
         _              
platform i386-pc-mingw32
arch     i386           
os       mingw32        
system   i386, mingw32  
status                  
major    1              
minor    8.0            
year     2003           
month    10             
day      08             
language R




# Replication script for PR#4955: read.table/read.csv drops a record when a
# quoted field contains an embedded newline (observed with R 1.8.0 on Windows).
# The statements are meant to be run in order; results quoted in the comments
# below are those shown in the console transcript earlier in this message.

# NOTE(review): hard-coded Windows path -- presumably c:/tmp must already
# exist for the write below to succeed; adjust before running elsewhere.
filename <- "c:/tmp/c2.csv"

# Two records. In the second record, column 'a' holds an embedded newline
# and column 'b' holds literal double-quote characters.
data <- data.frame(a=c("c", "e\nnewline"), b=c("d", '"quoted simpleline"'))

# Print the CSV form to the console (no 'file' argument) to inspect it.
# It occupies 4 physical lines: header, record 1, and record 2 split in two
# by the embedded newline.
write.table(data, sep=",", row.names=FALSE)

# Write the same CSV form to disk.
write.table(data, sep=",", row.names=FALSE, file=filename)

# Read it back: the result has only ONE row ("e\nnewline", ...); the record
# "c","d" is missing. This is the bug being reported.
read.csv(filename)

# The remaining steps retrace by hand what the reporter believes read.table
# does internally, using an explicitly opened read-mode connection.
fc <- file(filename, open="r")

# Reporter's diagnosis: readTableHead treats the 3 logical lines
# (header + 2 records) as 4, because it splits the quoted field at its
# embedded newline. The call returns 4 strings, the 3rd being "\"e".
# NOTE(review): presumably the second argument (4) is the number of lines
# requested; the meaning of "" and TRUE is not visible here -- confirm
# against the R 1.8.0 sources.
lines <- .Internal(readTableHead(fc, 4, "", TRUE))
lines

# Push the head lines back onto the connection twice (two copies are pushed
# so that the effect of the over-read below is visible).
pushBack(c(lines,lines), fc)

# nlines is 4 (number of physical lines), although the head contains only
# 3 logical lines.
nlines <- length(lines)
nlines

# First scan, limited to 'nlines' lines: it reads 8 items -- all 6 fields of
# the first pushed-back copy plus the header ("a", "b") of the second copy --
# i.e. it consumes more than the first pushback.
scan(fc, what="string", sep=",", nlines=nlines)

# Consequently the "real" scan of the remainder returns only 4 items
# ("c", "d", "e\nnewline", ...): the header of the second copy is already
# gone, so the data are mis-aligned and a record is lost.
scan(fc, what="string", sep=",")

# Release the connection.
close(fc)

# Print R version / platform details for the bug report.
version


--



More information about the R-devel mailing list