[R] Function that is giving me a headache- any help appreciated (automatic read )

Peter Ehlers ehlers at ucalgary.ca
Tue May 18 23:13:23 CEST 2010


On 2010-05-18 11:00, John Kane wrote:
>
> I don't think you can do this
> precipitation!="NA")

Actually, that will work here, although it should always be avoided.
Do use is.na().

The main problem seems to be that the ddply() call doesn't work.
I would just use tapply() and unlist():

  b <- with(precip.1, tapply(precipitation, gauge_name, cumsum))
  b <- unlist(b)

You'll also find that you need to wrap your qplot() calls in print().

And your if/else logic (at the end of USGS) looks faulty.

  -Peter Ehlers

>
> have a look at ?is.na
>
> --- On Tue, 5/18/10, stephen sefick<ssefick at gmail.com>  wrote:
>
>> From: stephen sefick<ssefick at gmail.com>
>> Subject: [R] Function that is giving me a headache- any help appreciated (automatic read )
>> To: r-help at r-project.org
>> Received: Tuesday, May 18, 2010, 12:38 PM
>> note: whole function is below- I am
>> sure I am doing something silly.
>>
>> when I use it like USGS(input="precipitation") it is
>> choking on the
>>
>>
>> precip.1<- subset(DF, precipitation!="NA")
>> b<- ddply(precip.1$precipitation,
>> .(precip.1$gauge_name), cumsum)
>> DF.precip<- precip.1
>> DF.precip$precipitation<- b$.data
>>
>> part, but runs fine outside of the function:
>>
>> days=7
>> input="precipitation"
>> require(chron)
>> require(gsubfn)
>> require(ggplot2)
>> require(plyr)
>> #021973269 is the Waynesboro Gauge on the Savannah River
>> Proper (SRS)
>> #02102908 is the Flat Creek Gauge (ftbrfcms)
>> #02133500 is the Drowning Creek (ftbrbmcm)
>> #02341800 is the Upatoi Creek Near Columbus (ftbn)
>> #02342500 is the Uchee Creek Near Fort Mitchell (ftbn)
>> #02203000 is the Canoochee River Near Claxton (ftst)
>> #02196690 is the Horse Creek Gauge at Clearwater, S.C.
>>
>> a<- "http://waterdata.usgs.gov/nwis/uv?format=rdb&period="
>> b<-
>> "&site_no=021973269,02102908,02133500,02341800,02342500,02203000,02196690"
>> z<- paste(a, days, b, sep="")
>> L<- readLines(z)
>>
>> #look for the data with USGS in front of it (this take
>> advantage of
>> #the agency column)
>> L.USGS<- grep("^USGS", L, value = TRUE)
>> DF<- read.table(textConnection(L.USGS), fill = TRUE)
>> colnames(DF)<- c("agency", "gauge", "date", "time",
>> "time_zone",
>> "gauge_height",
>> "discharge", "precipitation")
>> pat<- "^# +USGS +([0-9]+) +(.*)"
>> L.DD<- grep(pat, L, value = TRUE)
>> library(gsubfn)
>> DD<- strapply(L.DD, pat, c, simplify = rbind)
>> DDdf<- data.frame(gauge = as.numeric(DD[,1]),
>> gauge_name = DD[,2])
>> both<- merge(DF, DDdf, by = "gauge", all.x = TRUE)
>>
>> dts<- as.character(both[,"date"])
>> tms<- as.character(both[,"time"])
>> date_time<- as.chron(paste(dts, tms), "%Y-%m-%d
>> %H:%M")
>> DF<- data.frame(Date=as.POSIXct(date_time), both)
>> #change precip to numeric
>> DF[,"precipitation"]<-
>> as.numeric(as.character(DF[,"precipitation"]))
>>
>> precip.1<- subset(DF, precipitation!="NA")
>> b<- ddply(precip.1$precipitation,
>> .(precip.1$gauge_name), cumsum)
>> DF.precip<- precip.1
>> DF.precip$precipitation<- b$.data
>>
>> #discharge
>> if(input=="data"){
>>
>> return(DF)
>>
>> }else{
>>
>> qplot(Date, discharge, data=DF,
>> geom="line", ylab="Date")+facet_wrap(~gauge_name,
>> scales="free_y")+coord_trans(y="log10")}
>>
>> if(input=="precipitation"){
>> #precipitation
>> qplot(Date, precipitation, data=DF.precip,
>> geom="line")+facet_wrap(~gauge_name, scales="free_y")
>>
>> }else{
>>
>> qplot(Date, discharge, data=DF,
>> geom="line", ylab="Date")+facet_wrap(~gauge_name,
>> scales="free_y")+coord_trans(y="log10")}
>>
>> below is the whole function:
>>
>> USGS<- function(input="discharge", days=7){
>> require(chron)
>> require(gsubfn)
>> require(ggplot2)
>> require(plyr)
>> #021973269 is the Waynesboro Gauge on the Savannah River
>> Proper (SRS)
>> #02102908 is the Flat Creek Gauge (ftbrfcms)
>> #02133500 is the Drowning Creek (ftbrbmcm)
>> #02341800 is the Upatoi Creek Near Columbus (ftbn)
>> #02342500 is the Uchee Creek Near Fort Mitchell (ftbn)
>> #02203000 is the Canoochee River Near Claxton (ftst)
>> #02196690 is the Horse Creek Gauge at Clearwater, S.C.
>>
>> a<- "http://waterdata.usgs.gov/nwis/uv?format=rdb&period="
>> b<-
>> "&site_no=021973269,02102908,02133500,02341800,02342500,02203000,02196690"
>> z<- paste(a, days, b, sep="")
>> L<- readLines(z)
>>
>> #look for the data with USGS in front of it (this take
>> advantage of
>> #the agency column)
>> L.USGS<- grep("^USGS", L, value = TRUE)
>> DF<- read.table(textConnection(L.USGS), fill = TRUE)
>> colnames(DF)<- c("agency", "gauge", "date", "time",
>> "time_zone",
>> "gauge_height",
>> "discharge", "precipitation")
>> pat<- "^# +USGS +([0-9]+) +(.*)"
>> L.DD<- grep(pat, L, value = TRUE)
>> library(gsubfn)
>> DD<- strapply(L.DD, pat, c, simplify = rbind)
>> DDdf<- data.frame(gauge = as.numeric(DD[,1]),
>> gauge_name = DD[,2])
>> both<- merge(DF, DDdf, by = "gauge", all.x = TRUE)
>>
>> dts<- as.character(both[,"date"])
>> tms<- as.character(both[,"time"])
>> date_time<- as.chron(paste(dts, tms), "%Y-%m-%d
>> %H:%M")
>> DF<- data.frame(Date=as.POSIXct(date_time), both)
>> #change precip to numeric
>> DF[,"precipitation"]<-
>> as.numeric(as.character(DF[,"precipitation"]))
>>
>> precip.1<- subset(DF, precipitation!="NA")
>> b<- ddply(precip.1$precipitation,
>> .(precip.1$gauge_name), cumsum)
>> DF.precip<- precip.1
>> DF.precip$precipitation<- b$.data
>>
>> #discharge
>> if(input=="data"){
>>
>> return(DF)
>>
>> }else{
>>
>> qplot(Date, discharge, data=DF,
>> geom="line", ylab="Date")+facet_wrap(~gauge_name,
>> scales="free_y")+coord_trans(y="log10")}
>>
>> if(input=="precipitation"){
>> #precipitation
>> qplot(Date, precipitation, data=DF.precip,
>> geom="line")+facet_wrap(~gauge_name, scales="free_y")
>>
>> }else{
>>
>> qplot(Date, discharge, data=DF,
>> geom="line", ylab="Date")+facet_wrap(~gauge_name,
>> scales="free_y")+coord_trans(y="log10")}
>>
>> }
>>



More information about the R-help mailing list