[R] [netcdfgroup] [ncdf4] error converting GEIA data to netCDF

Roy Mendelssohn roy.mendelssohn at noaa.gov
Tue Aug 28 21:45:58 CEST 2012


On Aug 28, 2012, at 12:24 PM, Tom Roche wrote:

> 
> Roy Mendelssohn Tue, 28 Aug 2012 09:07:32 -0700
>> here is the relevant section from your code:
> 
>>> netcdf.file <- nc_create(
>>>  filename=netcdf.fn,
>>> #  vars=list(emis.var),
>>> #  verbose=TRUE)
>>>  vars=list(emis.var))
> 
>>> # Write data to data variable: gotta have file first.
>>> # Gotta convert 2d global.emis.mx[lat,lon] to 3d global.emis.arr[time,lat,lon]
>>> # Do this before adding _FillValue to prevent:
>>> # > Error in R_nc4_put_vara_double: NetCDF: Not a valid data type or =
>> _FillValue type mismatch
>>> ## global.emis.arr <- global.emis.mx
>>> ## dim(global.emis.arr) <- c(1, dim(global.emis.mx))
>>> ## global.emis.arr[1,,] <- global.emis.mx
> 
>>> # Note
>>> # * global.emis.mx[lat,lon]
>>> # * datavar needs [lon, lat, time] (with time *last*)
> 
>>> ncvar_put(
>>>  nc=netcdf.file,
>>>  varid=emis.var,
>>> #  vals=global.emis.arr,
>>>  vals=t(global.emis.mx),
>>> #  start=rep.int(1, length(dim(global.emis.arr))),
>>>  start=c(1, 1, 1),
>>> #  count=dim(global.emis.arr))
>>>  count=c(-1,-1, 1)) # -1 -> all data
> 
>> You can't write until all dimensions have been defined, and all
>> variables defined.
> 
> But in fact, that's only a part of the code ... which omits the prior
> dimension and variable definitions :-( See the current version @
> 
> https://github.com/TomRoche/GEIA_to_netCDF/blob/c380c0a28dc8c71dbf0c2ba18130a2439a4fe089/GEIA.to.netCDF.r
> 
> I've also attached that (quoted) following my .sig, with the top-most
> constant and function declarations removed for brevity. The dimension
> definitions are prefixed with '1', the variable definition is prefixed
> with '2'.
> 
> HTH, Tom Roche <Tom_Roche at pobox.com>
> current GEIA.to.netCDF.r code block follows to end of post------------
> # code----------------------------------------------------------------
> 
>> # process input
>> library(maps) # on tlrPanP5 as well as clusters
> 
>> # input file path
>> GEIA.emis.txt.fp <- sprintf('%s/%s', GEIA.emis.txt.dir, GEIA.emis.txt.fn)
>> # columns are grid#, mass
>> GEIA.emis.mx <-
>>  as.matrix(read.table(GEIA.emis.txt.fp, skip=GEIA.emis.txt.n.header))
>> # mask zeros? no, use NA for non-ocean areas
>> # GEIA.emis.mx[GEIA.emis.mx == 0] <- NA
>> # <simple input check>
>> dim(GEIA.emis.mx) ## [1] 36143 2
>> # start debug
>> GEIA.emis.mx.rows <- dim(GEIA.emis.mx)[1]
>> if (GEIA.emis.mx.rows > GEIA.emis.grids.dim) {
>>  cat(sprintf('ERROR: %s: GEIA.emis.mx.rows=%.0d > GEIA.emis.grids.dim=%.0d\n',
>>    this.fn, GEIA.emis.mx.rows, GEIA.emis.grids.dim))
>> } else {
>>  cat(sprintf('debug: %s: GEIA.emis.mx.rows < GEIA.emis.grids.dim\n',
>>    this.fn))
>> }
>> # end debug
>> # </simple input check>
> 
>> global.emis.vec <-
>>  create.global.emissions.vector(
>>    GEIA.emis.grids.dim, GEIA.emis.mx.rows, GEIA.emis.mx)
> 
>> # <visual input check>
>> # Need sorted lat and lon vectors: we know what those are a priori
>> # Add 0.5 since grid centers
>> lon.vec <- 0.5 +
>>  seq(from=grid.lon.degree.start, by=grid.lon.degree.per.cell, length.out=GEIA.emis.lon.dim)
>> lat.vec <- 0.5 +
>>  seq(from=grid.lat.degree.start, by=grid.lat.degree.per.cell, length.out=GEIA.emis.lat.dim)
> 
>> # Create emissions matrix corresponding to those dimensional vectors
>> # (i.e., global.emis.mx is the "projection" of global.emis.vec)
>> # First, create empty global.emis.mx? No, fill from global.emis.vec.
>> # Fill using byrow=T? or "bycol" == byrow=FALSE? (row=lat)
>> # I assigned (using lon.lat.vec.to.grid.index)
>> # "grid indices" (global.emis.vec.index values)
>> # "lon-majorly" (i.e., iterate over lats before incrementing lon),
>> # so we want to fill byrow=FALSE ... BUT,
>> # that will "fill from top" (i.e., starting @ 90N) and
>> # we want to "fill from bottom" (i.e., starting @ 90S) ...
>> # global.emis.mx <- matrix(
>> # global.emis.vec, nrow=GEIA.emis.lat.dim, ncol=GEIA.emis.lon.dim,
>> # # so flip/reverse rows/latitudes when done
>> # byrow=FALSE)[GEIA.emis.lat.dim:1,]
> 
>> # NO: I cannot just fill global.emis.mx from global.emis.vec:
>> # latter's/GEIA's grid numbering system ensures 1000 lons per lat!
>> # Which overflows the "real-spatial" global.emis.mx :-(
>> # So I need to fill global.emis.mx using a for loop to decode the grid indices :-(
>> # (but at least I can fill in whatever direction I want :-)
>> global.emis.mx <- matrix(
>>  rep(NA, GEIA.emis.grids.dim), nrow=GEIA.emis.lat.dim, ncol=GEIA.emis.lon.dim)
> 
>> # 1: works if subsequently transposed: TODO: FIXME
>> for (i.lon in 1:GEIA.emis.lon.dim) {
>>  for (i.lat in 1:GEIA.emis.lat.dim) {
>> # 2: fails with 'dimensions of z are not length(x)(-1) times length(y)(-1)'
>> # for (i.lat in 1:GEIA.emis.lat.dim) {
>> # for (i.lon in 1:GEIA.emis.lon.dim) {
>> # 3: fails with 'dimensions of z are not length(x)(-1) times length(y)(-1)'
>> # for (i.lon in GEIA.emis.lon.dim:1) {
>> # for (i.lat in GEIA.emis.lat.dim:1) {
>> # 4: fails with 'dimensions of z are not length(x)(-1) times length(y)(-1)'
>> # for (i.lat in GEIA.emis.lat.dim:1) {
>> # for (i.lon in GEIA.emis.lon.dim:1) {
>>    lon <- lon.vec[i.lon]
>>    lat <- lat.vec[i.lat]
>>    GEIA.emis.grid.val <-
>>      global.emis.vec[
>>        lon.lat.vec.to.grid.index(c(lon, lat),
>>          n.lon=GEIA.emis.lon.dim, n.lat=GEIA.emis.lat.dim)]
>>    if (!is.na(GEIA.emis.grid.val)) {
>>      if (is.na(global.emis.mx[i.lat, i.lon])) {
>>        global.emis.mx[i.lat, i.lon] <- GEIA.emis.grid.val
>> # start debug
>> # cat(sprintf(
>> # 'debug: %s: writing val=%f to global.emis.mx[%.0f,%.0f] for grid center=[%f,%f]\n',
>> # this.fn, GEIA.emis.grid.val, i.lon, i.lat, lon, lat))
>> # end debug
>>      } else {
>>        # error if overwriting presumably-previously-written non-NA!
>>        cat(sprintf(
>>          'ERROR: %s: overwriting val=%f with val=%f at global.emis.mx[%.0f,%.0f] for grid center=[%f,%f]\n',
>>          this.fn, global.emis.mx[i.lat, i.lon], GEIA.emis.grid.val,
>>          i.lon, i.lat, lon, lat))
>>        return # TODO: abend
>>      } # end testing target != NA (thus not previously written)
>>    } # end testing source != NA (don't write if is.na(lookup)
>>  } # end for loop over lats
>> } # end for loop over lons
> 
>> # Now draw the damn thing
>> # 1: TODO: FIXME: why do I need to transpose global.emis.mx?
>> image(lon.vec, lat.vec, t(global.emis.mx))
>> # 2,3,4: how it should work ?!?
>> # image(lon.vec, lat.vec, global.emis.mx)
>> map(add=TRUE)
>> # </visual input check>
> 
>> # write output to netCDF
>> library(ncdf4)
> 
>> # output file path (not currently used by package=ncdf4)
>> netcdf.fp <- sprintf('%s/%s', netcdf.dir, netcdf.fn)
> 
> 1> # create dimensions and dimensional variables
> 1> time.vec <- c(0) # annual value, corresponding to no specific year
> 1> time.dim <- ncdim_def(
> 1>   name=time.var.name,
> 1>   units=time.var.units,
> 1>   vals=time.vec,
> 1>   unlim=TRUE,
> 1>   create_dimvar=TRUE,
> 1>   calendar=time.var.calendar,
> 1>   longname=time.var.long_name)
> 
> 1> lon.dim <- ncdim_def(
> 1>   name=lon.var.name,
> 1>   units=lon.var.units,
> 1>   vals=lon.vec,
> 1>   unlim=FALSE,
> 1>   create_dimvar=TRUE,
> 1>   longname=lon.var.long_name)
> 
> 1> lat.dim <- ncdim_def(
> 1>   name=lat.var.name,
> 1>   units=lat.var.units,
> 1>   vals=lat.vec,
> 1>   unlim=FALSE,
> 1>   create_dimvar=TRUE,
> 1>   longname=lat.var.long_name)
> 
> 2> # create data variable (as container--can't put data until we have a file)
> 2> emis.var <- ncvar_def(
> 2>   name=emis.var.name,
> 2>   units=emis.var.units,
> 2> # dim=c(time.dim, lat.dim, lon.dim),
> 2> # dim=list(time.dim, lat.dim, lon.dim),
> 2> # note dim order desired for result=var(time, lat, lon)
> 2>   dim=list(lon.dim, lat.dim, time.dim),
> 2>   missval=as.double(emis.var._FillValue),
> 2>   longname=emis.var.long_name,
> 2>   prec="double")
> 
>> # get current time for creation_date
>> # system(intern=TRUE) -> return char vector, one member per output line)
>> netcdf.timestamp <- system('date', intern=TRUE)
> 
>> # create netCDF file
>> netcdf.file <- nc_create(
>>  filename=netcdf.fn,
>> # vars=list(emis.var),
>> # verbose=TRUE)
>>  vars=list(emis.var))
> 
>> # Write data to data variable: gotta have file first.
>> # Gotta convert 2d global.emis.mx[lat,lon] to 3d global.emis.arr[time,lat,lon]
>> # Do this before adding _FillValue to prevent:
>> # > Error in R_nc4_put_vara_double: NetCDF: Not a valid data type or _FillValue type mismatch
>> ## global.emis.arr <- global.emis.mx
>> ## dim(global.emis.arr) <- c(1, dim(global.emis.mx))
>> ## global.emis.arr[1,,] <- global.emis.mx
> 
>> # Note
>> # * global.emis.mx[lat,lon]
>> # * datavar needs [lon, lat, time] (with time *last*)
> 
>> ncvar_put(
>>  nc=netcdf.file,
>>  varid=emis.var,
>> # vals=global.emis.arr,
>>  vals=t(global.emis.mx),
>> # start=rep.int(1, length(dim(global.emis.arr))),
>>  start=c(1, 1, 1),
>> # count=dim(global.emis.arr))
>>  count=c(-1,-1, 1)) # -1 -> all data
> 
>> # Write netCDF attributes
>> # Note: can't pass *.dim as varid, even though these are coordinate vars :-(
> 
>> # add datavar attributes
>> ncatt_put(
>>  nc=netcdf.file,
>> # varid=lon.var,
>>  varid=lon.var.name,
>>  attname="comment",
>>  attval=lon.var.comment,
>>  prec="text")
> 
>> ncatt_put(
>>  nc=netcdf.file,
>> # varid=lat.var,
>>  varid=lat.var.name,
>>  attname="comment",
>>  attval=lat.var.comment,
>>  prec="text")
> 
>> # put _FillValue after putting data!
>> ncatt_put(
>>  nc=netcdf.file,
>>  varid=emis.var,
>>  attname="_FillValue",
>>  attval=emis.var._FillValue,
>>  prec="float") # why is "emi_n2o:missing_value = -999."?
> 
>> # add global attributes (varid=0)
>> ncatt_put(
>>  nc=netcdf.file,
>>  varid=0,
>>  attname="creation_date",
>>  attval=netcdf.timestamp,
>>  prec="text")
> 
>> ncatt_put(
>>  nc=netcdf.file,
>>  varid=0,
>>  attname="source_file",
>>  attval=netcdf.source_file,
>>  prec="text")
> 
>> ncatt_put(
>>  nc=netcdf.file,
>>  varid=0,
>>  attname="Conventions",
>>  attval=netcdf.Conventions,
>>  prec="text")
> 
>> # flush to file (there may not be data on disk before this point)
>> # nc_sync(netcdf.file) # so we don't hafta reopen the file, below
>> # Nope: per David W. Pierce Mon, 27 Aug 2012 21:35:35 -0700, ncsync is not enough
>> nc_close(netcdf.file)
>> nc_open(netcdf.fn,
>>        write=FALSE, # will only read below
>>        readunlim=TRUE) # it's a small file
> 
>> # <simple output check>
>> system(sprintf('ls -alth %s', netcdf.fp))
>> system(sprintf('ncdump -h %s', netcdf.fp))
>> # </simple output check>
> 
>> # <visual output check>
>> # TODO: do plot-related refactoring! allow to work with projects={ioapi, this}
>> # <copied from plotLayersForTimestep.r>
>> library(fields)
>> # double-sprintf-ing to set precision by constant: cool or brittle?
>> stats.precision <- 3 # sigdigs to use for min, median, max of obs
>> stat.str <- sprintf('%%.%ig', stats.precision)
>> # use these in function=subtitle.stats as sprintf inputs
>> max.str <- sprintf('max=%s', stat.str)
>> med.str <- sprintf('med=%s', stat.str)
>> min.str <- sprintf('min=%s', stat.str)
>> # </copied from plotLayersForTimestep.r>
> 
>> # Get the data out of the datavar, to test reusability
>> # target.data <- emis.var[,,1] # fails, with
>> # > Error in emis.var[, , 1] : incorrect number of dimensions
>> target.data <- ncvar_get(
>>  nc=netcdf.file,
>> # varid=emis.var,
>>  varid=emis.var.name,
>>  # read all the data
>> # start=rep(1, emis.var$ndims),
>>  start=c(1, 1, 1),
>> # count=rep(-1, emis.var$ndims))
>>  count=c(-1, -1, 1))
>> # MAJOR: all of the above fail with
>> # > Error in if (nc$var[[li]]$hasAddOffset) addOffset = nc$var[[li]]$addOffset else addOffset = 0 :
>> # > argument is of length zero
> 
>> # Note that, if just using the raw data, the following plot code works.
>> target.data <- t(global.emis.mx)
>> # <simple output check/>
>> dim(target.data) # n.lon, n.lat
> 
>> # <copied from windowEmissions.r>
>> palette.vec <- c("grey","purple","deepskyblue2","green","yellow","orange","red","brown")
>> colors <- colorRampPalette(palette.vec)
>> probabilities.vec <- seq(0, 1, 1.0/(length(palette.vec) - 1))
>> # </copied from windowEmissions.r>
> 
>> # <copy/mod from plotLayersForTimestep.r>
>> plot.layer(target.data,
>>  title=netcdf.title,
>>  subtitle=subtitle.stats(target.data),
>>  x.centers=lon.vec,
>>  y.centers=lat.vec,
>>  q.vec=probabilities.vec,
>>  colors=colors)
>> # </copy/mod from plotLayersForTimestep.r>
>> map(add=TRUE)
>> # </visual output check>
> 
>> # teardown
>> dev.off()
>> nc_close(netcdf.file)
> 
> _______________________________________________
> netcdfgroup mailing list
> netcdfgroup at unidata.ucar.edu
> For list information or to unsubscribe,  visit: http://www.unidata.ucar.edu/mailing_lists/ 

**********************
"The contents of this message do not reflect any position of the U.S. Government or NOAA."
**********************
Roy Mendelssohn
Supervisory Operations Research Analyst
NOAA/NMFS
Environmental Research Division
Southwest Fisheries Science Center
1352 Lighthouse Avenue
Pacific Grove, CA 93950-2097

e-mail: Roy.Mendelssohn at noaa.gov (Note new e-mail address)
voice: (831)-648-9029
fax: (831)-648-8440
www: http://www.pfeg.noaa.gov/

"Old age and treachery will overcome youth and skill."
"From those who have been given much, much will be expected" 
"the arc of the moral universe is long, but it bends toward justice" -MLK Jr.




More information about the R-help mailing list