[R] [netcdfgroup] [ncdf4] error converting GEIA data to netCDF

Tue Aug 28 21:24:48 CEST 2012

Roy Mendelssohn Tue, 28 Aug 2012 09:07:32 -0700
> here is the relevant section from your code:

> > netcdf.file <- nc_create(
> >   filename=netcdf.fn,
> > #  vars=list(emis.var),
> > #  verbose=TRUE)
> >   vars=list(emis.var))

> > # Write data to data variable: gotta have file first.
> > # Gotta convert 2d global.emis.mx[lat,lon] to 3d global.emis.arr[time,lat,lon]
> > # Do this before adding _FillValue to prevent:
> > # > Error in R_nc4_put_vara_double: NetCDF: Not a valid data type or =
> _FillValue type mismatch
> > ## global.emis.arr <- global.emis.mx
> > ## dim(global.emis.arr) <- c(1, dim(global.emis.mx))
> > ## global.emis.arr[1,,] <- global.emis.mx

> > # Note
> > # * global.emis.mx[lat,lon]
> > # * datavar needs [lon, lat, time] (with time *last*)

> > ncvar_put(
> >   nc=netcdf.file,
> >   varid=emis.var,
> > #  vals=global.emis.arr,
> >   vals=t(global.emis.mx),
> > #  start=rep.int(1, length(dim(global.emis.arr))),
> >   start=c(1, 1, 1),
> > #  count=dim(global.emis.arr))
> >   count=c(-1,-1, 1)) # -1 -> all data

> You can't write until all dimensions have been defined, and all
> variables defined.

But in fact, that's only a part of the code ... which omits the prior
dimension and variable definitions :-( See the current version @

https://github.com/TomRoche/GEIA_to_netCDF/blob/c380c0a28dc8c71dbf0c2ba18130a2439a4fe089/GEIA.to.netCDF.r

I've also attached that (quoted) following my .sig, with the top-most
constant and function declarations removed for brevity. The dimension
definitions are prefixed with '1', the variable definition is prefixed
with '2'.

HTH, Tom Roche <Tom_Roche at pobox.com>
current GEIA.to.netCDF.r code block follows to end of post------------
# code----------------------------------------------------------------

 > # process input
 > library(maps) # on tlrPanP5 as well as clusters

 > # input file path
 > GEIA.emis.txt.fp <- sprintf('%s/%s', GEIA.emis.txt.dir, GEIA.emis.txt.fn)
 > # columns are grid#, mass
 > GEIA.emis.mx <-
 >   as.matrix(read.table(GEIA.emis.txt.fp, skip=GEIA.emis.txt.n.header))
 > # mask zeros? no, use NA for non-ocean areas
 > # GEIA.emis.mx[GEIA.emis.mx == 0] <- NA
 > # <simple input check>
 > dim(GEIA.emis.mx) ## [1] 36143 2
 > # start debug
 > GEIA.emis.mx.rows <- dim(GEIA.emis.mx)[1]
 > if (GEIA.emis.mx.rows > GEIA.emis.grids.dim) {
 >   cat(sprintf('ERROR: %s: GEIA.emis.mx.rows=%.0d > GEIA.emis.grids.dim=%.0d\n',
 >     this.fn, GEIA.emis.mx.rows, GEIA.emis.grids.dim))
 > } else {
 >   cat(sprintf('debug: %s: GEIA.emis.mx.rows < GEIA.emis.grids.dim\n',
 >     this.fn))
 > }
 > # end debug
 > # </simple input check>

 > global.emis.vec <-
 >   create.global.emissions.vector(
 >     GEIA.emis.grids.dim, GEIA.emis.mx.rows, GEIA.emis.mx)

 > # <visual input check>
 > # Need sorted lat and lon vectors: we know what those are a priori
 > # Add 0.5 since grid centers
 > lon.vec <- 0.5 +
 >   seq(from=grid.lon.degree.start, by=grid.lon.degree.per.cell, length.out=GEIA.emis.lon.dim)
 > lat.vec <- 0.5 +
 >   seq(from=grid.lat.degree.start, by=grid.lat.degree.per.cell, length.out=GEIA.emis.lat.dim)

 > # Create emissions matrix corresponding to those dimensional vectors
 > # (i.e., global.emis.mx is the "projection" of global.emis.vec)
 > # First, create empty global.emis.mx? No, fill from global.emis.vec.
 > # Fill using byrow=T? or "bycol" == byrow=FALSE? (row=lat)
 > # I assigned (using lon.lat.vec.to.grid.index)
 > # "grid indices" (global.emis.vec.index values)
 > # "lon-majorly" (i.e., iterate over lats before incrementing lon),
 > # so we want to fill byrow=FALSE ... BUT,
 > # that will "fill from top" (i.e., starting @ 90N) and
 > # we want to "fill from bottom" (i.e., starting @ 90S) ...
 > # global.emis.mx <- matrix(
 > # global.emis.vec, nrow=GEIA.emis.lat.dim, ncol=GEIA.emis.lon.dim,
 > # # so flip/reverse rows/latitudes when done
 > # byrow=FALSE)[GEIA.emis.lat.dim:1,]

 > # NO: I cannot just fill global.emis.mx from global.emis.vec:
 > # latter's/GEIA's grid numbering system ensures 1000 lons per lat!
 > # Which overflows the "real-spatial" global.emis.mx :-(
 > # So I need to fill global.emis.mx using a for loop to decode the grid indices :-(
 > # (but at least I can fill in whatever direction I want :-)
 > global.emis.mx <- matrix(
 >   rep(NA, GEIA.emis.grids.dim), nrow=GEIA.emis.lat.dim, ncol=GEIA.emis.lon.dim)

 > # 1: works if subsequently transposed: TODO: FIXME
 > for (i.lon in 1:GEIA.emis.lon.dim) {
 >   for (i.lat in 1:GEIA.emis.lat.dim) {
 > # 2: fails with 'dimensions of z are not length(x)(-1) times length(y)(-1)'
 > # for (i.lat in 1:GEIA.emis.lat.dim) {
 > # for (i.lon in 1:GEIA.emis.lon.dim) {
 > # 3: fails with 'dimensions of z are not length(x)(-1) times length(y)(-1)'
 > # for (i.lon in GEIA.emis.lon.dim:1) {
 > # for (i.lat in GEIA.emis.lat.dim:1) {
 > # 4: fails with 'dimensions of z are not length(x)(-1) times length(y)(-1)'
 > # for (i.lat in GEIA.emis.lat.dim:1) {
 > # for (i.lon in GEIA.emis.lon.dim:1) {
 >     lon <- lon.vec[i.lon]
 >     lat <- lat.vec[i.lat]
 >     GEIA.emis.grid.val <-
 >       global.emis.vec[
 >         lon.lat.vec.to.grid.index(c(lon, lat),
 >           n.lon=GEIA.emis.lon.dim, n.lat=GEIA.emis.lat.dim)]
 >     if (!is.na(GEIA.emis.grid.val)) {
 >       if (is.na(global.emis.mx[i.lat, i.lon])) {
 >         global.emis.mx[i.lat, i.lon] <- GEIA.emis.grid.val
 > # start debug
 > # cat(sprintf(
 > # 'debug: %s: writing val=%f to global.emis.mx[%.0f,%.0f] for grid center=[%f,%f]\n',
 > # this.fn, GEIA.emis.grid.val, i.lon, i.lat, lon, lat))
 > # end debug
 >       } else {
 >         # error if overwriting presumably-previously-written non-NA!
 >         cat(sprintf(
 >           'ERROR: %s: overwriting val=%f with val=%f at global.emis.mx[%.0f,%.0f] for grid center=[%f,%f]\n',
 >           this.fn, global.emis.mx[i.lat, i.lon], GEIA.emis.grid.val,
 >           i.lon, i.lat, lon, lat))
 >         return # TODO: abend
 >       } # end testing target != NA (thus not previously written)
 >     } # end testing source != NA (don't write if is.na(lookup)
 >   } # end for loop over lats
 > } # end for loop over lons

 > # Now draw the damn thing
 > # 1: TODO: FIXME: why do I need to transpose global.emis.mx?
 > image(lon.vec, lat.vec, t(global.emis.mx))
 > # 2,3,4: how it should work ?!?
 > # image(lon.vec, lat.vec, global.emis.mx)
 > map(add=TRUE)
 > # </visual input check>

 > # write output to netCDF
 > library(ncdf4)

 > # output file path (not currently used by package=ncdf4)
 > netcdf.fp <- sprintf('%s/%s', netcdf.dir, netcdf.fn)

1> # create dimensions and dimensional variables
1> time.vec <- c(0) # annual value, corresponding to no specific year
1> time.dim <- ncdim_def(
1>   name=time.var.name,
1>   units=time.var.units,
1>   vals=time.vec,
1>   unlim=TRUE,
1>   create_dimvar=TRUE,
1>   calendar=time.var.calendar,
1>   longname=time.var.long_name)

1> lon.dim <- ncdim_def(
1>   name=lon.var.name,
1>   units=lon.var.units,
1>   vals=lon.vec,
1>   unlim=FALSE,
1>   create_dimvar=TRUE,
1>   longname=lon.var.long_name)

1> lat.dim <- ncdim_def(
1>   name=lat.var.name,
1>   units=lat.var.units,
1>   vals=lat.vec,
1>   unlim=FALSE,
1>   create_dimvar=TRUE,
1>   longname=lat.var.long_name)

2> # create data variable (as container--can't put data until we have a file)
2> emis.var <- ncvar_def(
2>   name=emis.var.name,
2>   units=emis.var.units,
2> # dim=c(time.dim, lat.dim, lon.dim),
2> # dim=list(time.dim, lat.dim, lon.dim),
2> # note dim order desired for result=var(time, lat, lon)
2>   dim=list(lon.dim, lat.dim, time.dim),
2>   missval=as.double(emis.var._FillValue),
2>   longname=emis.var.long_name,
2>   prec="double")

 > # get current time for creation_date
 > # system(intern=TRUE) -> return char vector, one member per output line)
 > netcdf.timestamp <- system('date', intern=TRUE)

 > # create netCDF file
 > netcdf.file <- nc_create(
 >   filename=netcdf.fn,
 > # vars=list(emis.var),
 > # verbose=TRUE)
 >   vars=list(emis.var))

 > # Write data to data variable: gotta have file first.
 > # Gotta convert 2d global.emis.mx[lat,lon] to 3d global.emis.arr[time,lat,lon]
 > # Do this before adding _FillValue to prevent:
 > # > Error in R_nc4_put_vara_double: NetCDF: Not a valid data type or _FillValue type mismatch
 > ## global.emis.arr <- global.emis.mx
 > ## dim(global.emis.arr) <- c(1, dim(global.emis.mx))
 > ## global.emis.arr[1,,] <- global.emis.mx

 > # Note
 > # * global.emis.mx[lat,lon]
 > # * datavar needs [lon, lat, time] (with time *last*)

 > ncvar_put(
 >   nc=netcdf.file,
 >   varid=emis.var,
 > # vals=global.emis.arr,
 >   vals=t(global.emis.mx),
 > # start=rep.int(1, length(dim(global.emis.arr))),
 >   start=c(1, 1, 1),
 > # count=dim(global.emis.arr))
 >   count=c(-1,-1, 1)) # -1 -> all data

 > # Write netCDF attributes
 > # Note: can't pass *.dim as varid, even though these are coordinate vars :-(

 > # add datavar attributes
 > ncatt_put(
 >   nc=netcdf.file,
 > # varid=lon.var,
 >   varid=lon.var.name,
 >   attname="comment",
 >   attval=lon.var.comment,
 >   prec="text")

 > ncatt_put(
 >   nc=netcdf.file,
 > # varid=lat.var,
 >   varid=lat.var.name,
 >   attname="comment",
 >   attval=lat.var.comment,
 >   prec="text")

 > # put _FillValue after putting data!
 > ncatt_put(
 >   nc=netcdf.file,
 >   varid=emis.var,
 >   attname="_FillValue",
 >   attval=emis.var._FillValue,
 >   prec="float") # why is "emi_n2o:missing_value = -999."?

 > # add global attributes (varid=0)
 > ncatt_put(
 >   nc=netcdf.file,
 >   varid=0,
 >   attname="creation_date",
 >   attval=netcdf.timestamp,
 >   prec="text")

 > ncatt_put(
 >   nc=netcdf.file,
 >   varid=0,
 >   attname="source_file",
 >   attval=netcdf.source_file,
 >   prec="text")

 > ncatt_put(
 >   nc=netcdf.file,
 >   varid=0,
 >   attname="Conventions",
 >   attval=netcdf.Conventions,
 >   prec="text")

 > # flush to file (there may not be data on disk before this point)
 > # nc_sync(netcdf.file) # so we don't hafta reopen the file, below
 > # Nope: per David W. Pierce Mon, 27 Aug 2012 21:35:35 -0700, ncsync is not enough
 > nc_close(netcdf.file)
 > nc_open(netcdf.fn,
 >         write=FALSE, # will only read below
 >         readunlim=TRUE) # it's a small file

 > # <simple output check>
 > system(sprintf('ls -alth %s', netcdf.fp))
 > system(sprintf('ncdump -h %s', netcdf.fp))
 > # </simple output check>

 > # <visual output check>
 > # TODO: do plot-related refactoring! allow to work with projects={ioapi, this}
 > # <copied from plotLayersForTimestep.r>
 > library(fields)
 > # double-sprintf-ing to set precision by constant: cool or brittle?
 > stats.precision <- 3 # sigdigs to use for min, median, max of obs
 > stat.str <- sprintf('%%.%ig', stats.precision)
 > # use these in function=subtitle.stats as sprintf inputs
 > max.str <- sprintf('max=%s', stat.str)
 > med.str <- sprintf('med=%s', stat.str)
 > min.str <- sprintf('min=%s', stat.str)
 > # </copied from plotLayersForTimestep.r>

 > # Get the data out of the datavar, to test reusability
 > # target.data <- emis.var[,,1] # fails, with
 > # > Error in emis.var[, , 1] : incorrect number of dimensions
 > target.data <- ncvar_get(
 >   nc=netcdf.file,
 > # varid=emis.var,
 >   varid=emis.var.name,
 >   # read all the data
 > # start=rep(1, emis.var$ndims),
 >   start=c(1, 1, 1),
 > # count=rep(-1, emis.var$ndims))
 >   count=c(-1, -1, 1))
 > # MAJOR: all of the above fail with
 > # > Error in if (nc$var[[li]]$hasAddOffset) addOffset = nc$var[[li]]$addOffset else addOffset = 0 :
 > # > argument is of length zero

 > # Note that, if just using the raw data, the following plot code works.
 > target.data <- t(global.emis.mx)
 > # <simple output check/>
 > dim(target.data) # n.lon, n.lat

 > # <copied from windowEmissions.r>
 > palette.vec <- c("grey","purple","deepskyblue2","green","yellow","orange","red","brown")
 > colors <- colorRampPalette(palette.vec)
 > probabilities.vec <- seq(0, 1, 1.0/(length(palette.vec) - 1))
 > # </copied from windowEmissions.r>

 > # <copy/mod from plotLayersForTimestep.r>
 > plot.layer(target.data,
 >   title=netcdf.title,
 >   subtitle=subtitle.stats(target.data),
 >   x.centers=lon.vec,
 >   y.centers=lat.vec,
 >   q.vec=probabilities.vec,
 >   colors=colors)
 > # </copy/mod from plotLayersForTimestep.r>
 > map(add=TRUE)
 > # </visual output check>

 > # teardown
 > dev.off()
 > nc_close(netcdf.file)