[R] Aggregate matrix in a 2 by 2 manor

Sun Jul 31 08:13:04 CEST 2016

If you don't need all that FUN flexibility, you can get this done way 
faster with the aperm and colMeans functions:

tst <- matrix( seq.int( 1440 * 360 )
              , ncol = 1440
              , nrow = 360
              )
tst.small <- matrix( seq.int( 8 * 4 )
                    , ncol = 8
                    , nrow = 4
                    )
aggregate.nx.ny.expand.grid <- function( dta, nx = 2, ny = 2, FUN = mean, ... )
{
   ilon <- seq( 1, ncol( dta ), nx )
   ilat <- seq( 1, nrow( dta ), ny )
   cells <- as.matrix( expand.grid( ilat, ilon ) )
   blocks <- apply( cells
                  , 1
                  , function( x ) dta[ x[ 1 ]:( x[ 1 ] + 1 ), x[ 2 ]:( x[ 2 ] + 1 ) ] )
   block.means <- colMeans( blocks )
   matrix( block.means
         , nrow( dta ) / ny
         , ncol( dta ) / nx
         )
}

aggregate.nx.ny.array.apply <- function( dta, nx = 2, ny = 2, FUN = mean, ... ) {
   a <- array( dta
             , dim = c( ny
                      , nrow( dta ) %/% ny
                      , nx
                      , ncol( dta ) %/% nx
                      )
             )
   apply( a, c( 2, 4 ), FUN, ... )
}

aggregate.nx.ny.array.aperm.mean <- function( dta, nx = 2, ny = 2, ... ) {
   # number of rows in result
   nnr <- nrow( dta ) %/% ny
   # number of columns in result
   nnc <- ncol( dta ) %/% nx
   # number of values to take mean of
   nxny <- nx * ny
   # describe existing layout of values in dta as 4-d array
   a1 <- array( dta, dim = c( ny, nnr, nx, nnc ) )
   # swap data in dimensions 2 and 3
   a2 <- aperm( a1, c( 1, 3, 2, 4 ) )
   # treat first two dimensions as column vectors, remaining as columns
   a3 <- matrix( a2, nrow = nxny )
   # fast calculation of column means
   v <- colMeans( a3, ... )
   # reframe result vector as a matrix
   matrix( v, ncol = nnc )
}

aggregate.nx.ny.array.aperm.apply <- function( dta, nx = 2, ny = 2, FUN = mean, ... ) {
   # number of rows in result
   nnr <- nrow( dta ) %/% ny
   # number of columns in result
   nnc <- ncol( dta ) %/% nx
   # number of values to apply FUN to
   nxny <- nx * ny
   # describe existing layout of values in dta as 4-d array
   a1 <- array( dta, dim = c( ny, nnr, nx, nnc ) )
   # swap data in dimensions 2 and 3
   a2 <- aperm( a1, c( 1, 3, 2, 4 ) )
   # treat first two dimensions as column vectors, remaining as columns
   a3 <- matrix( a2, nrow = nxny )
   # apply FUN to column vectors
   v <- apply( a3, 2, FUN = FUN, ... )
   matrix( v, ncol = nnc )
}
test1 <- aggregate.nx.ny.expand.grid( tst )
test2 <- aggregate.nx.ny.array.apply( tst )
test3 <- aggregate.nx.ny.array.aperm.mean( tst )
test4 <- aggregate.nx.ny.array.aperm.apply( tst )
library(microbenchmark)
microbenchmark(
   aggregate.nx.ny.expand.grid( tst, 2, 2, mean, na.rm = TRUE )
, aggregate.nx.ny.array.apply( tst, 2, 2, mean, na.rm = TRUE )
, aggregate.nx.ny.array.aperm.mean( tst, 2, 2, na.rm = TRUE )
, aggregate.nx.ny.array.aperm.apply( tst, 2, 2, mean, na.rm = TRUE )
)
#Unit: milliseconds
#                                                             expr        min
#       aggregate.nx.ny.expand.grid(tst, 2, 2, mean, na.rm = TRUE) 628.528322
#       aggregate.nx.ny.array.apply(tst, 2, 2, mean, na.rm = TRUE) 846.883314
#        aggregate.nx.ny.array.aperm.mean(tst, 2, 2, na.rm = TRUE)   8.904369
# aggregate.nx.ny.array.aperm.apply(tst, 2, 2, mean, na.rm = TRUE) 619.691851
#         lq       mean     median        uq      max neval cld
# 675.470967  916.39630  778.54090  873.9754 2452.695   100  b
# 920.831966 1126.94691 1000.33830 1094.9233 3412.639   100   c
#   9.191747   21.98528   10.30099   15.9169  158.687   100 a
# 733.246331  936.73359  757.58383  844.2016 2824.557   100  b

On Sat, 30 Jul 2016, Jeff Newmiller wrote:

> For the record, the array.apply code can be fixed as below, but then it is slower than the expand.grid version.
>
> aggregate.nx.ny.array.apply <- function(dta,nx=2,ny=2, FUN=mean,...)
> {
>   a <- array(dta, dim = c(ny, nrow( dta ) %/% ny, nx, ncol( dta ) %/% nx))
>  apply( a, c(2, 4), FUN, ... )
> }
>
> -- 
> Sent from my phone. Please excuse my brevity.
>
> On July 30, 2016 11:06:16 AM PDT, "Anthoni, Peter (IMK)" <peter.anthoni at kit.edu> wrote:
>> Hi all,
>>
>> thanks for the suggestions, I did some timing tests, see below.
>> Unfortunately the aggregate.nx.ny.array.apply, does not produce the
>> expected result.
>> So the fastest seems to be the aggregate.nx.ny.expand.grid, though the
>> double for loop is not that much slower.
>>
>> many thanks
>> Peter
>>
>>> tst=matrix(1:(1440*360),ncol=1440,nrow=360)
>>> system.time( {for(i in 1:10)
>> tst_2x2=aggregate.nx.ny.forloop(tst,2,2,mean,na.rm=T)})
>>   user  system elapsed
>> 11.227   0.073  11.371
>>> system.time( {for(i in 1:10)
>> tst_2x2=aggregate.nx.ny.interaction(tst,2,2,mean,na.rm=T)})
>>   user  system elapsed
>> 26.354   0.475  26.880
>>> system.time( {for(i in 1:10)
>> tst_2x2=aggregate.nx.ny.expand.grid(tst,2,2,mean,na.rm=T)})
>>   user  system elapsed
>>  9.683   0.055   9.763
>>> system.time( {for(i in 1:10)
>> tst_2x2=aggregate.nx.ny.array.apply(tst,2,2,mean,na.rm=T)})
>>   user  system elapsed
>>  7.693   0.055   7.800
>>
>>> tst.small=matrix(1:(8*4),ncol=8,nrow=4)
>>> aggregate.nx.ny.forloop = function(data,nx=2,ny=2, FUN=mean,...)
>> + {
>> +   nlon=nrow(data)
>> +   nlat=ncol(data)
>> +   newdata=matrix(NA,nrow=nlon/nx,ncol=nlat/ny)
>> +   dim(newdata)
>> +   for(ilon in seq(1,nlon,nx)) {
>> +     for(ilat in seq(1,nlat,ny)) {
>> +       ilon_new=1+(ilon-1)/nx
>> +       ilat_new=1+(ilat-1)/ny
>> +       newdata[ilon_new,ilat_new] = FUN(data[ilon+0:1,ilat+0:1],...)
>> +     }
>> +   }
>> +   newdata
>> + }
>>> aggregate.nx.ny.forloop(tst.small)
>>     [,1] [,2] [,3] [,4]
>> [1,]  3.5 11.5 19.5 27.5
>> [2,]  5.5 13.5 21.5 29.5
>>>
>>> aggregate.nx.ny.interaction = function(data,nx=2,ny=2, FUN=mean,...)
>> + {
>> +
>> +   nlon=nrow(data)
>> +   nlat=ncol(data)
>> +   newdata=matrix(NA,nrow=nlon/nx,ncol=nlat/ny)
>> +   newdata[] <- tapply( data, interaction( (row(data)+1) %/% 2,
>> (col(data)+1) %/% 2 ), FUN, ...)
>> +   newdata
>> + }
>>> aggregate.nx.ny.interaction(tst.small)
>>     [,1] [,2] [,3] [,4]
>> [1,]  3.5 11.5 19.5 27.5
>> [2,]  5.5 13.5 21.5 29.5
>>>
>>> aggregate.nx.ny.expand.grid = function(data,nx=2,ny=2, FUN=mean,...)
>> + {
>> +   ilon <- seq(1,ncol(data),nx)
>> +   ilat <- seq(1,nrow(data),ny)
>> +   cells <- as.matrix(expand.grid(ilat, ilon))
>> +   blocks <- apply(cells, 1, function(x)
>> data[x[1]:(x[1]+1),x[2]:(x[2]+1)])
>> +   block.means <- colMeans(blocks)
>> +   matrix(block.means, nrow(data)/ny, ncol(data)/nx)
>> + }
>>> aggregate.nx.ny.expand.grid(tst.small)
>>     [,1] [,2] [,3] [,4]
>> [1,]  3.5 11.5 19.5 27.5
>> [2,]  5.5 13.5 21.5 29.5
>>>
>>> aggregate.nx.ny.array.apply = function(data,nx=2,ny=2, FUN=mean,...)
>> {
>> +   a <- array(data, dim = c(ny, nrow( data ) %/% ny, ncol( data ) %/%
>> nx))
>> +   apply( a, c(2, 3), FUN, ... )
>> + }
>>> aggregate.nx.ny.array.apply(tst.small)
>>     [,1] [,2] [,3] [,4]
>> [1,]  1.5  5.5  9.5 13.5
>> [2,]  3.5  7.5 11.5 15.5
>>
>>
>>
>>> On 28 Jul 2016, at 00:26, David Winsemius <dwinsemius at comcast.net>
>> wrote:
>>>
>>>
>>>> On Jul 27, 2016, at 12:02 PM, Jeff Newmiller
>> <jdnewmil at dcn.davis.ca.us> wrote:
>>>>
>>>> An alternative (more compact, not necessarily faster, because apply
>> is still a for loop inside):
>>>>
>>>> f <- function( m, nx, ny ) {
>>>> # redefine the dimensions of my
>>>> a <- array( m
>>>>            , dim = c( ny
>>>>                   , nrow( m ) %/% ny
>>>>                   , ncol( m ) %/% nx )
>>>>           )
>>>> # apply mean over dim 1
>>>> apply( a, c( 2, 3 ), FUN=mean )
>>>> }
>>>> f( tst, nx, ny )
>>>
>>> Here's an apparently loopless strategy, although I suspect the code
>> for interaction (and maybe tapply as well?) uses a loop.
>>>
>>>
>>> tst_2X2 <- matrix(NA, ,ncol=4,nrow=2)
>>>
>>> tst_2x2[] <- tapply( tst, interaction( (row(tst)+1) %/% 2,
>> (col(tst)+1) %/% 2 ), mean)
>>>
>>> tst_2x2
>>>
>>>     [,1] [,2] [,3] [,4]
>>> [1,]  3.5 11.5 19.5 27.5
>>> [2,]  5.5 13.5 21.5 29.5
>>>
>>> --
>>> David.
>>>
>>>
>>>>
>>>> --
>>>> Sent from my phone. Please excuse my brevity.
>>>>
>>>> On July 27, 2016 9:08:32 AM PDT, David L Carlson <dcarlson at tamu.edu>
>> wrote:
>>>>> This should be faster. It uses apply() across the blocks.
>>>>>
>>>>>> ilon <- seq(1,8,nx)
>>>>>> ilat <- seq(1,4,ny)
>>>>>> cells <- as.matrix(expand.grid(ilat, ilon))
>>>>>> blocks <- apply(cells, 1, function(x) tst[x[1]:(x[1]+1),
>>>>> x[2]:(x[2]+1)])
>>>>>> block.means <- colMeans(blocks)
>>>>>> tst_2x2 <- matrix(block.means, 2, 4)
>>>>>> tst_2x2
>>>>>   [,1] [,2] [,3] [,4]
>>>>> [1,]  3.5 11.5 19.5 27.5
>>>>> [2,]  5.5 13.5 21.5 29.5
>>>>>
>>>>> -------------------------------------
>>>>> David L Carlson
>>>>> Department of Anthropology
>>>>> Texas A&M University
>>>>> College Station, TX 77840-4352
>>>>>
>>>>>
>>>>>
>>>>> -----Original Message-----
>>>>> From: R-help [mailto:r-help-bounces at r-poject.org] On Behalf Of
>> Anthoni,
>>>>> Peter (IMK)
>>>>> Sent: Wednesday, July 27, 2016 6:14 AM
>>>>> To: r-help at r-project.org
>>>>> Subject: [R] Aggregate matrix in a 2 by 2 manor
>>>>>
>>>>> Hi all,
>>>>>
>>>>> I need to aggregate some matrix data (1440x720) to a lower
>> dimension
>>>>> (720x360) for lots of years and variables
>>>>>
>>>>> I can do double for loop, but that will be slow. Anybody know a
>> quicker
>>>>> way?
>>>>>
>>>>> here an example with a smaller matrix size:
>>>>>
>>>>> tst=matrix(1:(8*4),ncol=8,nrow=4)
>>>>> tst_2x2=matrix(NA,ncol=4,nrow=2)
>>>>> nx=2
>>>>> ny=2
>>>>> for(ilon in seq(1,8,nx)) {
>>>>> for (ilat in seq(1,4,ny)) {
>>>>>  ilon_2x2=1+(ilon-1)/nx
>>>>>  ilat_2x2=1+(ilat-1)/ny
>>>>>  tst_2x2[ilat_2x2,ilon_2x2] = mean(tst[ilat+0:1,ilon+0:1])
>>>>> }
>>>>> }
>>>>>
>>>>> tst
>>>>> tst_2x2
>>>>>
>>>>>> tst
>>>>>   [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
>>>>> [1,]    1    5    9   13   17   21   25   29
>>>>> [2,]    2    6   10   14   18   22   26   30
>>>>> [3,]    3    7   11   15   19   23   27   31
>>>>> [4,]    4    8   12   16   20   24   28   32
>>>>>
>>>>>> tst_2x2
>>>>>   [,1] [,2] [,3] [,4]
>>>>> [1,]  3.5 11.5 19.5 27.5
>>>>> [2,]  5.5 13.5 21.5 29.5
>>>>>
>>>>>
>>>>> I though a cast to 3d-array might do the trick and apply over the
>> new
>>>>> dimension, but that does not work, since it casts the data along
>> the
>>>>> row.
>>>>>> matrix(apply(array(tst,dim=c(nx,ny,8)),3,mean),nrow=nrow(tst)/ny)
>>>>>   [,1] [,2] [,3] [,4]
>>>>> [1,]  2.5 10.5 18.5 26.5
>>>>> [2,]  6.5 14.5 22.5 30.5
>>>>>
>>>>>
>>>>> cheers
>>>>> Peter
>>>>>
>>>>> ______________________________________________
>>>>> R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see
>>>>> https://stat.ethz.ch/mailman/listinfo/r-help
>>>>> PLEASE do read the posting guide
>>>>> http://www.R-project.org/posting-guide.html
>>>>> and provide commented, minimal, self-contained, reproducible code.
>>>>>
>>>>> ______________________________________________
>>>>> R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see
>>>>> https://stat.ethz.ch/mailman/listinfo/r-help
>>>>> PLEASE do read the posting guide
>>>>> http://www.R-project.org/posting-guide.html
>>>>> and provide commented, minimal, self-contained, reproducible code.
>>>>
>>>> ______________________________________________
>>>> R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see
>>>> https://stat.ethz.ch/mailman/listinfo/r-help
>>>> PLEASE do read the posting guide
>> http://www.R-project.org/posting-guide.html
>>>> and provide commented, minimal, self-contained, reproducible code.
>>>
>>> David Winsemius
>>> Alameda, CA, USA
>>>
>
> ______________________________________________
> R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
> and provide commented, minimal, self-contained, reproducible code.
>

---------------------------------------------------------------------------
Jeff Newmiller                        The     .....       .....  Go Live...
DCN:<jdnewmil at dcn.davis.ca.us>        Basics: ##.#.       ##.#.  Live Go...
                                       Live:   OO#.. Dead: OO#..  Playing
Research Engineer (Solar/Batteries            O.O#.       #.O#.  with
/Software/Embedded Controllers)               .OO#.       .OO#.  rocks...1k