[R] Aggregate matrix in a 2 by 2 manor

Anthoni, Peter (IMK) peter.anthoni at kit.edu
Sun Jul 31 09:31:40 CEST 2016


Hi Jeff,

many thanks, that one is the Speedy Gonzalles out of all. Can also do some FUN stuff.

aggregate.nx.ny.array.aperm <- function( dta, nx = 2, ny = 2, FUN=colMeans, ... ) {
 # number of rows in result
 nnr <- nrow( dta ) %/% ny
 # number of columns in result
 nnc <- ncol( dta ) %/% nx
 # number of values to take mean of
 nxny <- nx * ny
 # describe existing layout of values in dta as 4-d array
 a1 <- array( dta, dim = c( ny, nnr, nx, nnc ) )
 # swap data in dimensions 2 and 3
 a2 <- aperm( a1, c( 1, 3, 2, 4 ) )
 # treat first two dimensions as column vectors, remaining as columns
 a3 <- matrix( a2, nrow = nxny )
 # fast calculation of column means
 v <- FUN( a3, ... )
 # reframe result vector as a matrix
 matrix( v, ncol = nnc )
}


> system.time( {for(i in 1:10) tst_2x2=aggregate.nx.ny.forloop(tst,2,2,mean,na.rm=T)})
   user  system elapsed 
 14.003   0.271  14.663 
> system.time( {for(i in 1:10) tst_2x2=aggregate.nx.ny.interaction(tst,2,2,mean,na.rm=T)})
   user  system elapsed 
 32.686   1.175  35.012 
> system.time( {for(i in 1:10) tst_2x2=aggregate.nx.ny.expand.grid(tst,2,2,mean,na.rm=T)})
   user  system elapsed 
  9.590   0.197   9.951 
> system.time( {for(i in 1:10) tst_2x2=aggregate.nx.ny.array.apply(tst,2,2,mean,na.rm=T)})
   user  system elapsed 
  8.391   0.174   8.737 
> system.time( {for(i in 1:10) tst_2x2=aggregate.nx.ny.array.aperm(tst,2,2,colMeans,na.rm=T)})
   user  system elapsed 
  0.195   0.019   0.216 
> system.time( {for(i in 1:10) tst_2x2=aggregate.nx.ny.array.aperm(tst,2,2,colSums,na.rm=T)})
   user  system elapsed 
  0.169   0.017   0.188 


> aggregate.nx.ny.array.aperm(tst.small,FUN=colMeans)
     [,1] [,2] [,3] [,4]
[1,]  3.5 11.5 19.5 27.5
[2,]  5.5 13.5 21.5 29.5
> aggregate.nx.ny.array.aperm(tst.small,FUN=colSums)
     [,1] [,2] [,3] [,4]
[1,]   14   46   78  110
[2,]   22   54   86  118
> aggregate.nx.ny.forloop(tst.small,FUN=sum)
     [,1] [,2] [,3] [,4]
[1,]   14   46   78  110
[2,]   22   54   86  118

cheers
Peter



> On 31 Jul 2016, at 08:13, Jeff Newmiller <jdnewmil at dcn.davis.ca.us> wrote:
> 
> If you don't need all that FUN flexibility, you can get this done way faster with the aperm and colMeans functions:
> 
> tst <- matrix( seq.int( 1440 * 360 )
>             , ncol = 1440
>             , nrow = 360
>             )
> tst.small <- matrix( seq.int( 8 * 4 )
>                   , ncol = 8
>                   , nrow = 4
>                   )
> aggregate.nx.ny.expand.grid <- function( dta, nx = 2, ny = 2, FUN = mean, ... )
> {
>  ilon <- seq( 1, ncol( dta ), nx )
>  ilat <- seq( 1, nrow( dta ), ny )
>  cells <- as.matrix( expand.grid( ilat, ilon ) )
>  blocks <- apply( cells
>                 , 1
>                 , function( x ) dta[ x[ 1 ]:( x[ 1 ] + 1 ), x[ 2 ]:( x[ 2 ] + 1 ) ] )
>  block.means <- colMeans( blocks )
>  matrix( block.means
>        , nrow( dta ) / ny
>        , ncol( dta ) / nx
>        )
> }
> 
> aggregate.nx.ny.array.apply <- function( dta, nx = 2, ny = 2, FUN = mean, ... ) {
>  a <- array( dta
>            , dim = c( ny
>                     , nrow( dta ) %/% ny
>                     , nx
>                     , ncol( dta ) %/% nx
>                     )
>            )
>  apply( a, c( 2, 4 ), FUN, ... )
> }
> 
> aggregate.nx.ny.array.aperm.mean <- function( dta, nx = 2, ny = 2, ... ) {
>  # number of rows in result
>  nnr <- nrow( dta ) %/% ny
>  # number of columns in result
>  nnc <- ncol( dta ) %/% nx
>  # number of values to take mean of
>  nxny <- nx * ny
>  # describe existing layout of values in dta as 4-d array
>  a1 <- array( dta, dim = c( ny, nnr, nx, nnc ) )
>  # swap data in dimensions 2 and 3
>  a2 <- aperm( a1, c( 1, 3, 2, 4 ) )
>  # treat first two dimensions as column vectors, remaining as columns
>  a3 <- matrix( a2, nrow = nxny )
>  # fast calculation of column means
>  v <- colMeans( a3, ... )
>  # reframe result vector as a matrix
>  matrix( v, ncol = nnc )
> }
> 
> aggregate.nx.ny.array.aperm.apply <- function( dta, nx = 2, ny = 2, FUN = mean, ... ) {
>  # number of rows in result
>  nnr <- nrow( dta ) %/% ny
>  # number of columns in result
>  nnc <- ncol( dta ) %/% nx
>  # number of values to apply FUN to
>  nxny <- nx * ny
>  # describe existing layout of values in dta as 4-d array
>  a1 <- array( dta, dim = c( ny, nnr, nx, nnc ) )
>  # swap data in dimensions 2 and 3
>  a2 <- aperm( a1, c( 1, 3, 2, 4 ) )
>  # treat first two dimensions as column vectors, remaining as columns
>  a3 <- matrix( a2, nrow = nxny )
>  # apply FUN to column vectors
>  v <- apply( a3, 2, FUN = FUN, ... )
>  matrix( v, ncol = nnc )
> }
> test1 <- aggregate.nx.ny.expand.grid( tst )
> test2 <- aggregate.nx.ny.array.apply( tst )
> test3 <- aggregate.nx.ny.array.aperm.mean( tst )
> test4 <- aggregate.nx.ny.array.aperm.apply( tst )
> library(microbenchmark)
> microbenchmark(
>  aggregate.nx.ny.expand.grid( tst, 2, 2, mean, na.rm = TRUE )
> , aggregate.nx.ny.array.apply( tst, 2, 2, mean, na.rm = TRUE )
> , aggregate.nx.ny.array.aperm.mean( tst, 2, 2, na.rm = TRUE )
> , aggregate.nx.ny.array.aperm.apply( tst, 2, 2, mean, na.rm = TRUE )
> )
> #Unit: milliseconds
> #                                                             expr        min
> #       aggregate.nx.ny.expand.grid(tst, 2, 2, mean, na.rm = TRUE) 628.528322
> #       aggregate.nx.ny.array.apply(tst, 2, 2, mean, na.rm = TRUE) 846.883314
> #        aggregate.nx.ny.array.aperm.mean(tst, 2, 2, na.rm = TRUE)   8.904369
> # aggregate.nx.ny.array.aperm.apply(tst, 2, 2, mean, na.rm = TRUE) 619.691851
> #         lq       mean     median        uq      max neval cld
> # 675.470967  916.39630  778.54090  873.9754 2452.695   100  b
> # 920.831966 1126.94691 1000.33830 1094.9233 3412.639   100   c
> #   9.191747   21.98528   10.30099   15.9169  158.687   100 a
> # 733.246331  936.73359  757.58383  844.2016 2824.557   100  b
> 
> 
> On Sat, 30 Jul 2016, Jeff Newmiller wrote:
> 
>> For the record, the array.apply code can be fixed as below, but then it is slower than the expand.grid version.
>> 
>> aggregate.nx.ny.array.apply <- function(dta,nx=2,ny=2, FUN=mean,...)
>> {
>>  a <- array(dta, dim = c(ny, nrow( dta ) %/% ny, nx, ncol( dta ) %/% nx))
>> apply( a, c(2, 4), FUN, ... )
>> }
>> 
>> -- 
>> Sent from my phone. Please excuse my brevity.
>> 
>> On July 30, 2016 11:06:16 AM PDT, "Anthoni, Peter (IMK)" <peter.anthoni at kit.edu> wrote:
>>> Hi all,
>>> 
>>> thanks for the suggestions, I did some timing tests, see below.
>>> Unfortunately the aggregate.nx.ny.array.apply, does not produce the
>>> expected result.
>>> So the fastest seems to be the aggregate.nx.ny.expand.grid, though the
>>> double for loop is not that much slower.
>>> 
>>> many thanks
>>> Peter
>>> 
>>>> tst=matrix(1:(1440*360),ncol=1440,nrow=360)
>>>> system.time( {for(i in 1:10)
>>> tst_2x2=aggregate.nx.ny.forloop(tst,2,2,mean,na.rm=T)})
>>>  user  system elapsed
>>> 11.227   0.073  11.371
>>>> system.time( {for(i in 1:10)
>>> tst_2x2=aggregate.nx.ny.interaction(tst,2,2,mean,na.rm=T)})
>>>  user  system elapsed
>>> 26.354   0.475  26.880
>>>> system.time( {for(i in 1:10)
>>> tst_2x2=aggregate.nx.ny.expand.grid(tst,2,2,mean,na.rm=T)})
>>>  user  system elapsed
>>> 9.683   0.055   9.763
>>>> system.time( {for(i in 1:10)
>>> tst_2x2=aggregate.nx.ny.array.apply(tst,2,2,mean,na.rm=T)})
>>>  user  system elapsed
>>> 7.693   0.055   7.800
>>> 
>>>> tst.small=matrix(1:(8*4),ncol=8,nrow=4)
>>>> aggregate.nx.ny.forloop = function(data,nx=2,ny=2, FUN=mean,...)
>>> + {
>>> +   nlon=nrow(data)
>>> +   nlat=ncol(data)
>>> +   newdata=matrix(NA,nrow=nlon/nx,ncol=nlat/ny)
>>> +   dim(newdata)
>>> +   for(ilon in seq(1,nlon,nx)) {
>>> +     for(ilat in seq(1,nlat,ny)) {
>>> +       ilon_new=1+(ilon-1)/nx
>>> +       ilat_new=1+(ilat-1)/ny
>>> +       newdata[ilon_new,ilat_new] = FUN(data[ilon+0:1,ilat+0:1],...)
>>> +     }
>>> +   }
>>> +   newdata
>>> + }
>>>> aggregate.nx.ny.forloop(tst.small)
>>>    [,1] [,2] [,3] [,4]
>>> [1,]  3.5 11.5 19.5 27.5
>>> [2,]  5.5 13.5 21.5 29.5
>>>> 
>>>> aggregate.nx.ny.interaction = function(data,nx=2,ny=2, FUN=mean,...)
>>> + {
>>> +
>>> +   nlon=nrow(data)
>>> +   nlat=ncol(data)
>>> +   newdata=matrix(NA,nrow=nlon/nx,ncol=nlat/ny)
>>> +   newdata[] <- tapply( data, interaction( (row(data)+1) %/% 2,
>>> (col(data)+1) %/% 2 ), FUN, ...)
>>> +   newdata
>>> + }
>>>> aggregate.nx.ny.interaction(tst.small)
>>>    [,1] [,2] [,3] [,4]
>>> [1,]  3.5 11.5 19.5 27.5
>>> [2,]  5.5 13.5 21.5 29.5
>>>> 
>>>> aggregate.nx.ny.expand.grid = function(data,nx=2,ny=2, FUN=mean,...)
>>> + {
>>> +   ilon <- seq(1,ncol(data),nx)
>>> +   ilat <- seq(1,nrow(data),ny)
>>> +   cells <- as.matrix(expand.grid(ilat, ilon))
>>> +   blocks <- apply(cells, 1, function(x)
>>> data[x[1]:(x[1]+1),x[2]:(x[2]+1)])
>>> +   block.means <- colMeans(blocks)
>>> +   matrix(block.means, nrow(data)/ny, ncol(data)/nx)
>>> + }
>>>> aggregate.nx.ny.expand.grid(tst.small)
>>>    [,1] [,2] [,3] [,4]
>>> [1,]  3.5 11.5 19.5 27.5
>>> [2,]  5.5 13.5 21.5 29.5
>>>> 
>>>> aggregate.nx.ny.array.apply = function(data,nx=2,ny=2, FUN=mean,...)
>>> {
>>> +   a <- array(data, dim = c(ny, nrow( data ) %/% ny, ncol( data ) %/%
>>> nx))
>>> +   apply( a, c(2, 3), FUN, ... )
>>> + }
>>>> aggregate.nx.ny.array.apply(tst.small)
>>>    [,1] [,2] [,3] [,4]
>>> [1,]  1.5  5.5  9.5 13.5
>>> [2,]  3.5  7.5 11.5 15.5
>>> 
>>> 
>>> 
>>>> On 28 Jul 2016, at 00:26, David Winsemius <dwinsemius at comcast.net>
>>> wrote:
>>>> 
>>>> 
>>>>> On Jul 27, 2016, at 12:02 PM, Jeff Newmiller
>>> <jdnewmil at dcn.davis.ca.us> wrote:
>>>>> 
>>>>> An alternative (more compact, not necessarily faster, because apply
>>> is still a for loop inside):
>>>>> 
>>>>> f <- function( m, nx, ny ) {
>>>>> # redefine the dimensions of my
>>>>> a <- array( m
>>>>>           , dim = c( ny
>>>>>                  , nrow( m ) %/% ny
>>>>>                  , ncol( m ) %/% nx )
>>>>>          )
>>>>> # apply mean over dim 1
>>>>> apply( a, c( 2, 3 ), FUN=mean )
>>>>> }
>>>>> f( tst, nx, ny )
>>>> 
>>>> Here's an apparently loopless strategy, although I suspect the code
>>> for interaction (and maybe tapply as well?) uses a loop.
>>>> 
>>>> 
>>>> tst_2X2 <- matrix(NA, ,ncol=4,nrow=2)
>>>> 
>>>> tst_2x2[] <- tapply( tst, interaction( (row(tst)+1) %/% 2,
>>> (col(tst)+1) %/% 2 ), mean)
>>>> 
>>>> tst_2x2
>>>> 
>>>>    [,1] [,2] [,3] [,4]
>>>> [1,]  3.5 11.5 19.5 27.5
>>>> [2,]  5.5 13.5 21.5 29.5
>>>> 
>>>> --
>>>> David.
>>>> 
>>>> 
>>>>> 
>>>>> --
>>>>> Sent from my phone. Please excuse my brevity.
>>>>> 
>>>>> On July 27, 2016 9:08:32 AM PDT, David L Carlson <dcarlson at tamu.edu>
>>> wrote:
>>>>>> This should be faster. It uses apply() across the blocks.
>>>>>> 
>>>>>>> ilon <- seq(1,8,nx)
>>>>>>> ilat <- seq(1,4,ny)
>>>>>>> cells <- as.matrix(expand.grid(ilat, ilon))
>>>>>>> blocks <- apply(cells, 1, function(x) tst[x[1]:(x[1]+1),
>>>>>> x[2]:(x[2]+1)])
>>>>>>> block.means <- colMeans(blocks)
>>>>>>> tst_2x2 <- matrix(block.means, 2, 4)
>>>>>>> tst_2x2
>>>>>>  [,1] [,2] [,3] [,4]
>>>>>> [1,]  3.5 11.5 19.5 27.5
>>>>>> [2,]  5.5 13.5 21.5 29.5
>>>>>> 
>>>>>> -------------------------------------
>>>>>> David L Carlson
>>>>>> Department of Anthropology
>>>>>> Texas A&M University
>>>>>> College Station, TX 77840-4352
>>>>>> 
>>>>>> 
>>>>>> 
>>>>>> -----Original Message-----
>>>>>> From: R-help [mailto:r-help-bounces at r-poject.org] On Behalf Of
>>> Anthoni,
>>>>>> Peter (IMK)
>>>>>> Sent: Wednesday, July 27, 2016 6:14 AM
>>>>>> To: r-help at r-project.org
>>>>>> Subject: [R] Aggregate matrix in a 2 by 2 manor
>>>>>> 
>>>>>> Hi all,
>>>>>> 
>>>>>> I need to aggregate some matrix data (1440x720) to a lower
>>> dimension
>>>>>> (720x360) for lots of years and variables
>>>>>> 
>>>>>> I can do double for loop, but that will be slow. Anybody know a
>>> quicker
>>>>>> way?
>>>>>> 
>>>>>> here an example with a smaller matrix size:
>>>>>> 
>>>>>> tst=matrix(1:(8*4),ncol=8,nrow=4)
>>>>>> tst_2x2=matrix(NA,ncol=4,nrow=2)
>>>>>> nx=2
>>>>>> ny=2
>>>>>> for(ilon in seq(1,8,nx)) {
>>>>>> for (ilat in seq(1,4,ny)) {
>>>>>> ilon_2x2=1+(ilon-1)/nx
>>>>>> ilat_2x2=1+(ilat-1)/ny
>>>>>> tst_2x2[ilat_2x2,ilon_2x2] = mean(tst[ilat+0:1,ilon+0:1])
>>>>>> }
>>>>>> }
>>>>>> 
>>>>>> tst
>>>>>> tst_2x2
>>>>>> 
>>>>>>> tst
>>>>>>  [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
>>>>>> [1,]    1    5    9   13   17   21   25   29
>>>>>> [2,]    2    6   10   14   18   22   26   30
>>>>>> [3,]    3    7   11   15   19   23   27   31
>>>>>> [4,]    4    8   12   16   20   24   28   32
>>>>>> 
>>>>>>> tst_2x2
>>>>>>  [,1] [,2] [,3] [,4]
>>>>>> [1,]  3.5 11.5 19.5 27.5
>>>>>> [2,]  5.5 13.5 21.5 29.5
>>>>>> 
>>>>>> 
>>>>>> I though a cast to 3d-array might do the trick and apply over the
>>> new
>>>>>> dimension, but that does not work, since it casts the data along
>>> the
>>>>>> row.
>>>>>>> matrix(apply(array(tst,dim=c(nx,ny,8)),3,mean),nrow=nrow(tst)/ny)
>>>>>>  [,1] [,2] [,3] [,4]
>>>>>> [1,]  2.5 10.5 18.5 26.5
>>>>>> [2,]  6.5 14.5 22.5 30.5
>>>>>> 
>>>>>> 
>>>>>> cheers
>>>>>> Peter
>>>>>> 
>>>>>> ______________________________________________
>>>>>> R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see
>>>>>> https://stat.ethz.ch/mailman/listinfo/r-help
>>>>>> PLEASE do read the posting guide
>>>>>> http://www.R-project.org/posting-guide.html
>>>>>> and provide commented, minimal, self-contained, reproducible code.
>>>>>> 
>>>>>> ______________________________________________
>>>>>> R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see
>>>>>> https://stat.ethz.ch/mailman/listinfo/r-help
>>>>>> PLEASE do read the posting guide
>>>>>> http://www.R-project.org/posting-guide.html
>>>>>> and provide commented, minimal, self-contained, reproducible code.
>>>>> 
>>>>> ______________________________________________
>>>>> R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see
>>>>> https://stat.ethz.ch/mailman/listinfo/r-help
>>>>> PLEASE do read the posting guide
>>> http://www.R-project.org/posting-guide.html
>>>>> and provide commented, minimal, self-contained, reproducible code.
>>>> 
>>>> David Winsemius
>>>> Alameda, CA, USA
>>>> 
>> 
>> ______________________________________________
>> R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see
>> https://stat.ethz.ch/mailman/listinfo/r-help
>> PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
>> and provide commented, minimal, self-contained, reproducible code.
>> 
> 
> ---------------------------------------------------------------------------
> Jeff Newmiller                        The     .....       .....  Go Live...
> DCN:<jdnewmil at dcn.davis.ca.us>        Basics: ##.#.       ##.#.  Live Go...
>                                      Live:   OO#.. Dead: OO#..  Playing
> Research Engineer (Solar/Batteries            O.O#.       #.O#.  with
> /Software/Embedded Controllers)               .OO#.       .OO#.  rocks...1k
> ---------------------------------------------------------------------------



More information about the R-help mailing list