# [R] Help with Mahalanobis

Gabor Grothendieck ggrothendieck at gmail.com
Mon Jul 11 01:26:34 CEST 2005

```This one adds the labels:

D2Mah4 = function(y, x) {

stopifnot(is.data.frame(y), !missing(x))
stopifnot(dim(y)[1] != dim(x)[1])
y    = as.matrix(y)
x    = as.factor(x)
man  = manova(y ~ x)
E    = summary(man)\$SS[2] #Matrix E
S    = as.matrix(E\$Residuals)/man\$df.residual
InvS = solve(S)
mds  = matrix(unlist(by(y, x, mean)), byrow=T, ncol=ncol(y))

f <- function(a,b) mapply(function(a,b)
mahalanobis(mds[a,],mds[b,],InvS,TRUE), a, b)
seq. <- seq(length = nrow(mds))
names(seq.) <- levels(x)
D2 <- outer(seq., seq., f)
}

#
# test
#
D2M4 = D2Mah4(iris[,1:4], iris[,5])
print(D2M4)

On 7/10/05, Jose Claudio Faria <joseclaudio.faria at terra.com.br> wrote:
> Indeed, it is very nice Gabor (as always)!
>
> So, a doubt: how to preserve the 'rowname' and 'colname' of D2, like in the
> first function? I think it is useful to posterior analyzes (as cluster, for
> example).
>
> Regards,
>
> # A small correction (reference to gtools was eliminated)
> D2Mah2 = function(y, x) {
>   stopifnot(is.data.frame(y), !missing(x))
>   stopifnot(dim(y)[1] != dim(x)[1])
>   y    = as.matrix(y)
>   x    = as.factor(x)
>   man  = manova(y ~ x)
>   E    = summary(man)\$SS[2] #Matrix E
>   S    = as.matrix(E\$Residuals)/man\$df.residual
>   InvS = solve(S)
>   mds  = matrix(unlist(by(y, x, mean)), byrow=T, ncol=ncol(y))
>   nObjects = nrow(mds)
>   f = function(a,b) mapply(function(a,b)
>     (mds[a,] - mds[b,])%*%InvS%*%(mds[a,] - mds[b,]), a, b)
>   D2 = outer(seq(nObjects), seq(nObjects), f)
> }
>
> #
> # test
> #
> D2M2 = D2Mah2(iris[,1:4], iris[,5])
> print(D2M2)
>
> Gabor Grothendieck wrote:
> > I think you could simplify this by replacing everything after the
> > nObjects = nrow(mds) line with just these two statements.
> >
> >   f <- function(a,b) mapply(function(a,b)
> >     (mds[a,] - mds[b,])%*%InvS%*%(mds[a,] - mds[b,]), a,b)
> >
> >   D2 <- outer(seq(nObjects), seq(nObjects), f)
> >
> > This also eliminates dependence on gtools and the complexity
> > of dealing with triangular matrices.
> >
> > Regards.
> >
> > Here it is in full:
> >
> > D2Mah2 = function(y, x) {
> >
> >   stopifnot(is.data.frame(y), !missing(x))
> >   stopifnot(dim(y)[1] != dim(x)[1])
> >   y    = as.matrix(y)
> >   x    = as.factor(x)
> >   man  = manova(y ~ x)
> >   E    = summary(man)\$SS[2] #Matrix E
> >   S    = as.matrix(E\$Residuals)/man\$df.residual
> >   InvS = solve(S)
> >   mds  = matrix(unlist(by(y, x, mean)), byrow=T, ncol=ncol(y))
> >
> >   library(gtools)
> >   nObjects = nrow(mds)
> >
> >   ### changed part is next two statements
> >   f <- function(a,b) mapply(function(a,b)
> >     (mds[a,] - mds[b,])%*%InvS%*%(mds[a,] - mds[b,]), a,b)
> >
> >   D2 <- outer(seq(nObjects), seq(nObjects), f)
> > }
> >
> > #
> > # test
> > #
> > D2M2 = D2Mah2(iris[,1:4], iris[,5])
> > print(D2M2)
> >
> >
> >
> >
> > On 7/10/05, Jose Claudio Faria <joseclaudio.faria at terra.com.br> wrote:
> >
> >>Well, as I did not get a satisfactory reply to the original question I tried to
> >>make a basic function that, I find, solve the question.
> >>
> >>I think it is not the better function, but it is working.
> >>
> >>So, perhaps it can be useful to other people.
> >>
> >>#
> >># Calculate the matrix of Mahalanobis Distances between groups
> >># from data.frames
> >>#
> >># by: José Cláudio Faria
> >># date: 10/7/05 13:23:48
> >>#
> >>
> >>D2Mah = function(y, x) {
> >>
> >>  stopifnot(is.data.frame(y), !missing(x))
> >>  stopifnot(dim(y)[1] != dim(x)[1])
> >>  y    = as.matrix(y)
> >>  x    = as.factor(x)
> >>  man  = manova(y ~ x)
> >>  E    = summary(man)\$SS[2] #Matrix E
> >>  S    = as.matrix(E\$Residuals)/man\$df.residual
> >>  InvS = solve(S)
> >>  mds  = matrix(unlist(by(y, x, mean)), byrow=T, ncol=ncol(y))
> >>
> >>  colnames(mds) = names(y)
> >>  Objects       = levels(x)
> >>  rownames(mds) = Objects
> >>
> >>  library(gtools)
> >>  nObjects = nrow(mds)
> >>  comb     = combinations(nObjects, 2)
> >>
> >>  tmpD2 = numeric()
> >>  for (i in 1:dim(comb)[1]){
> >>    a = comb[i,1]
> >>    b = comb[i,2]
> >>    tmpD2[i] = (mds[a,] - mds[b,])%*%InvS%*%(mds[a,] - mds[b,])
> >>  }
> >>
> >>  # Thanks Gabor for the below
> >>  tmpMah = matrix(0, nObjects, nObjects, dimnames=list(Objects, Objects))
> >>  tmpMah[lower.tri(tmpMah)] = tmpD2
> >>  D2 = tmpMah + t(tmpMah)
> >>  return(D2)
> >>}
> >>
> >>#
> >># To try
> >>#
> >>D2M = D2Mah(iris[,1:4], iris[,5])
> >>print(D2M)
> >>
> >>Thanks all for the complementary aid (specially to Gabor).
> >>
> >>Regards,
> >>--
> >>Jose Claudio Faria
> >>Brasil/Bahia/UESC/DCET
> >>mails:
> >> joseclaudio.faria at terra.com.br
> >> jc_faria at uesc.br
> >> jc_faria at uol.com.br
> >>tel: 73-3634.2779
> >>
> >>______________________________________________
> >>R-help at stat.math.ethz.ch mailing list
> >>https://stat.ethz.ch/mailman/listinfo/r-help
> >>
> >
> >
> > Esta mensagem foi verificada pelo E-mail Protegido Terra.
> > Scan engine: McAfee VirusScan / Atualizado em 08/07/2005 / Versão: 4.4.00 - Dat 4531
> > Proteja o seu e-mail Terra: http://mail.terra.com.br/
> >
> >
> >
>
>
> --
> Jose Claudio Faria
> Brasil/Bahia/UESC/DCET