[R] Plots with k-means

eduardo san miguel eduardosanmi at gmail.com
Mon Nov 2 13:53:16 CET 2009


Hello all,

I have almost finished the development of a new package where ideas
from Tamara Munzner, George Furnas and Costa and Venturini are
implemented.

1.- Da Costa, David & Venturini, Gilles (2006). An Interactive
Visualization Environment for Data Exploration Using Points of
Interest. adma 2006: 416-423

2.- Furnas, George (1986). Generalized Fisheye Views. Human Factors in
computing systems, CHI '86 conference proceedings, ACM, New York, pp.
16-23.

3.- Heidi Lam, Ronald A. Rensink, and Tamara Munzner (2006). Effects
of 2D Geometric Transformations on Visual Memory. Proc. Applied
Perception in Graphics and Visualization (APGV 2006), 119-126, 2006.

4.- Keith Lau, Ron Rensink, and Tamara Munzner (2004). Perceptual
Invariance of Nonlinear Focus+Context Transformations. Proc. First
Symposium on Applied Perception in Graphics and Visualization (APGV
04) 2004, pp 65-72.

This is a sample with some basic functionality and a VERY BASIC
example with kmeans plotting.

Comments will be greatly appreciated.

Regards

-- R CODE
require(methods)

 setClass(Class = 'POI',
        representation(matrizSim = 'matrix',cos.query.docs = 'vector',
      wordsInQuery = 'ANY',docs = 'matrix', objeto = 'matrix', objetoC
= 'matrix',
      Pcoords = 'matrix', PcoordsFI = 'matrix', newPcoords = 'matrix',
newcoords = 'numeric' ,
      newcoords_1 = 'numeric',  M = 'numeric', poisTextCol =
'character' , colores = 'vector' ,
      poisCircleCol = 'character' , linesCol = 'character', itemsCol =
'character',
      LABELS =  'logical',  vscale = 'numeric',  hscale = 'numeric',
circleCol = 'character',
      plotCol = 'character',  itemsFamily = 'character',  lenteDefault
= 'numeric',
      zoomDefault = 'numeric' ,  rateDefault = 'numeric' ,
topKDefault = 'numeric'  ,
      pal = 'character',  selected = 'numeric' ,  circRadio =
'numeric' , IncVscale = 'numeric',
      cgnsphrFont = 'numeric', xClick_old = 'numeric',  yClick_old = 'numeric',
      wordsInQueryFull = 'character' ),
      prototype(cos.query.docs = 0, colores = 0, newcoords = 0,
newcoords_1 = 0, M = 3,
               vscale = 0.5 , hscale = 1.5 , circleCol = 'black' ,
itemsCol = 'white',
               poisTextCol =  '#fff5ee',  poisCircleCol = '#fff5ee',
linesCol = 'white',
               plotCol = 'black', itemsFamily = 'sans', lenteDefault =
1, zoomDefault = 15 ,
               rateDefault = 0.1 , topKDefault = 25,  pal = 'topo' ,
selected = 1 ,
               circRadio = 0.25  , IncVscale = 0.05  ,  cgnsphrFont =
1.01, LABELS = T)
 )

 setGeneric("puntosMedios" ,
             function(Pcoords, detalle = 5){standardGeneric ("puntosMedios")})

 setMethod("puntosMedios" ,
            signature = "matrix",
            function(Pcoords, detalle = 5){

  for (i in 1:detalle){
    new_pcoords = matrix(rep(0,4*nrow(Pcoords)), nrow = 2*
nrow(Pcoords), byrow = T )
    cont = 0
    for (i in 1:nrow(Pcoords)){
	   if (i == nrow(Pcoords)) {
  		cont = cont + 1
  		new_pcoords[cont,] = Pcoords[i,]
  		cont = cont + 1
  		new_pcoords[cont,] = Pcoords[i,] - ((Pcoords[i,]-Pcoords[1,])/2)
  	}else{
  		cont = cont + 1
  		new_pcoords[cont,] = Pcoords[i,]
  		cont = cont + 1
  		new_pcoords[cont,] = Pcoords[i,] - ((Pcoords[i,]-Pcoords[i+1,])/2)}}
    Pcoords = new_pcoords}
    return(Pcoords)

   }
 )

 setGeneric("fishIout" ,
             function(x, value){standardGeneric ("fishIout")})

 setMethod("fishIout" ,
            signature = "numeric",
            function(x, value){

  d = value
 	if (x > 0){
		signo = 1
	}else{
		signo = -1
	}
	x = abs(x)
	return(signo*(-(x/((d*x)-d-1))))
   }
 )

 setGeneric("fishIin" ,
             function(x, value){standardGeneric ("fishIin")})

 setMethod("fishIin" ,
            signature = "numeric",
            function(x, value){

  d = value
	if (x > 0){
		signo = 1
	}else{
		signo = -1
	}
	x = abs(x)

	return(signo*(((d+1)*x)/(d*x+1)))
   }
 )

 setGeneric("toPolar" ,
             function(x, y){standardGeneric ("toPolar")})

 setMethod("toPolar" ,
            signature = "numeric",
            function(x, y){

	t1 = atan2(y,x)
	rP = sqrt(x^2+y^2)
	return(c(t1 = t1,rP = rP))

   }
 )

 setGeneric("toCartesian" ,
             function(t1, rP){standardGeneric ("toCartesian")})

 setMethod("toCartesian" ,
            signature = "numeric",
            function(t1, rP){

	x1 = rP*cos(t1)
	y1 = rP*sin(t1)
	return(c(x = x1,y = y1))

   }
 )

 setGeneric("circulo" ,
             function(cx, cy, r, circleCol, PLOT =
TRUE){standardGeneric ("circulo")})

 setMethod("circulo" ,
            signature = "numeric",
            function(cx, cy, r, circleCol, PLOT = TRUE){

	t = seq(0,2*pi,length=100)
	circle = t(rbind(cx+sin(t)*r,cy+cos(t)*r))
	if (PLOT == TRUE) plot(circle,type='l',,ylim=c(-1.15,1.15),xlim=c(-1.15,1.15),
		ann=FALSE, axes=F, col = circleCol)
	return(circle)

   }
 )

 setGeneric("circulin" ,
             function(cx, cy, r = 0.045,
                      objeto, col = 'blue', PLOT = TRUE, label = 0){
                      standardGeneric ("circulin")})

 setMethod("circulin" ,
            signature = "ANY",
            function(cx, cy, r = 0.045, objeto, col = 'blue', PLOT =
TRUE, label = 0){

	t = seq(0,2*pi,length=100)
	circle = t(rbind(cx+sin(t)*r,cy+cos(t)*r))
	points(circle,type='l', col = col)
	if (label != 0) text(cx,cy,label,cex = .7)
	insiders <- apply(objeto,1,function(co)(cx-co[1])^2+(cy-co[2])^2<r^2)
  assign('insiders', insiders , envir = POI.env)

   }
 )

 setGeneric("addNoise" ,
             function(m, tamanyo = 0.01){standardGeneric ("addNoise")})

 setMethod("addNoise" ,
            signature = "matrix",
            function(m, tamanyo = 0.01){

	noise = function(m, t = tamanyo){
		ruido = rnorm(length(m), 0,t)
		return(m+ruido)
	}
	noised = noise(m)
	unicos = which(duplicated(m) == FALSE)
	m[-unicos,] = noised[-unicos,]
	return(m)

   }
 )

 setGeneric("toHiperbolico" ,
             function(objeto, M = 1 , cx = 0, cy = 0, r = 1){
             standardGeneric ("toHiperbolico")})

 setMethod("toHiperbolico" ,
            signature = "matrix",
            function(objeto, M = 1 , cx = 0, cy = 0, r = 1){

	insiders = apply(objeto,1,function(co)(cx-co[1])^2+(cy-co[2])^2<r^2)
	outers = which(insiders < 1)
	objetoP = matrix(toPolar(objeto[,1],objeto[,2]),nc=2)
	if (length(outers)){
			objetoP[outers,2] = 1
	}
	objetoP[,2] = sapply(objetoP[,2],fishIin,M)
	objetoC = matrix(toCartesian(objetoP[,1],objetoP[,2]),nc=2)
  return(list(objetoC = objetoC,
              objetoP = objetoP))

   }
 )

 setGeneric("POIcoords<-" , function(object, value){standardGeneric
("POIcoords<-")})

 setReplaceMethod( f ="POIcoords",
                   signature = 'POI',
                   definition = function(object, value){
                                   object at Pcoords <- value$Pcoords
                                   object at PcoordsFI <- value$PcoordsFI
                                   object at newPcoords <- value$newPcoords
                                   object at objeto <- value$objeto

                                   return(object)
                                }
 )

 setGeneric("POICalc" ,
             function(objeto, NC, cx=0, cy=0, r=1,
...){standardGeneric ("POICalc")})

 setMethod("POICalc" ,
            signature = "POI",
            function(objeto, NC, cx=0, cy=0, r=1, ...){

   MatrizSim = objeto at matrizSim
   secuencia = seq(2/NC,2,2/NC)
   Pcoords = matrix(rep(0,NC*2),nc=2)
   n = 1
   for (i in secuencia){
      Pcoords[n,] = c(r * cos(i*pi), r * sin(i*pi))
      n = n+1
   }
   PcoordsFI = matrix(toPolar(Pcoords[,1],Pcoords[,2]),nc=2)
   PcoordsFI[,2] = PcoordsFI[,2]+.15
   PcoordsFI = matrix(toCartesian(PcoordsFI[,1],PcoordsFI[,2]),nc=2)

   if (nrow(Pcoords) != 1){
   newPcoords = puntosMedios(Pcoords)
   } else {
      newPcoords = Pcoords
   }

   MatrizSim[is.nan(MatrizSim/rowSums(MatrizSim))] <- 0

   W = MatrizSim / rowSums(MatrizSim)
   W[is.nan(W)] <- 0
   nwords = nrow(W)
   objeto = matrix(rep(0,2*nwords),nc=2)
   for (j in 1:nwords){
      for (nPOI in 1:NC){
         objeto[j,1] = objeto[j,1]+(W[j,nPOI]*Pcoords[nPOI,1])
         objeto[j,2] = objeto[j,2]+(W[j,nPOI]*Pcoords[nPOI,2])
      }
   }

   objeto = addNoise(objeto)

   return(list(Pcoords = Pcoords,
               PcoordsFI = PcoordsFI,
               newPcoords = newPcoords,
               objeto = objeto))

   }
 )

 setGeneric("POIPlot" ,
             function(POI){standardGeneric ("POIPlot")})

 setMethod("POIPlot" ,
            signature = "POI",
            function(POI){

   par(bg=POI at plotCol, mar = c(0.1,0.1,0.1,0.1), family = POI at itemsFamily)


   if (exists('POI.env')) {
      if (exists('POI', envir = POI.env)) {
        POI <- get('POI', envir = POI.env)
      }
   }

   selected = POI at selected
   objeto = POI at objeto
   newcoords = POI at newcoords
   newcoords_1 = POI at newcoords_1
   NC = length(POI at wordsInQuery)
   cx=0
   cy=0
   r=1
   etiq2 = POI at docs[,1]
   etiq = POI at wordsInQuery
   fishEYE = TRUE
   M = POI at M
   poisTextCol = POI at poisTextCol
   colores = POI at colores[POI at docs]
   poisCircleCol = POI at poisCircleCol
   linesCol = POI at linesCol
   itemsCol = POI at itemsCol
   circleCol = POI at circleCol
   LABELS =  POI at LABELS
   Pcoords = POI at Pcoords
   newPcoords = POI at newPcoords
   cgnsphrFont = POI at cgnsphrFont

   newcoords_par = newcoords

   newcoords_Pcoords = matrix(rep( c(newcoords,newcoords_1 ),
                              nrow(Pcoords)),nc=2,byrow=TRUE)

   newcoords_puntosMediosPcoords = matrix(rep( c(newcoords,newcoords_1),
                                          nrow(newPcoords)),nc=2,byrow=TRUE)

   newcoords = matrix(rep( c(newcoords,newcoords_1),
                      nrow(objeto)),nc=2,byrow=TRUE)

   objeto = objeto+newcoords
   objetoH = toHiperbolico(objeto, M)
   objetoC = objetoH$objetoC
   objetoP = objetoH$objetoP

   Pcoords = Pcoords + newcoords_Pcoords
   PcoordsH = toHiperbolico(Pcoords, M)
   PcoordsC = PcoordsH$objetoC
   PcoordsP = PcoordsH$objetoP

   newPcoords = newPcoords + newcoords_puntosMediosPcoords
   newPcoordsH = toHiperbolico(newPcoords, M)
   Pcoords_objetoC = newPcoordsH$objetoC

   if (LABELS) {
      PcoordsFI = matrix(toPolar(PcoordsC[,1],PcoordsC[,2]),nc=2)
      PcoordsFI[,2] = 1 +.15
      PcoordsFI = matrix(toCartesian(PcoordsFI[,1],PcoordsFI[,2]),nc=2)
   }

   plot(circulo(0,0,1, circleCol, PLOT =
FALSE),cex=.5,ylim=c(-1.15,1.15),xlim=c(-1.15,1.15),
                  ann=FALSE, axes=F,type='l', col = circleCol)

   points(objetoC, pch=19, col = colores, cex = 1.5 - objetoP[,2])

   text(objetoC[,1], objetoC[,2], labels = etiq2, cex = cgnsphrFont -
objetoP[,2],
        pos = 3, col = itemsCol)

   abline(h = cx, col = 'grey', lty = 'dashed')
   abline(v = cy, col = 'grey', lty = 'dashed')


   points(PcoordsC,cex = 2, col = poisCircleCol)

   lines(Pcoords_objetoC, col = linesCol)

   segments(Pcoords_objetoC[nrow(Pcoords_objetoC),1],Pcoords_objetoC[nrow(Pcoords_objetoC),2],
            Pcoords_objetoC[1,1],Pcoords_objetoC[1,2], col = linesCol)

   if (LABELS) {
      text(PcoordsFI[,1],PcoordsFI[,2],toupper(etiq),cex=.75, col = poisTextCol)
   }

   if (selected != 1) {
      circulin(0,0, .5, objeto = objetoC)   # probando
   }

   if (!exists('POI.env')){
      POI.env <<- new.env()
   }
   poiCOPY = POI
   poiCOPY at objeto <- objeto
   poiCOPY at objetoC <- objetoC
   poiCOPY at newPcoords <- newPcoords
   poiCOPY at Pcoords <- Pcoords
   assign('POI',poiCOPY , envir = POI.env)

   }
 )


# *strong*VERY*strong* basic kmeans example with 6 clusters and 10 variables
x <- matrix(rnorm(100, mean = 1, sd = .3), ncol = 10)
x <- rbind(x,matrix(rnorm(200, mean = 5, sd = .3), ncol = 10))
x <- rbind(x,matrix(rnorm(100, mean = 10, sd = .3), ncol = 10))
x <- rbind(x,matrix(rnorm(100, mean = 15, sd = .3), ncol = 10))
x <- rbind(x,matrix(rnorm(200, mean = 20, sd = .3), ncol = 10))
x <- rbind(x,matrix(rnorm(100, mean = 25, sd = .3), ncol = 10))

cl <- kmeans(x, 6, iter.max = 100 ,nstart = 25)

# *strong*VERY*strong* basic way of reordering cluster output for
better plotting
# here we reorder using just the first cluster
reorder.cl <- as.numeric(names(sort(rank((as.matrix(dist(cl$centers,
diag = T)))[,1]))))
cl$centers <- cl$centers[reorder.cl, ]
cl$size    <- cl$size[reorder.cl]

# distance matrix between each element and its cluster center
matrizSim = matrix(rep(0, nrow(cl$centers) * nrow(x)), ncol = nrow(cl$centers))
for (n in 1:nrow(cl$centers)){
  for (i in 1:nrow(x)) {
    a = x[i,]
    b = cl$centers[n,]
    matrizSim[[i,n]] = dist(rbind(a,b)) # eucl dist
  }
}

# From dist to similarity (0 - 1)
matrizSim = 1 - (matrizSim / rowSums(matrizSim) )
# exagerate similarity
matrizSim  = matrizSim^3

# Create POI plot
clusterPOI = new('POI')
clusterPOI at M = 1          # no fisheye distorsion
clusterPOI at matrizSim <- matrizSim
clusterPOI at wordsInQuery <- paste('"',
as.character(round(cl$centers[,1]),2),'"', '
size',as.character(cl$size))
POIcoords(clusterPOI) <- POICalc(clusterPOI ,length(clusterPOI at wordsInQuery))
clusterPOI at docs <-
cbind(matrix(seq(1:nrow(clusterPOI at objeto))),matrix(seq(1:nrow(clusterPOI at objeto))))
clusterPOI at colores <- cl$cluster  + 1
clusterPOI at cos.query.docs <- rep(1, length(cl$cluster))
POI.env <<- new.env()
POIPlot(clusterPOI)




More information about the R-help mailing list