[R] packages for extracting subtrees

Wiener, Matthew matthew_wiener at merck.com
Fri Feb 8 18:55:08 CET 2002


Hi.  I did write those functions, and sent them (I thought) to one of the R
maintainers to see whether they would be appropriate for inclusion (because
I'd seen some requests on the mailing lists). 

However, I'm happy to post them -- I should have thought of it before.

WARNING:  I've tested these functions on some data arising in my work and
also on the USArrests data that comes with R, and they worked for me.
However, I make no guarantee that they will work for you, and neither Merck
(my employer) nor I will be responsible for anything bad that happens to you
if you use them.  (This is repeated in the main function.)

That said, I'd be happy to get suggestions for improvements (including any
examples that it breaks down on).

Regards,

Matthew Wiener
Applied Computer Science and Mathematics Department
Merck Research Labs
Rahway, NJ  07065-0900
732-594-5303


-----------------------------------------

First the two functions:  f.get.subtrees is really just a shell to loop
through f.make.subtree.  (After the functions there's a .Rd file.)


"f.get.subtrees" <-
function(tree, k = NULL, h = NULL,
           add.element = NULL){
    ## tree is a tree, k and h are as in cutree.
    ## additional elements is the name of any additional info
    ## connected with tree nodes.  The additional elements referred
    ## to should have the same length as tree$labels.
    ##
    groups <- cutree(tree, k, h)
    subtrees <- list()
    for(i in 1:length(unique(groups))){
      subtrees[[i]] <- f.make.subtree(tree, groups, i,
                                      add.element = add.element)
    }
    subtrees
  }

"f.make.subtree" <-
function(tree, groups, n = 1, add.element = NULL){
    ##
    ##  (Required legal stuff)
    ##  Copyright:  Matthew Wiener 
    ##		    Applied Computer Science & Mathematics Dept.
    ##		    Merck Research Labs
    ##		    Rahway, NJ 07090  USA
    ##
    ##  This program is free software; you can redistribute it and/or modify
    ## it under the terms of the GNU General Public License as published by
    ## the Free Software Foundation; either version 2 of the License, or
    ## (at your option) any later version.
    ##
    ## This program is distributed in the hope that it will be useful,
    ## but WITHOUT ANY WARRANTY; without even the implied warranty of
    ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    ## GNU General Public License for more details.
    ##
    ##  
    ## #####################################################################
    ##
    ##  Now here's the program itself.
    ##
    ## this function can be called by f.get.subtrees, in which
    ## case the groups will be defined using cutree.
    ##
    ## which nodes are in the subtree?
    which.nodes <- which(groups == n)
    names(which.nodes) <- NULL
    ## get the order elements for those labels that are in
    ## the subtree
    subtree.labels <- tree$labels[which.nodes]
    subtree.order <- tree$order[tree$labels[tree$order] %in% subtree.labels]
    ## Figure out which rows of the merge tree
    ## are going to need to be included:  first find out which
    ## rows contain leaves in the group (leaves indicated by
    ## negative numbers), then get the associated rows
    which.merge.elements <- which(-tree$merge %in% which.nodes)
    which.merge.rows <- sort(unique(which.merge.elements %%
nrow(tree$merge)))
    ## if the last row is in there, it will show up as 0
    ## needs to be changed to number of rows
    which.merge.rows[which.merge.rows == 0] <- nrow(tree$merge)

    ## now collect rows referred to be the rows you've got
    ## until there's nothing more to get
    old.length <- 0
    new.length <- length(which.merge.rows)
    ## trap the condition with only one row
    if(length(which.merge.elements) == 1){
      res <- list(merge = NULL,
                  heights = tree$height[which.merge.rows],
                  order = 1,
                  labels = subtree.labels,
                  method = tree$method,
                  call = tree$call,
                  dist.method = tree$dist.method)
    }
    else{
      while(new.length - old.length > 0){
        old.length <- new.length
        ## get new rows
        ## we need any row in which a row we've already got is referred to
        new.rows <- numeric(0)
        more.new.rows <- numeric(0)
        row.elements <- as.vector(tree$merge[which.merge.rows,])
        pos.elements <- row.elements[row.elements > 0]
        ## need rows we refer to
        ## as long as they involve an element we're keeping
        if(length(pos.elements) > 0)
          new.rows <-
            pos.elements[apply(tree$merge[pos.elements,, drop = F], 1,
                               function(x){
                                 any(x > 0) &
                                 all(-x[x<0] %in% which.nodes)})]
        ## also need the ones referring to rows we already have
        more.new.rows <-
          which(apply(tree$merge, 1,
                      function(x){all(x %in% which.merge.rows)}))
        which.merge.rows <-
          sort(unique(c(which.merge.rows, new.rows, more.new.rows)))
        new.length <- length(which.merge.rows)
      }
      merge.list <- tree$merge[which.merge.rows,,drop = F]
      merge.height <- tree$height[which.merge.rows]
      ## fix references -- row numbers need to change
      pos.elements <- sort(merge.list[merge.list > 0])
      
      for(i in seq(along = pos.elements))
        merge.list[merge.list == pos.elements[i]] <-
          which(pos.elements[i] == which.merge.rows)
      
      ## fix references -- element numbers need to change
      neg.elements <- merge.list[merge.list < 0]
      minus.neg.elements <- -neg.elements
      num.elements <- length(neg.elements)
      ## change.table is the dictionary for changes
      change.table <- cbind(sort(minus.neg.elements), 1:num.elements)
      ## apply the dictionary
      for(i in 1:num.elements){
        merge.list[merge.list== -change.table[i,1]] <- -change.table[i,2]
      }
      old.order <- subtree.order
      subtree.order <- match(tree$labels[old.order],
                             subtree.labels)
      res <- list(merge = merge.list,
                  height = merge.height,
                  order = subtree.order,
                  labels = subtree.labels,
                  method = tree$method,
                  call = list(match.call(), tree$call),
                  dist.method = tree$dist.method)
      class(res) <- "hclust"
      ## if any additional elements specified, get them too
      for(i in seq(along = add.element)){
        res[[add.element[i]]] <-
          tree[[add.element[i]]][which.nodes]

      }
    }
    res
  }

----------------------------------------------------------------------------
--

Here's the .Rd file.  It compiled OK for me.

\name{Subtree extraction for hclust trees}
\alias{f.get.subtrees}
\alias(f.make.subtree}
\title{Extract hclust Subtrees}
\description{Extract subtrees from an "hclust"-class tree object.}
\usage{
f.get.subtrees(tree, k = NULL, h = NULL, add.element = NULL)
f.make.subtree(tree, groups, n = 1, add.element = NULL)
}
\arguments{
  \item{tree}{a tree object with class "hclust"}
  \item{k}{The number of groups desired (as in \code{cutree})}
  \item{h}{The height at which tree should be cut (as in \code{cutree})}
  \item{groups}{A vector of subgroup memberships.  This should be the
    same length as \code{tree$label}, that is, one element for each leaf
  of the tree.  Can be created by \code{cutree}.}
  \item{n}{Index of subgroup for the subtree to be created}
  \item{add.element}{A vector of element names for any elements added to
    the tree}
}
\value{
  an hclust-class tree object representing a single subtree
  (f.make.subtree) or a list of such objects (f.get.subtrees).  
}
\details{
}
\references{
}
\seealso{
  \code{\link[mva]{hclust}}, \code{\link[mva]{cutree}}
}
\examples{
data(USArrests)
t1 <- hclust(dist(USArrests))
t2 <- f.get.subtrees(t1, k = 3)    # a list with 3 subtrees
plot(t2[[1]])                      # plot the first subtree
t3 <- f.get.subtrees(t1, h = 125)  # same as t2
}
\author{Matthew Wiener}
\email{Matthew_Wiener at merck.com}
\keyword{}


-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-
r-help mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html
Send "info", "help", or "[un]subscribe"
(in the "body", not the subject !)  To: r-help-request at stat.math.ethz.ch
_._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._



More information about the R-help mailing list