[R] Retrieve all names of nested list and index list based on these names

wwa418 at ku-eichstaett.de wwa418 at ku-eichstaett.de
Wed Nov 10 02:26:00 CET 2010


Hi all,

I looked for a function that would retrieve all(!) names of an arbitrary
deeply nested named list. Also, names should optionally be arranged in a
way that reflects the list's hierarchy structure (i.e. 'a$a.1$a.1.1' etc.)

Also, there should be a recursive index linked to a respective list branch
that could be used to index a list by names (as you would do with named
vectors, only that now there's also a hierarchy structure coming into
play.

Example:

name    index
a       1
a.1     1-1
a.1.1   1-1-1

As I didn't really find anything that suited my needs, I ended up trying
to write a recursive function that loops through the individual branches
via lapply() and came to find this to be pretty nasty to debug/manually
test ;-).

I think I found a acceptable implementation now and thought I'd share it
in case someone is up to a similar task. Two function defs, then an
example:

##### FUNCTION DEFS #####

listnames.get <- function(
    list.obj,
    do.basename=TRUE,
    do.name.chain=TRUE,
    ...
)
{
    # VALIDATE
    if(!is.list(list.obj)) stop("Argument 'list.obj' must be a list.")
    # /

         #---------------------------------------------------------------------------
    # CORE FUNCTION
    #---------------------------------------------------------------------------

    listnames.get.core <- function(
    	list.obj,
    	do.basename=TRUE,
    	do.name.chain=TRUE,
    	buffer,
    	...
    )
    {
        if(!exists("index", buffer))
        {
           buffer$index 	<- new.env(parent=emptyenv())
           buffer$index 	<- NULL
           buffer$name		<- NULL
        }

        jnk <- sapply(1:length(list.obj), function(x)
        {
            list.branch 	<- list.obj[x]
            list.branch.nme	<- names(list.branch)
            if(do.basename) list.branch.nme <- basename(list.branch.nme)
            list.obj.updt	<- list.branch[[1]]

            # UPDATE BUFFER
            buffer$run		<- c(buffer$run, x)
            if(do.name.chain)
            {
                buffer$name		<- c(buffer$name, list.branch.nme)
            } else
            {
            	buffer$name		<- list.branch.nme
            }
            # /

        	index.crnt		<- paste(as.character(buffer$run), collapse="-")
        	index.crnt		<- data.frame(
                name=paste(buffer$name, collapse="$"),
                index=index.crnt,
                stringsAsFactors=FALSE
        	)
        	index.updt		<- rbind(buffer$index, index.crnt)
        	buffer$index 	<- index.updt

        	if(is.list(list.obj.updt))
        	{
                jcore.listnames.get.core(
                    list.obj=list.obj.updt,
                    do.basename=do.basename,
                    do.name.chain=do.name.chain,
                    buffer=buffer
                )
        	}

        	# UPDATE BUFFER
        	buffer$run	<- buffer$run[-length(buffer$run)]
        	buffer$name	<- buffer$name[-length(buffer$name)]
        	# /

        	return(NULL)
        })

        return(TRUE)
    }

    # /CORE FUNCTION ----------
    #---------------------------------------------------------------------------
    # APPLICATION
    #---------------------------------------------------------------------------

    assign("buffer", new.env(parent=emptyenv()), envir=environment())

    listnames.get.core(
    	list.obj=list.obj,
    	do.basename=do.basename,
    	buffer=buffer
    )

    # /APPLICATION ----------

    return(buffer$index)
}

listbranch.get <- function(
    list.obj,
    query,
    do.strict=TRUE,
    do.rtn.val=TRUE,
    msg.error=NULL,
    ...
)
{
    # VALIDATE
    if(!is.list(list.obj)) stop("Argument 'list.obj' must be a list.")
    # /

    # ESTABLISH LIST INDEX
    list.index	<- jcore.listnames.get(
        list.obj=list.obj,
        do.basename=TRUE,
        do.name.chain=TRUE
    )
    list.index.nms <- list.index$name
    # /

    # SEARCH FOR QUERY
    if(do.strict)
    {
        query.0	<- query
        query <- gsub("\\$", "\\\\$", query)
        query <- gsub("\\.", "\\\\.", query)
        query <- paste("^", query, "$", sep="")
    }
    idx <- grep(query, list.index.nms, perl=TRUE)

    if(!length(idx))
    {
        if(is.null(msg.error))
        {
            msg.error <- paste("Query not successful: '", query.0, "' ('",
query, "')", sep="")
        }
        stop(cat(msg.error, sep="\n"))
    }
    # /

    # BUILDING RECURSIVE INDEX
    idx <- list.index$index[idx]
    idx <- as.numeric(unlist(strsplit(idx, split="-")))
    # /

    if(do.rtn.val)
    {
        # RECURSIVE INDEXING
        rtn <- list.obj[[idx]]
        # /
    } else
    {
        rtn <- idx
    }

    return(rtn)
}

##### EXAMPLE #####

my.list <- list(
    a=list(a.1="a", a.2=list(a.2.1="a", a.2.2="b"), a.3=list(a.3.1="a"),
    b=list(b.1=list(b.1.1="a"), b.2="b"),
    c="a"
)

listnames.get(list.obj=my.list, do.basename=TRUE, do.name.chain=TRUE)

listbranch.get(list.obj=my.list, query="a$a.2$a.2.2",
    do.strict=TRUE, do.rtn.val=TRUE)
listbranch.get(list.obj=my.list, query="a$a.2$a.2.2",
    do.strict=TRUE, do.rtn.val=FALSE)



More information about the R-help mailing list