[R] deduplication

Wu Gong wg2f at mtmail.mtsu.edu
Fri Jun 4 11:22:43 CEST 2010


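Please try the following, which groups together all ids that are linked to each other, directly or through a chain of pairs: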

## Import data
## Row k of the data frame, (id1[k], id2[k]), is treated as a link between
## two ids that belong to the same group.
id1 <- c(4, 17, 9, 1, 1, 1, 3, 3, 6, 15, 1, 1, 1, 1, 3, 3, 3, 3, 4, 4, 4, 5,
         5, 12, 9, 9, 10, 10)
id2 <- c(8, 18, 10, 3, 6, 7, 6, 7, 7, 16, 4, 5, 12, 18, 4, 5, 12, 18, 5, 12,
         18, 12, 18, 18, 15, 16, 15, 16)
id <- data.frame(id1 = id1, id2 = id2)

## Create same structure table
## id0 keeps the original (de-duplicated) pairs untouched; id is relabelled
## in place below. Both keep the same row names, which is what lets the
## results step line them up again.
id <- id0 <- unique(id)
leng <- nrow(id)

## Relabel ids until every row satisfies id1 == id2.
## For each row whose two ids differ, every occurrence of the larger id
## (in either column) is replaced by the smaller one. Passes are repeated
## until a full pass finds all `leng` rows already equal; n counts the rows
## found equal during the current pass.
repeat {
  n <- 0
  id <- id[order(-id$id1, -id$id2), ]
  for (i in seq_len(leng)) {
    if (id$id1[i] == id$id2[i]) {
      n <- n + 1
      next
    }
    smal <- min(id[i, ])
    larg <- max(id[i, ])
    id$id2[which(id$id2 == larg)] <- smal
    id$id1[which(id$id1 == larg)] <- smal
  }
  if (n == leng) {
    break
  }
}

## Create results
## Cross-tabulate each original id (rows of tab) against the label it ended
## up with (columns of tab). id is put back into its original row order
## first so that it lines up element by element with id0.
tab <- table(
  as.matrix(id0),
  as.matrix(id[order(as.numeric(rownames(id))), ])
)

## One list element per final label, holding the original ids (as character
## strings) that map to it.
res <- vector("list", ncol(tab))
for (i in seq_len(ncol(tab))) {
  # Index the row names directly instead of subsetting tab and asking for
  # rownames(): subsetting drops to a plain vector when tab has a single
  # column or only one row matches, and rownames() of a vector is NULL.
  res[[i]] <- rownames(tab)[tab[, i] != 0]
}
res

-----
An R learner.
-- 
View this message in context: http://r.789695.n4.nabble.com/deduplication-tp2241637p2242921.html
Sent from the R help mailing list archive at Nabble.com.



More information about the R-help mailing list