[R] IP-Address

Gabor Grothendieck ggrothendieck at gmail.com
Thu Jun 4 15:10:14 CEST 2009


I suggest you be more specific.  The solution I posted does handle missing values,
sorting them to the beginning:

> Lines <- "id rank color status ip
+ 138 29746 yellow no 162.131.58.26
+ 138 29746 red  yes  162.131.58.16
+ 138 29746 blue yes  162.131.58.10
+ 138 29746 red no  162.131.58.17
+ 138 29746 yellow no 162.131.58.14
+ 138 29746 red no  162.131.58.13
+ 138 29746 yellow  no 162.132.58.15
+ 139 29746 green no  162.252.20.69
+ 140 29746 red yes  162.254.20.71
+ 141 29746 yellow no  163.253.7.153
+ 142 31804 green yes  163.253.20.114
+ 142 31804 green yes
+ 144 32360 black yes  161.138.45.226"
>
> DF <- read.table(textConnection(Lines), header = TRUE, fill = TRUE)
> library(gtools)
> DF[mixedorder(DF$ip), ]
    id  rank  color status             ip
12 142 31804  green    yes
13 144 32360  black    yes 161.138.45.226
3  138 29746   blue    yes  162.131.58.10
6  138 29746    red     no  162.131.58.13
5  138 29746 yellow     no  162.131.58.14
2  138 29746    red    yes  162.131.58.16
4  138 29746    red     no  162.131.58.17
1  138 29746 yellow     no  162.131.58.26
7  138 29746 yellow     no  162.132.58.15
8  139 29746  green     no  162.252.20.69
9  140 29746    red    yes  162.254.20.71
11 142 31804  green    yes 163.253.20.114
10 141 29746 yellow     no  163.253.7.153


On Thu, Jun 4, 2009 at 6:39 AM,  <edwin7 at web.de> wrote:
> Hi,
>
> Unfortunately, they can't handle NA. Any suggestions? Some rows don't have an IP address in the ip column. This causes an error or a wrong result.
>
> Eddie
>
>
>> library(gsubfn)
>> library(gtools)
>> library(rbenchmark)
>>
>> n <- 10000
>> df <- data.frame(
>>   a = rnorm(n),
>>   b = rnorm(n),
>>   c = rnorm(n),
>>   ip = replicate(n, paste(sample(255, 4), collapse='.'), simplify=TRUE)
>> )
>>
>> res <- benchmark(columns=c('test', 'elapsed'), replications=10, order=NULL,
>>   peda = {
>>     connection <- textConnection(as.character(df$ip))
>>     o <- do.call(order, read.table(connection, sep='.'))
>>     close(connection)
>>     df[o, ]
>>   },
>>
>>   peda2 = {
>>     connection <- textConnection(as.character(df$ip))
>>     dfT <- read.table(connection, sep='.', colClasses=rep("integer",
>> 4), quote="", na.strings=NULL, blank.lines.skip=FALSE)
>>     close(connection)
>>     o <- do.call(order, dfT)
>>     df[o, ]
>>   },
>>
>>   hb = {
>>     ip <- strsplit(as.character(df$ip), split=".", fixed=TRUE)
>>     ip <- unlist(ip, use.names=FALSE)
>>     ip <- as.integer(ip)
>>     dim(ip) <- c(4, nrow(df))
>>     ip <- 256^3*ip[1,] + 256^2*ip[2,] + 256*ip[3,] + ip[4,]
>>     o <- order(ip)
>>     df[o, ]
>>   },
>>
>>   hb2 = {
>>     ip <- strsplit(as.character(df$ip), split=".", fixed=TRUE)
>>     ip <- unlist(ip, use.names=FALSE)
>>     ip <- as.integer(ip);
>>     dim(ip) <- c(4, nrow(df))
>>     o <- sort.list(ip[4,], method="radix", na.last=TRUE)
>>     for (kk in 3:1) {
>>       o <- o[sort.list(ip[kk,o], method="radix", na.last=TRUE)]
>>     }
>>     df[o, ]
>>   }
>> )
>>
>> print(res)
>>
>>    test elapsed
>> 1  peda    4.12
>> 2 peda2    4.08
>> 3    hb    0.28
>> 4   hb2    0.25
>>
>>
>> On Sun, May 31, 2009 at 12:42 AM, Wacek Kusnierczyk
>>
>> <Waclaw.Marcin.Kusnierczyk at idi.ntnu.no> wrote:
>> > edwin Sendjaja wrote:
>> >> Hi VQ,
>> >>
>> >> Thank you. It works like a charm. But I think Peter's code is faster. What
>> >> is the difference?
>> >
>> > i think peter's code is more r-elegant, though less generic.  here's a
>> > quick test, with not so surprising results.  gsubfn is implemented in r,
>> > not c, and it is painfully slow in this test. i also added gabor's
>> > suggestion.
>> >
>> >    library(gsubfn)
>> >    library(gtools)
>> >    library(rbenchmark)
>> >
>> >    n = 1000
>> >    df = data.frame(
>> >       a=rnorm(n),
>> >       b = rnorm(n),
>> >       c = rnorm(n),
>> >       ip = replicate(n, paste(sample(255, 4), collapse='.'),
>> > simplify=TRUE))
>> >    benchmark(columns=c('test', 'elapsed'), replications=10, order=NULL,
>> >       peda={
>> >          connection = textConnection(as.character(df$ip))
>> >          o = do.call(order, read.table(connection, sep='.'))
>> >          close(connection)
>> >          df[o, ] },
>> >       waku=df[order(gsubfn(perl=TRUE,
>> >          '[0-9]+',
>> >          ~ sprintf('%03d', as.integer(x)),
>> >          as.character(df$ip))), ],
>> >       gagr=df[mixedorder(df$ip), ] )
>> >
>> >    # peda 0.070
>> >    # waku 7.070
>> >    # gagr 4.710
>> >
>> >
>> > vQ
>> >
>> > ______________________________________________
>> > R-help at r-project.org mailing list
>> > https://stat.ethz.ch/mailman/listinfo/r-help
>> > PLEASE do read the posting guide
>> > http://www.R-project.org/posting-guide.html and provide commented,
>> > minimal, self-contained, reproducible code.
>>
>> ______________________________________________
>> R-help at r-project.org mailing list
>> https://stat.ethz.ch/mailman/listinfo/r-help
>> PLEASE do read the posting guide
>> http://www.R-project.org/posting-guide.html and provide commented, minimal,
>> self-contained, reproducible code.
>
>
>
>        [[alternative HTML version deleted]]
>
> ______________________________________________
> R-help at r-project.org mailing list
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
> and provide commented, minimal, self-contained, reproducible code.
>




More information about the R-help mailing list