[R] subset data using a vector

DIGHE, NILESH [AG/2362] nilesh.dighe at monsanto.com
Tue Nov 24 23:13:58 CET 2015


Jim & Michael:  I really appreciate your guidance in creating the function I wanted.  I took suggestions from both of you and was able to complete this function.  I had to split the process into two functions as listed below.
I thought I would send the results to the list in case someone is interested in doing a similar task in the future.
Thanks.
Nilesh

getcheckmeans<- function (dataset)
{
    row_check_mean <- c()
    dat1 <- data.frame()
    check_mean <- c()
    x <- length(dataset$plotid)
    for (i in (1:x)) {
        r1 <- dataset[i, 1]
        r2 <- r1 - 1
        r3 <- r1 + 1
        r4 <- c(r1, r2, r3)
        dat1 <- split(dataset, dataset$rows %in% r4)[[2]]
        row_check_mean[i] <- tapply(dat1$yield, dat1$linecode,
            mean, na.rm = TRUE)[1]
        check_mean <- round(unlist(row_check_mean)[1:x], digits = 2)
    }
    check_mean
}


adjustdata<- function (dataset, trait, control)

{

    check_mean <- getcheckmeans(dataset)

    dat_check_mean <- as.data.frame(check_mean)

    dataset <- cbind(dataset, dat_check_mean)

    adj_yield <- c()

    x <- length(trait)

    for (i in 1:x) {

        adj_yield[i] <- ifelse(control[i] == "variety", round(trait[i]/dataset$check_mean[i],

            digits = 3), round(trait[i]/trait[i], digits = 3))

    }

    data.frame(dataset, adj_yield)

}


# Example dataset in dput() form: a data frame with 64 plots and the columns
# rows, cols, plotid, yield, line and linecode.
#   - rows takes the values 1-4; cols runs 1-16 and reverses direction on
#     alternate rows.
#   - line is a factor labelled "CHK", "V002", ..., "V057".
#   - linecode is a factor with levels "check" and "variety".
dat<- structure(list(rows = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,

1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,

2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,

3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,

4L, 4L, 4L, 4L, 4L, 4L), cols = c(1L, 2L, 3L, 4L, 5L, 6L, 7L,

8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 16L, 15L, 14L, 13L,

12L, 11L, 10L, 9L, 8L, 7L, 6L, 5L, 4L, 3L, 2L, 1L, 1L, 2L, 3L,

4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 16L,

15L, 14L, 13L, 12L, 11L, 10L, 9L, 8L, 7L, 6L, 5L, 4L, 3L, 2L,

1L), plotid = c(289L, 290L, 291L, 292L, 293L, 294L, 295L, 296L,

297L, 298L, 299L, 300L, 301L, 302L, 303L, 304L, 369L, 370L, 371L,

372L, 373L, 374L, 375L, 376L, 377L, 378L, 379L, 380L, 381L, 382L,

383L, 384L, 385L, 386L, 387L, 388L, 389L, 390L, 391L, 392L, 393L,

394L, 395L, 396L, 397L, 398L, 399L, 400L, 465L, 466L, 467L, 468L,

469L, 470L, 471L, 472L, 473L, 474L, 475L, 476L, 477L, 478L, 479L,

480L), yield = c(5.1, 5.5, 5, 5.5, 6.2, 5.1, 5.5, 5.2, 5, 5,

3.9, 4.6, 5, 4.4, 5.1, 4.3, 4.4, 4.2, 3.9, 4.6, 4.8, 5.4, 4.7,

5.5, 5.3, 4.8, 5.8, 4.6, 5.8, 5.5, 5.3, 5.6, 5.6, 5, 4.8, 4.9,

5.2, 5.3, 4.6, 4.8, 5.3, 4.2, 4.6, 4.2, 4.2, 4, 3.9, 4.5, 5.4,

4.8, 4.6, 5.2, 4.9, 5.1, 4.5, 5.8, 5.2, 4.7, 4.8, 5.3, 5.8, 4.9,

5.9, 4.5), line = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,

9L, 1L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L,

1L, 21L, 22L, 1L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L,

32L, 33L, 1L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 1L,

43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 1L, 51L, 52L, 53L, 54L,

1L, 55L, 56L, 57L), .Label = c("CHK", "V002", "V003", "V004",

"V005", "V006", "V007", "V008", "V009", "V010", "V011", "V012",

"V013", "V014", "V015", "V016", "V017", "V018", "V019", "V020",

"V021", "V022", "V023", "V024", "V025", "V026", "V027", "V028",

"V029", "V030", "V031", "V032", "V033", "V034", "V035", "V036",

"V037", "V038", "V039", "V040", "V041", "V042", "V043", "V044",

"V045", "V046", "V047", "V048", "V049", "V050", "V051", "V052",

"V053", "V054", "V055", "V056", "V057"), class = "factor"), linecode = structure(c(1L,

2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,

2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,

2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L,

2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L), .Label = c("check",

"variety"), class = "factor")), .Names = c("rows", "cols", "plotid",

"yield", "line", "linecode"), class = "data.frame", row.names = c(NA,

-64L))

From: Jim Lemon [mailto:drjimlemon at gmail.com]
Sent: Tuesday, November 24, 2015 2:53 AM
To: DIGHE, NILESH [AG/2362]
Cc: r-help at r-project.org
Subject: Re: [R] subset data using a vector

Hi Nilesh,
I simplified your code a bit:

fun1<-function (dataset, plot.id, ranges2use, control) {
 m1 <- strsplit(as.character(ranges2use), ",")
 dat1 <- data.frame()
 row_check_mean <- NA
 row_check_adj_yield <- NA
 x <- length(plot.id)
 for (i in 1:x) {
  cat(i,"\n")
  dat1 <- dataset[dataset$ranges %in% m1[[i]], ]
  row_check_mean[i] <- tapply(unlist(dat1$trait),unlist(dat1$control),
   mean, na.rm = TRUE)[1]
  row_check_adj_yield[i] <- ifelse(control[i] == "variety",
  trait[i]/dataset$row_check_mean[i], trait[i]/trait[i])
 }
 data.frame(dataset, row_check_adj_yield)
}

 and got it to run down to this line:

row_check_mean[i]<-tapply(dat1$trait,dat1$control,mean,na.rm=TRUE)[1]

which generates the error:

Error in split.default(X, group) : first argument must be a vector

As far as I can see, there is no element in "mydata" named "trait" and "control" is not an element of the local variable "dat1". I can't get past this, but perhaps it will help you to sort it out.

Jim


On Tue, Nov 24, 2015 at 10:10 AM, DIGHE, NILESH [AG/2362] <nilesh.dighe at monsanto.com<mailto:nilesh.dighe at monsanto.com>> wrote:
Michael:  I tried using your suggestion of using length and still get the same error:
Error in m1[[i]] : subscript out of bounds

I also checked the length of m1 and x and they both are of same length (64).

After trying several things, I was able to extract the list but this was done outside the function I am trying to create.
Code that worked is listed below:

for(i in (1:length(mydata$plotid))){
        v1<-as.numeric(strsplit(as.character(mydata$rangestouse), ",")[[i]])
        print(head(v1))}

However, when I try to get this code in a function (fun3) listed below, I get the following error:
Error in strsplit(as.character(dataset$ranges2use), ",")[[i]] :
  subscript out of bounds

fun3<- function (dataset, plot.id, ranges2use, control)
{
    m1 <- c()
    x <- length(plot.id)
    for (i in (1:x)) {
        m1 <- as.numeric(strsplit(as.character(dataset$ranges2use),
            ",")[[i]])
    }
    m2
}

I am not sure where I am making a mistake.
Thanks.
Nilesh

-----Original Message-----
From: Michael Dewey [mailto:lists at dewey.myzen.co.uk<mailto:lists at dewey.myzen.co.uk>]
Sent: Monday, November 23, 2015 12:11 PM
To: DIGHE, NILESH [AG/2362]; r-help at r-project.org<mailto:r-help at r-project.org>
Subject: Re: [R] subset data using a vector

Try looking at your function and work through what happens if the length is what I suggested.

 >>       x <- length(plot.id<http://plot.id>)
 >>
 >>       for (i in (1:x)) {
 >>
 >>           m2[i] <- m1[[i]]

So unless m1 has length at least x you are doomed.

On 23/11/2015 16:26, DIGHE, NILESH [AG/2362] wrote:
> Michael:  I like to use the actual range id's listed in column "rangestouse" to subset my data and not the length of that vector.
>
> Thanks.
> Nilesh
>
> -----Original Message-----
> From: Michael Dewey [mailto:lists at dewey.myzen.co.uk<mailto:lists at dewey.myzen.co.uk>]
> Sent: Monday, November 23, 2015 10:17 AM
> To: DIGHE, NILESH [AG/2362]; r-help at r-project.org<mailto:r-help at r-project.org>
> Subject: Re: [R] subset data using a vector
>
> length(strsplit(as.character(mydata$ranges2use), ","))
>
> was that what you expected? I think not.
>
> On 23/11/2015 16:05, DIGHE, NILESH [AG/2362] wrote:
>> Dear R users,
>>                   I would like to split my data by a vector created by using variable "ranges".  This vector will have the current range (ranges), preceding range (ranges - 1), and post range (ranges + 1) for a given plotid.  If the preceding or post ranges in this vector are outside the levels of ranges in the data set then I would like to drop those ranges and only include the ranges that are available.  Variable "rangestouse" includes all the desired ranges I would like to use to subset the data for a given plotid.  After I subset the dataset using these desired ranges, I would like to extract the yield data for checks in those desired ranges and adjust the yield of my data by dividing the yield of a given plotid by the check average for the desired ranges.
>>
>> I have created this function (fun1) but when I run it, I get the following error:
>>
>> Error in m1[[i]] : subscript out of bounds
>>
>> Any help will be highly appreciated!
>> Thanks, Nilesh
>>
>> Dataset:
>> dput(mydata)
>> structure(list(rows = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
>> 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
>> 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
>> 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
>> 4L, 4L, 4L, 4L), .Label = c("1", "2", "3", "4"), class = "factor"),
>> cols = structure(c(1L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 2L, 3L, 4L,
>> 5L, 6L, 7L, 8L, 9L, 1L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 2L, 3L,
>> 4L, 5L, 6L, 7L, 8L, 9L, 1L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 2L,
>> 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
>> 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L), .Label = c("1", "2", "3", "4", "5",
>> "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16"), class = "factor"),
>>       plotid = c(289L, 298L, 299L, 300L, 301L, 302L, 303L, 304L,
>>       290L, 291L, 292L, 293L, 294L, 295L, 296L, 297L, 384L, 375L,
>>       374L, 373L, 372L, 371L, 370L, 369L, 383L, 382L, 381L, 380L,
>>       379L, 378L, 377L, 376L, 385L, 394L, 395L, 396L, 397L, 398L,
>>       399L, 400L, 386L, 387L, 388L, 389L, 390L, 391L, 392L, 393L,
>>       480L, 471L, 470L, 469L, 468L, 467L, 466L, 465L, 479L, 478L,
>>       477L, 476L, 475L, 474L, 473L, 472L), yield = c(5.1, 5, 3.9,
>>       4.6, 5, 4.4, 5.1, 4.3, 5.5, 5, 5.5, 6.2, 5.1, 5.5, 5.2, 5,
>>       5.6, 4.7, 5.4, 4.8, 4.6, 3.9, 4.2, 4.4, 5.3, 5.5, 5.8, 4.6,
>>       5.8, 4.8, 5.3, 5.5, 5.6, 4.2, 4.6, 4.2, 4.2, 4, 3.9, 4.5,
>>       5, 4.8, 4.9, 5.2, 5.3, 4.6, 4.8, 5.3, 4.5, 4.5, 5.1, 4.9,
>>       5.2, 4.6, 4.8, 5.4, 5.9, 4.9, 5.8, 5.3, 4.8, 4.7, 5.2, 5.8
>>       ), linecode = structure(c(1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
>>       2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L,
>>       2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
>>       1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
>>       2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L), .Label = c("check",
>>       "variety"), class = "factor"), ranges = c(1L, 1L, 1L, 1L,
>>       1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
>>       2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L,
>>       3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L,
>>       4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L
>>       ), rangestouse = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
>>       1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
>>       2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
>>       3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L,
>>       4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("1,2",
>>       "1,2,3", "2,3,4", "3,4"), class = "factor")), .Names =
>> c("rows", "cols", "plotid", "yield", "linecode", "ranges", "rangestouse"
>>
>> ), class = "data.frame", row.names = c(NA, -64L))

>>
>> Function:
>>
>> fun1<- function (dataset, plot.id<http://plot.id>, ranges2use, control)
>>
>> {
>>
>>       m1 <- strsplit(as.character(dataset$ranges2use), ",")
>>
>>       dat1 <- data.frame()
>>
>>       m2 <- c()
>>
>>       row_check_mean <- c()
>>
>>       row_check_adj_yield <- c()
>>
>>       x <- length(plot.id<http://plot.id>)
>>
>>       for (i in (1:x)) {
>>
>>           m2[i] <- m1[[i]]
>>
>>           dat1 <- dataset[dataset$ranges %in% m2[i], ]
>>
>>           row_check_mean[i] <- tapply(dat1$trait, dat1$control,
>>
>>               mean, na.rm = TRUE)[1]
>>
>>           row_check_adj_yield[i] <- ifelse(control[i] == "variety",
>>
>>               trait[i]/dataset$row_check_mean[i], trait[i]/trait[i])
>>
>>       }
>>
>>       data.frame(dataset, row_check_adj_yield)
>>
>> }
>>
>> Apply function:
>> fun1(mydata, plot.id<http://plot.id>=mydata$plotid, ranges2use =
>> mydata$rangestouse,control=mydata$linecode)
>>
>> Error:
>>
>> Error in m1[[i]] : subscript out of bounds
>>
>> Session info:
>>
>> R version 3.2.1 (2015-06-18)
>>
>> Platform: i386-w64-mingw32/i386 (32-bit)
>>
>> Running under: Windows 7 x64 (build 7601) Service Pack 1
>>
>>
>>
>> locale:
>>
>> [1] LC_COLLATE=English_United States.1252  LC_CTYPE=English_United
>> States.1252
>>
>> [3] LC_MONETARY=English_United States.1252 LC_NUMERIC=C
>>
>> [5] LC_TIME=English_United States.1252
>>
>>
>>
>> attached base packages:
>>
>> [1] stats     graphics  grDevices utils     datasets  methods   base
>>
>>
>>
>> loaded via a namespace (and not attached):
>>
>>    [1] magrittr_1.5    plyr_1.8.3      tools_3.2.1     reshape2_1.4.1  Rcpp_0.12.1     stringi_1.0-1
>>
>>    [7] grid_3.2.1      agridat_1.12    stringr_1.0.0   lattice_0.20-31
>>
>>
>> Nilesh Dighe
>> (806)-252-7492 (Cell)
>> (806)-741-2019 (Office)
>>
>>
>> This e-mail message may contain privileged and/or confidential
>> information, and is intended to be received only by persons entitled
>> to receive such information. If you have received this e-mail in error, please notify the sender immediately. Please delete it and all attachments from any servers, hard drives or any other media. Other use of this e-mail by you is strictly prohibited.
>>
>> All e-mails and attachments sent and received are subject to
>> monitoring, reading and archival by Monsanto, including its subsidiaries. The recipient of this e-mail is solely responsible for checking for the presence of "Viruses" or other "Malware".
>> Monsanto, along with its subsidiaries, accepts no liability for any
>> damage caused by any such code transmitted by or accompanying this e-mail or any attachment.
>>
>>
>> The information contained in this email may be subject to the export
>> control laws and regulations of the United States, potentially
>> including but not limited to the Export Administration Regulations
>> (EAR) and sanctions regulations issued by the U.S. Department of Treasury, Office of Foreign Asset Controls (OFAC).  As a recipient of this information you are obligated to comply with all applicable U.S. export laws and regulations.
>>
>>      [[alternative HTML version deleted]]
>>
>> ______________________________________________
>> R-help at r-project.org<mailto:R-help at r-project.org> mailing list -- To UNSUBSCRIBE and more, see
>> https://stat.ethz.ch/mailman/listinfo/r-help
>> PLEASE do read the posting guide
>> http://www.R-project.org/posting-guide.html
>> and provide commented, minimal, self-contained, reproducible code.
>>
>
> --
> Michael
> http://www.dewey.myzen.co.uk/home.html
> This e-mail message may contain privileged and/or confidential
> information, and is intended to be received only by persons entitled
> to receive such information. If you have received this e-mail in error, please notify the sender immediately. Please delete it and all attachments from any servers, hard drives or any other media. Other use of this e-mail by you is strictly prohibited.
>
> All e-mails and attachments sent and received are subject to
> monitoring, reading and archival by Monsanto, including its subsidiaries. The recipient of this e-mail is solely responsible for checking for the presence of "Viruses" or other "Malware".
> Monsanto, along with its subsidiaries, accepts no liability for any
> damage caused by any such code transmitted by or accompanying this e-mail or any attachment.
>
>
> The information contained in this email may be subject to the export
> control laws and regulations of the United States, potentially
> including but not limited to the Export Administration Regulations
> (EAR) and sanctions regulations issued by the U.S. Department of Treasury, Office of Foreign Asset Controls (OFAC).  As a recipient of this information you are obligated to comply with all applicable U.S. export laws and regulations.
>

--
Michael
http://www.dewey.myzen.co.uk/home.html
This e-mail message may contain privileged and/or confidential information, and is intended to be received only by persons entitled
to receive such information. If you have received this e-mail in error, please notify the sender immediately. Please delete it and
all attachments from any servers, hard drives or any other media. Other use of this e-mail by you is strictly prohibited.

All e-mails and attachments sent and received are subject to monitoring, reading and archival by Monsanto, including its
subsidiaries. The recipient of this e-mail is solely responsible for checking for the presence of "Viruses" or other "Malware".
Monsanto, along with its subsidiaries, accepts no liability for any damage caused by any such code transmitted by or accompanying
this e-mail or any attachment.


The information contained in this email may be subject to the export control laws and regulations of the United States, potentially
including but not limited to the Export Administration Regulations (EAR) and sanctions regulations issued by the U.S. Department of
Treasury, Office of Foreign Asset Controls (OFAC).  As a recipient of this information you are obligated to comply with all
applicable U.S. export laws and regulations.

______________________________________________
R-help at r-project.org<mailto:R-help at r-project.org> mailing list -- To UNSUBSCRIBE and more, see
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

This e-mail message may contain privileged and/or confidential information, and is intended to be received only by persons entitled
to receive such information. If you have received this e-mail in error, please notify the sender immediately. Please delete it and
all attachments from any servers, hard drives or any other media. Other use of this e-mail by you is strictly prohibited.

All e-mails and attachments sent and received are subject to monitoring, reading and archival by Monsanto, including its
subsidiaries. The recipient of this e-mail is solely responsible for checking for the presence of "Viruses" or other "Malware".
Monsanto, along with its subsidiaries, accepts no liability for any damage caused by any such code transmitted by or accompanying
this e-mail or any attachment.


The information contained in this email may be subject to the export control laws and regulations of the United States, potentially
including but not limited to the Export Administration Regulations (EAR) and sanctions regulations issued by the U.S. Department of
Treasury, Office of Foreign Asset Controls (OFAC).  As a recipient of this information you are obligated to comply with all
applicable U.S. export laws and regulations.

	[[alternative HTML version deleted]]



More information about the R-help mailing list