R: ranking variable per trial according to time column

Question

My data looks like this:

Subject  Trial  Task  Time  Fixation
..
1        1       2    1      0.335
1        1       2    456    NA
1        1       2    765    0.165
1        1       2    967    0.445
..
2        3       1    1      0.665
2        3       1    300    0.556
2        3       1    570    NA
2        3       1    900    NA
..
15       5       3    1      0.766
15       5       3    567    0.254
15       5       3    765    0.167
15       5       3    1465   NA
..

I want to create a column FixationID that numbers the fixations within each trial in order of the Time column (1, 2, 3, 4, ...); rows where Fixation is NA should get NA. The Time column gives the time course in milliseconds within each trial, and every trial starts at Time 1. Trials have different lengths.

I want my data to look like this:

Subject  Trial  Task  Time  Fixation FixationID
..
1        1       2    1      0.335    1
1        1       2    456    NA       NA
1        1       2    765    0.165    2
1        1       2    967    0.445    3
..
2        3       1    1      0.665    1
2        3       1    300    0.556    2
2        3       1    570    NA       NA
2        3       1    900    NA       NA
..
15       5       3    1      0.766    1
15       5       3    567    0.254    2
15       5       3    765    0.167    3
15       5       3    1465   NA       NA
..

I tried

library(data.table)
# Attempt: add FixID to the rows with a non-NA Fixation, grouped by Trial.
# NOTE(review): seq_len(.N)[order(Time)] is just order(Time), i.e. the row
# positions that would sort the group, not each row's rank; and by = Trial
# pools the rows of every Subject that shares a Trial number.
setDT(mydata)[!is.na(Fixation), FixID := 
              seq_len(.N)[order(Time)], by = Trial]

but what I get is ranking 1,16,31,45,57.. for my Subject 1 Trial 1. I want 1,2,3,4,5..

Can anyone help me with this?

Excerpt from my data:

# Excerpt of the asker's data (looks like dput() output): a 30-row data.frame
# with columns Subject, Trial, Task, Time, Fixation, covering Subject 1,
# Trials 1 and 2. Not assigned to a name here.
structure(list(Subject = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L), Trial = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 2L, 2L, 2L, 2L), Task = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L), Time = c(1L, 385L, 571L, 638L, 951L, 
1020L, 1349L, 1401L, 1661L, 1706L, 2042L, 2067L, 2322L, 2375L, 
2540L, 2660L, 2686L, 3108L, 3172L, 3423L, 3462L, 3845L, 3870L, 
3969L, 4099L, 4132L, 1L, 471L, 513L, 697L), Fixation = c(0.383, 
0.185, NA, 0.312, NA, 0.328, NA, 0.259, NA, 0.335, NA, 0.254, 
NA, 0.164, 0.119, NA, 0.421, NA, 0.25, NA, 0.382, NA, 0.0979999999999999, 
0.129, NA, 0.335, 0.469, NA, 0.183, NA)), .Names = c("Subject", 
"Trial", "Task", "Time", "Fixation"), row.names = c(NA, 30L), class = "data.frame")

Nice clear presentation of the problem! – Gregor Thomas, Mar 12 '20 at 15:39

score 1 · Answer 1 · answered Mar 12 '20 at 15:30

What about this:

library(data.table)
setDT(mydata)
# Rank Time among the rows that have a fixation, separately for every
# Subject/Trial combination. Grouping by Trial alone would pool all subjects
# that share a trial number, producing tied (averaged) ranks across subjects.
# Rows with NA Fixation are excluded by `i` and therefore keep FixID = NA.
mydata[!is.na(Fixation), FixID := frank(Time), by = .(Subject, Trial)]

head(mydata, 10)
    Subject Trial Task Time Fixation FixID
 1:       1     1    2    1    0.383     1
 2:       1     1    2  385    0.185     2
 3:       1     1    2  571       NA    NA
 4:       1     1    2  638    0.312     3
 5:       1     1    2  951       NA    NA
 6:       1     1    2 1020    0.328     4
 7:       1     1    2 1349       NA    NA
 8:       1     1    2 1401    0.259     5
 9:       1     1    2 1661       NA    NA
10:       1     1    2 1706    0.335     6

tail(mydata, 10)
    Subject Trial Task Time Fixation FixID
 1:       1     1    2 3462    0.382    12
 2:       1     1    2 3845       NA    NA
 3:       1     1    2 3870    0.098    13
 4:       1     1    2 3969    0.129    14
 5:       1     1    2 4099       NA    NA
 6:       1     1    2 4132    0.335    15
 7:       1     2    2    1    0.469     1
 8:       1     2    2  471       NA    NA
 9:       1     2    2  513    0.183     2
10:       1     2    2  697       NA    NA

score 0 · Accepted Answer · answered Mar 12 '20 at 15:28

Using ave on as.logical(Fixation) and @josliber's NA-ignoring cumsum code.

# Number the fixations 1, 2, 3, ... within each Subject/Trial group, leaving
# NA on the rows where Fixation is NA.
# NOTE(review): IDs follow the current row order, so this presumably expects
# rows already sorted by Time within each trial. as.logical() also turns a
# Fixation of exactly 0 into FALSE, so such a row would repeat the previous
# ID instead of getting its own.
mydata$FixationID <- with(mydata, ave(
  as.logical(Fixation), Subject, Trial,
  FUN = function(flag) {
    # Running count of fixations seen so far, with NA counted as 0.
    running <- cumsum(replace(flag, is.na(flag), 0))
    # Adding 0 * flag puts NA back on the rows that had no fixation.
    running + 0 * flag
  }
))

Result

mydata
#    Subject Trial task Time Fixation FixationID
# 1        1     1    1    1    0.596          1
# 10       1     1    1  500    0.016          2
# 19       1     1    1  512       NA         NA
# 28       1     1    1  524       NA         NA
# 4        1     2    2    1    0.688          1
# 13       1     2    2  501       NA         NA
# 22       1     2    2  513       NA         NA
# 31       1     2    2  525       NA         NA
# 7        1     3    3    1    0.582          1
# 16       1     3    3  502       NA         NA
# 25       1     3    3  514    0.369          2
# 34       1     3    3  526    0.847          3
# 2        2     1    1    1       NA         NA
# 11       2     1    1  503    0.779          1
# 20       2     1    1  515    0.950          2
# 29       2     1    1  527    0.304          3
# 5        2     2    2    1    0.158          1
# 14       2     2    2  504    0.281          2
# 23       2     2    2  516    0.360          3
# 32       2     2    2  528    0.535          4
# 8        2     3    3    1       NA         NA
# 17       2     3    3  505    0.717          1
# 26       2     3    3  517       NA         NA
# 35       2     3    3  529    0.959          2
# 3        3     1    1    1    0.174          1
# 12       3     1    1  506    0.278          2
# 21       3     1    1  518    0.784          3
# 30       3     1    1  530       NA         NA
# 6        3     2    2    1    0.439          1
# 15       3     2    2  507    0.857          2
# 24       3     2    2  519       NA         NA
# 33       3     2    2  531    0.019          3
# 9        3     3    3    1    0.175          1
# 18       3     3    3  508    0.314          2
# 27       3     3    3  520       NA         NA
# 36       3     3    3  532    0.845          3

Data

# Example data used for the result printed in this answer: a 36-row data.frame
# with Subjects 1-3, Trials 1-3 and four rows per Subject/Trial. Note the
# column is named `task` (lower case) here and the row names are not
# sequential.
mydata <- structure(list(Subject = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), Trial = c(1L, 1L, 
1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L, 
2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 
3L, 3L), task = c(1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 
1, 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3
), Time = c(1, 500, 512, 524, 1, 501, 513, 525, 1, 502, 514, 
526, 1, 503, 515, 527, 1, 504, 516, 528, 1, 505, 517, 529, 1, 
506, 518, 530, 1, 507, 519, 531, 1, 508, 520, 532), Fixation = c(0.596, 
0.016, NA, NA, 0.688, NA, NA, NA, 0.582, NA, 0.369, 0.847, NA, 
0.779, 0.95, 0.304, 0.158, 0.281, 0.36, 0.535, NA, 0.717, NA, 
0.959, 0.174, 0.278, 0.784, NA, 0.439, 0.857, NA, 0.019, 0.175, 
0.314, NA, 0.845)), row.names = c(1L, 10L, 19L, 28L, 4L, 13L, 
22L, 31L, 7L, 16L, 25L, 34L, 2L, 11L, 20L, 29L, 5L, 14L, 23L, 
32L, 8L, 17L, 26L, 35L, 3L, 12L, 21L, 30L, 6L, 15L, 24L, 33L, 
9L, 18L, 27L, 36L), class = "data.frame")

score 0 · Answer 3 · answered Mar 13 '20 at 00:49

0

Here is another option which should be fast:

setDT(mydata)
# Row numbers in Subject/Trial/Time order, keeping only rows with a fixation.
time_order <- mydata[, order(Subject, Trial, Time)]
fix_rows <- time_order[!is.na(mydata$Fixation[time_order])]
# rowid() counts 1, 2, 3, ... within each Subject/Trial combination of the
# selected rows; := writes each count back to the row it was computed for, so
# the result is correct even when the table is not pre-sorted or trial numbers
# repeat across subjects. Rows with NA Fixation keep FixID = NA.
mydata[fix_rows, FixID := rowid(Subject, Trial)]
mydata

answered Mar 13 '20 at 00:49

chinsoon12

25,005
4
25
35

R: ranking variable per trial according to time column

3 Answers

Result