let
adds new variables or modify existing variables. let_if
make the same thing conditionally. take
aggregates data or select
subset of the data by rows or columns. Both functions return
data.table
.
Add new variables: let(mtcars, new_var = 42, new_var2 = new_var*hp)
Filter data: take_if(mtcars, am==0)
Select variables: take(mtcars, am, vs, mpg)
Aggregate data: take(mtcars, mean_mpg = mean(mpg), by = am)
Aggregate all non-grouping columns: take(mtcars, fun = mean, by = am)
let_if( data, i, ..., by, keyby, with = TRUE, nomatch = getOption("datatable.nomatch"), mult = "all", roll = FALSE, rollends = if (roll == "nearest") c(TRUE, TRUE) else if (roll >= 0) c(FALSE, TRUE) else c(TRUE, FALSE), which = FALSE, .SDcols, verbose = getOption("datatable.verbose"), allow.cartesian = getOption("datatable.allow.cartesian"), drop = NULL, on = NULL ) take_if( data, i, ..., by, keyby, with = TRUE, nomatch = getOption("datatable.nomatch"), mult = "all", roll = FALSE, rollends = if (roll == "nearest") c(TRUE, TRUE) else if (roll >= 0) c(FALSE, TRUE) else c(TRUE, FALSE), which = FALSE, .SDcols, verbose = getOption("datatable.verbose"), allow.cartesian = getOption("datatable.allow.cartesian"), drop = NULL, on = NULL, autoname = TRUE, fun = NULL ) take( data, ..., by, keyby, with = TRUE, nomatch = getOption("datatable.nomatch"), mult = "all", roll = FALSE, rollends = if (roll == "nearest") c(TRUE, TRUE) else if (roll >= 0) c(FALSE, TRUE) else c(TRUE, FALSE), which = FALSE, .SDcols, verbose = getOption("datatable.verbose"), allow.cartesian = getOption("datatable.allow.cartesian"), drop = NULL, on = NULL, autoname = TRUE, fun = NULL ) let( data, ..., by, keyby, with = TRUE, nomatch = getOption("datatable.nomatch"), mult = "all", roll = FALSE, rollends = if (roll == "nearest") c(TRUE, TRUE) else if (roll >= 0) c(FALSE, TRUE) else c(TRUE, FALSE), which = FALSE, .SDcols, verbose = getOption("datatable.verbose"), allow.cartesian = getOption("datatable.allow.cartesian"), drop = NULL, on = NULL ) # S3 method for default let( data, ..., by, keyby, with = TRUE, nomatch = getOption("datatable.nomatch"), mult = "all", roll = FALSE, rollends = if (roll == "nearest") c(TRUE, TRUE) else if (roll >= 0) c(FALSE, TRUE) else c(TRUE, FALSE), which = FALSE, .SDcols, verbose = getOption("datatable.verbose"), allow.cartesian = getOption("datatable.allow.cartesian"), drop = NULL, on = NULL ) sort_by(data, ..., na.last = FALSE)
data | data.table/data.frame data.frame will be automatically converted
to data.table. |
---|---|
i | integer/logical vector. Supposed to use to subset/conditional
modifications of |
... | List of variables or name-value pairs of summary/modifications
functions. The name will be the name of the variable in the result. In the
|
by | unquoted name of grouping variable of list of unquoted names of grouping variables. For details see data.table |
keyby | Same as |
with | logical. For details see data.table. |
nomatch | Same as nomatch in match. For details see data.table. |
mult | For details see data.table. |
roll | For details see data.table. |
rollends | For details see data.table. |
which | For details see data.table. |
.SDcols | Specifies the columns of x to be included in the special symbol .SD which stands for Subset of data.table. May be character column names or numeric positions. For details see data.table. |
verbose | logical. For details see data.table. |
allow.cartesian | For details see data.table. |
drop | For details see data.table. |
on | For details see data.table. |
autoname | logical. TRUE by default. Should we create names for unnamed expressions in |
fun | function which will be applied to all variables in |
na.last | logical. FALSE by default. If TRUE, missing values in the data are put last; if FALSE, they are put first. |
data.table. let
returns its result invisibly.
# examples form 'dplyr' package data(mtcars) # Newly created variables are available immediately mtcars %>% let( cyl2 = cyl * 2, cyl4 = cyl2 * 2 ) %>% head()#> mpg cyl disp hp drat wt qsec vs am gear carb cyl2 cyl4 #> 1: 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 12 24 #> 2: 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 12 24 #> 3: 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 8 16 #> 4: 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 12 24 #> 5: 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 16 32 #> 6: 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1 12 24# You can also use let() to remove variables and # modify existing variables mtcars %>% let( mpg = NULL, disp = disp * 0.0163871 # convert to litres ) %>% head()#> cyl disp hp drat wt qsec vs am gear carb #> 1: 6 2.621936 110 3.90 2.620 16.46 0 1 4 4 #> 2: 6 2.621936 110 3.90 2.875 17.02 0 1 4 4 #> 3: 4 1.769807 93 3.85 2.320 18.61 1 1 4 1 #> 4: 6 4.227872 110 3.08 3.215 19.44 1 0 3 1 #> 5: 8 5.899356 175 3.15 3.440 17.02 0 0 3 2 #> 6: 6 3.687098 105 2.76 3.460 20.22 1 0 3 1# window functions are useful for grouped computations mtcars %>% let(rank = rank(-mpg, ties.method = "min"), by = cyl) %>% head()#> mpg cyl disp hp drat wt qsec vs am gear carb rank #> 1: 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 2 #> 2: 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 2 #> 3: 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 8 #> 4: 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 1 #> 5: 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 2 #> 6: 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1 6#> mpg disp hp drat wt qsec vs am gear carb #> 1: 21.0 160 110 3.90 2.620 16.46 0 1 4 4 #> 2: 21.0 160 110 3.90 2.875 17.02 0 1 4 4 #> 3: 22.8 108 93 3.85 2.320 18.61 1 1 4 1 #> 4: 21.4 258 110 3.08 3.215 19.44 1 0 3 1 #> 5: 18.7 360 175 3.15 3.440 17.02 0 0 3 2 #> 6: 18.1 225 105 2.76 3.460 20.22 1 0 3 1#> mpg cyl disp hp drat wt qsec vs am gear carb displ_l #> 1: 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 2.621932 #> 2: 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 2.621932 #> 3: 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 1.769804 #> 4: 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 4.227866 #> 5: 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 5.899347 #> 6: 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1 3.687092# keeps only the variables you create mtcars %>% take(displ_l = disp / 61.0237)#> displ_l #> 1: 2.621932 #> 2: 2.621932 #> 3: 1.769804 #> 4: 4.227866 #> 5: 5.899347 #> 6: 3.687092 #> 7: 5.899347 #> 8: 2.403984 #> 9: 2.307300 #> 10: 2.746474 #> 11: 2.746474 #> 12: 4.519556 #> 13: 4.519556 #> 14: 4.519556 #> 15: 7.734700 #> 16: 7.538055 #> 17: 7.210313 #> 18: 1.289663 #> 19: 1.240502 #> 20: 1.165121 #> 21: 1.968088 #> 22: 5.211090 #> 23: 4.981671 #> 24: 5.735477 #> 25: 6.554830 #> 26: 1.294579 #> 27: 1.971365 #> 28: 1.558411 #> 29: 5.751864 #> 30: 2.376126 #> 31: 4.932510 #> 32: 1.982836 #> displ_l# can refer to both contextual variables and variable names: var = 100 mtcars %>% let(cyl = cyl * var) %>% head()#> Error in cyl * var: non-numeric argument to binary operator# filter by condition mtcars %>% take_if(am==0)#> mpg cyl disp hp drat wt qsec vs am gear carb #> 1: 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 #> 2: 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 #> 3: 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 #> 4: 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 #> 5: 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 #> 6: 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 #> 7: 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 #> 8: 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 #> 9: 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 #> 10: 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 #> 11: 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 #> 12: 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 #> 13: 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4 #> 14: 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 #> 15: 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 #> 16: 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2 #> 17: 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 #> 18: 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 #> 19: 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2#> mpg cyl disp hp drat wt qsec vs am gear carb #> 1: 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 #> 2: 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 #> 3: 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 #> 4: 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1# A 'take' with summary functions applied without 'by' argument returns an aggregated data mtcars %>% take(mean = mean(disp), n = .N)#> mean n #> 1: 230.7219 32#> cyl mean n #> 1: 6 183.3143 7 #> 2: 4 105.1364 11 #> 3: 8 353.1000 14#> vsam mpg cyl disp hp drat wt qsec #> 1: 1 20.28462 5.692308 189.4692 138.46154 3.738462 3.038846 18.04231 #> 2: 2 28.37143 4.000000 89.8000 80.57143 4.148571 2.028286 18.70000 #> 3: 0 15.05000 8.000000 357.6167 194.16667 3.120833 4.104083 17.14250 #> gear carb #> 1: 4.076923 3.307692 #> 2: 4.142857 1.428571 #> 3: 3.000000 3.083333#> Error in `[.data.table`(data, , `:=`("mean_cyl", eval(var))): invalid type/length (closure/32) in vector allocation#> eval(var) #> 1: 6.1875#> Error in eval(lhs, parent.frame(), parent.frame()): object 'new_var' not found#> mean_cyl #> 1: 6.1875######################################## # examples from data.table dat = data.table( x=rep(c("b","a","c"), each=3), y=c(1,3,6), v=1:9 ) # basic row subset operations take_if(dat, 2) # 2nd row#> x y v #> 1: b 3 2take_if(dat, 3:2) # 3rd and 2nd row#> x y v #> 1: b 6 3 #> 2: b 3 2#> x y v #> 1: a 1 4 #> 2: a 3 5 #> 3: a 6 6 #> 4: b 1 1 #> 5: b 3 2 #> 6: b 6 3 #> 7: c 1 7 #> 8: c 3 8 #> 9: c 6 9take_if(dat, y>2) # all rows where dat$y > 2#> x y v #> 1: b 3 2 #> 2: b 6 3 #> 3: a 3 5 #> 4: a 6 6 #> 5: c 3 8 #> 6: c 6 9take_if(dat, y>2 & v>5) # compound logical expressions#> x y v #> 1: a 6 6 #> 2: c 3 8 #> 3: c 6 9take_if(dat, !2:4) # all rows other than 2:4#> x y v #> 1: b 1 1 #> 2: a 3 5 #> 3: a 6 6 #> 4: c 1 7 #> 5: c 3 8 #> 6: c 6 9take_if(dat, -(2:4)) # same#> x y v #> 1: b 1 1 #> 2: a 3 5 #> 3: a 6 6 #> 4: c 1 7 #> 5: c 3 8 #> 6: c 6 9# select|compute columns take(dat, v) # v column (as data.table)#> v #> 1: 1 #> 2: 2 #> 3: 3 #> 4: 4 #> 5: 5 #> 6: 6 #> 7: 7 #> 8: 8 #> 9: 9#> sum(v) #> 1: 45#> sv #> 1: 45take(dat, v, v*2) # return two column data.table, v and v*2#> v v * 2 #> 1: 1 2 #> 2: 2 4 #> 3: 3 6 #> 4: 4 8 #> 5: 5 10 #> 6: 6 12 #> 7: 7 14 #> 8: 8 16 #> 9: 9 18#> sum(v) #> 1: 5#> sv #> 1: 5#> x sum(v) #> 1: b 6 #> 2: a 15 #> 3: c 24#> x sum(v) #> 1: a 15 #> 2: b 6 #> 3: c 24#> x sum(v) #> 1: b 6 #> 2: c 24#> x sum(v) #> 1: b 6 #> 2: c 24# more on special symbols, see also ?"data.table::special-symbols" take_if(dat, .N) # last row#> x y v #> 1: c 6 9take(dat, .N) # total number of rows in DT#> .N #> 1: 9take(dat, .N, by=x) # number of rows in each group#> x .N #> 1: b 3 #> 2: a 3 #> 3: c 3take(dat, .I[1], by=x) # row number in DT corresponding to each group#> x .I[1] #> 1: b 1 #> 2: a 4 #> 3: c 7# add/update/delete by reference # [] at the end of expression is for autoprinting let(dat, grp = .GRP, by=x)[] # add a group counter column#> x y v grp #> 1: b 1 1 1 #> 2: b 3 2 1 #> 3: b 6 3 1 #> 4: a 1 4 2 #> 5: a 3 5 2 #> 6: a 6 6 2 #> 7: c 1 7 3 #> 8: c 3 8 3 #> 9: c 6 9 3let(dat, z = 42L)[] # add new column by reference#> x y v grp z #> 1: b 1 1 1 42 #> 2: b 3 2 1 42 #> 3: b 6 3 1 42 #> 4: a 1 4 2 42 #> 5: a 3 5 2 42 #> 6: a 6 6 2 42 #> 7: c 1 7 3 42 #> 8: c 3 8 3 42 #> 9: c 6 9 3 42let(dat, z = NULL)[] # remove column by reference#> x y v grp #> 1: b 1 1 1 #> 2: b 3 2 1 #> 3: b 6 3 1 #> 4: a 1 4 2 #> 5: a 3 5 2 #> 6: a 6 6 2 #> 7: c 1 7 3 #> 8: c 3 8 3 #> 9: c 6 9 3let_if(dat, x=="a", v = 42L)[] # subassign to existing v column by reference#> x y v grp #> 1: b 1 1 1 #> 2: b 3 2 1 #> 3: b 6 3 1 #> 4: a 1 42 2 #> 5: a 3 42 2 #> 6: a 6 42 2 #> 7: c 1 7 3 #> 8: c 3 8 3 #> 9: c 6 9 3let_if(dat, x=="b", v2 = 84L)[] # subassign to new column by reference (NA padded)#> x y v grp v2 #> 1: b 1 1 1 84 #> 2: b 3 2 1 84 #> 3: b 6 3 1 84 #> 4: a 1 42 2 NA #> 5: a 3 42 2 NA #> 6: a 6 42 2 NA #> 7: c 1 7 3 NA #> 8: c 3 8 3 NA #> 9: c 6 9 3 NA#> x y v grp v2 m #> 1: b 1 1 1 84 2 #> 2: b 3 2 1 84 2 #> 3: b 6 3 1 84 2 #> 4: a 1 42 2 NA 42 #> 5: a 3 42 2 NA 42 #> 6: a 6 42 2 NA 42 #> 7: c 1 7 3 NA 8 #> 8: c 3 8 3 NA 8 #> 9: c 6 9 3 NA 8# advanced usage dat = data.table(x=rep(c("b","a","c"), each=3), v=c(1,1,1,2,2,1,1,2,2), y=c(1,3,6), a=1:9, b=9:1) take(dat, sum(v), by=list(y%%2)) # expressions in by#> y sum(v) #> 1: 1 9 #> 2: 0 4#> bool sum(v) #> 1: 1 9 #> 2: 0 4take(dat, fun = sum, by=x) # sum of all (other) columns for each group#> x v y a b #> 1: b 3 10 6 24 #> 2: a 5 10 15 15 #> 3: c 5 10 24 6#> x y MySum MyMin MyMax #> 1: b 1 2 1 1 #> 2: b 0 1 1 1 #> 3: a 1 4 2 2 #> 4: a 0 1 1 1 #> 5: c 1 3 1 2 #> 6: c 0 2 2 2#> x seq #> 1: b 1 #> 2: b 2 #> 3: b 3 #> 4: b 4 #> 5: b 5 #> 6: b 6 #> 7: b 7 #> 8: b 8 #> 9: b 9 #> 10: a 4 #> 11: a 5 #> 12: a 6 #> 13: c 7 #> 14: c 6 #> 15: c 5 #> 16: c 4 #> 17: c 3#> x V1 #> 1: b 3 #> 2: a 5 #> 3: c 5#> x V1 #> 1: a 5 #> 2: c 5 #> 3: b 3