Carry out data manipulation within specified groups.
group_dt(.data, by = NULL, ...)
rowwise_dt(.data, ...)
data.table
If you want to use summarise_dt and mutate_dt in group_dt, it is better to use
the "by" parameter of those functions instead. That is much faster because you
don't have to use .SD (which takes extra time to copy).
https://stackoverflow.com/questions/36802385/use-by-each-row-for-data-table
iris %>% group_dt(by = Species,slice_dt(1:2))
#> Species Sepal.Length Sepal.Width Petal.Length Petal.Width
#> <fctr> <num> <num> <num> <num>
#> 1: setosa 5.1 3.5 1.4 0.2
#> 2: setosa 4.9 3.0 1.4 0.2
#> 3: versicolor 7.0 3.2 4.7 1.4
#> 4: versicolor 6.4 3.2 4.5 1.5
#> 5: virginica 6.3 3.3 6.0 2.5
#> 6: virginica 5.8 2.7 5.1 1.9
iris %>% group_dt(Species,filter_dt(Sepal.Length == max(Sepal.Length)))
#> Species Sepal.Length Sepal.Width Petal.Length Petal.Width
#> <fctr> <num> <num> <num> <num>
#> 1: setosa 5.8 4.0 1.2 0.2
#> 2: versicolor 7.0 3.2 4.7 1.4
#> 3: virginica 7.9 3.8 6.4 2.0
iris %>% group_dt(Species,summarise_dt(new = max(Sepal.Length)))
#> Species new
#> <fctr> <num>
#> 1: setosa 5.8
#> 2: versicolor 7.0
#> 3: virginica 7.9
# you can use pipes inside `group_dt`
iris %>% group_dt(Species,
mutate_dt(max= max(Sepal.Length)) %>%
summarise_dt(sum=sum(Sepal.Length)))
#> Species sum
#> <fctr> <num>
#> 1: setosa 250.3
#> 2: versicolor 296.8
#> 3: virginica 329.4
# for users familiar with data.table, you can work on .SD directly
# the following code gets the first and last row of each group
iris %>%
group_dt(
by = Species,
rbind(.SD[1],.SD[.N])
)
#> Species Sepal.Length Sepal.Width Petal.Length Petal.Width
#> <fctr> <num> <num> <num> <num>
#> 1: setosa 5.1 3.5 1.4 0.2
#> 2: setosa 5.0 3.3 1.4 0.2
#> 3: versicolor 7.0 3.2 4.7 1.4
#> 4: versicolor 5.7 2.8 4.1 1.3
#> 5: virginica 6.3 3.3 6.0 2.5
#> 6: virginica 5.9 3.0 5.1 1.8
# for summarise_dt, you can use "by" to calculate within the group
mtcars %>%
summarise_dt(
disp = mean(disp),
hp = mean(hp),
by = cyl
)
#> cyl disp hp
#> <num> <num> <num>
#> 1: 6 183.3143 122.28571
#> 2: 4 105.1364 82.63636
#> 3: 8 353.1000 209.21429
# but you could also, of course, use group_dt
mtcars %>%
group_dt(by =.(vs,am),
summarise_dt(avg = mean(mpg)))
#> vs am avg
#> <num> <num> <num>
#> 1: 0 1 19.75000
#> 2: 1 1 28.37143
#> 3: 1 0 20.74286
#> 4: 0 0 15.05000
# a list of variables can also be used
mtcars %>%
group_dt(by =list(vs,am),
summarise_dt(avg = mean(mpg)))
#> vs am avg
#> <num> <num> <num>
#> 1: 0 1 19.75000
#> 2: 1 1 28.37143
#> 3: 1 0 20.74286
#> 4: 0 0 15.05000
# examples for `rowwise_dt`
df <- data.table(x = 1:2, y = 3:4, z = 4:5)
df %>% mutate_dt(m = mean(c(x, y, z)))
#> x y z m
#> <int> <int> <int> <num>
#> 1: 1 3 4 3.166667
#> 2: 2 4 5 3.166667
df %>% rowwise_dt(
mutate_dt(m = mean(c(x, y, z)))
)
#> x y z m
#> <int> <int> <int> <num>
#> 1: 1 3 4 2.666667
#> 2: 2 4 5 3.666667