Create or melt list columns in data.frame.
Analogous function for nest
and unnest
in tidyr.
unnest_dt
will automatically remove other list-columns except for the
target list-columns (which would be unnested later). Also, squeeze_dt
is
designed to merge multiple columns into list column.
nest_dt(.data, ..., mcols = NULL, .name = "ndt")
unnest_dt(.data, ...)
squeeze_dt(.data, ..., .name = "ndt")
chop_dt(.data, ...)
unchop_dt(.data, ...)
data.table, nested or unnested
The variables for nest group(for nest_dt
),
columns to be nested(for squeeze_dt
and chop_dt
),
or column(s) to be unnested(for unnest_dt
).
Could recieve anything that select_dt
could receive.
Name-variable pairs in the list, form like
Character. The nested column name. Defaults to "ndt".
list(petal="^Pe",sepal="^Se")
, see example.
data.table, nested or unnested
In the nest_dt
, the data would be nested to a column named `ndt`,
which is short for nested data.table.
The squeeze_dt
would not remove the originial columns.
The unchop_dt
is the reverse operation of chop_dt
.
These functions are experiencing the experimental stage, especially
the unnest_dt
. If they don't work on some circumtances, try tidyr
package.
https://www.r-bloggers.com/much-faster-unnesting-with-data-table/
https://stackoverflow.com/questions/25430986/create-nested-data-tables-by-collapsing-rows-into-new-data-tables
# examples for nest_dt
# nest by which columns?
mtcars %>% nest_dt(cyl)
#> cyl ndt
#> <num> <list>
#> 1: 6 <data.table[7x10]>
#> 2: 4 <data.table[11x10]>
#> 3: 8 <data.table[14x10]>
mtcars %>% nest_dt("cyl")
#> cyl ndt
#> <num> <list>
#> 1: 6 <data.table[7x10]>
#> 2: 4 <data.table[11x10]>
#> 3: 8 <data.table[14x10]>
mtcars %>% nest_dt(cyl,vs)
#> cyl vs ndt
#> <num> <num> <list>
#> 1: 6 0 <data.table[3x9]>
#> 2: 4 1 <data.table[10x9]>
#> 3: 6 1 <data.table[4x9]>
#> 4: 8 0 <data.table[14x9]>
#> 5: 4 0 <data.table[1x9]>
mtcars %>% nest_dt(vs:am)
#> vs am ndt
#> <num> <num> <list>
#> 1: 0 1 <data.table[6x9]>
#> 2: 1 1 <data.table[7x9]>
#> 3: 1 0 <data.table[7x9]>
#> 4: 0 0 <data.table[12x9]>
mtcars %>% nest_dt("cyl|vs")
#> cyl vs ndt
#> <num> <num> <list>
#> 1: 6 0 <data.table[3x9]>
#> 2: 4 1 <data.table[10x9]>
#> 3: 6 1 <data.table[4x9]>
#> 4: 8 0 <data.table[14x9]>
#> 5: 4 0 <data.table[1x9]>
mtcars %>% nest_dt(c("cyl","vs"))
#> cyl vs ndt
#> <num> <num> <list>
#> 1: 6 0 <data.table[3x9]>
#> 2: 4 1 <data.table[10x9]>
#> 3: 6 1 <data.table[4x9]>
#> 4: 8 0 <data.table[14x9]>
#> 5: 4 0 <data.table[1x9]>
# change the nested column name
mtcars %>% nest_dt(cyl,.name = "data")
#> cyl data
#> <num> <list>
#> 1: 6 <data.table[7x10]>
#> 2: 4 <data.table[11x10]>
#> 3: 8 <data.table[14x10]>
# nest two columns directly
iris %>% nest_dt(mcols = list(petal="^Pe",sepal="^Se"))
#> Key: <Species>
#> Species petal sepal
#> <fctr> <list> <list>
#> 1: setosa <data.table[50x4]> <data.table[50x4]>
#> 2: versicolor <data.table[50x4]> <data.table[50x4]>
#> 3: virginica <data.table[50x4]> <data.table[50x4]>
# nest more flexibly
iris %>% nest_dt(mcols = list(ndt1 = 1:3,
ndt2 = "Pe",
ndt3 = Sepal.Length:Sepal.Width))
#> Key: <Species>
#> Species ndt1 ndt2 ndt3
#> <fctr> <list> <list> <list>
#> 1: setosa <data.table[50x4]> <data.table[50x4]> <data.table[50x4]>
#> 2: versicolor <data.table[50x4]> <data.table[50x4]> <data.table[50x4]>
#> 3: virginica <data.table[50x4]> <data.table[50x4]> <data.table[50x4]>
# examples for unnest_dt
# unnest which column?
mtcars %>% nest_dt("cyl|vs") %>%
unnest_dt(ndt)
#> cyl vs mpg disp hp drat wt qsec am gear
#> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>
#> 1: 6 0 21.0 160.0 110 3.90 2.620 16.46 1 4
#> 2: 6 0 21.0 160.0 110 3.90 2.875 17.02 1 4
#> 3: 6 0 19.7 145.0 175 3.62 2.770 15.50 1 5
#> 4: 4 1 22.8 108.0 93 3.85 2.320 18.61 1 4
#> 5: 4 1 24.4 146.7 62 3.69 3.190 20.00 0 4
#> 6: 4 1 22.8 140.8 95 3.92 3.150 22.90 0 4
#> 7: 4 1 32.4 78.7 66 4.08 2.200 19.47 1 4
#> 8: 4 1 30.4 75.7 52 4.93 1.615 18.52 1 4
#> 9: 4 1 33.9 71.1 65 4.22 1.835 19.90 1 4
#> 10: 4 1 21.5 120.1 97 3.70 2.465 20.01 0 3
#> 11: 4 1 27.3 79.0 66 4.08 1.935 18.90 1 4
#> 12: 4 1 30.4 95.1 113 3.77 1.513 16.90 1 5
#> 13: 4 1 21.4 121.0 109 4.11 2.780 18.60 1 4
#> 14: 6 1 21.4 258.0 110 3.08 3.215 19.44 0 3
#> 15: 6 1 18.1 225.0 105 2.76 3.460 20.22 0 3
#> 16: 6 1 19.2 167.6 123 3.92 3.440 18.30 0 4
#> 17: 6 1 17.8 167.6 123 3.92 3.440 18.90 0 4
#> 18: 8 0 18.7 360.0 175 3.15 3.440 17.02 0 3
#> 19: 8 0 14.3 360.0 245 3.21 3.570 15.84 0 3
#> 20: 8 0 16.4 275.8 180 3.07 4.070 17.40 0 3
#> 21: 8 0 17.3 275.8 180 3.07 3.730 17.60 0 3
#> 22: 8 0 15.2 275.8 180 3.07 3.780 18.00 0 3
#> 23: 8 0 10.4 472.0 205 2.93 5.250 17.98 0 3
#> 24: 8 0 10.4 460.0 215 3.00 5.424 17.82 0 3
#> 25: 8 0 14.7 440.0 230 3.23 5.345 17.42 0 3
#> 26: 8 0 15.5 318.0 150 2.76 3.520 16.87 0 3
#> 27: 8 0 15.2 304.0 150 3.15 3.435 17.30 0 3
#> 28: 8 0 13.3 350.0 245 3.73 3.840 15.41 0 3
#> 29: 8 0 19.2 400.0 175 3.08 3.845 17.05 0 3
#> 30: 8 0 15.8 351.0 264 4.22 3.170 14.50 1 5
#> 31: 8 0 15.0 301.0 335 3.54 3.570 14.60 1 5
#> 32: 4 0 26.0 120.3 91 4.43 2.140 16.70 1 5
#> cyl vs mpg disp hp drat wt qsec am gear
#> 1 variable(s) not shown: [carb <num>]
mtcars %>% nest_dt("cyl|vs") %>%
unnest_dt("ndt")
#> cyl vs mpg disp hp drat wt qsec am gear
#> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>
#> 1: 6 0 21.0 160.0 110 3.90 2.620 16.46 1 4
#> 2: 6 0 21.0 160.0 110 3.90 2.875 17.02 1 4
#> 3: 6 0 19.7 145.0 175 3.62 2.770 15.50 1 5
#> 4: 4 1 22.8 108.0 93 3.85 2.320 18.61 1 4
#> 5: 4 1 24.4 146.7 62 3.69 3.190 20.00 0 4
#> 6: 4 1 22.8 140.8 95 3.92 3.150 22.90 0 4
#> 7: 4 1 32.4 78.7 66 4.08 2.200 19.47 1 4
#> 8: 4 1 30.4 75.7 52 4.93 1.615 18.52 1 4
#> 9: 4 1 33.9 71.1 65 4.22 1.835 19.90 1 4
#> 10: 4 1 21.5 120.1 97 3.70 2.465 20.01 0 3
#> 11: 4 1 27.3 79.0 66 4.08 1.935 18.90 1 4
#> 12: 4 1 30.4 95.1 113 3.77 1.513 16.90 1 5
#> 13: 4 1 21.4 121.0 109 4.11 2.780 18.60 1 4
#> 14: 6 1 21.4 258.0 110 3.08 3.215 19.44 0 3
#> 15: 6 1 18.1 225.0 105 2.76 3.460 20.22 0 3
#> 16: 6 1 19.2 167.6 123 3.92 3.440 18.30 0 4
#> 17: 6 1 17.8 167.6 123 3.92 3.440 18.90 0 4
#> 18: 8 0 18.7 360.0 175 3.15 3.440 17.02 0 3
#> 19: 8 0 14.3 360.0 245 3.21 3.570 15.84 0 3
#> 20: 8 0 16.4 275.8 180 3.07 4.070 17.40 0 3
#> 21: 8 0 17.3 275.8 180 3.07 3.730 17.60 0 3
#> 22: 8 0 15.2 275.8 180 3.07 3.780 18.00 0 3
#> 23: 8 0 10.4 472.0 205 2.93 5.250 17.98 0 3
#> 24: 8 0 10.4 460.0 215 3.00 5.424 17.82 0 3
#> 25: 8 0 14.7 440.0 230 3.23 5.345 17.42 0 3
#> 26: 8 0 15.5 318.0 150 2.76 3.520 16.87 0 3
#> 27: 8 0 15.2 304.0 150 3.15 3.435 17.30 0 3
#> 28: 8 0 13.3 350.0 245 3.73 3.840 15.41 0 3
#> 29: 8 0 19.2 400.0 175 3.08 3.845 17.05 0 3
#> 30: 8 0 15.8 351.0 264 4.22 3.170 14.50 1 5
#> 31: 8 0 15.0 301.0 335 3.54 3.570 14.60 1 5
#> 32: 4 0 26.0 120.3 91 4.43 2.140 16.70 1 5
#> cyl vs mpg disp hp drat wt qsec am gear
#> 1 variable(s) not shown: [carb <num>]
df <- data.table(
a = list(c("a", "b"), "c"),
b = list(c(TRUE,TRUE),FALSE),
c = list(3,c(1,2)),
d = c(11, 22)
)
df
#> a b c d
#> <list> <list> <list> <num>
#> 1: a,b TRUE,TRUE 3 11
#> 2: c FALSE 1,2 22
df %>% unnest_dt(a)
#> d a
#> <num> <char>
#> 1: 11 a
#> 2: 11 b
#> 3: 22 c
df %>% unnest_dt(2)
#> d b
#> <num> <lgcl>
#> 1: 11 TRUE
#> 2: 11 TRUE
#> 3: 22 FALSE
df %>% unnest_dt("c")
#> d c
#> <num> <num>
#> 1: 11 3
#> 2: 22 1
#> 3: 22 2
df %>% unnest_dt(cols = names(df)[3])
#> d c
#> <num> <num>
#> 1: 11 3
#> 2: 22 1
#> 3: 22 2
# You can unnest multiple columns simultaneously
df %>% unnest_dt(1:3)
#> d a b c
#> <num> <char> <lgcl> <num>
#> 1: 11 a TRUE 3
#> 2: 11 b TRUE 1
#> 3: 22 c FALSE 2
df %>% unnest_dt(a,b,c)
#> d a b c
#> <num> <char> <lgcl> <num>
#> 1: 11 a TRUE 3
#> 2: 11 b TRUE 1
#> 3: 22 c FALSE 2
df %>% unnest_dt("a|b|c")
#> d a b c
#> <num> <char> <lgcl> <num>
#> 1: 11 a TRUE 3
#> 2: 11 b TRUE 1
#> 3: 22 c FALSE 2
# examples for squeeze_dt
# nest which columns?
iris %>% squeeze_dt(1:2)
#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species ndt
#> <num> <num> <num> <num> <fctr> <list>
#> 1: 5.1 3.5 1.4 0.2 setosa 5.1,3.5
#> 2: 4.9 3.0 1.4 0.2 setosa 4.9,3.0
#> 3: 4.7 3.2 1.3 0.2 setosa 4.7,3.2
#> 4: 4.6 3.1 1.5 0.2 setosa 4.6,3.1
#> 5: 5.0 3.6 1.4 0.2 setosa 5.0,3.6
#> ---
#> 146: 6.7 3.0 5.2 2.3 virginica 6.7,3.0
#> 147: 6.3 2.5 5.0 1.9 virginica 6.3,2.5
#> 148: 6.5 3.0 5.2 2.0 virginica 6.5,3.0
#> 149: 6.2 3.4 5.4 2.3 virginica 6.2,3.4
#> 150: 5.9 3.0 5.1 1.8 virginica 5.9,3.0
iris %>% squeeze_dt("Se")
#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species ndt
#> <num> <num> <num> <num> <fctr> <list>
#> 1: 5.1 3.5 1.4 0.2 setosa 5.1,3.5
#> 2: 4.9 3.0 1.4 0.2 setosa 4.9,3.0
#> 3: 4.7 3.2 1.3 0.2 setosa 4.7,3.2
#> 4: 4.6 3.1 1.5 0.2 setosa 4.6,3.1
#> 5: 5.0 3.6 1.4 0.2 setosa 5.0,3.6
#> ---
#> 146: 6.7 3.0 5.2 2.3 virginica 6.7,3.0
#> 147: 6.3 2.5 5.0 1.9 virginica 6.3,2.5
#> 148: 6.5 3.0 5.2 2.0 virginica 6.5,3.0
#> 149: 6.2 3.4 5.4 2.3 virginica 6.2,3.4
#> 150: 5.9 3.0 5.1 1.8 virginica 5.9,3.0
iris %>% squeeze_dt(Sepal.Length:Petal.Width)
#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#> <num> <num> <num> <num> <fctr>
#> 1: 5.1 3.5 1.4 0.2 setosa
#> 2: 4.9 3.0 1.4 0.2 setosa
#> 3: 4.7 3.2 1.3 0.2 setosa
#> 4: 4.6 3.1 1.5 0.2 setosa
#> 5: 5.0 3.6 1.4 0.2 setosa
#> ---
#> 146: 6.7 3.0 5.2 2.3 virginica
#> 147: 6.3 2.5 5.0 1.9 virginica
#> 148: 6.5 3.0 5.2 2.0 virginica
#> 149: 6.2 3.4 5.4 2.3 virginica
#> 150: 5.9 3.0 5.1 1.8 virginica
#> 1 variable(s) not shown: [ndt <list>]
iris %>% squeeze_dt(1:2,.name = "data")
#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species data
#> <num> <num> <num> <num> <fctr> <list>
#> 1: 5.1 3.5 1.4 0.2 setosa 5.1,3.5
#> 2: 4.9 3.0 1.4 0.2 setosa 4.9,3.0
#> 3: 4.7 3.2 1.3 0.2 setosa 4.7,3.2
#> 4: 4.6 3.1 1.5 0.2 setosa 4.6,3.1
#> 5: 5.0 3.6 1.4 0.2 setosa 5.0,3.6
#> ---
#> 146: 6.7 3.0 5.2 2.3 virginica 6.7,3.0
#> 147: 6.3 2.5 5.0 1.9 virginica 6.3,2.5
#> 148: 6.5 3.0 5.2 2.0 virginica 6.5,3.0
#> 149: 6.2 3.4 5.4 2.3 virginica 6.2,3.4
#> 150: 5.9 3.0 5.1 1.8 virginica 5.9,3.0
# examples for chop_dt
df <- data.table(x = c(1, 1, 1, 2, 2, 3), y = 1:6, z = 6:1)
df %>% chop_dt(y,z)
#> x y z
#> <num> <list> <list>
#> 1: 1 1,2,3 6,5,4
#> 2: 2 4,5 3,2
#> 3: 3 6 1
df %>% chop_dt(y,z) %>% unchop_dt(y,z)
#> x y z
#> <num> <int> <int>
#> 1: 1 1 6
#> 2: 1 2 5
#> 3: 1 3 4
#> 4: 2 4 3
#> 5: 2 5 2
#> 6: 3 6 1