Analogous function for nest and unnest in tidyr. unnest will automatically remove other list-columns except for the target list-columns (which would be unnested later). Also, squeeze is designed to merge multiple columns into list column.

nest(.data, ..., mcols = NULL, .name = "ndt")

unnest(.data, ...)

squeeze(.data, ..., .name = "ndt")

chop(.data, ...)

unchop(.data, ...)

Arguments

.data

data.table, nested or unnested

...

The variables for nest group(for nest), columns to be nested(for squeeze and chop), or column(s) to be unnested(for unnest). Could recieve anything that select_dt could receive.

mcols

Name-variable pairs in the list, form like

.name

Character. The nested column name. Defaults to "ndt". list(petal="^Pe",sepal="^Se"), see example.

Value

data.table, nested or unnested

Details

In the nest, the data would be nested to a column named `ndt`, which is short for nested data.table.

The squeeze would not remove the originial columns.

The unchop is the reverse operation of chop.

These functions are experiencing the experimental stage, especially the unnest. If they don't work on some circumtances, try tidyr package.

References

https://www.r-bloggers.com/much-faster-unnesting-with-data-table/

https://stackoverflow.com/questions/25430986/create-nested-data-tables-by-collapsing-rows-into-new-data-tables

See also

Examples


mtcars = as.data.table(mtcars)
iris = as.data.table(iris)

# examples for nest

# nest by which columns?
 mtcars %>% nest(cyl)
#>      cyl                 ndt
#>    <num>              <list>
#> 1:     6  <data.table[7x10]>
#> 2:     4 <data.table[11x10]>
#> 3:     8 <data.table[14x10]>
 mtcars %>% nest("cyl")
#>      cyl                 ndt
#>    <num>              <list>
#> 1:     6  <data.table[7x10]>
#> 2:     4 <data.table[11x10]>
#> 3:     8 <data.table[14x10]>
 mtcars %>% nest(cyl,vs)
#>      cyl    vs                ndt
#>    <num> <num>             <list>
#> 1:     6     0  <data.table[3x9]>
#> 2:     4     1 <data.table[10x9]>
#> 3:     6     1  <data.table[4x9]>
#> 4:     8     0 <data.table[14x9]>
#> 5:     4     0  <data.table[1x9]>
 mtcars %>% nest(vs:am)
#>       vs    am                ndt
#>    <num> <num>             <list>
#> 1:     0     1  <data.table[6x9]>
#> 2:     1     1  <data.table[7x9]>
#> 3:     1     0  <data.table[7x9]>
#> 4:     0     0 <data.table[12x9]>
 mtcars %>% nest("cyl|vs")
#>      cyl    vs                ndt
#>    <num> <num>             <list>
#> 1:     6     0  <data.table[3x9]>
#> 2:     4     1 <data.table[10x9]>
#> 3:     6     1  <data.table[4x9]>
#> 4:     8     0 <data.table[14x9]>
#> 5:     4     0  <data.table[1x9]>
 mtcars %>% nest(c("cyl","vs"))
#>      cyl    vs                ndt
#>    <num> <num>             <list>
#> 1:     6     0  <data.table[3x9]>
#> 2:     4     1 <data.table[10x9]>
#> 3:     6     1  <data.table[4x9]>
#> 4:     8     0 <data.table[14x9]>
#> 5:     4     0  <data.table[1x9]>

# nest two columns directly
iris %>% nest(mcols = list(petal="^Pe",sepal="^Se"))
#> Key: <Species>
#>       Species              petal              sepal
#>        <fctr>             <list>             <list>
#> 1:     setosa <data.table[50x4]> <data.table[50x4]>
#> 2: versicolor <data.table[50x4]> <data.table[50x4]>
#> 3:  virginica <data.table[50x4]> <data.table[50x4]>

# nest more flexibly
iris %>% nest(mcols = list(ndt1 = 1:3,
  ndt2 = "Pe",
  ndt3 = Sepal.Length:Sepal.Width))
#> Key: <Species>
#>       Species               ndt1               ndt2               ndt3
#>        <fctr>             <list>             <list>             <list>
#> 1:     setosa <data.table[50x4]> <data.table[50x4]> <data.table[50x4]>
#> 2: versicolor <data.table[50x4]> <data.table[50x4]> <data.table[50x4]>
#> 3:  virginica <data.table[50x4]> <data.table[50x4]> <data.table[50x4]>

# examples for unnest
# unnest which column?
 mtcars %>% nest("cyl|vs") %>%
   unnest(ndt)
#>       cyl    vs   mpg  disp    hp  drat    wt  qsec    am  gear  carb
#>     <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>
#>  1:     6     0  21.0 160.0   110  3.90 2.620 16.46     1     4     4
#>  2:     6     0  21.0 160.0   110  3.90 2.875 17.02     1     4     4
#>  3:     6     0  19.7 145.0   175  3.62 2.770 15.50     1     5     6
#>  4:     4     1  22.8 108.0    93  3.85 2.320 18.61     1     4     1
#>  5:     4     1  24.4 146.7    62  3.69 3.190 20.00     0     4     2
#>  6:     4     1  22.8 140.8    95  3.92 3.150 22.90     0     4     2
#>  7:     4     1  32.4  78.7    66  4.08 2.200 19.47     1     4     1
#>  8:     4     1  30.4  75.7    52  4.93 1.615 18.52     1     4     2
#>  9:     4     1  33.9  71.1    65  4.22 1.835 19.90     1     4     1
#> 10:     4     1  21.5 120.1    97  3.70 2.465 20.01     0     3     1
#> 11:     4     1  27.3  79.0    66  4.08 1.935 18.90     1     4     1
#> 12:     4     1  30.4  95.1   113  3.77 1.513 16.90     1     5     2
#> 13:     4     1  21.4 121.0   109  4.11 2.780 18.60     1     4     2
#> 14:     6     1  21.4 258.0   110  3.08 3.215 19.44     0     3     1
#> 15:     6     1  18.1 225.0   105  2.76 3.460 20.22     0     3     1
#> 16:     6     1  19.2 167.6   123  3.92 3.440 18.30     0     4     4
#> 17:     6     1  17.8 167.6   123  3.92 3.440 18.90     0     4     4
#> 18:     8     0  18.7 360.0   175  3.15 3.440 17.02     0     3     2
#> 19:     8     0  14.3 360.0   245  3.21 3.570 15.84     0     3     4
#> 20:     8     0  16.4 275.8   180  3.07 4.070 17.40     0     3     3
#> 21:     8     0  17.3 275.8   180  3.07 3.730 17.60     0     3     3
#> 22:     8     0  15.2 275.8   180  3.07 3.780 18.00     0     3     3
#> 23:     8     0  10.4 472.0   205  2.93 5.250 17.98     0     3     4
#> 24:     8     0  10.4 460.0   215  3.00 5.424 17.82     0     3     4
#> 25:     8     0  14.7 440.0   230  3.23 5.345 17.42     0     3     4
#> 26:     8     0  15.5 318.0   150  2.76 3.520 16.87     0     3     2
#> 27:     8     0  15.2 304.0   150  3.15 3.435 17.30     0     3     2
#> 28:     8     0  13.3 350.0   245  3.73 3.840 15.41     0     3     4
#> 29:     8     0  19.2 400.0   175  3.08 3.845 17.05     0     3     2
#> 30:     8     0  15.8 351.0   264  4.22 3.170 14.50     1     5     4
#> 31:     8     0  15.0 301.0   335  3.54 3.570 14.60     1     5     8
#> 32:     4     0  26.0 120.3    91  4.43 2.140 16.70     1     5     2
#>       cyl    vs   mpg  disp    hp  drat    wt  qsec    am  gear  carb
 mtcars %>% nest("cyl|vs") %>%
   unnest("ndt")
#>       cyl    vs   mpg  disp    hp  drat    wt  qsec    am  gear  carb
#>     <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>
#>  1:     6     0  21.0 160.0   110  3.90 2.620 16.46     1     4     4
#>  2:     6     0  21.0 160.0   110  3.90 2.875 17.02     1     4     4
#>  3:     6     0  19.7 145.0   175  3.62 2.770 15.50     1     5     6
#>  4:     4     1  22.8 108.0    93  3.85 2.320 18.61     1     4     1
#>  5:     4     1  24.4 146.7    62  3.69 3.190 20.00     0     4     2
#>  6:     4     1  22.8 140.8    95  3.92 3.150 22.90     0     4     2
#>  7:     4     1  32.4  78.7    66  4.08 2.200 19.47     1     4     1
#>  8:     4     1  30.4  75.7    52  4.93 1.615 18.52     1     4     2
#>  9:     4     1  33.9  71.1    65  4.22 1.835 19.90     1     4     1
#> 10:     4     1  21.5 120.1    97  3.70 2.465 20.01     0     3     1
#> 11:     4     1  27.3  79.0    66  4.08 1.935 18.90     1     4     1
#> 12:     4     1  30.4  95.1   113  3.77 1.513 16.90     1     5     2
#> 13:     4     1  21.4 121.0   109  4.11 2.780 18.60     1     4     2
#> 14:     6     1  21.4 258.0   110  3.08 3.215 19.44     0     3     1
#> 15:     6     1  18.1 225.0   105  2.76 3.460 20.22     0     3     1
#> 16:     6     1  19.2 167.6   123  3.92 3.440 18.30     0     4     4
#> 17:     6     1  17.8 167.6   123  3.92 3.440 18.90     0     4     4
#> 18:     8     0  18.7 360.0   175  3.15 3.440 17.02     0     3     2
#> 19:     8     0  14.3 360.0   245  3.21 3.570 15.84     0     3     4
#> 20:     8     0  16.4 275.8   180  3.07 4.070 17.40     0     3     3
#> 21:     8     0  17.3 275.8   180  3.07 3.730 17.60     0     3     3
#> 22:     8     0  15.2 275.8   180  3.07 3.780 18.00     0     3     3
#> 23:     8     0  10.4 472.0   205  2.93 5.250 17.98     0     3     4
#> 24:     8     0  10.4 460.0   215  3.00 5.424 17.82     0     3     4
#> 25:     8     0  14.7 440.0   230  3.23 5.345 17.42     0     3     4
#> 26:     8     0  15.5 318.0   150  2.76 3.520 16.87     0     3     2
#> 27:     8     0  15.2 304.0   150  3.15 3.435 17.30     0     3     2
#> 28:     8     0  13.3 350.0   245  3.73 3.840 15.41     0     3     4
#> 29:     8     0  19.2 400.0   175  3.08 3.845 17.05     0     3     2
#> 30:     8     0  15.8 351.0   264  4.22 3.170 14.50     1     5     4
#> 31:     8     0  15.0 301.0   335  3.54 3.570 14.60     1     5     8
#> 32:     4     0  26.0 120.3    91  4.43 2.140 16.70     1     5     2
#>       cyl    vs   mpg  disp    hp  drat    wt  qsec    am  gear  carb

df <- data.table(
  a = list(c("a", "b"), "c"),
  b = list(c(TRUE,TRUE),FALSE),
  c = list(3,c(1,2)),
  d = c(11, 22)
)

df
#>         a         b      c     d
#>    <list>    <list> <list> <num>
#> 1:    a,b TRUE,TRUE      3    11
#> 2:      c     FALSE    1,2    22
df %>% unnest(a)
#>        d      a
#>    <num> <char>
#> 1:    11      a
#> 2:    11      b
#> 3:    22      c
df %>% unnest(2)
#>        d      b
#>    <num> <lgcl>
#> 1:    11   TRUE
#> 2:    11   TRUE
#> 3:    22  FALSE
df %>% unnest("c")
#>        d     c
#>    <num> <num>
#> 1:    11     3
#> 2:    22     1
#> 3:    22     2
df %>% unnest(cols = names(df)[3])
#>        d     c
#>    <num> <num>
#> 1:    11     3
#> 2:    22     1
#> 3:    22     2

# You can unnest multiple columns simultaneously
df %>% unnest(1:3)
#> Key: <d>
#>        d      a      b     c
#>    <num> <char> <lgcl> <num>
#> 1:    11      a   TRUE     3
#> 2:    11      a   TRUE     3
#> 3:    11      b   TRUE     3
#> 4:    11      b   TRUE     3
#> 5:    22      c  FALSE     1
#> 6:    22      c  FALSE     2
df %>% unnest(a,b,c)
#> Key: <d>
#>        d      a      b     c
#>    <num> <char> <lgcl> <num>
#> 1:    11      a   TRUE     3
#> 2:    11      a   TRUE     3
#> 3:    11      b   TRUE     3
#> 4:    11      b   TRUE     3
#> 5:    22      c  FALSE     1
#> 6:    22      c  FALSE     2
df %>% unnest("a|b|c")
#> Key: <d>
#>        d      a      b     c
#>    <num> <char> <lgcl> <num>
#> 1:    11      a   TRUE     3
#> 2:    11      a   TRUE     3
#> 3:    11      b   TRUE     3
#> 4:    11      b   TRUE     3
#> 5:    22      c  FALSE     1
#> 6:    22      c  FALSE     2

# examples for squeeze
# nest which columns?
iris %>% squeeze(1:2)
#>      Sepal.Length Sepal.Width Petal.Length Petal.Width   Species     ndt
#>             <num>       <num>        <num>       <num>    <fctr>  <list>
#>   1:          5.1         3.5          1.4         0.2    setosa 5.1,3.5
#>   2:          4.9         3.0          1.4         0.2    setosa 4.9,3.0
#>   3:          4.7         3.2          1.3         0.2    setosa 4.7,3.2
#>   4:          4.6         3.1          1.5         0.2    setosa 4.6,3.1
#>   5:          5.0         3.6          1.4         0.2    setosa 5.0,3.6
#>  ---                                                                    
#> 146:          6.7         3.0          5.2         2.3 virginica 6.7,3.0
#> 147:          6.3         2.5          5.0         1.9 virginica 6.3,2.5
#> 148:          6.5         3.0          5.2         2.0 virginica 6.5,3.0
#> 149:          6.2         3.4          5.4         2.3 virginica 6.2,3.4
#> 150:          5.9         3.0          5.1         1.8 virginica 5.9,3.0
iris %>% squeeze("Se")
#>      Sepal.Length Sepal.Width Petal.Length Petal.Width   Species     ndt
#>             <num>       <num>        <num>       <num>    <fctr>  <list>
#>   1:          5.1         3.5          1.4         0.2    setosa 5.1,3.5
#>   2:          4.9         3.0          1.4         0.2    setosa 4.9,3.0
#>   3:          4.7         3.2          1.3         0.2    setosa 4.7,3.2
#>   4:          4.6         3.1          1.5         0.2    setosa 4.6,3.1
#>   5:          5.0         3.6          1.4         0.2    setosa 5.0,3.6
#>  ---                                                                    
#> 146:          6.7         3.0          5.2         2.3 virginica 6.7,3.0
#> 147:          6.3         2.5          5.0         1.9 virginica 6.3,2.5
#> 148:          6.5         3.0          5.2         2.0 virginica 6.5,3.0
#> 149:          6.2         3.4          5.4         2.3 virginica 6.2,3.4
#> 150:          5.9         3.0          5.1         1.8 virginica 5.9,3.0
iris %>% squeeze(Sepal.Length:Petal.Width)
#>      Sepal.Length Sepal.Width Petal.Length Petal.Width   Species     ndt
#>             <num>       <num>        <num>       <num>    <fctr>  <list>
#>   1:          5.1         3.5          1.4         0.2    setosa 5.1,3.5
#>   2:          4.9         3.0          1.4         0.2    setosa 4.9,3.0
#>   3:          4.7         3.2          1.3         0.2    setosa 4.7,3.2
#>   4:          4.6         3.1          1.5         0.2    setosa 4.6,3.1
#>   5:          5.0         3.6          1.4         0.2    setosa 5.0,3.6
#>  ---                                                                    
#> 146:          6.7         3.0          5.2         2.3 virginica 6.7,3.0
#> 147:          6.3         2.5          5.0         1.9 virginica 6.3,2.5
#> 148:          6.5         3.0          5.2         2.0 virginica 6.5,3.0
#> 149:          6.2         3.4          5.4         2.3 virginica 6.2,3.4
#> 150:          5.9         3.0          5.1         1.8 virginica 5.9,3.0

# examples for chop
df <- data.table(x = c(1, 1, 1, 2, 2, 3), y = 1:6, z = 6:1)
df %>% chop(y,z)
#>        x      y      z
#>    <num> <list> <list>
#> 1:     1  1,2,3  6,5,4
#> 2:     2    4,5    3,2
#> 3:     3      6      1
df %>% chop(y,z) %>% unchop(y,z)
#>        x     y     z
#>    <num> <int> <int>
#> 1:     1     1     6
#> 2:     1     2     5
#> 3:     1     3     4
#> 4:     2     4     3
#> 5:     2     5     2
#> 6:     3     6     1