Create or melt list columns in data.frame.

Analogous function for nest and unnest in tidyr. unnest_dt will automatically remove other list-columns except for the target list-columns (which would be unnested later). Also, squeeze_dt is designed to merge multiple columns into list column.

nest_dt(.data, ..., mcols = NULL, .name = "ndt")

unnest_dt(.data, ...)

squeeze_dt(.data, ..., .name = "ndt")

chop_dt(.data, ...)

unchop_dt(.data, ...)

Arguments

.data

data.table, nested or unnested

...

The variables for nest group(for nest_dt), columns to be nested(for squeeze_dt and chop_dt), or column(s) to be unnested(for unnest_dt). Could recieve anything that select_dt could receive.

mcols

Name-variable pairs in the list, form like

.name

Character. The nested column name. Defaults to "ndt". list(petal="^Pe",sepal="^Se"), see example.

Value

data.table, nested or unnested

Details

In the nest_dt, the data would be nested to a column named `ndt`, which is short for nested data.table.

The squeeze_dt would not remove the originial columns.

The unchop_dt is the reverse operation of chop_dt.

These functions are experiencing the experimental stage, especially the unnest_dt. If they don't work on some circumtances, try tidyr package.

References

https://www.r-bloggers.com/much-faster-unnesting-with-data-table/

https://stackoverflow.com/questions/25430986/create-nested-data-tables-by-collapsing-rows-into-new-data-tables

See also

Examples


# examples for nest_dt
# nest by which columns?
 mtcars %>% nest_dt(cyl)
#>      cyl                 ndt
#>    <num>              <list>
#> 1:     6  <data.table[7x10]>
#> 2:     4 <data.table[11x10]>
#> 3:     8 <data.table[14x10]>
 mtcars %>% nest_dt("cyl")
#>      cyl                 ndt
#>    <num>              <list>
#> 1:     6  <data.table[7x10]>
#> 2:     4 <data.table[11x10]>
#> 3:     8 <data.table[14x10]>
 mtcars %>% nest_dt(cyl,vs)
#>      cyl    vs                ndt
#>    <num> <num>             <list>
#> 1:     6     0  <data.table[3x9]>
#> 2:     4     1 <data.table[10x9]>
#> 3:     6     1  <data.table[4x9]>
#> 4:     8     0 <data.table[14x9]>
#> 5:     4     0  <data.table[1x9]>
 mtcars %>% nest_dt(vs:am)
#>       vs    am                ndt
#>    <num> <num>             <list>
#> 1:     0     1  <data.table[6x9]>
#> 2:     1     1  <data.table[7x9]>
#> 3:     1     0  <data.table[7x9]>
#> 4:     0     0 <data.table[12x9]>
 mtcars %>% nest_dt("cyl|vs")
#>      cyl    vs                ndt
#>    <num> <num>             <list>
#> 1:     6     0  <data.table[3x9]>
#> 2:     4     1 <data.table[10x9]>
#> 3:     6     1  <data.table[4x9]>
#> 4:     8     0 <data.table[14x9]>
#> 5:     4     0  <data.table[1x9]>
 mtcars %>% nest_dt(c("cyl","vs"))
#>      cyl    vs                ndt
#>    <num> <num>             <list>
#> 1:     6     0  <data.table[3x9]>
#> 2:     4     1 <data.table[10x9]>
#> 3:     6     1  <data.table[4x9]>
#> 4:     8     0 <data.table[14x9]>
#> 5:     4     0  <data.table[1x9]>

 # change the nested column name
 mtcars %>% nest_dt(cyl,.name = "data")
#>      cyl                data
#>    <num>              <list>
#> 1:     6  <data.table[7x10]>
#> 2:     4 <data.table[11x10]>
#> 3:     8 <data.table[14x10]>

# nest two columns directly
iris %>% nest_dt(mcols = list(petal="^Pe",sepal="^Se"))
#> Key: <Species>
#>       Species              petal              sepal
#>        <fctr>             <list>             <list>
#> 1:     setosa <data.table[50x4]> <data.table[50x4]>
#> 2: versicolor <data.table[50x4]> <data.table[50x4]>
#> 3:  virginica <data.table[50x4]> <data.table[50x4]>

# nest more flexibly
iris %>% nest_dt(mcols = list(ndt1 = 1:3,
  ndt2 = "Pe",
  ndt3 = Sepal.Length:Sepal.Width))
#> Key: <Species>
#>       Species               ndt1               ndt2               ndt3
#>        <fctr>             <list>             <list>             <list>
#> 1:     setosa <data.table[50x4]> <data.table[50x4]> <data.table[50x4]>
#> 2: versicolor <data.table[50x4]> <data.table[50x4]> <data.table[50x4]>
#> 3:  virginica <data.table[50x4]> <data.table[50x4]> <data.table[50x4]>

# examples for unnest_dt
# unnest which column?
 mtcars %>% nest_dt("cyl|vs") %>%
   unnest_dt(ndt)
#>       cyl    vs   mpg  disp    hp  drat    wt  qsec    am  gear
#>     <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>
#>  1:     6     0  21.0 160.0   110  3.90 2.620 16.46     1     4
#>  2:     6     0  21.0 160.0   110  3.90 2.875 17.02     1     4
#>  3:     6     0  19.7 145.0   175  3.62 2.770 15.50     1     5
#>  4:     4     1  22.8 108.0    93  3.85 2.320 18.61     1     4
#>  5:     4     1  24.4 146.7    62  3.69 3.190 20.00     0     4
#>  6:     4     1  22.8 140.8    95  3.92 3.150 22.90     0     4
#>  7:     4     1  32.4  78.7    66  4.08 2.200 19.47     1     4
#>  8:     4     1  30.4  75.7    52  4.93 1.615 18.52     1     4
#>  9:     4     1  33.9  71.1    65  4.22 1.835 19.90     1     4
#> 10:     4     1  21.5 120.1    97  3.70 2.465 20.01     0     3
#> 11:     4     1  27.3  79.0    66  4.08 1.935 18.90     1     4
#> 12:     4     1  30.4  95.1   113  3.77 1.513 16.90     1     5
#> 13:     4     1  21.4 121.0   109  4.11 2.780 18.60     1     4
#> 14:     6     1  21.4 258.0   110  3.08 3.215 19.44     0     3
#> 15:     6     1  18.1 225.0   105  2.76 3.460 20.22     0     3
#> 16:     6     1  19.2 167.6   123  3.92 3.440 18.30     0     4
#> 17:     6     1  17.8 167.6   123  3.92 3.440 18.90     0     4
#> 18:     8     0  18.7 360.0   175  3.15 3.440 17.02     0     3
#> 19:     8     0  14.3 360.0   245  3.21 3.570 15.84     0     3
#> 20:     8     0  16.4 275.8   180  3.07 4.070 17.40     0     3
#> 21:     8     0  17.3 275.8   180  3.07 3.730 17.60     0     3
#> 22:     8     0  15.2 275.8   180  3.07 3.780 18.00     0     3
#> 23:     8     0  10.4 472.0   205  2.93 5.250 17.98     0     3
#> 24:     8     0  10.4 460.0   215  3.00 5.424 17.82     0     3
#> 25:     8     0  14.7 440.0   230  3.23 5.345 17.42     0     3
#> 26:     8     0  15.5 318.0   150  2.76 3.520 16.87     0     3
#> 27:     8     0  15.2 304.0   150  3.15 3.435 17.30     0     3
#> 28:     8     0  13.3 350.0   245  3.73 3.840 15.41     0     3
#> 29:     8     0  19.2 400.0   175  3.08 3.845 17.05     0     3
#> 30:     8     0  15.8 351.0   264  4.22 3.170 14.50     1     5
#> 31:     8     0  15.0 301.0   335  3.54 3.570 14.60     1     5
#> 32:     4     0  26.0 120.3    91  4.43 2.140 16.70     1     5
#>       cyl    vs   mpg  disp    hp  drat    wt  qsec    am  gear
#> 1 variable not shown: [carb <num>]
 mtcars %>% nest_dt("cyl|vs") %>%
   unnest_dt("ndt")
#>       cyl    vs   mpg  disp    hp  drat    wt  qsec    am  gear
#>     <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>
#>  1:     6     0  21.0 160.0   110  3.90 2.620 16.46     1     4
#>  2:     6     0  21.0 160.0   110  3.90 2.875 17.02     1     4
#>  3:     6     0  19.7 145.0   175  3.62 2.770 15.50     1     5
#>  4:     4     1  22.8 108.0    93  3.85 2.320 18.61     1     4
#>  5:     4     1  24.4 146.7    62  3.69 3.190 20.00     0     4
#>  6:     4     1  22.8 140.8    95  3.92 3.150 22.90     0     4
#>  7:     4     1  32.4  78.7    66  4.08 2.200 19.47     1     4
#>  8:     4     1  30.4  75.7    52  4.93 1.615 18.52     1     4
#>  9:     4     1  33.9  71.1    65  4.22 1.835 19.90     1     4
#> 10:     4     1  21.5 120.1    97  3.70 2.465 20.01     0     3
#> 11:     4     1  27.3  79.0    66  4.08 1.935 18.90     1     4
#> 12:     4     1  30.4  95.1   113  3.77 1.513 16.90     1     5
#> 13:     4     1  21.4 121.0   109  4.11 2.780 18.60     1     4
#> 14:     6     1  21.4 258.0   110  3.08 3.215 19.44     0     3
#> 15:     6     1  18.1 225.0   105  2.76 3.460 20.22     0     3
#> 16:     6     1  19.2 167.6   123  3.92 3.440 18.30     0     4
#> 17:     6     1  17.8 167.6   123  3.92 3.440 18.90     0     4
#> 18:     8     0  18.7 360.0   175  3.15 3.440 17.02     0     3
#> 19:     8     0  14.3 360.0   245  3.21 3.570 15.84     0     3
#> 20:     8     0  16.4 275.8   180  3.07 4.070 17.40     0     3
#> 21:     8     0  17.3 275.8   180  3.07 3.730 17.60     0     3
#> 22:     8     0  15.2 275.8   180  3.07 3.780 18.00     0     3
#> 23:     8     0  10.4 472.0   205  2.93 5.250 17.98     0     3
#> 24:     8     0  10.4 460.0   215  3.00 5.424 17.82     0     3
#> 25:     8     0  14.7 440.0   230  3.23 5.345 17.42     0     3
#> 26:     8     0  15.5 318.0   150  2.76 3.520 16.87     0     3
#> 27:     8     0  15.2 304.0   150  3.15 3.435 17.30     0     3
#> 28:     8     0  13.3 350.0   245  3.73 3.840 15.41     0     3
#> 29:     8     0  19.2 400.0   175  3.08 3.845 17.05     0     3
#> 30:     8     0  15.8 351.0   264  4.22 3.170 14.50     1     5
#> 31:     8     0  15.0 301.0   335  3.54 3.570 14.60     1     5
#> 32:     4     0  26.0 120.3    91  4.43 2.140 16.70     1     5
#>       cyl    vs   mpg  disp    hp  drat    wt  qsec    am  gear
#> 1 variable not shown: [carb <num>]

df <- data.table(
  a = list(c("a", "b"), "c"),
  b = list(c(TRUE,TRUE),FALSE),
  c = list(3,c(1,2)),
  d = c(11, 22)
)

df
#>         a         b      c     d
#>    <list>    <list> <list> <num>
#> 1:    a,b TRUE,TRUE      3    11
#> 2:      c     FALSE    1,2    22
df %>% unnest_dt(a)
#>        d      a
#>    <num> <char>
#> 1:    11      a
#> 2:    11      b
#> 3:    22      c
df %>% unnest_dt(2)
#>        d      b
#>    <num> <lgcl>
#> 1:    11   TRUE
#> 2:    11   TRUE
#> 3:    22  FALSE
df %>% unnest_dt("c")
#>        d     c
#>    <num> <num>
#> 1:    11     3
#> 2:    22     1
#> 3:    22     2
df %>% unnest_dt(cols = names(df)[3])
#>        d     c
#>    <num> <num>
#> 1:    11     3
#> 2:    22     1
#> 3:    22     2

# You can unnest multiple columns simultaneously
df %>% unnest_dt(1:3)
#>        d      a      b     c
#>    <num> <char> <lgcl> <num>
#> 1:    11      a   TRUE     3
#> 2:    11      b   TRUE     1
#> 3:    22      c  FALSE     2
df %>% unnest_dt(a,b,c)
#>        d      a      b     c
#>    <num> <char> <lgcl> <num>
#> 1:    11      a   TRUE     3
#> 2:    11      b   TRUE     1
#> 3:    22      c  FALSE     2
df %>% unnest_dt("a|b|c")
#>        d      a      b     c
#>    <num> <char> <lgcl> <num>
#> 1:    11      a   TRUE     3
#> 2:    11      b   TRUE     1
#> 3:    22      c  FALSE     2

# examples for squeeze_dt
# nest which columns?
iris %>% squeeze_dt(1:2)
#>      Sepal.Length Sepal.Width Petal.Length Petal.Width   Species     ndt
#>             <num>       <num>        <num>       <num>    <fctr>  <list>
#>   1:          5.1         3.5          1.4         0.2    setosa 5.1,3.5
#>   2:          4.9         3.0          1.4         0.2    setosa 4.9,3.0
#>   3:          4.7         3.2          1.3         0.2    setosa 4.7,3.2
#>   4:          4.6         3.1          1.5         0.2    setosa 4.6,3.1
#>   5:          5.0         3.6          1.4         0.2    setosa 5.0,3.6
#>  ---                                                                    
#> 146:          6.7         3.0          5.2         2.3 virginica 6.7,3.0
#> 147:          6.3         2.5          5.0         1.9 virginica 6.3,2.5
#> 148:          6.5         3.0          5.2         2.0 virginica 6.5,3.0
#> 149:          6.2         3.4          5.4         2.3 virginica 6.2,3.4
#> 150:          5.9         3.0          5.1         1.8 virginica 5.9,3.0
iris %>% squeeze_dt("Se")
#>      Sepal.Length Sepal.Width Petal.Length Petal.Width   Species     ndt
#>             <num>       <num>        <num>       <num>    <fctr>  <list>
#>   1:          5.1         3.5          1.4         0.2    setosa 5.1,3.5
#>   2:          4.9         3.0          1.4         0.2    setosa 4.9,3.0
#>   3:          4.7         3.2          1.3         0.2    setosa 4.7,3.2
#>   4:          4.6         3.1          1.5         0.2    setosa 4.6,3.1
#>   5:          5.0         3.6          1.4         0.2    setosa 5.0,3.6
#>  ---                                                                    
#> 146:          6.7         3.0          5.2         2.3 virginica 6.7,3.0
#> 147:          6.3         2.5          5.0         1.9 virginica 6.3,2.5
#> 148:          6.5         3.0          5.2         2.0 virginica 6.5,3.0
#> 149:          6.2         3.4          5.4         2.3 virginica 6.2,3.4
#> 150:          5.9         3.0          5.1         1.8 virginica 5.9,3.0
iris %>% squeeze_dt(Sepal.Length:Petal.Width)
#>      Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
#>             <num>       <num>        <num>       <num>    <fctr>
#>   1:          5.1         3.5          1.4         0.2    setosa
#>   2:          4.9         3.0          1.4         0.2    setosa
#>   3:          4.7         3.2          1.3         0.2    setosa
#>   4:          4.6         3.1          1.5         0.2    setosa
#>   5:          5.0         3.6          1.4         0.2    setosa
#>  ---                                                            
#> 146:          6.7         3.0          5.2         2.3 virginica
#> 147:          6.3         2.5          5.0         1.9 virginica
#> 148:          6.5         3.0          5.2         2.0 virginica
#> 149:          6.2         3.4          5.4         2.3 virginica
#> 150:          5.9         3.0          5.1         1.8 virginica
#> 1 variable not shown: [ndt <list>]
iris %>% squeeze_dt(1:2,.name = "data")
#>      Sepal.Length Sepal.Width Petal.Length Petal.Width   Species    data
#>             <num>       <num>        <num>       <num>    <fctr>  <list>
#>   1:          5.1         3.5          1.4         0.2    setosa 5.1,3.5
#>   2:          4.9         3.0          1.4         0.2    setosa 4.9,3.0
#>   3:          4.7         3.2          1.3         0.2    setosa 4.7,3.2
#>   4:          4.6         3.1          1.5         0.2    setosa 4.6,3.1
#>   5:          5.0         3.6          1.4         0.2    setosa 5.0,3.6
#>  ---                                                                    
#> 146:          6.7         3.0          5.2         2.3 virginica 6.7,3.0
#> 147:          6.3         2.5          5.0         1.9 virginica 6.3,2.5
#> 148:          6.5         3.0          5.2         2.0 virginica 6.5,3.0
#> 149:          6.2         3.4          5.4         2.3 virginica 6.2,3.4
#> 150:          5.9         3.0          5.1         1.8 virginica 5.9,3.0

# examples for chop_dt
df <- data.table(x = c(1, 1, 1, 2, 2, 3), y = 1:6, z = 6:1)
df %>% chop_dt(y,z)
#>        x      y      z
#>    <num> <list> <list>
#> 1:     1  1,2,3  6,5,4
#> 2:     2    4,5    3,2
#> 3:     3      6      1
df %>% chop_dt(y,z) %>% unchop_dt(y,z)
#>        x     y     z
#>    <num> <int> <int>
#> 1:     1     1     6
#> 2:     1     2     5
#> 3:     1     3     4
#> 4:     2     4     3
#> 5:     2     5     2
#> 6:     3     6     1