Quickly create dummy (binary) columns from character and factor type columns in the input data (and from numeric columns if specified). This function is useful for statistical analysis when you want binary columns rather than character columns.

dummy(.data, ..., longname = TRUE)

Arguments

.data

data.frame

...

Columns you want to create dummy variables from. Column selection is very flexible; see the examples.

longname

logical. Should the output columns be labeled with the original column name as a prefix (e.g. Species_setosa rather than setosa)? Defaults to TRUE.

Value

data.table

Details

If no columns are provided, the original data frame is returned.

This function is inspired by the fastDummies package but aims for simple and precise usage, whereas fastDummies::dummy_cols provides more features for statistical use.

See also

dummy_cols

Examples


iris = as.data.table(iris)
iris %>% dummy(Species)
#> Key: <Sepal.Length, Sepal.Width, Petal.Length, Petal.Width>
#>      Sepal.Length Sepal.Width Petal.Length Petal.Width Species_setosa
#>             <num>       <num>        <num>       <num>          <num>
#>   1:          4.3         3.0          1.1         0.1              1
#>   2:          4.4         2.9          1.4         0.2              1
#>   3:          4.4         3.0          1.3         0.2              1
#>   4:          4.4         3.2          1.3         0.2              1
#>   5:          4.5         2.3          1.3         0.3              1
#>  ---                                                                 
#> 146:          7.7         2.6          6.9         2.3              0
#> 147:          7.7         2.8          6.7         2.0              0
#> 148:          7.7         3.0          6.1         2.3              0
#> 149:          7.7         3.8          6.7         2.2              0
#> 150:          7.9         3.8          6.4         2.0              0
#>      Species_versicolor Species_virginica
#>                   <num>             <num>
#>   1:                  0                 0
#>   2:                  0                 0
#>   3:                  0                 0
#>   4:                  0                 0
#>   5:                  0                 0
#>  ---                                     
#> 146:                  0                 1
#> 147:                  0                 1
#> 148:                  0                 1
#> 149:                  0                 1
#> 150:                  0                 1
iris %>% dummy(Species,longname = FALSE)
#> Key: <Sepal.Length, Sepal.Width, Petal.Length, Petal.Width>
#>      Sepal.Length Sepal.Width Petal.Length Petal.Width setosa versicolor
#>             <num>       <num>        <num>       <num>  <num>      <num>
#>   1:          4.3         3.0          1.1         0.1      1          0
#>   2:          4.4         2.9          1.4         0.2      1          0
#>   3:          4.4         3.0          1.3         0.2      1          0
#>   4:          4.4         3.2          1.3         0.2      1          0
#>   5:          4.5         2.3          1.3         0.3      1          0
#>  ---                                                                    
#> 146:          7.7         2.6          6.9         2.3      0          0
#> 147:          7.7         2.8          6.7         2.0      0          0
#> 148:          7.7         3.0          6.1         2.3      0          0
#> 149:          7.7         3.8          6.7         2.2      0          0
#> 150:          7.9         3.8          6.4         2.0      0          0
#>      virginica
#>          <num>
#>   1:         0
#>   2:         0
#>   3:         0
#>   4:         0
#>   5:         0
#>  ---          
#> 146:         1
#> 147:         1
#> 148:         1
#> 149:         1
#> 150:         1

mtcars = as.data.table(mtcars)
mtcars %>% head() %>% dummy(vs,am)
#> Key: <mpg, cyl, disp, hp, drat, wt, qsec, gear, carb, vs_0, vs_1>
#>      mpg   cyl  disp    hp  drat    wt  qsec  gear  carb  vs_0  vs_1  am_0
#>    <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>
#> 1:  18.1     6   225   105  2.76 3.460 20.22     3     1     0     1     1
#> 2:  18.7     8   360   175  3.15 3.440 17.02     3     2     1     0     1
#> 3:  21.0     6   160   110  3.90 2.620 16.46     4     4     1     0     0
#> 4:  21.0     6   160   110  3.90 2.875 17.02     4     4     1     0     0
#> 5:  21.4     6   258   110  3.08 3.215 19.44     3     1     0     1     1
#> 6:  22.8     4   108    93  3.85 2.320 18.61     4     1     0     1     0
#>     am_1
#>    <num>
#> 1:     0
#> 2:     0
#> 3:     1
#> 4:     1
#> 5:     0
#> 6:     1
mtcars %>% head() %>% dummy("cyl|gear")
#> Key: <mpg, disp, hp, drat, wt, qsec, vs, am, carb, cyl_4, cyl_6, cyl_8>
#>      mpg  disp    hp  drat    wt  qsec    vs    am  carb cyl_4 cyl_6 cyl_8
#>    <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>
#> 1:  18.1   225   105  2.76 3.460 20.22     1     0     1     0     1     0
#> 2:  18.7   360   175  3.15 3.440 17.02     0     0     2     0     0     1
#> 3:  21.0   160   110  3.90 2.620 16.46     0     1     4     0     1     0
#> 4:  21.0   160   110  3.90 2.875 17.02     0     1     4     0     1     0
#> 5:  21.4   258   110  3.08 3.215 19.44     1     0     1     0     1     0
#> 6:  22.8   108    93  3.85 2.320 18.61     1     1     1     1     0     0
#>    gear_3 gear_4
#>     <num>  <num>
#> 1:      1      0
#> 2:      1      0
#> 3:      0      1
#> 4:      0      1
#> 5:      1      0
#> 6:      0      1