Quickly create dummy (binary) columns from character and factor type columns in the input data (and from numeric columns, if specified). This function is useful for statistical analysis when you want binary columns rather than character columns.
dummy(.data, ..., longname = TRUE)
data.table
If no columns are provided, the original data frame is returned.
This function is inspired by the fastDummies package, but provides
simple and precise usage, whereas fastDummies::dummy_cols
provides more features for statistical usage.
dummy_cols
iris = as.data.table(iris)
iris %>% dummy(Species)
#> Key: <Sepal.Length, Sepal.Width, Petal.Length, Petal.Width>
#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species_setosa
#> <num> <num> <num> <num> <num>
#> 1: 4.3 3.0 1.1 0.1 1
#> 2: 4.4 2.9 1.4 0.2 1
#> 3: 4.4 3.0 1.3 0.2 1
#> 4: 4.4 3.2 1.3 0.2 1
#> 5: 4.5 2.3 1.3 0.3 1
#> ---
#> 146: 7.7 2.6 6.9 2.3 0
#> 147: 7.7 2.8 6.7 2.0 0
#> 148: 7.7 3.0 6.1 2.3 0
#> 149: 7.7 3.8 6.7 2.2 0
#> 150: 7.9 3.8 6.4 2.0 0
#> Species_versicolor Species_virginica
#> <num> <num>
#> 1: 0 0
#> 2: 0 0
#> 3: 0 0
#> 4: 0 0
#> 5: 0 0
#> ---
#> 146: 0 1
#> 147: 0 1
#> 148: 0 1
#> 149: 0 1
#> 150: 0 1
iris %>% dummy(Species,longname = FALSE)
#> Key: <Sepal.Length, Sepal.Width, Petal.Length, Petal.Width>
#> Sepal.Length Sepal.Width Petal.Length Petal.Width setosa versicolor
#> <num> <num> <num> <num> <num> <num>
#> 1: 4.3 3.0 1.1 0.1 1 0
#> 2: 4.4 2.9 1.4 0.2 1 0
#> 3: 4.4 3.0 1.3 0.2 1 0
#> 4: 4.4 3.2 1.3 0.2 1 0
#> 5: 4.5 2.3 1.3 0.3 1 0
#> ---
#> 146: 7.7 2.6 6.9 2.3 0 0
#> 147: 7.7 2.8 6.7 2.0 0 0
#> 148: 7.7 3.0 6.1 2.3 0 0
#> 149: 7.7 3.8 6.7 2.2 0 0
#> 150: 7.9 3.8 6.4 2.0 0 0
#> virginica
#> <num>
#> 1: 0
#> 2: 0
#> 3: 0
#> 4: 0
#> 5: 0
#> ---
#> 146: 1
#> 147: 1
#> 148: 1
#> 149: 1
#> 150: 1
mtcars = as.data.table(mtcars)
mtcars %>% head() %>% dummy(vs,am)
#> Key: <mpg, cyl, disp, hp, drat, wt, qsec, gear, carb, vs_0, vs_1>
#> mpg cyl disp hp drat wt qsec gear carb vs_0 vs_1 am_0
#> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>
#> 1: 18.1 6 225 105 2.76 3.460 20.22 3 1 0 1 1
#> 2: 18.7 8 360 175 3.15 3.440 17.02 3 2 1 0 1
#> 3: 21.0 6 160 110 3.90 2.620 16.46 4 4 1 0 0
#> 4: 21.0 6 160 110 3.90 2.875 17.02 4 4 1 0 0
#> 5: 21.4 6 258 110 3.08 3.215 19.44 3 1 0 1 1
#> 6: 22.8 4 108 93 3.85 2.320 18.61 4 1 0 1 0
#> am_1
#> <num>
#> 1: 0
#> 2: 0
#> 3: 1
#> 4: 1
#> 5: 0
#> 6: 1
mtcars %>% head() %>% dummy("cyl|gear")
#> Key: <mpg, disp, hp, drat, wt, qsec, vs, am, carb, cyl_4, cyl_6, cyl_8>
#> mpg disp hp drat wt qsec vs am carb cyl_4 cyl_6 cyl_8
#> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>
#> 1: 18.1 225 105 2.76 3.460 20.22 1 0 1 0 1 0
#> 2: 18.7 360 175 3.15 3.440 17.02 0 0 2 0 0 1
#> 3: 21.0 160 110 3.90 2.620 16.46 0 1 4 0 1 0
#> 4: 21.0 160 110 3.90 2.875 17.02 0 1 4 0 1 0
#> 5: 21.4 258 110 3.08 3.215 19.44 1 0 1 0 1 0
#> 6: 22.8 108 93 3.85 2.320 18.61 1 1 1 1 0 0
#> gear_3 gear_4
#> <num> <num>
#> 1: 1 0
#> 2: 1 0
#> 3: 0 1
#> 4: 0 1
#> 5: 1 0
#> 6: 0 1