This vignette follows dplyr’s introductory vignette at https://cran.r-project.org/web/packages/dplyr/vignettes/dplyr.html. We’ll try to reproduce all of its results with tidydt. First, load the needed packages.
library(tidydt)
#>
#> Life's short, use R.
library(nycflights13)
data.table(flights)
#> year month day dep_time sched_dep_time dep_delay arr_time
#> 1: 2013 1 1 517 515 2 830
#> 2: 2013 1 1 533 529 4 850
#> 3: 2013 1 1 542 540 2 923
#> 4: 2013 1 1 544 545 -1 1004
#> 5: 2013 1 1 554 600 -6 812
#> ---
#> 336772: 2013 9 30 NA 1455 NA NA
#> 336773: 2013 9 30 NA 2200 NA NA
#> 336774: 2013 9 30 NA 1210 NA NA
#> 336775: 2013 9 30 NA 1159 NA NA
#> 336776: 2013 9 30 NA 840 NA NA
#> sched_arr_time arr_delay carrier flight tailnum origin dest air_time
#> 1: 819 11 UA 1545 N14228 EWR IAH 227
#> 2: 830 20 UA 1714 N24211 LGA IAH 227
#> 3: 850 33 AA 1141 N619AA JFK MIA 160
#> 4: 1022 -18 B6 725 N804JB JFK BQN 183
#> 5: 837 -25 DL 461 N668DN LGA ATL 116
#> ---
#> 336772: 1634 NA 9E 3393 <NA> JFK DCA NA
#> 336773: 2312 NA 9E 3525 <NA> LGA SYR NA
#> 336774: 1330 NA MQ 3461 N535MQ LGA BNA NA
#> 336775: 1344 NA MQ 3572 N511MQ LGA CLE NA
#> 336776: 1020 NA MQ 3531 N839MQ LGA RDU NA
#> distance hour minute time_hour
#> 1: 1400 5 15 2013-01-01 05:00:00
#> 2: 1416 5 29 2013-01-01 05:00:00
#> 3: 1089 5 40 2013-01-01 05:00:00
#> 4: 1576 5 45 2013-01-01 05:00:00
#> 5: 762 6 0 2013-01-01 06:00:00
#> ---
#> 336772: 213 14 55 2013-09-30 14:00:00
#> 336773: 198 22 0 2013-09-30 22:00:00
#> 336774: 764 12 10 2013-09-30 12:00:00
#> 336775: 419 11 59 2013-09-30 11:00:00
#> 336776: 431 8 40 2013-09-30 08:00:00
filter_dt()
filter_dt(flights, month == 1, day == 1)
#> year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
#> 1: 2013 1 1 517 515 2 830 819
#> 2: 2013 1 1 533 529 4 850 830
#> 3: 2013 1 1 542 540 2 923 850
#> 4: 2013 1 1 544 545 -1 1004 1022
#> 5: 2013 1 1 554 600 -6 812 837
#> ---
#> 838: 2013 1 1 2356 2359 -3 425 437
#> 839: 2013 1 1 NA 1630 NA NA 1815
#> 840: 2013 1 1 NA 1935 NA NA 2240
#> 841: 2013 1 1 NA 1500 NA NA 1825
#> 842: 2013 1 1 NA 600 NA NA 901
#> arr_delay carrier flight tailnum origin dest air_time distance hour minute
#> 1: 11 UA 1545 N14228 EWR IAH 227 1400 5 15
#> 2: 20 UA 1714 N24211 LGA IAH 227 1416 5 29
#> 3: 33 AA 1141 N619AA JFK MIA 160 1089 5 40
#> 4: -18 B6 725 N804JB JFK BQN 183 1576 5 45
#> 5: -25 DL 461 N668DN LGA ATL 116 762 6 0
#> ---
#> 838: -12 B6 727 N588JB JFK BQN 186 1576 23 59
#> 839: NA EV 4308 N18120 EWR RDU NA 416 16 30
#> 840: NA AA 791 N3EHAA LGA DFW NA 1389 19 35
#> 841: NA AA 1925 N3EVAA LGA MIA NA 1096 15 0
#> 842: NA B6 125 N618JB JFK FLL NA 1069 6 0
#> time_hour
#> 1: 2013-01-01 05:00:00
#> 2: 2013-01-01 05:00:00
#> 3: 2013-01-01 05:00:00
#> 4: 2013-01-01 05:00:00
#> 5: 2013-01-01 06:00:00
#> ---
#> 838: 2013-01-01 23:00:00
#> 839: 2013-01-01 16:00:00
#> 840: 2013-01-01 19:00:00
#> 841: 2013-01-01 15:00:00
#> 842: 2013-01-01 06:00:00
arrange_dt()
arrange_dt(flights, year, month, day)
#> year month day dep_time sched_dep_time dep_delay arr_time
#> 1: 2013 1 1 517 515 2 830
#> 2: 2013 1 1 533 529 4 850
#> 3: 2013 1 1 542 540 2 923
#> 4: 2013 1 1 544 545 -1 1004
#> 5: 2013 1 1 554 600 -6 812
#> ---
#> 336772: 2013 12 31 NA 705 NA NA
#> 336773: 2013 12 31 NA 825 NA NA
#> 336774: 2013 12 31 NA 1615 NA NA
#> 336775: 2013 12 31 NA 600 NA NA
#> 336776: 2013 12 31 NA 830 NA NA
#> sched_arr_time arr_delay carrier flight tailnum origin dest air_time
#> 1: 819 11 UA 1545 N14228 EWR IAH 227
#> 2: 830 20 UA 1714 N24211 LGA IAH 227
#> 3: 850 33 AA 1141 N619AA JFK MIA 160
#> 4: 1022 -18 B6 725 N804JB JFK BQN 183
#> 5: 837 -25 DL 461 N668DN LGA ATL 116
#> ---
#> 336772: 931 NA UA 1729 <NA> EWR DEN NA
#> 336773: 1029 NA US 1831 <NA> JFK CLT NA
#> 336774: 1800 NA MQ 3301 N844MQ LGA RDU NA
#> 336775: 735 NA UA 219 <NA> EWR ORD NA
#> 336776: 1154 NA UA 443 <NA> JFK LAX NA
#> distance hour minute time_hour
#> 1: 1400 5 15 2013-01-01 05:00:00
#> 2: 1416 5 29 2013-01-01 05:00:00
#> 3: 1089 5 40 2013-01-01 05:00:00
#> 4: 1576 5 45 2013-01-01 05:00:00
#> 5: 762 6 0 2013-01-01 06:00:00
#> ---
#> 336772: 1605 7 5 2013-12-31 07:00:00
#> 336773: 541 8 25 2013-12-31 08:00:00
#> 336774: 431 16 15 2013-12-31 16:00:00
#> 336775: 719 6 0 2013-12-31 06:00:00
#> 336776: 2475 8 30 2013-12-31 08:00:00
Use - (the minus sign) before a column name to sort by that column in descending order:
arrange_dt(flights, -arr_delay)
#> year month day dep_time sched_dep_time dep_delay arr_time
#> 1: 2013 1 9 641 900 1301 1242
#> 2: 2013 6 15 1432 1935 1137 1607
#> 3: 2013 1 10 1121 1635 1126 1239
#> 4: 2013 9 20 1139 1845 1014 1457
#> 5: 2013 7 22 845 1600 1005 1044
#> ---
#> 336772: 2013 9 30 NA 1455 NA NA
#> 336773: 2013 9 30 NA 2200 NA NA
#> 336774: 2013 9 30 NA 1210 NA NA
#> 336775: 2013 9 30 NA 1159 NA NA
#> 336776: 2013 9 30 NA 840 NA NA
#> sched_arr_time arr_delay carrier flight tailnum origin dest air_time
#> 1: 1530 1272 HA 51 N384HA JFK HNL 640
#> 2: 2120 1127 MQ 3535 N504MQ JFK CMH 74
#> 3: 1810 1109 MQ 3695 N517MQ EWR ORD 111
#> 4: 2210 1007 AA 177 N338AA JFK SFO 354
#> 5: 1815 989 MQ 3075 N665MQ JFK CVG 96
#> ---
#> 336772: 1634 NA 9E 3393 <NA> JFK DCA NA
#> 336773: 2312 NA 9E 3525 <NA> LGA SYR NA
#> 336774: 1330 NA MQ 3461 N535MQ LGA BNA NA
#> 336775: 1344 NA MQ 3572 N511MQ LGA CLE NA
#> 336776: 1020 NA MQ 3531 N839MQ LGA RDU NA
#> distance hour minute time_hour
#> 1: 4983 9 0 2013-01-09 09:00:00
#> 2: 483 19 35 2013-06-15 19:00:00
#> 3: 719 16 35 2013-01-10 16:00:00
#> 4: 2586 18 45 2013-09-20 18:00:00
#> 5: 589 16 0 2013-07-22 16:00:00
#> ---
#> 336772: 213 14 55 2013-09-30 14:00:00
#> 336773: 198 22 0 2013-09-30 22:00:00
#> 336774: 764 12 10 2013-09-30 12:00:00
#> 336775: 419 11 59 2013-09-30 11:00:00
#> 336776: 431 8 40 2013-09-30 08:00:00
select_dt()
select_dt(flights, year, month, day)
#> year month day
#> 1: 2013 1 1
#> 2: 2013 1 1
#> 3: 2013 1 1
#> 4: 2013 1 1
#> 5: 2013 1 1
#> ---
#> 336772: 2013 9 30
#> 336773: 2013 9 30
#> 336774: 2013 9 30
#> 336775: 2013 9 30
#> 336776: 2013 9 30
Range selections such as select_dt(flights, year:day) and select_dt(flights, -(year:day)) are not supported. However, select_dt() can select columns by regular expression, for example:
select_dt(flights, "^dep")
#> dep_time dep_delay
#> 1: 517 2
#> 2: 533 4
#> 3: 542 2
#> 4: 544 -1
#> 5: 554 -6
#> ---
#> 336772: NA NA
#> 336773: NA NA
#> 336774: NA NA
#> 336775: NA NA
#> 336776: NA NA
Renaming works almost the same way as in dplyr: select_dt() keeps only the renamed column, while rename_dt() keeps all columns:
select_dt(flights, tail_num = tailnum)
#> tail_num
#> 1: N14228
#> 2: N24211
#> 3: N619AA
#> 4: N804JB
#> 5: N668DN
#> ---
#> 336772: <NA>
#> 336773: <NA>
#> 336774: N535MQ
#> 336775: N511MQ
#> 336776: N839MQ
rename_dt(flights, tail_num = tailnum)
#> year month day dep_time sched_dep_time dep_delay arr_time
#> 1: 2013 1 1 517 515 2 830
#> 2: 2013 1 1 533 529 4 850
#> 3: 2013 1 1 542 540 2 923
#> 4: 2013 1 1 544 545 -1 1004
#> 5: 2013 1 1 554 600 -6 812
#> ---
#> 336772: 2013 9 30 NA 1455 NA NA
#> 336773: 2013 9 30 NA 2200 NA NA
#> 336774: 2013 9 30 NA 1210 NA NA
#> 336775: 2013 9 30 NA 1159 NA NA
#> 336776: 2013 9 30 NA 840 NA NA
#> sched_arr_time arr_delay carrier flight tail_num origin dest air_time
#> 1: 819 11 UA 1545 N14228 EWR IAH 227
#> 2: 830 20 UA 1714 N24211 LGA IAH 227
#> 3: 850 33 AA 1141 N619AA JFK MIA 160
#> 4: 1022 -18 B6 725 N804JB JFK BQN 183
#> 5: 837 -25 DL 461 N668DN LGA ATL 116
#> ---
#> 336772: 1634 NA 9E 3393 <NA> JFK DCA NA
#> 336773: 2312 NA 9E 3525 <NA> LGA SYR NA
#> 336774: 1330 NA MQ 3461 N535MQ LGA BNA NA
#> 336775: 1344 NA MQ 3572 N511MQ LGA CLE NA
#> 336776: 1020 NA MQ 3531 N839MQ LGA RDU NA
#> distance hour minute time_hour
#> 1: 1400 5 15 2013-01-01 05:00:00
#> 2: 1416 5 29 2013-01-01 05:00:00
#> 3: 1089 5 40 2013-01-01 05:00:00
#> 4: 1576 5 45 2013-01-01 05:00:00
#> 5: 762 6 0 2013-01-01 06:00:00
#> ---
#> 336772: 213 14 55 2013-09-30 14:00:00
#> 336773: 198 22 0 2013-09-30 22:00:00
#> 336774: 764 12 10 2013-09-30 12:00:00
#> 336775: 419 11 59 2013-09-30 11:00:00
#> 336776: 431 8 40 2013-09-30 08:00:00
mutate_dt()
mutate_dt(flights,
gain = arr_delay - dep_delay,
speed = distance / air_time * 60
)
#> year month day dep_time sched_dep_time dep_delay arr_time
#> 1: 2013 1 1 517 515 2 830
#> 2: 2013 1 1 533 529 4 850
#> 3: 2013 1 1 542 540 2 923
#> 4: 2013 1 1 544 545 -1 1004
#> 5: 2013 1 1 554 600 -6 812
#> ---
#> 336772: 2013 9 30 NA 1455 NA NA
#> 336773: 2013 9 30 NA 2200 NA NA
#> 336774: 2013 9 30 NA 1210 NA NA
#> 336775: 2013 9 30 NA 1159 NA NA
#> 336776: 2013 9 30 NA 840 NA NA
#> sched_arr_time arr_delay carrier flight tailnum origin dest air_time
#> 1: 819 11 UA 1545 N14228 EWR IAH 227
#> 2: 830 20 UA 1714 N24211 LGA IAH 227
#> 3: 850 33 AA 1141 N619AA JFK MIA 160
#> 4: 1022 -18 B6 725 N804JB JFK BQN 183
#> 5: 837 -25 DL 461 N668DN LGA ATL 116
#> ---
#> 336772: 1634 NA 9E 3393 <NA> JFK DCA NA
#> 336773: 2312 NA 9E 3525 <NA> LGA SYR NA
#> 336774: 1330 NA MQ 3461 N535MQ LGA BNA NA
#> 336775: 1344 NA MQ 3572 N511MQ LGA CLE NA
#> 336776: 1020 NA MQ 3531 N839MQ LGA RDU NA
#> distance hour minute time_hour gain speed
#> 1: 1400 5 15 2013-01-01 05:00:00 9 370.0441
#> 2: 1416 5 29 2013-01-01 05:00:00 16 374.2731
#> 3: 1089 5 40 2013-01-01 05:00:00 31 408.3750
#> 4: 1576 5 45 2013-01-01 05:00:00 -17 516.7213
#> 5: 762 6 0 2013-01-01 06:00:00 -19 394.1379
#> ---
#> 336772: 213 14 55 2013-09-30 14:00:00 NA NA
#> 336773: 198 22 0 2013-09-30 22:00:00 NA NA
#> 336774: 764 12 10 2013-09-30 12:00:00 NA NA
#> 336775: 419 11 59 2013-09-30 11:00:00 NA NA
#> 336776: 431 8 40 2013-09-30 08:00:00 NA NA
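However, a column cannot be referenced in the same call that creates it; split such steps into separate calls. The following code would not work:
mutate_dt(flights,
  gain = arr_delay - dep_delay,
  gain_per_hour = gain / (air_time / 60)
)
Instead, use: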
mutate_dt(flights,gain = arr_delay - dep_delay) %>%
mutate_dt(gain_per_hour = gain / (air_time / 60))
#> year month day dep_time sched_dep_time dep_delay arr_time
#> 1: 2013 1 1 517 515 2 830
#> 2: 2013 1 1 533 529 4 850
#> 3: 2013 1 1 542 540 2 923
#> 4: 2013 1 1 544 545 -1 1004
#> 5: 2013 1 1 554 600 -6 812
#> ---
#> 336772: 2013 9 30 NA 1455 NA NA
#> 336773: 2013 9 30 NA 2200 NA NA
#> 336774: 2013 9 30 NA 1210 NA NA
#> 336775: 2013 9 30 NA 1159 NA NA
#> 336776: 2013 9 30 NA 840 NA NA
#> sched_arr_time arr_delay carrier flight tailnum origin dest air_time
#> 1: 819 11 UA 1545 N14228 EWR IAH 227
#> 2: 830 20 UA 1714 N24211 LGA IAH 227
#> 3: 850 33 AA 1141 N619AA JFK MIA 160
#> 4: 1022 -18 B6 725 N804JB JFK BQN 183
#> 5: 837 -25 DL 461 N668DN LGA ATL 116
#> ---
#> 336772: 1634 NA 9E 3393 <NA> JFK DCA NA
#> 336773: 2312 NA 9E 3525 <NA> LGA SYR NA
#> 336774: 1330 NA MQ 3461 N535MQ LGA BNA NA
#> 336775: 1344 NA MQ 3572 N511MQ LGA CLE NA
#> 336776: 1020 NA MQ 3531 N839MQ LGA RDU NA
#> distance hour minute time_hour gain gain_per_hour
#> 1: 1400 5 15 2013-01-01 05:00:00 9 2.378855
#> 2: 1416 5 29 2013-01-01 05:00:00 16 4.229075
#> 3: 1089 5 40 2013-01-01 05:00:00 31 11.625000
#> 4: 1576 5 45 2013-01-01 05:00:00 -17 -5.573770
#> 5: 762 6 0 2013-01-01 06:00:00 -19 -9.827586
#> ---
#> 336772: 213 14 55 2013-09-30 14:00:00 NA NA
#> 336773: 198 22 0 2013-09-30 22:00:00 NA NA
#> 336774: 764 12 10 2013-09-30 12:00:00 NA NA
#> 336775: 419 11 59 2013-09-30 11:00:00 NA NA
#> 336776: 431 8 40 2013-09-30 08:00:00 NA NA
If you only want to keep the new variables, use transmute_dt() instead of mutate_dt().
sample_n_dt()
and sample_frac_dt()
sample_n_dt(flights, 10)
#> year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
#> 1: 2013 12 9 1005 1010 -5 1200 1147
#> 2: 2013 10 18 1637 1645 -8 1754 1820
#> 3: 2013 10 13 916 925 -9 1015 1033
#> 4: 2013 5 25 1127 1129 -2 1219 1235
#> 5: 2013 12 2 1755 1800 -5 1937 1919
#> 6: 2013 3 8 849 820 29 1110 944
#> 7: 2013 12 7 1330 1246 44 1614 1538
#> 8: 2013 2 10 1518 1530 -12 1704 1711
#> 9: 2013 7 17 1256 1230 26 1614 1558
#> 10: 2013 11 9 705 710 -5 835 845
#> arr_delay carrier flight tailnum origin dest air_time distance hour minute
#> 1: 13 UA 258 N460UA EWR ORD 139 719 10 10
#> 2: -26 MQ 3216 N603MQ JFK ORF 51 290 16 45
#> 3: -18 B6 1634 N203JB JFK BTV 44 266 9 25
#> 4: -16 B6 1174 N354JB EWR BOS 37 200 11 29
#> 5: 18 US 2158 N955UW LGA BOS 35 184 18 0
#> 6: 86 9E 4051 N8932C JFK BWI 32 184 8 20
#> 7: 36 B6 383 N594JB JFK MCO 140 944 12 46
#> 8: -7 9E 3719 N8974C LGA RIC 53 292 15 30
#> 9: 16 DL 2098 N322NB LGA MIA 155 1096 12 30
#> 10: -10 AA 305 N4YFAA LGA ORD 132 733 7 10
#> time_hour
#> 1: 2013-12-09 10:00:00
#> 2: 2013-10-18 16:00:00
#> 3: 2013-10-13 09:00:00
#> 4: 2013-05-25 11:00:00
#> 5: 2013-12-02 18:00:00
#> 6: 2013-03-08 08:00:00
#> 7: 2013-12-07 12:00:00
#> 8: 2013-02-10 15:00:00
#> 9: 2013-07-17 12:00:00
#> 10: 2013-11-09 07:00:00
sample_frac_dt(flights, 0.01)
#> year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
#> 1: 2013 3 7 2006 1955 11 2310 2310
#> 2: 2013 3 27 1441 1445 -4 1657 1710
#> 3: 2013 6 8 1722 1725 -3 1928 1947
#> 4: 2013 1 6 1846 1855 -9 2036 2100
#> 5: 2013 1 29 1109 1114 -5 1316 1315
#> ---
#> 3363: 2013 6 27 1807 1726 41 2108 2009
#> 3364: 2013 11 5 848 853 -5 1150 1207
#> 3365: 2013 9 27 1210 1210 0 1338 1330
#> 3366: 2013 6 27 1239 1229 10 1414 1351
#> 3367: 2013 6 30 942 945 -3 1106 1120
#> arr_delay carrier flight tailnum origin dest air_time distance hour
#> 1: 0 AA 1709 N3HFAA LGA MIA 150 1096 19
#> 2: -13 MQ 4669 N512MQ LGA ATL 106 762 14
#> 3: -19 UA 280 N461UA EWR PHX 279 2133 17
#> 4: -24 MQ 4649 N537MQ LGA MSP 151 1020 18
#> 5: 1 DL 1031 N361NB LGA DTW 100 502 11
#> ---
#> 3363: 59 UA 1593 N77430 EWR LAS 321 2227 17
#> 3364: -17 UA 354 N426UA EWR IAH 206 1400 8
#> 3365: 8 MQ 3461 N527MQ LGA BNA 97 764 12
#> 3366: 23 B6 2502 N348JB JFK BUF 72 301 12
#> 3367: -14 WN 2431 N246LV LGA MDW 109 725 9
#> minute time_hour
#> 1: 55 2013-03-07 19:00:00
#> 2: 45 2013-03-27 14:00:00
#> 3: 25 2013-06-08 17:00:00
#> 4: 55 2013-01-06 18:00:00
#> 5: 14 2013-01-29 11:00:00
#> ---
#> 3363: 26 2013-06-27 17:00:00
#> 3364: 53 2013-11-05 08:00:00
#> 3365: 10 2013-09-27 12:00:00
#> 3366: 29 2013-06-27 12:00:00
#> 3367: 45 2013-06-30 09:00:00
Consider the following dplyr code:
by_tailnum <- group_by(flights, tailnum)
delay <- summarise(by_tailnum,
count = n(),
dist = mean(distance, na.rm = TRUE),
delay = mean(arr_delay, na.rm = TRUE))
delay <- filter(delay, count > 20, dist < 2000)
With tidydt, the grouped summary (everything except the final filter step) can be obtained via:
flights %>%
summarise_dt( count = .N,
dist = mean(distance, na.rm = TRUE),
delay = mean(arr_delay, na.rm = TRUE),by = tailnum)
#> tailnum count dist delay
#> 1: N14228 111 1546.964 3.711712
#> 2: N24211 130 1330.262 7.700000
#> 3: N619AA 24 1339.208 7.652174
#> 4: N804JB 219 1424.621 -1.860465
#> 5: N668DN 49 1027.592 2.625000
#> ---
#> 4040: N766SK 1 419.000 -24.000000
#> 4041: N772SK 1 419.000 -8.000000
#> 4042: N776SK 1 419.000 -18.000000
#> 4043: N785SK 1 419.000 -16.000000
#> 4044: N557AS 1 2402.000 -30.000000
summarise_dt (or summarize_dt) has a “by” parameter that specifies the grouping variable(s). For example, we can find the number of planes and the number of flights that go to each possible destination:
# the dplyr syntax:
# destinations <- group_by(flights, dest)
# summarise(destinations,
# planes = n_distinct(tailnum),
# flights = n()
# )
summarise_dt(flights,planes = uniqueN(tailnum),flights = .N,by = dest) %>%
arrange_dt(dest)
#> dest planes flights
#> 1: ABQ 108 254
#> 2: ACK 58 265
#> 3: ALB 172 439
#> 4: ANC 6 8
#> 5: ATL 1180 17215
#> ---
#> 101: TPA 1126 7466
#> 102: TUL 105 315
#> 103: TVC 60 101
#> 104: TYS 273 631
#> 105: XNA 176 1036
If you need to group by many variables, use:
# the dplyr syntax:
# daily <- group_by(flights, year, month, day)
# (per_day <- summarise(daily, flights = n()))
flights %>%
summarise_dt(by = .(year,month,day),flights = .N)
#> year month day flights
#> 1: 2013 1 1 842
#> 2: 2013 1 2 943
#> 3: 2013 1 3 914
#> 4: 2013 1 4 915
#> 5: 2013 1 5 720
#> ---
#> 361: 2013 9 26 996
#> 362: 2013 9 27 996
#> 363: 2013 9 28 682
#> 364: 2013 9 29 914
#> 365: 2013 9 30 993
# (per_month <- summarise(per_day, flights = sum(flights)))
flights %>%
summarise_dt(by = .(year,month,day),flights = .N) %>%
summarise_dt(by = .(year,month),flights = sum(flights))
#> year month flights
#> 1: 2013 1 27004
#> 2: 2013 10 28889
#> 3: 2013 11 27268
#> 4: 2013 12 28135
#> 5: 2013 2 24951
#> 6: 2013 3 28834
#> 7: 2013 4 28330
#> 8: 2013 5 28796
#> 9: 2013 6 28243
#> 10: 2013 7 29425
#> 11: 2013 8 29327
#> 12: 2013 9 27574
# (per_year <- summarise(per_month, flights = sum(flights)))
flights %>%
summarise_dt(by = .(year,month,day),flights = .N) %>%
summarise_dt(by = .(year,month),flights = sum(flights)) %>%
summarise_dt(by = .(year),flights = sum(flights))
#> year flights
#> 1: 2013 336776