library(pacman)
p_load(tidyverse,DBI,RSQLite)
= dbConnect(RSQLite::SQLite(), "data/TYSQL.sqlite") mydb
10 必知必会第10课:分组数据
10.1 数据库的连接
10.2 基于SQL的数据操作
# 10.1
dbGetQuery(mydb,"SELECT COUNT(*) AS num_prods
FROM Products
WHERE vend_id = 'DLL01';")
num_prods
1 4
# 10.2
dbGetQuery(mydb,"SELECT vend_id, COUNT(*) AS num_prods
FROM Products
GROUP BY vend_id;")
vend_id num_prods
1 BRS01 3
2 DLL01 4
3 FNG01 2
# 10.3
dbGetQuery(mydb,"SELECT cust_id, COUNT(*) AS orders
FROM Orders
GROUP BY cust_id
HAVING COUNT(*) >= 2;")
cust_id orders
1 1000000001 2
dbGetQuery(mydb,"SELECT vend_id, COUNT(*) AS num_prods
FROM Products
WHERE prod_price >= 4
GROUP BY vend_id
HAVING COUNT(*) >= 2;")
vend_id num_prods
1 BRS01 3
2 FNG01 2
dbGetQuery(mydb,"SELECT vend_id, COUNT(*) AS num_prods
FROM Products
GROUP BY vend_id
HAVING COUNT(*) >= 2;")
vend_id num_prods
1 BRS01 3
2 DLL01 4
3 FNG01 2
# 10.4
dbGetQuery(mydb,"SELECT order_num, COUNT(*) AS items
FROM OrderItems
GROUP BY order_num
HAVING COUNT(*) >= 3;")
order_num items
1 20006 3
2 20007 5
3 20008 5
4 20009 3
dbGetQuery(mydb,"SELECT order_num, COUNT(*) AS items
FROM OrderItems
GROUP BY order_num
HAVING COUNT(*) >= 3
ORDER BY items, order_num;")
order_num items
1 20006 3
2 20009 3
3 20007 5
4 20008 5
10.3 基于tidyverse的数据操作
# 10.1
tbl(mydb,"Products") %>%
filter(vend_id == 'DLL01') %>%
tally(name = "num_prods")
# Source: SQL [1 x 1]
# Database: sqlite 3.46.0 [F:\Data\R4SQL\data\TYSQL.sqlite]
num_prods
<int>
1 4
# 10.2
tbl(mydb,"Products") %>%
group_by(vend_id) %>%
tally(name = "num_prods")
# Source: SQL [3 x 2]
# Database: sqlite 3.46.0 [F:\Data\R4SQL\data\TYSQL.sqlite]
vend_id num_prods
<chr> <int>
1 BRS01 3
2 DLL01 4
3 FNG01 2
# 10.3
tbl(mydb,"Orders") %>%
summarise(orders = n(),.by = cust_id) %>%
filter(orders >= 2)
# Source: SQL [1 x 2]
# Database: sqlite 3.46.0 [F:\Data\R4SQL\data\TYSQL.sqlite]
cust_id orders
<chr> <int>
1 1000000001 2
tbl(mydb,"Products") %>%
filter(prod_price >= 4) %>% # WHERE
group_by(vend_id) %>%
summarise(num_prods = n()) %>%
filter(num_prods >= 2) # HAVING
# Source: SQL [2 x 2]
# Database: sqlite 3.46.0 [F:\Data\R4SQL\data\TYSQL.sqlite]
vend_id num_prods
<chr> <int>
1 BRS01 3
2 FNG01 2
tbl(mydb,"Products") %>%
group_by(vend_id) %>%
summarise(num_prods = n()) %>%
filter(num_prods >= 2)
# Source: SQL [3 x 2]
# Database: sqlite 3.46.0 [F:\Data\R4SQL\data\TYSQL.sqlite]
vend_id num_prods
<chr> <int>
1 BRS01 3
2 DLL01 4
3 FNG01 2
# 10.4
tbl(mydb,"OrderItems") %>%
group_by(order_num) %>%
tally(name = "items") %>%
filter(items >=3)
# Source: SQL [4 x 2]
# Database: sqlite 3.46.0 [F:\Data\R4SQL\data\TYSQL.sqlite]
order_num items
<int> <int>
1 20006 3
2 20007 5
3 20008 5
4 20009 3
tbl(mydb,"OrderItems") %>%
group_by(order_num) %>%
tally(name = "items") %>%
filter(items >=3) %>%
arrange(items,order_num)
# Source: SQL [4 x 2]
# Database: sqlite 3.46.0 [F:\Data\R4SQL\data\TYSQL.sqlite]
# Ordered by: items, order_num
order_num items
<int> <int>
1 20006 3
2 20009 3
3 20007 5
4 20008 5
10.4 练习
- 请使用SQL和R两种方式,解决课后的挑战题。