11  必知必会第11课:使用子查询

作者

黄天元

11.1 数据库的连接

library(pacman)
p_load(tidyverse,DBI,RSQLite)

mydb = dbConnect(RSQLite::SQLite(), "data/TYSQL.sqlite")

11.2 基于SQL的数据操作

# 11.2
dbGetQuery(mydb,"SELECT order_num
FROM OrderItems
WHERE prod_id = 'RGAN01';") 
  order_num
1     20007
2     20008
dbGetQuery(mydb,"SELECT cust_id
FROM Orders
WHERE order_num IN (20007,20008);")
     cust_id
1 1000000004
2 1000000005
dbGetQuery(mydb,"SELECT cust_id
FROM Orders
WHERE order_num IN (SELECT order_num
FROM OrderItems
WHERE prod_id = 'RGAN01');")
     cust_id
1 1000000004
2 1000000005
dbGetQuery(mydb,"SELECT cust_name, cust_contact
FROM Customers
WHERE cust_id IN (1000000004,1000000005);")
      cust_name       cust_contact
1       Fun4All Denise L. Stephens
2 The Toy Store         Kim Howard
dbGetQuery(mydb,"SELECT cust_name, cust_contact
FROM Customers
WHERE cust_id IN (SELECT cust_id
FROM Orders
WHERE order_num IN (SELECT order_num
FROM OrderItems
WHERE prod_id = 'RGAN01'));")
      cust_name       cust_contact
1       Fun4All Denise L. Stephens
2 The Toy Store         Kim Howard
# 11.3
dbGetQuery(mydb,"SELECT COUNT(*) AS orders
FROM Orders
WHERE cust_id = 1000000001;")
  orders
1      2
dbGetQuery(mydb,"SELECT cust_name,
cust_state,
(SELECT COUNT(*)
FROM Orders
WHERE Orders.cust_id = Customers.cust_id) AS orders
FROM Customers
ORDER BY cust_name;")
      cust_name cust_state orders
1       Fun4All         IN      1
2       Fun4All         AZ      1
3    Kids Place         OH      0
4 The Toy Store         IL      1
5      Toy Land         NY      0
6  Village Toys         MI      2
dbGetQuery(mydb,"SELECT cust_name,
cust_state,
(SELECT COUNT(*)
FROM Orders
WHERE cust_id = cust_id) AS orders
FROM Customers
ORDER BY cust_name;")
      cust_name cust_state orders
1       Fun4All         IN      5
2       Fun4All         AZ      5
3    Kids Place         OH      5
4 The Toy Store         IL      5
5      Toy Land         NY      5
6  Village Toys         MI      5

11.3 基于tidyverse的数据操作

# 11.2
tbl(mydb,"OrderItems") %>% 
  filter(prod_id == 'RGAN01') %>% 
  pull(order_num) -> query1
tbl(mydb,"Orders") %>% 
  filter(order_num %in% query1) %>% 
  pull(cust_id) -> query2
tbl(mydb,"Customers") %>% 
  filter(cust_id %in% query2) %>% 
  select(cust_name, cust_contact) 
# Source:   SQL [2 x 2]
# Database: sqlite 3.46.0 [F:\Data\R4SQL\data\TYSQL.sqlite]
  cust_name     cust_contact      
  <chr>         <chr>             
1 Fun4All       Denise L. Stephens
2 The Toy Store Kim Howard        
# 11.3
tbl(mydb,"Orders") %>% 
  count(cust_id,name = "orders") -> query4
tbl(mydb,"Customers") %>% 
  select(cust_name,cust_state,cust_id) %>% 
  left_join(query4) %>% 
  arrange(cust_name) 
Joining with `by = join_by(cust_id)`
# Source:     SQL [6 x 4]
# Database:   sqlite 3.46.0 [F:\Data\R4SQL\data\TYSQL.sqlite]
# Ordered by: cust_name
  cust_name     cust_state cust_id    orders
  <chr>         <chr>      <chr>       <int>
1 Fun4All       IN         1000000003      1
2 Fun4All       AZ         1000000004      1
3 Kids Place    OH         1000000002     NA
4 The Toy Store IL         1000000005      1
5 Toy Land      NY         1000000006     NA
6 Village Toys  MI         1000000001      2

11.4 练习

  • 请使用SQL和R两种方式,解决课后的挑战题。