5  必知必会第5课:高级数据过滤

作者

黄天元

5.1 数据库的连接

library(pacman)
p_load(tidyverse,DBI,RSQLite)

mydb = dbConnect(RSQLite::SQLite(), "data/TYSQL.sqlite")

5.2 基于SQL的数据操作

# 5.1.1
dbGetQuery(mydb,"SELECT prod_id, prod_price, prod_name
FROM Products
WHERE vend_id = 'DLL01' AND prod_price <= 4;") 
  prod_id prod_price           prod_name
1  BNBG01       3.49   Fish bean bag toy
2  BNBG02       3.49   Bird bean bag toy
3  BNBG03       3.49 Rabbit bean bag toy
# 5.1.2
dbGetQuery(mydb,"SELECT prod_name,prod_price
FROM Products
WHERE vend_id = 'DLL01' OR vend_id = 'BRS01';")
            prod_name prod_price
1   8 inch teddy bear       5.99
2  12 inch teddy bear       8.99
3  18 inch teddy bear      11.99
4   Fish bean bag toy       3.49
5   Bird bean bag toy       3.49
6 Rabbit bean bag toy       3.49
7         Raggedy Ann       4.99
# 5.1.3
dbGetQuery(mydb,"SELECT prod_name, prod_price
FROM Products
WHERE vend_id = 'DLL01' OR vend_id = 'BRS01'
AND prod_price >= 10;")
            prod_name prod_price
1  18 inch teddy bear      11.99
2   Fish bean bag toy       3.49
3   Bird bean bag toy       3.49
4 Rabbit bean bag toy       3.49
5         Raggedy Ann       4.99
dbGetQuery(mydb,"SELECT prod_name, prod_price
FROM Products
WHERE (vend_id = 'DLL01' OR vend_id = 'BRS01')
AND prod_price >= 10;")
           prod_name prod_price
1 18 inch teddy bear      11.99
# 5.2
dbGetQuery(mydb,"SELECT prod_name, prod_price
FROM Products
WHERE vend_id IN ('DLL01','BRS01')
ORDER BY prod_name;")
            prod_name prod_price
1  12 inch teddy bear       8.99
2  18 inch teddy bear      11.99
3   8 inch teddy bear       5.99
4   Bird bean bag toy       3.49
5   Fish bean bag toy       3.49
6 Rabbit bean bag toy       3.49
7         Raggedy Ann       4.99
dbGetQuery(mydb,"SELECT prod_name, prod_price
FROM Products
WHERE vend_id = 'DLL01' OR vend_id = 'BRS01'
ORDER BY prod_name;")
            prod_name prod_price
1  12 inch teddy bear       8.99
2  18 inch teddy bear      11.99
3   8 inch teddy bear       5.99
4   Bird bean bag toy       3.49
5   Fish bean bag toy       3.49
6 Rabbit bean bag toy       3.49
7         Raggedy Ann       4.99
# 5.3
dbGetQuery(mydb,"SELECT prod_name
FROM Products
WHERE NOT vend_id = 'DLL01'
ORDER BY prod_name;")
           prod_name
1 12 inch teddy bear
2 18 inch teddy bear
3  8 inch teddy bear
4          King doll
5         Queen doll
dbGetQuery(mydb,"SELECT prod_name
FROM Products
WHERE vend_id <> 'DLL01'
ORDER BY prod_name;")
           prod_name
1 12 inch teddy bear
2 18 inch teddy bear
3  8 inch teddy bear
4          King doll
5         Queen doll

5.3 基于tidyverse的数据操作

# 5.1.1
tbl(mydb,"Products") %>% 
  filter(vend_id == 'DLL01' & prod_price <= 4) %>% 
  select(prod_id, prod_price, prod_name)
# Source:   SQL [3 x 3]
# Database: sqlite 3.46.0 [F:\Data\R4SQL\data\TYSQL.sqlite]
  prod_id prod_price prod_name          
  <chr>        <dbl> <chr>              
1 BNBG01        3.49 Fish bean bag toy  
2 BNBG02        3.49 Bird bean bag toy  
3 BNBG03        3.49 Rabbit bean bag toy
# 5.1.2
tbl(mydb,"Products") %>% 
  filter(vend_id == 'DLL01' | vend_id == 'BRS01') %>% 
  select(prod_name,prod_price)
# Source:   SQL [7 x 2]
# Database: sqlite 3.46.0 [F:\Data\R4SQL\data\TYSQL.sqlite]
  prod_name           prod_price
  <chr>                    <dbl>
1 8 inch teddy bear         5.99
2 12 inch teddy bear        8.99
3 18 inch teddy bear       12.0 
4 Fish bean bag toy         3.49
5 Bird bean bag toy         3.49
6 Rabbit bean bag toy       3.49
7 Raggedy Ann               4.99
# 5.1.3
tbl(mydb,"Products") %>% 
  filter(vend_id == 'DLL01' | vend_id == 'BRS01' & prod_price >= 10) %>% 
  select(prod_name, prod_price)
# Source:   SQL [5 x 2]
# Database: sqlite 3.46.0 [F:\Data\R4SQL\data\TYSQL.sqlite]
  prod_name           prod_price
  <chr>                    <dbl>
1 18 inch teddy bear       12.0 
2 Fish bean bag toy         3.49
3 Bird bean bag toy         3.49
4 Rabbit bean bag toy       3.49
5 Raggedy Ann               4.99
tbl(mydb,"Products") %>% 
  filter((vend_id == 'DLL01' | vend_id == 'BRS01') & prod_price >= 10) %>% 
  select(prod_name, prod_price)
# Source:   SQL [1 x 2]
# Database: sqlite 3.46.0 [F:\Data\R4SQL\data\TYSQL.sqlite]
  prod_name          prod_price
  <chr>                   <dbl>
1 18 inch teddy bear       12.0
# 5.2
tbl(mydb,"Products") %>% 
  filter(vend_id %in% c('DLL01','BRS01')) %>% 
  arrange(prod_name) %>% 
  select(prod_name,prod_price)
# Source:     SQL [7 x 2]
# Database:   sqlite 3.46.0 [F:\Data\R4SQL\data\TYSQL.sqlite]
# Ordered by: prod_name
  prod_name           prod_price
  <chr>                    <dbl>
1 12 inch teddy bear        8.99
2 18 inch teddy bear       12.0 
3 8 inch teddy bear         5.99
4 Bird bean bag toy         3.49
5 Fish bean bag toy         3.49
6 Rabbit bean bag toy       3.49
7 Raggedy Ann               4.99
tbl(mydb,"Products") %>% 
  filter(vend_id == 'DLL01' | vend_id == 'BRS01') %>% 
  arrange(prod_name) %>% 
  select(prod_name,prod_price)
# Source:     SQL [7 x 2]
# Database:   sqlite 3.46.0 [F:\Data\R4SQL\data\TYSQL.sqlite]
# Ordered by: prod_name
  prod_name           prod_price
  <chr>                    <dbl>
1 12 inch teddy bear        8.99
2 18 inch teddy bear       12.0 
3 8 inch teddy bear         5.99
4 Bird bean bag toy         3.49
5 Fish bean bag toy         3.49
6 Rabbit bean bag toy       3.49
7 Raggedy Ann               4.99
# 5.3
tbl(mydb,"Products") %>% 
  filter(!vend_id == 'DLL01') %>% 
  arrange(prod_name) %>% 
  select(prod_name)
# Source:     SQL [5 x 1]
# Database:   sqlite 3.46.0 [F:\Data\R4SQL\data\TYSQL.sqlite]
# Ordered by: prod_name
  prod_name         
  <chr>             
1 12 inch teddy bear
2 18 inch teddy bear
3 8 inch teddy bear 
4 King doll         
5 Queen doll        
tbl(mydb,"Products") %>% 
  filter(vend_id != 'DLL01') %>% 
  arrange(prod_name) %>% 
  select(prod_name)
# Source:     SQL [5 x 1]
# Database:   sqlite 3.46.0 [F:\Data\R4SQL\data\TYSQL.sqlite]
# Ordered by: prod_name
  prod_name         
  <chr>             
1 12 inch teddy bear
2 18 inch teddy bear
3 8 inch teddy bear 
4 King doll         
5 Queen doll        

5.4 练习

  • 请使用SQL和R两种方式,解决课后的挑战题。