4  必知必会第4课:过滤数据

作者

黄天元

4.1 数据库的连接

library(pacman)
p_load(tidyverse,DBI,RSQLite)

mydb = dbConnect(RSQLite::SQLite(), "data/TYSQL.sqlite")

4.2 基于SQL的数据操作

# 4.1
dbGetQuery(mydb,"SELECT prod_name, prod_price
FROM Products
WHERE prod_price = 3.49;") 
            prod_name prod_price
1   Fish bean bag toy       3.49
2   Bird bean bag toy       3.49
3 Rabbit bean bag toy       3.49
# 4.2.1
dbGetQuery(mydb,"SELECT prod_name, prod_price
FROM Products
WHERE prod_price < 10;") 
            prod_name prod_price
1   8 inch teddy bear       5.99
2  12 inch teddy bear       8.99
3   Fish bean bag toy       3.49
4   Bird bean bag toy       3.49
5 Rabbit bean bag toy       3.49
6         Raggedy Ann       4.99
7           King doll       9.49
8          Queen doll       9.49
dbGetQuery(mydb,"SELECT prod_name, prod_price
FROM Products
WHERE prod_price <= 10;")
            prod_name prod_price
1   8 inch teddy bear       5.99
2  12 inch teddy bear       8.99
3   Fish bean bag toy       3.49
4   Bird bean bag toy       3.49
5 Rabbit bean bag toy       3.49
6         Raggedy Ann       4.99
7           King doll       9.49
8          Queen doll       9.49
# 4.2.2
dbGetQuery(mydb,"SELECT vend_id, prod_name
FROM Products
WHERE vend_id <> 'DLL01';")
  vend_id          prod_name
1   BRS01  8 inch teddy bear
2   BRS01 12 inch teddy bear
3   BRS01 18 inch teddy bear
4   FNG01          King doll
5   FNG01         Queen doll
dbGetQuery(mydb,"SELECT vend_id, prod_name
FROM Products
WHERE vend_id != 'DLL01';")
  vend_id          prod_name
1   BRS01  8 inch teddy bear
2   BRS01 12 inch teddy bear
3   BRS01 18 inch teddy bear
4   FNG01          King doll
5   FNG01         Queen doll
# 4.2.3
dbGetQuery(mydb,"SELECT prod_name, prod_price
FROM Products
WHERE prod_price BETWEEN 5 AND 10;")
           prod_name prod_price
1  8 inch teddy bear       5.99
2 12 inch teddy bear       8.99
3          King doll       9.49
4         Queen doll       9.49
# 4.2.4
dbGetQuery(mydb,"SELECT prod_name
FROM Products
WHERE prod_price IS NULL;")
[1] prod_name
<0 rows> (or 0-length row.names)
dbGetQuery(mydb,"SELECT cust_name
FROM Customers
WHERE cust_email IS NULL;")
   cust_name
1 Kids Place
2   Toy Land

4.3 基于tidyverse的数据操作

# 4.1
tbl(mydb,"Products") %>% 
  filter(prod_price == 3.49) %>% 
  select(prod_name, prod_price)
# Source:   SQL [3 x 2]
# Database: sqlite 3.46.0 [F:\Data\R4SQL\data\TYSQL.sqlite]
  prod_name           prod_price
  <chr>                    <dbl>
1 Fish bean bag toy         3.49
2 Bird bean bag toy         3.49
3 Rabbit bean bag toy       3.49
# 4.2.1
tbl(mydb,"Products") %>% 
  filter(prod_price < 10) %>% 
  select(prod_name, prod_price)
# Source:   SQL [8 x 2]
# Database: sqlite 3.46.0 [F:\Data\R4SQL\data\TYSQL.sqlite]
  prod_name           prod_price
  <chr>                    <dbl>
1 8 inch teddy bear         5.99
2 12 inch teddy bear        8.99
3 Fish bean bag toy         3.49
4 Bird bean bag toy         3.49
5 Rabbit bean bag toy       3.49
6 Raggedy Ann               4.99
7 King doll                 9.49
8 Queen doll                9.49
tbl(mydb,"Products") %>% 
  filter(prod_price <= 10) %>% 
  select(prod_name, prod_price)
# Source:   SQL [8 x 2]
# Database: sqlite 3.46.0 [F:\Data\R4SQL\data\TYSQL.sqlite]
  prod_name           prod_price
  <chr>                    <dbl>
1 8 inch teddy bear         5.99
2 12 inch teddy bear        8.99
3 Fish bean bag toy         3.49
4 Bird bean bag toy         3.49
5 Rabbit bean bag toy       3.49
6 Raggedy Ann               4.99
7 King doll                 9.49
8 Queen doll                9.49
# 4.2.2
tbl(mydb,"Products") %>% 
  filter(vend_id != 'DLL01') %>% 
  select(vend_id, prod_name)
# Source:   SQL [5 x 2]
# Database: sqlite 3.46.0 [F:\Data\R4SQL\data\TYSQL.sqlite]
  vend_id prod_name         
  <chr>   <chr>             
1 BRS01   8 inch teddy bear 
2 BRS01   12 inch teddy bear
3 BRS01   18 inch teddy bear
4 FNG01   King doll         
5 FNG01   Queen doll        
# 4.2.3
tbl(mydb,"Products") %>% 
  filter(between(prod_price,5,10)) %>% 
  select(prod_name, prod_price)
# Source:   SQL [4 x 2]
# Database: sqlite 3.46.0 [F:\Data\R4SQL\data\TYSQL.sqlite]
  prod_name          prod_price
  <chr>                   <dbl>
1 8 inch teddy bear        5.99
2 12 inch teddy bear       8.99
3 King doll                9.49
4 Queen doll               9.49
# 4.2.4
tbl(mydb,"Products") %>% 
  filter(is.na(prod_price)) %>% 
  select(prod_name)
# Source:   SQL [0 x 1]
# Database: sqlite 3.46.0 [F:\Data\R4SQL\data\TYSQL.sqlite]
# ℹ 1 variable: prod_name <chr>
tbl(mydb,"Customers") %>% 
  filter(is.na(cust_email)) %>% 
  select(cust_name)
# Source:   SQL [2 x 1]
# Database: sqlite 3.46.0 [F:\Data\R4SQL\data\TYSQL.sqlite]
  cust_name 
  <chr>     
1 Kids Place
2 Toy Land  

4.4 练习

  • 请使用SQL和R两种方式,解决课后的挑战题。