library(pacman)
p_load(tidyverse,DBI,RSQLite)
= dbConnect(RSQLite::SQLite(), "data/TYSQL.sqlite") mydb
4 必知必会第4课:过滤数据
4.1 数据库的连接
4.2 基于SQL的数据操作
# 4.1
dbGetQuery(mydb,"SELECT prod_name, prod_price
FROM Products
WHERE prod_price = 3.49;")
prod_name prod_price
1 Fish bean bag toy 3.49
2 Bird bean bag toy 3.49
3 Rabbit bean bag toy 3.49
# 4.2.1
dbGetQuery(mydb,"SELECT prod_name, prod_price
FROM Products
WHERE prod_price < 10;")
prod_name prod_price
1 8 inch teddy bear 5.99
2 12 inch teddy bear 8.99
3 Fish bean bag toy 3.49
4 Bird bean bag toy 3.49
5 Rabbit bean bag toy 3.49
6 Raggedy Ann 4.99
7 King doll 9.49
8 Queen doll 9.49
dbGetQuery(mydb,"SELECT prod_name, prod_price
FROM Products
WHERE prod_price <= 10;")
prod_name prod_price
1 8 inch teddy bear 5.99
2 12 inch teddy bear 8.99
3 Fish bean bag toy 3.49
4 Bird bean bag toy 3.49
5 Rabbit bean bag toy 3.49
6 Raggedy Ann 4.99
7 King doll 9.49
8 Queen doll 9.49
# 4.2.2
dbGetQuery(mydb,"SELECT vend_id, prod_name
FROM Products
WHERE vend_id <> 'DLL01';")
vend_id prod_name
1 BRS01 8 inch teddy bear
2 BRS01 12 inch teddy bear
3 BRS01 18 inch teddy bear
4 FNG01 King doll
5 FNG01 Queen doll
dbGetQuery(mydb,"SELECT vend_id, prod_name
FROM Products
WHERE vend_id != 'DLL01';")
vend_id prod_name
1 BRS01 8 inch teddy bear
2 BRS01 12 inch teddy bear
3 BRS01 18 inch teddy bear
4 FNG01 King doll
5 FNG01 Queen doll
# 4.2.3
dbGetQuery(mydb,"SELECT prod_name, prod_price
FROM Products
WHERE prod_price BETWEEN 5 AND 10;")
prod_name prod_price
1 8 inch teddy bear 5.99
2 12 inch teddy bear 8.99
3 King doll 9.49
4 Queen doll 9.49
# 4.2.4
dbGetQuery(mydb,"SELECT prod_name
FROM Products
WHERE prod_price IS NULL;")
[1] prod_name
<0 rows> (or 0-length row.names)
dbGetQuery(mydb,"SELECT cust_name
FROM Customers
WHERE cust_email IS NULL;")
cust_name
1 Kids Place
2 Toy Land
4.3 基于tidyverse的数据操作
# 4.1
tbl(mydb,"Products") %>%
filter(prod_price == 3.49) %>%
select(prod_name, prod_price)
# Source: SQL [3 x 2]
# Database: sqlite 3.46.0 [F:\Data\R4SQL\data\TYSQL.sqlite]
prod_name prod_price
<chr> <dbl>
1 Fish bean bag toy 3.49
2 Bird bean bag toy 3.49
3 Rabbit bean bag toy 3.49
# 4.2.1
tbl(mydb,"Products") %>%
filter(prod_price < 10) %>%
select(prod_name, prod_price)
# Source: SQL [8 x 2]
# Database: sqlite 3.46.0 [F:\Data\R4SQL\data\TYSQL.sqlite]
prod_name prod_price
<chr> <dbl>
1 8 inch teddy bear 5.99
2 12 inch teddy bear 8.99
3 Fish bean bag toy 3.49
4 Bird bean bag toy 3.49
5 Rabbit bean bag toy 3.49
6 Raggedy Ann 4.99
7 King doll 9.49
8 Queen doll 9.49
tbl(mydb,"Products") %>%
filter(prod_price <= 10) %>%
select(prod_name, prod_price)
# Source: SQL [8 x 2]
# Database: sqlite 3.46.0 [F:\Data\R4SQL\data\TYSQL.sqlite]
prod_name prod_price
<chr> <dbl>
1 8 inch teddy bear 5.99
2 12 inch teddy bear 8.99
3 Fish bean bag toy 3.49
4 Bird bean bag toy 3.49
5 Rabbit bean bag toy 3.49
6 Raggedy Ann 4.99
7 King doll 9.49
8 Queen doll 9.49
# 4.2.2
tbl(mydb,"Products") %>%
filter(vend_id != 'DLL01') %>%
select(vend_id, prod_name)
# Source: SQL [5 x 2]
# Database: sqlite 3.46.0 [F:\Data\R4SQL\data\TYSQL.sqlite]
vend_id prod_name
<chr> <chr>
1 BRS01 8 inch teddy bear
2 BRS01 12 inch teddy bear
3 BRS01 18 inch teddy bear
4 FNG01 King doll
5 FNG01 Queen doll
# 4.2.3
tbl(mydb,"Products") %>%
filter(between(prod_price,5,10)) %>%
select(prod_name, prod_price)
# Source: SQL [4 x 2]
# Database: sqlite 3.46.0 [F:\Data\R4SQL\data\TYSQL.sqlite]
prod_name prod_price
<chr> <dbl>
1 8 inch teddy bear 5.99
2 12 inch teddy bear 8.99
3 King doll 9.49
4 Queen doll 9.49
# 4.2.4
tbl(mydb,"Products") %>%
filter(is.na(prod_price)) %>%
select(prod_name)
# Source: SQL [0 x 1]
# Database: sqlite 3.46.0 [F:\Data\R4SQL\data\TYSQL.sqlite]
# ℹ 1 variable: prod_name <chr>
tbl(mydb,"Customers") %>%
filter(is.na(cust_email)) %>%
select(cust_name)
# Source: SQL [2 x 1]
# Database: sqlite 3.46.0 [F:\Data\R4SQL\data\TYSQL.sqlite]
cust_name
<chr>
1 Kids Place
2 Toy Land
4.4 练习
- 请使用SQL和R两种方式,解决课后的挑战题。