library(pacman)
p_load(tidyverse,DBI,RSQLite,fs,dbplyr)
file_copy(path = "data/TYSQL.sqlite",
new_path = "data/TYSQL_copy1.sqlite",overwrite = T)
file_copy(path = "data/TYSQL.sqlite",
new_path = "data/TYSQL_copy2.sqlite",overwrite = T)
= dbConnect(RSQLite::SQLite(), "data/TYSQL_copy1.sqlite")
c1= dbConnect(RSQLite::SQLite(), "data/TYSQL_copy2.sqlite") c2
15 必知必会第15课:插入数据
15.1 数据库的连接
因为本次实验有可能会对原来的数据库造成改变,因此我们先做一个备份,并在实验中连接备份数据库。由于我们要分别使用SQL和R进行演示,因此我们需要做两个数据库备份。
15.2 基于SQL和tidyverse的数据操作
注意要执行插入操作的时候,我们不是对数据库进行查询,因此不再使用dbGetQuery
函数,而是使用dbExecute
函数。
# 15.1.1
dbExecute(c1,"INSERT INTO Customers(cust_id,
cust_name,
cust_address,
cust_city,
cust_state,
cust_zip,
cust_country,
cust_contact,
cust_email)
VALUES(1000000006,
'Toy Land',
'123 Any Street',
'New York',
'NY',
'11111',
'USA',
NULL,
NULL);")
[1] 1
<- tibble(
new_customer cust_id = 1000000006L,
cust_name = 'Toy Land',
cust_address = '123 Any Street',
cust_city = 'New York',
cust_state = 'NY',
cust_zip = '11111',
cust_country = 'USA',
cust_contact = NA,
cust_email = NA
)tbl(c2,"Customers") %>%
rows_insert(new_customer,conflict = "ignore",copy = T)
Matching, by = "cust_id"
Warning: Column `cust_id`: mixed type, first seen values of type string,
coercing other values of type integer
# Source: SQL [6 x 9]
# Database: sqlite 3.46.0 [D:\数据库技术及应用\R4SQL\data\TYSQL_copy2.sqlite]
cust_id cust_name cust_address cust_city cust_state cust_zip cust_country
<chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 1000000001 Village To… 200 Maple L… Detroit MI 44444 USA
2 1000000002 Kids Place 333 South L… Columbus OH 43333 USA
3 1000000003 Fun4All 1 Sunny Pla… Muncie IN 42222 USA
4 1000000004 Fun4All 829 Riversi… Phoenix AZ 88888 USA
5 1000000005 The Toy St… 4545 53rd S… Chicago IL 54545 USA
6 1000000006 Toy Land 123 Any Str… New York NY 11111 USA
# ℹ 2 more variables: cust_contact <chr>, cust_email <chr>
tbl(c2,"Customers") %>%
collect() %>%
print(n = Inf)
# A tibble: 5 × 9
cust_id cust_name cust_address cust_city cust_state cust_zip cust_country
<chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 1000000001 Village To… 200 Maple L… Detroit MI 44444 USA
2 1000000002 Kids Place 333 South L… Columbus OH 43333 USA
3 1000000003 Fun4All 1 Sunny Pla… Muncie IN 42222 USA
4 1000000004 Fun4All 829 Riversi… Phoenix AZ 88888 USA
5 1000000005 The Toy St… 4545 53rd S… Chicago IL 54545 USA
# ℹ 2 more variables: cust_contact <chr>, cust_email <chr>
tbl(c2,"Customers") %>%
rows_insert(new_customer,conflict = "ignore",copy = T,in_place = T)
Matching, by = "cust_id"
tbl(c2,"Customers") %>%
collect() %>%
print(n = Inf)
# A tibble: 6 × 9
cust_id cust_name cust_address cust_city cust_state cust_zip cust_country
<chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 1000000001 Village To… 200 Maple L… Detroit MI 44444 USA
2 1000000002 Kids Place 333 South L… Columbus OH 43333 USA
3 1000000003 Fun4All 1 Sunny Pla… Muncie IN 42222 USA
4 1000000004 Fun4All 829 Riversi… Phoenix AZ 88888 USA
5 1000000005 The Toy St… 4545 53rd S… Chicago IL 54545 USA
6 1000000006 Toy Land 123 Any Str… New York NY 11111 USA
# ℹ 2 more variables: cust_contact <chr>, cust_email <chr>
# 15.1.2
dbExecute(c1,"INSERT INTO Customers(cust_id,
cust_name,
cust_address,
cust_city,
cust_state,
cust_zip,
cust_country)
VALUES(1000000014,
'Toy Land',
'123 Any Street',
'New York',
'NY',
'11111',
'USA');
")
[1] 1
# 创建一个数据框
<- tibble(
new_customer cust_id = 1000000014L,
cust_name = 'Toy Land',
cust_address = '123 Any Street',
cust_city = 'New York',
cust_state = 'NY',
cust_zip = '11111',
cust_country = 'USA'
)# 将数据插入到目标表 Customers
rows_insert(tbl(c2, "Customers"), new_customer,
conflict = "ignore",
in_place = T,copy = T)
Matching, by = "cust_id"
tbl(c2,"Customers") %>%
collect() %>%
print(n = Inf)
# A tibble: 7 × 9
cust_id cust_name cust_address cust_city cust_state cust_zip cust_country
<chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 1000000001 Village To… 200 Maple L… Detroit MI 44444 USA
2 1000000002 Kids Place 333 South L… Columbus OH 43333 USA
3 1000000003 Fun4All 1 Sunny Pla… Muncie IN 42222 USA
4 1000000004 Fun4All 829 Riversi… Phoenix AZ 88888 USA
5 1000000005 The Toy St… 4545 53rd S… Chicago IL 54545 USA
6 1000000006 Toy Land 123 Any Str… New York NY 11111 USA
7 1000000014 Toy Land 123 Any Str… New York NY 11111 USA
# ℹ 2 more variables: cust_contact <chr>, cust_email <chr>
下面这个例子的代码无法正确运行,因为数据库中不存在名为“CustNew”的表,因此这里仅仅进行一个展示:
# 15.1.3
dbExecute(c1,"INSERT INTO Customers(cust_id,
cust_contact,
cust_email,
cust_name,
cust_address,
cust_city,
cust_state,
cust_zip,
cust_country)
SELECT cust_id,
cust_contact,
cust_email,
cust_name,
cust_address,
cust_city,
cust_state,
cust_zip,
cust_country
FROM CustNew;")
tbl(c2,"Customers") %>%
rows_append(tbl(c2,"CustNew"),in_place = T)
# 15.2
dbExecute(c1,"CREATE TABLE CustCopy AS SELECT * FROM Customers;")
[1] 0
dbListTables(c1)
[1] "CustCopy" "Customers" "OrderItems" "Orders" "Products"
[6] "Vendors"
tbl(c1,"CustCopy")
# Source: table<`CustCopy`> [7 x 9]
# Database: sqlite 3.46.0 [D:\数据库技术及应用\R4SQL\data\TYSQL_copy1.sqlite]
cust_id cust_name cust_address cust_city cust_state cust_zip cust_country
<chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 1000000001 Village To… 200 Maple L… Detroit MI 44444 USA
2 1000000002 Kids Place 333 South L… Columbus OH 43333 USA
3 1000000003 Fun4All 1 Sunny Pla… Muncie IN 42222 USA
4 1000000004 Fun4All 829 Riversi… Phoenix AZ 88888 USA
5 1000000005 The Toy St… 4545 53rd S… Chicago IL 54545 USA
6 1000000006 Toy Land 123 Any Str… New York NY 11111 USA
7 1000000014 Toy Land 123 Any Str… New York NY 11111 USA
# ℹ 2 more variables: cust_contact <chr>, cust_email <chr>
在R中有两种方法可以实现相同的操作。一种方法需要先把数据框载入到R环境中(使用collect
函数),然后再存入,操作方法如下:
copy_to(dest = c2,name = "CustCopy",df = tbl(c2,"Customers") %>%
collect(),overwrite = T)
tbl(c2,"CustCopy")
# Source: table<`CustCopy`> [7 x 9]
# Database: sqlite 3.46.0 [D:\数据库技术及应用\R4SQL\data\TYSQL_copy2.sqlite]
cust_id cust_name cust_address cust_city cust_state cust_zip cust_country
<chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 1000000001 Village To… 200 Maple L… Detroit MI 44444 USA
2 1000000002 Kids Place 333 South L… Columbus OH 43333 USA
3 1000000003 Fun4All 1 Sunny Pla… Muncie IN 42222 USA
4 1000000004 Fun4All 829 Riversi… Phoenix AZ 88888 USA
5 1000000005 The Toy St… 4545 53rd S… Chicago IL 54545 USA
6 1000000006 Toy Land 123 Any Str… New York NY 11111 USA
7 1000000014 Toy Land 123 Any Str… New York NY 11111 USA
# ℹ 2 more variables: cust_contact <chr>, cust_email <chr>
# 先删掉创建的表
dbRemoveTable(c2,"CustCopy")
另一种方法是直接把查询保存到数据库的一个表中,实现方法如下:
tbl(c2, "Customers") %>%
sql_render() %>%
db_save_query(con = c2,sql = .,name = "CustCopy",temporary = F)
<table_path> `CustCopy`
dbListTables(c2)
[1] "CustCopy" "Customers" "OrderItems"
[4] "Orders" "Products" "Vendors"
[7] "dbplyr_R8DnPKCOk4" "dbplyr_dfK1Y1axzY" "dbplyr_vRpiFA51eq"
[10] "sqlite_stat1" "sqlite_stat4"
tbl(c2,"CustCopy")
# Source: table<`CustCopy`> [7 x 9]
# Database: sqlite 3.46.0 [D:\数据库技术及应用\R4SQL\data\TYSQL_copy2.sqlite]
cust_id cust_name cust_address cust_city cust_state cust_zip cust_country
<chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 1000000001 Village To… 200 Maple L… Detroit MI 44444 USA
2 1000000002 Kids Place 333 South L… Columbus OH 43333 USA
3 1000000003 Fun4All 1 Sunny Pla… Muncie IN 42222 USA
4 1000000004 Fun4All 829 Riversi… Phoenix AZ 88888 USA
5 1000000005 The Toy St… 4545 53rd S… Chicago IL 54545 USA
6 1000000006 Toy Land 123 Any Str… New York NY 11111 USA
7 1000000014 Toy Land 123 Any Str… New York NY 11111 USA
# ℹ 2 more variables: cust_contact <chr>, cust_email <chr>
15.3 练习
- 请使用SQL和R两种方式,解决课后的挑战题。
15.4 关闭数据库
这一步,我们会关闭数据库连接。
dbDisconnect(c1)
dbDisconnect(c2)