15  必知必会第15课:插入数据

作者

黄天元

15.1 数据库的连接

因为本次实验有可能会对原来的数据库造成改变,因此我们先做一个备份,并在实验中连接备份数据库。由于我们要分别使用SQL和R进行演示,因此我们需要做两个数据库备份。

library(pacman)
p_load(tidyverse,DBI,RSQLite,fs,dbplyr)
file_copy(path = "data/TYSQL.sqlite",
          new_path = "data/TYSQL_copy1.sqlite",overwrite = T)
file_copy(path = "data/TYSQL.sqlite",
          new_path = "data/TYSQL_copy2.sqlite",overwrite = T)
c1= dbConnect(RSQLite::SQLite(), "data/TYSQL_copy1.sqlite")
c2= dbConnect(RSQLite::SQLite(), "data/TYSQL_copy2.sqlite")

15.2 基于SQL和tidyverse的数据操作

注意要执行插入操作的时候,我们不是对数据库进行查询,因此不再使用dbGetQuery函数,而是使用dbExecute函数。

# 15.1.1
dbExecute(c1,"INSERT INTO Customers(cust_id,
cust_name,
cust_address,
cust_city,
cust_state,
cust_zip,
cust_country,
cust_contact,
cust_email)
VALUES(1000000006,
'Toy Land',
'123 Any Street',
'New York',
'NY',
'11111',
'USA',
NULL,
NULL);")
[1] 1
new_customer <- tibble(
  cust_id = 1000000006L,
  cust_name = 'Toy Land',
  cust_address = '123 Any Street',
  cust_city = 'New York',
  cust_state = 'NY',
  cust_zip = '11111',
  cust_country = 'USA',
  cust_contact = NA,
  cust_email = NA
)
tbl(c2,"Customers") %>% 
  rows_insert(new_customer,conflict = "ignore",copy = T) 
Matching, by = "cust_id"
Warning: Column `cust_id`: mixed type, first seen values of type string,
coercing other values of type integer
# Source:   SQL [6 x 9]
# Database: sqlite 3.46.0 [D:\数据库技术及应用\R4SQL\data\TYSQL_copy2.sqlite]
  cust_id    cust_name   cust_address cust_city cust_state cust_zip cust_country
  <chr>      <chr>       <chr>        <chr>     <chr>      <chr>    <chr>       
1 1000000001 Village To… 200 Maple L… Detroit   MI         44444    USA         
2 1000000002 Kids Place  333 South L… Columbus  OH         43333    USA         
3 1000000003 Fun4All     1 Sunny Pla… Muncie    IN         42222    USA         
4 1000000004 Fun4All     829 Riversi… Phoenix   AZ         88888    USA         
5 1000000005 The Toy St… 4545 53rd S… Chicago   IL         54545    USA         
6 1000000006 Toy Land    123 Any Str… New York  NY         11111    USA         
# ℹ 2 more variables: cust_contact <chr>, cust_email <chr>
tbl(c2,"Customers") %>% 
  collect() %>% 
  print(n = Inf)
# A tibble: 5 × 9
  cust_id    cust_name   cust_address cust_city cust_state cust_zip cust_country
  <chr>      <chr>       <chr>        <chr>     <chr>      <chr>    <chr>       
1 1000000001 Village To… 200 Maple L… Detroit   MI         44444    USA         
2 1000000002 Kids Place  333 South L… Columbus  OH         43333    USA         
3 1000000003 Fun4All     1 Sunny Pla… Muncie    IN         42222    USA         
4 1000000004 Fun4All     829 Riversi… Phoenix   AZ         88888    USA         
5 1000000005 The Toy St… 4545 53rd S… Chicago   IL         54545    USA         
# ℹ 2 more variables: cust_contact <chr>, cust_email <chr>
tbl(c2,"Customers") %>% 
  rows_insert(new_customer,conflict = "ignore",copy = T,in_place = T) 
Matching, by = "cust_id"
tbl(c2,"Customers") %>% 
  collect() %>% 
  print(n = Inf)
# A tibble: 6 × 9
  cust_id    cust_name   cust_address cust_city cust_state cust_zip cust_country
  <chr>      <chr>       <chr>        <chr>     <chr>      <chr>    <chr>       
1 1000000001 Village To… 200 Maple L… Detroit   MI         44444    USA         
2 1000000002 Kids Place  333 South L… Columbus  OH         43333    USA         
3 1000000003 Fun4All     1 Sunny Pla… Muncie    IN         42222    USA         
4 1000000004 Fun4All     829 Riversi… Phoenix   AZ         88888    USA         
5 1000000005 The Toy St… 4545 53rd S… Chicago   IL         54545    USA         
6 1000000006 Toy Land    123 Any Str… New York  NY         11111    USA         
# ℹ 2 more variables: cust_contact <chr>, cust_email <chr>
# 15.1.2
dbExecute(c1,"INSERT INTO Customers(cust_id,
                      cust_name,
                      cust_address,
                      cust_city,
                      cust_state,
                      cust_zip,
                      cust_country)
VALUES(1000000014,
       'Toy Land',
       '123 Any Street',
       'New York',
       'NY',
       '11111',
       'USA');
")
[1] 1
# 创建一个数据框
new_customer <- tibble(
  cust_id = 1000000014L,
  cust_name = 'Toy Land',
  cust_address = '123 Any Street',
  cust_city = 'New York',
  cust_state = 'NY',
  cust_zip = '11111',
  cust_country = 'USA'
)
# 将数据插入到目标表 Customers
rows_insert(tbl(c2, "Customers"), new_customer,
            conflict = "ignore",
            in_place = T,copy = T)
Matching, by = "cust_id"
tbl(c2,"Customers") %>% 
  collect() %>% 
  print(n = Inf)
# A tibble: 7 × 9
  cust_id    cust_name   cust_address cust_city cust_state cust_zip cust_country
  <chr>      <chr>       <chr>        <chr>     <chr>      <chr>    <chr>       
1 1000000001 Village To… 200 Maple L… Detroit   MI         44444    USA         
2 1000000002 Kids Place  333 South L… Columbus  OH         43333    USA         
3 1000000003 Fun4All     1 Sunny Pla… Muncie    IN         42222    USA         
4 1000000004 Fun4All     829 Riversi… Phoenix   AZ         88888    USA         
5 1000000005 The Toy St… 4545 53rd S… Chicago   IL         54545    USA         
6 1000000006 Toy Land    123 Any Str… New York  NY         11111    USA         
7 1000000014 Toy Land    123 Any Str… New York  NY         11111    USA         
# ℹ 2 more variables: cust_contact <chr>, cust_email <chr>

下面这个例子的代码无法正确运行,因为数据库中不存在名为“CustNew”的表,因此这里仅仅进行一个展示:

# 15.1.3
dbExecute(c1,"INSERT INTO Customers(cust_id,
cust_contact,
cust_email,
cust_name,
cust_address,
cust_city,
cust_state,
cust_zip,
cust_country)
SELECT cust_id,
cust_contact,
cust_email,
cust_name,
cust_address,
cust_city,
cust_state,
cust_zip,
cust_country
FROM CustNew;")

tbl(c2,"Customers") %>% 
  rows_append(tbl(c2,"CustNew"),in_place = T)
# 15.2

dbExecute(c1,"CREATE TABLE CustCopy AS SELECT * FROM Customers;")
[1] 0
dbListTables(c1)
[1] "CustCopy"   "Customers"  "OrderItems" "Orders"     "Products"  
[6] "Vendors"   
tbl(c1,"CustCopy")
# Source:   table<`CustCopy`> [7 x 9]
# Database: sqlite 3.46.0 [D:\数据库技术及应用\R4SQL\data\TYSQL_copy1.sqlite]
  cust_id    cust_name   cust_address cust_city cust_state cust_zip cust_country
  <chr>      <chr>       <chr>        <chr>     <chr>      <chr>    <chr>       
1 1000000001 Village To… 200 Maple L… Detroit   MI         44444    USA         
2 1000000002 Kids Place  333 South L… Columbus  OH         43333    USA         
3 1000000003 Fun4All     1 Sunny Pla… Muncie    IN         42222    USA         
4 1000000004 Fun4All     829 Riversi… Phoenix   AZ         88888    USA         
5 1000000005 The Toy St… 4545 53rd S… Chicago   IL         54545    USA         
6 1000000006 Toy Land    123 Any Str… New York  NY         11111    USA         
7 1000000014 Toy Land    123 Any Str… New York  NY         11111    USA         
# ℹ 2 more variables: cust_contact <chr>, cust_email <chr>

在R中有两种方法可以实现相同的操作。一种方法需要先把数据框载入到R环境中(使用collect函数),然后再存入,操作方法如下:

copy_to(dest = c2,name = "CustCopy",df = tbl(c2,"Customers") %>% 
          collect(),overwrite = T)
tbl(c2,"CustCopy")
# Source:   table<`CustCopy`> [7 x 9]
# Database: sqlite 3.46.0 [D:\数据库技术及应用\R4SQL\data\TYSQL_copy2.sqlite]
  cust_id    cust_name   cust_address cust_city cust_state cust_zip cust_country
  <chr>      <chr>       <chr>        <chr>     <chr>      <chr>    <chr>       
1 1000000001 Village To… 200 Maple L… Detroit   MI         44444    USA         
2 1000000002 Kids Place  333 South L… Columbus  OH         43333    USA         
3 1000000003 Fun4All     1 Sunny Pla… Muncie    IN         42222    USA         
4 1000000004 Fun4All     829 Riversi… Phoenix   AZ         88888    USA         
5 1000000005 The Toy St… 4545 53rd S… Chicago   IL         54545    USA         
6 1000000006 Toy Land    123 Any Str… New York  NY         11111    USA         
7 1000000014 Toy Land    123 Any Str… New York  NY         11111    USA         
# ℹ 2 more variables: cust_contact <chr>, cust_email <chr>
# 先删掉创建的表
dbRemoveTable(c2,"CustCopy")

另一种方法是直接把查询保存到数据库的一个表中,实现方法如下:

tbl(c2, "Customers") %>%
  sql_render()  %>% 
  db_save_query(con = c2,sql = .,name = "CustCopy",temporary = F)
<table_path> `CustCopy`
dbListTables(c2)
 [1] "CustCopy"          "Customers"         "OrderItems"       
 [4] "Orders"            "Products"          "Vendors"          
 [7] "dbplyr_R8DnPKCOk4" "dbplyr_dfK1Y1axzY" "dbplyr_vRpiFA51eq"
[10] "sqlite_stat1"      "sqlite_stat4"     
tbl(c2,"CustCopy")
# Source:   table<`CustCopy`> [7 x 9]
# Database: sqlite 3.46.0 [D:\数据库技术及应用\R4SQL\data\TYSQL_copy2.sqlite]
  cust_id    cust_name   cust_address cust_city cust_state cust_zip cust_country
  <chr>      <chr>       <chr>        <chr>     <chr>      <chr>    <chr>       
1 1000000001 Village To… 200 Maple L… Detroit   MI         44444    USA         
2 1000000002 Kids Place  333 South L… Columbus  OH         43333    USA         
3 1000000003 Fun4All     1 Sunny Pla… Muncie    IN         42222    USA         
4 1000000004 Fun4All     829 Riversi… Phoenix   AZ         88888    USA         
5 1000000005 The Toy St… 4545 53rd S… Chicago   IL         54545    USA         
6 1000000006 Toy Land    123 Any Str… New York  NY         11111    USA         
7 1000000014 Toy Land    123 Any Str… New York  NY         11111    USA         
# ℹ 2 more variables: cust_contact <chr>, cust_email <chr>

15.3 练习

  • 请使用SQL和R两种方式,解决课后的挑战题。

15.4 关闭数据库

这一步,我们会关闭数据库连接。

dbDisconnect(c1)
dbDisconnect(c2)