测试说明:
merge是oracle提供的一种特殊的sql语法,非常适用于数据同步场景,即: (把a表数据插到b表,如果b表存在相同主键的记录则使用a表数据对b表进行更新) 数据同步的常规做法是先尝试插入,插入失败再进行更新,merge比这种常规做法效率高很多。 (特别是a与b表基本一致,同步时主键冲突比较多的情况,效率能相差10倍以上)
为了验证merge效率,我建了两张表,tab_test_c(初始化生成50000条记录)和tab_test_q(初始化从tab_test_c生成40000条记录), 写了两个plsql脚本,分别将tab_test_c的数据同步到tab_test_q,看它们效率区别。
第一个脚本使用merge语法,第二个脚本使用常规先插入,出现主键冲突的操作。
测试结果:
使用merge语法的脚本同步数据耗时0.04秒,使用常规操作耗时14.77秒,效率相差约369倍
测试脚本:
set serveroutput on -- 启动计时 以便观察脚本执行时间 set timing on set time on
-- 数据初始化
-- Rebuild the source table from scratch.
drop table tab_test_c;

-- Source table: ten generic VARCHAR2 columns. Column c10 will carry the
-- unique row counter that the sync scripts use as the matching key.
create table tab_test_c
(
    c1  varchar2(512),
    c2  varchar2(512),
    c3  varchar2(512),
    c4  varchar2(512),
    c5  varchar2(512),
    c6  varchar2(512),
    c7  varchar2(512),
    c8  varchar2(512),
    c9  varchar2(512),
    c10 varchar2(512)
);
-- Populate tab_test_c with 50000 rows by repeatedly copying 100-row
-- batches from all_objects. The running counter v_rows is stored into
-- c10 and therefore doubles as the unique synchronization key.
declare
    v_rows number := 0;
begin
    while v_rows < 50000
    loop
        for rec in (select owner, object_name, subobject_name, object_id,
                           data_object_id, object_type, created,
                           last_ddl_time, timestamp
                      from all_objects
                     where rownum < 101)
        loop
            insert into tab_test_c
            values (rec.owner, rec.object_name, rec.subobject_name,
                    rec.object_id, rec.data_object_id, rec.object_type,
                    rec.created, rec.last_ddl_time, rec.timestamp, v_rows);
            v_rows := v_rows + 1;
        end loop;
    end loop;
    commit;
end;
/
-- 建唯一索引
-- Sanity check: tab_test_c should now hold 50000 rows.
select count(*) from tab_test_c;

-- Unique index on the synchronization key column.
create unique index uid_test_c_1 on tab_test_c(c10);
--初始化tab_test_q表数据,先从tab_test_c生成同步40000条数据,剩下10000条数据使用脚本同步过来 drop table tab_test_q; create table tab_test_q as select * from tab_test_c where rownum < 40001; create unique index uid_test_q_1 on tab_test_q(c10); -- 验证数据未同步成功 此时记录数差1000 select count(*) from tab_test_q;
-- 使用merge语法同步tab_test_c的数据到tab_test_q
-- Synchronize tab_test_c into tab_test_q with MERGE: rows are fetched in
-- small bulk-collected batches and applied with FORALL, so each batch
-- costs a single SQL engine round trip instead of one per row.
declare
    cursor cur is select * from tab_test_c;
    type mergearray_t is table of tab_test_c % rowtype index by binary_integer;
    mergearray mergearray_t;
begin
    open cur;
    loop
        -- Small LIMIT keeps memory usage low; raising it improves throughput.
        fetch cur bulk collect into mergearray limit 16;
        -- Exit on an empty batch rather than testing cur%notfound before the
        -- fetch: with BULK COLLECT ... LIMIT, %notfound goes true as soon as
        -- a partial batch is returned, so the count-based test is the only
        -- pattern that can never skip the final partial batch.
        exit when mergearray.count = 0;
        begin
            forall rw in 1 .. mergearray.count
                merge into tab_test_q a
                using (select mergearray(rw).c1 c1, mergearray(rw).c2 c2,
                              mergearray(rw).c3 c3, mergearray(rw).c4 c4,
                              mergearray(rw).c5 c5, mergearray(rw).c6 c6,
                              mergearray(rw).c7 c7, mergearray(rw).c8 c8,
                              mergearray(rw).c9 c9, mergearray(rw).c10 c10
                         from dual) b
                on (a.c10 = b.c10)
                when matched then
                    -- Key exists in the target: refresh the payload columns.
                    update set a.c1 = b.c1, a.c2 = b.c2, a.c3 = b.c3,
                               a.c4 = b.c4, a.c5 = b.c5, a.c6 = b.c6,
                               a.c7 = b.c7, a.c8 = b.c8, a.c9 = b.c9
                when not matched then
                    -- Key missing from the target: insert the whole row.
                    insert (c1, c2, c3, c4, c5, c6, c7, c8, c9, c10)
                    values (b.c1, b.c2, b.c3, b.c4, b.c5,
                            b.c6, b.c7, b.c8, b.c9, b.c10);
        exception
            when others then
                -- Surface the real Oracle error instead of a fixed marker
                -- string, so failures are diagnosable from the output.
                dbms_output.put_line('merge batch failed: ' || sqlerrm);
        end;
    end loop;
    close cur;
    commit;
end;
/
--耗时0.04秒 -- 验证数据同步成功 select count(*) from tab_test_q;
--初始化tab_test_q表数据,先从tab_test_c生成同步40000条数据,剩下10000条数据使用脚本同步过来 drop table tab_test_q; create table tab_test_q as select * from tab_test_c where rownum < 40001; create unique index uid_test_q_1 on tab_test_q(c10); -- 验证数据未同步成功 此时记录数差1000 select count(*) from tab_test_q;
-- 使用常规语法同步tab_test_c的数据到tab_test_q
-- Conventional sync: row-by-row INSERT, falling back to UPDATE when the
-- unique index on c10 raises DUP_VAL_ON_INDEX. This is the slow baseline
-- the MERGE script above is benchmarked against.
begin
    for cur in (select * from tab_test_c)
    loop
        begin
            insert into tab_test_q(c1, c2, c3, c4, c5, c6, c7, c8, c9, c10)
            values (cur.c1, cur.c2, cur.c3, cur.c4, cur.c5,
                    cur.c6, cur.c7, cur.c8, cur.c9, cur.c10);
        exception
            when dup_val_on_index then
                -- Key already present: update the existing row instead.
                update tab_test_q
                   set c1 = cur.c1, c2 = cur.c2, c3 = cur.c3, c4 = cur.c4,
                       c5 = cur.c5, c6 = cur.c6, c7 = cur.c7, c8 = cur.c8,
                       c9 = cur.c9
                 where c10 = cur.c10;
            when others then
                -- Surface the real Oracle error instead of a fixed marker
                -- string, so failures are diagnosable from the output.
                dbms_output.put_line('row sync failed: ' || sqlerrm);
        end;
    end loop;
    commit;
end;
/
--耗时14.77秒 -- 验证数据同步成功 select count(*) from tab_test_q;