编写SQL需要注意的细节Checklist总结

复制代码 代码如下:

/*

–注意:准备数据(可略过,非常耗时)

create table check1_t1

(

id int,

c1 char(8000)

)

create table check1_t2

(

id int,

c1 char(8000)

)

declare @i int

set @i=1

while @i<=10000

begin

insert into check1_t1 select @i,’c1′

insert into check1_t2 select 10000+@i,’c1′

set @i=@i+1

end

create table check2_t1

(

id int,

c1 char(8000)

)

declare @i int

set @i=1

while @i<=10000

begin

insert into check2_t1 select @i,’c1′

set @i=@i+1

end

insert into check2_t1 values(10001,’c2′)

insert into check2_t1 values(10002,’c1′)

create table check3_t1

(

id int,

c1 char(7000)

)

create table check3_t2

(

id int,

c1 char(7000)

)

declare @i int

set @i=1

while @i<=20000

begin

if @i%2 =0

begin

insert into check3_t1 select @i,’c1′

end

else

begin

insert into check3_t1 select @i,’c2′

end

if @i%100=0

begin

insert into check3_t2 select @i,’c1′

insert into check3_t2 select @i+50000,’c2′

end

set @i=@i+1

end

create table check4_t1

(

id int,

c1 char(500),

)

declare @i int

set @i=1

while @i<=500000

begin

if @i%100000 =0

begin

insert into check4_t1 select @i,’c2′

end

else

begin

insert into check4_t1 select @i,’c1′

end

set @i=@i+1

end

create nonclustered index ncix_c1 on check4_t1(c1)

create table check5_t1

(

id int,

c1 char(10),

)

declare @i int

set @i=1

while @i<=10000

begin

insert into check5_t1 select @i,’c1′

if @i%2=0

begin

insert into check5_t1 select @i,’c1′

end

set @i=@i+1

end

*/

–=====================================

–1、 union all 代替 union

dbcc dropcleanbuffers

dbcc freeproccache

–测试一:(26s) 执行计划:表扫描->排序->合并联接

select id,c1 from check1_t1 –1w条数据

union

select id,c1 from check1_t2 –1w条数据

–测试二: (4s) 执行计划:表扫描->表扫描串联

select id,c1 from check1_t1 –1w条数据

union all

select id,c1 from check1_t2 –1w条数据

–总结:测试一中的union 排序和去重合并是相当耗时的,如果不要此功能,大数据时最好加上all

–=====================================

–2、 exists 代替 count(*)

dbcc dropcleanbuffers

dbcc freeproccache

—-测试一: (7s) 执行计划:表扫描-> 流聚合-> 计算矢量

declare @count int

select @count=count(*) from check2_t1 where c1=’c1′ –1w条数据

if @count>0

begin

print ‘s’

end

—-测试二: (0s) 执行计划:常量扫描/表扫描-> 嵌套循环-> 计算标量

if exists(select 1 from check2_t1 where c1=’c1′) –1w条数据

begin

print ‘s’

end

–总结:判断是否存在,用exist即可,没必要用count(*)将表的所有记录统计出来,扫描一次

–=====================================

–3、 in(select col1 from table)的代替方式

dbcc dropcleanbuffers

dbcc freeproccache

–测试一: (3s)执行计划:表扫描 -> 哈希匹配

select id,c1 from check3_t2 –400行

where id in (select id from check3_t1 where c1=’c1′) –2w行

–测试二:(1s)执行计划:表扫描-> 并行度 -> 位图 -> 排序 -> 合并联接 -> 并行度

select a.id,a.c1 from check3_t2 a

inner join check3_t1 b on a.id=b.id where b.c1=’c1′

–测试三:(3s)执行计划:表扫描-> 哈希匹配

select a.id,a.c1 from check3_t2 a

where exists (select 1 from check3_t1 b where b.id=a.id and b.c1=’c1′)

–总结:能用inner join 尽量用它,sql server在查询时会将关联表进行优化

–=====================================

–4、 not exists 代替 not in

–测试一:(8s) 执行计划:表扫描-> 嵌套循环 -> 哈希匹配

select id,c1 from check3_t1 –2w行

where id not in (select id from check3_t2 where c1=’c1′) –400行

–测试二:(4s) 执行计划:表扫描-> 哈希匹配

select a.id,a.c1 from check3_t1 a

where not exists (select 1 from check3_t2 b where b.id=a.id and b.c1=’c1′)

–总结:尽量不使用not in ,因为会调用嵌套循环,建议使用not exists代替not in

–=====================================

–5、 避免在条件列上使用任何函数

drop table check4_t1

create nonclustered index ncix_c1 on check4_t1(c1) –加上非聚集索引

—测试一:(4s)执行计划: 索引扫描

select * from check4_t1 where rtrim(c1)=’c2′

—测试二:(0s)执行计划: 索引查找

select * from check4_t1 where c1=’c2′

–总结:where条件里对索引字段使用了函数,会使索引查找变成索引扫描,从而查询效率大幅下降

–=====================================

–6、 用sp_executesql执行动态sql

dbcc dropcleanbuffers

dbcc freeproccache

create proc up_check5_t1 (

@id int

)

as

set nocount on

declare @count int,

@sql nvarchar(4000)

set @sql = ‘select @count=count(*) from check5_t1 where id = @id’

exec sp_executesql @sql,

n’@count int output, @id int’,

@count output,

@id

print @count

create proc up_check5_t2 (

@id int

)

as

set nocount on

declare @sql nvarchar(4000)

set @sql = ‘declare @count int;select @count=count(*) from check5_t1 where id = ‘ + cast(@id as varchar(10)) + ‘;print @count’

exec(@sql)

—测试一:瞬时

declare @n int

set @n=1

while @n<=1000

begin

exec up_check5_t1 @n

set @n=@n+1

end

—测试二:2s

declare @n int

set @n=1

while @n<=1000

begin

exec up_check5_t2 @n

set @n=@n+1

end

create clustered index cix_id on check5_t1(id)

dbcc dropcleanbuffers

dbcc freeproccache

–查看缓存计划

select a.size_in_bytes ‘占用字节数’,

total_elapsed_time / execution_count ‘平均时间’,

total_logical_reads / execution_count ‘逻辑读’,

usecounts ‘重用次数’,

substring(d.text, (statement_start_offset / 2) + 1, ((case statement_end_offset

when -1 then datalength(text)

else statement_end_offset

end – statement_start_offset) / 2) + 1) ‘语句’

from sys.dm_exec_cached_plans a

cross apply sys.dm_exec_query_plan(a.plan_handle) c,

sys.dm_exec_query_stats b

cross apply sys.dm_exec_sql_text(b.sql_handle) d

where a.plan_handle = b.plan_handle

order by total_elapsed_time / execution_count desc;

–总结:通过执行下面缓存计划可以看出,第一种完全使用了缓存计划,查询达到了很好的效果;

–而第二种则将缓存计划浪费了,导致缓存很快被占满,这种做法是相当不可取的

–=====================================

–7、 left join 的替代法

–测试一 执行计划:表扫描 -> 哈希匹配

select a.id,a.c1 from check3_t1 a –2w行

left join check3_t2 b on a.id=b.id where b.c1=’c1′ –400行

–测试二 执行计划:表扫描 -> 哈希匹配

select a.id,a.c1 from check3_t1 a

right join check3_t2 b on a.id=b.id where a.c1=’c1′

–测试三 执行计划:表扫描 -> 哈希匹配

select a.id,a.c1 from check3_t1 a

inner join check3_t2 b on a.id=b.id where b.c1=’c1′

–总结:三条语句,在执行计划上完全一样,都是走的inner join的计划,

–因为测试一和测试二中,where语句都包含了left 和right表的字段,sqlserver若发现只要有这个表的字段,则会自动按照inner join进行处理

–补充测试:(1s)执行计划:表扫描-> 并行度 -> 位图 -> 排序 -> 合并联接 -> 并行度

select a.id,a.c1 from check3_t2 a –400行

inner join check3_t1 b on a.id=b.id where a.c1=’c1′ –2w行

–总结:这里有一个比较有趣的地方,若主表和关联表数据差别很大时,走的执行计划走的另一条路

–=====================================

–8、 on(a.id=b.id and a.tag=3)

–测试一

select a.id,a.c1 from check3_t1 a

inner join check3_t2 b on a.id=b.id and a.c1=’c1′

–测试二

select a.id,a.c1 from check3_t1 a

inner join check3_t2 b on a.id=b.id where a.c1=’c1′

–总结:内连接:无论是左表和右表的筛选条件都可以放到where子句中

–测试一

select a.id,a.c1,b.c1 from check3_t1 a

left join check3_t2 b on a.id=b.id and b.c1=’c1′

–测试二

select a.id,a.c1,b.c1 from check3_t1 a

left join check3_t2 b on a.id=b.id where b.c1=’c1′

–总结:左外连接:当右表中的过滤条件放入on子句后和where子句后的结果不一样

–=====================================

–9、 赋值给变量,加top 1

–测试一:(3s) 执行计划:表扫描

declare @id int

select @id=id from check1_t1 where c1=’c1′

select @id

–测试二:(0s)执行计划:表扫描-> 前几行

declare @id int

select top 1 @id=id from check1_t1 where c1=’c1′

select @id

–总结:给变量赋值最好都加上top 1,一从查询效率上增强,二为了准确性,若表check1_t1有多个值,则会取最后一条记录赋给@id

–=====================================

–10、 考虑是否适合用case语句

declare @s int=1

select * from check5_t1

where c1=(case @s when 1 then c1 else ‘c2’ end)

select * from check5_t1

where @s=1 or c1=’c2′

/*–=====================================

、检查语句是否需要distinct. 执行计划:表扫描-> 哈希匹配-> 并行度-> 排序

select distinct c1 from check3_t1

、禁用select *,指定具体列名

select c1 from check4_t1

select * from check4_t1

、insert into table(*),指定具体的列名

、isnull,没有必要的时候不要对字段使用isnull,同样会产生无法有效利用索引的问题,

和避免在筛选列上使用函数同样的原理。

、嵌套子查询,加上查询条件,确保子查询的结果集最小

–=====================================*/

(0)
上一篇 2022年3月21日
下一篇 2022年3月21日

相关推荐