SQLチューニング(結合は件数を絞り込んでから)

データに依存すると思われるが、
今回試したテストケースでは、

SQL Serverは何もしなくてもいい感じの実行計画となった。
Oracleは件数を絞りこまないデフォルトのほうが速かった。

MySQLとPostgreSQLはSQLをいじったほうが速くなった。

MySQL

(8.0.27)

drop table tab1;
create table tab1(
col1 bigint
,col2 bigint
);

drop table tab2;
create table tab2(
col1 bigint
,col2 bigint
);

drop procedure proc1;

delimiter //
create procedure proc1(in x int)
begin
declare i int;
set i = 0;
start transaction;
while i < x do
set i = i + 1;
insert into tab1 values(
floor(rand() * 100)+1
,i
);
insert into tab2 values(
floor(rand() * 100)+1
,i
);
end while;
commit;
end
//
delimiter ;

call proc1(1000000);

select count(*) from tab1;
select * from tab1 order by rand() limit 20;
alter table tab1 add constraint tab1pk primary key (col1,col2 );

select count(*) from tab2;
select * from tab2 order by rand() limit 20;
alter table tab2 add constraint tab2pk primary key (col1,col2 );

analyze table tab1;
analyze table tab2;

explain
select count(distinct t1.col1)
from tab1 t1
inner join tab2 t2
on t1.col1 = t2.col1
where t2.col2 < 1000
order by t1.col1
;

→(2.57 sec)

explain
select count(distinct t1.col1)
from ( select distinct col1 from tab1) t1
inner join tab2 t2
on t1.col1 = t2.col1
where t2.col2 < 1000
order by t1.col1
;

→(0.18 sec)★

Oracle

(19c)

https://blogs.oracle.com/otnjp/post/tsushima-hakushi-9

drop table tab1 purge;
create table tab1(
col1 int
,col2 int
);

drop table tab2 purge;
create table tab2(
col1 int
,col2 int
);

declare
begin
for i in 1..1000000 loop
insert into tab1 values(
floor(dbms_random.value(1, 101))
,i
);

insert into tab2 values(
floor(dbms_random.value(1, 101))
,i
);
end loop;
end;
/

commit;

select count(*) from tab1;
select * from tab1 order by dbms_random.value() fetch first 20 rows only;

alter table tab1 add constraint tab1pk primary key (col1,col2 );

select count(*) from tab2;
select * from tab2 order by dbms_random.value() fetch first 20 rows only;

alter table tab2 add constraint tab2pk primary key (col1,col2);

set time on
set timing on

exec dbms_stats.gather_table_stats(user,'TAB1');
exec dbms_stats.gather_table_stats(user,'TAB2');

explain plan for
select count(distinct t1.col1)
from tab1 t1
inner join tab2 t2
on t1.col1 = t2.col1
where t2.col2 < 1000
order by t1.col1
;

select * from table(dbms_xplan.display() );

→経過: 00:00:00.02

explain plan for
select /*+ NO_MERGE(t1) */ count(distinct t1.col1)
from ( select distinct col1 from tab1) t1
inner join tab2 t2
on t1.col1 = t2.col1
where t2.col2 < 1000
order by t1.col1
;

select * from table(dbms_xplan.display() );

→経過: 00:00:00.05

※インライン・ビューが展開されて実行計画が変わらないので
展開されないようにヒント句追加したが、むしろ悪化

PostgreSQL

(14)

drop table tab1;
create table tab1(
col1 bigint
,col2 bigint
);
drop table tab2;
create table tab2(
col1 bigint
,col2 bigint
);

start transaction;
insert into tab1 select
floor(random() * 100)+1
,g
from generate_series(1,1000000) g;

commit;

start transaction;
insert into tab2 select
floor(random() * 100)+1
,g
from generate_series(1,1000000) g;

commit;

select count(*) from tab1;
select * from tab1 order by random() limit 20;
alter table tab1 add constraint tab1pk primary key (col1,col2 );

select count(*) from tab2;
select * from tab2 order by random() limit 20;
alter table tab2 add constraint tab2pk primary key (col1,col2);

\timing 1

analyze tab1;
analyze tab2;

explain analyze select distinct t1.col1
from tab1 t1
inner join tab2 t2
on t1.col1 = t2.col1
where t2.col2 < 1000
order by t1.col1
;

→Execution Time: 2270.292 ms

explain analyze select distinct t1.col1
from ( select distinct col1 from tab1) t1
inner join tab2 t2
on t1.col1 = t2.col1
where t2.col2 < 1000
order by t1.col1
;

→Execution Time: 189.508 ms★

SQL Server

(2019)

drop table tab1;
create table tab1(
col1 bigint not null
,col2 bigint not null
);

drop table tab2;
create table tab2(
col1 bigint not null
,col2 bigint not null
);

set nocount on
declare @i int;
set @i = 1;
begin transaction;
while @i <= 1000000
begin
insert into tab1 values(
floor(rand() * 100)+1
,@i
);
insert into tab2 values(
floor(rand() * 100)+1
,@i
);
set @i = @i + 1;
end
commit;

select count(*) from tab1;
select top 20 * from tab1 order by newid();
alter table tab1 add constraint tab1pk primary key (col1,col2 );

select count(*) from tab2;
select top 20 * from tab2 order by newid();
alter table tab2 add constraint tab2pk primary key (col1,col2 );

set statistics time on

update statistics tab1;
update statistics tab2;

set statistics profile on

select count(distinct t1.col1)
from tab1 t1
inner join tab2 t2
on t1.col1 = t2.col1
where t2.col2 < 1000
;

※自動で最初に重複データ除去される実行計画となる。

→経過時間 = 1637 ミリ秒。

select count(distinct t1.col1)
from ( select distinct col1 from tab1) t1
inner join tab2 t2
on t1.col1 = t2.col1
where t2.col2 < 1000
;

set statistics profile off

→経過時間 = 1743 ミリ秒。