SQLチューニング(結合は件数を絞り込んでから)

データに依存すると思われるが、
今回試したテストケースでは、

SQL Serverは何もしなくてもいい感じの実行計画となった。
Oracleは件数を絞りこまないデフォルトのほうが速かった。

MySQLPostgreSQLSQLをいじったほうが速くなった。

(8.0.27)


drop table tab1;
create table tab1(
    col1 bigint 
   ,col2 bigint
   );

drop table tab2;
create table tab2(
    col1 bigint 
   ,col2 bigint
   );

drop procedure proc1;

delimiter //
create procedure proc1(in x int)
begin
  declare i int;
  set i = 0;
  start transaction;
  while i < x do
    set i = i + 1;
    insert into tab1 values(
     floor(rand() * 100)+1
    ,i
    );
    insert into tab2 values(
     floor(rand() * 100)+1
    ,i
    );
  end while;
  commit;
end
//
delimiter ;

call proc1(1000000);

select count(*) from tab1;
select * from tab1 order by rand() limit 20;
alter table tab1 add constraint tab1pk primary key (col1,col2 );

select count(*) from tab2;
select * from tab2 order by rand() limit 20;
alter table tab2 add constraint tab2pk primary key (col1,col2 );


analyze table tab1;
analyze table tab2;

 

explain 
select count(distinct t1.col1)
from tab1 t1 
inner join tab2 t2
on t1.col1 = t2.col1
where t2.col2 < 1000
order by t1.col1
;

→(2.57 sec)

explain 
select count(distinct t1.col1)
from ( select distinct col1 from tab1) t1 
inner join tab2 t2
on t1.col1 = t2.col1
where t2.col2 < 1000
order by t1.col1
;

→(0.18 sec)★

 

 


(19c)

https://blogs.oracle.com/otnjp/post/tsushima-hakushi-9

 

drop table tab1 purge;
create table tab1(
    col1 int 
   ,col2 int
   );

drop table tab2 purge;
create table tab2(
    col1 int 
   ,col2 int
   );

declare
begin
for i in 1..1000000 loop
  insert into tab1 values(
   floor(dbms_random.value(1, 101))
  ,i
   );
  
  insert into tab2 values(
   floor(dbms_random.value(1, 101))
  ,i
   );
end loop;
end;
/

commit;

 

select count(*) from tab1;
select * from tab1 order by dbms_random.value()  fetch first 20 rows only;

alter table tab1 add constraint tab1pk primary key (col1,col2 );


select count(*) from tab2;
select * from tab2 order by dbms_random.value()  fetch first 20 rows only;

alter table tab2 add constraint tab2pk primary key (col1,col2);

set time on
set timing on


exec dbms_stats.gather_table_stats(user,'TAB1');
exec dbms_stats.gather_table_stats(user,'TAB2');

 

explain plan for
select count(distinct t1.col1)
from tab1 t1 
inner join tab2 t2
on t1.col1 = t2.col1
where t2.col2 < 1000
order by t1.col1
;

select * from table(dbms_xplan.display() );

→経過: 00:00:00.02

explain plan for
select /*+ NO_MERGE(t1) */ count(distinct t1.col1)
from ( select distinct col1 from tab1) t1 
inner join tab2 t2
on t1.col1 = t2.col1
where t2.col2 < 1000
order by t1.col1
;

select * from table(dbms_xplan.display() );

→経過: 00:00:00.05

※インライン・ビューが展開されて実行計画が変わらないので
展開されないようにヒント句追加したが、むしろ悪化

 


(14)


drop table tab1;
create table tab1(
    col1 bigint 
   ,col2 bigint
   );
drop table tab2;
create table tab2(
    col1 bigint 
   ,col2 bigint
   );

start transaction;
insert into tab1 select
   floor(random() * 100)+1
  ,g
from generate_series(1,1000000) g;

commit;

start transaction;
insert into tab2 select
   floor(random() * 100)+1
  ,g
from generate_series(1,1000000) g;

commit;

select count(*) from tab1;
select * from tab1 order by random() limit 20;
alter table tab1 add constraint tab1pk primary key (col1,col2 );

select count(*) from tab2;
select * from tab2 order by random() limit 20;
alter table tab2 add constraint tab2pk primary key (col1,col2);

\timing 1

analyze tab1;
analyze tab2;


explain analyze select distinct t1.col1
from tab1 t1 
inner join tab2 t2
on t1.col1 = t2.col1
where t2.col2 < 1000
order by t1.col1
;

→Execution Time: 2270.292 ms

explain analyze select distinct t1.col1
from ( select distinct col1 from tab1) t1 
inner join tab2 t2
on t1.col1 = t2.col1
where t2.col2 < 1000
order by t1.col1
;

→Execution Time: 189.508 ms★

 


(2019)

 


drop table tab1;
create table tab1(
    col1 bigint not null
   ,col2 bigint not null
   );

drop table tab2;
create table tab2(
    col1 bigint not null
   ,col2 bigint not null
   );

set nocount on
declare @i int;
set @i = 1;
begin transaction;
while @i <= 1000000
begin
  insert into tab1 values(
     floor(rand() * 100)+1
    ,@i
    );
   insert into tab2 values(
     floor(rand() * 100)+1
    ,@i
    );
 set @i = @i + 1;
end
commit;


select count(*) from tab1;
select top 20 * from tab1 order by newid();
alter table tab1 add constraint tab1pk primary key (col1,col2 );


select count(*) from tab2;
select top 20 * from tab2 order by newid();
alter table tab2 add constraint tab2pk primary key (col1,col2 );

set statistics time on


update statistics tab1;
update statistics tab2;

 

set statistics profile on

select count(distinct t1.col1)
from tab1 t1 
inner join tab2 t2
on t1.col1 = t2.col1
where t2.col2 < 1000
;


※自動で最初に重複データ除去される実行計画となる。

→経過時間 = 1637 ミリ秒。

 

select count(distinct t1.col1)
from ( select distinct col1 from tab1) t1 
inner join tab2 t2
on t1.col1 = t2.col1
where t2.col2 < 1000
;

set statistics profile off

→経過時間 = 1743 ミリ秒。