HJ結合順へのデータ件数影響

MySQL

(8.0.26)

drop table tab1;
create table tab1(
col1 bigint
,col2 bigint
);

drop table tab2;
create table tab2(
col1 bigint
,col2 bigint
);

drop table tab3;
create table tab3(
col1 bigint
,col2 bigint
);

drop procedure proc1;

delimiter //
create procedure proc1(in x int)
begin
declare i int;
set i = 0;
start transaction;
while i < x do
set i = i + 1;
insert into tab1 values(
i
,floor(rand() * 10)+1
);
insert into tab2 values(
i
,floor(rand() * 10)+1
);
insert into tab3 values(
i
,floor(rand() * 10)+1
);
end while;
commit;
end
//
delimiter ;

call proc1(1000000);

analyze table tab1;
analyze table tab2;
analyze table tab3;

explain analyze
select /* JOIN_PREFIX(t3,t2,t1) */ count(*)
from tab1 t1
inner join tab2 t2
on t1.col1 = t2.col1
inner join tab3 t3
on t1.col1 = t3.col1
where mod(t1.col2,2) = 0
and mod(t2.col2,3) = 0
and mod(t3.col2,7) = 0
;

show warnings;

select count(*) from tab1 t1 where mod(t1.col2,2) = 0;
select count(*) from tab2 t2 where mod(t2.col2,3) = 0;
select count(*) from tab3 t3 where mod(t3.col2,7) = 0;

where条件適用後の件数を確認し、
最も件数が少ないテーブルをビルド表、次の件数が少ないテーブルをプローブ表とする。

今回はt3が最も件数が少ないので、t3がビルド表、次に件数の少ないt2がプローブ表となる。

次に、t3とt2結合後の件数とt1の件数を比較する

select count(*)
from tab2 t2
inner join tab3 t3
on t2.col1 = t3.col1
where mod(t2.col2,3) = 0
and mod(t3.col2,7) = 0
;

t3とt2結合後の件数のほうがt1の件数より少ないので、2段目の結合は
t3とt2結合生成物をビルド表、t1をプローブ表とする、下記結合パターンが最適と予想される
(t3 t2)t1

→ 1.47 sec

他のパターンとの比較

(t3 t1)t2 → 1.48 sec
(t2 t1)t3 → 1.51 sec
(t2 t3)t1 → 1.47 sec
(t1 t2)t3 → 1.55 sec
(t1 t3)t2 → 1.50 sec

想定通り

Oracle

(19c)

drop table tab1 purge;
create table tab1(
col1 int
,col2 int
);

drop table tab2 purge;
create table tab2(
col1 int
,col2 int
);

drop table tab3 purge;
create table tab3(
col1 int
,col2 int
);

declare
begin
for i in 1..1000000 loop
insert into tab1 values(
i
,floor(dbms_random.value(1, 11) )
);

insert into tab2 values(
i
,floor(dbms_random.value(1, 11) )
);
insert into tab3 values(
i
,floor(dbms_random.value(1, 11) )
);
end loop;
end;
/

commit;
set time on
set timing on

exec dbms_stats.gather_table_stats(user,'TAB1');
exec dbms_stats.gather_table_stats(user,'TAB2');
exec dbms_stats.gather_table_stats(user,'TAB3');

explain plan for
select /*+ LEADING(t1 t3 t2) NO_SWAP_JOIN_INPUTS(t2) */ count(*)
from tab1 t1
inner join tab2 t2
on t1.col1 = t2.col1
inner join tab3 t3
on t1.col1 = t3.col1
where mod(t1.col2,2) = 0
and mod(t2.col2,3) = 0
and mod(t3.col2,7) = 0
;

select * from table(dbms_xplan.display(format=>'ALL') );

select count(*) from tab1 t1 where mod(t1.col2,2) = 0;
select count(*) from tab2 t2 where mod(t2.col2,3) = 0;
select count(*) from tab3 t3 where mod(t3.col2,7) = 0;

Oracleは結合条件の推移律をサポートしていないので、
可能な結合パターンについて、where条件適用後の件数を確認し、
最も件数が少ないテーブルをビルド表、次の件数が少ないテーブルをプローブ表とする。

今回はt3が最も件数が少ないので、t3がビルド表、結合条件的にプローブ表はt1となる。

次に、t1とt3結合後の件数とt2の件数を比較する

select count(*)
from tab1 t1
inner join tab3 t3
on t1.col1 = t3.col1
where mod(t1.col2,2) = 0
and mod(t3.col2,7) = 0
;

t1とt3結合後の件数のほうがt2の件数より少ないので、2段目の結合は
t1とt3結合生成物をビルド表、t2をプローブ表とする、下記結合パターンが最適と予想される
(t3 t1)t2

→ 経過: 00:00:00.57

比較として、1段目のビルド表/プローブ表を入れ替えた(t1 t3)t2
および2段目のビルド表/プローブ表を入れ替えたt2(t3 t1)の実行時間と比較する
(t1 t3)t2
→ 経過: 00:00:00.61

t2(t3 t1)
→ 経過: 00:00:00.59

想定通り

PostgreSQL

(14)

drop table tab1;
create table tab1(
col1 bigint
,col2 bigint
);
drop table tab2;
create table tab2(
col1 bigint
,col2 bigint
);

drop table tab3;
create table tab3(
col1 bigint
,col2 bigint
);

start transaction;
insert into tab1 select
g
,floor(random() * 10)+1
from generate_series(1,1000000) g;

commit;

start transaction;
insert into tab2 select
g
,floor(random() * 10)+1
from generate_series(1,1000000) g;

commit;

start transaction;
insert into tab3 select
g
,floor(random() * 10)+1
from generate_series(1,1000000) g;

commit;

\timing 1
\pset pager 0
set pg_hint_plan.debug_print=1;
show pg_hint_plan.debug_print;

analyze tab1;
analyze tab2;
analyze tab3;

/*+ Leading(( (t2 t3) t1 )) */
explain
select count(*)
from tab1 t1
inner join tab2 t2
on t1.col1 = t2.col1
inner join tab3 t3
on t1.col1 = t3.col1
where mod(t1.col2,2) = 0
and mod(t2.col2,3) = 0
and mod(t3.col2,7) = 0
;

select count(*) from tab1 t1 where mod(t1.col2,2) = 0;
select count(*) from tab2 t2 where mod(t2.col2,3) = 0;
select count(*) from tab3 t3 where mod(t3.col2,7) = 0;

where条件適用後の件数を確認し、
最も件数が少ないテーブルをビルド表、次の件数が少ないテーブルをプローブ表とする。

今回はt3が最も件数が少ないので、t3がビルド表、次に件数の少ないt2がプローブ表となる。

次に、t3とt2結合後の件数とt1の件数を比較する

select count(*)
from tab2 t2
inner join tab3 t3
on t2.col1 = t3.col1
where mod(t2.col2,3) = 0
and mod(t3.col2,7) = 0
;

t3とt2結合後の件数のほうがt1の件数より少ないので、2段目の結合は
t3とt2結合生成物をビルド表、t1をプローブ表とする、下記結合パターンが最適と予想される
(t3 t2)t1
→ 時間: 306.156 ミリ秒

※ヒント句の指定は逆順になる

比較として、1段目のビルド表/プローブ表を入れ替えた(t2 t3)t1
および2段目のビルド表/プローブ表を入れ替えたt1(t3 t2)の実行時間と比較する
(t2 t3)t1
→ 時間: 348.143 ミリ秒

t1(t3 t2)
→ 時間: 394.774 ミリ秒

想定通り

SQL Server

(2019)

drop table tab1;
create table tab1(
col1 bigint not null
,col2 bigint
);

drop table tab2;
create table tab2(
col1 bigint not null
,col2 bigint
);

drop table tab3;
create table tab3(
col1 bigint not null
,col2 bigint
);

set nocount on
declare @i int;
set @i = 1;
begin transaction;
while @i <= 1000000
begin
insert into tab1 values(
@i
,floor(rand() * 10)+1
);
insert into tab2 values(
@i
,floor(rand() * 10)+1
);
insert into tab3 values(
@i
,floor(rand() * 10)+1
);
set @i = @i + 1;
end
commit;

update statistics tab1;
update statistics tab2;
update statistics tab3;

select count(*)
from tab1 t1
inner join tab2 t2
on t1.col1 = t2.col1
inner join tab3 t3
on t1.col1 = t3.col1
where t1.col2 % 2 = 0
and t2.col2 % 3 = 0
and t3.col2 % 7 = 0
;

select count(*) from tab1 t1 where t1.col2 % 2 = 0;
select count(*) from tab2 t2 where t2.col2 % 3 = 0;
select count(*) from tab3 t3 where t3.col2 % 7 = 0;

where条件適用後の件数を確認し、
最も件数が少ないテーブルをビルド表、次の件数が少ないテーブルをプローブ表とする。

今回はt3が最も件数が少ないので、t3がビルド表、次に件数の少ないt2がプローブ表となる。

次に、t3とt2結合後の件数とt1の件数を比較する

select count(*)
from tab2 t2
inner join tab3 t3
on t1.col1 = t3.col1
where t2.col2 % 3 = 0
and t3.col2 % 7 = 0
;

t3とt2結合後の件数のほうがt1の件数より少ないので、2段目の結合は
t3とt2結合生成物をビルド表、t1をプローブ表とする、下記結合パターンが最適と予想される
(t3 t2)t1
→ 経過時間 = 258 ミリ秒

※SQLを書き換えてFORCE ORDERヒントを適用

select count(*)
from tab3 t3
inner join tab2 t2
on t3.col1 = t2.col1
inner join tab1 t1
on t3.col1 = t1.col1
where t1.col2 % 2 = 0
and t2.col2 % 3 = 0
and t3.col2 % 7 = 0
OPTION (FORCE ORDER ,MAXDOP 1)
;

他のパターンとの比較

(t3 t1)t2 → 経過時間 = 270 ミリ秒
(t2 t1)t3 → 経過時間 = 312 ミリ秒
(t2 t3)t1 → 経過時間 = 287 ミリ秒
(t1 t2)t3 → 経過時間 = 342 ミリ秒
(t1 t3)t2 → 経過時間 = 321 ミリ秒

想定通り