HJ/NLの切替件数確認 - HTN20190109の日記

MySQL

(8.0.26)

処理対象件数が下記パターンで
(1千,1千)
(1万,1万)
(10万,10万)
(100万,100万)

HJ = Hash join(Full Scan/Full Scan)
NL = Nested loop(Full Scan/Index Lookup)
を比較

drop table tab1;
create table tab1(
col1 bigint
,col2 bigint
);

drop table tab2;
create table tab2(
col1 bigint
,col2 bigint
);

drop procedure proc1;

delimiter //
create procedure proc1(in x int)
begin
declare i int;
set i = 0;
start transaction;
while i < x do
set i = i + 1;
insert into tab1 values(
i
,floor(rand() * 1000000)+1
);
insert into tab2 values(
i
,floor(rand() * 1000000)+1
);
end while;
commit;
end
//
delimiter ;

call proc1(1000000);

select count(*) from tab1;
select * from tab1 order by rand() limit 20;
alter table tab1 add constraint tab1pk primary key (col1);
create index ind12 on tab1(col2);

select count(*) from tab2;
select * from tab2 order by rand() limit 20;
alter table tab2 add constraint tab2pk primary key (col1);
create index ind22 on tab2(col2);

analyze table tab1;
analyze table tab2;

explain analyze
select /*+ JOIN_PREFIX(t1,t2) NO_INDEX(t1) INDEX(t2) */ count(t1.col1)
from tab1 t1
inner join tab2 t2
on t1.col2 = t2.col2
where t1.col1 <= 1000000
and t2.col1 <= 1000000
;

show warnings;

NL /*+ JOIN_PREFIX(t1,t2) NO_INDEX(t1) INDEX(t2) */
(1千,1千) → 0.17 sec
(1万,1万) → 0.20 sec
(10万,10万) → 0.43 sec
(100万,100万) → 2.75 sec

HJ /*+ JOIN_PREFIX(t1,t2) NO_JOIN_INDEX(t1) NO_JOIN_INDEX(t2) */
(1千,1千) → 0.27 sec
(1万,1万) → 0.29 sec
(10万,10万) → 0.33 sec
(100万,100万) → 0.88 sec

1万～10万件程度が分岐点

Oracle

(19c)

処理対象件数が下記パターンで
(1千,1千)
(1万,1万)
(10万,10万)
(100万,100万)

HJ = Hash join(Full Scan/Full Scan)
NL = Nested loop(Full Scan/Index Lookup)
を比較

drop table tab1 purge;
create table tab1(
col1 int
,col2 int
);

drop table tab2 purge;
create table tab2(
col1 int
,col2 int
);

declare
begin
for i in 1..1000000 loop
insert into tab1 values(
i
,floor(dbms_random.value(1, 1000001) )
);

insert into tab2 values(
i
,floor(dbms_random.value(1, 1000001) )
);
end loop;
end;
/

commit;

select count(*) from tab1;
select * from tab1 order by dbms_random.value() fetch first 20 rows only;
alter table tab1 add constraint tab1pk primary key (col1);
create index ind12 on tab1(col2);

select count(*) from tab2;
select * from tab2 order by dbms_random.value() fetch first 20 rows only;
alter table tab2 add constraint tab2pk primary key (col1);
create index ind22 on tab2(col2);

set time on
set timing on

exec dbms_stats.gather_table_stats(user,'TAB1');
exec dbms_stats.gather_table_stats(user,'TAB2');

explain plan for
select /*+ LEADING(t1 t2) USE_NL(t2) FULL(t1) INDEX(t2) */ count(t1.col1)
from tab1 t1
inner join tab2 t2
on t1.col2 = t2.col2
where t1.col1 <= 1000
and t2.col1 <= 1000
;

select * from table(dbms_xplan.display(format=>'ALL') );

NL /*+ LEADING(t1 t2) USE_NL(t2) FULL(t1) INDEX(t2) */
(1千,1千) → 経過: 00:00:00.01
(1万,1万) → 経過: 00:00:00.03
(10万,10万) → 経過: 00:00:00.27
(100万,100万) → 経過: 00:00:02.49

HJ /*+ LEADING(t1 t2) USE_HASH(t2) FULL(t1) FULL(t2) */
(1千,1千) → 経過: 00:00:00.02
(1万,1万) → 経過: 00:00:00.03
(10万,10万) → 経過: 00:00:00.04
(100万,100万) → 経過: 00:00:00.26

1万件程度が分岐点

PostgreSQL

(14)
処理対象件数が下記パターンで
(1千,1千)
(1万,1万)
(10万,10万)
(100万,100万)

HJ = Hash join(Full Scan/Full Scan)
NL = Nested loop(Full Scan/Index Lookup)
を比較

drop table tab1;
create table tab1(
col1 bigint
,col2 bigint
);
drop table tab2;
create table tab2(
col1 bigint
,col2 bigint
);

start transaction;
insert into tab1 select
g
,floor(random() * 1000000)+1
from generate_series(1,1000000) g;

commit;

start transaction;
insert into tab2 select
g
,floor(random() * 1000000)+1
from generate_series(1,1000000) g;

commit;

select count(*) from tab1;
select * from tab1 order by random() limit 20;
alter table tab1 add constraint tab1pk primary key (col1);
create index ind12 on tab1(col2);

select count(*) from tab2;
select * from tab2 order by random() limit 20;
alter table tab2 add constraint tab2pk primary key (col1);
create index ind22 on tab2(col2);

\timing 1
\pset pager 0
set pg_hint_plan.debug_print = on;

analyze tab1;
analyze tab2;

/*+ Leading(( t1 t2 )) NestLoop(t1 t2) SeqScan(t1) IndexScan(t2) */
explain analyze
select count(t1.col1)
from tab1 t1
inner join tab2 t2
on t1.col2 = t2.col2
where t1.col1 <= 1000
and t2.col1 <= 1000
;

/*+ Leading(( t1 t2 )) NestLoop(t1 t2) SeqScan(t1) IndexScan(t2) */
select count(t1.col1)
from tab1 t1
inner join tab2 t2
on t1.col2 = t2.col2
where t1.col1 <= 1000000
and t2.col1 <= 1000000
;

NL /*+ Leading(( t1 t2 )) NestLoop(t1 t2) SeqScan(t1) IndexScan(t2) */
(1千,1千) → 時間: 57.663 ミリ秒
(1万,1万) → 時間: 82.581 ミリ秒
(10万,10万) → 時間: 285.108 ミリ秒
(100万,100万) → 時間: 2386.081 ミリ秒

HJ /*+ Leading(( t2 t1 )) HashJoin(t2 t1) SeqScan(t1) SeqScan(t2) */
(1千,1千) → 時間: 106.021 ミリ秒
(1万,1万) → 時間: 107.487 ミリ秒
(10万,10万) → 時間: 137.085 ミリ秒
(100万,100万) → 時間: 575.687 ミリ秒

1万～10万件程度が分岐点

SQL Server

(2019)

処理対象件数が下記パターンで
(1千,1千)
(1万,1万)
(10万,10万)
(100万,100万)

HJ = Hash join(Full Scan/Full Scan)
NL = Nested loop(Full Scan/Index Lookup)
を比較

drop table tab1;
create table tab1(
col1 bigint not null
,col2 bigint
);

drop table tab2;
create table tab2(
col1 bigint not null
,col2 bigint
);

set nocount on
declare @i int;
set @i = 1;
begin transaction;
while @i <= 1000000
begin
insert into tab1 values(
@i
,floor(rand() * 1000000)+1
);
insert into tab2 values(
@i
,floor(rand() * 1000000)+1
);
set @i = @i + 1;
end
commit;

select count(*) from tab1;
select top 20 * from tab1 order by newid();
alter table tab1 add constraint tab1pk primary key (col1);
create index ind12 on tab1(col2);

select count(*) from tab2;
select top 20 * from tab2 order by newid();
alter table tab2 add constraint tab2pk primary key (col1);
create index ind22 on tab2(col2);

set statistics time on

update statistics tab1;
update statistics tab2;

select count(t1.col1)
from tab1 t1
inner join tab2 t2
on t1.col2 = t2.col2
where t1.col1 <= 1000
and t2.col1 <= 1000
OPTION (MAXDOP 1, FORCE ORDER, LOOP JOIN, TABLE HINT(t1, FORCESCAN) , TABLE HINT(t2, FORCESEEK))
;

NL OPTION (MAXDOP 1, FORCE ORDER, LOOP JOIN, TABLE HINT(t1, FORCESCAN) , TABLE HINT(t1, FORCESEEK))
(1千,1千) → 経過時間 = 55 ミリ秒
(1万,1万) → 経過時間 = 76 ミリ秒
(10万,10万) → 経過時間 = 258 ミリ秒
(100万,100万) → 経過時間 = 2153 ミリ秒

HJ OPTION (MAXDOP 1, FORCE ORDER, HASH JOIN, TABLE HINT(t1, FORCESCAN) , TABLE HINT(t2, FORCESCAN))
(1千,1千) → 経過時間 = 96 ミリ秒
(1万,1万) → 経過時間 = 107 ミリ秒
(10万,10万) → 経過時間 = 157 ミリ秒
(100万,100万) → 経過時間 = 221 ミリ秒

1万～10万件程度が分岐点