セミジョイン/アンチジョイン

 

作業概要:
「in」「exists」「not in」「not exists」それぞれについて
結合キーカラムが nullable の場合とnot nullの場合の実行計画を確認した

結果:
「in」「exists」「not exists」については、結合キーカラムのnull属性に関係なくサブクエリのテーブルでインデックスが使用された。


「not in」については、結合キーカラムのnull属性に依存して実行計画が変化した。

結合キーカラムが nullable の場合、サブクエリのテーブルでフルスキャンとなった。(MySQLOraclePostgreSQLSQL Server)
結合キーカラムが not null の場合、サブクエリのテーブルでインデックスが使用された。(MySQLOracleSQL Server)
結合キーカラムが not null の場合、サブクエリのテーブルでフルスキャンのままだった。(PostgreSQL)

 

(8.0.29)

drop table tab1 ;
drop table tab2 ;
create table tab1(col1 int   ,col2 int);
create table tab2(col1 int   ,col2 int);


drop procedure proc1;

delimiter //
create procedure proc1(in x bigint)
begin
  declare i bigint;
  set i = 0;
  start transaction;
  while i < x do
    set i = i + 1;
    insert into tab1(col1,col2) select i, ceil(rand()*100000);
    insert into tab2(col1,col2) select i, ceil(rand()*100000);
  end while;
  commit;
end
//
delimiter ;

call proc1(10000);


select count(*) from tab1;
select count(*) from tab2;
select * from tab1 limit 10;
select * from tab2 limit 10;

create  index ind1 on tab1(col1);
create  index ind2 on tab2(col1);

analyze table tab1;
analyze table tab2;

explain analyze
select count(*)
from tab1 t1
where t1.col1  in ( select col1 from tab2 where col2 < 100 )
and t1.col2 < 10;

→ Nested loop semijoin/Table scan on t1/Index lookup on tab2 using ind2

explain analyze
select count(*)
from tab1 t1
where exists ( select 1 from tab2 where col2 < 100 and col1 = t1.col1 )
and t1.col2 < 10;

→ Nested loop semijoin/Table scan on t1/Index lookup on tab2 using ind2


explain analyze
select count(*)
from tab1 t1
where t1.col1 not in ( select col1 from tab2 where col2 < 100 )
and t1.col2 < 10;

→ Filter/Table scan on t1/Table scan on tab2 ★

※結合キーのカラム(col1)がnot nullの場合、他と同様にインデックスが使用される
→ Nested loop antijoin/Table scan on t1/Index lookup on tab2 using ind2


explain analyze
select count(*)
from tab1 t1
where not exists ( select 1 from tab2 where col2 < 100 and col1 = t1.col1 )
and t1.col2 < 10;

→ Nested loop antijoin/Table scan on t1/Index lookup on tab2 using ind2

 

(19c)

 

drop table tab1 purge;
drop table tab2 purge;
create table tab1(col1 int    ,col2 int);
create table tab2(col1 int    ,col2 int);

declare
begin
for i in 1..10000 loop
  insert into tab1(col1,col2) values(i, ceil(dbms_random.value()*100000) );
  insert into tab2(col1,col2) values(i, ceil(dbms_random.value()*100000) );
  
end loop;
end;
/

commit;

 

select count(*) from tab1;
select count(*) from tab2;
select * from tab1 fetch first 10 rows only;
select * from tab2 fetch first 10 rows only;

create index ind1 on tab1(col1);
create index ind2 on tab2(col1);

exec dbms_stats.gather_table_stats(user, 'TAB1');
exec dbms_stats.gather_table_stats(user, 'TAB2');


explain plan for
select count(*)
from tab1 t1
where t1.col1  in ( select col1 from tab2 where col2 < 100 )
and t1.col2 < 10;
select * from table(dbms_xplan.display(format => 'ALL') );

→ NESTED LOOPS SEMI/TABLE ACCESS FULL/INDEX RANGE SCAN


explain plan for
select count(*)
from tab1 t1
where exists ( select 1 from tab2 where col2 < 100 and col1 = t1.col1 )
and t1.col2 < 10;
select * from table(dbms_xplan.display(format => 'ALL') );

→ NESTED LOOPS SEMI/TABLE ACCESS FULL/INDEX RANGE SCAN

 

explain plan for
select count(*)
from tab1 t1
where t1.col1 not in ( select col1 from tab2 where col2 < 100 )
and t1.col2 < 10;
select * from table(dbms_xplan.display(format => 'ALL') );

→ HASH JOIN ANTI NA/TABLE ACCESS FULL/TABLE ACCESS FULL ★


※結合キーのカラム(col1)がnot nullの場合、他と同様にインデックスが使用される
→ NESTED LOOPS SEMI/TABLE ACCESS FULL/INDEX RANGE SCAN


explain plan for
select count(*)
from tab1 t1
where not exists ( select 1 from tab2 where col2 < 100 and col1 = t1.col1 )
and t1.col2 < 10;
select * from table(dbms_xplan.display(format => 'ALL') );

→ NESTED LOOPS ANTI/TABLE ACCESS FULL/INDEX RANGE SCAN

 

(14)


drop table tab1 ;
drop table tab2 ;
create table tab1(col1 int  ,col2 int);
create table tab2(col1 int  ,col2 int);

insert into tab1(col1,col2) select g, ceil(random()*100000) from generate_series(1,10000) g;
insert into tab2(col1,col2) select g, ceil(random()*100000) from generate_series(1,10000) g;

 

select count(*) from tab1;
select count(*) from tab2;
select * from tab1 limit 10;
select * from tab2 limit 10;

create index ind1 on tab1(col1);
create index ind2 on tab2(col1);

analyze tab1;
analyze tab2;


explain analyze
select count(*)
from tab1 t1
where t1.col1  in ( select col1 from tab2 where col2 < 100 )
and t1.col2 < 10;

→ Nested loop semijoin/Seq Scan on tab1 t1/Index Scan using ind2 on tab2

explain analyze
select count(*)
from tab1 t1
where exists ( select 1 from tab2 where col2 < 100 and col1 = t1.col1 )
and t1.col2 < 10;

→ Nested loop semijoin/Seq Scan on tab1 t1/Index Scan using ind2 on tab2


explain analyze
select count(*)
from tab1 t1
where t1.col1 not in ( select col1 from tab2 where col2 < 100 )
and t1.col2 < 10;

→ Seq Scan on tab1 t1/SubPlan 1/Seq Scan on tab2 ★


※結合キーのカラム(col1)がnot nullの場合でもインデックスは使用されない ★★


explain analyze
select count(*)
from tab1 t1
where not exists ( select 1 from tab2 where col2 < 100 and col1 = t1.col1 )
and t1.col2 < 10;

→ Nested Loop Anti Join/Seq Scan on tab1 t1/Index Scan using ind2 on tab2

 

(2019)

drop table tab1 ;
drop table tab2 ;
create table tab1(col1 int  ,col2 int);
create table tab2(col1 int  ,col2 int);


set nocount on
declare @i bigint;
set @i = 1;
begin transaction;
while @i <= 10000
begin
  insert into tab1(col1,col2) select @i, ceiling(rand()*100000);
  insert into tab2(col1,col2) select @i, ceiling(rand()*100000);
  set @i = @i + 1;
end
commit;

 


select count(*) from tab1;
select count(*) from tab2;
select top(10) * from tab1 ;
select top(10) * from tab2 ;

create index ind1 on tab1(col1);
create index ind2 on tab2(col1);

update statistics tab1;
update statistics tab2;

set showplan_all on
go

select count(*)
from tab1 t1
where t1.col1  in ( select col1 from tab2 where col2 < 100 )
and t1.col2 < 10;

→ Nested Loops(Left Semi Join)/Table Scan/Index Seek

 


select count(*)
from tab1 t1
where exists ( select 1 from tab2 where col2 < 100 and col1 = t1.col1 )
and t1.col2 < 10;

→ Nested Loops(Left Semi Join)/Table Scan/Index Seek


select count(*)
from tab1 t1
where t1.col1 not in ( select col1 from tab2 where col2 < 100 )
and t1.col2 < 10;

→ Nested Loops(Left Anti Join)/Table Scan/Table Scan  ★


※結合キーのカラム(col1)がnot nullの場合、他と同様にインデックスが使用される
→ Nested Loops(Left Anti Join)/Table Scan/Index Seek


select count(*)
from tab1 t1
where not exists ( select 1 from tab2 where col2 < 100 and col1 = t1.col1 )
and t1.col2 < 10;

→ Nested Loops(Left Anti Join)/Table Scan/Index Seek