データに依存すると思われるが、
今回試したテストケースでは、
SQL Serverは何もしなくてもいい感じの実行計画となった。
Oracleは件数を絞りこまないデフォルトのほうが速かった。
MySQLとPostgreSQLはSQLをいじったほうが速くなった。
(8.0.27)
drop table tab1;
create table tab1(
col1 bigint
,col2 bigint
);
drop table tab2;
create table tab2(
col1 bigint
,col2 bigint
);
drop procedure proc1;
delimiter //
create procedure proc1(in x int)
begin
declare i int;
set i = 0;
start transaction;
while i < x do
set i = i + 1;
insert into tab1 values(
floor(rand() * 100)+1
,i
);
insert into tab2 values(
floor(rand() * 100)+1
,i
);
end while;
commit;
end
//
delimiter ;
call proc1(1000000);
select count(*) from tab1;
select * from tab1 order by rand() limit 20;
alter table tab1 add constraint tab1pk primary key (col1,col2 );
select count(*) from tab2;
select * from tab2 order by rand() limit 20;
alter table tab2 add constraint tab2pk primary key (col1,col2 );
analyze table tab1;
analyze table tab2;
explain
select count(distinct t1.col1)
from tab1 t1
inner join tab2 t2
on t1.col1 = t2.col1
where t2.col2 < 1000
order by t1.col1
;
→(2.57 sec)
explain
select count(distinct t1.col1)
from ( select distinct col1 from tab1) t1
inner join tab2 t2
on t1.col1 = t2.col1
where t2.col2 < 1000
order by t1.col1
;
→(0.18 sec)★
(19c)
https://blogs.oracle.com/otnjp/post/tsushima-hakushi-9
drop table tab1 purge;
create table tab1(
col1 int
,col2 int
);
drop table tab2 purge;
create table tab2(
col1 int
,col2 int
);
declare
begin
for i in 1..1000000 loop
insert into tab1 values(
floor(dbms_random.value(1, 101))
,i
);
insert into tab2 values(
floor(dbms_random.value(1, 101))
,i
);
end loop;
end;
/
commit;
select count(*) from tab1;
select * from tab1 order by dbms_random.value() fetch first 20 rows only;
alter table tab1 add constraint tab1pk primary key (col1,col2 );
select count(*) from tab2;
select * from tab2 order by dbms_random.value() fetch first 20 rows only;
alter table tab2 add constraint tab2pk primary key (col1,col2);
set time on
set timing on
exec dbms_stats.gather_table_stats(user,'TAB1');
exec dbms_stats.gather_table_stats(user,'TAB2');
explain plan for
select count(distinct t1.col1)
from tab1 t1
inner join tab2 t2
on t1.col1 = t2.col1
where t2.col2 < 1000
order by t1.col1
;
select * from table(dbms_xplan.display() );
→経過: 00:00:00.02
explain plan for
select /*+ NO_MERGE(t1) */ count(distinct t1.col1)
from ( select distinct col1 from tab1) t1
inner join tab2 t2
on t1.col1 = t2.col1
where t2.col2 < 1000
order by t1.col1
;
select * from table(dbms_xplan.display() );
→経過: 00:00:00.05
※インライン・ビューが展開されて実行計画が変わらないので
展開されないようにヒント句追加したが、むしろ悪化
(14)
drop table tab1;
create table tab1(
col1 bigint
,col2 bigint
);
drop table tab2;
create table tab2(
col1 bigint
,col2 bigint
);
start transaction;
insert into tab1 select
floor(random() * 100)+1
,g
from generate_series(1,1000000) g;
commit;
start transaction;
insert into tab2 select
floor(random() * 100)+1
,g
from generate_series(1,1000000) g;
commit;
select count(*) from tab1;
select * from tab1 order by random() limit 20;
alter table tab1 add constraint tab1pk primary key (col1,col2 );
select count(*) from tab2;
select * from tab2 order by random() limit 20;
alter table tab2 add constraint tab2pk primary key (col1,col2);
\timing 1
analyze tab1;
analyze tab2;
explain analyze select distinct t1.col1
from tab1 t1
inner join tab2 t2
on t1.col1 = t2.col1
where t2.col2 < 1000
order by t1.col1
;
→Execution Time: 2270.292 ms
explain analyze select distinct t1.col1
from ( select distinct col1 from tab1) t1
inner join tab2 t2
on t1.col1 = t2.col1
where t2.col2 < 1000
order by t1.col1
;
→Execution Time: 189.508 ms★
(2019)
drop table tab1;
create table tab1(
col1 bigint not null
,col2 bigint not null
);
drop table tab2;
create table tab2(
col1 bigint not null
,col2 bigint not null
);
set nocount on
declare @i int;
set @i = 1;
begin transaction;
while @i <= 1000000
begin
insert into tab1 values(
floor(rand() * 100)+1
,@i
);
insert into tab2 values(
floor(rand() * 100)+1
,@i
);
set @i = @i + 1;
end
commit;
select count(*) from tab1;
select top 20 * from tab1 order by newid();
alter table tab1 add constraint tab1pk primary key (col1,col2 );
select count(*) from tab2;
select top 20 * from tab2 order by newid();
alter table tab2 add constraint tab2pk primary key (col1,col2 );
set statistics time on
update statistics tab1;
update statistics tab2;
set statistics profile on
select count(distinct t1.col1)
from tab1 t1
inner join tab2 t2
on t1.col1 = t2.col1
where t2.col2 < 1000
;
※自動で最初に重複データ除去される実行計画となる。
→経過時間 = 1637 ミリ秒。
select count(distinct t1.col1)
from ( select distinct col1 from tab1) t1
inner join tab2 t2
on t1.col1 = t2.col1
where t2.col2 < 1000
;
set statistics profile off
→経過時間 = 1743 ミリ秒。