Oracleはヌルデータのインデックスは作成されない。is not nullでインデックスが使用される場合がある。
MySQL、PostgreSQL、SQL Serverはヌルデータもインデックスは作成され、使用される
(8.0.29)
https://qiita.com/omukaik/items/6c9d3018c6ab8fbef7bb
https://dev.mysql.com/doc/refman/8.0/ja/is-null-optimization.html
drop table tab1;
create table tab1(
col1 int primary key
,col2 bigint
,col3 bigint
);
drop procedure proc1;
delimiter //
create procedure proc1(in x int)
begin
declare i int;
set i = 0;
start transaction;
while i < x do
set i = i + 1;
insert into tab1 values(
i
,NULL
,floor(rand() * 1000000)+1
);
end while;
commit;
end
//
delimiter ;
call proc1(1000000);
select count(*) from tab1;
update tab1
set col2 = floor(rand() * 1000000)+1
,col3 = NULL
where col1 = 1
;
select * from tab1 order by rand() limit 20;
create index ind12 on tab1(col2);
create index ind13 on tab1(col3);
analyze table tab1;
select
table_name
,table_rows
,avg_row_length
,data_length/1024/1024/1024 tableGB
,index_length/1024/1024/1024 indexGB
from information_schema.tables
where table_schema = database()
and table_name = 'tab1'
;
インデックスサイズ
ind12 -> 0.012 GB
ind13 -> 0.020 GB
explain analyze select * from tab1 where col2 is null;
-> Index lookup on tab1 using ind12 (col2=NULL) ★
, with index condition: (tab1.col2 is null)
(cost=51474.10 rows=499111) (actual time=0.372..1966.718 rows=999999 loops=1)
explain analyze select * from tab1 where col2 is not null;
-> Index range scan on tab1 using ind12 over (NULL < col2)
, with index condition: (tab1.col2 is not null)
(cost=0.71 rows=1) (actual time=0.024..0.027 rows=1 loops=1)
explain analyze select * from tab1 where col3 is null;
-> Index lookup on tab1 using ind13 (col3=NULL) ★
, with index condition: (tab1.col3 is null)
(cost=0.35 rows=1) (actual time=0.045..0.051 rows=1 loops=1)
explain analyze select * from tab1 where col3 is not null;
-> Filter: (tab1.col3 is not null) (cost=100343.20 rows=499111) (actual time=0.032..243.522 rows=999999 loops=1)
-> Table scan on tab1 (cost=100343.20 rows=998222) (actual time=0.028..193.815 rows=1000000 loops=1)
結果:
ヌルデータについてもインデックスは使用される
(19c)
drop table tab1 purge;
create table tab1(
col1 int primary key
,col2 int
,col3 int
);
declare
begin
for i in 1..1000000 loop
insert into tab1 values
(i
,NULL
,floor(dbms_random.value(1, 1000000) )
);
end loop;
end;
/
commit;
select count(*) from tab1;
update tab1
set col2 = floor(dbms_random.value(1, 1000000) )
,col3 = NULL
where col1 = 1
;
commit;
select * from tab1 order by dbms_random.value() fetch first 20 rows only;
create index ind12 on tab1(col2);
create index ind13 on tab1(col3);
select segment_name,sum(bytes/1024/1024/1024) bytes
from user_segments
where segment_name in (select index_name from user_indexes where table_name = 'TAB1')
group by segment_name
order by segment_name;
インデックスサイズ
ind12 -> .000061035 GB
ind13 -> .017578125 GB
exec dbms_stats.gather_table_stats(user, 'TAB1');
explain plan for select * from tab1 where col2 is null;
select * from table(dbms_xplan.display(format=>'ALL') );
→ TABLE ACCESS FULL
explain plan for select * from tab1 where col2 is not null;
select * from table(dbms_xplan.display(format=>'ALL') );
→ INDEX FULL SCAN
explain plan for select * from tab1 where col3 is null;
select * from table(dbms_xplan.display(format=>'ALL') );
→ TABLE ACCESS FULL
explain plan for select * from tab1 where col3 is not null;
select * from table(dbms_xplan.display(format=>'ALL') );
→ TABLE ACCESS FULL
結果:
is not nullの場合、インデックスが使用される場合がある
(14)
https://www.postgresql.jp/document/14/html/indexes-types.html
drop table tab1;
create table tab1(
col1 int primary key
,col2 bigint
,col3 bigint
);
start transaction;
insert into tab1 select
g
,NULL
,floor(random() * 1000000)+1
from generate_series(1,1000000) g;
commit;
select count(*) from tab1;
update tab1
set col2 = floor(random() * 1000000)+1
,col3 = NULL
where col1 = 1
;
\pset pager 0
select * from tab1 order by random() limit 20;
create index ind12 on tab1(col2);
create index ind13 on tab1(col3);
select pg_size_pretty(pg_relation_size('ind12') );
select pg_size_pretty(pg_relation_size('ind13') );
インデックスサイズ
ind12 -> 6.6 MB
ind13 -> 19 MB
analyze tab1;
explain analyze select * from tab1 where col2 is null;
Seq Scan on tab1 (cost=0.00..15406.00 rows=1000000 width=20) (actual time=0.012..82.440 rows=999999 loops=1)
explain analyze select * from tab1 where col2 is not null;
Index Scan using ind12 on tab1 (cost=0.42..8.44 rows=1 width=20) (actual time=0.015..0.016 rows=1 loops=1)
Index Cond: (col2 IS NOT NULL)
explain analyze select * from tab1 where col3 is null;
Index Scan using ind13 on tab1 (cost=0.42..8.44 rows=1 width=20) (actual time=0.055..0.056 rows=1 loops=1)
Index Cond: (col3 IS NULL) ★
explain analyze select * from tab1 where col3 is not null;
Seq Scan on tab1 (cost=0.00..15406.00 rows=1000000 width=20) (actual time=0.012..93.987 rows=999999 loops=1)
結果:
ヌルデータについてもインデックスは使用される
(2019)
drop table tab1;
create table tab1(
col1 int primary key
,col2 bigint
,col3 bigint
);
set nocount on
declare @i int;
set @i = 1;
begin transaction;
while @i <= 1000000
begin
insert into tab1 values(
@i
,NULL
,floor(rand() * 1000000)+1
);
set @i = @i + 1;
end
commit;
select count(*) from tab1;
update tab1
set col2 = floor(rand() * 1000000)+1
,col3 = NULL
where col1 = 1
;
select top 20 * from tab1 order by newid();
create index ind12 on tab1(col2);
create index ind13 on tab1(col3);
exec sp_spaceused 'dbo.tab1';
go
インデックスサイズ
ind12 -> 18 MB
ind13 -> 18 MB
update statistics tab1;
set showplan_all on
go
select * from tab1 where col2 is null;
Clustered Index Scan
select * from tab1 where col2 is not null;
Index Seek(OBJECT:([test].[dbo].[tab1].[ind12]), SEEK:([test].[dbo].[tab1].[col2] IsNotNull) ORDERED FORWARD)
select * from tab1 where col3 is null;
Index Seek(OBJECT:([test].[dbo].[tab1].[ind13]), SEEK:([test].[dbo].[tab1].[col3]=NULL) ORDERED FORWARD) ★
select * from tab1 where col3 is not null;
Clustered Index Scan
set showplan_all off
go
結果:
ヌルデータについてもインデックスは使用される