ヌルのインデックス

 

Oracleはヌルデータのインデックスは作成されない。is not nullでインデックスが使用される場合がある。
MySQLPostgreSQLSQL Serverはヌルデータもインデックスは作成され、使用される

 

(8.0.29)
https://qiita.com/omukaik/items/6c9d3018c6ab8fbef7bb
https://dev.mysql.com/doc/refman/8.0/ja/is-null-optimization.html


drop table tab1;
create table tab1(
    col1 int primary key
   ,col2 bigint
   ,col3 bigint
   );


drop procedure proc1;

delimiter //
create procedure proc1(in x int)
begin
  declare i int;
  set i = 0;
  start transaction;
  while i < x do
    set i = i + 1;
    insert into tab1 values(
     i
    ,NULL
    ,floor(rand() * 1000000)+1
    );
  end while;
  commit;
end
//
delimiter ;

call proc1(1000000);
select count(*) from tab1;

update tab1 
set col2 = floor(rand() * 1000000)+1
   ,col3 = NULL
where col1 = 1
;


select * from tab1 order by rand() limit 20;

create index ind12 on tab1(col2);
create index ind13 on tab1(col3);


analyze table tab1;
select
   table_name
  ,table_rows
  ,avg_row_length
  ,data_length/1024/1024/1024 tableGB
  ,index_length/1024/1024/1024 indexGB
from information_schema.tables
where table_schema = database()
and table_name = 'tab1'
;

インデックスサイズ
ind12 -> 0.012 GB
ind13 -> 0.020 GB


explain analyze select * from tab1 where col2 is null;

-> Index lookup on tab1 using ind12 (col2=NULL) ★
, with index condition: (tab1.col2 is null)
  (cost=51474.10 rows=499111) (actual time=0.372..1966.718 rows=999999 loops=1)

explain analyze select * from tab1 where col2 is not null;

-> Index range scan on tab1 using ind12 over (NULL < col2)
, with index condition: (tab1.col2 is not null)
  (cost=0.71 rows=1) (actual time=0.024..0.027 rows=1 loops=1)

explain analyze select * from tab1 where col3 is null;

-> Index lookup on tab1 using ind13 (col3=NULL) ★
, with index condition: (tab1.col3 is null)
  (cost=0.35 rows=1) (actual time=0.045..0.051 rows=1 loops=1)

explain analyze select * from tab1 where col3 is not null;

-> Filter: (tab1.col3 is not null)  (cost=100343.20 rows=499111) (actual time=0.032..243.522 rows=999999 loops=1)
    -> Table scan on tab1  (cost=100343.20 rows=998222) (actual time=0.028..193.815 rows=1000000 loops=1)


結果:
ヌルデータについてもインデックスは使用される

 

(19c)

drop table tab1 purge;
create table tab1(
    col1 int primary key
   ,col2 int
   ,col3 int
   );

declare
begin
for i in 1..1000000 loop
  insert into tab1 values
  (i
  ,NULL
  ,floor(dbms_random.value(1, 1000000) )
  );
end loop;
end;
/

commit;
select count(*) from tab1;

update tab1 
set col2 = floor(dbms_random.value(1, 1000000) )
   ,col3 = NULL
where col1 = 1
;
commit;


select * from tab1 order by dbms_random.value()  fetch first 20 rows only;

create index ind12 on tab1(col2);
create index ind13 on tab1(col3);


select segment_name,sum(bytes/1024/1024/1024) bytes 
from user_segments 
where segment_name in (select index_name from user_indexes where table_name = 'TAB1')
group by segment_name
order by segment_name;


インデックスサイズ
ind12 -> .000061035 GB
ind13 -> .017578125 GB

exec dbms_stats.gather_table_stats(user, 'TAB1');

explain plan for select * from tab1 where col2 is null;
select * from table(dbms_xplan.display(format=>'ALL') );

→ TABLE ACCESS FULL

explain plan for select * from tab1 where col2 is not null;
select * from table(dbms_xplan.display(format=>'ALL') );

→ INDEX FULL SCAN

explain plan for select * from tab1 where col3 is null;
select * from table(dbms_xplan.display(format=>'ALL') );

→ TABLE ACCESS FULL

explain plan for select * from tab1 where col3 is not null;
select * from table(dbms_xplan.display(format=>'ALL') );

→ TABLE ACCESS FULL


結果:
is not nullの場合、インデックスが使用される場合がある

 

 

(14)
https://www.postgresql.jp/document/14/html/indexes-types.html


drop table tab1;
create table tab1(
    col1 int primary key
   ,col2 bigint
   ,col3 bigint
   );

start transaction;
insert into tab1 select
   g
  ,NULL
  ,floor(random() * 1000000)+1
from generate_series(1,1000000) g;

commit;

select count(*) from tab1;


update tab1 
set col2 = floor(random() * 1000000)+1
   ,col3 = NULL
where col1 = 1
;

\pset pager 0
select * from tab1 order by random() limit 20;

create index ind12 on tab1(col2);
create index ind13 on tab1(col3);

select pg_size_pretty(pg_relation_size('ind12') );
select pg_size_pretty(pg_relation_size('ind13') );


インデックスサイズ
ind12 -> 6.6 MB
ind13 -> 19 MB

analyze tab1;

explain analyze select * from tab1 where col2 is null;

 Seq Scan on tab1  (cost=0.00..15406.00 rows=1000000 width=20) (actual time=0.012..82.440 rows=999999 loops=1)


explain analyze select * from tab1 where col2 is not null;

 Index Scan using ind12 on tab1  (cost=0.42..8.44 rows=1 width=20) (actual time=0.015..0.016 rows=1 loops=1)
   Index Cond: (col2 IS NOT NULL)


explain analyze select * from tab1 where col3 is null;

 Index Scan using ind13 on tab1  (cost=0.42..8.44 rows=1 width=20) (actual time=0.055..0.056 rows=1 loops=1)
   Index Cond: (col3 IS NULL) ★


explain analyze select * from tab1 where col3 is not null;

 Seq Scan on tab1  (cost=0.00..15406.00 rows=1000000 width=20) (actual time=0.012..93.987 rows=999999 loops=1)

結果:
ヌルデータについてもインデックスは使用される

 

(2019)


drop table tab1;
create table tab1(
    col1 int primary key
   ,col2 bigint
   ,col3 bigint
   );


set nocount on
declare @i int;
set @i = 1;
begin transaction;
while @i <= 1000000
begin
  insert into tab1 values(
     @i
    ,NULL
    ,floor(rand() * 1000000)+1
    );
  set @i = @i + 1;
end
commit;
select count(*) from tab1;


update tab1 
set col2 = floor(rand() * 1000000)+1
   ,col3 = NULL
where col1 = 1
;


select top 20 * from tab1 order by newid();

create index ind12 on tab1(col2);
create index ind13 on tab1(col3);


exec sp_spaceused 'dbo.tab1';
go


インデックスサイズ
ind12 -> 18 MB
ind13 -> 18 MB

update statistics tab1;

set showplan_all on
go

select * from tab1 where col2 is null;

  Clustered Index Scan

select * from tab1 where col2 is not null;

  Index Seek(OBJECT:([test].[dbo].[tab1].[ind12]), SEEK:([test].[dbo].[tab1].[col2] IsNotNull) ORDERED FORWARD)

select * from tab1 where col3 is null;

  Index Seek(OBJECT:([test].[dbo].[tab1].[ind13]), SEEK:([test].[dbo].[tab1].[col3]=NULL) ORDERED FORWARD) ★

select * from tab1 where col3 is not null;

  Clustered Index Scan

set showplan_all off
go


結果:
ヌルデータについてもインデックスは使用される