{GCP BigQuery}パーティション分割テーブルの作成

 

https://blog.g-gen.co.jp/entry/bigquery-paritioning-and-clustering
https://cloud.google.com/bigquery/docs/creating-partitioned-tables?hl=ja


整数範囲パーティショニング
時間単位列パーティショニング
取り込み時間パーティショニング

 

-- 1. 前作業

gcloud init
gcloud auth list

gcloud --version

gcloud projects create project01-9999999 \
--name="project01"

gcloud config list
gcloud config set project project01-9999999
gcloud config set compute/region asia-southeast1 --quiet
gcloud config set compute/zone asia-southeast1-a --quiet

gcloud beta billing accounts list
gcloud beta billing projects link project01-9999999 --billing-account=111111-111111-111111

gcloud services enable compute.googleapis.com --project project01-9999999

gcloud components update


-- 2. BigQuery API有効化

gcloud services list --enabled


gcloud services enable bigquery.googleapis.com \
--project project01-9999999


-- 3. BigQueryデータセットを作成する

bq mk ds01

bq ls

 

-- 4. テスト用テーブル、データの作成

-- 4.1 時間単位列パーティション分割テーブル

 

CREATE TABLE ds01.tab1
(
  col1 INT64
, col2 DATE
)
PARTITION BY col2
OPTIONS (
partition_expiration_days = 1,
require_partition_filter = TRUE);


CREATE TABLE ds01.tab2
(
  col1 INT64
, col2 DATE
)
PARTITION BY DATE_TRUNC(col2, MONTH)
OPTIONS (
partition_expiration_days = 1,
require_partition_filter = FALSE);

 

CREATE TABLE ds01.tab3
(
  col1 INT64
, col2 TIMESTAMP
)
PARTITION BY TIMESTAMP_TRUNC(col2, DAY)
OPTIONS (
partition_expiration_days = 1,
require_partition_filter = FALSE);

 

-- 4.2 取り込み時間パーティション分割テーブル


CREATE TABLE ds01.tab4
(
  col1 INT64
)
PARTITION BY _PARTITIONDATE
OPTIONS (
partition_expiration_days = 1,
require_partition_filter = FALSE);


CREATE TABLE ds01.tab5
(
  col1 INT64
)
PARTITION BY DATE_TRUNC(_PARTITIONTIME, MONTH)
OPTIONS (
partition_expiration_days = 1,
require_partition_filter = FALSE);

 


-- 4.3 整数範囲パーティション分割テーブル

CREATE TABLE ds01.tab6
(
  col1 INT64
, col2 DATE
)
PARTITION BY RANGE_BUCKET(col1, GENERATE_ARRAY(0, 100, 10) )
OPTIONS (
require_partition_filter = FALSE);

 


-- 5. 動作確認

select * from ds01.INFORMATION_SCHEMA.TABLES
order by table_name
;

ddlに"require_partition_filter"の設定が表示されないので
   INFORMATION_SCHEMA.TABLE_OPTIONSで要確認

select * from ds01.INFORMATION_SCHEMA.TABLE_OPTIONS
order by table_name
;

 

truncate table ds01.tab1;
truncate table ds01.tab2;
truncate table ds01.tab3;
truncate table ds01.tab4;
truncate table ds01.tab5;
truncate table ds01.tab6;


insert into ds01.tab1 values(1,CURRENT_DATE);
insert into ds01.tab2 values(1,CURRENT_DATE);
insert into ds01.tab3 values(1,CURRENT_TIMESTAMP() );


※ 取り込み時間パーティション分割テーブルの場合、下記エラーがでたのでカラム指定追加
Query error: Omitting INSERT target column list is unsupported for ingestion-time partitioned table ds01.tab4 at [3:1]

insert into ds01.tab4(col1) values(1);
insert into ds01.tab5(col1) values(1);


insert into ds01.tab6 values(1,CURRENT_DATE);


select * from ds01.tab1;

→ Cannot query over table 'ds01.tab1' without a filter over column(s) 'col2' that can be used for partition elimination

select * from ds01.tab1 where col2 = CURRENT_DATE;

 

select * from ds01.tab2;
select * from ds01.tab3;
select * from ds01.tab4;
select * from ds01.tab5;
select * from ds01.tab6;

 

-- 6. クリーンアップ


gcloud projects list

gcloud projects delete project01-9999999 \
--quiet