https://blog.g-gen.co.jp/entry/bigquery-paritioning-and-clustering
https://cloud.google.com/bigquery/docs/creating-partitioned-tables?hl=ja
整数範囲パーティショニング
時間単位列パーティショニング
取り込み時間パーティショニング
-- 1. 前作業
gcloud init
gcloud auth list
gcloud --version
gcloud projects create project01-9999999 \
--name="project01"
gcloud config list
gcloud config set project project01-9999999
gcloud config set compute/region asia-southeast1 --quiet
gcloud config set compute/zone asia-southeast1-a --quiet
gcloud beta billing accounts list
gcloud beta billing projects link project01-9999999 --billing-account=111111-111111-111111
gcloud services enable compute.googleapis.com --project project01-9999999
gcloud components update
-- 2. BigQuery API有効化
gcloud services list --enabled
gcloud services enable bigquery.googleapis.com \
--project project01-9999999
-- 3. BigQueryデータセットを作成する
bq mk ds01
bq ls
-- 4. テスト用テーブル、データの作成
-- 4.1 時間単位列パーティション分割テーブル
CREATE TABLE ds01.tab1
(
col1 INT64
, col2 DATE
)
PARTITION BY col2
OPTIONS (
partition_expiration_days = 1,
require_partition_filter = TRUE);
CREATE TABLE ds01.tab2
(
col1 INT64
, col2 DATE
)
PARTITION BY DATE_TRUNC(col2, MONTH)
OPTIONS (
partition_expiration_days = 1,
require_partition_filter = FALSE);
CREATE TABLE ds01.tab3
(
col1 INT64
, col2 TIMESTAMP
)
PARTITION BY TIMESTAMP_TRUNC(col2, DAY)
OPTIONS (
partition_expiration_days = 1,
require_partition_filter = FALSE);
-- 4.2 取り込み時間パーティション分割テーブル
CREATE TABLE ds01.tab4
(
col1 INT64
)
PARTITION BY _PARTITIONDATE
OPTIONS (
partition_expiration_days = 1,
require_partition_filter = FALSE);
CREATE TABLE ds01.tab5
(
col1 INT64
)
PARTITION BY DATE_TRUNC(_PARTITIONTIME, MONTH)
OPTIONS (
partition_expiration_days = 1,
require_partition_filter = FALSE);
-- 4.3 整数範囲パーティション分割テーブル
CREATE TABLE ds01.tab6
(
col1 INT64
, col2 DATE
)
PARTITION BY RANGE_BUCKET(col1, GENERATE_ARRAY(0, 100, 10) )
OPTIONS (
require_partition_filter = FALSE);
-- 5. 動作確認
select * from ds01.INFORMATION_SCHEMA.TABLES
order by table_name
;
→ ddlに"require_partition_filter"の設定が表示されないので
INFORMATION_SCHEMA.TABLE_OPTIONSで要確認
select * from ds01.INFORMATION_SCHEMA.TABLE_OPTIONS
order by table_name
;
truncate table ds01.tab1;
truncate table ds01.tab2;
truncate table ds01.tab3;
truncate table ds01.tab4;
truncate table ds01.tab5;
truncate table ds01.tab6;
insert into ds01.tab1 values(1,CURRENT_DATE);
insert into ds01.tab2 values(1,CURRENT_DATE);
insert into ds01.tab3 values(1,CURRENT_TIMESTAMP() );
※ 取り込み時間パーティション分割テーブルの場合、下記エラーがでたのでカラム指定追加
Query error: Omitting INSERT target column list is unsupported for ingestion-time partitioned table ds01.tab4 at [3:1]
insert into ds01.tab4(col1) values(1);
insert into ds01.tab5(col1) values(1);
insert into ds01.tab6 values(1,CURRENT_DATE);
select * from ds01.tab1;
→ Cannot query over table 'ds01.tab1' without a filter over column(s) 'col2' that can be used for partition elimination
select * from ds01.tab1 where col2 = CURRENT_DATE;
select * from ds01.tab2;
select * from ds01.tab3;
select * from ds01.tab4;
select * from ds01.tab5;
select * from ds01.tab6;
-- 6. クリーンアップ
gcloud projects list
gcloud projects delete project01-9999999 \
--quiet