https://github.com/embulk/embulk-output-bigquery
https://qiita.com/ritukiii/items/d65912e22992d9278360
https://qiita.com/kaaaaaaaaaaai/items/6c2a459236ff2f714dc8
https://zenn.dev/dmikurube/articles/embulk-v0-11-is-coming-soon-ja
ソースDBサーバ :
MySQL : CentOS 7 , 8.0.33
Oracle : CentOS 7 , 19c
PostgreSQL : CentOS 8 , 14.5
SQL Server : Rocky Linux 8 , 2019
Embulkサーバ : Rocky Linux 9
バージョン :
java : 1.8.0_382
embulk : 0.9.25
===========================================================
BigQueryでの作業
-- 1. 前作業
gcloud init
gcloud auth list
gcloud --version
gcloud projects create project01-9999999 \
--name="project01"
gcloud config list
gcloud config set project project01-9999999
gcloud config set compute/region asia-northeast1 --quiet
gcloud config set compute/zone asia-northeast1-a --quiet
gcloud beta billing accounts list
gcloud beta billing projects link project01-9999999 --billing-account=111111-111111-111111
gcloud services enable compute.googleapis.com --project project01-9999999
gcloud components update
-- 2. BigQuery API有効化
gcloud services list --enabled
gcloud services enable bigquery.googleapis.com \
--project project01-9999999
-- 3. サービス アカウントの作成
gcloud iam service-accounts create sa123 \
--description="sa123" \
--display-name="sa123"
gcloud iam service-accounts list
gcloud projects add-iam-policy-binding project01-9999999 \
--member="serviceAccount:sa123@project01-9999999.iam.gserviceaccount.com" \
--role="roles/owner"
gcloud projects get-iam-policy project01-9999999
gcloud iam service-accounts keys create ~/key01.json \
--iam-account=sa123@project01-9999999.iam.gserviceaccount.com
cat ~/key01.json
gcloud iam service-accounts keys list \
--iam-account=sa123@project01-9999999.iam.gserviceaccount.com
scp key01.json root@192.168.137.190:/root
-- 4. Dataset作成
bq mk ds01
bq ls
===========================================================
ソースDBサーバでの作業
-- 5. テストテーブルの作成(MySQL)
use test
drop table tab1;
create table tab1(col1 bigint, col2 varchar(100),col3 datetime(6) );
insert into tab1 values(1,'A',CURRENT_TIMESTAMP(6));
insert into tab1 values(2,'B',CURRENT_TIMESTAMP(6));
insert into tab1 values(3,'C',CURRENT_TIMESTAMP(6));
select * from tab1;
-- 6. テストテーブルの作成(Oracle)
drop table tab2 purge;
create table tab2(col1 int, col2 varchar2(100),col3 timestamp);
insert into tab2 values(1,'A',systimestamp);
insert into tab2 values(2,'B',systimestamp);
insert into tab2 values(3,'C',systimestamp);
commit;
select * from tab2;
-- 7. テストテーブルの作成(PostgreSQL)
drop table tab3;
create table tab3(col1 bigint, col2 varchar(100),col3 timestamp);
insert into tab3 values(1,'A',clock_timestamp());
insert into tab3 values(2,'B',clock_timestamp());
insert into tab3 values(3,'C',clock_timestamp());
select * from tab3;
-- 8. テストテーブルの作成(SQL Server)
use test
go
drop table tab4;
go
create table tab4(col1 bigint, col2 varchar(100),col3 datetime2 );
insert into tab4 values(1,'A',getdate());
insert into tab4 values(2,'B',getdate());
insert into tab4 values(3,'C',getdate());
select * from tab4;
go
===========================================================
Embulkサーバでの作業
-- 9. Java 8インストール
dnf install -y java-1.8.0-openjdk
dnf install -y java-1.8.0-openjdk-devel
java -version
-- 10. サーバー時刻の設定
認証エラー回避のため、正しい時刻に設定する
dnf install -y chrony
systemctl restart chronyd
systemctl enable chronyd
chronyc makestep
chronyc sources
-- 11. Embulkインストール
0.11は未対応のプラグインが多いので0.9を使用する
curl --create-dirs -o ~/.embulk/bin/embulk -L "https://github.com/embulk/embulk/releases/download/v0.9.25/embulk-0.9.25.jar"
chmod +x ~/.embulk/bin/embulk
echo 'export PATH="$HOME/.embulk/bin:$PATH"' >> ~/.bashrc
source ~/.bashrc
embulk --version
embulk gem install embulk-input-mysql
embulk gem install embulk-input-oracle
embulk gem install embulk-input-postgresql
embulk gem install embulk-input-sqlserver
embulk gem search -rd embulk-output
embulk gem install jwt -v 2.3.0
embulk gem install public_suffix -v 4.0.7
embulk gem install mini_mime -v 1.0.0
embulk gem install embulk-output-bigquery
embulk gem list
-- 12. 動作確認
-- 12.1 MySQL -> BQ
vi mysql_bq.yml
in:
type: mysql
host: 192.168.137.66
user: root
password: password
database: test
table: tab1
out:
type: bigquery
mode: replace
auth_method: json_key
json_keyfile: /root/key01.json
path_prefix: /tmp
file_ext: .csv.gz
source_format: CSV
project: project01-9999999
dataset: ds01
auto_create_table: true
table: tab1
formatter: {type: csv, charset: UTF-8, delimiter: ',', header_line: false}
encoders:
- {type: gzip}
embulk run mysql_bq.yml
bq query "select * FROM [project01-9999999:ds01.tab1]"
-- 12.2 Oracle -> BQ
wget https://download.oracle.com/otn-pub/otn_software/jdbc/1914/ojdbc8-full.tar.gz
tar xvzf ojdbc8-full.tar.gz
vi oracle_bq.yml
in:
type: oracle
driver_path: ./ojdbc8-full/ojdbc8.jar
driver_class: oracle.jdbc.driver.OracleDriver
url: jdbc:oracle:thin:@192.168.137.65:1521/pdb1.example.com
user: test
password: test
table: tab2
out:
type: bigquery
mode: replace
auth_method: json_key
json_keyfile: /root/key01.json
path_prefix: /tmp
file_ext: .csv.gz
source_format: CSV
project: project01-9999999
dataset: ds01
auto_create_table: true
table: tab2
formatter: {type: csv, charset: UTF-8, delimiter: ',', header_line: false}
encoders:
- {type: gzip}
embulk run oracle_bq.yml
bq query "select * FROM [project01-9999999:ds01.tab2]"
-- 12.3 PostgreSQL -> BQ
wget https://jdbc.postgresql.org/download/postgresql-42.3.3.jar
vi postgresql_bq.yml
in:
type: postgresql
driver_path: ./postgresql-42.3.3.jar
host: 192.168.137.70
user: postgres
password: postgres
database: test
table: tab3
out:
type: bigquery
mode: replace
auth_method: json_key
json_keyfile: /root/key01.json
path_prefix: /tmp
file_ext: .csv.gz
source_format: CSV
project: project01-9999999
dataset: ds01
auto_create_table: true
table: tab3
formatter: {type: csv, charset: UTF-8, delimiter: ',', header_line: false}
encoders:
- {type: gzip}
embulk preview postgresql_bq.yml
embulk run postgresql_bq.yml
bq query "select * FROM [project01-9999999:ds01.tab3]"
-- 12.4 SQL Server -> BQ
vi sqlserver_bq.yml
in:
type: sqlserver
host: 192.168.137.174
user: sa
password: password
database: test
table: tab4
default_column_options:
datetime2: { type: string, timestamp_format: "%Y/%m/%d %H:%M:%S", timezone: "+0900"}
out:
type: bigquery
mode: replace
auth_method: json_key
json_keyfile: /root/key01.json
path_prefix: /tmp
file_ext: .csv.gz
source_format: CSV
project: project01-9999999
dataset: ds01
auto_create_table: true
table: tab4
formatter: {type: csv, charset: UTF-8, delimiter: ',', header_line: false}
encoders:
- {type: gzip}
embulk preview sqlserver_bq.yml
embulk run sqlserver_bq.yml
bq query "select * FROM [project01-9999999:ds01.tab4]"
-- 13. クリーンアップ
bq rm --recursive=true ds01
bq ls
gcloud projects list
gcloud projects delete project01-9999999