{S3}Amazon S3 Select を使用したデータのフィルタリングと取得

https://tkaaad97.hatenablog.com/entry/2018/06/05/013009

 

-- 1. コマンド等のインストール

-- 1.1 AWS CLI version 2 のインストール

# Download the AWS CLI v2 installer bundle (Linux x86_64 build) as awscliv2.zip.
curl -o "awscliv2.zip" "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip"
# Unpack the bundle, then run the installer script it provides with root privileges.
unzip awscliv2.zip
sudo ./aws/install
# Print the version of the aws command that is now available.
aws --version


-- 2. S3 バケットを作成する

# Create the bucket used by every later step.
# NOTE(review): S3 bucket names are presumably shared across all accounts, so
# "bucket123" may already be taken — substitute your own name if this fails.
aws s3 mb "s3://bucket123"

# List the buckets visible to the current credentials.
aws s3 ls


-- 3. テストファイルの作成とアップロード

# Create the sample file test01.csv non-interactively instead of typing the
# rows into an editor. The quoted 'EOF' delimiter disables shell expansion, so
# the rows below are written to the file exactly as shown.
# The file has no header row; each record has eight comma-separated fields.
cat > test01.csv <<'EOF'
1,100,"A","あああ","2021/10/01",x1,イ,01:21:00
2,200,"B","いいい","2021/10/02",x2,ロ,01:22:00
3,300,"C","ううう","2021/10/03",x3,ハ,01:23:00
4,400,"D","えええ","2021/10/04",x4,ニ,01:24:00
5,500,"E","おおお","2021/10/05",x5,ホ,01:25:00
EOF


# Upload the local test01.csv to the bucket under the key test01.csv,
# recording its content type as text/csv.
aws s3api put-object \
  --bucket bucket123 \
  --key test01.csv \
  --body test01.csv \
  --content-type text/csv

# List the bucket's contents recursively.
aws s3 ls "s3://bucket123" --recursive

-- 4. 動作確認

# Query 1: select every record of test01.csv.
# The object is read as CSV and the result is written as CSV to the local
# file test01.txt (the trailing positional argument).
aws s3api select-object-content \
  --bucket bucket123 \
  --key test01.csv \
  --expression-type SQL \
  --expression 'select * from s3object' \
  --input-serialization '{"CSV": {}}' \
  --output-serialization '{"CSV": {}}' \
  test01.txt

# Show the query result.
cat test01.txt

# Query 2: same as a full select, but "limit 1" caps the result at one record.
# The result is written as CSV to the local file test02.txt.
aws s3api select-object-content \
  --bucket bucket123 \
  --key test01.csv \
  --expression-type SQL \
  --expression 'select * from s3object limit 1' \
  --input-serialization '{"CSV": {}}' \
  --output-serialization '{"CSV": {}}' \
  test02.txt

# Show the query result.
cat test02.txt

 

# Query 3: project columns _1 through _5 and keep only records whose _1
# equals the string '1'. The result is written as CSV to test03.txt.
# NOTE(review): _N looks like S3 Select's positional column reference for
# header-less CSV (_1 = first field) — confirm against the S3 Select SQL docs.
# The expression stays in double quotes because it contains single quotes.
aws s3api select-object-content \
  --bucket bucket123 \
  --key test01.csv \
  --expression-type SQL \
  --expression "select _1,_2,_3,_4,_5 from s3object where _1 = '1' " \
  --input-serialization '{"CSV": {}}' \
  --output-serialization '{"CSV": {}}' \
  test03.txt

# Show the query result.
cat test03.txt


# Query 4: aggregate over the whole object — the average of column _2 after
# casting it to int, plus the total record count. The result is written as
# CSV to test04.txt.
aws s3api select-object-content \
  --bucket bucket123 \
  --key test01.csv \
  --expression-type SQL \
  --expression 'select avg(cast(_2 as int)),count(*) from s3object' \
  --input-serialization '{"CSV": {}}' \
  --output-serialization '{"CSV": {}}' \
  test04.txt

# Show the query result.
cat test04.txt

 

-- 5. クリーンアップ

-- バケットの削除
# List the buckets before deleting, to confirm the target exists.
aws s3 ls
# Remove the bucket created for this walkthrough.
# NOTE(review): --force is expected to delete the objects still in the bucket
# (test01.csv) before removing it — confirm against the AWS CLI reference.
aws s3 rb "s3://bucket123" --force