{Comprehend}チュートリアル: Amazon Comprehend によるカスタマーレビューからの洞察の分析



https://docs.aws.amazon.com/ja_jp/comprehend/latest/dg/tutorial-reviews.html


-- 1. コマンド等のインストール

-- 1.1 aws cli version 2 インストール

# Download the AWS CLI v2 bundle for x86_64 Linux, unpack it, and run the
# bundled installer with root privileges.
curl --output "awscliv2.zip" "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip"
unzip awscliv2.zip
sudo ./aws/install

# Print the installed version to confirm the aws command is on PATH.
aws --version

 

-- 1.2 jqインストール
# jq is used in the cleanup section to pull table names out of the
# JSON returned by "aws glue get-tables".
sudo yum install --assumeyes jq


-- 2.  S3 バケットを作成しドキュメントを追加する

# Fetch and unpack the sample review dataset.
wget https://docs.aws.amazon.com/ja_jp/comprehend/latest/dg/samples/tutorial-reviews-data.zip
unzip tutorial-reviews-data.zip

# List the working directory (oldest first) and preview the first 10 lines
# of the extracted CSV.
ls -l -t -r
head -n 10 amazon-reviews.csv

# Create the working bucket and confirm it shows up in the bucket list.
aws s3 mb s3://bucket123
aws s3 ls

# Upload the reviews under the input/ prefix, then list the bucket contents.
aws s3 cp amazon-reviews.csv s3://bucket123/input/
aws s3 ls --recursive s3://bucket123

 

-- 3. Amazon Comprehend用とGlue用のIAMロールの作成
-- 3.1 IAMポリシー作成(Comprehend用)
vim policy01.json

{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Action": [
                "s3:GetObject"
            ],
            "Resource": [
                "arn:aws:s3:::bucket123/*"
            ],
            "Effect": "Allow"
        },
        {
            "Action": [
                "s3:ListBucket"
            ],
            "Resource": [
                "arn:aws:s3:::bucket123"
            ],
            "Effect": "Allow"
        },
        {
            "Action": [
                "s3:PutObject"
            ],
            "Resource": [
                "arn:aws:s3:::bucket123/*"
            ],
            "Effect": "Allow"
        }
    ]
}

# Register the JSON document saved as policy01.json as an IAM policy
# named policy01 (S3 read/list/write on bucket123).
aws iam create-policy \
  --policy-document file://policy01.json \
  --policy-name policy01

-- 3.2 IAMロール作成(Comprehend用)
vim role01.json

{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Principal": {
        "Service": "comprehend.amazonaws.com"
      },
      "Action": "sts:AssumeRole"
    }
  ]
}


# Create role01 using role01.json as its trust policy, which lets
# comprehend.amazonaws.com assume the role.
aws iam create-role \
  --assume-role-policy-document file://role01.json \
  --role-name role01

-- 3.3 ポリシーをロールにアタッチ(Comprehend用)
# Attach policy01 to role01. 999999999999 is the account-id placeholder
# used throughout these notes.
aws iam attach-role-policy \
  --role-name role01 \
  --policy-arn arn:aws:iam::999999999999:policy/policy01


-- 3.4 IAMポリシー作成(Glue用)
vim policy02.json

{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": [
                "s3:GetObject",
                "s3:PutObject"
            ],
            "Resource": [
                "arn:aws:s3:::bucket123/sentiment-results*",
                "arn:aws:s3:::bucket123/entities-results*"
            ]
        }
    ]
}


# Register policy02.json as an IAM policy named policy02 (object
# read/write on the sentiment-results* and entities-results* prefixes).
aws iam create-policy \
  --policy-document file://policy02.json \
  --policy-name policy02

-- 3.5 IAMロール作成(Glue用)
vim role02.json

{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Principal": {
        "Service": "glue.amazonaws.com"
      },
      "Action": "sts:AssumeRole"
    }
  ]
}

# Create role02 using role02.json as its trust policy, which lets
# glue.amazonaws.com assume the role.
aws iam create-role \
  --assume-role-policy-document file://role02.json \
  --role-name role02

-- 3.6 ポリシーをロールにアタッチ(Glue用)
# Attach both policies to role02: the customer managed policy02 and the
# AWS managed AWSGlueServiceRole policy.
for policy_arn in \
  arn:aws:iam::999999999999:policy/policy02 \
  arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole
do
  aws iam attach-role-policy --role-name role02 --policy-arn "$policy_arn"
done


-- 4. Amazon S3 のドキュメントに対する分析ジョブの実行

-- 4.1 感情分析ジョブの開始
# Start an asynchronous sentiment detection job over everything under input/.
# InputFormat=ONE_DOC_PER_LINE is set explicitly so each line of the uploaded
# CSV is analyzed as its own document; the Athena queries later in these notes
# select a "line" column, which requires per-line results.
aws comprehend start-sentiment-detection-job \
  --input-data-config S3Uri=s3://bucket123/input/,InputFormat=ONE_DOC_PER_LINE \
  --output-data-config S3Uri=s3://bucket123/output/ \
  --data-access-role-arn arn:aws:iam::999999999999:role/role01 \
  --job-name job01 \
  --language-code en

# List all sentiment jobs to find the new job and its status.
aws comprehend list-sentiment-detection-jobs

# Inspect a single job; replace the placeholder with the real job ID.
aws comprehend describe-sentiment-detection-job \
  --job-id 11111111111111111111111111111111


-- 4.2 エンティティ分析ジョブの開始
# Start an asynchronous entity detection job over everything under input/.
# InputFormat=ONE_DOC_PER_LINE is set explicitly so each line of the uploaded
# CSV is analyzed as its own document; the Athena queries later in these notes
# select a "line" column, which requires per-line results.
aws comprehend start-entities-detection-job \
  --input-data-config S3Uri=s3://bucket123/input/,InputFormat=ONE_DOC_PER_LINE \
  --output-data-config S3Uri=s3://bucket123/output/ \
  --data-access-role-arn arn:aws:iam::999999999999:role/role01 \
  --job-name job02 \
  --language-code en

# List all entity jobs to find the new job and its status.
aws comprehend list-entities-detection-jobs

# Inspect a single job; replace the placeholder with the real job ID.
aws comprehend describe-entities-detection-job \
  --job-id 22222222222222222222222222222222


-- 5. Amazon Comprehend の出力をデータ視覚化のために準備する

-- 5.1 アウトプットをダウンロードする

# Download the sentiment job archive. The S3 key embeds the account ID
# and the job ID (both placeholders here).
aws s3 cp \
  s3://bucket123/output/999999999999-SENTIMENT-11111111111111111111111111111111/output/output.tar.gz \
  output01.tar.gz

# Download the entities (NER) job archive.
aws s3 cp \
  s3://bucket123/output/999999999999-NER-22222222222222222222222222222222/output/output.tar.gz \
  output02.tar.gz

-- 5.2 出力ファイルを展開します
# Unpack each archive, prefixing every extracted member name so the two
# jobs' outputs get distinct names (sentiment-* and entities-*).
tar --extract --verbose --file output01.tar.gz --transform 's,^,sentiment-,'

tar --extract --verbose --file output02.tar.gz --transform 's,^,entities-,'


-- 5.3 抽出したファイルをアップロードする
# Upload each extracted result file to its own prefix; these are the
# prefixes the Glue crawler is pointed at.
aws s3 cp sentiment-output s3://bucket123/sentiment-results/
aws s3 cp entities-output s3://bucket123/entities-results/

# List the bucket contents to confirm both uploads.
aws s3 ls --recursive s3://bucket123

 

-- 5.4 データベースを作成する

# Create the Glue Data Catalog database named "test".
aws glue create-database --database-input '{"Name": "test"}'

# Confirm it exists: list all databases, then fetch this one by name.
aws glue get-databases
aws glue get-database --name test


-- 5.5 クローラを作成する

# Define crawler01: it runs as role02, scans both result prefixes, and
# writes the tables it discovers into the "test" database.
aws glue create-crawler \
  --name crawler01 \
  --database-name test \
  --role arn:aws:iam::999999999999:role/role02 \
  --targets '{"S3Targets": [{"Path": "s3://bucket123/sentiment-results"}, {"Path": "s3://bucket123/entities-results"}]}'

# Confirm the crawler was registered: names only, all details, then this one.
aws glue list-crawlers
aws glue get-crawlers
aws glue get-crawler --name crawler01


-- 5.6 クローラを実行する

# Start the crawl, then fetch the crawler's details to check on it.
aws glue start-crawler --name crawler01
aws glue get-crawler --name crawler01

 

-- 5.7 クローラが作成したテーブルを表示する

# Show every table in the "test" database.
aws glue get-tables --database-name test

# Show the full definition of each of the two result tables.
for table_name in sentiment_results entities_results; do
  aws glue get-table --database-name test --name "$table_name"
done


-- 5.8 Athenaクエリの結果の場所の設定

Athenaコンソールの「設定」タブでクエリ結果の場所を指定する
→ s3://bucket123/query-results/

 

-- 5.9 ネスト解除済みテーブルの作成
※Athenaコンソールで実施

-- Flatten the sentimentscore struct into one column per score.
CREATE TABLE sentiment_results_final AS
SELECT
    file,
    line,
    sentiment,
    sentimentscore.mixed    AS mixed,
    sentimentscore.negative AS negative,
    sentimentscore.neutral  AS neutral,
    sentimentscore.positive AS positive
FROM sentiment_results;

-- Expand the entities array: one output row per array element.
CREATE TABLE entities_results_1 AS
SELECT
    file,
    line,
    nested
FROM entities_results
CROSS JOIN UNNEST(entities) AS t(nested);

-- Flatten each expanded entity struct into plain columns.
CREATE TABLE entities_results_final AS
SELECT
    file,
    line,
    nested.beginoffset AS beginoffset,
    nested.endoffset   AS endoffset,
    nested.score       AS score,
    nested.text        AS entity,
    nested.type        AS category
FROM entities_results_1;

 


-- 6. クリーンアップ


-- テーブルの削除

# Print the name of every table in the "test" database.
# The jq filter is single-quoted so the shell never tries to interpret
# the [] as a glob pattern (zsh, for example, rejects the unquoted form
# with "no matches found").
aws glue get-tables --database-name test \
  | jq -r '.TableList[].Name'

# Drop the two crawler-created tables and the three tables built by the
# CREATE TABLE ... AS statements, in the same order as before.
for table_name in \
  sentiment_results \
  entities_results \
  sentiment_results_final \
  entities_results_final \
  entities_results_1
do
  aws glue delete-table --database-name test --name "$table_name"
done


-- クローラの削除

# Review the existing crawlers (names, then full details) before deleting.
aws glue list-crawlers
aws glue get-crawlers

# Remove the tutorial crawler.
aws glue delete-crawler --name crawler01


-- データベースの削除

# Review the catalog databases and the "test" entry before deleting.
aws glue get-databases
aws glue get-database --name test

# Remove the tutorial database.
aws glue delete-database --name test


-- IAMロールの削除(Glue用)
# Confirm role02 exists. The JMESPath --query matches RoleName exactly,
# whereas grepping the raw JSON matches any line containing "role02".
aws iam list-roles --query "Roles[?RoleName=='role02']"

# Detach both managed policies first; IAM refuses to delete a role that
# still has policies attached.
aws iam detach-role-policy \
  --role-name role02 \
  --policy-arn arn:aws:iam::999999999999:policy/policy02

aws iam detach-role-policy \
  --role-name role02 \
  --policy-arn arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole

aws iam delete-role --role-name role02


-- IAMポリシーの削除(Glue用)
# Confirm policy02 exists. --scope Local limits the listing to customer
# managed policies, and the JMESPath --query matches PolicyName exactly
# instead of grepping through the JSON of every policy.
aws iam list-policies --scope Local --query "Policies[?PolicyName=='policy02']"

aws iam delete-policy \
  --policy-arn arn:aws:iam::999999999999:policy/policy02


-- IAMロールの削除(Comprehend用)
# Confirm role01 exists. The JMESPath --query matches RoleName exactly,
# whereas grepping the raw JSON matches any line containing "role01".
aws iam list-roles --query "Roles[?RoleName=='role01']"

# Detach the managed policy first; IAM refuses to delete a role that
# still has policies attached.
aws iam detach-role-policy \
  --role-name role01 \
  --policy-arn arn:aws:iam::999999999999:policy/policy01

aws iam delete-role --role-name role01


-- IAMポリシーの削除(Comprehend用)
# Confirm policy01 exists. --scope Local limits the listing to customer
# managed policies, and the JMESPath --query matches PolicyName exactly
# instead of grepping through the JSON of every policy.
aws iam list-policies --scope Local --query "Policies[?PolicyName=='policy01']"

aws iam delete-policy \
  --policy-arn arn:aws:iam::999999999999:policy/policy01

-- S3バケットの削除
# Review the bucket list, then remove the tutorial bucket.
# --force deletes every object in the bucket before removing it.
aws s3 ls
aws s3 rb --force s3://bucket123