{SageMaker}Amazon SageMaker を使用して

 

https://dev.classmethod.jp/articles/a-10-minute-tutorial-from-building-to-deploying-a-machine-learning-model-with-amazon-sagemaker/

https://aws.amazon.com/jp/getting-started/hands-on/build-train-deploy-machine-learning-model-sagemaker/

 

-- 1. コマンド等のインストール

-- 1.1 aws cli version 2 インストール

curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
unzip awscliv2.zip
sudo ./aws/install
aws --version


-- 1.2 jqインストール
sudo yum -y install jq

-- 1.3 ライブラリインストール

pip3 install boto3
pip3 install sagemaker
pip3 install matplotlib
pip3 install IPython


-- 2. S3 バケットを作成する

aws s3 mb s3://bucket123

aws s3 ls

-- 3. ロールの作成

-- 3.1 ポリシーの作成


vim policy01.json

{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": [
                "s3:GetObject",
                "s3:PutObject",
                "s3:DeleteObject",
                "s3:ListBucket"
            ],
            "Resource": [
                "arn:aws:s3:::*"
            ]
        }
    ]
}


aws iam create-policy \
--policy-name policy01 \
--policy-document file://policy01.json


-- 3.2 ロールの作成

vim role01.json

{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Principal": {
                "Service": "sagemaker.amazonaws.com"
            },
            "Action": "sts:AssumeRole"
        }
    ]
}

 

aws iam create-role \
--role-name role01 \
--assume-role-policy-document file://role01.json

-- 3.3 ポリシーをロールにアタッチ

aws iam attach-role-policy --policy-arn arn:aws:iam::999999999999:policy/policy01 --role-name role01
aws iam attach-role-policy --policy-arn arn:aws:iam::aws:policy/AmazonSageMakerFullAccess --role-name role01

 


-- 4. データの準備
-- 4.1 ライブラリインポート

python3


# import libraries
import boto3, re, sys, math, json, os, sagemaker, urllib.request
from sagemaker import get_execution_role
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import Image
from IPython.display import display
from time import gmtime, strftime
from sagemaker.predictor import csv_serializer

# Define IAM role
#role = get_execution_role()
role = 'role01'

prefix = 'sagemaker/DEMO-xgboost-dm'
my_region = boto3.session.Session().region_name # set the region of the instance

# this line automatically looks for the XGBoost image URI and builds an XGBoost container.
xgboost_container = sagemaker.image_uris.retrieve("xgboost", my_region, "latest")

print("Success - the MySageMakerInstance is in the " + my_region + " region. You will use the " + xgboost_container + " container for your SageMaker endpoint.")

-- 4.2 S3バケット作成


bucket_name = 'bucket123' # <--- CHANGE THIS VARIABLE TO A UNIQUE NAME FOR YOUR BUCKET

#s3 = boto3.resource('s3')
#try:
#    if  my_region == 'us-east-1':
#      s3.create_bucket(Bucket=bucket_name)
#    else: 
#      s3.create_bucket(Bucket=bucket_name, CreateBucketConfiguration={ 'LocationConstraint': my_region })
#    print('S3 bucket created successfully')
#except Exception as e:
#    print('S3 error: ',e)

-- 4.3 テストデータダウンロード


try:
  urllib.request.urlretrieve ("https://d1.awsstatic.com/tmt/build-train-deploy-machine-learning-model-sagemaker/bank_clean.27f01fbbdf43271788427f3682996ae29ceca05d.csv", "bank_clean.csv")
  print('Success: downloaded bank_clean.csv.')
except Exception as e:
  print('Data load error: ',e)

try:
  model_data = pd.read_csv('./bank_clean.csv',index_col=0)
  print('Success: Data loaded into dataframe.')
except Exception as e:
    print('Data load error: ',e)

-- 4.4 トレーニングデータとテストデータに分割

train_data, test_data = np.split(model_data.sample(frac=1, random_state=1729), [int(0.7 * len(model_data))])
print(train_data.shape, test_data.shape)

-- 5. ML モデルをトレーニングする

-- 5.1 トレーニングデータをS3にアップロード

pd.concat([train_data['y_yes'], train_data.drop(['y_no', 'y_yes'], axis=1)], axis=1).to_csv('train.csv', index=False, header=False)
boto3.Session().resource('s3').Bucket(bucket_name).Object(os.path.join(prefix, 'train/train.csv')).upload_file('train.csv')
s3_input_train = sagemaker.inputs.TrainingInput(s3_data='s3://{}/{}/train'.format(bucket_name, prefix), content_type='csv')

-- 5.2 XGBoost モデル (予測ツール) のインスタンスを作成

sess = sagemaker.Session()
xgb = sagemaker.estimator.Estimator(xgboost_container,role, instance_count=1, instance_type='ml.m4.xlarge',output_path='s3://{}/{}/output'.format(bucket_name, prefix),sagemaker_session=sess)
xgb.set_hyperparameters(max_depth=5,eta=0.2,gamma=4,min_child_weight=6,subsample=0.8,silent=0,objective='binary:logistic',num_round=100)

-- 5.3 トレーニングJobを開始

xgb.fit({'train': s3_input_train})

 

-- 6. モデルのデプロイ
-- 6.1 モデルのデプロイ(SageMaker エンドポイントを作成)

xgb_predictor = xgb.deploy(initial_instance_count=1,instance_type='ml.m4.xlarge')

-- 6.2 テストデータで予測を実施

from sagemaker.serializers import CSVSerializer

test_data_array = test_data.drop(['y_no', 'y_yes'], axis=1).values #load the data into an array
xgb_predictor.serializer = CSVSerializer() # set the serializer type
predictions = xgb_predictor.predict(test_data_array).decode('utf-8') # predict!
predictions_array = np.fromstring(predictions[1:], sep=',') # and turn the prediction into an array
print(predictions_array.shape)

-- 7. モデルの性能評価


cm = pd.crosstab(index=test_data['y_yes'], columns=np.round(predictions_array), rownames=['Observed'], colnames=['Predicted'])
tn = cm.iloc[0,0]; fn = cm.iloc[1,0]; tp = cm.iloc[1,1]; fp = cm.iloc[0,1]; p = (tp+tn)/(tp+tn+fp+fn)*100
print("\n{0:<20}{1:<4.1f}%\n".format("Overall Classification Rate: ", p))
print("{0:<15}{1:<15}{2:>8}".format("Predicted", "No Purchase", "Purchase"))
print("Observed")
print("{0:<15}{1:<2.0f}% ({2:<}){3:>6.0f}% ({4:<})".format("No Purchase", tn/(tn+fn)*100,tn, fp/(tp+fp)*100, fp))
print("{0:<16}{1:<1.0f}% ({2:<}){3:>7.0f}% ({4:<}) \n".format("Purchase", fn/(tn+fn)*100,fn, tp/(tp+fp)*100, tp))


-- 8. クリーンアップ


-- SageMakerエンドポイント削除
xgb_predictor.delete_endpoint(delete_endpoint_config=True)

 

Ctrl-D


-- ロールの削除
aws iam list-roles | grep role01

aws iam detach-role-policy \
--role-name role01 \
--policy-arn arn:aws:iam::999999999999:policy/policy01

aws iam detach-role-policy \
--role-name role01 \
--policy-arn arn:aws:iam::aws:policy/AmazonSageMakerFullAccess

aws iam delete-role --role-name role01

-- ポリシーの削除
aws iam list-policies | grep policy01

aws iam delete-policy \
--policy-arn arn:aws:iam::999999999999:policy/policy01

 

-- S3バケット削除

aws s3 ls

aws s3 rb s3://bucket123 --force


-- モデル削除

aws sagemaker list-models

aws sagemaker delete-model \
--model-name xgboost-2022-09-11-05-44-54-123