1. Cài đặt AWS CLI
curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
sudo apt install unzip
unzip awscliv2.zip
sudo ./aws/install
Kiểm tra:
aws --version
2. Cấu hình AWS CLI
aws configure
Nhập thông tin (không ghi khoá thật vào tài liệu; khoá đã bị lộ cần được thu hồi và tạo lại trong IAM):
- AWS Access Key ID: YOUR_AWS_ACCESS_KEY_ID
- AWS Secret Access Key: YOUR_AWS_SECRET_ACCESS_KEY
- Region: ap-southeast-1
- Output format: json
3. Cài đặt kubectl
curl -LO "https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl"
chmod +x ./kubectl
sudo mv ./kubectl /usr/local/bin
kubectl version --client
4. Cài đặt eksctl
curl --silent --location "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_$(uname -s)_amd64.tar.gz" | tar xz -C /tmp
sudo mv /tmp/eksctl /usr/local/bin
eksctl version
5. Cài đặt Helm
curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3
chmod 700 get_helm.sh
./get_helm.sh
helm version
6. Tạo EKS Cluster với eksctl
eksctl create cluster \
  --name eks-fis-lab \
  --version 1.33 \
  --region ap-southeast-1 \
  --nodegroup-name worker-nodes \
  --node-type t2.large \
  --nodes 2 \
  --nodes-min 2 \
  --nodes-max 3
aws eks update-kubeconfig --name eks-fis-lab --region ap-southeast-1
kubectl get nodes
7. Tạo EFS và Access Point
File 1: ap-prometheus-server.json
{
"ClientToken": "prometheus-server-001",
"Tags": [
{
"Key": "Name",
"Value": "Prometheus Server"
}
],
"FileSystemId": "fs-07303b466d04e3bc1",
"PosixUser": {
"Uid": 500,
"Gid": 500,
"SecondaryGids": [2000]
},
"RootDirectory": {
"Path": "/prometheus/server",
"CreationInfo": {
"OwnerUid": 500,
"OwnerGid": 500,
"Permissions": "0755"
}
}
}
File 2: ap-alertmanager.json
{
"ClientToken": "alertmanager-001",
"Tags": [
{
"Key": "Name",
"Value": "Prometheus Alert Manager"
}
],
"FileSystemId": "fs-07303b466d04e3bc1",
"PosixUser": {
"Uid": 501,
"Gid": 501,
"SecondaryGids": [2000]
},
"RootDirectory": {
"Path": "/prometheus/alertmanager",
"CreationInfo": {
"OwnerUid": 501,
"OwnerGid": 501,
"Permissions": "0755"
}
}
}
File 3: ap-grafana.json
{
"ClientToken": "grafana-001",
"Tags": [
{
"Key": "Name",
"Value": "Grafana Server"
}
],
"FileSystemId": "fs-0d74e95bb2f798d94",
"PosixUser": {
"Uid": 472,
"Gid": 472,
"SecondaryGids": [2000]
},
"RootDirectory": {
"Path": "/grafana",
"CreationInfo": {
"OwnerUid": 472,
"OwnerGid": 472,
"Permissions": "0755"
}
}
}
Tạo Access Point
aws efs create-access-point --cli-input-json file://ap-prometheus-server.json
aws efs create-access-point --cli-input-json file://ap-alertmanager.json
aws efs create-access-point --cli-input-json file://ap-grafana.json
8. Tạo StorageClass và PersistentVolume
File: efs-sc.yaml
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: efs-sc
provisioner: efs.csi.aws.com
parameters:
  provisioningMode: efs-ap
  fileSystemId: fs-0d74e95bb2f798d94
  directoryPerms: "700"
kubectl apply -f efs-sc.yaml
File: pv-server.yml
apiVersion: v1
kind: PersistentVolume
metadata:
  name: prometheus-server
spec:
  capacity:
    storage: 8Gi
  volumeMode: Filesystem
  accessModes:
    - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
  storageClassName: efs-sc
  csi:
    driver: efs.csi.aws.com
    volumeHandle: fs-071e026c105d0bc8e::fsap-0b18bb9627fac04c1
File: pv-alertmanager.yml
apiVersion: v1
kind: PersistentVolume
metadata:
  name: alertmanager-pv
spec:
  capacity:
    storage: 2Gi
  volumeMode: Filesystem
  accessModes:
    - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
  storageClassName: efs-sc
  claimRef:
    namespace: prometheus
    name: storage-prometheus-alertmanager-0
  csi:
    driver: efs.csi.aws.com
    volumeHandle: fs-071e026c105d0bc8e::fsap-07e39dff2ec748386
kubectl apply -f pv-server.yml
kubectl apply -f pv-alertmanager.yml
kubectl get pv
NAME CAPACITY ACCESS MODES RECLAIM POLICY STATUS CLAIM STORAGECLASS VOLUMEATTRIBUTESCLASS REASON AGE
alertmanager-pv 2Gi RWO Retain Available prometheus/storage-prometheus-alertmanager-0 efs-sc <unset> 5s
prometheus-server 8Gi RWO Retain Available efs-sc <unset> 12s
9. Cài đặt Prometheus
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
helm repo update
helm repo list
kubectl create namespace prometheus
Cài đặt Prometheus dùng EFS:
helm install prometheus prometheus-community/prometheus \
  --namespace prometheus \
  --set alertmanager.persistentVolume.enabled=true \
  --set alertmanager.persistentVolume.storageClass="efs-sc" \
  --set alertmanager.persistentVolume.size="2Gi" \
  --set server.persistentVolume.enabled=true \
  --set server.persistentVolume.storageClass="efs-sc" \
  --set server.persistentVolume.size="8Gi"
10. Cấu hình OIDC + EFS CSI Driver
oidc_id=$(aws eks describe-cluster --name eks-fis-lab --query "cluster.identity.oidc.issuer" --output text | cut -d '/' -f 5)
aws iam list-open-id-connect-providers | grep $oidc_id | cut -d "/" -f4
eksctl utils associate-iam-oidc-provider --cluster eks-fis-lab --approve
Tạo IAM Role cho driver:
eksctl create iamserviceaccount \
  --name efs-csi-controller-sa \
  --namespace kube-system \
  --cluster eks-fis-lab \
  --attach-policy-arn arn:aws:iam::aws:policy/service-role/AmazonEFSCSIDriverPolicy \
  --approve \
  --role-only \
  --role-name AmazonEKS_EFS_CSI_DriverRole
eksctl create addon \
  --name aws-efs-csi-driver \
  --cluster eks-fis-lab \
  --service-account-role-arn arn:aws:iam::602320714312:role/AmazonEKS_EFS_CSI_DriverRole \
  --force
Kiểm tra driver:
kubectl get pods -n kube-system | grep efs
kubectl delete pod prometheus-alertmanager-0 -n prometheus
kubectl get all -n prometheus
11. Cài Grafana kèm PersistentVolume + SMTP Alert
helm repo add grafana https://grafana.github.io/helm-charts
helm repo update
kubectl create namespace grafana
File: prometheus-datasource.yaml
datasources:
  datasources.yaml:
    apiVersion: 1
    datasources:
      - name: Prometheus
        type: prometheus
        url: http://prometheus-server.prometheus.svc.cluster.local
        access: proxy
        isDefault: true
File: config-grafana.yml
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: efs-sc
provisioner: efs.csi.aws.com
parameters:
  provisioningMode: efs-ap
  fileSystemId: fs-0d74e95bb2f798d94
  accessPointId: fsap-088ba05ae7c2ddf83
  directoryPerms: "0775"
  uid: "472"
  gid: "472"
File: pv-grafana.yml
apiVersion: v1
kind: PersistentVolume
metadata:
  name: grafana-server
spec:
  capacity:
    storage: 8Gi
  volumeMode: Filesystem
  accessModes:
    - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
  storageClassName: efs-sc
  csi:
    driver: efs.csi.aws.com
    volumeHandle: fs-0d74e95bb2f798d94::fsap-088ba05ae7c2ddf83
kubectl apply -f config-grafana.yml
kubectl apply -f pv-grafana.yml
File cấu hình SMTP: values-smtp.yaml
smtp:
  enabled: true
  host: smtp.gmail.com:587
  user: chemgiopro009@gmail.com
  from_address: chemgiopro009@gmail.com
  from_name: Grafana Alerts
  password: ${SMTP_PASSWORD}
  skip_verify: true
initChownData:
  enabled: false
securityContext:
  runAsUser: 472
  runAsGroup: 472
  fsGroup: 472
kubectl create secret generic grafana-smtp-secret \
  --from-literal=SMTP_PASSWORD='YOUR_GMAIL_APP_PASSWORD' \
  -n grafana
Cài Grafana:
helm install grafana grafana/grafana \
  --namespace grafana \
  --set persistence.enabled=true \
  --set persistence.storageClassName="efs-sc" \
  --set persistence.size="8Gi" \
  --set adminPassword='admin@123' \
  --values prometheus-datasource.yaml \
  --set service.type=LoadBalancer
File cấu hình SMTP: values-smtp.yaml
kubectl create secret generic grafana-smtp-secret \
  --from-literal=SMTP_PASSWORD='YOUR_GMAIL_APP_PASSWORD' \
  -n grafana
envFromSecret: grafana-smtp-secret
grafana.ini:
  smtp:
    enabled: true
    host: smtp.gmail.com:587
    user: chemgiopro009@gmail.com
    from_address: chemgiopro009@gmail.com
    from_name: Grafana Alerts
    password: ${SMTP_PASSWORD}
    skip_verify: true
# Nếu lỗi thì thêm đoạn này
initChownData:
  enabled: false
securityContext:
  runAsUser: 472
  runAsGroup: 472
  fsGroup: 472
helm upgrade grafana grafana/grafana \
  --namespace grafana \
  --reuse-values \
  --values values-smtp.yaml
12. Thêm target Windows Exporter
kubectl get configmap -n prometheus
kubectl edit configmap prometheus-server -n prometheus
Thêm:
- job_name: window
  static_configs:
    - targets:
        - 10.0.14.118:9115
Sau khi kubectl edit xong, Kubernetes sẽ cập nhật lại ConfigMap. Nhưng để Prometheus áp dụng config mới, cần restart Pod:
kubectl rollout restart deployment prometheus-server -n prometheus
13. Một số lệnh quản lý
Xoá Prometheus:
helm uninstall prometheus -n prometheus
Recreate pod:
kubectl delete pod prometheus-alertmanager-0 -n prometheus
Xoá Cluster:
eksctl delete cluster --name eks-fis-lab --region ap-southeast-1
Kiểm tra kết nối:
wget -O - http://10.0.14.118:9115/metrics
kubectl exec -it prometheus-server-57c888c7c4-brn55 -n prometheus -- /bin/sh
Sửa Service để đổi từ ClusterIP → LoadBalancer:
kubectl patch svc prometheus-server -n prometheus -p '{"spec": {"type": "LoadBalancer"}}'
Đợi vài chục giây, rồi lấy địa chỉ IP:
kubectl get svc -n prometheus
Reset
# Reset: delete the grafana and prometheus namespaces, then delete
# PersistentVolumes one by one.
for ns in grafana prometheus; do
  echo "Delete namespace $ns"
  kubectl delete ns "$ns"
done
# NOTE: this removes EVERY PersistentVolume that `kubectl get pv` lists,
# not only the ones created by this guide.
# `IFS= read -r` keeps each name intact (no backslash processing or
# whitespace trimming); quoting "$pv" prevents word splitting and globbing.
kubectl get pv --no-headers | awk '{print $1}' | while IFS= read -r pv; do
  # Skip blank lines so `kubectl delete pv` is never called without a name.
  [ -n "$pv" ] || continue
  echo "Delete PV $pv"
  kubectl delete pv "$pv"
done
kubectl run curl-test -n monitoring --image=alpine --restart=Never -it -- sh
aws ec2 describe-subnets \
  --query "Subnets[*].{ID:SubnetId,Name:Tags[?Key=='Name']|[0].Value,CIDR:CidrBlock,VPC:VpcId,AZ:AvailabilityZone}" \
  --output table