1.编辑prometheus的configmap文件
kubectl edit configmap prometheus-1738826520-server
2.在 data 下添加如下 alerting_rules.yml 告警规则部分(原文以红色字体标出,纯文本中无法显示;其余键为已有内容,仅供定位参考)
# Excerpt of the Prometheus server ConfigMap as opened by `kubectl edit`.
# The alerting_rules.yml entry holds the alert rule; the remaining keys are
# shown so the rule can be located relative to the existing content.
apiVersion: v1
data:
  alerting_rules.yml: |-
    groups:
      - name: deployment Monitoring
        rules:
          - alert: DeploymentReplicasUnavailable
            expr: kube_deployment_status_replicas_unavailable > 0  # this PromQL expression can be adjusted as needed
            for: 10s
            labels:
              severity: critical
            annotations:
              summary: "命名空间 {{ $labels.namespace }} 中的 Deployment {{ $labels.deployment }} 存在不可用副本"
              # NOTE(review): the description below references the pod and phase
              # labels; this deployment-level metric presumably does not carry
              # them, in which case they render empty - confirm or drop them.
              description: "命名空间 {{ $labels.namespace }} 中的 Deployment {{ $labels.deployment }} 当前有 {{ $value }} 个不可用副本,受影响的 Pod:{{ $labels.pod }},当前状态:{{ $labels.phase }}"
  alerts: |
    {}
  allow-snippet-annotations: "false"
  prometheus.yml: |
    global:
      evaluation_interval: 1m
      scrape_interval: 30s
      scrape_timeout: 10s
    rule_files:
      - /etc/config/recording_rules.yml
      # This path is where the alerting_rules.yml key above is expected to be
      # mounted - presumably via the ConfigMap volume; verify in the Deployment.
      - /etc/config/alerting_rules.yml
      - /etc/config/rules
      - /etc/config/alerts
    scrape_configs:
      - job_name: prometheus
        # ... (the rest of the existing scrape configuration is omitted here)
3.编辑alertmanager的configmap
kubectl edit configmap prometheus-1738826520-alertmanager
4.内容如下:
# Data section of the Alertmanager ConfigMap: the main configuration plus the
# HTML e-mail template it references.
# NOTE(review): the original paste starts directly at `global:`; the enclosing
# `data:` / `alertmanager.yml: |` keys are assumed here - confirm the key name
# against the ConfigMap shown by `kubectl edit`.
data:
  alertmanager.yml: |
    global:
      resolve_timeout: 20s
      # Placeholder SMTP account values - replace with real ones.
      smtp_smarthost: 'smtp.126.com:465'
      smtp_from: 'xxx@126.com'
      smtp_auth_username: 'xx@126.com'
      smtp_auth_password: 'xxx'
      smtp_require_tls: false
    route:
      group_by: ['alertname', 'cluster', 'alertsource']
      group_wait: 30s
      group_interval: 20s
      repeat_interval: 90s
      receiver: 'default-receiver'
    receivers:
      - name: 'default-receiver'
        email_configs:
          - to: 'xxxx@126.com'
            send_resolved: true
            # Body is rendered from the "email.html" template defined in
            # email.html.tmpl below.
            html: '{{ template "email.html" . }}'
            headers:
              # Alternative subject line, kept disabled for reference:
              # Subject: "[告警] {{ .CommonLabels.alertname }} - {{ (index .Alerts 0).Annotations.summary }}"
              Subject: '{{ if eq .Status "firing" }}🚨 告警触发: {{ (index .Alerts 0).Annotations.summary }}{{ else }}✅ 告警恢复: {{ (index .Alerts 0).Annotations.summary }}{{ end }}'
    # Glob under which template files are loaded; email.html.tmpl must end up
    # in this directory for the "email.html" template to be found.
    templates:
      - /etc/alertmanager/*.tmpl
  email.html.tmpl: |-
    {{ define "email.html" }}
    <!DOCTYPE html>
    <html>
    <body>
    <p>*状态*: {{ .Status | toUpper }}</p>
    <p>*详情*: {{ (index .Alerts 0).Annotations.description }}</p>
    <p>*触发时间*: {{ (index .Alerts 0).StartsAt | tz "Asia/Shanghai" | date "2006-01-02 15:04:05" }} (北京时间)</p>
    </body>
    </html>
    {{ end }}
5.最后报警内容如下: