From 6f38ba4501de18d19e6e50e5becf40e3f59066ba Mon Sep 17 00:00:00 2001 From: "lin.huang" Date: Wed, 15 Nov 2023 12:43:54 +0800 Subject: [PATCH] feature: add alertmanager function --- config/alertmanager.yml | 25 +++++++++++++++++++++++++ config/email.tmpl | 13 +++++++++++++ config/instanceDown_rules.yml | 11 +++++++++++ deployments/templates/env_template.yaml | 13 ++++++++++--- deployments/templates/prometheus.yml | 8 ++++---- docker-compose.yml | 15 +++++++++++++++ scripts/install/environment.sh | 7 +++++++ 7 files changed, 85 insertions(+), 7 deletions(-) create mode 100644 config/alertmanager.yml create mode 100644 config/email.tmpl create mode 100644 config/instanceDown_rules.yml diff --git a/config/alertmanager.yml b/config/alertmanager.yml new file mode 100644 index 000000000..072bfeead --- /dev/null +++ b/config/alertmanager.yml @@ -0,0 +1,25 @@ +global: + resolve_timeout: 5m + smtp_from: '18565885972@163.com' + smtp_smarthost: 'smtp.163.com:465' + smtp_auth_username: '18565885972@163.com' + smtp_auth_password: 'REPLACE_WITH_SMTP_AUTH_PASSWORD' # set per deployment; never commit the real secret + smtp_require_tls: false + smtp_hello: 'xxx监控告警' + +templates: + - '/etc/alertmanager/email.tmpl' + +route: + group_by: ['alertname'] + group_wait: 5s + group_interval: 5s + repeat_interval: 5m + receiver: 'email' +receivers: + - name: 'email' + email_configs: + - to: '2393740379@qq.com' + html: '{{ template "email.to.html" . }}' + headers: { Subject: "[WARN]告警" } + send_resolved: true \ No newline at end of file diff --git a/config/email.tmpl b/config/email.tmpl new file mode 100644 index 000000000..48ced598e --- /dev/null +++ b/config/email.tmpl @@ -0,0 +1,13 @@ +{{ define "email.to.html" }} +{{ range .Alerts }} + +告警程序: prometheus_alert
+告警级别: {{ .Labels.severity }} 级
+告警类型: {{ .Labels.alertname }}
+故障主机: {{ .Labels.instance }}
+故障服务: {{ .Labels.job }}
+告警主题: {{ .Annotations.summary }}
+触发时间: {{ .StartsAt.Format "2006-01-02 15:04:05"}}{{/* Go layouts must spell out the reference time 2006-01-02 15:04:05 */}}
+ +{{ end }} +{{ end }} \ No newline at end of file diff --git a/config/instanceDown_rules.yml b/config/instanceDown_rules.yml new file mode 100644 index 000000000..b266e4e73 --- /dev/null +++ b/config/instanceDown_rules.yml @@ -0,0 +1,11 @@ +groups: + - name: node_down + rules: + - alert: InstanceDown + expr: up == 0 + for: 1m + labels: + severity: warning # rendered as the alert level by the email template + annotations: + summary: "Instance {{ $labels.instance }} down" + description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minutes." \ No newline at end of file diff --git a/deployments/templates/env_template.yaml b/deployments/templates/env_template.yaml index 5c8e2628e..398fbb820 100644 --- a/deployments/templates/env_template.yaml +++ b/deployments/templates/env_template.yaml @@ -94,7 +94,7 @@ OPENIM_CHAT_NETWORK_ADDRESS=${OPENIM_CHAT_NETWORK_ADDRESS} # Address or hostname for the Prometheus network. # Default: PROMETHEUS_NETWORK_ADDRESS=172.28.0.11 PROMETHEUS_NETWORK_ADDRESS=${PROMETHEUS_NETWORK_ADDRESS} - + # Address or hostname for the Grafana network. # Default: GRAFANA_NETWORK_ADDRESS=172.28.0.12 GRAFANA_NETWORK_ADDRESS=${GRAFANA_NETWORK_ADDRESS} @@ -106,7 +106,10 @@ NODE_EXPORTER_NETWORK_ADDRESS=${NODE_EXPORTER_NETWORK_ADDRESS} # Address or hostname for the OpenIM admin network. # Default: OPENIM_ADMIN_NETWORK_ADDRESS=172.28.0.14 OPENIM_ADMIN_FRONT_NETWORK_ADDRESS=${OPENIM_ADMIN_FRONT_NETWORK_ADDRESS} - + +# Address or hostname for the alertmanager network. +# Default: ALERT_MANAGER_NETWORK_ADDRESS=172.28.0.15 +ALERT_MANAGER_NETWORK_ADDRESS=${ALERT_MANAGER_NETWORK_ADDRESS} # =============================================== # = Component Extension Configuration = # =============================================== @@ -305,4 +308,8 @@ GRAFANA_PORT=${GRAFANA_PORT} # Port for the admin front. 
# Default: OPENIM_ADMIN_FRONT_PORT=11002 -OPENIM_ADMIN_FRONT_PORT=${OPENIM_ADMIN_FRONT_PORT} \ No newline at end of file +OPENIM_ADMIN_FRONT_PORT=${OPENIM_ADMIN_FRONT_PORT} + +# Port for the alertmanager. +# Default: ALERT_MANAGER_PORT=19093 +ALERT_MANAGER_PORT=${ALERT_MANAGER_PORT} \ No newline at end of file diff --git a/deployments/templates/prometheus.yml b/deployments/templates/prometheus.yml index 2b67a18ed..9c2b10c29 100644 --- a/deployments/templates/prometheus.yml +++ b/deployments/templates/prometheus.yml @@ -6,13 +6,13 @@ global: # Alertmanager configuration alerting: -#alertmanagers: -# - static_configs: -# - targets: ['172.29.166.17:9093'] #alertmanager地址 + alertmanagers: + - static_configs: + - targets: ['${ALERT_MANAGER_ADDRESS}:${ALERT_MANAGER_PORT}'] # Load rules once and periodically evaluate them according to the global 'evaluation_interval'. rule_files: -# - "node_down.yml" + - "instanceDown_rules.yml" # - "first_rules.yml" # - "second_rules.yml" diff --git a/docker-compose.yml b/docker-compose.yml index 6b9dc264d..a4adc8a66 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -162,12 +162,27 @@ services: restart: always volumes: - ./config/prometheus.yml:/etc/prometheus/prometheus.yml + - ./config/instanceDown_rules.yml:/etc/prometheus/instanceDown_rules.yml ports: - "${PROMETHEUS_PORT}:9090" networks: server: ipv4_address: ${PROMETHEUS_NETWORK_ADDRESS} + alertmanager: + image: prom/alertmanager + container_name: alertmanager + hostname: alertmanager + restart: always + volumes: + - ./config/alertmanager.yml:/etc/alertmanager/alertmanager.yml + - ./config/email.tmpl:/etc/alertmanager/email.tmpl + ports: + - "${ALERT_MANAGER_PORT}:9093" + networks: + server: + ipv4_address: ${ALERT_MANAGER_NETWORK_ADDRESS} + grafana: image: grafana/grafana container_name: grafana diff --git a/scripts/install/environment.sh b/scripts/install/environment.sh index 8198cd460..b32dc52cb 100755 --- a/scripts/install/environment.sh +++ 
b/scripts/install/environment.sh @@ -120,6 +120,8 @@ LAST_OCTET=$((LAST_OCTET + 1)) NODE_EXPORTER_NETWORK_ADDRESS=$(generate_ip) LAST_OCTET=$((LAST_OCTET + 1)) OPENIM_ADMIN_FRONT_NETWORK_ADDRESS=$(generate_ip) +LAST_OCTET=$((LAST_OCTET + 1)) +ALERT_MANAGER_NETWORK_ADDRESS=$(generate_ip) ###################### openim 配置 ###################### # read: https://github.com/openimsdk/open-im-server/blob/main/deployment/README.md def "OPENIM_DATA_DIR" "/data/openim" @@ -259,6 +261,11 @@ def "PROMETHEUS_ADDRESS" "${DOCKER_BRIDGE_GATEWAY}" # Prometheus的地址 ###################### node-exporter 配置 ###################### def "NODE_EXPORTER_PORT" "19100" # node-exporter的端口 def "NODE_EXPORTER_ADDRESS" "${DOCKER_BRIDGE_GATEWAY}" # node-exporter的地址 + +###################### alertmanager configuration ###################### +def "ALERT_MANAGER_PORT" "19093" # alertmanager port +def "ALERT_MANAGER_ADDRESS" "${DOCKER_BRIDGE_GATEWAY}" # alertmanager address + ###################### Grafana 配置信息 ###################### def "GRAFANA_PORT" "3000" # Grafana的端口 def "GRAFANA_ADDRESS" "${DOCKER_BRIDGE_GATEWAY}" # Grafana的地址