diff --git a/config/alertmanager.yml b/config/alertmanager.yml
new file mode 100644
index 000000000..072bfeead
--- /dev/null
+++ b/config/alertmanager.yml
@@ -0,0 +1,35 @@
+# Alertmanager configuration: every alert is routed to the single 'email'
+# receiver and delivered through the SMTP account configured under `global`.
+global:
+  resolve_timeout: 5m
+  smtp_from: '18565885972@163.com'
+  # NOTE(review): 465 is conventionally the implicit-TLS SMTP port; confirm the
+  # session is still encrypted with STARTTLS (`smtp_require_tls`) disabled.
+  smtp_smarthost: 'smtp.163.com:465'
+  smtp_auth_username: '18565885972@163.com'
+  # Never commit the real SMTP authorization code. Fill it in at deploy time
+  # only, and rotate any code that was ever pushed to the repository.
+  smtp_auth_password: 'REPLACE_WITH_SMTP_AUTH_CODE'
+  smtp_require_tls: false
+  # Host name announced in the SMTP HELO/EHLO greeting; it has to be a plain
+  # ASCII host name, not a display name.
+  smtp_hello: 'localhost'
+
+templates:
+  - '/etc/alertmanager/email.tmpl'
+
+route:
+  group_by: ['alertname']
+  group_wait: 5s
+  group_interval: 5s
+  repeat_interval: 5m
+  receiver: 'email'
+
+receivers:
+  - name: 'email'
+    email_configs:
+      - to: '2393740379@qq.com'
+        # Body is rendered by the "email.to.html" template from email.tmpl.
+        html: '{{ template "email.to.html" . }}'
+        headers: { Subject: "[WARN]告警" }
+        send_resolved: true
\ No newline at end of file
diff --git a/config/email.tmpl b/config/email.tmpl
new file mode 100644
index 000000000..48ced598e
--- /dev/null
+++ b/config/email.tmpl
@@ -0,0 +1,22 @@
+{{/*
+  "email.to.html": body of the alert e-mail. For every alert in the
+  notification it prints the severity, alert name, instance, job, summary
+  and start time.
+  The argument of Format is a Go time layout and must be spelled with the
+  reference time 2006-01-02 15:04:05; a year such as 2020 is not read as a year.
+  NOTE(review): this is sent as the HTML body, where bare newlines do not
+  produce line breaks - confirm whether explicit br tags are wanted.
+*/}}
+{{ define "email.to.html" }}
+{{ range .Alerts }}
+
+告警程序: prometheus_alert
+告警级别: {{ .Labels.severity }} 级
+告警类型: {{ .Labels.alertname }}
+故障主机: {{ .Labels.instance }}
+故障服务: {{ .Labels.job }}
+告警主题: {{ .Annotations.summary }}
+触发时间: {{ .StartsAt.Format "2006-01-02 15:04:05" }}
+
+{{ end }}
+{{ end }}
\ No newline at end of file
diff --git a/config/instanceDown_rules.yml b/config/instanceDown_rules.yml
new file mode 100644
index 000000000..b266e4e73
--- /dev/null
+++ b/config/instanceDown_rules.yml
@@ -0,0 +1,16 @@
+# Prometheus alerting rules for scrape-target availability.
+groups:
+  - name: node_down
+    rules:
+      # Fires once `up == 0` has held for a target for one minute.
+      - alert: InstanceDown
+        expr: up == 0
+        for: 1m
+        labels:
+          user: test
+          # Printed by the e-mail template via `.Labels.severity`; without this
+          # label the severity line of the notification is empty.
+          severity: critical
+        annotations:
+          summary: "Instance {{ $labels.instance }} down"
+          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute."
\ No newline at end of file
diff --git a/deployments/templates/env_template.yaml b/deployments/templates/env_template.yaml
index 5c8e2628e..398fbb820 100644
--- a/deployments/templates/env_template.yaml
+++ b/deployments/templates/env_template.yaml
@@ -94,7 +94,7 @@ OPENIM_CHAT_NETWORK_ADDRESS=${OPENIM_CHAT_NETWORK_ADDRESS}
# Address or hostname for the Prometheus network.
# Default: PROMETHEUS_NETWORK_ADDRESS=172.28.0.11
PROMETHEUS_NETWORK_ADDRESS=${PROMETHEUS_NETWORK_ADDRESS}
-
+
# Address or hostname for the Grafana network.
# Default: GRAFANA_NETWORK_ADDRESS=172.28.0.12
GRAFANA_NETWORK_ADDRESS=${GRAFANA_NETWORK_ADDRESS}
@@ -106,7 +106,10 @@ NODE_EXPORTER_NETWORK_ADDRESS=${NODE_EXPORTER_NETWORK_ADDRESS}
# Address or hostname for the OpenIM admin network.
# Default: OPENIM_ADMIN_NETWORK_ADDRESS=172.28.0.14
OPENIM_ADMIN_FRONT_NETWORK_ADDRESS=${OPENIM_ADMIN_FRONT_NETWORK_ADDRESS}
-
+
+# Static IPv4 address of the Alertmanager container on the compose `server` network.
+# Default: ALERT_MANAGER_NETWORK_ADDRESS=172.28.0.15
+ALERT_MANAGER_NETWORK_ADDRESS=${ALERT_MANAGER_NETWORK_ADDRESS}
# ===============================================
# = Component Extension Configuration =
# ===============================================
@@ -305,4 +308,8 @@ GRAFANA_PORT=${GRAFANA_PORT}
# Port for the admin front.
# Default: OPENIM_ADMIN_FRONT_PORT=11002
-OPENIM_ADMIN_FRONT_PORT=${OPENIM_ADMIN_FRONT_PORT}
\ No newline at end of file
+OPENIM_ADMIN_FRONT_PORT=${OPENIM_ADMIN_FRONT_PORT}
+
+# Host port on which Alertmanager is published (mapped to container port 9093).
+# Default: ALERT_MANAGER_PORT=19093
+ALERT_MANAGER_PORT=${ALERT_MANAGER_PORT}
\ No newline at end of file
diff --git a/deployments/templates/prometheus.yml b/deployments/templates/prometheus.yml
index 2b67a18ed..9c2b10c29 100644
--- a/deployments/templates/prometheus.yml
+++ b/deployments/templates/prometheus.yml
@@ -6,13 +6,13 @@ global:
# Alertmanager configuration
alerting:
-#alertmanagers:
-# - static_configs:
-# - targets: ['172.29.166.17:9093'] #alertmanager地址
+  alertmanagers:
+    - static_configs:
+        - targets: ['${ALERT_MANAGER_ADDRESS}:${ALERT_MANAGER_PORT}']  # Alertmanager endpoint; both placeholders are defined in scripts/install/environment.sh
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
-# - "node_down.yml"
+ - "instanceDown_rules.yml"
# - "first_rules.yml"
# - "second_rules.yml"
diff --git a/docker-compose.yml b/docker-compose.yml
index 6b9dc264d..a4adc8a66 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -162,12 +162,27 @@ services:
restart: always
volumes:
- ./config/prometheus.yml:/etc/prometheus/prometheus.yml
+ - ./config/instanceDown_rules.yml:/etc/prometheus/instanceDown_rules.yml
ports:
- "${PROMETHEUS_PORT}:9090"
networks:
server:
ipv4_address: ${PROMETHEUS_NETWORK_ADDRESS}
+  alertmanager:  # receives alerts from Prometheus and sends the e-mail notifications
+    image: prom/alertmanager
+    container_name: alertmanager
+    hostname: alertmanager
+    restart: always
+    volumes:
+      - ./config/alertmanager.yml:/etc/alertmanager/alertmanager.yml  # main configuration
+      - ./config/email.tmpl:/etc/alertmanager/email.tmpl  # e-mail template listed under `templates:`
+    ports:
+      - "${ALERT_MANAGER_PORT}:9093"  # host port -> port 9093 inside the container
+    networks:
+      server:
+        ipv4_address: ${ALERT_MANAGER_NETWORK_ADDRESS}  # fixed address on the `server` network
+
grafana:
image: grafana/grafana
container_name: grafana
diff --git a/scripts/install/environment.sh b/scripts/install/environment.sh
index 8198cd460..b32dc52cb 100755
--- a/scripts/install/environment.sh
+++ b/scripts/install/environment.sh
@@ -120,6 +120,8 @@ LAST_OCTET=$((LAST_OCTET + 1))
NODE_EXPORTER_NETWORK_ADDRESS=$(generate_ip)
LAST_OCTET=$((LAST_OCTET + 1))
OPENIM_ADMIN_FRONT_NETWORK_ADDRESS=$(generate_ip)
+LAST_OCTET=$((LAST_OCTET + 1))
+ALERT_MANAGER_NETWORK_ADDRESS=$(generate_ip) # address used as the alertmanager container's ipv4_address in docker-compose.yml
###################### openim 配置 ######################
# read: https://github.com/openimsdk/open-im-server/blob/main/deployment/README.md
def "OPENIM_DATA_DIR" "/data/openim"
@@ -259,6 +261,11 @@ def "PROMETHEUS_ADDRESS" "${DOCKER_BRIDGE_GATEWAY}" # Prometheus的地址
###################### node-exporter 配置 ######################
def "NODE_EXPORTER_PORT" "19100" # node-exporter的端口
def "NODE_EXPORTER_ADDRESS" "${DOCKER_BRIDGE_GATEWAY}" # node-exporter的地址
+
+###################### Alertmanager configuration ######################
+def "ALERT_MANAGER_PORT" "19093" # Alertmanager port
+def "ALERT_MANAGER_ADDRESS" "${DOCKER_BRIDGE_GATEWAY}" # Alertmanager address
+
###################### Grafana 配置信息 ######################
def "GRAFANA_PORT" "3000" # Grafana的端口
def "GRAFANA_ADDRESS" "${DOCKER_BRIDGE_GATEWAY}" # Grafana的地址