commit
14c486b1dd
@ -0,0 +1,25 @@
|
|||||||
|
---
# Alertmanager configuration for OpenIM: every alert is routed to a single
# e-mail receiver.

# SMTP settings shared by all e-mail receivers.
global:
  # Time after which an alert is declared resolved if it has not been updated.
  resolve_timeout: 5m
  smtp_from: alert@openim.io
  smtp_smarthost: smtp.163.com:465
  smtp_auth_username: alert@openim.io
  # Placeholder: replace with the real SMTP credential at deploy time and do
  # not commit the real value. NOTE(review): newer Alertmanager releases also
  # accept smtp_auth_password_file - confirm against the deployed version.
  smtp_auth_password: YOURAUTHPASSWORD
  smtp_require_tls: false
  # Placeholder hostname announced to the SMTP server; replace as needed.
  smtp_hello: xxx

# Notification template files loaded by Alertmanager.
templates:
  - /etc/alertmanager/email.tmpl

# Root route: alerts are grouped by alert name and sent to the "email" receiver.
route:
  group_by: ['alertname']
  # Delay before the first notification for a new group is sent.
  group_wait: 5s
  # Delay before notifying about new alerts added to an existing group.
  group_interval: 5s
  # Delay before re-sending a notification that was already delivered.
  repeat_interval: 5m
  receiver: email
receivers:
  - name: email
    email_configs:
      - to: 'alert@example.com'
        # HTML body is rendered by the "email.to.html" template, which is
        # expected to be defined in the templates file listed above.
        html: '{{ template "email.to.html" . }}'
        headers: { Subject: "[OPENIM-SERVER]Alarm" }
        # Also send a notification when an alert is resolved.
        send_resolved: true
|
@ -0,0 +1,16 @@
|
|||||||
|
{{- /*
  email.to.html renders one HTML card per alert in the notification.
  The receiver is configured with send_resolved: true, so the same template is
  used for firing and resolved notifications; the Status line (and the
  Resolved Time line for resolved alerts) lets the recipient tell them apart.
  The description annotation is shown only when the alert rule provides one.
*/ -}}
{{ define "email.to.html" }}
{{ range .Alerts }}
<!-- Begin of OpenIM Alert -->
<div style="border:1px solid #ccc; padding:10px; margin-bottom:10px;">
    <h3>OpenIM Alert</h3>
    <p><strong>Alert Program:</strong> Prometheus Alert</p>
    <p><strong>Status:</strong> {{ .Status }}</p>
    <p><strong>Severity Level:</strong> {{ .Labels.severity }}</p>
    <p><strong>Alert Type:</strong> {{ .Labels.alertname }}</p>
    <p><strong>Affected Host:</strong> {{ .Labels.instance }}</p>
    <p><strong>Affected Service:</strong> {{ .Labels.job }}</p>
    <p><strong>Alert Subject:</strong> {{ .Annotations.summary }}</p>
    {{ if .Annotations.description }}<p><strong>Alert Details:</strong> {{ .Annotations.description }}</p>{{ end }}
    <p><strong>Trigger Time:</strong> {{ .StartsAt.Format "2006-01-02 15:04:05" }}</p>
    {{ if eq .Status "resolved" }}<p><strong>Resolved Time:</strong> {{ .EndsAt.Format "2006-01-02 15:04:05" }}</p>{{ end }}
</div>
<!-- End of OpenIM Alert -->
{{ end }}
{{ end }}
|
@ -0,0 +1,22 @@
|
|||||||
|
---
# Prometheus alerting rules for OpenIM.
groups:
  # Fires when a scrape target has been unreachable for one minute.
  - name: instance_down
    rules:
      - alert: InstanceDown
        # `up` is 0 when the most recent scrape of the target failed.
        expr: up == 0
        # The condition must hold continuously for this long before firing.
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Instance {{ $labels.instance }} down"
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute."

  # Fires when message inserts into Redis or MongoDB have started failing.
  - name: database_insert_failure_alerts
    rules:
      - alert: DatabaseInsertFailed
        # Either failure counter grew within the last 5 minutes.
        expr: (increase(msg_insert_redis_failed_total[5m]) > 0) or (increase(msg_insert_mongo_failed_total[5m]) > 0)
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Increase in MsgInsertRedisFailedCounter or MsgInsertMongoFailedCounter detected"
          description: "Either MsgInsertRedisFailedCounter or MsgInsertMongoFailedCounter has increased in the last 5 minutes, indicating failures in message insert operations to Redis or MongoDB; Redis or MongoDB may have crashed."
|
@ -0,0 +1,83 @@
|
|||||||
|
---
# Prometheus configuration for monitoring an OpenIM deployment.
# The host address 192.168.2.22 and all ports below are deployment-specific;
# adjust them to match the environment.

# Global defaults, used by every scrape job and rule group unless overridden.
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration: where fired alerts are sent.
alerting:
  alertmanagers:
    - static_configs:
        - targets: ['192.168.2.22:19093']

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  - "instance-down-rules.yml"
# - "first_rules.yml"
# - "second_rules.yml"

# Scrape configurations: one job for the node exporter and one job per
# OpenIM server component.
scrape_configs:
  # The job name is added as a label "job='job_name'" to any timeseries scraped from this config.
  # Monitored information captured by prometheus

  # prometheus fetches application services
  - job_name: 'node_exporter'
    static_configs:
      - targets: ['192.168.2.22:20114']
  - job_name: 'openimserver-openim-api'
    static_configs:
      - targets: ['192.168.2.22:20113']
        # Static label attached to every series scraped from these targets.
        labels:
          namespace: 'default'
  - job_name: 'openimserver-openim-msggateway'
    static_configs:
      - targets: ['192.168.2.22:20112']
        labels:
          namespace: 'default'
  - job_name: 'openimserver-openim-msgtransfer'
    static_configs:
      # This job scrapes four endpoints; quoted like every other target.
      - targets:
          - '192.168.2.22:20111'
          - '192.168.2.22:20110'
          - '192.168.2.22:20109'
          - '192.168.2.22:20108'
        labels:
          namespace: 'default'
  - job_name: 'openimserver-openim-push'
    static_configs:
      - targets: ['192.168.2.22:20107']
        labels:
          namespace: 'default'
  - job_name: 'openimserver-openim-rpc-auth'
    static_configs:
      - targets: ['192.168.2.22:20106']
        labels:
          namespace: 'default'
  - job_name: 'openimserver-openim-rpc-conversation'
    static_configs:
      - targets: ['192.168.2.22:20105']
        labels:
          namespace: 'default'
  - job_name: 'openimserver-openim-rpc-friend'
    static_configs:
      - targets: ['192.168.2.22:20104']
        labels:
          namespace: 'default'
  - job_name: 'openimserver-openim-rpc-group'
    static_configs:
      - targets: ['192.168.2.22:20103']
        labels:
          namespace: 'default'
  - job_name: 'openimserver-openim-rpc-msg'
    static_configs:
      - targets: ['192.168.2.22:20102']
        labels:
          namespace: 'default'
  - job_name: 'openimserver-openim-rpc-third'
    static_configs:
      - targets: ['192.168.2.22:20101']
        labels:
          namespace: 'default'
  - job_name: 'openimserver-openim-rpc-user'
    static_configs:
      - targets: ['192.168.2.22:20100']
        labels:
          namespace: 'default'
|
Loading…
Reference in new issue