diff --git a/config/instance-down-rules.yml b/config/instance-down-rules.yml
index 72b1f5aa3..719f673c6 100644
--- a/config/instance-down-rules.yml
+++ b/config/instance-down-rules.yml
@@ -8,4 +8,15 @@ groups:
           severity: critical
         annotations:
           summary: "Instance {{ $labels.instance }} down"
-          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minutes."
\ No newline at end of file
+          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minutes."
+
+  - name: database_insert_failure_alerts
+    rules:
+      - alert: DatabaseInsertFailed
+        expr: (increase(MsgInsertRedisFailedCounter[5m]) > 0) or (increase(MsgInsertMongoFailedCounter[5m]) > 0)
+        for: 1m
+        labels:
+          severity: critical
+        annotations:
+          summary: "Increase in MsgInsertRedisFailedCounter or MsgInsertMongoFailedCounter detected"
+          description: "Either MsgInsertRedisFailedCounter or MsgInsertMongoFailedCounter has increased in the last 5 minutes, indicating failures in message insert operations to Redis or MongoDB,maybe the redis or mongodb is crash."