From badc92fe2da9bc81f4b5d0f8b92055871ef218ea Mon Sep 17 00:00:00 2001
From: "Xinwei Xiong(cubxxw)" <3293172751nss@gmail.com>
Date: Thu, 16 Nov 2023 15:15:19 +0800
Subject: [PATCH] feat: deployment and design of management backend and monitoring

Signed-off-by: Xinwei Xiong(cubxxw) <3293172751nss@gmail.com>
---
 config/instance-down-rules.yml         | 11 +++++++++
 deployments/templates/alertmanager.yml | 32 ++++++++++++++++++++++++++
 2 files changed, 43 insertions(+)
 create mode 100644 config/instance-down-rules.yml
 create mode 100644 deployments/templates/alertmanager.yml

diff --git a/config/instance-down-rules.yml b/config/instance-down-rules.yml
new file mode 100644
index 000000000..72b1f5aa3
--- /dev/null
+++ b/config/instance-down-rules.yml
@@ -0,0 +1,11 @@
+groups:  # Prometheus alerting rule groups
+  - name: instance_down
+    rules:
+      - alert: InstanceDown
+        expr: up == 0  # the up metric is 0 when the last scrape of a target failed
+        for: 1m  # the expression must stay true this long before the alert fires
+        labels:
+          severity: critical
+        annotations:
+          summary: "Instance {{ $labels.instance }} down"
+          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute."
diff --git a/deployments/templates/alertmanager.yml b/deployments/templates/alertmanager.yml
new file mode 100644
index 000000000..95e96571d
--- /dev/null
+++ b/deployments/templates/alertmanager.yml
@@ -0,0 +1,32 @@
+###################### AlertManager Configuration ######################
+# Alertmanager configuration template driven by environment variables.
+# Every ALERTMANAGER_* placeholder below is expected to be substituted
+# when this template is rendered into the final configuration. Sections:
+#   global    - resolve timeout and SMTP configuration for sending alerts
+#   templates - templates for email notifications
+#   route     - routing configuration for alerts
+#   receivers - receiver configurations
+global:
+  resolve_timeout: '${ALERTMANAGER_RESOLVE_TIMEOUT}'
+  smtp_from: '${ALERTMANAGER_SMTP_FROM}'
+  smtp_smarthost: '${ALERTMANAGER_SMTP_SMARTHOST}'
+  smtp_auth_username: '${ALERTMANAGER_SMTP_AUTH_USERNAME}'
+  smtp_auth_password: '${ALERTMANAGER_SMTP_AUTH_PASSWORD}'
+  smtp_require_tls: ${ALERTMANAGER_SMTP_REQUIRE_TLS}  # boolean field: must stay unquoted
+  smtp_hello: '${ALERTMANAGER_SMTP_HELLO}'
+
+templates:
+  - /etc/alertmanager/email.tmpl
+
+route:
+  group_wait: 5s  # wait before the first notification for a new alert group
+  group_interval: 5s  # wait before notifying about alerts newly added to a group
+  repeat_interval: 5m  # wait before repeating a notification that was already sent
+  receiver: email
+receivers:
+  - name: email
+    email_configs:
+      - to: '${ALERTMANAGER_EMAIL_TO}'
+        html: '{{ template "email.to.html" . }}'
+        headers: { Subject: "[OPENIM-SERVER]Alarm" }
+        send_resolved: true