diff --git a/config/alertmanager.yml b/config/alertmanager.yml
index a02944851..6c675ab6f 100644
--- a/config/alertmanager.yml
+++ b/config/alertmanager.yml
@@ -11,11 +11,21 @@ templates:
   - /etc/alertmanager/email.tmpl
 
 route:
-  group_by: ['alertname']
+  group_by: [ 'alertname' ]
   group_wait: 5s
   group_interval: 5s
   repeat_interval: 5m
   receiver: email
+  routes:
+    # NOTE(review): "XXX" looks like a placeholder alert name - confirm the alert that should be grouped per instance.
+    - matchers:
+        - alertname = "XXX"
+      group_by: [ 'instance' ]
+      group_wait: 5s
+      group_interval: 5s
+      repeat_interval: 5m
+      receiver: email
+
 receivers:
   - name: email
     email_configs:
diff --git a/config/instance-down-rules.yml b/config/instance-down-rules.yml
index 5541d2c54..bcac7ba60 100644
--- a/config/instance-down-rules.yml
+++ b/config/instance-down-rules.yml
@@ -20,3 +20,27 @@ groups:
         annotations:
           summary: "Increase in MsgInsertRedisFailedCounter or MsgInsertMongoFailedCounter detected"
           description: "Either MsgInsertRedisFailedCounter or MsgInsertMongoFailedCounter has increased in the last 5 minutes, indicating failures in message insert operations to Redis or MongoDB,maybe the redis or mongodb is crash."
+
+  - name: registrations_few
+    rules:
+      # Fires when the registration counter (user_register_total) has not increased for an hour.
+      - alert: RegistrationsFew
+        expr: increase(user_register_total[1h]) == 0
+        for: 1m
+        labels:
+          severity: info
+        annotations:
+          summary: "Too few registrations within the time frame"
+          description: "The number of registrations in the last hour is 0. There might be some issues."
+
+  - name: messages_few
+    rules:
+      # Fires when the combined 1h increase of the single-chat and group-chat success counters is 0.
+      - alert: MessagesFew
+        expr: (increase(single_chat_msg_process_success_total[1h])+increase(group_chat_msg_process_success_total[1h])) == 0
+        for: 1m
+        labels:
+          severity: info
+        annotations:
+          summary: "Too few messages within the time frame"
+          description: "The number of messages sent in the last hour is 0. There might be some issues."
diff --git a/internal/rpc/user/user.go b/internal/rpc/user/user.go
index 779d9b0c4..1e534437d 100644
--- a/internal/rpc/user/user.go
+++ b/internal/rpc/user/user.go
@@ -19,6 +19,7 @@ import (
 	"errors"
 	"github.com/openimsdk/open-im-server/v3/internal/rpc/friend"
 	"github.com/openimsdk/open-im-server/v3/pkg/common/config"
+	"github.com/openimsdk/open-im-server/v3/pkg/common/prommetrics"
 	"github.com/openimsdk/open-im-server/v3/pkg/common/storage/cache"
 	"github.com/openimsdk/open-im-server/v3/pkg/common/storage/cache/redis"
 	"github.com/openimsdk/open-im-server/v3/pkg/common/storage/database/mgo"
@@ -310,6 +311,9 @@ func (s *userServer) UserRegister(ctx context.Context, req *pbuser.UserRegisterR
 		return nil, err
 	}
 
+	// Bump the registration metric by the number of entries in users, after the error check above.
+	prommetrics.UserRegisterCounter.Add(float64(len(users)))
+
 	s.webhookAfterUserRegister(ctx, &s.config.WebhooksConfig.AfterUserRegister, req)
 	return resp, nil
 }
diff --git a/pkg/common/prommetrics/grpc_user.go b/pkg/common/prommetrics/grpc_user.go
new file mode 100644
index 000000000..cc2fc42e6
--- /dev/null
+++ b/pkg/common/prommetrics/grpc_user.go
@@ -0,0 +1,12 @@
+package prommetrics
+
+import "github.com/prometheus/client_golang/prometheus"
+
+var (
+	// UserRegisterCounter counts user registrations and is exposed as
+	// user_register_total; the user RPC service adds to it in UserRegister.
+	UserRegisterCounter = prometheus.NewCounter(prometheus.CounterOpts{
+		Name: "user_register_total",
+		Help: "The number of user registrations",
+	})
+)
diff --git a/pkg/common/prommetrics/rpc.go b/pkg/common/prommetrics/rpc.go
index 1da2c1510..dc16322da 100644
--- a/pkg/common/prommetrics/rpc.go
+++ b/pkg/common/prommetrics/rpc.go
@@ -52,6 +52,8 @@ func GetGrpcCusMetrics(registerName string, share *config.Share) []prometheus.Co
 		return []prometheus.Collector{MsgOfflinePushFailedCounter}
 	case share.RpcRegisterName.Auth:
 		return []prometheus.Collector{UserLoginCounter}
+	case share.RpcRegisterName.User:
+		return []prometheus.Collector{UserRegisterCounter}
 	default:
 		return nil
 	}