From 7f4d6def53163bac9bc542364fd7c759f73dd35a Mon Sep 17 00:00:00 2001 From: icey-yu <1186114839@qq.com> Date: Mon, 15 Jul 2024 18:04:54 +0800 Subject: [PATCH 1/2] feat: prometheus --- .env | 3 + .gitignore | 4 -- config/alertmanager.yml | 25 +++++++ config/email.tmpl | 16 +++++ config/instance-down-rules.yml | 22 ++++++ config/prometheus.yml | 83 ++++++++++++++++++++++ docker-compose.yml | 45 ++++++++++++ internal/api/init.go | 4 +- internal/msgtransfer/init.go | 15 +--- pkg/common/prommetrics/api.go | 20 +++--- pkg/common/prommetrics/prommetrics.go | 47 ++++-------- pkg/common/prommetrics/prommetrics_test.go | 73 +++++++++---------- pkg/common/prommetrics/rpc.go | 39 ++++++++-- pkg/common/prommetrics/transfer.go | 14 ++++ pkg/common/startrpc/start.go | 23 +++--- 15 files changed, 315 insertions(+), 118 deletions(-) create mode 100644 config/alertmanager.yml create mode 100644 config/email.tmpl create mode 100644 config/instance-down-rules.yml create mode 100644 config/prometheus.yml diff --git a/.env b/.env index 1e7b1e11a..3199b3714 100644 --- a/.env +++ b/.env @@ -5,6 +5,9 @@ ZOOKEEPER_IMAGE=bitnami/zookeeper:3.8 KAFKA_IMAGE=bitnami/kafka:3.5.1 MINIO_IMAGE=minio/minio:RELEASE.2024-01-11T07-46-16Z ETCD_IMAGE=quay.io/coreos/etcd:v3.5.13 +PROMETHEUS_IMAGE=prom/prometheus:v2.45.6 +ALERTMANAGER_IMAGE=prom/alertmanager:v0.27.0 +GRAFANA_IMAGE=grafana/grafana:11.0.1 OPENIM_WEB_FRONT_IMAGE=openim/openim-web-front:release-v3.5.1 OPENIM_ADMIN_FRONT_IMAGE=openim/openim-admin-front:release-v1.7 diff --git a/.gitignore b/.gitignore index fb8d428d2..77cf855b7 100644 --- a/.gitignore +++ b/.gitignore @@ -34,11 +34,7 @@ deployments/charts/generated-configs/ ### OpenIM Config ### .env config/config.yaml -config/alertmanager.yml -config/prometheus.yml -config/email.tmpl config/notification.yaml -config/instance-down-rules.yml ### OpenIM deploy ### deployments/openim-server/charts diff --git a/config/alertmanager.yml b/config/alertmanager.yml new file mode 100644 index 000000000..bdb076a63 --- /dev/null +++ b/config/alertmanager.yml @@ -0,0 +1,25 @@ +global: + resolve_timeout: 5m + smtp_from: alert@openim.io + smtp_smarthost: smtp.163.com:465 + smtp_auth_username: alert@openim.io + smtp_auth_password: YOURAUTHPASSWORD + smtp_require_tls: false + smtp_hello: xxx监控告警 + +templates: + - /etc/alertmanager/email.tmpl + +route: + group_by: ['alertname'] + group_wait: 5s + group_interval: 5s + repeat_interval: 5m + receiver: email +receivers: + - name: email + email_configs: + - to: 'alert@example.com' + html: '{{ template "email.to.html" . }}' + headers: { Subject: "[OPENIM-SERVER]Alarm" } + send_resolved: true diff --git a/config/email.tmpl b/config/email.tmpl new file mode 100644 index 000000000..0385601d0 --- /dev/null +++ b/config/email.tmpl @@ -0,0 +1,16 @@ +{{ define "email.to.html" }} +{{ range .Alerts }} + +
+

OpenIM Alert

+

Alert Program: Prometheus Alert

+

Severity Level: {{ .Labels.severity }}

+

Alert Type: {{ .Labels.alertname }}

+

Affected Host: {{ .Labels.instance }}

+

Affected Service: {{ .Labels.job }}

+

Alert Subject: {{ .Annotations.summary }}

+

Trigger Time: {{ .StartsAt.Format "2006-01-02 15:04:05" }}

+
+ +{{ end }} +{{ end }} diff --git a/config/instance-down-rules.yml b/config/instance-down-rules.yml new file mode 100644 index 000000000..5541d2c54 --- /dev/null +++ b/config/instance-down-rules.yml @@ -0,0 +1,22 @@ +groups: + - name: instance_down + rules: + - alert: InstanceDown + expr: up == 0 + for: 1m + labels: + severity: critical + annotations: + summary: "Instance {{ $labels.instance }} down" + description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minutes." + + - name: database_insert_failure_alerts + rules: + - alert: DatabaseInsertFailed + expr: (increase(msg_insert_redis_failed_total[5m]) > 0) or (increase(msg_insert_mongo_failed_total[5m]) > 0) + for: 1m + labels: + severity: critical + annotations: + summary: "Increase in MsgInsertRedisFailedCounter or MsgInsertMongoFailedCounter detected" + description: "Either MsgInsertRedisFailedCounter or MsgInsertMongoFailedCounter has increased in the last 5 minutes, indicating failures in message insert operations to Redis or MongoDB,maybe the redis or mongodb is crash." diff --git a/config/prometheus.yml b/config/prometheus.yml new file mode 100644 index 000000000..99f2e4df3 --- /dev/null +++ b/config/prometheus.yml @@ -0,0 +1,83 @@ +# my global config +global: + scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute. + evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute. + # scrape_timeout is set to the global default (10s). + +# Alertmanager configuration +alerting: + alertmanagers: + - static_configs: + - targets: ['192.168.2.22:19093'] + +# Load rules once and periodically evaluate them according to the global 'evaluation_interval'. +rule_files: + - "instance-down-rules.yml" +# - "first_rules.yml" +# - "second_rules.yml" + +# A scrape configuration containing exactly one endpoint to scrape: +# Here it's Prometheus itself. +scrape_configs: + # The job name is added as a label "job='job_name'"" to any timeseries scraped from this config. + # Monitored information captured by prometheus + + # prometheus fetches application services + - job_name: 'node_exporter' + static_configs: + - targets: [ '192.168.2.22:20114' ] + - job_name: 'openimserver-openim-api' + static_configs: + - targets: [ '192.168.2.22:20113' ] + labels: + namespace: 'default' + - job_name: 'openimserver-openim-msggateway' + static_configs: + - targets: [ '192.168.2.22:20112' ] + labels: + namespace: 'default' + - job_name: 'openimserver-openim-msgtransfer' + static_configs: + - targets: [ 192.168.2.22:20111, 192.168.2.22:20110, 192.168.2.22:20109, 192.168.2.22:20108 ] + labels: + namespace: 'default' + - job_name: 'openimserver-openim-push' + static_configs: + - targets: [ '192.168.2.22:20107' ] + labels: + namespace: 'default' + - job_name: 'openimserver-openim-rpc-auth' + static_configs: + - targets: [ '192.168.2.22:20106' ] + labels: + namespace: 'default' + - job_name: 'openimserver-openim-rpc-conversation' + static_configs: + - targets: [ '192.168.2.22:20105' ] + labels: + namespace: 'default' + - job_name: 'openimserver-openim-rpc-friend' + static_configs: + - targets: [ '192.168.2.22:20104' ] + labels: + namespace: 'default' + - job_name: 'openimserver-openim-rpc-group' + static_configs: + - targets: [ '192.168.2.22:20103' ] + labels: + namespace: 'default' + - job_name: 'openimserver-openim-rpc-msg' + static_configs: + - targets: [ '192.168.2.22:20102' ] + labels: + namespace: 'default' + - job_name: 'openimserver-openim-rpc-third' + static_configs: + - targets: [ '192.168.2.22:20101' ] + labels: + namespace: 'default' + - job_name: 'openimserver-openim-rpc-user' + static_configs: + - targets: [ '192.168.2.22:20100' ] + labels: + namespace: 'default' \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index d72c1a2fa..8cc1f24b2 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -140,5 +140,50 @@ services: networks: - openim + prometheus: + image: ${PROMETHEUS_IMAGE} + container_name: prometheus + restart: always + volumes: + - ./config/prometheus.yml:/etc/prometheus/prometheus.yml + - ./config/instance-down-rules.yml:/etc/prometheus/instance-down-rules.yml + - ${DATA_DIR}/components/prometheus/data:/prometheus + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' + ports: + - "19091:9090" + networks: + - openim + + alertmanager: + image: ${ALERTMANAGER_IMAGE} + container_name: alertmanager + restart: always + volumes: + - ./config/alertmanager.yml:/etc/alertmanager/alertmanager.yml + - ./config/email.tmpl:/etc/alertmanager/email.tmpl + ports: + - "19093:9093" + networks: + - openim + + grafana: + image: ${GRAFANA_IMAGE} + container_name: grafana + user: root + restart: always + environment: + - GF_SECURITY_ALLOW_EMBEDDING=true + - GF_SESSION_COOKIE_SAMESITE=none + - GF_SESSION_COOKIE_SECURE=true + - GF_AUTH_ANONYMOUS_ENABLED=true + - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin + ports: + - "13000:3000" + volumes: + - ${DATA_DIR:-./}/components/grafana:/var/lib/grafana + networks: + - openim diff --git a/internal/api/init.go b/internal/api/init.go index 679d901f4..e83dfc2ea 100644 --- a/internal/api/init.go +++ b/internal/api/init.go @@ -71,9 +71,7 @@ func Start(ctx context.Context, index int, config *Config) error { netDone <- struct{}{} return } - srv := http.NewServeMux() - srv.Handle(prommetrics.ApiPath, prommetrics.ApiHandler()) - if err := http.ListenAndServe(fmt.Sprintf(":%d", prometheusPort), srv); err != nil && err != http.ErrServerClosed { + if err := prommetrics.ApiInit(prometheusPort); err != nil && err != http.ErrServerClosed { netErr = errs.WrapMsg(err, fmt.Sprintf("api prometheus start err: %d", prometheusPort)) netDone <- struct{}{} } diff --git a/internal/msgtransfer/init.go b/internal/msgtransfer/init.go index c17abbc30..b4b2245eb 100644 --- a/internal/msgtransfer/init.go +++ b/internal/msgtransfer/init.go @@ -17,6 +17,7 @@ package msgtransfer import ( "context" "fmt" + "github.com/openimsdk/open-im-server/v3/pkg/common/prommetrics" "github.com/openimsdk/open-im-server/v3/pkg/common/storage/cache/redis" "github.com/openimsdk/open-im-server/v3/pkg/common/storage/database/mgo" "github.com/openimsdk/tools/db/mongoutil" @@ -29,16 +30,12 @@ import ( "github.com/openimsdk/open-im-server/v3/pkg/common/config" kdisc "github.com/openimsdk/open-im-server/v3/pkg/common/discoveryregister" - "github.com/openimsdk/open-im-server/v3/pkg/common/prommetrics" "github.com/openimsdk/open-im-server/v3/pkg/common/storage/controller" "github.com/openimsdk/open-im-server/v3/pkg/rpcclient" "github.com/openimsdk/tools/errs" "github.com/openimsdk/tools/log" "github.com/openimsdk/tools/mw" "github.com/openimsdk/tools/system/program" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/collectors" - "github.com/prometheus/client_golang/prometheus/promhttp" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" ) @@ -139,14 +136,8 @@ func (m *MsgTransfer) Start(index int, config *Config) error { netDone <- struct{}{} return } - proreg := prometheus.NewRegistry() - proreg.MustRegister( - collectors.NewGoCollector(), - ) - proreg.MustRegister(prommetrics.GetGrpcCusMetrics("Transfer", &config.Share)...) - http.Handle("/metrics", promhttp.HandlerFor(proreg, promhttp.HandlerOpts{Registry: proreg})) - err = http.ListenAndServe(fmt.Sprintf(":%d", prometheusPort), nil) - if err != nil && err != http.ErrServerClosed { + + if err := prommetrics.TransferInit(prometheusPort); err != nil && err != http.ErrServerClosed { netErr = errs.WrapMsg(err, "prometheus start error", "prometheusPort", prometheusPort) netDone <- struct{}{} } diff --git a/pkg/common/prommetrics/api.go b/pkg/common/prommetrics/api.go index 806198977..95b5c06b6 100644 --- a/pkg/common/prommetrics/api.go +++ b/pkg/common/prommetrics/api.go @@ -3,15 +3,11 @@ package prommetrics import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" - "net/http" "strconv" ) -const ApiPath = "/metrics" - var ( - apiRegistry = prometheus.NewRegistry() - apiCounter = prometheus.NewCounterVec( + apiCounter = prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "api_count", Help: "Total number of API calls", @@ -27,8 +23,14 @@ var ( ) ) -func init() { - apiRegistry.MustRegister(apiCounter, httpCounter) +func ApiInit(prometheusPort int) error { + apiRegistry := prometheus.NewRegistry() + cs := append( + baseCollector, + apiCounter, + httpCounter, + ) + return Init(apiRegistry, prometheusPort, commonPath, promhttp.HandlerFor(apiRegistry, promhttp.HandlerOpts{}), cs...) } func APICall(path string, method string, apiCode int) { @@ -44,7 +46,3 @@ func HttpCall(path string, method string, status int) { // apiRegistry, promhttp.HandlerFor(apiRegistry, promhttp.HandlerOpts{}), // ) //} - -func ApiHandler() http.Handler { - return promhttp.HandlerFor(apiRegistry, promhttp.HandlerOpts{}) -} diff --git a/pkg/common/prommetrics/prommetrics.go b/pkg/common/prommetrics/prommetrics.go index 3955d8ea5..02e408d63 100644 --- a/pkg/common/prommetrics/prommetrics.go +++ b/pkg/common/prommetrics/prommetrics.go @@ -15,43 +15,24 @@ package prommetrics import ( - gp "github.com/grpc-ecosystem/go-grpc-prometheus" - config2 "github.com/openimsdk/open-im-server/v3/pkg/common/config" + "fmt" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/collectors" + "net/http" ) -func NewGrpcPromObj(cusMetrics []prometheus.Collector) (*prometheus.Registry, *gp.ServerMetrics, error) { - reg := prometheus.NewRegistry() - grpcMetrics := gp.NewServerMetrics() - grpcMetrics.EnableHandlingTimeHistogram() - cusMetrics = append(cusMetrics, grpcMetrics, collectors.NewGoCollector()) - reg.MustRegister(cusMetrics...) - return reg, grpcMetrics, nil -} +const commonPath = "/metrics" -func GetGrpcCusMetrics(registerName string, share *config2.Share) []prometheus.Collector { - switch registerName { - case share.RpcRegisterName.MessageGateway: - return []prometheus.Collector{OnlineUserGauge} - case share.RpcRegisterName.Msg: - return []prometheus.Collector{SingleChatMsgProcessSuccessCounter, SingleChatMsgProcessFailedCounter, GroupChatMsgProcessSuccessCounter, GroupChatMsgProcessFailedCounter} - case "Transfer": - return []prometheus.Collector{MsgInsertRedisSuccessCounter, MsgInsertRedisFailedCounter, MsgInsertMongoSuccessCounter, MsgInsertMongoFailedCounter, SeqSetFailedCounter} - case share.RpcRegisterName.Push: - return []prometheus.Collector{MsgOfflinePushFailedCounter} - case share.RpcRegisterName.Auth: - return []prometheus.Collector{UserLoginCounter} - default: - return nil +var ( + baseCollector = []prometheus.Collector{ + collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}), + collectors.NewGoCollector(), } -} +) -//func GetGinCusMetrics(name string) []*ginprometheus.Metric { -// switch name { -// case "Api": -// return []*ginprometheus.Metric{ApiCustomCnt} -// default: -// return []*ginprometheus.Metric{ApiCustomCnt} -// } -//} +func Init(registry *prometheus.Registry, prometheusPort int, path string, handler http.Handler, cs ...prometheus.Collector) error { + registry.MustRegister(cs...) + srv := http.NewServeMux() + srv.Handle(path, handler) + return http.ListenAndServe(fmt.Sprintf(":%d", prometheusPort), srv) +} diff --git a/pkg/common/prommetrics/prommetrics_test.go b/pkg/common/prommetrics/prommetrics_test.go index 65b05652f..14b1aaff3 100644 --- a/pkg/common/prommetrics/prommetrics_test.go +++ b/pkg/common/prommetrics/prommetrics_test.go @@ -14,46 +14,39 @@ package prommetrics -import ( - "testing" - - "github.com/prometheus/client_golang/prometheus" - "github.com/stretchr/testify/assert" -) - -func TestNewGrpcPromObj(t *testing.T) { - // Create a custom metric to pass into the NewGrpcPromObj function. - customMetric := prometheus.NewCounter(prometheus.CounterOpts{ - Name: "test_metric", - Help: "This is a test metric.", - }) - cusMetrics := []prometheus.Collector{customMetric} - - // Call NewGrpcPromObj with the custom metrics. - reg, grpcMetrics, err := NewGrpcPromObj(cusMetrics) - - // Assert no error was returned. - assert.NoError(t, err) - - // Assert the registry was correctly initialized. - assert.NotNil(t, reg) - - // Assert the grpcMetrics was correctly initialized. - assert.NotNil(t, grpcMetrics) - - // Assert that the custom metric is registered. - mfs, err := reg.Gather() - assert.NoError(t, err) - assert.NotEmpty(t, mfs) // Ensure some metrics are present. - found := false - for _, mf := range mfs { - if *mf.Name == "test_metric" { - found = true - break - } - } - assert.True(t, found, "Custom metric not found in registry") -} +//func TestNewGrpcPromObj(t *testing.T) { +// // Create a custom metric to pass into the NewGrpcPromObj function. +// customMetric := prometheus.NewCounter(prometheus.CounterOpts{ +// Name: "test_metric", +// Help: "This is a test metric.", +// }) +// cusMetrics := []prometheus.Collector{customMetric} +// +// // Call NewGrpcPromObj with the custom metrics. +// reg, grpcMetrics, err := NewGrpcPromObj(cusMetrics) +// +// // Assert no error was returned. +// assert.NoError(t, err) +// +// // Assert the registry was correctly initialized. +// assert.NotNil(t, reg) +// +// // Assert the grpcMetrics was correctly initialized. +// assert.NotNil(t, grpcMetrics) +// +// // Assert that the custom metric is registered. +// mfs, err := reg.Gather() +// assert.NoError(t, err) +// assert.NotEmpty(t, mfs) // Ensure some metrics are present. +// found := false +// for _, mf := range mfs { +// if *mf.Name == "test_metric" { +// found = true +// break +// } +// } +// assert.True(t, found, "Custom metric not found in registry") +//} //func TestGetGrpcCusMetrics(t *testing.T) { // conf := config2.NewGlobalConfig() diff --git a/pkg/common/prommetrics/rpc.go b/pkg/common/prommetrics/rpc.go index 834f76a9f..1da2c1510 100644 --- a/pkg/common/prommetrics/rpc.go +++ b/pkg/common/prommetrics/rpc.go @@ -1,16 +1,17 @@ package prommetrics import ( + gp "github.com/grpc-ecosystem/go-grpc-prometheus" + "github.com/openimsdk/open-im-server/v3/pkg/common/config" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" - "net/http" "strconv" ) -const RpcPath = "/metrics" +const rpcPath = commonPath var ( - rpcRegistry = prometheus.NewRegistry() + grpcMetrics *gp.ServerMetrics rpcCounter = prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "rpc_count", @@ -20,14 +21,38 @@ var ( ) ) -func init() { - rpcRegistry.MustRegister(rpcCounter) +func RpcInit(cs []prometheus.Collector, prometheusPort int) error { + reg := prometheus.NewRegistry() + cs = append(append( + baseCollector, + rpcCounter, + ), cs...) + return Init(reg, prometheusPort, rpcPath, promhttp.HandlerFor(reg, promhttp.HandlerOpts{Registry: reg}), cs...) } func RPCCall(name string, path string, code int) { rpcCounter.With(prometheus.Labels{"name": name, "path": path, "code": strconv.Itoa(code)}).Inc() } -func RPCHandler() http.Handler { - return promhttp.HandlerFor(rpcRegistry, promhttp.HandlerOpts{}) +func GetGrpcServerMetrics() *gp.ServerMetrics { + if grpcMetrics == nil { + grpcMetrics = gp.NewServerMetrics() + grpcMetrics.EnableHandlingTimeHistogram() + } + return grpcMetrics +} + +func GetGrpcCusMetrics(registerName string, share *config.Share) []prometheus.Collector { + switch registerName { + case share.RpcRegisterName.MessageGateway: + return []prometheus.Collector{OnlineUserGauge} + case share.RpcRegisterName.Msg: + return []prometheus.Collector{SingleChatMsgProcessSuccessCounter, SingleChatMsgProcessFailedCounter, GroupChatMsgProcessSuccessCounter, GroupChatMsgProcessFailedCounter} + case share.RpcRegisterName.Push: + return []prometheus.Collector{MsgOfflinePushFailedCounter} + case share.RpcRegisterName.Auth: + return []prometheus.Collector{UserLoginCounter} + default: + return nil + } } diff --git a/pkg/common/prommetrics/transfer.go b/pkg/common/prommetrics/transfer.go index 197b6f7fc..f0abb8285 100644 --- a/pkg/common/prommetrics/transfer.go +++ b/pkg/common/prommetrics/transfer.go @@ -16,6 +16,7 @@ package prommetrics import ( "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" ) var ( @@ -40,3 +41,16 @@ var ( Help: "The number of failed set seq", }) ) + +func TransferInit(prometheusPort int) error { + reg := prometheus.NewRegistry() + cs := append( + baseCollector, + MsgInsertRedisSuccessCounter, + MsgInsertRedisFailedCounter, + MsgInsertMongoSuccessCounter, + MsgInsertMongoFailedCounter, + SeqSetFailedCounter, + ) + return Init(reg, prometheusPort, commonPath, promhttp.HandlerFor(reg, promhttp.HandlerOpts{Registry: reg}), cs...) +} diff --git a/pkg/common/startrpc/start.go b/pkg/common/startrpc/start.go index a88797793..4091a5f6e 100644 --- a/pkg/common/startrpc/start.go +++ b/pkg/common/startrpc/start.go @@ -17,7 +17,7 @@ package startrpc import ( "context" "fmt" - config2 "github.com/openimsdk/open-im-server/v3/pkg/common/config" + "github.com/openimsdk/open-im-server/v3/pkg/common/config" "github.com/openimsdk/tools/utils/datautil" "google.golang.org/grpc/status" "net" @@ -42,8 +42,8 @@ import ( ) // Start rpc server. -func Start[T any](ctx context.Context, discovery *config2.Discovery, prometheusConfig *config2.Prometheus, listenIP, - registerIP string, rpcPorts []int, index int, rpcRegisterName string, share *config2.Share, config T, rpcFn func(ctx context.Context, +func Start[T any](ctx context.Context, discovery *config.Discovery, prometheusConfig *config.Prometheus, listenIP, + registerIP string, rpcPorts []int, index int, rpcRegisterName string, share *config.Share, config T, rpcFn func(ctx context.Context, config T, client discovery.SvcDiscoveryRegistry, server *grpc.Server) error, options ...grpc.ServerOption) error { rpcPort, err := datautil.GetElemByIndex(rpcPorts, index) @@ -82,7 +82,11 @@ func Start[T any](ctx context.Context, discovery *config2.Discovery, prometheusC //reg, metric, _ = prommetrics.NewGrpcPromObj(cusMetrics) //options = append(options, mw.GrpcServer(), grpc.StreamInterceptor(metric.StreamServerInterceptor()), // grpc.UnaryInterceptor(metric.UnaryServerInterceptor())) - options = append(options, mw.GrpcServer(), prommetricsInterceptor(rpcRegisterName)) + options = append( + options, mw.GrpcServer(), + prommetricsUnaryInterceptor(rpcRegisterName), + prommetricsStreamInterceptor(rpcRegisterName), + ) } else { options = append(options, mw.GrpcServer()) } @@ -121,9 +125,8 @@ func Start[T any](ctx context.Context, discovery *config2.Discovery, prometheusC netDone <- struct{}{} return } - srv := http.NewServeMux() - srv.Handle(prommetrics.RpcPath, prommetrics.RPCHandler()) - if err := http.ListenAndServe(fmt.Sprintf(":%d", prometheusPort), srv); err != nil && err != http.ErrServerClosed { + cs := prommetrics.GetGrpcCusMetrics(rpcRegisterName, share) + if err := prommetrics.RpcInit(cs, prometheusPort); err != nil && err != http.ErrServerClosed { netErr = errs.WrapMsg(err, fmt.Sprintf("rpc %s prometheus start err: %d", rpcRegisterName, prometheusPort)) netDone <- struct{}{} } @@ -181,7 +184,7 @@ func gracefulStopWithCtx(ctx context.Context, f func()) error { } } -func prommetricsInterceptor(rpcRegisterName string) grpc.ServerOption { +func prommetricsUnaryInterceptor(rpcRegisterName string) grpc.ServerOption { getCode := func(err error) int { if err == nil { return 0 @@ -198,3 +201,7 @@ func prommetricsInterceptor(rpcRegisterName string) grpc.ServerOption { return resp, err }) } + +func prommetricsStreamInterceptor(rpcRegisterName string) grpc.ServerOption { + return grpc.ChainStreamInterceptor() +} From 4738d1729cf2b1ff8020a819082cee9dabc2ca70 Mon Sep 17 00:00:00 2001 From: icey-yu <1186114839@qq.com> Date: Mon, 15 Jul 2024 18:26:58 +0800 Subject: [PATCH 2/2] feat: prometheus --- config/alertmanager.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/alertmanager.yml b/config/alertmanager.yml index bdb076a63..a02944851 100644 --- a/config/alertmanager.yml +++ b/config/alertmanager.yml @@ -5,7 +5,7 @@ global: smtp_auth_username: alert@openim.io smtp_auth_password: YOURAUTHPASSWORD smtp_require_tls: false - smtp_hello: xxx监控告警 + smtp_hello: xxx templates: - /etc/alertmanager/email.tmpl