Add ability to reset metrics between crawls to limit payload absorbed (#5)

by the Prometheus/VictoriaMetrics metric crawlers.
This commit is contained in:
2023-11-16 17:45:48 +01:00
committed by GitHub
parent 9c9fa94140
commit 0b642f8be1
6 changed files with 34 additions and 13 deletions
+1 -1
View File
@@ -11,7 +11,7 @@ help: ## display this help
.PHONY: run .PHONY: run
run: build ## run application run: build ## run application
@LOG_LEVEL=debug BLOCK_SCHEMA_INTROSPECTION=false CACHE_TTL=10 JWT_ROLE_RATE_LIMIT=false JWT_ROLE_CLAIM_PATH="Hasura.x-hasura-default-role" JWT_USER_CLAIM_PATH="Hasura.x-hasura-user-id" HOST_GRAPHQL=https://hasura8.lan/ HEALTHCHECK_GRAPHQL_URL=https://hasura8.lan/v1/graphql ./graphql-proxy @LOG_LEVEL=debug PURGE_METRICS_ON_CRAWL=true BLOCK_SCHEMA_INTROSPECTION=false CACHE_TTL=10 JWT_ROLE_RATE_LIMIT=false JWT_ROLE_CLAIM_PATH="Hasura.x-hasura-default-role" JWT_USER_CLAIM_PATH="Hasura.x-hasura-user-id" HOST_GRAPHQL=https://hasura8.lan/ HEALTHCHECK_GRAPHQL_URL=https://hasura8.lan/v1/graphql ./graphql-proxy
.PHONY: build .PHONY: build
build: ## build the binary build: ## build the binary
+8
View File
@@ -23,6 +23,7 @@ This project is in active use by [telegram-bot.app](https://telegram-bot.app), a
- [API endpoints](#api-endpoints) - [API endpoints](#api-endpoints)
- [Ban or unban the user](#ban-or-unban-the-user) - [Ban or unban the user](#ban-or-unban-the-user)
- [General](#general) - [General](#general)
- [Metrics which matter](#metrics-which-matter)
- [Healthcheck](#healthcheck) - [Healthcheck](#healthcheck)
- [Monitoring endpoint](#monitoring-endpoint) - [Monitoring endpoint](#monitoring-endpoint)
@@ -123,6 +124,7 @@ In this case, both proxy and websockets will be available under the `/v1/graphql
| `API_PORT` | The port to expose the monitoring API | `9090` | | `API_PORT` | The port to expose the monitoring API | `9090` |
| `BANNED_USERS_FILE` | The path to the file with banned users | `/go/src/app/banned_users.json` | | `BANNED_USERS_FILE` | The path to the file with banned users | `/go/src/app/banned_users.json` |
| `PROXIED_CLIENT_TIMEOUT` | The timeout for the proxied client in seconds | `120` | | `PROXIED_CLIENT_TIMEOUT` | The timeout for the proxied client in seconds | `120` |
| `PURGE_METRICS_ON_CRAWL` | Purge metrics on each /metrics crawl | `false` |
### Speed ### Speed
@@ -227,6 +229,12 @@ Ban details will be stored in the `banned_users.json` file, which you can mount
### General ### General
#### Metrics which matter
You can enable the `PURGE_METRICS_ON_CRAWL` environment variable to purge the metrics after each `/metrics` crawl. Each crawl then returns only the metrics recorded since the previous crawl, without leftovers from earlier crawls. This is useful if you want to monitor the metrics in real time and/or limit the amount of data ingested into the monitoring system. When it is enabled, you will most likely need to update your monitoring queries, because the purged metrics no longer accumulate across crawls.
With `PURGE_METRICS_ON_CRAWL` enabled, the `graphql_proxy_requests_failed`, `graphql_proxy_requests_skipped` and `graphql_proxy_requests_succesful` metrics are excluded from the purge and keep their values across crawls.
#### Healthcheck #### Healthcheck
If you'd like the `/healthz` endpoint to perform actual check for the connectivity to the graphql endpoint - set the `HEALTHCHECK_GRAPHQL_URL` environment variable to the exact URL of the graphql endpoint. The query executed will be `query { __typename }` and if the response is not `200 OK` - the healthcheck will fail. Remember that the endpoint is a full URL which you'd like to check, so it should include the protocol, host and path - for example `http://localhost:8080/v1/graphql` and it's NOT the same as value of `HOST_GRAPHQL` environment variable which should provide only the host, without path, ending with slash. If you'd like the `/healthz` endpoint to perform actual check for the connectivity to the graphql endpoint - set the `HEALTHCHECK_GRAPHQL_URL` environment variable to the exact URL of the graphql endpoint. The query executed will be `query { __typename }` and if the response is not `200 OK` - the healthcheck will fail. Remember that the endpoint is a full URL which you'd like to check, so it should include the protocol, host and path - for example `http://localhost:8080/v1/graphql` and it's NOT the same as value of `HOST_GRAPHQL` environment variable which should provide only the host, without path, ending with slash.
+1
View File
@@ -55,6 +55,7 @@ func parseConfig() {
c.Server.EnableApi = envutil.GetBool("ENABLE_API", false) c.Server.EnableApi = envutil.GetBool("ENABLE_API", false)
c.Server.ApiPort = envutil.GetInt("API_PORT", 9090) c.Server.ApiPort = envutil.GetInt("API_PORT", 9090)
c.Api.BannedUsersFile = envutil.Getenv("BANNED_USERS_FILE", "/go/src/app/banned_users.json") c.Api.BannedUsersFile = envutil.Getenv("BANNED_USERS_FILE", "/go/src/app/banned_users.json")
c.Server.PurgeOnCrawl = envutil.GetBool("PURGE_METRICS_ON_CRAWL", false)
cfg = &c cfg = &c
enableCache() // takes close to no resources, but can be used with dynamic query cache enableCache() // takes close to no resources, but can be used with dynamic query cache
loadRatelimitConfig() loadRatelimitConfig()
+1 -1
View File
@@ -5,7 +5,7 @@ import (
) )
func StartMonitoringServer() { func StartMonitoringServer() {
cfg.Monitoring = libpack_monitoring.NewMonitoring() cfg.Monitoring = libpack_monitoring.NewMonitoring(cfg.Server.PurgeOnCrawl)
cfg.Monitoring.AddMetricsPrefix("graphql_proxy") cfg.Monitoring.AddMetricsPrefix("graphql_proxy")
cfg.Monitoring.RegisterDefaultMetrics() cfg.Monitoring.RegisterDefaultMetrics()
} }
+22 -11
View File
@@ -15,18 +15,22 @@ import (
) )
type MetricsSetup struct { type MetricsSetup struct {
metrics_prefix string metrics_prefix string
metrics_set *metrics.Set metrics_set *metrics.Set
metrics_set_custom *metrics.Set
} }
var ( var (
log *logging.LogConfig log *logging.LogConfig
purgeMetricsOnCrawl bool
) )
func NewMonitoring() *MetricsSetup { func NewMonitoring(purgeOnCrawl bool) *MetricsSetup {
purgeMetricsOnCrawl = purgeOnCrawl
log = logging.NewLogger() log = logging.NewLogger()
ms := &MetricsSetup{} ms := &MetricsSetup{}
ms.metrics_set = metrics.NewSet() ms.metrics_set = metrics.NewSet()
ms.metrics_set_custom = metrics.NewSet()
go ms.startPrometheusEndpoint() go ms.startPrometheusEndpoint()
return ms return ms
} }
@@ -45,6 +49,10 @@ func (ms *MetricsSetup) startPrometheusEndpoint() {
func (ms *MetricsSetup) metricsEndpoint(c *fiber.Ctx) error { func (ms *MetricsSetup) metricsEndpoint(c *fiber.Ctx) error {
ms.metrics_set.WritePrometheus(c.Response().BodyWriter()) ms.metrics_set.WritePrometheus(c.Response().BodyWriter())
ms.metrics_set_custom.WritePrometheus(c.Response().BodyWriter())
if purgeMetricsOnCrawl {
ms.PurgeMetrics()
}
return nil return nil
} }
@@ -61,7 +69,7 @@ func (ms *MetricsSetup) RegisterMetricsGauge(metric_name string, labels map[stri
log.Critical("RegisterMetricsGauge() error", map[string]interface{}{"_error": "Invalid metric name", "_metric_name": metric_name}) log.Critical("RegisterMetricsGauge() error", map[string]interface{}{"_error": "Invalid metric name", "_metric_name": metric_name})
return nil return nil
} }
return ms.metrics_set.GetOrCreateGauge(ms.get_metrics_name(metric_name, labels), func() float64 { return ms.metrics_set_custom.GetOrCreateGauge(ms.get_metrics_name(metric_name, labels), func() float64 {
// get current value of the gauge and add val to it // get current value of the gauge and add val to it
return val return val
}) })
@@ -72,7 +80,10 @@ func (ms *MetricsSetup) RegisterMetricsCounter(metric_name string, labels map[st
log.Critical("RegisterMetricsCounter() error", map[string]interface{}{"_error": "Invalid metric name", "_metric_name": metric_name}) log.Critical("RegisterMetricsCounter() error", map[string]interface{}{"_error": "Invalid metric name", "_metric_name": metric_name})
return nil return nil
} }
return ms.metrics_set.GetOrCreateCounter(ms.get_metrics_name(metric_name, labels)) if metric_name == MetricsSucceeded || metric_name == MetricsFailed || metric_name == MetricsSkipped {
return ms.metrics_set.GetOrCreateCounter(ms.get_metrics_name(metric_name, labels))
}
return ms.metrics_set_custom.GetOrCreateCounter(ms.get_metrics_name(metric_name, labels))
} }
func (ms *MetricsSetup) RegisterFloatCounter(metric_name string, labels map[string]string) *metrics.FloatCounter { func (ms *MetricsSetup) RegisterFloatCounter(metric_name string, labels map[string]string) *metrics.FloatCounter {
@@ -80,7 +91,7 @@ func (ms *MetricsSetup) RegisterFloatCounter(metric_name string, labels map[stri
log.Critical("RegisterFloatCounter() error", map[string]interface{}{"_error": "Invalid metric name", "_metric_name": metric_name}) log.Critical("RegisterFloatCounter() error", map[string]interface{}{"_error": "Invalid metric name", "_metric_name": metric_name})
return nil return nil
} }
return ms.metrics_set.GetOrCreateFloatCounter(ms.get_metrics_name(metric_name, labels)) return ms.metrics_set_custom.GetOrCreateFloatCounter(ms.get_metrics_name(metric_name, labels))
} }
func (ms *MetricsSetup) RegisterMetricsSummary(metric_name string, labels map[string]string) *metrics.Summary { func (ms *MetricsSetup) RegisterMetricsSummary(metric_name string, labels map[string]string) *metrics.Summary {
@@ -88,7 +99,7 @@ func (ms *MetricsSetup) RegisterMetricsSummary(metric_name string, labels map[st
log.Critical("RegisterMetricsSummary() error", map[string]interface{}{"_error": "Invalid metric name", "_metric_name": metric_name}) log.Critical("RegisterMetricsSummary() error", map[string]interface{}{"_error": "Invalid metric name", "_metric_name": metric_name})
return nil return nil
} }
return ms.metrics_set.GetOrCreateSummary(ms.get_metrics_name(metric_name, labels)) return ms.metrics_set_custom.GetOrCreateSummary(ms.get_metrics_name(metric_name, labels))
} }
func (ms *MetricsSetup) RegisterMetricsHistogram(metric_name string, labels map[string]string) *metrics.Histogram { func (ms *MetricsSetup) RegisterMetricsHistogram(metric_name string, labels map[string]string) *metrics.Histogram {
@@ -96,7 +107,7 @@ func (ms *MetricsSetup) RegisterMetricsHistogram(metric_name string, labels map[
log.Critical("RegisterMetricsHistogram() error", map[string]interface{}{"_error": "Invalid metric name", "_metric_name": metric_name}) log.Critical("RegisterMetricsHistogram() error", map[string]interface{}{"_error": "Invalid metric name", "_metric_name": metric_name})
return nil return nil
} }
return ms.metrics_set.GetOrCreateHistogram(ms.get_metrics_name(metric_name, labels)) return ms.metrics_set_custom.GetOrCreateHistogram(ms.get_metrics_name(metric_name, labels))
} }
func (ms *MetricsSetup) Increment(metric_name string, labels map[string]string) { func (ms *MetricsSetup) Increment(metric_name string, labels map[string]string) {
@@ -124,9 +135,9 @@ func (ms *MetricsSetup) UpdateSummary(metric_name string, labels map[string]stri
} }
func (ms *MetricsSetup) RemoveMetrics(metric_name string, labels map[string]string) { func (ms *MetricsSetup) RemoveMetrics(metric_name string, labels map[string]string) {
ms.metrics_set.UnregisterMetric(ms.get_metrics_name(metric_name, labels)) ms.metrics_set_custom.UnregisterMetric(ms.get_metrics_name(metric_name, labels))
} }
func (ms *MetricsSetup) PurgeMetrics() { func (ms *MetricsSetup) PurgeMetrics() {
ms.metrics_set.UnregisterAllMetrics() ms.metrics_set_custom.UnregisterAllMetrics()
} }
+1
View File
@@ -24,6 +24,7 @@ type config struct {
AllowURLs []string AllowURLs []string
EnableApi bool EnableApi bool
ApiPort int ApiPort int
PurgeOnCrawl bool
} }
Client struct { Client struct {