Add ability to reset metrics between crawl to limit payload absorbed (#5)

by the prometheus/victoria metric crawlers.
This commit is contained in:
2023-11-16 17:45:48 +01:00
committed by GitHub
parent 9c9fa94140
commit 0b642f8be1
6 changed files with 34 additions and 13 deletions
+1 -1
View File
@@ -11,7 +11,7 @@ help: ## display this help
.PHONY: run
run: build ## run application
@LOG_LEVEL=debug BLOCK_SCHEMA_INTROSPECTION=false CACHE_TTL=10 JWT_ROLE_RATE_LIMIT=false JWT_ROLE_CLAIM_PATH="Hasura.x-hasura-default-role" JWT_USER_CLAIM_PATH="Hasura.x-hasura-user-id" HOST_GRAPHQL=https://hasura8.lan/ HEALTHCHECK_GRAPHQL_URL=https://hasura8.lan/v1/graphql ./graphql-proxy
@LOG_LEVEL=debug PURGE_METRICS_ON_CRAWL=true BLOCK_SCHEMA_INTROSPECTION=false CACHE_TTL=10 JWT_ROLE_RATE_LIMIT=false JWT_ROLE_CLAIM_PATH="Hasura.x-hasura-default-role" JWT_USER_CLAIM_PATH="Hasura.x-hasura-user-id" HOST_GRAPHQL=https://hasura8.lan/ HEALTHCHECK_GRAPHQL_URL=https://hasura8.lan/v1/graphql ./graphql-proxy
.PHONY: build
build: ## build the binary
+8
View File
@@ -23,6 +23,7 @@ This project is in active use by [telegram-bot.app](https://telegram-bot.app), a
- [API endpoints](#api-endpoints)
- [Ban or unban the user](#ban-or-unban-the-user)
- [General](#general)
- [Metrics which matter](#metrics-which-matter)
- [Healthcheck](#healthcheck)
- [Monitoring endpoint](#monitoring-endpoint)
@@ -123,6 +124,7 @@ In this case, both proxy and websockets will be available under the `/v1/graphql
| `API_PORT` | The port to expose the monitoring API | `9090` |
| `BANNED_USERS_FILE` | The path to the file with banned users | `/go/src/app/banned_users.json` |
| `PROXIED_CLIENT_TIMEOUT` | The timeout for the proxied client in seconds | `120` |
| `PURGE_METRICS_ON_CRAWL` | Purge metrics on each /metrics crawl | `false` |
### Speed
@@ -227,6 +229,12 @@ Ban details will be stored in the `banned_users.json` file, which you can mount
### General
#### Metrics which matter
You can always enable `PURGE_METRICS_ON_CRAWL` environment variable to purge the metrics on each `/metrics` crawl. This will allow you to see only the current metrics, without potential leftovers from the previous crawls. This is useful if you want to monitor the metrics in real-time and / or limit the amount of data ingested into the monitoring system. When enabled you will most likely need to update your monitoring queries.
With the `PURGE_METRICS_ON_CRAWL` enabled, the `graphql_proxy_requests_failed`, `graphql_proxy_requests_skipped` and `graphql_proxy_requests_succesful` metrics will remain between resets.
#### Healthcheck
If you'd like the `/healthz` endpoint to perform actual check for the connectivity to the graphql endpoint - set the `HEALTHCHECK_GRAPHQL_URL` environment variable to the exact URL of the graphql endpoint. The query executed will be `query { __typename }` and if the response is not `200 OK` - the healthcheck will fail. Remember that the endpoint is a full URL which you'd like to check, so it should include the protocol, host and path - for example `http://localhost:8080/v1/graphql` and it's NOT the same as value of `HOST_GRAPHQL` environment variable which should provide only the host, without path, ending with slash.
+1
View File
@@ -55,6 +55,7 @@ func parseConfig() {
c.Server.EnableApi = envutil.GetBool("ENABLE_API", false)
c.Server.ApiPort = envutil.GetInt("API_PORT", 9090)
c.Api.BannedUsersFile = envutil.Getenv("BANNED_USERS_FILE", "/go/src/app/banned_users.json")
c.Server.PurgeOnCrawl = envutil.GetBool("PURGE_METRICS_ON_CRAWL", false)
cfg = &c
enableCache() // takes close to no resources, but can be used with dynamic query cache
loadRatelimitConfig()
+1 -1
View File
@@ -5,7 +5,7 @@ import (
)
func StartMonitoringServer() {
cfg.Monitoring = libpack_monitoring.NewMonitoring()
cfg.Monitoring = libpack_monitoring.NewMonitoring(cfg.Server.PurgeOnCrawl)
cfg.Monitoring.AddMetricsPrefix("graphql_proxy")
cfg.Monitoring.RegisterDefaultMetrics()
}
+22 -11
View File
@@ -15,18 +15,22 @@ import (
)
type MetricsSetup struct {
metrics_prefix string
metrics_set *metrics.Set
metrics_prefix string
metrics_set *metrics.Set
metrics_set_custom *metrics.Set
}
var (
log *logging.LogConfig
log *logging.LogConfig
purgeMetricsOnCrawl bool
)
func NewMonitoring() *MetricsSetup {
func NewMonitoring(purgeOnCrawl bool) *MetricsSetup {
purgeMetricsOnCrawl = purgeOnCrawl
log = logging.NewLogger()
ms := &MetricsSetup{}
ms.metrics_set = metrics.NewSet()
ms.metrics_set_custom = metrics.NewSet()
go ms.startPrometheusEndpoint()
return ms
}
@@ -45,6 +49,10 @@ func (ms *MetricsSetup) startPrometheusEndpoint() {
func (ms *MetricsSetup) metricsEndpoint(c *fiber.Ctx) error {
ms.metrics_set.WritePrometheus(c.Response().BodyWriter())
ms.metrics_set_custom.WritePrometheus(c.Response().BodyWriter())
if purgeMetricsOnCrawl {
ms.PurgeMetrics()
}
return nil
}
@@ -61,7 +69,7 @@ func (ms *MetricsSetup) RegisterMetricsGauge(metric_name string, labels map[stri
log.Critical("RegisterMetricsGauge() error", map[string]interface{}{"_error": "Invalid metric name", "_metric_name": metric_name})
return nil
}
return ms.metrics_set.GetOrCreateGauge(ms.get_metrics_name(metric_name, labels), func() float64 {
return ms.metrics_set_custom.GetOrCreateGauge(ms.get_metrics_name(metric_name, labels), func() float64 {
// get current value of the gauge and add val to it
return val
})
@@ -72,7 +80,10 @@ func (ms *MetricsSetup) RegisterMetricsCounter(metric_name string, labels map[st
log.Critical("RegisterMetricsCounter() error", map[string]interface{}{"_error": "Invalid metric name", "_metric_name": metric_name})
return nil
}
return ms.metrics_set.GetOrCreateCounter(ms.get_metrics_name(metric_name, labels))
if metric_name == MetricsSucceeded || metric_name == MetricsFailed || metric_name == MetricsSkipped {
return ms.metrics_set.GetOrCreateCounter(ms.get_metrics_name(metric_name, labels))
}
return ms.metrics_set_custom.GetOrCreateCounter(ms.get_metrics_name(metric_name, labels))
}
func (ms *MetricsSetup) RegisterFloatCounter(metric_name string, labels map[string]string) *metrics.FloatCounter {
@@ -80,7 +91,7 @@ func (ms *MetricsSetup) RegisterFloatCounter(metric_name string, labels map[stri
log.Critical("RegisterFloatCounter() error", map[string]interface{}{"_error": "Invalid metric name", "_metric_name": metric_name})
return nil
}
return ms.metrics_set.GetOrCreateFloatCounter(ms.get_metrics_name(metric_name, labels))
return ms.metrics_set_custom.GetOrCreateFloatCounter(ms.get_metrics_name(metric_name, labels))
}
func (ms *MetricsSetup) RegisterMetricsSummary(metric_name string, labels map[string]string) *metrics.Summary {
@@ -88,7 +99,7 @@ func (ms *MetricsSetup) RegisterMetricsSummary(metric_name string, labels map[st
log.Critical("RegisterMetricsSummary() error", map[string]interface{}{"_error": "Invalid metric name", "_metric_name": metric_name})
return nil
}
return ms.metrics_set.GetOrCreateSummary(ms.get_metrics_name(metric_name, labels))
return ms.metrics_set_custom.GetOrCreateSummary(ms.get_metrics_name(metric_name, labels))
}
func (ms *MetricsSetup) RegisterMetricsHistogram(metric_name string, labels map[string]string) *metrics.Histogram {
@@ -96,7 +107,7 @@ func (ms *MetricsSetup) RegisterMetricsHistogram(metric_name string, labels map[
log.Critical("RegisterMetricsHistogram() error", map[string]interface{}{"_error": "Invalid metric name", "_metric_name": metric_name})
return nil
}
return ms.metrics_set.GetOrCreateHistogram(ms.get_metrics_name(metric_name, labels))
return ms.metrics_set_custom.GetOrCreateHistogram(ms.get_metrics_name(metric_name, labels))
}
func (ms *MetricsSetup) Increment(metric_name string, labels map[string]string) {
@@ -124,9 +135,9 @@ func (ms *MetricsSetup) UpdateSummary(metric_name string, labels map[string]stri
}
func (ms *MetricsSetup) RemoveMetrics(metric_name string, labels map[string]string) {
ms.metrics_set.UnregisterMetric(ms.get_metrics_name(metric_name, labels))
ms.metrics_set_custom.UnregisterMetric(ms.get_metrics_name(metric_name, labels))
}
func (ms *MetricsSetup) PurgeMetrics() {
ms.metrics_set.UnregisterAllMetrics()
ms.metrics_set_custom.UnregisterAllMetrics()
}
+1
View File
@@ -24,6 +24,7 @@ type config struct {
AllowURLs []string
EnableApi bool
ApiPort int
PurgeOnCrawl bool
}
Client struct {