Compare commits

...

5 Commits

7 changed files with 58 additions and 13 deletions
+1 -1
View File
@@ -11,7 +11,7 @@ help: ## display this help
.PHONY: run
run: build ## run application
@LOG_LEVEL=debug BLOCK_SCHEMA_INTROSPECTION=false CACHE_TTL=10 JWT_ROLE_RATE_LIMIT=false JWT_ROLE_CLAIM_PATH="Hasura.x-hasura-default-role" JWT_USER_CLAIM_PATH="Hasura.x-hasura-user-id" HOST_GRAPHQL=https://hasura8.lan/ HEALTHCHECK_GRAPHQL_URL=https://hasura8.lan/v1/graphql ./graphql-proxy
@LOG_LEVEL=debug PURGE_METRICS_ON_CRAWL=true BLOCK_SCHEMA_INTROSPECTION=false CACHE_TTL=10 JWT_ROLE_RATE_LIMIT=false JWT_ROLE_CLAIM_PATH="Hasura.x-hasura-default-role" JWT_USER_CLAIM_PATH="Hasura.x-hasura-user-id" HOST_GRAPHQL=https://hasura8.lan/ HEALTHCHECK_GRAPHQL_URL=https://hasura8.lan/v1/graphql ./graphql-proxy
.PHONY: build
build: ## build the binary
+11
View File
@@ -23,6 +23,7 @@ This project is in active use by [telegram-bot.app](https://telegram-bot.app), a
- [API endpoints](#api-endpoints)
- [Ban or unban the user](#ban-or-unban-the-user)
- [General](#general)
- [Metrics which matter](#metrics-which-matter)
- [Healthcheck](#healthcheck)
- [Monitoring endpoint](#monitoring-endpoint)
@@ -123,6 +124,8 @@ In this case, both proxy and websockets will be available under the `/v1/graphql
| `API_PORT` | The port to expose the monitoring API | `9090` |
| `BANNED_USERS_FILE` | The path to the file with banned users | `/go/src/app/banned_users.json` |
| `PROXIED_CLIENT_TIMEOUT` | The timeout for the proxied client in seconds | `120` |
| `PURGE_METRICS_ON_CRAWL` | Purge metrics on each /metrics crawl | `false` |
| `PURGE_METRICS_ON_TIMER` | Purge metrics every x seconds. `0` - disabled | `0` |
### Speed
@@ -227,6 +230,14 @@ Ban details will be stored in the `banned_users.json` file, which you can mount
### General
#### Metrics which matter
You can always enable `PURGE_METRICS_ON_CRAWL` environment variable to purge the metrics on each `/metrics` crawl. This will allow you to see only the current metrics, without potential leftovers from the previous crawls. This is useful if you want to monitor the metrics in real-time and / or limit the amount of data ingested into the monitoring system. When enabled you will most likely need to update your monitoring queries.
With the `PURGE_METRICS_ON_CRAWL` enabled, the `graphql_proxy_requests_failed`, `graphql_proxy_requests_skipped` and `graphql_proxy_requests_succesful` metrics will remain between resets.
If you prefer more control over the metrics purging - you can enable `PURGE_METRICS_ON_TIMER` environment variable and set the interval in seconds. This will allow you to purge the metrics on a regular basis, for example every 90 seconds. It could be better solution if you have multiple crawlers checking the metrics endpoints and you want to avoid the situation when metrics are purged by for example healthcheck.
#### Healthcheck
If you'd like the `/healthz` endpoint to perform actual check for the connectivity to the graphql endpoint - set the `HEALTHCHECK_GRAPHQL_URL` environment variable to the exact URL of the graphql endpoint. The query executed will be `query { __typename }` and if the response is not `200 OK` - the healthcheck will fail. Remember that the endpoint is a full URL which you'd like to check, so it should include the protocol, host and path - for example `http://localhost:8080/v1/graphql` and it's NOT the same as value of `HOST_GRAPHQL` environment variable which should provide only the host, without path, ending with slash.
+2
View File
@@ -55,6 +55,8 @@ func parseConfig() {
c.Server.EnableApi = envutil.GetBool("ENABLE_API", false)
c.Server.ApiPort = envutil.GetInt("API_PORT", 9090)
c.Api.BannedUsersFile = envutil.Getenv("BANNED_USERS_FILE", "/go/src/app/banned_users.json")
c.Server.PurgeOnCrawl = envutil.GetBool("PURGE_METRICS_ON_CRAWL", false)
c.Server.PurgeEvery = envutil.GetInt("PURGE_METRICS_ON_TIMER", 0)
cfg = &c
enableCache() // takes close to no resources, but can be used with dynamic query cache
loadRatelimitConfig()
+1 -1
View File
@@ -5,7 +5,7 @@ import (
)
func StartMonitoringServer() {
cfg.Monitoring = libpack_monitoring.NewMonitoring()
cfg.Monitoring = libpack_monitoring.NewMonitoring(cfg.Server.PurgeOnCrawl, cfg.Server.PurgeEvery)
cfg.Monitoring.AddMetricsPrefix("graphql_proxy")
cfg.Monitoring.RegisterDefaultMetrics()
}
+6
View File
@@ -2,16 +2,22 @@ package libpack_monitoring
import (
"fmt"
"os"
"strings"
libpack_config "github.com/lukaszraczylo/graphql-monitoring-proxy/config"
)
func (ms *MetricsSetup) get_metrics_name(name string, labels map[string]string) (complete_name string) {
var err error
if labels == nil {
labels = make(map[string]string)
}
labels["microservice"] = libpack_config.PKG_NAME
labels["pod"], err = os.Hostname()
if err != nil {
labels["pod"] = "unknown"
}
if ms.metrics_prefix != "" {
complete_name = ms.metrics_prefix + "_" + name
+35 -11
View File
@@ -15,19 +15,35 @@ import (
)
type MetricsSetup struct {
metrics_prefix string
metrics_set *metrics.Set
metrics_prefix string
metrics_set *metrics.Set
metrics_set_custom *metrics.Set
}
var (
log *logging.LogConfig
log *logging.LogConfig
purgeMetricsOnCrawl bool
purgeMetricsEvery int
)
func NewMonitoring() *MetricsSetup {
func NewMonitoring(purgeOnCrawl bool, purgeEvery int) *MetricsSetup {
purgeMetricsOnCrawl = purgeOnCrawl
purgeMetricsEvery = purgeEvery
log = logging.NewLogger()
ms := &MetricsSetup{}
ms.metrics_set = metrics.NewSet()
ms.metrics_set_custom = metrics.NewSet()
go ms.startPrometheusEndpoint()
if purgeEvery > 0 {
ticker := time.NewTicker(time.Duration(purgeEvery) * time.Second)
go func() {
for range ticker.C {
ms.PurgeMetrics()
}
}()
}
return ms
}
@@ -45,6 +61,11 @@ func (ms *MetricsSetup) startPrometheusEndpoint() {
func (ms *MetricsSetup) metricsEndpoint(c *fiber.Ctx) error {
ms.metrics_set.WritePrometheus(c.Response().BodyWriter())
ms.metrics_set_custom.WritePrometheus(c.Response().BodyWriter())
if purgeMetricsOnCrawl && purgeMetricsEvery == 0 {
ms.PurgeMetrics()
}
return nil
}
@@ -61,7 +82,7 @@ func (ms *MetricsSetup) RegisterMetricsGauge(metric_name string, labels map[stri
log.Critical("RegisterMetricsGauge() error", map[string]interface{}{"_error": "Invalid metric name", "_metric_name": metric_name})
return nil
}
return ms.metrics_set.GetOrCreateGauge(ms.get_metrics_name(metric_name, labels), func() float64 {
return ms.metrics_set_custom.GetOrCreateGauge(ms.get_metrics_name(metric_name, labels), func() float64 {
// get current value of the gauge and add val to it
return val
})
@@ -72,7 +93,10 @@ func (ms *MetricsSetup) RegisterMetricsCounter(metric_name string, labels map[st
log.Critical("RegisterMetricsCounter() error", map[string]interface{}{"_error": "Invalid metric name", "_metric_name": metric_name})
return nil
}
return ms.metrics_set.GetOrCreateCounter(ms.get_metrics_name(metric_name, labels))
if metric_name == MetricsSucceeded || metric_name == MetricsFailed || metric_name == MetricsSkipped {
return ms.metrics_set.GetOrCreateCounter(ms.get_metrics_name(metric_name, labels))
}
return ms.metrics_set_custom.GetOrCreateCounter(ms.get_metrics_name(metric_name, labels))
}
func (ms *MetricsSetup) RegisterFloatCounter(metric_name string, labels map[string]string) *metrics.FloatCounter {
@@ -80,7 +104,7 @@ func (ms *MetricsSetup) RegisterFloatCounter(metric_name string, labels map[stri
log.Critical("RegisterFloatCounter() error", map[string]interface{}{"_error": "Invalid metric name", "_metric_name": metric_name})
return nil
}
return ms.metrics_set.GetOrCreateFloatCounter(ms.get_metrics_name(metric_name, labels))
return ms.metrics_set_custom.GetOrCreateFloatCounter(ms.get_metrics_name(metric_name, labels))
}
func (ms *MetricsSetup) RegisterMetricsSummary(metric_name string, labels map[string]string) *metrics.Summary {
@@ -88,7 +112,7 @@ func (ms *MetricsSetup) RegisterMetricsSummary(metric_name string, labels map[st
log.Critical("RegisterMetricsSummary() error", map[string]interface{}{"_error": "Invalid metric name", "_metric_name": metric_name})
return nil
}
return ms.metrics_set.GetOrCreateSummary(ms.get_metrics_name(metric_name, labels))
return ms.metrics_set_custom.GetOrCreateSummary(ms.get_metrics_name(metric_name, labels))
}
func (ms *MetricsSetup) RegisterMetricsHistogram(metric_name string, labels map[string]string) *metrics.Histogram {
@@ -96,7 +120,7 @@ func (ms *MetricsSetup) RegisterMetricsHistogram(metric_name string, labels map[
log.Critical("RegisterMetricsHistogram() error", map[string]interface{}{"_error": "Invalid metric name", "_metric_name": metric_name})
return nil
}
return ms.metrics_set.GetOrCreateHistogram(ms.get_metrics_name(metric_name, labels))
return ms.metrics_set_custom.GetOrCreateHistogram(ms.get_metrics_name(metric_name, labels))
}
func (ms *MetricsSetup) Increment(metric_name string, labels map[string]string) {
@@ -124,9 +148,9 @@ func (ms *MetricsSetup) UpdateSummary(metric_name string, labels map[string]stri
}
func (ms *MetricsSetup) RemoveMetrics(metric_name string, labels map[string]string) {
ms.metrics_set.UnregisterMetric(ms.get_metrics_name(metric_name, labels))
ms.metrics_set_custom.UnregisterMetric(ms.get_metrics_name(metric_name, labels))
}
func (ms *MetricsSetup) PurgeMetrics() {
ms.metrics_set.UnregisterAllMetrics()
ms.metrics_set_custom.UnregisterAllMetrics()
}
+2
View File
@@ -24,6 +24,8 @@ type config struct {
AllowURLs []string
EnableApi bool
ApiPort int
PurgeOnCrawl bool
PurgeEvery int
}
Client struct {