diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 0000000..4047684 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1,2 @@ +github: [ lukaszraczylo ] +custom: [ monzo.me/lukaszraczylo ] \ No newline at end of file diff --git a/README.md b/README.md index 234f2c4..005c353 100644 --- a/README.md +++ b/README.md @@ -2,23 +2,57 @@ Creates a passthrough proxy to a graphql endpoint(s), allowing you for analysis of the queries and responses, producing the prometheus metrics at a fraction of the cost - because as we know - $0 is a fair price. +This project is in active use by [telegram-bot.app](https://telegram-bot.app), and was tested with 30k queries per second on a single instance, consuming 10 MB of RAM and 0.1% CPU. + +![Example of monitoring dashboard](static/monitoring-at-glance.png?raw=true) + +You can find the example of the kubernetes manifest in the [example deployment](static/kubernetes-deployment.yaml) file. + +### Why this project exists + +I wanted to monitor the queries and responses of our graphql endpoint, but I didn't want to pay the price of the graphql server itself (and I will not point fingers at a certain well-known project), as monitoring and basic security features should be a common, free functionality. 
+ ### Endpoints /v1/graphql - the graphql endpoint /metrics - the prometheus metrics endpoint /healthz - the healthcheck endpoint +### Features + +* MONITORING: Prometheus / VictoriaMetrics metrics +* MONITORING: Extracting the user id from the JWT token and adding it as a label to the metrics +* MONITORING: Extracting the query name and type and adding them as labels to the metrics +* MONITORING: Calculating the query duration and adding it to the metrics +* SPEED: Caching the queries +* SECURITY: Blocking schema introspection + ### Configuration -`MONITORING_PORT` - the port to expose the metrics endpoint on (default: 9393) -`PORT_GRAPHQL` - the port to expose the graphql endpoint on (default: 8080) -`HOST_GRAPHQL` - the host to proxy the graphql endpoint to (default: `localhost/v1/graphql`) +* `MONITORING_PORT` - the port to expose the metrics endpoint on (default: 9393) +* `PORT_GRAPHQL` - the port to expose the graphql endpoint on (default: 8080) +* `HOST_GRAPHQL` - the host to proxy the graphql endpoint to (default: `http://localhost/v1/graphql`) +* `JWT_USER_CLAIM_PATH` - the path to the user claim in the JWT token (default: ``) +* `ENABLE_CACHE` - enable the cache (default: `false`) +* `CACHE_TTL` - the cache TTL in seconds, given as a plain integer (default: `60`) +* `LOG_LEVEL` - the log level (default: `info`) +* `BLOCK_SCHEMA_INTROSPECTION` - blocks the schema introspection (default: `false`) -`JWT_USER_CLAIM_PATH` - the path to the user claim in the JWT token (default: ``) +### Monitoring endpoint -`ENABLE_CACHE` - enable the cache (default: `false`) -`CACHE_TTL` - the cache TTL (default: `60s`) +Example metrics produced by the proxy: -`LOG_LEVEL` - the log level (default: `info`) - -`BLOCK_SCHEMA_INTROSPECTION` - blocks the schema introspection (default: `false`) \ No newline at end of file +``` +graphql_proxy_timed_query_bucket{cached="false",user_id="-",op_type="mutation",op_name="updateUserDetails",vmrange="1.000e-02...1.136e-02"} 6 
+graphql_proxy_timed_query_count{op_name="",cached="false",user_id="-",op_type=""} 78 +graphql_proxy_timed_query_bucket{op_name="MyQuery",cached="false",user_id="-",op_type="query",vmrange="5.995e+00...6.813e+00"} 1 +graphql_proxy_timed_query_sum{op_name="MyQuery",cached="false",user_id="-",op_type="query"} 6 +graphql_proxy_timed_query_count{op_name="MyQuery",cached="false",user_id="-",op_type="query"} 1 +graphql_proxy_executed_query{user_id="-",op_type="mutation",op_name="updateKnownSpammer",cached="false"} 1486 +graphql_proxy_executed_query{user_id="-",op_type="query",op_name="checkIfAdminsNeedRefreshing",cached="false"} 13167 +graphql_proxy_executed_query{user_id="1337",op_type="query",op_name="checkIfKnownMedia",cached="false"} 429 +graphql_proxy_executed_query{user_id="-",op_type="query",op_name="checkIfSpamAIRequiresUpdate",cached="false"} 8891 +graphql_proxy_requests_failed 324 +graphql_proxy_requests_skipped 0 +graphql_proxy_requests_succesful 454823 +``` \ No newline at end of file diff --git a/graphql.go b/graphql.go index 39776f6..e5b52fc 100644 --- a/graphql.go +++ b/graphql.go @@ -4,7 +4,6 @@ import ( fiber "github.com/gofiber/fiber/v2" "github.com/graphql-go/graphql/language/ast" "github.com/graphql-go/graphql/language/parser" - "github.com/k0kubun/pp" libpack_monitoring "github.com/telegram-bot-app/libpack/monitoring" ) @@ -31,6 +30,9 @@ var retrospection_queries = []string{ "__directives", } +// retrospectionQuerySet stores the introspection query names as a set, so a lookup is an O(1) map access instead of an O(n) scan of the slice. 
+var retrospectionQuerySet = make(map[string]struct{}, len(retrospection_queries)) + func parseGraphQLQuery(c *fiber.Ctx) (operationType, operationName string, cacheRequest bool, should_block bool) { m := make(map[string]interface{}) err := json.Unmarshal(c.Body(), &m) @@ -71,15 +73,12 @@ func parseGraphQLQuery(c *fiber.Ctx) (operationType, operationName string, cache if cfg.Security.BlockIntrospection { for _, s := range oper.SelectionSet.Selections { for _, s2 := range s.GetSelectionSet().Selections { - pp.Println(s2.(*ast.Field).Name.Value) - for _, introspection_query := range retrospection_queries { - if s2.(*ast.Field).Name.Value == introspection_query { - cfg.Logger.Warning("Introspection query blocked", m) - cfg.Monitoring.Increment(libpack_monitoring.MetricsSkipped, nil) - c.Status(403).SendString("Introspection queries are not allowed") - should_block = true - return - } + if _, exists := retrospectionQuerySet[s2.(*ast.Field).Name.Value]; exists { + cfg.Logger.Warning("Introspection query blocked", m) + cfg.Monitoring.Increment(libpack_monitoring.MetricsSkipped, nil) + c.Status(403).SendString("Introspection queries are not allowed") + should_block = true + return } } } diff --git a/main.go b/main.go index a412df3..adfa842 100644 --- a/main.go +++ b/main.go @@ -9,12 +9,18 @@ import ( var cfg *config +func init() { + for _, query := range retrospection_queries { + retrospectionQuerySet[query] = struct{}{} + } +} + func parseConfig() { libpack_config.PKG_NAME = "graphql_proxy" var c config c.Server.PortGraphQL = envutil.GetInt("PORT_GRAPHQL", 8080) c.Server.PortMonitoring = envutil.GetInt("MONITORING_PORT", 9393) - c.Server.HostGraphQL = envutil.Getenv("HOST_GRAPHQL", "localhost/v1/graphql") + c.Server.HostGraphQL = envutil.Getenv("HOST_GRAPHQL", "http://localhost/v1/graphql") c.Client.JWTUserClaimPath = envutil.Getenv("JWT_USER_CLAIM_PATH", "") c.Cache.CacheEnable = envutil.GetBool("CACHE_ENABLE", false) c.Cache.CacheTTL = envutil.GetInt("CACHE_TTL", 60) 
diff --git a/static/kubernetes-deployment.yaml b/static/kubernetes-deployment.yaml new file mode 100644 index 0000000..26e5d88 --- /dev/null +++ b/static/kubernetes-deployment.yaml @@ -0,0 +1,92 @@ +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: hasura-proxy-internal + labels: + app: hasura-proxy-internal + type: support +spec: + replicas: 2 + selector: + matchLabels: + app: hasura-proxy-internal + type: support + template: + metadata: + labels: + app: hasura-proxy-internal + type: support + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9393" + prometheus.io/path: "/metrics" + spec: + securityContext: + runAsUser: 65534 # nobody + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/worker + operator: Exists + containers: + - name: graphql-proxy + image: ghcr.io/lukaszraczylo/graphql-monitoring-proxy:latest + imagePullPolicy: Always + resources: + limits: + cpu: "1" + memory: "640Mi" + requests: + cpu: "0.75" + memory: "512Mi" + livenessProbe: + httpGet: + path: /healthz + port: 8080 + initialDelaySeconds: 5 + ports: + - name: web + containerPort: 8080 + - name: monitoring + containerPort: 9393 + env: + - name: PORT_GRAPHQL + value: "8080" + - name: MONITORING_PORT + value: "9393" + - name: HOST_GRAPHQL + value: http://hasura-internal:8080/v1/graphql + - name: ENABLE_CACHE + value: "true" + - name: CACHE_TTL + value: "10" + - name: BLOCK_SCHEMA_INTROSPECTION + value: "true" + +--- +apiVersion: v1 +kind: Service +metadata: + name: hasura-proxy-internal + labels: + app: hasura-proxy-internal + type: support + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9393" + prometheus.io/path: "/metrics" +spec: + ports: + - name: web + port: 8080 + targetPort: 8080 + - name: monitoring + port: 9393 + targetPort: 9393 + selector: + app: hasura-proxy-internal + type: support + type: ClusterIP \ No newline at end of file diff 
--git a/static/monitoring-at-glance.png b/static/monitoring-at-glance.png new file mode 100644 index 0000000..ef06b69 Binary files /dev/null and b/static/monitoring-at-glance.png differ