mirror of
https://github.com/lukaszraczylo/graphql-monitoring-proxy.git
synced 2026-06-11 00:09:37 +00:00
Compare commits
6 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
0758cd5b52
|
|||
|
51dfc8d9be
|
|||
|
2f87f40822
|
|||
|
377a1a4a26
|
|||
|
7de1cf7cc7
|
|||
| 917ee1a431 |
@@ -11,7 +11,7 @@ help: ## display this help
|
||||
|
||||
.PHONY: run
|
||||
run: build ## run application
|
||||
@LOG_LEVEL=warn BLOCK_SCHEMA_INTROSPECTION=false JWT_ROLE_RATE_LIMIT=false JWT_ROLE_CLAIM_PATH="Hasura.x-hasura-default-role" JWT_USER_CLAIM_PATH="Hasura.x-hasura-user-id" HOST_GRAPHQL=https://hasura8.lan/v1/graphql ./graphql-proxy
|
||||
@LOG_LEVEL=debug BLOCK_SCHEMA_INTROSPECTION=false JWT_ROLE_RATE_LIMIT=false JWT_ROLE_CLAIM_PATH="Hasura.x-hasura-default-role" JWT_USER_CLAIM_PATH="Hasura.x-hasura-user-id" HOST_GRAPHQL=https://hasura8.lan/v1/graphql ./graphql-proxy
|
||||
|
||||
.PHONY: build
|
||||
build: ## build the binary
|
||||
|
||||
@@ -1,16 +1,16 @@
|
||||
## graphql monitoring proxy
|
||||
|
||||
Creates a passthrough proxy to a graphql endpoint(s), allowing you for analysis of the queries and responses, producing the prometheus metrics at a fraction of the cost - because as we know - $0 is a fair price.
|
||||
Creates a passthrough proxy to a graphql endpoint(s), allowing you to analyse the queries and responses, producing the Prometheus metrics at a fraction of the cost - because, as we know - $0 is a fair price.
|
||||
|
||||
This project is in active use by [telegram-bot.app](https://telegram-bot.app), and was tested with 30k queries per second on a single instance, consuming 10mb of RAM and 0.1% CPU.
|
||||
This project is in active use by [telegram-bot.app](https://telegram-bot.app), and was tested with 30k queries per second on a single instance, consuming 10 MB of RAM and 0.1% CPU.
|
||||
|
||||

|
||||
|
||||
You can find the example of the kubernetes manifest in the [example deployment](static/kubernetes-deployment.yaml) file.
|
||||
You can find the example of the Kubernetes manifest in the [example deployment](static/kubernetes-deployment.yaml) file.
|
||||
|
||||
### Why this project exists
|
||||
|
||||
I wanted to monitor the queries and responses of our graphql endpoint, but we didn't want to pay the price of the graphql server itself ( and I will not point fingers and certain well-known project), as monitoring and basic security features should be a common, free functionality.
|
||||
I wanted to monitor the queries and responses of our GraphQL endpoint, but we didn't want to pay the price of the GraphQL server itself (and I will not point fingers at a particular well-known project), as monitoring and basic security features should be standard, free functionality.
|
||||
|
||||
### Endpoints
|
||||
|
||||
@@ -20,44 +20,58 @@ I wanted to monitor the queries and responses of our graphql endpoint, but we di
|
||||
|
||||
### Features
|
||||
|
||||
* MONITORING: Prometheus / VictoriaMetrics metrics
|
||||
* MONITORING: Extracting user id from JWT token and adding it as a label to the metrics
|
||||
* MONITORING: Extracting the query name and type and adding it as a label to the metrics
|
||||
* MONITORING: Calculating the query duration and adding it to the metrics
|
||||
* SPEED: Caching the queries
|
||||
* SECURITY: Blocking schema introspection
|
||||
* SECURITY: Rate limiting queries based on user role
|
||||
| Category | Detail |
|
||||
|------------|-----------------------------------------------------------------------|
|
||||
| monitor | Prometheus / VictoriaMetrics metrics |
|
||||
| monitor | Extracting user id from JWT token and adding it as a label to metrics |
|
||||
| monitor | Extracting the query name and type and adding it as a label to metrics|
|
||||
| monitor | Calculating the query duration and adding it to the metrics |
|
||||
| speed | Caching the queries, together with per-query cache and TTL |
|
||||
| security | Blocking schema introspection |
|
||||
| security | Rate limiting queries based on user role |
|
||||
| security | Blocking mutations in read-only mode |
|
||||
|
||||
|
||||
### Configuration
|
||||
|
||||
* `MONITORING_PORT` - the port to expose the metrics endpoint on (default: 9393)
|
||||
* `PORT_GRAPHQL` - the port to expose the graphql endpoint on (default: 8080)
|
||||
* `HOST_GRAPHQL` - the host to proxy the graphql endpoint to (default: `http://localhost/v1/graphql`)
|
||||
* `JWT_USER_CLAIM_PATH` - the path to the user claim in the JWT token (default: ``)
|
||||
* `JWT_ROLE_CLAIM_PATH` - the path to the role claim in the JWT token (default: ``)
|
||||
* `JWT_ROLE_RATE_LIMITING` - enable request rate limiting based on the role (default: `false`)
|
||||
* `ENABLE_GLOBAL_CACHE` - enable the cache (default: `false`)
|
||||
* `CACHE_TTL` - the cache TTL (default: `60s`)
|
||||
* `LOG_LEVEL` - the log level (default: `info`)
|
||||
* `BLOCK_SCHEMA_INTROSPECTION` - blocks the schema introspection (default: `false`)
|
||||
* `ENABLE_ACCESS_LOG` - enable the access log (default: `false`)
|
||||
| Parameter | Description | Default Value |
|
||||
|---------------------------|------------------------------------------|----------------------------|
|
||||
| `MONITORING_PORT` | The port to expose the metrics endpoint | `9393` |
|
||||
| `PORT_GRAPHQL` | The port to expose the graphql endpoint | `8080` |
|
||||
| `HOST_GRAPHQL` | The host to proxy the graphql endpoint | `http://localhost/v1/graphql` |
|
||||
| `JWT_USER_CLAIM_PATH` | Path to the user claim in the JWT token | `` |
|
||||
| `JWT_ROLE_CLAIM_PATH` | Path to the role claim in the JWT token | `` |
|
||||
| `ROLE_FROM_HEADER`        | Header name to extract the role from     | ``                         |
|
||||
| `ROLE_RATE_LIMIT` | Enable request rate limiting based on role| `false` |
|
||||
| `ENABLE_GLOBAL_CACHE` | Enable the cache | `false` |
|
||||
| `CACHE_TTL`               | The cache TTL (in seconds)               | `60`                       |
|
||||
| `LOG_LEVEL` | The log level | `info` |
|
||||
| `BLOCK_SCHEMA_INTROSPECTION`| Blocks the schema introspection | `false` |
|
||||
| `ENABLE_ACCESS_LOG` | Enable the access log | `false` |
|
||||
| `READ_ONLY_MODE` | Enable the read only mode | `false` |
|
||||
|
||||
|
||||
### Caching
|
||||
|
||||
Cache engine is enabled in background as it does not use any additional resources.
|
||||
You can then start using the cache by setting the `ENABLE_GLOBAL_CACHE` environment variable to `true` - which will enable the cache for all queries, without introspection of the query. You can leave the global cache disabled and enable the cache for specific queries by adding the `@cache` directive to the query.
|
||||
The cache engine is enabled in the background by default, using no additional resources.
|
||||
You can then start using the cache by setting the `ENABLE_GLOBAL_CACHE` environment variable to `true` - which will enable the cache for all queries without introspection. You can leave the global cache disabled and enable the cache for specific queries by adding the `@cached` directive to the query.
|
||||
|
||||
### Role based rate limiting
|
||||
When using the `@cached` directive, you can add a `ttl` parameter to it, which sets the cache time (in seconds) for that specific query.
|
||||
For example, `query MyCachedQuery @cached(ttl: 90) ....` will set the cache for the query to 90 seconds.
|
||||
|
||||
You are able to rate limit requests using the `JWT_ROLE_RATE_LIMITING` environment variable. If enabled, the proxy will rate limit the requests based on the role claim in the JWT token. You can then provide the json file in following format to specify the limits.
|
||||
Default interval is `second`, but you can use other values as well. If you want to disable the rate limiting for specific role, you can set the `req` to `0`.
|
||||
### Role-based rate limiting
|
||||
|
||||
You can rate limit requests using the `ROLE_RATE_LIMIT` environment variable. If enabled, the proxy will rate limit the requests based on the role claim in the JWT token. You can then provide the JSON file in the following format to specify the limits.
|
||||
The default interval is `second`, but you can use other values as well. If you want to disable the rate limiting for a specific role, you can set the `req` to `0`.
|
||||
|
||||
Available values:
|
||||
`nano`, `micro`, `milli`, `second`, `minute`, `hour`, `day`
|
||||
|
||||
To define path in JWT token where current user role is present use the `JWT_ROLE_CLAIM_PATH` environment variable.
|
||||
To define the path in the JWT token where the current user role is present, use the `JWT_ROLE_CLAIM_PATH` environment variable.
|
||||
|
||||
*Default / sample configuration:*
|
||||
You can also set the `ROLE_FROM_HEADER` environment variable to extract the role from a request header instead of the JWT token. This is useful if you want to rate limit the requests for unauthenticated users. It's worth mentioning that `ROLE_FROM_HEADER` takes priority over the `JWT_ROLE_CLAIM_PATH` environment variable: if it's set, the proxy will not try to extract the role from the JWT token.
|
||||
|
||||
*Default/sample configuration:*
|
||||
|
||||
```json
|
||||
{
|
||||
@@ -82,6 +96,11 @@ If you'd like to change it - mount your configmap as `/app/ratelimit.json` file.
|
||||
Remember to include the `-` role, which is used for unauthenticated users or when the claim can't be found for any reason.
|
||||
If the rate limit has been reached, the proxy will return a `429 Too Many Requests` error.
|
||||
|
||||
|
||||
### Read-only mode
|
||||
|
||||
You can enable the read-only mode by setting the `READ_ONLY_MODE` environment variable to `true` - which will block all `mutation` operations.
|
||||
|
||||
### Monitoring endpoint
|
||||
|
||||
Example metrics produced by the proxy:
|
||||
|
||||
@@ -42,7 +42,7 @@ require (
|
||||
github.com/valyala/tcplisten v1.0.0 // indirect
|
||||
github.com/wI2L/jsondiff v0.4.0 // indirect
|
||||
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
|
||||
golang.org/x/net v0.16.0 // indirect
|
||||
golang.org/x/net v0.17.0 // indirect
|
||||
golang.org/x/sync v0.4.0 // indirect
|
||||
golang.org/x/sys v0.13.0 // indirect
|
||||
golang.org/x/term v0.13.0 // indirect
|
||||
|
||||
@@ -90,8 +90,8 @@ github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavM
|
||||
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=
|
||||
golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI=
|
||||
golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo=
|
||||
golang.org/x/net v0.16.0 h1:7eBu7KsSvFDtSXUIDbh3aqlK4DPsZ1rByC8PFfBThos=
|
||||
golang.org/x/net v0.16.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
|
||||
golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
|
||||
golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
|
||||
golang.org/x/sync v0.4.0 h1:zxkM55ReGkDlKSM+Fu41A+zmbZuaPVbGMzvvdUPznYQ=
|
||||
golang.org/x/sync v0.4.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
|
||||
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
|
||||
+22
-1
@@ -1,6 +1,9 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
fiber "github.com/gofiber/fiber/v2"
|
||||
"github.com/graphql-go/graphql/language/ast"
|
||||
"github.com/graphql-go/graphql/language/parser"
|
||||
@@ -33,7 +36,7 @@ var retrospection_queries = []string{
|
||||
// Saving the introspection queries as a map O(1) operation instead of O(n) for a slice.
|
||||
var retrospectionQuerySet = make(map[string]struct{}, len(retrospection_queries))
|
||||
|
||||
func parseGraphQLQuery(c *fiber.Ctx) (operationType, operationName string, cacheRequest bool, should_block bool) {
|
||||
func parseGraphQLQuery(c *fiber.Ctx) (operationType, operationName string, cacheRequest bool, cache_time int, should_block bool) {
|
||||
m := make(map[string]interface{})
|
||||
err := json.Unmarshal(c.Body(), &m)
|
||||
if err != nil {
|
||||
@@ -60,6 +63,14 @@ func parseGraphQLQuery(c *fiber.Ctx) (operationType, operationName string, cache
|
||||
for _, d := range p.Definitions {
|
||||
if oper, ok := d.(*ast.OperationDefinition); ok {
|
||||
operationType = oper.Operation
|
||||
if strings.ToLower(operationType) == "mutation" && cfg.Server.ReadOnlyMode {
|
||||
cfg.Logger.Warning("Mutation blocked", m)
|
||||
cfg.Monitoring.Increment(libpack_monitoring.MetricsSkipped, nil)
|
||||
c.Status(403).SendString("The server is in read-only mode")
|
||||
should_block = true
|
||||
return
|
||||
}
|
||||
|
||||
if oper.Name != nil {
|
||||
operationName = oper.Name.Value
|
||||
} else {
|
||||
@@ -68,6 +79,16 @@ func parseGraphQLQuery(c *fiber.Ctx) (operationType, operationName string, cache
|
||||
for _, dir := range oper.Directives {
|
||||
if dir.Name.Value == "cached" {
|
||||
cacheRequest = true
|
||||
for _, arg := range dir.Arguments {
|
||||
if arg.Name.Value == "ttl" {
|
||||
cache_time, err = strconv.Atoi(arg.Value.GetValue().(string))
|
||||
if err != nil {
|
||||
cfg.Logger.Error("Can't parse the ttl", map[string]interface{}{"ttl": arg.Value.GetValue().(string)})
|
||||
cfg.Monitoring.Increment(libpack_monitoring.MetricsFailed, nil)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if cfg.Security.BlockIntrospection {
|
||||
|
||||
@@ -23,7 +23,8 @@ func parseConfig() {
|
||||
c.Server.HostGraphQL = envutil.Getenv("HOST_GRAPHQL", "http://localhost/v1/graphql")
|
||||
c.Client.JWTUserClaimPath = envutil.Getenv("JWT_USER_CLAIM_PATH", "")
|
||||
c.Client.JWTRoleClaimPath = envutil.Getenv("JWT_ROLE_CLAIM_PATH", "")
|
||||
c.Client.JWTRoleRateLimit = envutil.GetBool("JWT_ROLE_RATE_LIMIT", false)
|
||||
c.Client.RoleFromHeader = envutil.Getenv("ROLE_FROM_HEADER", "")
|
||||
c.Client.RoleRateLimit = envutil.GetBool("ROLE_RATE_LIMIT", false)
|
||||
c.Cache.CacheEnable = envutil.GetBool("ENABLE_GLOBAL_CACHE", false)
|
||||
c.Cache.CacheTTL = envutil.GetInt("CACHE_TTL", 60)
|
||||
c.Security.BlockIntrospection = envutil.GetBool("BLOCK_SCHEMA_INTROSPECTION", false)
|
||||
@@ -31,6 +32,7 @@ func parseConfig() {
|
||||
c.Client.GQLClient = graphql.NewConnection()
|
||||
c.Client.GQLClient.SetEndpoint(c.Server.HostGraphQL)
|
||||
c.Server.AccessLog = envutil.GetBool("ENABLE_ACCESS_LOG", false)
|
||||
c.Server.ReadOnlyMode = envutil.GetBool("READ_ONLY_MODE", false)
|
||||
cfg = &c
|
||||
enableCache() // takes close to no resources, but can be used with dynamic query cache
|
||||
loadRatelimitConfig()
|
||||
|
||||
@@ -54,8 +54,15 @@ func processGraphQLRequest(c *fiber.Ctx) error {
|
||||
extractedUserID, extractedRoleName = extractClaimsFromJWTHeader(string(authorization))
|
||||
}
|
||||
|
||||
if len(cfg.Client.RoleFromHeader) > 0 {
|
||||
extractedRoleName = string(c.Request().Header.Peek(cfg.Client.RoleFromHeader))
|
||||
if extractedRoleName == "" {
|
||||
extractedRoleName = "-"
|
||||
}
|
||||
}
|
||||
|
||||
// Implementing rate limiting if enabled
|
||||
if cfg.Client.JWTRoleRateLimit {
|
||||
if cfg.Client.RoleRateLimit {
|
||||
cfg.Logger.Debug("Rate limiting enabled", map[string]interface{}{"user_id": extractedUserID, "role_name": extractedRoleName})
|
||||
if !rateLimitedRequest(extractedUserID, extractedRoleName) {
|
||||
c.Status(429).SendString("Rate limit exceeded, try again later")
|
||||
@@ -63,11 +70,16 @@ func processGraphQLRequest(c *fiber.Ctx) error {
|
||||
}
|
||||
}
|
||||
|
||||
opType, opName, cacheFromQuery, shouldBlock := parseGraphQLQuery(c)
|
||||
opType, opName, cacheFromQuery, cache_time, shouldBlock := parseGraphQLQuery(c)
|
||||
if shouldBlock {
|
||||
return nil
|
||||
}
|
||||
|
||||
if cache_time > 0 {
|
||||
cfg.Logger.Debug("Cache time set via query", map[string]interface{}{"cache_time": cache_time})
|
||||
cache_time = cfg.Cache.CacheTTL
|
||||
}
|
||||
|
||||
wasCached := false
|
||||
|
||||
// Handling Cache Logic
|
||||
@@ -81,7 +93,7 @@ func processGraphQLRequest(c *fiber.Ctx) error {
|
||||
wasCached = true
|
||||
} else {
|
||||
cfg.Logger.Debug("Cache miss", map[string]interface{}{"hash": queryCacheHash, "user_id": extractedUserID})
|
||||
proxyAndCacheTheRequest(c, queryCacheHash)
|
||||
proxyAndCacheTheRequest(c, queryCacheHash, cache_time)
|
||||
}
|
||||
} else {
|
||||
proxyTheRequest(c)
|
||||
@@ -96,9 +108,9 @@ func processGraphQLRequest(c *fiber.Ctx) error {
|
||||
}
|
||||
|
||||
// Additional helper function to avoid code repetition
|
||||
func proxyAndCacheTheRequest(c *fiber.Ctx, queryCacheHash string) {
|
||||
func proxyAndCacheTheRequest(c *fiber.Ctx, queryCacheHash string, cache_time int) {
|
||||
proxyTheRequest(c)
|
||||
cfg.Cache.CacheClient.Set(queryCacheHash, c.Response().Body(), time.Duration(cfg.Cache.CacheTTL)*time.Second)
|
||||
cfg.Cache.CacheClient.Set(queryCacheHash, c.Response().Body(), time.Duration(cache_time)*time.Second)
|
||||
c.Send(c.Response().Body())
|
||||
}
|
||||
|
||||
|
||||
+3
-1
@@ -18,12 +18,14 @@ type config struct {
|
||||
PortMonitoring int
|
||||
HostGraphQL string
|
||||
AccessLog bool
|
||||
ReadOnlyMode bool
|
||||
}
|
||||
|
||||
Client struct {
|
||||
JWTUserClaimPath string
|
||||
JWTRoleClaimPath string
|
||||
JWTRoleRateLimit bool
|
||||
RoleRateLimit bool
|
||||
RoleFromHeader string
|
||||
GQLClient *graphql.BaseClient
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user