improvements mid may 2025 (#24)

* General improvements and bug fixes.

* Improve tests coverage.

* fixup! Improve tests coverage.

* Update README.md with latest changes.

* Fix the uint32

* Resolve issue with race condition for logging.

* fixup! Merge remote-tracking branch 'origin/main' into improvements-mid-apr-2025

* Fix the test of the rate limiter

* Add default ratelimit.json file

* Update dependencies.

* Significant refactor.

* fixup! Significant refactor.

* fixup! Merge remote-tracking branch 'origin/main' into improvements-mid-apr-2025

* fixup! fixup! Merge remote-tracking branch 'origin/main' into improvements-mid-apr-2025

* fixup! fixup! fixup! Merge remote-tracking branch 'origin/main' into improvements-mid-apr-2025

* fixup! fixup! fixup! fixup! fixup! Merge remote-tracking branch 'origin/main' into improvements-mid-apr-2025

* fixup! fixup! fixup! fixup! fixup! fixup! Merge remote-tracking branch 'origin/main' into improvements-mid-apr-2025

* fixup! fixup! fixup! fixup! fixup! fixup! fixup! Merge remote-tracking branch 'origin/main' into improvements-mid-apr-2025

* fixup! fixup! fixup! fixup! fixup! fixup! fixup! fixup! Merge remote-tracking branch 'origin/main' into improvements-mid-apr-2025

* fixup! fixup! fixup! fixup! fixup! fixup! fixup! fixup! fixup! Merge remote-tracking branch 'origin/main' into improvements-mid-apr-2025

* fixup! fixup! fixup! fixup! fixup! fixup! fixup! fixup! fixup! fixup! Merge remote-tracking branch 'origin/main' into improvements-mid-apr-2025
This commit is contained in:
2025-09-30 18:27:33 +01:00
committed by GitHub
parent 3bd96cbd8a
commit cedee416a8
80 changed files with 22799 additions and 647 deletions
+276 -29
View File
@@ -1,6 +1,8 @@
package main
import (
"context"
"crypto/subtle"
"fmt"
"os"
"sync"
@@ -12,6 +14,7 @@ import (
libpack_cache "github.com/lukaszraczylo/graphql-monitoring-proxy/cache"
libpack_config "github.com/lukaszraczylo/graphql-monitoring-proxy/config"
libpack_logger "github.com/lukaszraczylo/graphql-monitoring-proxy/logging"
"github.com/sony/gobreaker"
)
var (
@@ -19,9 +22,43 @@ var (
bannedUsersIDsMutex sync.RWMutex
)
func enableApi() {
// authMiddleware provides API key authentication for admin endpoints
func authMiddleware(c *fiber.Ctx) error {
apiKey := c.Get("X-API-Key")
// Get expected key from config (try GMP_ prefix first, then fallback)
expectedKey := os.Getenv("GMP_ADMIN_API_KEY")
if expectedKey == "" {
expectedKey = os.Getenv("ADMIN_API_KEY")
}
// If no API key is configured, authentication is optional (internal service pattern)
// Admin endpoints are typically protected by network segmentation
if expectedKey == "" {
cfg.Logger.Debug(&libpack_logger.LogMessage{
Message: "Admin API authentication disabled - endpoints protected by network segmentation",
Pairs: map[string]interface{}{"endpoint": c.Path()},
})
return c.Next()
}
// Use constant-time comparison to prevent timing attacks
if subtle.ConstantTimeCompare([]byte(apiKey), []byte(expectedKey)) != 1 {
cfg.Logger.Warning(&libpack_logger.LogMessage{
Message: "Unauthorized API access attempt",
Pairs: map[string]interface{}{"endpoint": c.Path(), "ip": c.IP()},
})
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{
"error": "Unauthorized",
})
}
return c.Next()
}
func enableApi(ctx context.Context) error {
if !cfg.Server.EnableApi {
return
return nil
}
apiserver := fiber.New(fiber.Config{
@@ -30,31 +67,57 @@ func enableApi() {
})
api := apiserver.Group("/api")
// Apply authentication middleware to all admin routes
api.Use(authMiddleware)
api.Post("/user-ban", apiBanUser)
api.Post("/user-unban", apiUnbanUser)
api.Post("/cache-clear", apiClearCache)
api.Get("/cache-stats", apiCacheStats)
api.Get("/circuit-breaker/health", apiCircuitBreakerHealth)
api.Get("/backend/health", apiBackendHealth)
api.Get("/connection-pool/health", apiConnectionPoolHealth)
go periodicallyReloadBannedUsers()
// Start banned users reload in a separate goroutine with context
go periodicallyReloadBannedUsers(ctx)
if err := apiserver.Listen(fmt.Sprintf(":%d", cfg.Server.ApiPort)); err != nil {
cfg.Logger.Critical(&libpack_logger.LogMessage{
Message: "Can't start the service",
Pairs: map[string]interface{}{"port": cfg.Server.ApiPort},
// Start server in a goroutine and handle shutdown
errCh := make(chan error, 1)
go func() {
if err := apiserver.Listen(fmt.Sprintf(":%d", cfg.Server.ApiPort)); err != nil {
errCh <- err
}
}()
// Wait for context cancellation or error
select {
case <-ctx.Done():
cfg.Logger.Info(&libpack_logger.LogMessage{
Message: "Shutting down API server",
})
return apiserver.Shutdown()
case err := <-errCh:
return err
}
}
func periodicallyReloadBannedUsers() {
func periodicallyReloadBannedUsers(ctx context.Context) {
ticker := time.NewTicker(10 * time.Second)
defer ticker.Stop()
for range ticker.C {
loadBannedUsers()
cfg.Logger.Debug(&libpack_logger.LogMessage{
Message: "Banned users reloaded",
Pairs: map[string]interface{}{"users": bannedUsersIDs},
})
for {
select {
case <-ctx.Done():
cfg.Logger.Info(&libpack_logger.LogMessage{
Message: "Stopping banned users reload",
})
return
case <-ticker.C:
loadBannedUsers()
cfg.Logger.Debug(&libpack_logger.LogMessage{
Message: "Banned users reloaded",
Pairs: map[string]interface{}{"users": bannedUsersIDs},
})
}
}
}
@@ -73,7 +136,12 @@ func checkIfUserIsBanned(c *fiber.Ctx, userID string) bool {
Message: "User is banned",
Pairs: map[string]interface{}{"user_id": userID},
})
c.Status(fiber.StatusForbidden).SendString("User is banned")
if err := c.Status(fiber.StatusForbidden).SendString("User is banned"); err != nil {
cfg.Logger.Error(&libpack_logger.LogMessage{
Message: "Failed to send banned user response",
Pairs: map[string]interface{}{"error": err.Error()},
})
}
}
return found
}
@@ -93,6 +161,58 @@ func apiCacheStats(c *fiber.Ctx) error {
return c.JSON(libpack_cache.GetCacheStats())
}
// apiCircuitBreakerHealth returns the health status of the circuit breaker
func apiCircuitBreakerHealth(c *fiber.Ctx) error {
if cb == nil {
return c.Status(fiber.StatusServiceUnavailable).JSON(fiber.Map{
"status": "disabled",
"message": "Circuit breaker is not enabled",
})
}
// Get circuit breaker state
state := cb.State()
counts := cb.Counts()
// Determine health status
var status string
var httpStatus int
switch state {
case gobreaker.StateClosed:
status = "healthy"
httpStatus = fiber.StatusOK
case gobreaker.StateHalfOpen:
status = "recovering"
httpStatus = fiber.StatusOK
case gobreaker.StateOpen:
status = "unhealthy"
httpStatus = fiber.StatusServiceUnavailable
}
response := fiber.Map{
"status": status,
"state": state.String(),
"counts": fiber.Map{
"requests": counts.Requests,
"total_successes": counts.TotalSuccesses,
"total_failures": counts.TotalFailures,
"consecutive_successes": counts.ConsecutiveSuccesses,
"consecutive_failures": counts.ConsecutiveFailures,
},
"configuration": fiber.Map{
"max_failures": cfg.CircuitBreaker.MaxFailures,
"failure_ratio": cfg.CircuitBreaker.FailureRatio,
"sample_size": cfg.CircuitBreaker.SampleSize,
"timeout_seconds": cfg.CircuitBreaker.Timeout,
"max_half_open_reqs": cfg.CircuitBreaker.MaxRequestsInHalfOpen,
"backoff_multiplier": cfg.CircuitBreaker.BackoffMultiplier,
},
}
return c.Status(httpStatus).JSON(response)
}
type apiBanUserRequest struct {
UserID string `json:"user_id"`
Reason string `json:"reason"`
@@ -163,7 +283,14 @@ func storeBannedUsers() error {
if err := lockFile(fileLock); err != nil {
return err
}
defer fileLock.Unlock()
defer func() {
if err := fileLock.Unlock(); err != nil {
cfg.Logger.Error(&libpack_logger.LogMessage{
Message: "Failed to unlock file",
Pairs: map[string]interface{}{"error": err.Error()},
})
}
}()
bannedUsersIDsMutex.RLock()
data, err := json.Marshal(bannedUsersIDs)
@@ -177,7 +304,7 @@ func storeBannedUsers() error {
return err
}
if err := os.WriteFile(cfg.Api.BannedUsersFile, data, 0644); err != nil {
if err := os.WriteFile(cfg.Api.BannedUsersFile, data, 0o644); err != nil {
cfg.Logger.Error(&libpack_logger.LogMessage{
Message: "Can't write banned users to file",
Pairs: map[string]interface{}{"error": err.Error()},
@@ -194,7 +321,7 @@ func loadBannedUsers() {
Message: "Banned users file doesn't exist - creating it",
Pairs: map[string]interface{}{"file": cfg.Api.BannedUsersFile},
})
if err := os.WriteFile(cfg.Api.BannedUsersFile, []byte("{}"), 0644); err != nil {
if err := os.WriteFile(cfg.Api.BannedUsersFile, []byte("{}"), 0o644); err != nil {
cfg.Logger.Error(&libpack_logger.LogMessage{
Message: "Can't create and write to the file",
Pairs: map[string]interface{}{"error": err.Error()},
@@ -211,7 +338,14 @@ func loadBannedUsers() {
})
return
}
defer fileLock.Unlock()
defer func() {
if err := fileLock.Unlock(); err != nil {
cfg.Logger.Error(&libpack_logger.LogMessage{
Message: "Failed to unlock file",
Pairs: map[string]interface{}{"error": err.Error()},
})
}
}()
data, err := os.ReadFile(cfg.Api.BannedUsersFile)
if err != nil {
@@ -237,23 +371,136 @@ func loadBannedUsers() {
}
func lockFile(fileLock *flock.Flock) error {
if err := fileLock.Lock(); err != nil {
// Add timeout to prevent indefinite blocking
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
// Try to acquire lock with timeout
lockChan := make(chan error, 1)
go func() {
lockChan <- fileLock.Lock()
}()
select {
case err := <-lockChan:
if err != nil {
cfg.Logger.Error(&libpack_logger.LogMessage{
Message: "Can't lock the file",
Pairs: map[string]interface{}{"error": err.Error()},
})
return err
}
return nil
case <-ctx.Done():
cfg.Logger.Error(&libpack_logger.LogMessage{
Message: "Can't lock the file",
Pairs: map[string]interface{}{"error": err.Error()},
Message: "File lock timeout",
Pairs: map[string]interface{}{"timeout": "30s"},
})
return err
return fmt.Errorf("file lock timeout after 30 seconds")
}
return nil
}
func lockFileRead(fileLock *flock.Flock) error {
if err := fileLock.RLock(); err != nil {
// Add timeout to prevent indefinite blocking
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
// Try to acquire read lock with timeout
lockChan := make(chan error, 1)
go func() {
lockChan <- fileLock.RLock()
}()
select {
case err := <-lockChan:
if err != nil {
cfg.Logger.Error(&libpack_logger.LogMessage{
Message: "Can't lock the file for reading",
Pairs: map[string]interface{}{"error": err.Error()},
})
return err
}
return nil
case <-ctx.Done():
cfg.Logger.Error(&libpack_logger.LogMessage{
Message: "Can't lock the file for reading",
Pairs: map[string]interface{}{"error": err.Error()},
Message: "File read lock timeout",
Pairs: map[string]interface{}{"timeout": "30s"},
})
return err
return fmt.Errorf("file read lock timeout after 30 seconds")
}
return nil
}
// apiBackendHealth returns the health status of the GraphQL backend
func apiBackendHealth(c *fiber.Ctx) error {
healthMgr := GetBackendHealthManager()
if healthMgr == nil {
return c.Status(fiber.StatusServiceUnavailable).JSON(fiber.Map{
"status": "unknown",
"message": "Backend health manager not initialized",
})
}
isHealthy := healthMgr.IsHealthy()
lastCheck := healthMgr.GetLastHealthCheck()
consecutiveFailures := healthMgr.GetConsecutiveFailures()
var status string
var httpStatus int
if isHealthy {
status = "healthy"
httpStatus = fiber.StatusOK
} else {
status = "unhealthy"
httpStatus = fiber.StatusServiceUnavailable
}
response := fiber.Map{
"status": status,
"backend_url": cfg.Server.HostGraphQL,
"last_health_check": lastCheck,
"consecutive_failures": consecutiveFailures,
"check_interval": "5s",
}
return c.Status(httpStatus).JSON(response)
}
// apiConnectionPoolHealth returns the health status of the connection pool
func apiConnectionPoolHealth(c *fiber.Ctx) error {
poolMgr := GetConnectionPoolManager()
if poolMgr == nil {
return c.Status(fiber.StatusServiceUnavailable).JSON(fiber.Map{
"status": "unknown",
"message": "Connection pool manager not initialized",
})
}
stats := poolMgr.GetConnectionStats()
connectionFailures := stats["connection_failures"].(int64)
var status string
var httpStatus int
// Consider pool healthy if we haven't had too many recent failures
if connectionFailures < 10 {
status = "healthy"
httpStatus = fiber.StatusOK
} else {
status = "degraded"
httpStatus = fiber.StatusOK // Still return 200 since pool is functional
}
response := fiber.Map{
"status": status,
"active_connections": stats["active_connections"],
"total_connections": stats["total_connections"],
"connection_failures": connectionFailures,
"last_recovery_attempt": stats["last_recovery_attempt"],
"cleanup_interval": "30s",
"keepalive_interval": "15s",
"recovery_check_interval": "60s",
}
return c.Status(httpStatus).JSON(response)
}