mirror of
https://github.com/lukaszraczylo/graphql-monitoring-proxy.git
synced 2026-06-04 22:59:26 +00:00
c2c75d69c0
Performance / resource usage: - circuit_breaker_metrics: fix data race on failCounters map (RWMutex + double-checked locking) - server.go: drop user_id and op_name metric labels (Prometheus cardinality bound); de-duplicate extractUserInfo - graphql.go: gate runtime.ReadMemStats per-request behind ENABLE_ALLOCATION_TRACKING flag (default off) - graphql.go: collapse two-pass AST scan into single pass; lower-case once - sanitization.go: cache compiled redaction regexes per pattern via sync.Map; hoist inner constants to pkg vars - proxy.go: hoist connection/timeout substrings to pkg vars; sentinel errors for static error paths; drop dead Headers map alloc - metrics_aggregator.go: log-field allocation guarded by Logger.IsLevelEnabled - logging/logger.go: add IsLevelEnabled helper - lru_cache.go: 16-shard sharding, FNV-1a routing (concurrent throughput +22%) - cache/memory/lru_memory_cache.go: gzip compress/decompress moved outside mu.Lock - rps_tracker.go: RWMutex+uint64 -> atomic.Uint64 - retry_budget.go: drop unused mutex - api.go: bannedUsersIDs map+RWMutex -> sync.Map (+ snapshot/replace helpers) - tracing/tracing.go: pkg-level constSpanAttrs, copy-then-append in StartSpanWithAttributes - admin_dashboard.go: handleStatsWebSocket reuses bytes.Buffer + json.Encoder per connection Build / runtime: - Makefile: -ldflags="-s -w" -trimpath, CGO_ENABLED=0 for build (=1 for test recipes) - Dockerfile + Dockerfile.goreleaser: ENV GOMEMLIMIT=512MiB - main.go: blank import go.uber.org/automaxprocs (cgroup-aware GOMAXPROCS) - main.go: PPROF_PORT env var wires net/http/pprof on 127.0.0.1 only with full server timeouts - README.md: env-var docs + metric-label docs updated; cardinality note Test coverage push (per package): - main 51.2% -> 74.7% - cache 66.3% -> 93.7% - cache/redis 45.5% -> 98.2% - tracing 66.7% -> 72.9% - (cache/memory 91.6%, logging 91.9%, monitoring 77.6%, pkg/pools 100% unchanged) New test files: coverage_micro_test, coverage_extras_test, server_handlers_test, 
api_health_test, admin_dashboard_cluster_test, metrics_aggregator_test, concerns_test, cache/cache_coverage_test, cache/redis/redis_coverage_test, tracing/tracing_coverage_test. Bug fix: connection_resilience_test.go TestIntegratedHealthManagement.health_manager_startup was sync.Once-coupled to InitializeBackendHealth and panicked when another test (e.g. via parseConfig) had already triggered Once. Use NewBackendHealthManager directly.
268 lines
8.4 KiB
Go
268 lines
8.4 KiB
Go
package main
|
|
|
|
import (
|
|
"bytes"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"sync/atomic"
|
|
"testing"
|
|
"time"
|
|
|
|
libpack_logger "github.com/lukaszraczylo/graphql-monitoring-proxy/logging"
|
|
"github.com/stretchr/testify/suite"
|
|
)
|
|
|
|
// ConnectionResilienceTestSuite tests connection resilience features
|
|
type ConnectionResilienceTestSuite struct {
|
|
suite.Suite
|
|
originalConfig *config
|
|
outputBuffer *bytes.Buffer
|
|
mockServer *httptest.Server
|
|
mockServerCalled atomic.Int32
|
|
}
|
|
|
|
func (suite *ConnectionResilienceTestSuite) SetupTest() {
|
|
// Store original config
|
|
suite.originalConfig = cfg
|
|
|
|
// Create a buffer to capture logger output
|
|
suite.outputBuffer = &bytes.Buffer{}
|
|
|
|
// Setup a new config with a real logger that writes to our buffer
|
|
cfg = &config{}
|
|
cfg.Logger = libpack_logger.New().SetOutput(suite.outputBuffer)
|
|
|
|
// Reset call counter
|
|
suite.mockServerCalled.Store(0)
|
|
|
|
// Create a mock GraphQL server
|
|
suite.mockServer = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
suite.mockServerCalled.Add(1)
|
|
w.Header().Set("Content-Type", "application/json")
|
|
w.WriteHeader(http.StatusOK)
|
|
w.Write([]byte(`{"data":{"__typename":"Query"}}`))
|
|
}))
|
|
|
|
// Configure the test with mock server URL
|
|
cfg.Server.HostGraphQL = suite.mockServer.URL
|
|
cfg.Client.ClientTimeout = 5
|
|
cfg.Client.MaxConnsPerHost = 10
|
|
cfg.Client.MaxIdleConnDuration = 30
|
|
cfg.Client.DisableTLSVerify = true
|
|
|
|
// Create fasthttp client
|
|
cfg.Client.FastProxyClient = createFasthttpClient(cfg)
|
|
}
|
|
|
|
func (suite *ConnectionResilienceTestSuite) TearDownTest() {
|
|
// Close mock server
|
|
if suite.mockServer != nil {
|
|
suite.mockServer.Close()
|
|
}
|
|
|
|
// Clean up global instances with proper shutdown
|
|
if backendHealthManager != nil {
|
|
backendHealthManager.Shutdown()
|
|
backendHealthManager = nil
|
|
}
|
|
|
|
if connectionPoolManager != nil {
|
|
connectionPoolManager.Shutdown()
|
|
connectionPoolManager = nil
|
|
}
|
|
|
|
// Restore original config
|
|
cfg = suite.originalConfig
|
|
}
|
|
|
|
// TestBackendHealthManager tests the backend health monitoring
|
|
func (suite *ConnectionResilienceTestSuite) TestBackendHealthManager() {
|
|
suite.Run("initialization", func() {
|
|
healthMgr := NewBackendHealthManager(cfg.Client.FastProxyClient, cfg.Server.HostGraphQL, cfg.Logger)
|
|
suite.NotNil(healthMgr)
|
|
suite.Equal(cfg.Server.HostGraphQL, healthMgr.backendURL)
|
|
suite.Equal(5*time.Second, healthMgr.checkInterval)
|
|
suite.Equal(30, healthMgr.maxRetries)
|
|
})
|
|
|
|
suite.Run("health check success", func() {
|
|
healthMgr := NewBackendHealthManager(cfg.Client.FastProxyClient, cfg.Server.HostGraphQL, cfg.Logger)
|
|
isHealthy := healthMgr.checkBackendHealth()
|
|
suite.True(isHealthy)
|
|
suite.GreaterOrEqual(suite.mockServerCalled.Load(), int32(1))
|
|
})
|
|
|
|
suite.Run("health check failure", func() {
|
|
// Use invalid URL to simulate failure
|
|
healthMgr := NewBackendHealthManager(cfg.Client.FastProxyClient, "http://invalid-url:99999", cfg.Logger)
|
|
isHealthy := healthMgr.checkBackendHealth()
|
|
suite.False(isHealthy)
|
|
})
|
|
|
|
suite.Run("startup readiness with healthy backend", func() {
|
|
healthMgr := NewBackendHealthManager(cfg.Client.FastProxyClient, cfg.Server.HostGraphQL, cfg.Logger)
|
|
err := healthMgr.WaitForBackendReady(10 * time.Second)
|
|
suite.NoError(err)
|
|
suite.True(healthMgr.IsHealthy())
|
|
})
|
|
|
|
suite.Run("startup readiness timeout", func() {
|
|
// Use invalid URL to simulate backend not ready
|
|
healthMgr := NewBackendHealthManager(cfg.Client.FastProxyClient, "http://invalid-url:99999", cfg.Logger)
|
|
err := healthMgr.WaitForBackendReady(2 * time.Second)
|
|
suite.Error(err)
|
|
suite.Contains(err.Error(), "did not become ready")
|
|
})
|
|
}
|
|
|
|
// TestConnectionPoolManager tests the connection pool management
|
|
func (suite *ConnectionResilienceTestSuite) TestConnectionPoolManager() {
|
|
suite.Run("initialization", func() {
|
|
poolMgr := NewConnectionPoolManager(cfg.Client.FastProxyClient)
|
|
suite.NotNil(poolMgr)
|
|
suite.NotNil(poolMgr.client)
|
|
suite.Equal(45*time.Second, poolMgr.keepAliveInterval) // Updated from 15s to 45s for lower backend load
|
|
suite.Equal(30*time.Second, poolMgr.cleanupInterval)
|
|
suite.Equal(60*time.Second, poolMgr.recoveryCheckInterval)
|
|
})
|
|
|
|
suite.Run("connection statistics", func() {
|
|
poolMgr := NewConnectionPoolManager(cfg.Client.FastProxyClient)
|
|
|
|
// Record some connections
|
|
poolMgr.RecordConnectionSuccess()
|
|
poolMgr.RecordConnectionSuccess()
|
|
poolMgr.RecordConnectionFailure()
|
|
|
|
stats := poolMgr.GetConnectionStats()
|
|
suite.Equal(int64(2), stats["active_connections"])
|
|
suite.Equal(int64(2), stats["total_connections"])
|
|
suite.Equal(int64(1), stats["connection_failures"])
|
|
})
|
|
|
|
suite.Run("keep alive functionality", func() {
|
|
poolMgr := NewConnectionPoolManager(cfg.Client.FastProxyClient)
|
|
poolMgr.logger = cfg.Logger
|
|
|
|
// With the optimized keep-alive, it skips when no failures and connections exist
|
|
// So we first record a failure to force keep-alive to execute
|
|
poolMgr.RecordConnectionFailure()
|
|
|
|
// Test keep-alive with valid backend
|
|
poolMgr.performKeepAlive()
|
|
|
|
// Should have made a request to the mock server
|
|
suite.GreaterOrEqual(suite.mockServerCalled.Load(), int32(1))
|
|
})
|
|
|
|
suite.Run("recovery mechanism", func() {
|
|
poolMgr := NewConnectionPoolManager(cfg.Client.FastProxyClient)
|
|
poolMgr.logger = cfg.Logger
|
|
|
|
// Simulate many failures to trigger recovery
|
|
for i := 0; i < 10; i++ {
|
|
poolMgr.RecordConnectionFailure()
|
|
}
|
|
|
|
// Check recovery triggers
|
|
poolMgr.checkAndRecover()
|
|
|
|
// Verify failure count was reset
|
|
stats := poolMgr.GetConnectionStats()
|
|
suite.Equal(int64(0), stats["connection_failures"])
|
|
})
|
|
}
|
|
|
|
// TestIntegratedHealthManagement tests integration between health manager and connection pool
|
|
func (suite *ConnectionResilienceTestSuite) TestIntegratedHealthManagement() {
|
|
suite.Run("global initialization", func() {
|
|
// Initialize global instances
|
|
healthMgr := InitializeBackendHealth(cfg.Client.FastProxyClient, cfg.Server.HostGraphQL, cfg.Logger)
|
|
poolMgr := NewConnectionPoolManager(cfg.Client.FastProxyClient)
|
|
|
|
// Set global instances
|
|
backendHealthManager = healthMgr
|
|
connectionPoolManager = poolMgr
|
|
|
|
// Test global access
|
|
suite.Equal(healthMgr, GetBackendHealthManager())
|
|
suite.Equal(poolMgr, GetConnectionPoolManager())
|
|
})
|
|
|
|
suite.Run("health manager startup", func() {
|
|
// Use NewBackendHealthManager directly: InitializeBackendHealth is sync.Once-gated
|
|
// and may have already fired earlier in the process (e.g. via parseConfig in
|
|
// another test), in which case it returns whatever the global currently is —
|
|
// which TearDownTest above just nilled.
|
|
healthMgr := NewBackendHealthManager(cfg.Client.FastProxyClient, cfg.Server.HostGraphQL, cfg.Logger)
|
|
backendHealthManager = healthMgr
|
|
|
|
// Start health checking
|
|
healthMgr.StartHealthChecking()
|
|
|
|
// Wait for backend to be ready
|
|
err := healthMgr.WaitForBackendReady(10 * time.Second)
|
|
suite.NoError(err)
|
|
|
|
// Give some time for health checks to run
|
|
time.Sleep(100 * time.Millisecond)
|
|
|
|
// Verify health status
|
|
suite.True(healthMgr.IsHealthy())
|
|
suite.Equal(int32(0), healthMgr.GetConsecutiveFailures())
|
|
})
|
|
}
|
|
|
|
// TestConnectionErrorDetection tests connection error detection
|
|
func (suite *ConnectionResilienceTestSuite) TestConnectionErrorDetection() {
|
|
testCases := []struct {
|
|
name string
|
|
errorMsg string
|
|
expected bool
|
|
}{
|
|
{"connection refused", "connection refused", true},
|
|
{"connection reset", "connection reset by peer", true},
|
|
{"no route to host", "no route to host", true},
|
|
{"network unreachable", "network is unreachable", true},
|
|
{"broken pipe", "broken pipe", true},
|
|
{"EOF", "EOF", true},
|
|
{"dial tcp", "dial tcp 127.0.0.1:99999: connect: connection refused", true},
|
|
{"regular error", "some other error", false},
|
|
{"timeout error", "timeout exceeded", false},
|
|
}
|
|
|
|
for _, tc := range testCases {
|
|
suite.Run(tc.name, func() {
|
|
fakeErr := &mockError{msg: tc.errorMsg}
|
|
isConn := isConnectionError(fakeErr)
|
|
suite.Equal(tc.expected, isConn)
|
|
})
|
|
}
|
|
}
|
|
|
|
// mockError is a minimal error used by the tests; its Error method returns the
// stored message verbatim.
type mockError struct {
	msg string
}

// Compile-time check that *mockError satisfies the error interface.
var _ error = (*mockError)(nil)

// Error returns the message the mockError was created with.
func (m *mockError) Error() string {
	return m.msg
}
|
|
|
|
// TestRetryLogic tests the enhanced retry mechanism
|
|
func (suite *ConnectionResilienceTestSuite) TestRetryLogic() {
|
|
suite.Run("connection error classification", func() {
|
|
// Test that connection errors are properly identified
|
|
connErr := &mockError{msg: "connection refused"}
|
|
suite.True(isConnectionError(connErr))
|
|
|
|
timeoutErr := &mockError{msg: "timeout exceeded"}
|
|
suite.False(isConnectionError(timeoutErr))
|
|
})
|
|
}
|
|
|
|
// Start the test suite
|
|
func TestConnectionResilienceSuite(t *testing.T) {
|
|
suite.Run(t, new(ConnectionResilienceTestSuite))
|
|
}
|