mirror of
https://github.com/lukaszraczylo/kportal.git
synced 2026-06-09 23:59:45 +00:00
324 lines
8.4 KiB
Go
324 lines
8.4 KiB
Go
package forward
|
|
|
|
import (
|
|
"sync"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
"github.com/stretchr/testify/suite"
|
|
)
|
|
|
|
// WatchdogTestSuite contains tests for the watchdog
|
|
type WatchdogTestSuite struct {
|
|
suite.Suite
|
|
watchdog *Watchdog
|
|
}
|
|
|
|
func TestWatchdogSuite(t *testing.T) {
|
|
suite.Run(t, new(WatchdogTestSuite))
|
|
}
|
|
|
|
func (s *WatchdogTestSuite) SetupTest() {
|
|
// Create watchdog with fast intervals for testing
|
|
s.watchdog = NewWatchdog(100*time.Millisecond, 300*time.Millisecond)
|
|
s.watchdog.Start()
|
|
}
|
|
|
|
func (s *WatchdogTestSuite) TearDownTest() {
|
|
if s.watchdog != nil {
|
|
s.watchdog.Stop()
|
|
}
|
|
}
|
|
|
|
// TestRegisterUnregister tests basic registration and unregistration
|
|
func (s *WatchdogTestSuite) TestRegisterUnregister() {
|
|
callbackCalled := false
|
|
callback := func(forwardID string) {
|
|
callbackCalled = true
|
|
}
|
|
|
|
// Register worker
|
|
s.watchdog.RegisterWorker("test-forward", callback)
|
|
|
|
// Verify worker is registered
|
|
_, _, exists := s.watchdog.GetWorkerState("test-forward")
|
|
assert.True(s.T(), exists, "Worker should be registered")
|
|
|
|
// Unregister worker
|
|
s.watchdog.UnregisterWorker("test-forward")
|
|
|
|
// Verify worker is unregistered
|
|
_, _, exists = s.watchdog.GetWorkerState("test-forward")
|
|
assert.False(s.T(), exists, "Worker should be unregistered")
|
|
assert.False(s.T(), callbackCalled, "Callback should not have been called")
|
|
}
|
|
|
|
// TestHeartbeat tests that heartbeats update worker state
|
|
func (s *WatchdogTestSuite) TestHeartbeat() {
|
|
s.watchdog.RegisterWorker("test-forward", nil)
|
|
|
|
// Send initial heartbeat
|
|
s.watchdog.Heartbeat("test-forward")
|
|
|
|
lastHeartbeat1, count1, exists := s.watchdog.GetWorkerState("test-forward")
|
|
require.True(s.T(), exists)
|
|
assert.Equal(s.T(), uint64(1), count1)
|
|
|
|
// Wait a bit
|
|
time.Sleep(50 * time.Millisecond)
|
|
|
|
// Send another heartbeat
|
|
s.watchdog.Heartbeat("test-forward")
|
|
|
|
lastHeartbeat2, count2, exists := s.watchdog.GetWorkerState("test-forward")
|
|
require.True(s.T(), exists)
|
|
assert.Equal(s.T(), uint64(2), count2)
|
|
assert.True(s.T(), lastHeartbeat2.After(lastHeartbeat1), "Second heartbeat should be after first")
|
|
}
|
|
|
|
// TestHungWorkerDetection tests that hung workers are detected
|
|
func (s *WatchdogTestSuite) TestHungWorkerDetection() {
|
|
callbackCalled := make(chan string, 1)
|
|
callback := func(forwardID string) {
|
|
callbackCalled <- forwardID
|
|
}
|
|
|
|
s.watchdog.RegisterWorker("test-forward", callback)
|
|
|
|
// Send initial heartbeat
|
|
s.watchdog.Heartbeat("test-forward")
|
|
|
|
// Wait for worker to be considered hung (300ms threshold + 100ms check interval)
|
|
timeout := time.After(1 * time.Second)
|
|
|
|
select {
|
|
case forwardID := <-callbackCalled:
|
|
assert.Equal(s.T(), "test-forward", forwardID)
|
|
case <-timeout:
|
|
s.T().Fatal("Timeout waiting for hung worker callback")
|
|
}
|
|
}
|
|
|
|
// TestHealthyWorkerNotDetectedAsHung tests that workers sending heartbeats are not considered hung
|
|
func (s *WatchdogTestSuite) TestHealthyWorkerNotDetectedAsHung() {
|
|
callbackCalled := false
|
|
var mu sync.Mutex
|
|
callback := func(forwardID string) {
|
|
mu.Lock()
|
|
defer mu.Unlock()
|
|
callbackCalled = true
|
|
}
|
|
|
|
s.watchdog.RegisterWorker("test-forward", callback)
|
|
|
|
// Send periodic heartbeats (faster than hang threshold)
|
|
ticker := time.NewTicker(50 * time.Millisecond)
|
|
defer ticker.Stop()
|
|
|
|
done := make(chan bool)
|
|
go func() {
|
|
for i := 0; i < 10; i++ {
|
|
<-ticker.C
|
|
s.watchdog.Heartbeat("test-forward")
|
|
}
|
|
done <- true
|
|
}()
|
|
|
|
// Wait for all heartbeats to complete
|
|
<-done
|
|
|
|
// Check that callback was not called
|
|
mu.Lock()
|
|
assert.False(s.T(), callbackCalled, "Callback should not be called for healthy worker")
|
|
mu.Unlock()
|
|
}
|
|
|
|
// TestMultipleWorkers tests monitoring multiple workers simultaneously
|
|
func (s *WatchdogTestSuite) TestMultipleWorkers() {
|
|
callbacks := make(map[string]int)
|
|
var mu sync.Mutex
|
|
|
|
makeCallback := func(id string) func(string) {
|
|
return func(forwardID string) {
|
|
mu.Lock()
|
|
defer mu.Unlock()
|
|
callbacks[id]++
|
|
}
|
|
}
|
|
|
|
// Register multiple workers
|
|
s.watchdog.RegisterWorker("worker-1", makeCallback("worker-1"))
|
|
s.watchdog.RegisterWorker("worker-2", makeCallback("worker-2"))
|
|
s.watchdog.RegisterWorker("worker-3", makeCallback("worker-3"))
|
|
|
|
// worker-1: Keep sending heartbeats (healthy)
|
|
// Use a done channel to ensure goroutine exits before test ends
|
|
ticker1 := time.NewTicker(50 * time.Millisecond)
|
|
done := make(chan struct{})
|
|
var wg sync.WaitGroup
|
|
wg.Add(1)
|
|
go func() {
|
|
defer wg.Done()
|
|
defer ticker1.Stop()
|
|
for i := 0; i < 10; i++ {
|
|
select {
|
|
case <-ticker1.C:
|
|
s.watchdog.Heartbeat("worker-1")
|
|
case <-done:
|
|
return
|
|
}
|
|
}
|
|
}()
|
|
|
|
// worker-2: Send initial heartbeat then stop (will become hung)
|
|
s.watchdog.Heartbeat("worker-2")
|
|
|
|
// worker-3: Send initial heartbeat then stop (will become hung)
|
|
s.watchdog.Heartbeat("worker-3")
|
|
|
|
// Wait for hung workers to be detected
|
|
time.Sleep(600 * time.Millisecond)
|
|
|
|
// Signal goroutine to stop and wait for it
|
|
close(done)
|
|
wg.Wait()
|
|
|
|
// Check results
|
|
mu.Lock()
|
|
defer mu.Unlock()
|
|
|
|
assert.Equal(s.T(), 0, callbacks["worker-1"], "worker-1 should not trigger callback (healthy)")
|
|
assert.Greater(s.T(), callbacks["worker-2"], 0, "worker-2 should trigger callback (hung)")
|
|
assert.Greater(s.T(), callbacks["worker-3"], 0, "worker-3 should trigger callback (hung)")
|
|
}
|
|
|
|
// TestCallbackOnlyOnFirstDetection tests that callback is only called once when hung is first detected
|
|
func (s *WatchdogTestSuite) TestCallbackOnlyOnFirstDetection() {
|
|
callbackCount := 0
|
|
var mu sync.Mutex
|
|
callback := func(forwardID string) {
|
|
mu.Lock()
|
|
defer mu.Unlock()
|
|
callbackCount++
|
|
}
|
|
|
|
s.watchdog.RegisterWorker("test-forward", callback)
|
|
|
|
// Send initial heartbeat
|
|
s.watchdog.Heartbeat("test-forward")
|
|
|
|
// Wait for multiple check cycles
|
|
time.Sleep(1 * time.Second)
|
|
|
|
// Check that callback was only called once
|
|
mu.Lock()
|
|
assert.Equal(s.T(), 1, callbackCount, "Callback should only be called once")
|
|
mu.Unlock()
|
|
}
|
|
|
|
// TestHeartbeatResetsHungState tests that sending heartbeat after hung detection resets state
|
|
func (s *WatchdogTestSuite) TestHeartbeatResetsHungState() {
|
|
callbackCount := 0
|
|
var mu sync.Mutex
|
|
callback := func(forwardID string) {
|
|
mu.Lock()
|
|
defer mu.Unlock()
|
|
callbackCount++
|
|
}
|
|
|
|
s.watchdog.RegisterWorker("test-forward", callback)
|
|
|
|
// Send initial heartbeat
|
|
s.watchdog.Heartbeat("test-forward")
|
|
|
|
// Wait for hung detection
|
|
time.Sleep(500 * time.Millisecond)
|
|
|
|
mu.Lock()
|
|
firstCount := callbackCount
|
|
mu.Unlock()
|
|
|
|
assert.Equal(s.T(), 1, firstCount, "First hung detection should trigger callback")
|
|
|
|
// Send heartbeat to reset hung state
|
|
s.watchdog.Heartbeat("test-forward")
|
|
|
|
// Wait for worker to become hung again
|
|
time.Sleep(500 * time.Millisecond)
|
|
|
|
mu.Lock()
|
|
secondCount := callbackCount
|
|
mu.Unlock()
|
|
|
|
assert.Equal(s.T(), 2, secondCount, "Second hung detection should trigger callback again")
|
|
}
|
|
|
|
// TestConcurrentOperations tests thread safety
|
|
func (s *WatchdogTestSuite) TestConcurrentOperations() {
|
|
var wg sync.WaitGroup
|
|
numWorkers := 10
|
|
|
|
for i := 0; i < numWorkers; i++ {
|
|
wg.Add(1)
|
|
go func(id int) {
|
|
defer wg.Done()
|
|
forwardID := string(rune('a' + id))
|
|
s.watchdog.RegisterWorker(forwardID, nil)
|
|
for j := 0; j < 10; j++ {
|
|
s.watchdog.Heartbeat(forwardID)
|
|
time.Sleep(10 * time.Millisecond)
|
|
}
|
|
s.watchdog.UnregisterWorker(forwardID)
|
|
}(i)
|
|
}
|
|
|
|
wg.Wait()
|
|
// If we get here without deadlocks or panics, test passes
|
|
}
|
|
|
|
// TestStopWatchdog tests that stopping watchdog cleans up properly
|
|
func TestStopWatchdog(t *testing.T) {
|
|
watchdog := NewWatchdog(100*time.Millisecond, 300*time.Millisecond)
|
|
watchdog.Start()
|
|
|
|
callbackCalled := false
|
|
callback := func(forwardID string) {
|
|
callbackCalled = true
|
|
}
|
|
|
|
watchdog.RegisterWorker("test-forward", callback)
|
|
watchdog.Heartbeat("test-forward")
|
|
|
|
// Stop watchdog before hang detection
|
|
time.Sleep(100 * time.Millisecond)
|
|
watchdog.Stop()
|
|
|
|
// Wait to ensure no more callbacks after stop
|
|
time.Sleep(500 * time.Millisecond)
|
|
|
|
assert.False(t, callbackCalled, "Callback should not be called after watchdog is stopped")
|
|
}
|
|
|
|
// TestWatchdogWithZeroHeartbeats tests detecting hung worker that never sends heartbeats
|
|
func (s *WatchdogTestSuite) TestWatchdogWithZeroHeartbeats() {
|
|
callbackCalled := make(chan string, 1)
|
|
callback := func(forwardID string) {
|
|
callbackCalled <- forwardID
|
|
}
|
|
|
|
// Register worker but never send heartbeat
|
|
s.watchdog.RegisterWorker("test-forward", callback)
|
|
|
|
// Wait for hung detection
|
|
timeout := time.After(1 * time.Second)
|
|
|
|
select {
|
|
case forwardID := <-callbackCalled:
|
|
assert.Equal(s.T(), "test-forward", forwardID)
|
|
case <-timeout:
|
|
s.T().Fatal("Timeout waiting for hung worker callback")
|
|
}
|
|
}
|