Fix the watchdog being too aggressive.

This commit is contained in:
2025-11-24 13:19:44 +00:00
parent 2fdc5912e7
commit 39fe4286b4
3 changed files with 49 additions and 24 deletions
+26 -5
View File
@@ -92,14 +92,15 @@ func (w *ForwardWorker) Stop() {
func (w *ForwardWorker) run() {
defer close(w.doneChan)
// Start heartbeat goroutine to continuously send heartbeats to watchdog
// This prevents false "hung worker" detection when connections are long-lived
if w.watchdog != nil {
go w.heartbeatLoop()
}
backoff := retry.NewBackoff()
for {
// Send heartbeat to watchdog to indicate we're alive
if w.watchdog != nil {
w.watchdog.Heartbeat(w.forward.ID())
}
// Check if we should stop
select {
case <-w.ctx.Done():
@@ -202,6 +203,26 @@ func (w *ForwardWorker) run() {
}
}
// heartbeatLoop reports this worker's forward ID to the watchdog once on entry
// and then again on every tick of a 15-second ticker, until w.ctx is done.
//
// It is meant to run in its own goroutine for the lifetime of the worker; run
// starts it only when w.watchdog is non-nil, and this method does not repeat
// that nil check itself.
func (w *ForwardWorker) heartbeatLoop() {
	// 15s cadence (intended to stay well within a typical 60s watchdog timeout).
	pulse := time.NewTicker(15 * time.Second)
	defer pulse.Stop()

	// The forward ID is looked up again on every report rather than cached.
	report := func() {
		w.watchdog.Heartbeat(w.forward.ID())
	}

	// Report right away instead of waiting for the first tick.
	report()

	for {
		select {
		case <-w.ctx.Done():
			// Worker is shutting down; the deferred Stop releases the ticker.
			return
		case <-pulse.C:
			report()
		}
	}
}
// establishForward establishes a port-forward connection.
// This blocks until the connection is closed or an error occurs.
func (w *ForwardWorker) establishForward(podName string) error {