HTTP and SSE dead client fix

Fix 1: HTTP Server timeouts (service.go)
  - Added IdleTimeout: 120s - closes idle keep-alive connections
  - Added ReadTimeout: 30s - prevents hung connections waiting for request data

  Fix 2: SSE dead client cleanup (broadcaster.go) - This was the real leak
  - Bug: When Write() failed on a disconnected client, it just logged and continued - the dead client stayed in the clients map forever
  - Effect: Dead clients accumulated and every Broadcast() call would try (and fail) to write to them
  - Fix: Now tracks failed writes and removes dead clients from the map

  The SSE bug was likely the main cause. Every time a browser tab closed or connection dropped, the client stayed registered. On each broadcast (processing status updates happen frequently), it would try to write to dead connections, fail, but never clean up.
This commit is contained in:
2025-12-15 13:16:55 +00:00
parent 64dd58dbfa
commit 38b3786942
2 changed files with 38 additions and 1 deletions
+3
View File
@@ -669,6 +669,9 @@ func (s *Service) Start() error {
Addr: fmt.Sprintf(":%d", port),
Handler: s.router,
ReadHeaderTimeout: 10 * time.Second,
ReadTimeout: 30 * time.Second,
WriteTimeout: 0, // Disabled for SSE (long-lived connections)
IdleTimeout: 120 * time.Second,
}
// Check if we're in restart mode (after update)
+35 -1
View File
@@ -75,6 +75,31 @@ func (b *Broadcaster) RemoveClient(client *Client) {
Msg("SSE client disconnected")
}
// removeClientByID removes a client by ID (for dead client cleanup).
func (b *Broadcaster) removeClientByID(id string) {
b.mu.Lock()
client, exists := b.clients[id]
if exists {
delete(b.clients, id)
}
clientCount := len(b.clients)
b.mu.Unlock()
if exists && client.Done != nil {
select {
case <-client.Done:
// Already closed
default:
close(client.Done)
}
}
log.Debug().
Str("clientId", id).
Int("totalClients", clientCount).
Msg("Dead SSE client removed")
}
// Broadcast sends a message to all connected clients.
func (b *Broadcaster) Broadcast(data interface{}) {
jsonData, err := json.Marshal(data)
@@ -92,6 +117,9 @@ func (b *Broadcaster) Broadcast(data interface{}) {
}
b.mu.RUnlock()
// Track dead clients for removal
var deadClients []*Client
for _, client := range clients {
select {
case <-client.Done:
@@ -102,12 +130,18 @@ func (b *Broadcaster) Broadcast(data interface{}) {
log.Debug().
Str("clientId", client.ID).
Err(err).
Msg("Failed to write to SSE client")
Msg("Failed to write to SSE client, marking for removal")
deadClients = append(deadClients, client)
continue
}
client.Flusher.Flush()
}
}
// Remove dead clients outside the iteration
for _, client := range deadClients {
b.removeClientByID(client.ID)
}
}
// ClientCount returns the number of connected clients.