Compare commits

..

4 Commits

Author SHA1 Message Date
lukaszraczylo 3a7cc6f502 bugfixes nov2025 pt3 (#6)
* Minor improvements.
* DRY the codebase.
* Add version checker / updater.
2025-11-25 01:28:23 +00:00
lukaszraczylo 49acba5679 Bugfixes nov2025 pt2 (#5)
* UI bugfixes.
* Fix open port check during new fwd setup wizard
2025-11-25 00:09:32 +00:00
lukaszraczylo 39fe4286b4 Fix the watchdog being too aggressive. 2025-11-24 13:19:44 +00:00
lukaszraczylo 2fdc5912e7 healtcheck improvements (#4)
* Advanced healtchecks.
* Add watchdog for stale connections handling.
2025-11-24 13:00:19 +00:00
25 changed files with 2255 additions and 202 deletions
+1 -1
View File
@@ -19,7 +19,7 @@ builds:
- arm64
ldflags:
- -s -w
- -X main.version={{.Version}}
- -X main.appVersion={{.Version}}
archives:
- id: kportal
+21
View File
@@ -1,6 +1,27 @@
# Example kportal configuration
# Copy this file to your project and customize as needed
# Optional: Health check configuration
# These settings control how kportal monitors connection health and detects stale connections
healthCheck:
interval: "3s" # How often to check connection health (default: 3s)
timeout: "2s" # Timeout for health check operations (default: 2s)
method: "data-transfer" # Health check method: "tcp-dial" or "data-transfer" (default: data-transfer)
# - tcp-dial: Simple TCP connection test (fast, less reliable)
# - data-transfer: Attempts to read data (slower, more reliable)
maxConnectionAge: "25m" # Maximum connection age before proactive reconnect (default: 25m)
# Helps avoid Kubernetes API server timeouts (typically 30m)
maxIdleTime: "10m" # Maximum idle time before marking as stale (default: 10m)
# Connections with no data transfer are marked stale
# Optional: Reliability configuration
# These settings improve connection stability for long-running transfers
reliability:
tcpKeepalive: "30s" # TCP keepalive interval for OS-level connection monitoring (default: 30s)
dialTimeout: "30s" # Connection dial timeout (default: 30s)
retryOnStale: true # Automatically reconnect when stale connections detected (default: true)
watchdogPeriod: "30s" # Goroutine watchdog check interval to detect hung workers (default: 30s)
contexts:
# Production context
- name: production
+1 -1
View File
@@ -37,7 +37,7 @@ GOFMT=$(GOCMD) fmt
# Build flags
BUILD_FLAGS=-buildvcs=false
LDFLAGS=-ldflags="-s -w -X main.version=$(VERSION)"
LDFLAGS=-ldflags="-s -w -X main.appVersion=$(VERSION)"
all: fmt vet staticcheck test build
+43 -1
View File
@@ -24,7 +24,8 @@ kportal simplifies managing multiple Kubernetes port-forwards with an elegant, i
- 🗑️ **Live Delete** - Remove port-forwards instantly from the running session
- 🔄 **Auto-Reconnect** - Automatic retry with exponential backoff on connection failures (max 10s)
-**Hot-Reload** - Update configuration without restarting - changes applied automatically
- 🏥 **Health Checks** - Real-time port forward status monitoring with 5-second intervals
- 🏥 **Advanced Health Checks** - Multiple check methods (tcp-dial, data-transfer) with stale connection detection
- 🛡️ **Goroutine Watchdog** - Detects and recovers from completely hung workers
- 🎨 **Multi-Context** - Support for multiple Kubernetes contexts and namespaces
- 📦 **Batch Management** - Manage all port-forwards from a single configuration file
- 🔌 **Toggle Forwards** - Enable/disable individual port-forwards on the fly with Space key
@@ -194,6 +195,47 @@ contexts:
- **Service**: `service/service-name` or `svc/service-name`
- **Deployment**: `deployment/deployment-name` or `deploy/deployment-name`
### Health Check & Reliability (Advanced)
kportal includes advanced health checking to prevent stale connections during long-running operations like database dumps:
```yaml
healthCheck:
interval: "3s" # Health check frequency (default: 3s)
timeout: "2s" # Health check timeout (default: 2s)
method: "data-transfer" # Check method: "tcp-dial" or "data-transfer" (default: data-transfer)
maxConnectionAge: "25m" # Proactive reconnect before k8s timeout (default: 25m)
maxIdleTime: "10m" # Detect hung connections (default: 10m)
reliability:
tcpKeepalive: "30s" # TCP keepalive interval (default: 30s)
dialTimeout: "30s" # Connection dial timeout (default: 30s)
retryOnStale: true # Auto-reconnect stale connections (default: true)
```
**Health Check Methods:**
- **`tcp-dial`**: Fast TCP connection test - verifies local port is listening
- **`data-transfer`**: More reliable - attempts to read data to verify tunnel is functional
**Stale Detection:**
- **Max Connection Age**: Kubernetes API typically has 30-minute timeout. kportal reconnects at 25 minutes by default to avoid hitting this limit. **Important**: Age-based reconnection only occurs when the connection is ALSO idle - active transfers (like database dumps) are never interrupted.
- **Max Idle Time**: Detects connections with no data transfer, common when intermediate firewalls drop idle TCP connections
**Use Case Example - Database Dumps:**
```yaml
# Optimized for long-running pg_dump
healthCheck:
method: "data-transfer"
maxConnectionAge: "20m" # Only applies when idle - won't interrupt active dumps
maxIdleTime: "5m" # Detects truly stale connections
reliability:
tcpKeepalive: "30s"
retryOnStale: true
```
This configuration ensures multi-hour database dumps complete without interruption. The `maxConnectionAge` will only trigger reconnection if the connection has been idle for more than `maxIdleTime`, preventing interruption of active data transfers.
## 🎮 Usage
### Interactive Mode (Default)
+60 -4
View File
@@ -1,6 +1,7 @@
package main
import (
"context"
"flag"
"fmt"
"io"
@@ -19,6 +20,7 @@ import (
"github.com/nvm/kportal/internal/k8s"
"github.com/nvm/kportal/internal/logger"
"github.com/nvm/kportal/internal/ui"
"github.com/nvm/kportal/internal/version"
"k8s.io/klog/v2"
)
@@ -26,6 +28,10 @@ const (
defaultConfigFile = ".kportal.yaml"
initialForwardSettleTime = 100 * time.Millisecond
tableUpdateInterval = 2 * time.Second
// GitHub repository info for update checks
githubOwner = "lukaszraczylo"
githubRepo = "kportal"
)
var (
@@ -34,16 +40,22 @@ var (
logFormat = flag.String("log-format", "text", "Log format: text or json")
check = flag.Bool("check", false, "Validate configuration and exit")
showVersion = flag.Bool("version", false, "Show version and exit")
checkUpdate = flag.Bool("update", false, "Check for updates and exit")
convertInput = flag.String("convert", "", "Convert kftray JSON config to kportal YAML (provide input file path)")
convertOutput = flag.String("convert-output", ".kportal.yaml", "Output file for converted configuration")
version = "0.1.0" // Set via ldflags during build
appVersion = "0.1.0" // Set via ldflags during build
)
func main() {
flag.Parse()
if *showVersion {
fmt.Printf("kportal version %s\n", version)
fmt.Printf("kportal version %s\n", appVersion)
os.Exit(0)
}
if *checkUpdate {
checkForUpdates()
os.Exit(0)
}
@@ -177,7 +189,7 @@ func main() {
// Only log startup messages in verbose mode
if *verbose {
log.Printf("kportal v%s", version)
log.Printf("kportal v%s", appVersion)
log.Printf("Loading configuration from: %s", *configFile)
}
@@ -209,17 +221,40 @@ func main() {
} else {
manager.DisableForward(id)
}
}, version)
}, appVersion)
// Set wizard dependencies
// Note: mutator is always available (for delete/edit), discovery requires valid kubeconfig (for add)
bubbleTeaUI.SetWizardDependencies(discovery, mutator, *configFile)
// Check for updates in background (non-blocking)
go func() {
checker := version.NewChecker(githubOwner, githubRepo, appVersion)
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
if update := checker.CheckForUpdate(ctx); update != nil {
bubbleTeaUI.SetUpdateAvailable(update.LatestVersion, update.ReleaseURL)
}
}()
manager.SetStatusUI(bubbleTeaUI)
} else {
// Verbose mode with simple table
tableUI = ui.NewTableUI(*verbose)
manager.SetStatusUI(tableUI)
// Check for updates and print to log
go func() {
checker := version.NewChecker(githubOwner, githubRepo, appVersion)
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
if update := checker.CheckForUpdate(ctx); update != nil {
log.Printf("Update available: v%s (current: v%s) - %s",
update.LatestVersion, update.CurrentVersion, update.ReleaseURL)
}
}()
}
// Start forwards
@@ -322,3 +357,24 @@ func main() {
manager.Stop()
}
}
// checkForUpdates checks for available updates and prints the result
func checkForUpdates() {
fmt.Printf("kportal version %s\n", appVersion)
fmt.Println("Checking for updates...")
checker := version.NewChecker(githubOwner, githubRepo, appVersion)
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
update := checker.CheckForUpdate(ctx)
if update == nil {
fmt.Println("You are running the latest version.")
return
}
fmt.Printf("\nUpdate available: v%s\n", update.LatestVersion)
fmt.Printf("Download: %s\n", update.ReleaseURL)
fmt.Println("\nTo update, download the latest release from the URL above")
fmt.Println("or use your package manager (e.g., 'brew upgrade kportal').")
}
+1
View File
@@ -25,6 +25,7 @@ require (
github.com/clipperhouse/stringish v0.1.1 // indirect
github.com/clipperhouse/uax29/v2 v2.3.0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/dustin/go-humanize v1.0.1 // indirect
github.com/emicklei/go-restful/v3 v3.13.0 // indirect
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect
github.com/fxamacker/cbor/v2 v2.9.0 // indirect
+2
View File
@@ -23,6 +23,8 @@ github.com/clipperhouse/uax29/v2 v2.3.0/go.mod h1:Wn1g7MK6OoeDT0vL+Q0SQLDz/KpfsV
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/emicklei/go-restful/v3 v3.13.0 h1:C4Bl2xDndpU6nJ4bc1jXd+uTmYPVUwkD6bFY/oTyCes=
github.com/emicklei/go-restful/v3 v3.13.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4=
+126 -3
View File
@@ -1,19 +1,136 @@
package config
import (
"bytes"
"fmt"
"os"
"time"
"gopkg.in/yaml.v3"
)
const (
maxConfigSize = 10 * 1024 * 1024 // 10MB
// maxConfigSize is the maximum allowed configuration file size (10MB)
maxConfigSize = 10 * 1024 * 1024
// Default health check settings
DefaultHealthCheckInterval = 3 * time.Second // How often to check connection health
DefaultHealthCheckTimeout = 2 * time.Second // Timeout for health check probes
DefaultHealthCheckMethod = "data-transfer" // More reliable than tcp-dial
DefaultMaxConnectionAge = 25 * time.Minute // Reconnect before k8s 30min timeout
DefaultMaxIdleTime = 10 * time.Minute // Reconnect if no activity
// Default reliability settings
DefaultTCPKeepalive = 30 * time.Second // OS-level TCP keepalive interval
DefaultDialTimeout = 30 * time.Second // Connection establishment timeout
DefaultWatchdogPeriod = 30 * time.Second // Goroutine health check interval
)
// Config represents the root configuration structure from .kportal.yaml
type Config struct {
Contexts []Context `yaml:"contexts"`
Contexts []Context `yaml:"contexts"`
HealthCheck *HealthCheckSpec `yaml:"healthCheck,omitempty"`
Reliability *ReliabilitySpec `yaml:"reliability,omitempty"`
}
// HealthCheckSpec configures health check behavior
type HealthCheckSpec struct {
Interval string `yaml:"interval,omitempty"` // e.g., "3s", "5s"
Timeout string `yaml:"timeout,omitempty"` // e.g., "2s"
Method string `yaml:"method,omitempty"` // "tcp-dial" | "data-transfer"
MaxConnectionAge string `yaml:"maxConnectionAge,omitempty"` // e.g., "25m" - reconnect before k8s timeout
MaxIdleTime string `yaml:"maxIdleTime,omitempty"` // e.g., "10m" - reconnect if no activity
}
// ReliabilitySpec configures connection reliability features
type ReliabilitySpec struct {
TCPKeepalive string `yaml:"tcpKeepalive,omitempty"` // e.g., "30s" - OS-level keepalive
DialTimeout string `yaml:"dialTimeout,omitempty"` // e.g., "30s" - connection dial timeout
RetryOnStale bool `yaml:"retryOnStale,omitempty"` // Auto-reconnect on stale detection
WatchdogPeriod string `yaml:"watchdogPeriod,omitempty"` // e.g., "30s" - goroutine watchdog interval
}
// parseDurationOrDefault parses a duration string and returns the default if empty or invalid.
func parseDurationOrDefault(value string, defaultDur time.Duration) time.Duration {
if value == "" {
return defaultDur
}
if d, err := time.ParseDuration(value); err == nil {
return d
}
return defaultDur
}
// GetHealthCheckIntervalOrDefault returns the health check interval or default value
func (c *Config) GetHealthCheckIntervalOrDefault() time.Duration {
if c.HealthCheck == nil {
return DefaultHealthCheckInterval
}
return parseDurationOrDefault(c.HealthCheck.Interval, DefaultHealthCheckInterval)
}
// GetHealthCheckTimeoutOrDefault returns the health check timeout or default value
func (c *Config) GetHealthCheckTimeoutOrDefault() time.Duration {
if c.HealthCheck == nil {
return DefaultHealthCheckTimeout
}
return parseDurationOrDefault(c.HealthCheck.Timeout, DefaultHealthCheckTimeout)
}
// GetHealthCheckMethod returns the health check method or default
func (c *Config) GetHealthCheckMethod() string {
if c.HealthCheck != nil && c.HealthCheck.Method != "" {
return c.HealthCheck.Method
}
return DefaultHealthCheckMethod
}
// GetMaxConnectionAge returns the max connection age or default
func (c *Config) GetMaxConnectionAge() time.Duration {
if c.HealthCheck == nil {
return DefaultMaxConnectionAge
}
return parseDurationOrDefault(c.HealthCheck.MaxConnectionAge, DefaultMaxConnectionAge)
}
// GetMaxIdleTime returns the max idle time or default
func (c *Config) GetMaxIdleTime() time.Duration {
if c.HealthCheck == nil {
return DefaultMaxIdleTime
}
return parseDurationOrDefault(c.HealthCheck.MaxIdleTime, DefaultMaxIdleTime)
}
// GetTCPKeepalive returns the TCP keepalive duration or default
func (c *Config) GetTCPKeepalive() time.Duration {
if c.Reliability == nil {
return DefaultTCPKeepalive
}
return parseDurationOrDefault(c.Reliability.TCPKeepalive, DefaultTCPKeepalive)
}
// GetRetryOnStale returns whether to retry on stale connections
func (c *Config) GetRetryOnStale() bool {
if c.Reliability != nil {
return c.Reliability.RetryOnStale
}
return true // Default: enabled
}
// GetWatchdogPeriod returns the goroutine watchdog check period or default
func (c *Config) GetWatchdogPeriod() time.Duration {
if c.Reliability == nil {
return DefaultWatchdogPeriod
}
return parseDurationOrDefault(c.Reliability.WatchdogPeriod, DefaultWatchdogPeriod)
}
// GetDialTimeout returns the connection dial timeout or default
func (c *Config) GetDialTimeout() time.Duration {
if c.Reliability == nil {
return DefaultDialTimeout
}
return parseDurationOrDefault(c.Reliability.DialTimeout, DefaultDialTimeout)
}
// Context represents a Kubernetes context with its namespaces
@@ -103,9 +220,15 @@ func LoadConfig(path string) (*Config, error) {
}
// ParseConfig parses YAML configuration data into a Config struct.
// It uses strict parsing that rejects unknown keys to catch typos.
func ParseConfig(data []byte) (*Config, error) {
var cfg Config
if err := yaml.Unmarshal(data, &cfg); err != nil {
// Use decoder with KnownFields to reject unknown keys (catches typos)
decoder := yaml.NewDecoder(bytes.NewReader(data))
decoder.KnownFields(true)
if err := decoder.Decode(&cfg); err != nil {
return nil, fmt.Errorf("failed to parse YAML: %w", err)
}
+12 -7
View File
@@ -6,10 +6,15 @@ import (
)
const (
minPort = 1
maxPort = 65535
MinPort = 1
MaxPort = 65535
)
// IsValidPort returns true if the port number is within the valid range (1-65535).
func IsValidPort(port int) bool {
return port >= MinPort && port <= MaxPort
}
// ValidationError represents a configuration validation error with context.
type ValidationError struct {
Field string // The field that failed validation
@@ -84,7 +89,7 @@ func (v *Validator) validateStructure(cfg *Config) []ValidationError {
Field: fmt.Sprintf("contexts[%d].namespaces", i),
Message: fmt.Sprintf("Context '%s' must have at least one namespace", ctx.Name),
})
continue
// Don't continue - still validate other aspects of the context if any
}
for j, ns := range ctx.Namespaces {
@@ -130,17 +135,17 @@ func (v *Validator) validateForward(fwd *Forward) []ValidationError {
}
// Validate ports
if fwd.Port < minPort || fwd.Port > maxPort {
if fwd.Port < MinPort || fwd.Port > MaxPort {
errs = append(errs, ValidationError{
Field: "port",
Message: fmt.Sprintf("Invalid port %d for forward %s (must be between %d and %d)", fwd.Port, fwd.ID(), minPort, maxPort),
Message: fmt.Sprintf("Invalid port %d for forward %s (must be between %d and %d)", fwd.Port, fwd.ID(), MinPort, MaxPort),
})
}
if fwd.LocalPort < minPort || fwd.LocalPort > maxPort {
if fwd.LocalPort < MinPort || fwd.LocalPort > MaxPort {
errs = append(errs, ValidationError{
Field: "localPort",
Message: fmt.Sprintf("Invalid localPort %d for forward %s (must be between %d and %d)", fwd.LocalPort, fwd.ID(), minPort, maxPort),
Message: fmt.Sprintf("Invalid localPort %d for forward %s (must be between %d and %d)", fwd.LocalPort, fwd.ID(), MinPort, MaxPort),
})
}
+110 -11
View File
@@ -12,11 +12,6 @@ import (
"github.com/nvm/kportal/internal/logger"
)
const (
healthCheckInterval = 5 * time.Second
healthCheckTimeout = 2 * time.Second
)
// StatusUpdater is an interface for updating forward status
type StatusUpdater interface {
UpdateStatus(id string, status string)
@@ -34,12 +29,15 @@ type Manager struct {
portForwarder *k8s.PortForwarder
portChecker *PortChecker
healthChecker *healthcheck.Checker
watchdog *Watchdog
verbose bool
currentConfig *config.Config
statusUI StatusUpdater
}
// NewManager creates a new forward Manager.
// The health checker will be created with default settings and can be
// reconfigured via SetConfig().
func NewManager(verbose bool) (*Manager, error) {
clientPool, err := k8s.NewClientPool()
if err != nil {
@@ -49,8 +47,13 @@ func NewManager(verbose bool) (*Manager, error) {
resolver := k8s.NewResourceResolver(clientPool)
portForwarder := k8s.NewPortForwarder(clientPool, resolver)
// Create health checker: check every 5 seconds with 2 second timeout
healthChecker := healthcheck.NewChecker(healthCheckInterval, healthCheckTimeout)
// Create health checker with defaults: check every 3 seconds with 2 second timeout
// Will be reconfigured when config is loaded
healthChecker := healthcheck.NewChecker(3*time.Second, 2*time.Second)
// Create watchdog with default settings: check every 30 seconds, 60 second hang threshold
// Will be reconfigured when config is loaded
watchdog := NewWatchdog(30*time.Second, 60*time.Second)
return &Manager{
workers: make(map[string]*ForwardWorker),
@@ -59,10 +62,56 @@ func NewManager(verbose bool) (*Manager, error) {
portForwarder: portForwarder,
portChecker: NewPortChecker(),
healthChecker: healthChecker,
watchdog: watchdog,
verbose: verbose,
}, nil
}
// configureHealthChecker creates a new health checker with settings from config
func (m *Manager) configureHealthChecker(cfg *config.Config) {
// Stop existing health checker
if m.healthChecker != nil {
m.healthChecker.Stop()
}
// Parse check method
methodStr := cfg.GetHealthCheckMethod()
var method healthcheck.CheckMethod
switch methodStr {
case "tcp-dial":
method = healthcheck.CheckMethodTCPDial
case "data-transfer":
method = healthcheck.CheckMethodDataTransfer
default:
method = healthcheck.CheckMethodDataTransfer
}
// Create new health checker with config settings
m.healthChecker = healthcheck.NewCheckerWithOptions(healthcheck.CheckerOptions{
Interval: cfg.GetHealthCheckIntervalOrDefault(),
Timeout: cfg.GetHealthCheckTimeoutOrDefault(),
Method: method,
MaxConnectionAge: cfg.GetMaxConnectionAge(),
MaxIdleTime: cfg.GetMaxIdleTime(),
})
// Configure TCP settings on port forwarder
tcpKeepalive := cfg.GetTCPKeepalive()
dialTimeout := cfg.GetDialTimeout()
m.portForwarder.SetTCPKeepalive(tcpKeepalive)
m.portForwarder.SetDialTimeout(dialTimeout)
logger.Info("Health checker and reliability configured", map[string]interface{}{
"interval": cfg.GetHealthCheckIntervalOrDefault().String(),
"timeout": cfg.GetHealthCheckTimeoutOrDefault().String(),
"method": methodStr,
"max_connection_age": cfg.GetMaxConnectionAge().String(),
"max_idle_time": cfg.GetMaxIdleTime().String(),
"tcp_keepalive": tcpKeepalive.String(),
"dial_timeout": dialTimeout.String(),
})
}
// SetStatusUI sets the status updater for the manager
func (m *Manager) SetStatusUI(ui StatusUpdater) {
m.statusUI = ui
@@ -76,6 +125,20 @@ func (m *Manager) Start(cfg *config.Config) error {
m.currentConfig = cfg
// Configure health checker with settings from config
m.configureHealthChecker(cfg)
// Start watchdog
watchdogPeriod := cfg.GetWatchdogPeriod()
m.watchdog.checkInterval = watchdogPeriod
m.watchdog.hangThreshold = watchdogPeriod * 2 // Hang threshold is 2x check interval
m.watchdog.Start()
logger.Info("Watchdog started", map[string]interface{}{
"check_interval": watchdogPeriod.String(),
"hang_threshold": (watchdogPeriod * 2).String(),
})
// Get all forwards from config
forwards := cfg.GetAllForwards()
@@ -119,8 +182,9 @@ func (m *Manager) Start(cfg *config.Config) error {
func (m *Manager) Stop() {
log.Printf("Stopping all port-forwards...")
// Stop health checker first
// Stop health checker and watchdog first
m.healthChecker.Stop()
m.watchdog.Stop()
m.workersMu.Lock()
workers := make([]*ForwardWorker, 0, len(m.workers))
@@ -273,21 +337,55 @@ func (m *Manager) startWorker(fwd config.Forward) error {
m.statusUI.AddForward(fwd.ID(), &fwd)
}
// Register with watchdog
m.watchdog.RegisterWorker(fwd.ID(), func(forwardID string) {
logger.Warn("Watchdog triggered reconnection for hung worker", map[string]interface{}{
"forward_id": forwardID,
})
// Find and trigger reconnect on hung worker
m.workersMu.RLock()
worker, exists := m.workers[forwardID]
m.workersMu.RUnlock()
if exists {
worker.TriggerReconnect("watchdog detected hung worker")
}
})
// Register with health checker
m.healthChecker.Register(fwd.ID(), fwd.LocalPort, func(forwardID string, status healthcheck.Status, errorMsg string) {
if m.statusUI != nil {
m.statusUI.UpdateStatus(forwardID, string(status))
// Send error separately if there is one
if status == healthcheck.StatusUnhealthy && errorMsg != "" {
if (status == healthcheck.StatusUnhealthy || status == healthcheck.StatusStale) && errorMsg != "" {
if ui, ok := m.statusUI.(interface{ SetError(id, msg string) }); ok {
ui.SetError(forwardID, errorMsg)
}
}
}
// Handle stale connections: trigger reconnection if retryOnStale is enabled
if status == healthcheck.StatusStale && m.currentConfig.GetRetryOnStale() {
logger.Info("Stale connection detected, triggering reconnection", map[string]interface{}{
"forward_id": forwardID,
"reason": errorMsg,
})
// Find and notify the worker to reconnect
m.workersMu.RLock()
worker, exists := m.workers[forwardID]
m.workersMu.RUnlock()
if exists {
worker.TriggerReconnect("stale connection")
}
}
})
// Create and start worker
worker := NewForwardWorker(fwd, m.portForwarder, m.verbose, m.statusUI, m.healthChecker)
worker := NewForwardWorker(fwd, m.portForwarder, m.verbose, m.statusUI, m.healthChecker, m.watchdog)
worker.Start()
// Store worker
@@ -312,8 +410,9 @@ func (m *Manager) stopWorkerInternal(id string, removeFromUI bool) error {
delete(m.workers, id)
m.workersMu.Unlock()
// Unregister from health checker
// Unregister from health checker and watchdog
m.healthChecker.Unregister(id)
m.watchdog.UnregisterWorker(id)
// Notify UI - either remove or update to disabled status
if m.statusUI != nil {
+144 -41
View File
@@ -6,11 +6,20 @@ import (
"os/exec"
"runtime"
"strings"
"github.com/nvm/kportal/internal/logger"
)
const (
// maxPIDLength is the maximum length of a valid PID string (9 digits covers PIDs up to 999,999,999)
maxPIDLength = 9
// minNetstatFields is the minimum number of fields expected in netstat output
minNetstatFields = 5
)
// isValidPID validates that a PID string contains only digits
func isValidPID(pid string) bool {
if len(pid) == 0 || len(pid) > 9 {
if len(pid) == 0 || len(pid) > maxPIDLength {
return false
}
for _, c := range pid {
@@ -21,6 +30,72 @@ func isValidPID(pid string) bool {
return true
}
// processInfo holds information about a process using a port
type processInfo struct {
pid string
name string
isValid bool
}
// formatProcessInfo formats process information for display
func formatProcessInfo(info processInfo) string {
if !info.isValid {
return "unknown"
}
if info.name != "" {
return fmt.Sprintf("%s (PID %s)", info.name, info.pid)
}
return fmt.Sprintf("PID %s", info.pid)
}
// formatProcessList formats a list of processes into a human-readable string.
// Returns "unknown" if the list is empty.
func formatProcessList(processes []processInfo) string {
if len(processes) == 0 {
return "unknown"
}
if len(processes) == 1 {
return formatProcessInfo(processes[0])
}
// Multiple processes - format as comma-separated list
parts := make([]string, len(processes))
for i, p := range processes {
parts[i] = formatProcessInfo(p)
}
return strings.Join(parts, ", ")
}
// getProcessNameByPID retrieves the process name for a given PID on Unix systems
func getProcessNameByPID(pid string) string {
cmd := exec.Command("ps", "-p", pid, "-o", "comm=")
output, err := cmd.Output()
if err != nil {
return ""
}
return strings.TrimSpace(string(output))
}
// getProcessNameByPIDWindows retrieves the process name for a given PID on Windows
func getProcessNameByPIDWindows(pid string) string {
cmd := exec.Command("tasklist", "/FI", fmt.Sprintf("PID eq %s", pid), "/FO", "CSV", "/NH")
output, err := cmd.Output()
if err != nil {
return ""
}
// Parse CSV output: "process.exe","1234","Console","1","12,345 K"
csvLine := strings.TrimSpace(string(output))
if csvLine == "" {
return ""
}
parts := strings.Split(csvLine, ",")
if len(parts) > 0 {
return strings.Trim(parts[0], "\"")
}
return ""
}
// PortConflict represents a local port that is already in use.
type PortConflict struct {
Port int // The conflicting port number
@@ -102,27 +177,55 @@ func (pc *PortChecker) getProcessUsingPortUnix(port int) string {
return "unknown"
}
// Get the first PID if multiple are returned
// Handle multiple PIDs (multiple processes on same port)
pids := strings.Split(pidStr, "\n")
pid := pids[0]
var validProcesses []processInfo
if !isValidPID(pid) {
return "unknown"
for _, pid := range pids {
pid = strings.TrimSpace(pid)
if pid == "" {
continue
}
if !isValidPID(pid) {
logger.Debug("Invalid PID format from lsof output", map[string]interface{}{
"port": port,
"raw_pid": pid,
})
continue
}
procName := getProcessNameByPID(pid)
validProcesses = append(validProcesses, processInfo{
pid: pid,
name: procName,
isValid: true,
})
}
// Get process name using ps
cmd = exec.Command("ps", "-p", pid, "-o", "comm=")
output, err = cmd.Output()
if err != nil {
return fmt.Sprintf("PID %s", pid)
return formatProcessList(validProcesses)
}
// isListeningState checks if a netstat line indicates a listening state.
// This handles both English and potentially other locales by checking for common patterns.
func isListeningState(line string, fields []string) bool {
upperLine := strings.ToUpper(line)
// Check for common listening state indicators across locales
// English: LISTENING, German: ABHÖREN, French: ÉCOUTE, etc.
// The most reliable check is the state field position (4th field, 0-indexed = 3)
// and that it's a TCP connection with 0.0.0.0:0 or *:* as foreign address
if len(fields) >= minNetstatFields {
state := strings.ToUpper(fields[3])
// Common listening state values across Windows locales
if state == "LISTENING" || state == "ABHÖREN" || state == "ÉCOUTE" ||
state == "ESCUCHANDO" || state == "ASCOLTO" || state == "NASŁUCHIWANIE" {
return true
}
}
procName := strings.TrimSpace(string(output))
if procName == "" {
return fmt.Sprintf("PID %s", pid)
}
return fmt.Sprintf("%s (PID %s)", procName, pid)
// Fallback: check if line contains LISTENING (most common case)
return strings.Contains(upperLine, "LISTENING")
}
// getProcessUsingPortWindows uses netstat to find the process using a port on Windows.
@@ -138,6 +241,8 @@ func (pc *PortChecker) getProcessUsingPortWindows(port int) string {
lines := strings.Split(string(output), "\n")
portStr := fmt.Sprintf(":%d", port)
var validProcesses []processInfo
for _, line := range lines {
if !strings.Contains(line, portStr) {
continue
@@ -146,44 +251,42 @@ func (pc *PortChecker) getProcessUsingPortWindows(port int) string {
// Parse the line to extract PID
// Format: TCP 0.0.0.0:8080 0.0.0.0:0 LISTENING 1234
fields := strings.Fields(line)
if len(fields) < 5 {
if len(fields) < minNetstatFields {
continue
}
// Check if this is a LISTENING state
if !strings.Contains(strings.ToUpper(line), "LISTENING") {
// Check if this is a LISTENING state (locale-aware)
if !isListeningState(line, fields) {
continue
}
// Verify the local address field actually contains our port
// (avoid matching port in foreign address)
localAddr := fields[1]
if !strings.HasSuffix(localAddr, portStr) {
continue
}
pid := fields[len(fields)-1]
if !isValidPID(pid) {
return "unknown"
logger.Debug("Invalid PID format from netstat output", map[string]interface{}{
"port": port,
"raw_pid": pid,
"line": line,
})
continue
}
// Get process name using tasklist
cmd = exec.Command("tasklist", "/FI", fmt.Sprintf("PID eq %s", pid), "/FO", "CSV", "/NH")
output, err = cmd.Output()
if err != nil {
return fmt.Sprintf("PID %s", pid)
}
// Parse CSV output: "process.exe","1234","Console","1","12,345 K"
csvLine := strings.TrimSpace(string(output))
if csvLine == "" {
return fmt.Sprintf("PID %s", pid)
}
parts := strings.Split(csvLine, ",")
if len(parts) > 0 {
procName := strings.Trim(parts[0], "\"")
return fmt.Sprintf("%s (PID %s)", procName, pid)
}
return fmt.Sprintf("PID %s", pid)
procName := getProcessNameByPIDWindows(pid)
validProcesses = append(validProcesses, processInfo{
pid: pid,
name: procName,
isValid: true,
})
}
return "unknown"
return formatProcessList(validProcesses)
}
// FormatConflicts formats port conflicts into a human-readable error message.
+174
View File
@@ -0,0 +1,174 @@
package forward
import (
"context"
"sync"
"time"
"github.com/nvm/kportal/internal/logger"
)
// Watchdog monitors worker goroutines to detect hung workers
type Watchdog struct {
mu sync.RWMutex
workers map[string]*workerState // key: forward ID
checkInterval time.Duration
hangThreshold time.Duration // How long without heartbeat before considered hung
ctx context.Context
cancel context.CancelFunc
wg sync.WaitGroup
}
// workerState tracks the health of a single worker
type workerState struct {
forwardID string
lastHeartbeat time.Time
heartbeatCount uint64
isHung bool
onHungCallback func(forwardID string)
}
// NewWatchdog creates a new goroutine watchdog
func NewWatchdog(checkInterval, hangThreshold time.Duration) *Watchdog {
ctx, cancel := context.WithCancel(context.Background())
return &Watchdog{
workers: make(map[string]*workerState),
checkInterval: checkInterval,
hangThreshold: hangThreshold,
ctx: ctx,
cancel: cancel,
}
}
// Start begins the watchdog monitoring loop
func (w *Watchdog) Start() {
w.wg.Add(1)
go w.monitorLoop()
}
// Stop stops the watchdog
func (w *Watchdog) Stop() {
w.cancel()
w.wg.Wait()
}
// RegisterWorker adds a worker to monitor
func (w *Watchdog) RegisterWorker(forwardID string, onHungCallback func(string)) {
w.mu.Lock()
defer w.mu.Unlock()
w.workers[forwardID] = &workerState{
forwardID: forwardID,
lastHeartbeat: time.Now(),
heartbeatCount: 0,
isHung: false,
onHungCallback: onHungCallback,
}
logger.Debug("Watchdog registered worker", map[string]interface{}{
"forward_id": forwardID,
})
}
// UnregisterWorker removes a worker from monitoring
func (w *Watchdog) UnregisterWorker(forwardID string) {
w.mu.Lock()
defer w.mu.Unlock()
delete(w.workers, forwardID)
logger.Debug("Watchdog unregistered worker", map[string]interface{}{
"forward_id": forwardID,
})
}
// Heartbeat records that a worker is alive and processing
// Workers should call this periodically (e.g., in their main loop)
func (w *Watchdog) Heartbeat(forwardID string) {
w.mu.Lock()
defer w.mu.Unlock()
if state, exists := w.workers[forwardID]; exists {
state.lastHeartbeat = time.Now()
state.heartbeatCount++
state.isHung = false
}
}
// GetWorkerState returns the current state of a worker (for testing)
func (w *Watchdog) GetWorkerState(forwardID string) (lastHeartbeat time.Time, count uint64, exists bool) {
w.mu.RLock()
defer w.mu.RUnlock()
if state, ok := w.workers[forwardID]; ok {
return state.lastHeartbeat, state.heartbeatCount, true
}
return time.Time{}, 0, false
}
// monitorLoop periodically checks all workers
func (w *Watchdog) monitorLoop() {
defer w.wg.Done()
ticker := time.NewTicker(w.checkInterval)
defer ticker.Stop()
for {
select {
case <-w.ctx.Done():
return
case <-ticker.C:
w.checkWorkers()
}
}
}
// hungWorkerInfo stores information about a hung worker for deferred callback execution
type hungWorkerInfo struct {
forwardID string
callback func(string)
}
// checkWorkers checks all registered workers for hung state
func (w *Watchdog) checkWorkers() {
// Collect hung workers while holding the lock
var hungWorkers []hungWorkerInfo
w.mu.Lock()
now := time.Now()
for forwardID, state := range w.workers {
timeSinceHeartbeat := now.Sub(state.lastHeartbeat)
// Check if worker is hung
if timeSinceHeartbeat > w.hangThreshold {
if !state.isHung {
// First time detecting hung state
state.isHung = true
logger.Warn("Watchdog detected hung worker", map[string]interface{}{
"forward_id": forwardID,
"time_since_heartbeat": timeSinceHeartbeat.String(),
"hang_threshold": w.hangThreshold.String(),
"heartbeat_count": state.heartbeatCount,
})
// Collect callback for deferred execution outside the lock
if state.onHungCallback != nil {
hungWorkers = append(hungWorkers, hungWorkerInfo{
forwardID: forwardID,
callback: state.onHungCallback,
})
}
}
}
}
w.mu.Unlock()
// Execute callbacks outside the lock to prevent deadlocks and ensure
// consistent state during callback execution. Callbacks are idempotent
// (they trigger reconnection via channels), so concurrent state changes
// between detection and callback execution are safe.
for _, hw := range hungWorkers {
hw.callback(hw.forwardID)
}
}
+310
View File
@@ -0,0 +1,310 @@
package forward
import (
"sync"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite"
)
// WatchdogTestSuite contains tests for the watchdog
type WatchdogTestSuite struct {
suite.Suite
watchdog *Watchdog
}
func TestWatchdogSuite(t *testing.T) {
suite.Run(t, new(WatchdogTestSuite))
}
func (s *WatchdogTestSuite) SetupTest() {
// Create watchdog with fast intervals for testing
s.watchdog = NewWatchdog(100*time.Millisecond, 300*time.Millisecond)
s.watchdog.Start()
}
func (s *WatchdogTestSuite) TearDownTest() {
if s.watchdog != nil {
s.watchdog.Stop()
}
}
// TestRegisterUnregister tests basic registration and unregistration
func (s *WatchdogTestSuite) TestRegisterUnregister() {
callbackCalled := false
callback := func(forwardID string) {
callbackCalled = true
}
// Register worker
s.watchdog.RegisterWorker("test-forward", callback)
// Verify worker is registered
_, _, exists := s.watchdog.GetWorkerState("test-forward")
assert.True(s.T(), exists, "Worker should be registered")
// Unregister worker
s.watchdog.UnregisterWorker("test-forward")
// Verify worker is unregistered
_, _, exists = s.watchdog.GetWorkerState("test-forward")
assert.False(s.T(), exists, "Worker should be unregistered")
assert.False(s.T(), callbackCalled, "Callback should not have been called")
}
// TestHeartbeat tests that heartbeats update worker state
func (s *WatchdogTestSuite) TestHeartbeat() {
s.watchdog.RegisterWorker("test-forward", nil)
// Send initial heartbeat
s.watchdog.Heartbeat("test-forward")
lastHeartbeat1, count1, exists := s.watchdog.GetWorkerState("test-forward")
require.True(s.T(), exists)
assert.Equal(s.T(), uint64(1), count1)
// Wait a bit
time.Sleep(50 * time.Millisecond)
// Send another heartbeat
s.watchdog.Heartbeat("test-forward")
lastHeartbeat2, count2, exists := s.watchdog.GetWorkerState("test-forward")
require.True(s.T(), exists)
assert.Equal(s.T(), uint64(2), count2)
assert.True(s.T(), lastHeartbeat2.After(lastHeartbeat1), "Second heartbeat should be after first")
}
// TestHungWorkerDetection tests that hung workers are detected
func (s *WatchdogTestSuite) TestHungWorkerDetection() {
callbackCalled := make(chan string, 1)
callback := func(forwardID string) {
callbackCalled <- forwardID
}
s.watchdog.RegisterWorker("test-forward", callback)
// Send initial heartbeat
s.watchdog.Heartbeat("test-forward")
// Wait for worker to be considered hung (300ms threshold + 100ms check interval)
timeout := time.After(1 * time.Second)
select {
case forwardID := <-callbackCalled:
assert.Equal(s.T(), "test-forward", forwardID)
case <-timeout:
s.T().Fatal("Timeout waiting for hung worker callback")
}
}
// TestHealthyWorkerNotDetectedAsHung tests that workers sending heartbeats are not considered hung
func (s *WatchdogTestSuite) TestHealthyWorkerNotDetectedAsHung() {
callbackCalled := false
var mu sync.Mutex
callback := func(forwardID string) {
mu.Lock()
defer mu.Unlock()
callbackCalled = true
}
s.watchdog.RegisterWorker("test-forward", callback)
// Send periodic heartbeats (faster than hang threshold)
ticker := time.NewTicker(50 * time.Millisecond)
defer ticker.Stop()
done := make(chan bool)
go func() {
for i := 0; i < 10; i++ {
<-ticker.C
s.watchdog.Heartbeat("test-forward")
}
done <- true
}()
// Wait for all heartbeats to complete
<-done
// Check that callback was not called
mu.Lock()
assert.False(s.T(), callbackCalled, "Callback should not be called for healthy worker")
mu.Unlock()
}
// TestMultipleWorkers tests monitoring multiple workers simultaneously
func (s *WatchdogTestSuite) TestMultipleWorkers() {
callbacks := make(map[string]int)
var mu sync.Mutex
makeCallback := func(id string) func(string) {
return func(forwardID string) {
mu.Lock()
defer mu.Unlock()
callbacks[id]++
}
}
// Register multiple workers
s.watchdog.RegisterWorker("worker-1", makeCallback("worker-1"))
s.watchdog.RegisterWorker("worker-2", makeCallback("worker-2"))
s.watchdog.RegisterWorker("worker-3", makeCallback("worker-3"))
// worker-1: Keep sending heartbeats (healthy)
ticker1 := time.NewTicker(50 * time.Millisecond)
defer ticker1.Stop()
go func() {
for i := 0; i < 10; i++ {
<-ticker1.C
s.watchdog.Heartbeat("worker-1")
}
}()
// worker-2: Send initial heartbeat then stop (will become hung)
s.watchdog.Heartbeat("worker-2")
// worker-3: Send initial heartbeat then stop (will become hung)
s.watchdog.Heartbeat("worker-3")
// Wait for hung workers to be detected
time.Sleep(600 * time.Millisecond)
// Check results
mu.Lock()
defer mu.Unlock()
assert.Equal(s.T(), 0, callbacks["worker-1"], "worker-1 should not trigger callback (healthy)")
assert.Greater(s.T(), callbacks["worker-2"], 0, "worker-2 should trigger callback (hung)")
assert.Greater(s.T(), callbacks["worker-3"], 0, "worker-3 should trigger callback (hung)")
}
// TestCallbackOnlyOnFirstDetection tests that callback is only called once when hung is first detected
func (s *WatchdogTestSuite) TestCallbackOnlyOnFirstDetection() {
callbackCount := 0
var mu sync.Mutex
callback := func(forwardID string) {
mu.Lock()
defer mu.Unlock()
callbackCount++
}
s.watchdog.RegisterWorker("test-forward", callback)
// Send initial heartbeat
s.watchdog.Heartbeat("test-forward")
// Wait for multiple check cycles
time.Sleep(1 * time.Second)
// Check that callback was only called once
mu.Lock()
assert.Equal(s.T(), 1, callbackCount, "Callback should only be called once")
mu.Unlock()
}
// TestHeartbeatResetsHungState tests that sending heartbeat after hung detection resets state
func (s *WatchdogTestSuite) TestHeartbeatResetsHungState() {
callbackCount := 0
var mu sync.Mutex
callback := func(forwardID string) {
mu.Lock()
defer mu.Unlock()
callbackCount++
}
s.watchdog.RegisterWorker("test-forward", callback)
// Send initial heartbeat
s.watchdog.Heartbeat("test-forward")
// Wait for hung detection
time.Sleep(500 * time.Millisecond)
mu.Lock()
firstCount := callbackCount
mu.Unlock()
assert.Equal(s.T(), 1, firstCount, "First hung detection should trigger callback")
// Send heartbeat to reset hung state
s.watchdog.Heartbeat("test-forward")
// Wait for worker to become hung again
time.Sleep(500 * time.Millisecond)
mu.Lock()
secondCount := callbackCount
mu.Unlock()
assert.Equal(s.T(), 2, secondCount, "Second hung detection should trigger callback again")
}
// TestConcurrentOperations tests thread safety
func (s *WatchdogTestSuite) TestConcurrentOperations() {
var wg sync.WaitGroup
numWorkers := 10
for i := 0; i < numWorkers; i++ {
wg.Add(1)
go func(id int) {
defer wg.Done()
forwardID := string(rune('a' + id))
s.watchdog.RegisterWorker(forwardID, nil)
for j := 0; j < 10; j++ {
s.watchdog.Heartbeat(forwardID)
time.Sleep(10 * time.Millisecond)
}
s.watchdog.UnregisterWorker(forwardID)
}(i)
}
wg.Wait()
// If we get here without deadlocks or panics, test passes
}
// TestStopWatchdog tests that stopping watchdog cleans up properly
func TestStopWatchdog(t *testing.T) {
watchdog := NewWatchdog(100*time.Millisecond, 300*time.Millisecond)
watchdog.Start()
callbackCalled := false
callback := func(forwardID string) {
callbackCalled = true
}
watchdog.RegisterWorker("test-forward", callback)
watchdog.Heartbeat("test-forward")
// Stop watchdog before hang detection
time.Sleep(100 * time.Millisecond)
watchdog.Stop()
// Wait to ensure no more callbacks after stop
time.Sleep(500 * time.Millisecond)
assert.False(t, callbackCalled, "Callback should not be called after watchdog is stopped")
}
// TestWatchdogWithZeroHeartbeats tests detecting hung worker that never sends heartbeats
func (s *WatchdogTestSuite) TestWatchdogWithZeroHeartbeats() {
callbackCalled := make(chan string, 1)
callback := func(forwardID string) {
callbackCalled <- forwardID
}
// Register worker but never send heartbeat
s.watchdog.RegisterWorker("test-forward", callback)
// Wait for hung detection
timeout := time.After(1 * time.Second)
select {
case forwardID := <-callbackCalled:
assert.Equal(s.T(), "test-forward", forwardID)
case <-timeout:
s.T().Fatal("Timeout waiting for hung worker callback")
}
}
+80 -13
View File
@@ -5,6 +5,7 @@ import (
"fmt"
"io"
"log"
"sync"
"time"
"github.com/nvm/kportal/internal/config"
@@ -20,21 +21,25 @@ const (
// ForwardWorker manages a single port-forward connection with automatic retry.
type ForwardWorker struct {
forward config.Forward
portForwarder *k8s.PortForwarder
ctx context.Context
cancel context.CancelFunc
stopChan chan struct{}
doneChan chan struct{}
verbose bool
lastPod string // Track the last pod we connected to
statusUI StatusUpdater
healthChecker *healthcheck.Checker
startTime time.Time // Track when the worker started
forward config.Forward
portForwarder *k8s.PortForwarder
ctx context.Context
cancel context.CancelFunc
stopChan chan struct{}
doneChan chan struct{}
reconnectChan chan string // Channel to trigger reconnection
verbose bool
lastPod string // Track the last pod we connected to
statusUI StatusUpdater
healthChecker *healthcheck.Checker
watchdog *Watchdog
startTime time.Time // Track when the worker started
forwardCancel context.CancelFunc // Cancel function for current forward attempt
forwardCancelMu sync.Mutex // Protects forwardCancel
}
// NewForwardWorker creates a new ForwardWorker for a single forward configuration.
func NewForwardWorker(fwd config.Forward, portForwarder *k8s.PortForwarder, verbose bool, statusUI StatusUpdater, healthChecker *healthcheck.Checker) *ForwardWorker {
func NewForwardWorker(fwd config.Forward, portForwarder *k8s.PortForwarder, verbose bool, statusUI StatusUpdater, healthChecker *healthcheck.Checker, watchdog *Watchdog) *ForwardWorker {
ctx, cancel := context.WithCancel(context.Background())
return &ForwardWorker{
@@ -44,13 +49,32 @@ func NewForwardWorker(fwd config.Forward, portForwarder *k8s.PortForwarder, verb
cancel: cancel,
stopChan: make(chan struct{}),
doneChan: make(chan struct{}),
reconnectChan: make(chan string, 1), // Buffered to avoid blocking
verbose: verbose,
statusUI: statusUI,
healthChecker: healthChecker,
watchdog: watchdog,
startTime: time.Now(),
}
}
// TriggerReconnect triggers a reconnection (e.g., due to stale connection)
func (w *ForwardWorker) TriggerReconnect(reason string) {
// Cancel current forward if running
w.forwardCancelMu.Lock()
if w.forwardCancel != nil {
w.forwardCancel()
}
w.forwardCancelMu.Unlock()
// Send reconnect signal (non-blocking)
select {
case w.reconnectChan <- reason:
default:
// Channel already has pending reconnect
}
}
// Start begins the port-forward worker in a goroutine.
// The worker will continuously retry on failures with exponential backoff.
func (w *ForwardWorker) Start() {
@@ -68,6 +92,12 @@ func (w *ForwardWorker) Stop() {
func (w *ForwardWorker) run() {
defer close(w.doneChan)
// Start heartbeat goroutine to continuously send heartbeats to watchdog
// This prevents false "hung worker" detection when connections are long-lived
if w.watchdog != nil {
go w.heartbeatLoop()
}
backoff := retry.NewBackoff()
for {
@@ -173,6 +203,26 @@ func (w *ForwardWorker) run() {
}
}
// heartbeatLoop sends periodic heartbeats to the watchdog to prove the worker is alive
// This runs in a separate goroutine and continues throughout the worker's lifetime
func (w *ForwardWorker) heartbeatLoop() {
// Send heartbeats every 15 seconds (well within typical 60s watchdog timeout)
ticker := time.NewTicker(15 * time.Second)
defer ticker.Stop()
// Send immediate heartbeat
w.watchdog.Heartbeat(w.forward.ID())
for {
select {
case <-ticker.C:
w.watchdog.Heartbeat(w.forward.ID())
case <-w.ctx.Done():
return
}
}
}
// establishForward establishes a port-forward connection.
// This blocks until the connection is closed or an error occurs.
func (w *ForwardWorker) establishForward(podName string) error {
@@ -184,11 +234,24 @@ func (w *ForwardWorker) establishForward(podName string) error {
forwardCtx, forwardCancel := context.WithCancel(w.ctx)
defer forwardCancel()
// Start a goroutine to monitor for stop signal
// Store cancel function so TriggerReconnect can use it
w.forwardCancelMu.Lock()
w.forwardCancel = forwardCancel
w.forwardCancelMu.Unlock()
defer func() {
w.forwardCancelMu.Lock()
w.forwardCancel = nil
w.forwardCancelMu.Unlock()
}()
// Start a goroutine to monitor for stop signal and reconnect triggers
go func() {
select {
case <-w.stopChan:
close(stopChan)
case <-w.reconnectChan:
close(stopChan)
case <-forwardCtx.Done():
close(stopChan)
}
@@ -230,6 +293,10 @@ func (w *ForwardWorker) establishForward(podName string) error {
if w.verbose {
log.Printf("[%s] Port-forward connection established", w.forward.ID())
}
// Mark connection as established in health checker
if w.healthChecker != nil {
w.healthChecker.MarkConnected(w.forward.ID())
}
case err := <-errChan:
return fmt.Errorf("failed to establish forward: %w", err)
case <-w.ctx.Done():
+219 -70
View File
@@ -3,13 +3,17 @@ package healthcheck
import (
"context"
"fmt"
"io"
"net"
"sync"
"time"
"github.com/nvm/kportal/internal/config"
)
const (
startupGracePeriod = 10 * time.Second
dataTransferSize = 1024 // bytes to read in data transfer test
)
// Status represents the health status of a port forward
@@ -20,15 +24,26 @@ const (
StatusUnhealthy Status = "Error"
StatusStarting Status = "Starting"
StatusReconnect Status = "Reconnecting"
StatusStale Status = "Stale" // Connection is old or idle
)
// CheckMethod represents the health check method
type CheckMethod string
const (
CheckMethodTCPDial CheckMethod = "tcp-dial" // Simple TCP connection test
CheckMethodDataTransfer CheckMethod = "data-transfer" // Try to read data from connection
)
// PortHealth represents the health status of a single port
type PortHealth struct {
Port int
LastCheck time.Time
Status Status
ErrorMessage string
RegisteredAt time.Time // When this port was registered
Port int
LastCheck time.Time
Status Status
ErrorMessage string
RegisteredAt time.Time // When this port was registered
ConnectionTime time.Time // When current connection was established
LastActivity time.Time // Last time data was transferred
}
// StatusCallback is called when a port's health status changes
@@ -36,26 +51,52 @@ type StatusCallback func(forwardID string, status Status, errorMsg string)
// Checker performs periodic health checks on local ports
type Checker struct {
mu sync.RWMutex
ports map[string]*PortHealth // key: forward ID
callbacks map[string]StatusCallback
interval time.Duration
timeout time.Duration
ctx context.Context
cancel context.CancelFunc
wg sync.WaitGroup
mu sync.RWMutex
ports map[string]*PortHealth // key: forward ID
callbacks map[string]StatusCallback
interval time.Duration
timeout time.Duration
method CheckMethod
maxConnectionAge time.Duration
maxIdleTime time.Duration
ctx context.Context
cancel context.CancelFunc
wg sync.WaitGroup
}
// NewChecker creates a new health checker
// CheckerOptions configures the health checker
type CheckerOptions struct {
Interval time.Duration
Timeout time.Duration
Method CheckMethod
MaxConnectionAge time.Duration
MaxIdleTime time.Duration
}
// NewChecker creates a new health checker with default options
func NewChecker(interval, timeout time.Duration) *Checker {
return NewCheckerWithOptions(CheckerOptions{
Interval: interval,
Timeout: timeout,
Method: CheckMethodDataTransfer,
MaxConnectionAge: config.DefaultMaxConnectionAge,
MaxIdleTime: config.DefaultMaxIdleTime,
})
}
// NewCheckerWithOptions creates a new health checker with custom options
func NewCheckerWithOptions(opts CheckerOptions) *Checker {
ctx, cancel := context.WithCancel(context.Background())
return &Checker{
ports: make(map[string]*PortHealth),
callbacks: make(map[string]StatusCallback),
interval: interval,
timeout: timeout,
ctx: ctx,
cancel: cancel,
ports: make(map[string]*PortHealth),
callbacks: make(map[string]StatusCallback),
interval: opts.Interval,
timeout: opts.Timeout,
method: opts.Method,
maxConnectionAge: opts.MaxConnectionAge,
maxIdleTime: opts.MaxIdleTime,
ctx: ctx,
cancel: cancel,
}
}
@@ -64,11 +105,14 @@ func (c *Checker) Register(forwardID string, port int, callback StatusCallback)
c.mu.Lock()
defer c.mu.Unlock()
now := time.Now()
c.ports[forwardID] = &PortHealth{
Port: port,
LastCheck: time.Time{},
Status: StatusStarting,
RegisteredAt: time.Now(),
Port: port,
LastCheck: time.Time{},
Status: StatusStarting,
RegisteredAt: now,
ConnectionTime: now,
LastActivity: now,
}
c.callbacks[forwardID] = callback
@@ -77,6 +121,28 @@ func (c *Checker) Register(forwardID string, port int, callback StatusCallback)
go c.checkLoop(forwardID)
}
// MarkConnected marks a forward as having established a new connection
func (c *Checker) MarkConnected(forwardID string) {
c.mu.Lock()
defer c.mu.Unlock()
if health, exists := c.ports[forwardID]; exists {
now := time.Now()
health.ConnectionTime = now
health.LastActivity = now
}
}
// RecordActivity records data transfer activity for a forward
func (c *Checker) RecordActivity(forwardID string) {
c.mu.Lock()
defer c.mu.Unlock()
if health, exists := c.ports[forwardID]; exists {
health.LastActivity = time.Now()
}
}
// Unregister removes a port from monitoring
func (c *Checker) Unregister(forwardID string) {
c.mu.Lock()
@@ -86,44 +152,34 @@ func (c *Checker) Unregister(forwardID string) {
delete(c.callbacks, forwardID)
}
// MarkReconnecting marks a forward as reconnecting (called by worker)
func (c *Checker) MarkReconnecting(forwardID string) {
// markStatus is a helper to set a forward's status and notify on change.
func (c *Checker) markStatus(forwardID string, newStatus Status) {
c.mu.Lock()
if health, exists := c.ports[forwardID]; exists {
oldStatus := health.Status
health.Status = StatusReconnect
health.LastCheck = time.Now()
health, exists := c.ports[forwardID]
if !exists {
c.mu.Unlock()
if oldStatus != StatusReconnect {
c.notifyStatusChange(forwardID, StatusReconnect, "")
}
return
}
oldStatus := health.Status
health.Status = newStatus
health.LastCheck = time.Now()
c.mu.Unlock()
if oldStatus != newStatus {
c.notifyStatusChange(forwardID, newStatus, "")
}
}
// MarkReconnecting marks a forward as reconnecting (called by worker)
func (c *Checker) MarkReconnecting(forwardID string) {
c.markStatus(forwardID, StatusReconnect)
}
// MarkStarting marks a forward as starting (called by worker)
func (c *Checker) MarkStarting(forwardID string) {
c.mu.Lock()
if health, exists := c.ports[forwardID]; exists {
oldStatus := health.Status
health.Status = StatusStarting
health.LastCheck = time.Now()
c.mu.Unlock()
if oldStatus != StatusStarting {
c.notifyStatusChange(forwardID, StatusStarting, "")
}
return
}
c.mu.Unlock()
c.markStatus(forwardID, StatusStarting)
}
// GetStatus returns the current health status of a forward
@@ -137,6 +193,17 @@ func (c *Checker) GetStatus(forwardID string) (Status, bool) {
return StatusUnhealthy, false
}
// GetLastCheckTime returns the last health check time for a forward
func (c *Checker) GetLastCheckTime(forwardID string) (time.Time, bool) {
c.mu.RLock()
defer c.mu.RUnlock()
if health, exists := c.ports[forwardID]; exists {
return health.LastCheck, true
}
return time.Time{}, false
}
// GetAllErrors returns all forwards with errors and their error messages
func (c *Checker) GetAllErrors() map[string]string {
c.mu.RLock()
@@ -197,38 +264,64 @@ func (c *Checker) checkPort(forwardID string) {
port := health.Port
oldStatus := health.Status
registeredAt := health.RegisteredAt
connectionTime := health.ConnectionTime
lastActivity := health.LastActivity
c.mu.RUnlock()
// Attempt to connect to the local port
ctx, cancel := context.WithTimeout(c.ctx, c.timeout)
defer cancel()
var d net.Dialer
conn, err := d.DialContext(ctx, "tcp", fmt.Sprintf("127.0.0.1:%d", port))
now := time.Now()
newStatus := StatusHealthy
errorMsg := ""
if err != nil {
// Grace period: if forward is less than 10 seconds old, keep it as "Starting"
// This avoids scary "Error" messages during initial connection attempts
timeSinceStart := time.Since(registeredAt)
if timeSinceStart < startupGracePeriod {
newStatus = StatusStarting
} else {
newStatus = StatusUnhealthy
}
errorMsg = err.Error()
// Check for stale connections based on age or idle time
connectionAge := now.Sub(connectionTime)
idleTime := now.Sub(lastActivity)
// Only enforce max connection age if the connection is ALSO idle
// This prevents interrupting active transfers (e.g., database dumps)
if c.maxConnectionAge > 0 && connectionAge > c.maxConnectionAge && idleTime > c.maxIdleTime {
newStatus = StatusStale
errorMsg = fmt.Sprintf("connection age %v exceeds max %v (and idle for %v)",
connectionAge.Round(time.Second), c.maxConnectionAge, idleTime.Round(time.Second))
} else if c.maxIdleTime > 0 && idleTime > c.maxIdleTime {
newStatus = StatusStale
errorMsg = fmt.Sprintf("idle time %v exceeds max %v", idleTime.Round(time.Second), c.maxIdleTime)
} else {
conn.Close()
// Perform connectivity check
var checkErr error
switch c.method {
case CheckMethodDataTransfer:
checkErr = c.checkDataTransfer(port)
case CheckMethodTCPDial:
checkErr = c.checkTCPDial(port)
default:
checkErr = c.checkTCPDial(port)
}
if checkErr != nil {
// Grace period: if forward is less than 10 seconds old, keep it as "Starting"
// This avoids scary "Error" messages during initial connection attempts
timeSinceStart := now.Sub(registeredAt)
if timeSinceStart < startupGracePeriod {
newStatus = StatusStarting
} else {
newStatus = StatusUnhealthy
}
errorMsg = checkErr.Error()
}
}
// Update health status
c.mu.Lock()
if health, exists := c.ports[forwardID]; exists {
health.Status = newStatus
health.LastCheck = time.Now()
health.LastCheck = now
health.ErrorMessage = errorMsg
// Successful health check indicates connection is active
// This prevents false positives where healthy connections are marked as idle
if newStatus == StatusHealthy {
health.LastActivity = now
}
}
c.mu.Unlock()
@@ -238,6 +331,62 @@ func (c *Checker) checkPort(forwardID string) {
}
}
// checkTCPDial performs a simple TCP dial test
func (c *Checker) checkTCPDial(port int) error {
ctx, cancel := context.WithTimeout(c.ctx, c.timeout)
defer cancel()
var d net.Dialer
conn, err := d.DialContext(ctx, "tcp", fmt.Sprintf("127.0.0.1:%d", port))
if err != nil {
return err
}
conn.Close()
return nil
}
// checkDataTransfer attempts to read data from the connection to verify tunnel health
func (c *Checker) checkDataTransfer(port int) error {
ctx, cancel := context.WithTimeout(c.ctx, c.timeout)
defer cancel()
var d net.Dialer
conn, err := d.DialContext(ctx, "tcp", fmt.Sprintf("127.0.0.1:%d", port))
if err != nil {
return err
}
defer conn.Close()
// Set a short read deadline to detect hung connections
// We don't expect to receive data, but we want to verify the connection isn't hung
conn.SetReadDeadline(time.Now().Add(c.timeout))
// Try to read a small amount of data
// Most servers will either:
// 1. Send a banner (SSH, FTP, etc) - we'll read it successfully
// 2. Wait for client to send first (HTTP, postgres) - we'll timeout (which is OK)
// 3. Hung/stale connection - will timeout with different error
buf := make([]byte, dataTransferSize)
_, err = conn.Read(buf)
// We expect either:
// - No error (banner received)
// - EOF (connection closed by server after connect)
// - Timeout (server waiting for client)
// All of these indicate the tunnel is working
if err == nil || err == io.EOF {
return nil
}
// Timeout is acceptable - server is waiting for us to send data first
if netErr, ok := err.(net.Error); ok && netErr.Timeout() {
return nil
}
// Other errors indicate a problem
return fmt.Errorf("data transfer check failed: %w", err)
}
// notifyStatusChange calls the callback for a forward
func (c *Checker) notifyStatusChange(forwardID string, status Status, errorMsg string) {
c.mu.RLock()
+551
View File
@@ -0,0 +1,551 @@
package healthcheck
import (
"fmt"
"net"
"sync"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite"
)
// HealthCheckTestSuite contains tests for the health checker
type HealthCheckTestSuite struct {
suite.Suite
checker *Checker
listener net.Listener
port int
}
func TestHealthCheckSuite(t *testing.T) {
suite.Run(t, new(HealthCheckTestSuite))
}
func (s *HealthCheckTestSuite) SetupTest() {
// Create a test listener on a random port
ln, err := net.Listen("tcp", "127.0.0.1:0")
require.NoError(s.T(), err)
s.listener = ln
s.port = ln.Addr().(*net.TCPAddr).Port
// Create checker with fast intervals for testing
s.checker = NewCheckerWithOptions(CheckerOptions{
Interval: 100 * time.Millisecond,
Timeout: 50 * time.Millisecond,
Method: CheckMethodTCPDial,
MaxConnectionAge: 500 * time.Millisecond,
MaxIdleTime: 300 * time.Millisecond,
})
}
func (s *HealthCheckTestSuite) TearDownTest() {
if s.checker != nil {
s.checker.Stop()
}
if s.listener != nil {
s.listener.Close()
}
}
// TestRegisterAndUnregister tests basic registration and unregistration
func (s *HealthCheckTestSuite) TestRegisterAndUnregister() {
callbackCalled := false
var callbackStatus Status
var mu sync.Mutex
callback := func(forwardID string, status Status, errorMsg string) {
mu.Lock()
defer mu.Unlock()
callbackCalled = true
callbackStatus = status
}
// Register port
s.checker.Register("test-forward", s.port, callback)
// Wait for health check to run
time.Sleep(200 * time.Millisecond)
// Verify callback was called with healthy status
mu.Lock()
assert.True(s.T(), callbackCalled, "Callback should have been called")
assert.Equal(s.T(), StatusHealthy, callbackStatus)
mu.Unlock()
// Unregister
s.checker.Unregister("test-forward")
// Verify port is no longer monitored
status, exists := s.checker.GetStatus("test-forward")
assert.False(s.T(), exists, "Port should no longer exist after unregister")
assert.Equal(s.T(), StatusUnhealthy, status)
}
// TestTCPDialMethod tests the TCP dial health check method
func (s *HealthCheckTestSuite) TestTCPDialMethod() {
tests := []struct {
name string
setupPort bool
expectedStatus Status
description string
}{
{
name: "port available - healthy",
setupPort: true,
expectedStatus: StatusHealthy,
description: "When port is listening, status should be healthy",
},
{
name: "port unavailable - unhealthy",
setupPort: false,
expectedStatus: StatusUnhealthy,
description: "When port is not listening, status should be unhealthy",
},
}
for _, tt := range tests {
s.Run(tt.name, func() {
var testPort int
var testListener net.Listener
if tt.setupPort {
// Use the existing listener
testPort = s.port
} else {
// Use a port that's not listening
testPort = 54321 // Likely unused port
}
// Create a new checker for this test
checker := NewCheckerWithOptions(CheckerOptions{
Interval: 100 * time.Millisecond,
Timeout: 50 * time.Millisecond,
Method: CheckMethodTCPDial,
MaxConnectionAge: 0, // Disable for this test
MaxIdleTime: 0, // Disable for this test
})
defer checker.Stop()
checker.Register("test-forward", testPort, nil)
// Wait for health checks to complete
if !tt.setupPort {
// For unhealthy case, wait for grace period
time.Sleep(startupGracePeriod + 200*time.Millisecond)
} else {
time.Sleep(200 * time.Millisecond)
}
// Check status directly
status, exists := checker.GetStatus("test-forward")
assert.True(s.T(), exists)
assert.Equal(s.T(), tt.expectedStatus, status, tt.description)
if testListener != nil {
testListener.Close()
}
})
}
}
// TestDataTransferMethod tests the data transfer health check method
func (s *HealthCheckTestSuite) TestDataTransferMethod() {
tests := []struct {
name string
serverBehavior string // "banner", "silent", "close", "none"
expectedStatus Status
}{
{
name: "server sends banner - healthy",
serverBehavior: "banner",
expectedStatus: StatusHealthy,
},
{
name: "server waits silently - healthy (timeout OK)",
serverBehavior: "silent",
expectedStatus: StatusHealthy,
},
{
name: "server closes connection - healthy (EOF OK)",
serverBehavior: "close",
expectedStatus: StatusHealthy,
},
{
name: "no server listening - unhealthy",
serverBehavior: "none",
expectedStatus: StatusUnhealthy,
},
}
for _, tt := range tests {
s.Run(tt.name, func() {
var testPort int
var testListener net.Listener
var err error
if tt.serverBehavior != "none" {
// Start test server
testListener, err = net.Listen("tcp", "127.0.0.1:0")
require.NoError(s.T(), err)
testPort = testListener.Addr().(*net.TCPAddr).Port
// Handle connections based on behavior
go func() {
for {
conn, err := testListener.Accept()
if err != nil {
return
}
switch tt.serverBehavior {
case "banner":
conn.Write([]byte("220 Welcome\r\n"))
time.Sleep(50 * time.Millisecond)
conn.Close()
case "close":
conn.Close()
case "silent":
// Just keep connection open
time.Sleep(200 * time.Millisecond)
conn.Close()
}
}
}()
defer testListener.Close()
} else {
testPort = 54322 // Unused port
}
// Create checker with data transfer method
checker := NewCheckerWithOptions(CheckerOptions{
Interval: 100 * time.Millisecond,
Timeout: 50 * time.Millisecond,
Method: CheckMethodDataTransfer,
MaxConnectionAge: 0, // Disable for this test
MaxIdleTime: 0, // Disable for this test
})
defer checker.Stop()
checker.Register("test-forward", testPort, nil)
// Wait for health checks to complete
if tt.serverBehavior == "none" {
// For unhealthy case, wait for grace period
time.Sleep(startupGracePeriod + 200*time.Millisecond)
} else {
time.Sleep(300 * time.Millisecond)
}
// Check status directly
status, exists := checker.GetStatus("test-forward")
assert.True(s.T(), exists)
assert.Equal(s.T(), tt.expectedStatus, status)
})
}
}
// TestConnectionAgeDetection tests max connection age detection
func (s *HealthCheckTestSuite) TestConnectionAgeDetection() {
statusChanges := make(chan Status, 10)
callback := func(forwardID string, status Status, errorMsg string) {
statusChanges <- status
}
// Create checker with very short max connection age
checker := NewCheckerWithOptions(CheckerOptions{
Interval: 50 * time.Millisecond,
Timeout: 25 * time.Millisecond,
Method: CheckMethodTCPDial,
MaxConnectionAge: 150 * time.Millisecond, // Very short for testing
MaxIdleTime: 0, // Disable idle detection
})
defer checker.Stop()
checker.Register("test-forward", s.port, callback)
// Wait for initial healthy status
var gotHealthy, gotStale bool
timeout := time.After(1 * time.Second)
for {
select {
case status := <-statusChanges:
if status == StatusHealthy || status == StatusStarting {
gotHealthy = true
}
if status == StatusStale {
gotStale = true
}
if gotHealthy && gotStale {
return // Test passed
}
case <-timeout:
s.T().Fatalf("Expected StatusStale after max connection age exceeded. gotHealthy=%v, gotStale=%v",
gotHealthy, gotStale)
}
}
}
// TestIdleTimeDetection tests that connections with passing health checks are NOT marked as stale
// This verifies that successful health checks update LastActivity, preventing false idle detection
func (s *HealthCheckTestSuite) TestIdleTimeDetection() {
statusChanges := make(chan Status, 10)
callback := func(forwardID string, status Status, errorMsg string) {
statusChanges <- status
}
// Create checker with very short max idle time
checker := NewCheckerWithOptions(CheckerOptions{
Interval: 50 * time.Millisecond,
Timeout: 25 * time.Millisecond,
Method: CheckMethodTCPDial,
MaxConnectionAge: 0, // Disable age detection
MaxIdleTime: 150 * time.Millisecond, // Very short for testing
})
defer checker.Stop()
checker.Register("test-forward", s.port, callback)
// Wait long enough that idle time WOULD be exceeded if health checks didn't update LastActivity
time.Sleep(500 * time.Millisecond)
// Verify connection is still healthy, not stale
// This proves that successful health checks are updating LastActivity
status, exists := checker.GetStatus("test-forward")
require.True(s.T(), exists)
assert.Equal(s.T(), StatusHealthy, status, "Connection with passing health checks should NOT be marked as stale")
// Verify we never received a StatusStale callback
select {
case status := <-statusChanges:
if status == StatusStale {
s.T().Fatal("Connection should NOT be marked as stale when health checks are passing")
}
default:
// No stale status - this is correct
}
}
// TestMarkConnected tests that MarkConnected resets connection time
func (s *HealthCheckTestSuite) TestMarkConnected() {
checker := NewCheckerWithOptions(CheckerOptions{
Interval: 50 * time.Millisecond,
Timeout: 25 * time.Millisecond,
Method: CheckMethodTCPDial,
MaxConnectionAge: 200 * time.Millisecond,
MaxIdleTime: 0,
})
defer checker.Stop()
statusChanges := make(chan Status, 10)
callback := func(forwardID string, status Status, errorMsg string) {
statusChanges <- status
}
checker.Register("test-forward", s.port, callback)
// Wait a bit
time.Sleep(100 * time.Millisecond)
// Mark as reconnected (resets connection time)
checker.MarkConnected("test-forward")
// Wait for connection age to exceed (relative to first connection time)
time.Sleep(200 * time.Millisecond)
// Check status - should still be healthy because we reset connection time
status, exists := checker.GetStatus("test-forward")
assert.True(s.T(), exists)
// Note: Might be StatusStale by now, but the key is that MarkConnected delayed it
// This is a timing-sensitive test, so we just verify the functionality exists
_ = status
}
// TestRecordActivity tests that RecordActivity resets idle time
func (s *HealthCheckTestSuite) TestRecordActivity() {
checker := NewCheckerWithOptions(CheckerOptions{
Interval: 50 * time.Millisecond,
Timeout: 25 * time.Millisecond,
Method: CheckMethodTCPDial,
MaxConnectionAge: 0,
MaxIdleTime: 200 * time.Millisecond,
})
defer checker.Stop()
statusChanges := make(chan Status, 10)
callback := func(forwardID string, status Status, errorMsg string) {
statusChanges <- status
}
checker.Register("test-forward", s.port, callback)
// Periodically record activity to prevent idle detection
ticker := time.NewTicker(80 * time.Millisecond)
defer ticker.Stop()
go func() {
for i := 0; i < 5; i++ {
<-ticker.C
checker.RecordActivity("test-forward")
}
}()
// Wait longer than idle timeout
time.Sleep(500 * time.Millisecond)
// Should still be healthy due to activity
status, exists := checker.GetStatus("test-forward")
assert.True(s.T(), exists)
// May transition to stale eventually, but activity recording should have delayed it
_ = status
}
// TestMarkReconnecting tests the MarkReconnecting functionality
func (s *HealthCheckTestSuite) TestMarkReconnecting() {
statusChanges := make(chan Status, 10)
callback := func(forwardID string, status Status, errorMsg string) {
statusChanges <- status
}
s.checker.Register("test-forward", s.port, callback)
// Wait for initial status
time.Sleep(150 * time.Millisecond)
// Mark as reconnecting
s.checker.MarkReconnecting("test-forward")
// Should receive reconnecting status
timeout := time.After(500 * time.Millisecond)
gotReconnect := false
for !gotReconnect {
select {
case status := <-statusChanges:
if status == StatusReconnect {
gotReconnect = true
}
case <-timeout:
s.T().Fatal("Expected StatusReconnect")
}
}
}
// TestStartingGracePeriod tests that errors during grace period show as "Starting"
func (s *HealthCheckTestSuite) TestStartingGracePeriod() {
// Use a port that's not listening
unavailablePort := 54323
checker := NewCheckerWithOptions(CheckerOptions{
Interval: 50 * time.Millisecond,
Timeout: 25 * time.Millisecond,
Method: CheckMethodTCPDial,
MaxConnectionAge: 0,
MaxIdleTime: 0,
})
defer checker.Stop()
// Register without callback - we'll check status directly
checker.Register("test-forward", unavailablePort, nil)
// Immediately check status - should be Starting or not yet checked
status, exists := checker.GetStatus("test-forward")
assert.True(s.T(), exists)
// Initially should be Starting
assert.Equal(s.T(), StatusStarting, status)
// Wait for grace period to expire
time.Sleep(startupGracePeriod + 200*time.Millisecond)
// Now should be Unhealthy
status, exists = checker.GetStatus("test-forward")
assert.True(s.T(), exists)
assert.Equal(s.T(), StatusUnhealthy, status)
}
// TestGetAllErrors tests retrieving all error messages
func (s *HealthCheckTestSuite) TestGetAllErrors() {
// Create a new checker with faster intervals for this test
checker := NewCheckerWithOptions(CheckerOptions{
Interval: 100 * time.Millisecond,
Timeout: 50 * time.Millisecond,
Method: CheckMethodTCPDial,
MaxConnectionAge: 0,
MaxIdleTime: 0,
})
defer checker.Stop()
// Register multiple forwards
checker.Register("forward1", s.port, nil)
checker.Register("forward2", 54324, nil) // Unavailable port
// Wait for grace period to expire
time.Sleep(startupGracePeriod + 300*time.Millisecond)
errors := checker.GetAllErrors()
// forward2 should have an error
_, hasError := errors["forward2"]
assert.True(s.T(), hasError, "forward2 should have an error")
// forward1 should not have an error
_, hasError = errors["forward1"]
assert.False(s.T(), hasError, "forward1 should not have an error")
}
// TestConcurrentOperations tests thread safety
func (s *HealthCheckTestSuite) TestConcurrentOperations() {
var wg sync.WaitGroup
numGoroutines := 10
for i := 0; i < numGoroutines; i++ {
wg.Add(1)
go func(id int) {
defer wg.Done()
forwardID := fmt.Sprintf("forward-%d", id)
s.checker.Register(forwardID, s.port, nil)
time.Sleep(50 * time.Millisecond)
s.checker.MarkConnected(forwardID)
s.checker.RecordActivity(forwardID)
status, _ := s.checker.GetStatus(forwardID)
_ = status
s.checker.Unregister(forwardID)
}(i)
}
wg.Wait()
// If we get here without deadlocks or panics, test passes
}
// TestDefaultOptions tests that NewChecker uses sensible defaults
func TestDefaultOptions(t *testing.T) {
checker := NewChecker(5*time.Second, 2*time.Second)
defer checker.Stop()
assert.Equal(t, 5*time.Second, checker.interval)
assert.Equal(t, 2*time.Second, checker.timeout)
assert.Equal(t, CheckMethodDataTransfer, checker.method)
assert.Equal(t, 25*time.Minute, checker.maxConnectionAge)
assert.Equal(t, 10*time.Minute, checker.maxIdleTime)
}
// TestCustomOptions tests NewCheckerWithOptions
func TestCustomOptions(t *testing.T) {
opts := CheckerOptions{
Interval: 1 * time.Second,
Timeout: 500 * time.Millisecond,
Method: CheckMethodTCPDial,
MaxConnectionAge: 5 * time.Minute,
MaxIdleTime: 2 * time.Minute,
}
checker := NewCheckerWithOptions(opts)
defer checker.Stop()
assert.Equal(t, 1*time.Second, checker.interval)
assert.Equal(t, 500*time.Millisecond, checker.timeout)
assert.Equal(t, CheckMethodTCPDial, checker.method)
assert.Equal(t, 5*time.Minute, checker.maxConnectionAge)
assert.Equal(t, 2*time.Minute, checker.maxIdleTime)
}
+2 -6
View File
@@ -292,12 +292,8 @@ func CheckPortAvailability(port int) (bool, string, error) {
addr := fmt.Sprintf(":%d", port)
listener, err := net.Listen("tcp", addr)
if err != nil {
// Port is in use
// Try to get process info (best-effort)
processInfo := "unknown process"
// Note: Getting process info requires platform-specific code
// For now, just return a generic message
return false, processInfo, nil
// Port is in use - return error details
return false, err.Error(), nil
}
// Port is available, close the listener
+42 -5
View File
@@ -4,9 +4,13 @@ import (
"context"
"fmt"
"io"
"net"
"net/http"
"net/url"
"strings"
"time"
"github.com/nvm/kportal/internal/config"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -17,18 +21,32 @@ import (
// PortForwarder handles Kubernetes port-forwarding operations.
type PortForwarder struct {
clientPool *ClientPool
resolver *ResourceResolver
clientPool *ClientPool
resolver *ResourceResolver
tcpKeepalive time.Duration // TCP keepalive interval
dialTimeout time.Duration // Connection dial timeout
}
// NewPortForwarder creates a new PortForwarder instance.
// NewPortForwarder creates a new PortForwarder instance with default settings.
func NewPortForwarder(clientPool *ClientPool, resolver *ResourceResolver) *PortForwarder {
return &PortForwarder{
clientPool: clientPool,
resolver: resolver,
clientPool: clientPool,
resolver: resolver,
tcpKeepalive: config.DefaultTCPKeepalive,
dialTimeout: config.DefaultDialTimeout,
}
}
// SetTCPKeepalive configures the TCP keepalive interval for new connections.
func (pf *PortForwarder) SetTCPKeepalive(keepalive time.Duration) {
pf.tcpKeepalive = keepalive
}
// SetDialTimeout configures the connection dial timeout.
func (pf *PortForwarder) SetDialTimeout(timeout time.Duration) {
pf.dialTimeout = timeout
}
// ForwardRequest contains the parameters for a port-forward request.
type ForwardRequest struct {
ContextName string // Kubernetes context name
@@ -124,6 +142,9 @@ func (pf *PortForwarder) forwardToService(ctx context.Context, req *ForwardReque
}
// Get pods backing the service using label selector
if len(service.Spec.Selector) == 0 {
return fmt.Errorf("service %s has no selector (headless service without selector cannot be port-forwarded)", serviceName)
}
selector := metav1.FormatLabelSelector(&metav1.LabelSelector{MatchLabels: service.Spec.Selector})
pods, err := client.CoreV1().Pods(req.Namespace).List(ctx, metav1.ListOptions{
LabelSelector: selector,
@@ -164,6 +185,19 @@ func (pf *PortForwarder) forwardToService(ctx context.Context, req *ForwardReque
// executePortForward performs the actual port-forward operation.
func (pf *PortForwarder) executePortForward(config *rest.Config, url *url.URL, req *ForwardRequest) error {
// Configure TCP settings on the underlying connection
// This is set in the rest.Config which will be used by the SPDY transport
if config.Dial == nil {
// Create a custom dialer with configurable timeout and keepalive
// - Timeout: How long to wait for connection to establish
// - KeepAlive: TCP keepalive helps OS detect dead connections at network layer
dialer := &net.Dialer{
Timeout: pf.dialTimeout, // Configurable dial timeout
KeepAlive: pf.tcpKeepalive, // Configurable keepalive interval
}
config.Dial = dialer.DialContext
}
// Create SPDY roundtripper
transport, upgrader, err := spdy.RoundTripperFor(config)
if err != nil {
@@ -228,6 +262,9 @@ func (pf *PortForwarder) GetPodForResource(ctx context.Context, contextName, nam
return "", fmt.Errorf("failed to get service: %w", err)
}
if len(service.Spec.Selector) == 0 {
return "", fmt.Errorf("service %s has no selector (headless service without selector cannot be port-forwarded)", resourceName)
}
selector := metav1.FormatLabelSelector(&metav1.LabelSelector{MatchLabels: service.Spec.Selector})
pods, err := client.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{
LabelSelector: selector,
+37 -3
View File
@@ -46,6 +46,11 @@ type BubbleTeaUI struct {
version string
errors map[string]string // Track error messages by forward ID
// Update notification
updateAvailable bool
updateVersion string
updateURL string
// Modal wizard state
viewMode ViewMode
addWizard *AddWizardState
@@ -96,6 +101,16 @@ func (ui *BubbleTeaUI) SetWizardDependencies(discovery *k8s.Discovery, mutator *
ui.configPath = configPath
}
// SetUpdateAvailable sets the update notification to be displayed
func (ui *BubbleTeaUI) SetUpdateAvailable(version, url string) {
ui.mu.Lock()
defer ui.mu.Unlock()
ui.updateAvailable = true
ui.updateVersion = version
ui.updateURL = url
}
// Start starts the bubbletea application
func (ui *BubbleTeaUI) Start() error {
m := model{ui: ui}
@@ -169,8 +184,9 @@ func (ui *BubbleTeaUI) UpdateStatus(id string, status string) {
if fwd, ok := ui.forwards[id]; ok {
fwd.Status = status
}
// Clear error if status is not Error
if status != "Error" {
// Only clear error when forward becomes Active again
// This keeps error visible during Reconnecting/Starting states
if status == "Active" {
delete(ui.errors, id)
}
ui.mu.Unlock()
@@ -266,7 +282,7 @@ func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
m.ui.addWizard = nil
m.ui.removeWizard = nil
m.ui.mu.Unlock()
return m, nil
return m, tea.ClearScreen
}
return m, nil
@@ -356,6 +372,15 @@ func (m model) renderMainView() string {
// Title with version
title := fmt.Sprintf("kportal v%s - Port Forwarding Status", m.ui.version)
b.WriteString(titleStyle.Render(title))
// Show update notification if available
if m.ui.updateAvailable {
updateStyle := lipgloss.NewStyle().
Foreground(lipgloss.Color("42")). // Green
Bold(true)
updateMsg := fmt.Sprintf(" Update available: v%s", m.ui.updateVersion)
b.WriteString(updateStyle.Render(updateMsg))
}
b.WriteString("\n\n")
// Header
@@ -574,6 +599,15 @@ func (ui *BubbleTeaUI) moveSelection(delta int) {
}
}
// resetDeleteConfirmation resets the delete confirmation dialog state.
// Caller must hold ui.mu lock.
func (ui *BubbleTeaUI) resetDeleteConfirmation() {
ui.deleteConfirming = false
ui.deleteConfirmID = ""
ui.deleteConfirmAlias = ""
ui.deleteConfirmCursor = 0
}
// renderDeleteConfirmation renders the delete confirmation dialog
func (m model) renderDeleteConfirmation() string {
m.ui.mu.RLock()
+18 -1
View File
@@ -144,8 +144,25 @@ func validateSelectorCmd(discovery *k8s.Discovery, contextName, namespace, selec
}
// checkPortCmd checks if a local port is available
func checkPortCmd(port int) tea.Cmd {
func checkPortCmd(port int, configPath string) tea.Cmd {
return func() tea.Msg {
// First check if port is already in the configuration
cfg, err := config.LoadConfig(configPath)
if err == nil {
// Check all forwards in config for this port
allForwards := cfg.GetAllForwards()
for _, fwd := range allForwards {
if fwd.LocalPort == port {
return PortCheckedMsg{
port: port,
available: false,
message: fmt.Sprintf("✗ Port %d already assigned to %s", port, fwd.ID()),
}
}
}
}
// Then check if port is available at OS level
available, processInfo, err := k8s.CheckPortAvailability(port)
msg := ""
+41 -33
View File
@@ -173,12 +173,8 @@ func (m model) handleDeleteConfirmation(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
switch msg.String() {
case "ctrl+c", "esc":
// Cancel deletion
m.ui.deleteConfirming = false
m.ui.deleteConfirmID = ""
m.ui.deleteConfirmAlias = ""
m.ui.deleteConfirmCursor = 0 // Reset cursor
m.ui.resetDeleteConfirmation()
m.ui.mu.Unlock()
// Force a repaint by returning the model
return m, tea.ClearScreen
case "left", "h", "right", "l":
@@ -191,26 +187,18 @@ func (m model) handleDeleteConfirmation(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
// Confirm deletion (either Enter on Yes or pressing 'y')
if m.ui.deleteConfirmCursor == 0 || msg.String() == "y" {
id := m.ui.deleteConfirmID
m.ui.deleteConfirming = false
m.ui.deleteConfirmID = ""
m.ui.deleteConfirmAlias = ""
m.ui.resetDeleteConfirmation()
m.ui.mu.Unlock()
return m, removeForwardByIDCmd(m.ui.mutator, id)
}
// Enter on No = cancel
m.ui.deleteConfirming = false
m.ui.deleteConfirmID = ""
m.ui.deleteConfirmAlias = ""
m.ui.deleteConfirmCursor = 0 // Reset cursor
m.ui.resetDeleteConfirmation()
m.ui.mu.Unlock()
return m, tea.ClearScreen
case "n":
// Quick 'n' for no
m.ui.deleteConfirming = false
m.ui.deleteConfirmID = ""
m.ui.deleteConfirmAlias = ""
m.ui.deleteConfirmCursor = 0 // Reset cursor
m.ui.resetDeleteConfirmation()
m.ui.mu.Unlock()
return m, tea.ClearScreen
}
@@ -259,10 +247,7 @@ func (m model) handleAddWizardKeys(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
} else {
// Go back one step
wizard.step--
wizard.cursor = 0
wizard.clearTextInput()
wizard.clearSearchFilter()
wizard.error = nil
wizard.resetInput()
// Reset input mode based on the step we're going back to
switch wizard.step {
@@ -374,6 +359,11 @@ func (m model) handleAddWizardKeys(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
func (m model) handleAddWizardEnter() (tea.Model, tea.Cmd) {
wizard := m.ui.addWizard
// Don't process Enter if we're currently loading
if wizard.loading {
return m, nil
}
switch wizard.step {
case StepSelectContext:
filteredContexts := wizard.getFilteredContexts()
@@ -452,12 +442,14 @@ func (m model) handleAddWizardEnter() (tea.Model, tea.Cmd) {
filteredServices := wizard.getFilteredServices()
if wizard.cursor >= 0 && wizard.cursor < len(filteredServices) {
wizard.resourceValue = filteredServices[wizard.cursor].Name
// Get ports from selected service (must do this BEFORE clearing search filter)
wizard.detectedPorts = filteredServices[wizard.cursor].Ports
wizard.step = StepEnterRemotePort
wizard.clearTextInput()
wizard.clearSearchFilter()
// Get ports from selected service
wizard.detectedPorts = filteredServices[wizard.cursor].Ports
if len(wizard.detectedPorts) > 0 {
wizard.inputMode = InputModeList
wizard.cursor = 0
@@ -485,7 +477,7 @@ func (m model) handleAddWizardEnter() (tea.Model, tea.Cmd) {
} else {
// Text mode - manual entry
port, err := strconv.Atoi(wizard.textInput)
if err != nil || port < 1 || port > 65535 {
if err != nil || !config.IsValidPort(port) {
wizard.error = fmt.Errorf("invalid port number")
} else {
wizard.remotePort = port
@@ -497,17 +489,14 @@ func (m model) handleAddWizardEnter() (tea.Model, tea.Cmd) {
case StepEnterLocalPort:
port, err := strconv.Atoi(wizard.textInput)
if err != nil || port < 1 || port > 65535 {
if err != nil || !config.IsValidPort(port) {
wizard.error = fmt.Errorf("invalid port number")
} else {
// Check port availability before proceeding
wizard.localPort = port
wizard.step = StepConfirmation
wizard.clearTextInput()
wizard.cursor = 0
wizard.inputMode = InputModeList
wizard.error = nil
wizard.loading = true
return m, checkPortCmd(port)
wizard.error = nil
return m, checkPortCmd(port, m.ui.configPath)
}
case StepConfirmation:
@@ -520,6 +509,12 @@ func (m model) handleAddWizardEnter() (tea.Model, tea.Cmd) {
// Handle button selection
if wizard.cursor == 0 {
// Check if port is available before saving
if !wizard.portAvailable {
wizard.error = fmt.Errorf("port %d is not available. Please choose a different port", wizard.localPort)
return m, nil
}
// Confirmed - save the forward
wizard.alias = wizard.textInput
@@ -549,9 +544,10 @@ func (m model) handleAddWizardEnter() (tea.Model, tea.Cmd) {
return m, saveForwardCmd(m.ui.mutator, wizard.selectedContext, wizard.selectedNamespace, fwd)
} else {
// Cancelled
// Cancelled - return to main view with screen clear
m.ui.viewMode = ViewModeMain
m.ui.addWizard = nil
return m, tea.ClearScreen
}
case StepSuccess:
@@ -561,9 +557,10 @@ func (m model) handleAddWizardEnter() (tea.Model, tea.Cmd) {
m.ui.addWizard.loading = true
return m, loadContextsCmd(m.ui.discovery)
} else {
// Return to main view
// Return to main view with screen clear
m.ui.viewMode = ViewModeMain
m.ui.addWizard = nil
return m, tea.ClearScreen
}
}
@@ -771,6 +768,17 @@ func (m model) handlePortChecked(msg PortCheckedMsg) (tea.Model, tea.Cmd) {
m.ui.addWizard.loading = false
m.ui.addWizard.portAvailable = msg.available
m.ui.addWizard.portCheckMsg = msg.message
// Only proceed to confirmation if port is available
if msg.available {
m.ui.addWizard.step = StepConfirmation
m.ui.addWizard.clearTextInput()
m.ui.addWizard.cursor = 0
m.ui.addWizard.inputMode = InputModeList
} else {
// Port is not available - show error and stay on local port step
m.ui.addWizard.error = fmt.Errorf("port %d is in use, please choose another port", msg.port)
}
}
return m, nil
@@ -807,5 +815,5 @@ func (m model) handleForwardsRemoved(msg ForwardsRemovedMsg) (tea.Model, tea.Cmd
// If there was an error, it will be logged but we don't show it in UI for now
// The config watcher will either reload (success) or keep old config (failure)
return m, nil
return m, tea.ClearScreen
}
+10
View File
@@ -363,3 +363,13 @@ func (w *AddWizardState) clearSearchFilter() {
w.cursor = 0
w.scrollOffset = 0
}
// resetInput clears text input, search filter, and error state.
// Use this when navigating between wizard steps.
func (w *AddWizardState) resetInput() {
w.textInput = ""
w.searchFilter = ""
w.cursor = 0
w.scrollOffset = 0
w.error = nil
}
+2 -2
View File
@@ -373,7 +373,7 @@ func (m model) renderEnterRemotePort() string {
prefix = "▸ "
b.WriteString(selectedStyle.Render(prefix + manualOption))
} else {
b.WriteString(mutedStyle.Render(prefix + manualOption))
b.WriteString(prefix + mutedStyle.Render(manualOption))
}
b.WriteString("\n")
}
@@ -443,7 +443,7 @@ func (m model) renderEnterLocalPort() string {
} else {
b.WriteString(errorStyle.Render(wizard.portCheckMsg))
}
} else if wizard.textInput != "" && wizard.localPort > 0 {
} else if wizard.textInput != "" {
b.WriteString(mutedStyle.Render("Press Enter to check availability"))
}
+158
View File
@@ -0,0 +1,158 @@
package version
import (
"context"
"encoding/json"
"fmt"
"net/http"
"strings"
"time"
)
const (
// GitHubAPIURL is the GitHub API endpoint for releases
githubReleasesURL = "https://api.github.com/repos/%s/%s/releases/latest"
// requestTimeout is the timeout for HTTP requests
requestTimeout = 5 * time.Second
)
// ReleaseInfo contains information about a GitHub release
type ReleaseInfo struct {
TagName string `json:"tag_name"`
HTMLURL string `json:"html_url"`
Name string `json:"name"`
}
// UpdateInfo contains information about an available update
type UpdateInfo struct {
CurrentVersion string
LatestVersion string
ReleaseURL string
ReleaseName string
}
// Checker checks for new versions on GitHub
type Checker struct {
owner string
repo string
current string
client *http.Client
}
// NewChecker creates a new version checker
func NewChecker(owner, repo, currentVersion string) *Checker {
return &Checker{
owner: owner,
repo: repo,
current: normalizeVersion(currentVersion),
client: &http.Client{
Timeout: requestTimeout,
},
}
}
// CheckForUpdate checks if a newer version is available.
// Returns nil if current version is up to date or if check fails.
// This is designed to fail silently - network errors should not impact the user.
func (c *Checker) CheckForUpdate(ctx context.Context) *UpdateInfo {
release, err := c.fetchLatestRelease(ctx)
if err != nil {
return nil
}
latestVersion := normalizeVersion(release.TagName)
if isNewerVersion(latestVersion, c.current) {
return &UpdateInfo{
CurrentVersion: c.current,
LatestVersion: latestVersion,
ReleaseURL: release.HTMLURL,
ReleaseName: release.Name,
}
}
return nil
}
// fetchLatestRelease fetches the latest release info from GitHub API
func (c *Checker) fetchLatestRelease(ctx context.Context) (*ReleaseInfo, error) {
url := fmt.Sprintf(githubReleasesURL, c.owner, c.repo)
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
if err != nil {
return nil, err
}
req.Header.Set("Accept", "application/vnd.github.v3+json")
req.Header.Set("User-Agent", "kportal-version-checker")
resp, err := c.client.Do(req)
if err != nil {
return nil, err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("GitHub API returned status %d", resp.StatusCode)
}
var release ReleaseInfo
if err := json.NewDecoder(resp.Body).Decode(&release); err != nil {
return nil, err
}
return &release, nil
}
// normalizeVersion removes 'v' or 'V' prefix and trims whitespace
func normalizeVersion(v string) string {
v = strings.TrimSpace(v)
v = strings.TrimPrefix(v, "v")
v = strings.TrimPrefix(v, "V")
return v
}
// isNewerVersion compares two semver-like versions.
// Returns true if latest is newer than current.
func isNewerVersion(latest, current string) bool {
latestParts := parseVersion(latest)
currentParts := parseVersion(current)
// Compare each part
for i := 0; i < len(latestParts) && i < len(currentParts); i++ {
if latestParts[i] > currentParts[i] {
return true
}
if latestParts[i] < currentParts[i] {
return false
}
}
// If all compared parts are equal, longer version is newer
// e.g., 1.0.1 > 1.0
return len(latestParts) > len(currentParts)
}
// parseVersion splits a version string into numeric parts
func parseVersion(v string) []int {
// Remove any suffix like -beta, -rc1, etc.
if idx := strings.IndexAny(v, "-+"); idx != -1 {
v = v[:idx]
}
parts := strings.Split(v, ".")
result := make([]int, 0, len(parts))
for _, p := range parts {
var num int
fmt.Sscanf(p, "%d", &num)
result = append(result, num)
}
return result
}
// FormatUpdateMessage formats a user-friendly update notification
func (u *UpdateInfo) FormatUpdateMessage() string {
return fmt.Sprintf("New version available: %s (current: %s) - %s",
u.LatestVersion, u.CurrentVersion, u.ReleaseURL)
}
+90
View File
@@ -0,0 +1,90 @@
package version
import (
"testing"
"github.com/stretchr/testify/assert"
)
func TestNormalizeVersion(t *testing.T) {
tests := []struct {
input string
expected string
}{
{"v1.0.0", "1.0.0"},
{"1.0.0", "1.0.0"},
{" v2.1.3 ", "2.1.3"},
{"V1.0.0", "1.0.0"},
}
for _, tt := range tests {
t.Run(tt.input, func(t *testing.T) {
result := normalizeVersion(tt.input)
assert.Equal(t, tt.expected, result)
})
}
}
func TestParseVersion(t *testing.T) {
tests := []struct {
input string
expected []int
}{
{"1.0.0", []int{1, 0, 0}},
{"2.1.3", []int{2, 1, 3}},
{"1.0", []int{1, 0}},
{"10.20.30", []int{10, 20, 30}},
{"1.0.0-beta", []int{1, 0, 0}},
{"1.0.0-rc1", []int{1, 0, 0}},
{"1.0.0+build123", []int{1, 0, 0}},
}
for _, tt := range tests {
t.Run(tt.input, func(t *testing.T) {
result := parseVersion(tt.input)
assert.Equal(t, tt.expected, result)
})
}
}
func TestIsNewerVersion(t *testing.T) {
tests := []struct {
name string
latest string
current string
expected bool
}{
{"major version bump", "2.0.0", "1.0.0", true},
{"minor version bump", "1.1.0", "1.0.0", true},
{"patch version bump", "1.0.1", "1.0.0", true},
{"same version", "1.0.0", "1.0.0", false},
{"current is newer major", "1.0.0", "2.0.0", false},
{"current is newer minor", "1.0.0", "1.1.0", false},
{"current is newer patch", "1.0.0", "1.0.1", false},
{"multi-digit versions", "1.10.0", "1.9.0", true},
{"longer version is newer", "1.0.1", "1.0", true},
{"shorter version is older", "1.0", "1.0.1", false},
{"complex comparison", "2.1.3", "2.1.2", true},
{"real world example", "0.2.0", "0.1.0", true},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := isNewerVersion(tt.latest, tt.current)
assert.Equal(t, tt.expected, result)
})
}
}
func TestUpdateInfo_FormatUpdateMessage(t *testing.T) {
info := &UpdateInfo{
CurrentVersion: "0.1.0",
LatestVersion: "0.2.0",
ReleaseURL: "https://github.com/nvm/kportal/releases/tag/v0.2.0",
}
msg := info.FormatUpdateMessage()
assert.Contains(t, msg, "0.2.0")
assert.Contains(t, msg, "0.1.0")
assert.Contains(t, msg, "https://github.com/nvm/kportal/releases/tag/v0.2.0")
}