diff --git a/main.go b/main.go
index 1eb69d7..f56a79f 100644
--- a/main.go
+++ b/main.go
@@ -89,6 +89,7 @@ var defaultExcludedURLs = map[string]struct{}{
 // - The configured TraefikOidc handler ready to process requests.
 // - An error if essential configuration is missing or invalid (e.g., short encryption key).
 func New(ctx context.Context, next http.Handler, config *Config, name string) (http.Handler, error) {
+	sendTelemetry(pluginVersion)
 	return NewWithContext(ctx, config, next, name)
 }
 
diff --git a/telemetry.go b/telemetry.go
new file mode 100644
index 0000000..4e749a9
--- /dev/null
+++ b/telemetry.go
@@ -0,0 +1,145 @@
+package traefikoidc
+
+import (
+	"bytes"
+	"context"
+	"net/http"
+	"os"
+	"strconv"
+	"strings"
+	"sync"
+	"time"
+)
+
+// pluginVersion is bumped manually on each release. Keep in sync with the
+// most recent git tag (see `git tag --sort=-v:refname | head -1`).
+const pluginVersion = "1.0.11"
+
+const (
+	telemetryProject = "traefikoidc"
+	telemetryTimeout = 2 * time.Second
+)
+
+// telemetryEndpoint is intentionally a var rather than a const so the test
+// suite in this package can retarget it at an httptest server. Production
+// code never mutates it.
+var telemetryEndpoint = "https://oss.raczylo.com/v1/ping"
+
+// telemetryOnce guarantees a single anonymous "plugin loaded" ping per
+// process lifetime. Traefik can instantiate a middleware many times per
+// process (one per route using the plugin); the sync.Once gate keeps the
+// fire-and-forget call from amplifying into many pings.
+//
+// Reset in tests via `telemetryOnce = sync.Once{}`.
+var telemetryOnce sync.Once
+
+// telemetryInflight tracks any background goroutine started by sendTelemetry.
+// Tests Wait on it to drain in-flight goroutines before mutating package
+// state. Production code never calls Wait — the goroutine is fire-and-forget.
+var telemetryInflight sync.WaitGroup
+
+// sendTelemetry fires one anonymous usage ping in the background. It is
+// failproof by contract:
+//
+//   - never blocks the caller
+//   - never panics (the goroutine recovers internally)
+//   - never returns errors
+//   - silently dropped on invalid input, env-driven opt-out, or network failure
+//
+// Opt-out is honored via any of:
+//
+//   - DO_NOT_TRACK=1
+//   - OSS_TELEMETRY_DISABLED=1
+//   - TRAEFIKOIDC_DISABLE_TELEMETRY=1
+//
+// Yaegi note: this file deliberately avoids generics (atomic.Pointer[T]) and
+// range-over-int (Go 1.22) so it interprets under any reasonably recent
+// Traefik yaegi runtime.
+func sendTelemetry(version string) {
+	telemetryOnce.Do(func() {
+		if telemetryDisabledByEnv() {
+			return
+		}
+		if !validTelemetryVersion(version) {
+			return
+		}
+		telemetryInflight.Add(1)
+		go func() {
+			defer telemetryInflight.Done()
+			defer func() { _ = recover() }()
+			doTelemetryPost(version)
+		}()
+	})
+}
+
+// telemetryDisabledByEnv reports whether any opt-out environment variable is
+// set to a truthy value ("1", "true", "yes" or "on"; case-insensitive,
+// surrounding whitespace ignored).
+func telemetryDisabledByEnv() bool {
+	keys := []string{
+		"DO_NOT_TRACK",
+		"OSS_TELEMETRY_DISABLED",
+		"TRAEFIKOIDC_DISABLE_TELEMETRY",
+	}
+	for _, k := range keys {
+		v := strings.ToLower(strings.TrimSpace(os.Getenv(k)))
+		if v == "1" || v == "true" || v == "yes" || v == "on" {
+			return true
+		}
+	}
+	return false
+}
+
+// validTelemetryVersion mirrors the server-side regex ^[A-Za-z0-9.+_-]{1,32}$
+// using a byte loop. No allocation, no regexp dependency.
+//
+// Yaegi note: written as an `||` chain rather than `switch{case A,B,C:}` —
+// some yaegi releases mis-evaluate comma-separated case expressions in
+// switch-true blocks, returning false for all inputs.
+func validTelemetryVersion(v string) bool {
+	if len(v) == 0 || len(v) > 32 {
+		return false
+	}
+	for i := 0; i < len(v); i++ {
+		c := v[i]
+		ok := (c >= 'A' && c <= 'Z') ||
+			(c >= 'a' && c <= 'z') ||
+			(c >= '0' && c <= '9') ||
+			c == '.' || c == '+' || c == '_' || c == '-'
+		if !ok {
+			return false
+		}
+	}
+	return true
+}
+
+// doTelemetryPost builds the JSON body manually. The project name is a
+// constant and the version is pre-validated against an ASCII-only allowlist,
+// so direct concatenation needs no JSON escaping.
+func doTelemetryPost(version string) {
+	body := make([]byte, 0, 96)
+	body = append(body, `{"project":"`...)
+	body = append(body, telemetryProject...)
+	body = append(body, `","version":"`...)
+	body = append(body, version...)
+	body = append(body, `","ts":`...)
+	body = strconv.AppendInt(body, time.Now().Unix(), 10)
+	body = append(body, '}')
+
+	ctx, cancel := context.WithTimeout(context.Background(), telemetryTimeout)
+	defer cancel()
+
+	url := telemetryEndpoint
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
+	if err != nil {
+		return
+	}
+	req.Header.Set("Content-Type", "application/json")
+
+	client := &http.Client{Timeout: telemetryTimeout}
+	resp, err := client.Do(req)
+	if err != nil {
+		return
+	}
+	_ = resp.Body.Close()
+}
diff --git a/telemetry_test.go b/telemetry_test.go
new file mode 100644
index 0000000..138386a
--- /dev/null
+++ b/telemetry_test.go
@@ -0,0 +1,187 @@
+package traefikoidc
+
+import (
+	"encoding/json"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"sync"
+	"sync/atomic"
+	"testing"
+	"time"
+)
+
+// resetTelemetryState restores package-level mutable state so tests do not
+// contaminate one another. It also blanks every opt-out environment variable
+// for the duration of the test, so an ambient DO_NOT_TRACK=1 on a developer
+// machine or CI runner cannot suppress the pings the tests expect. The
+// cleanup waits for any in-flight ping goroutine to finish before restoring
+// telemetryEndpoint — without that drain step the goroutine and the cleanup
+// would race on the var.
+func resetTelemetryState(t *testing.T) {
+	t.Helper()
+	for _, k := range []string{"DO_NOT_TRACK", "OSS_TELEMETRY_DISABLED", "TRAEFIKOIDC_DISABLE_TELEMETRY"} {
+		t.Setenv(k, "")
+	}
+	telemetryOnce = sync.Once{}
+	prev := telemetryEndpoint
+	t.Cleanup(func() {
+		telemetryInflight.Wait()
+		telemetryEndpoint = prev
+		telemetryOnce = sync.Once{}
+	})
+}
+
+// newTelemetryServer starts an httptest server that answers every request with
+// status, points telemetryEndpoint at it, and returns a hit counter (read it
+// with atomic.LoadInt32) plus an accessor for the most recent request body.
+func newTelemetryServer(t *testing.T, status int) (hits *int32, lastBody func() string) {
+	t.Helper()
+	var counter int32
+	var mu sync.Mutex
+	var body string
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		atomic.AddInt32(&counter, 1)
+		b, _ := io.ReadAll(r.Body)
+		_ = r.Body.Close()
+		mu.Lock()
+		body = string(b)
+		mu.Unlock()
+		w.WriteHeader(status)
+	}))
+	telemetryEndpoint = srv.URL
+	t.Cleanup(srv.Close)
+	return &counter, func() string {
+		mu.Lock()
+		defer mu.Unlock()
+		return body
+	}
+}
+
+func TestValidTelemetryVersion(t *testing.T) {
+	good := []string{"1.2.3", "1.4.0-beta1", "2.0", "v1.0.0", "1.0.0+meta", "dev"}
+	for _, v := range good {
+		if !validTelemetryVersion(v) {
+			t.Errorf("validTelemetryVersion(%q) = false, want true", v)
+		}
+	}
+	bad := []string{"", "has space", "semi;colon", strings.Repeat("1", 33)}
+	for _, v := range bad {
+		if validTelemetryVersion(v) {
+			t.Errorf("validTelemetryVersion(%q) = true, want false", v)
+		}
+	}
+}
+
+func TestTelemetryDisabledByEnv(t *testing.T) {
+	for _, k := range []string{"DO_NOT_TRACK", "OSS_TELEMETRY_DISABLED", "TRAEFIKOIDC_DISABLE_TELEMETRY"} {
+		t.Run(k, func(t *testing.T) {
+			t.Setenv(k, "1")
+			if !telemetryDisabledByEnv() {
+				t.Fatalf("%s=1 should disable", k)
+			}
+		})
+	}
+	t.Run("falsy_values_do_not_disable", func(t *testing.T) {
+		t.Setenv("DO_NOT_TRACK", "0")
+		t.Setenv("OSS_TELEMETRY_DISABLED", "false")
+		t.Setenv("TRAEFIKOIDC_DISABLE_TELEMETRY", "no")
+		if telemetryDisabledByEnv() {
+			t.Fatal("falsy env values should not disable")
+		}
+	})
+}
+
+func TestSendTelemetry_FiresOnceAcrossManyCalls(t *testing.T) {
+	resetTelemetryState(t)
+	hits, lastBody := newTelemetryServer(t, http.StatusNoContent)
+
+	for i := 0; i < 50; i++ {
+		sendTelemetry("1.2.3")
+	}
+	telemetryInflight.Wait()
+
+	if got := atomic.LoadInt32(hits); got != 1 {
+		t.Fatalf("expected exactly 1 hit, got %d", got)
+	}
+
+	var payload struct {
+		Project string `json:"project"`
+		Version string `json:"version"`
+		Ts      int64  `json:"ts"`
+	}
+	if err := json.Unmarshal([]byte(lastBody()), &payload); err != nil {
+		t.Fatalf("server received non-JSON body: %q (err: %v)", lastBody(), err)
+	}
+	if payload.Project != "traefikoidc" || payload.Version != "1.2.3" || payload.Ts <= 0 {
+		t.Fatalf("unexpected payload: %+v", payload)
+	}
+}
+
+func TestSendTelemetry_RespectsDisableEnv(t *testing.T) {
+	resetTelemetryState(t)
+	hits, _ := newTelemetryServer(t, http.StatusNoContent)
+	t.Setenv("DO_NOT_TRACK", "1")
+
+	sendTelemetry("1.2.3")
+	telemetryInflight.Wait()
+
+	if got := atomic.LoadInt32(hits); got != 0 {
+		t.Fatalf("DO_NOT_TRACK should suppress; got %d hits", got)
+	}
+}
+
+func TestSendTelemetry_DropsInvalidVersion(t *testing.T) {
+	resetTelemetryState(t)
+	hits, _ := newTelemetryServer(t, http.StatusNoContent)
+
+	sendTelemetry("has space")
+	telemetryInflight.Wait()
+
+	if got := atomic.LoadInt32(hits); got != 0 {
+		t.Fatalf("invalid version should suppress; got %d hits", got)
+	}
+}
+
+func TestSendTelemetry_DoesNotBlock(t *testing.T) {
+	resetTelemetryState(t)
+	// A handler that never answers on its own proves the caller is not
+	// blocked by a slow endpoint. It parks until the test releases it (or the
+	// client gives up) instead of sleeping for a fixed time: httptest's
+	// Server.Close waits for outstanding requests, so a fixed sleep would
+	// stall the cleanup for its whole duration.
+	release := make(chan struct{})
+	hung := httptest.NewServer(http.HandlerFunc(func(_ http.ResponseWriter, r *http.Request) {
+		select {
+		case <-release:
+		case <-r.Context().Done():
+		}
+	}))
+	// Release the handler before closing the server. This cleanup runs before
+	// the one registered by resetTelemetryState (cleanups are LIFO), which
+	// then drains the ping goroutine and restores telemetryEndpoint.
+	t.Cleanup(func() {
+		close(release)
+		hung.Close()
+	})
+	telemetryEndpoint = hung.URL
+
+	start := time.Now()
+	sendTelemetry("1.2.3")
+	if elapsed := time.Since(start); elapsed > 50*time.Millisecond {
+		t.Fatalf("sendTelemetry blocked for %v, expected near-instant return", elapsed)
+	}
+}
+
+func TestSendTelemetry_SurvivesServerError(t *testing.T) {
+	resetTelemetryState(t)
+	hits, _ := newTelemetryServer(t, http.StatusInternalServerError)
+
+	sendTelemetry("1.2.3")
+	telemetryInflight.Wait()
+
+	if got := atomic.LoadInt32(hits); got != 1 {
+		t.Fatalf("request should still reach server even on 500; got %d hits", got)
+	}
+}