fix: resolve cache eviction lock-up and migrate telemetry [patch-release]

universal_cache: stop the write-lock convoy / 100%-CPU spin (observed via pprof: one ServeHTTP goroutine holding c.mu.Lock for hours while 119 requests queued). The per-request populate path (updateLocalCache) PushFronted a duplicate LRU node + overwrote items[key] without removing the prior node; once eviction deleted the key, orphan nodes at Back() were never removable and the eviction loop spun forever under the write lock. Replace the entry in place (mirroring setLocal) and harden evictOldest with a forward-progress guard. Adds universal_cache_orphan_test.go.

telemetry: delete the hand-rolled client; call oss-telemetry v0.2.3 (vendored, Yaegi-safe) directly from New(), once per process via sync.Once.

version: add version.go + workflow-prepare.sh so the release semver is stamped into source at build time (the value cannot be resolved at runtime under Yaegi). dev/source builds keep the 0.0.0-dev sentinel and emit no telemetry.
This commit is contained in:
2026-05-30 13:22:03 +01:00
parent cf6ed1da55
commit f75b2f20e0
15 changed files with 789 additions and 318 deletions
+1
View File
@@ -5,6 +5,7 @@ go 1.24.0
require (
github.com/alicebob/miniredis/v2 v2.35.0
github.com/gorilla/sessions v1.3.0
github.com/lukaszraczylo/oss-telemetry v0.2.3
github.com/redis/go-redis/v9 v9.17.2
github.com/stretchr/testify v1.10.0
golang.org/x/time v0.14.0
+2
View File
@@ -16,6 +16,8 @@ github.com/gorilla/securecookie v1.1.2 h1:YCIWL56dvtr73r6715mJs5ZvhtnY73hBvEF8kX
github.com/gorilla/securecookie v1.1.2/go.mod h1:NfCASbcHqRSY+3a8tlWJwsQap2VX5pwzwo4h3eOamfo=
github.com/gorilla/sessions v1.3.0 h1:XYlkq7KcpOB2ZhHBPv5WpjMIxrQosiZanfoy1HLZFzg=
github.com/gorilla/sessions v1.3.0/go.mod h1:ePLdVu+jbEgHH+KWw8I1z2wqd0BAdAQh/8LRvBeoNcQ=
github.com/lukaszraczylo/oss-telemetry v0.2.3 h1:xoDtBqeZGmXj7IteiE1M5WMuzeoqag58qEleI0Cf2Ms=
github.com/lukaszraczylo/oss-telemetry v0.2.3/go.mod h1:+Cn78qZo8rc3T9eZt0v3oICYRdd75wORtSidc8lNjDQ=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/redis/go-redis/v9 v9.17.2 h1:P2EGsA4qVIM3Pp+aPocCJ7DguDHhqrXNhVcEp4ViluI=
+13 -1
View File
@@ -16,6 +16,7 @@ import (
"text/template"
"time"
telemetry "github.com/lukaszraczylo/oss-telemetry"
"golang.org/x/time/rate"
)
@@ -23,6 +24,11 @@ const (
ConstSessionTimeout = 86400
)
// telemetryStartupOnce keeps the anonymous "plugin loaded" ping to one per
// process. Traefik calls New once per route that uses the plugin; oss-telemetry
// does not deduplicate client-side (the server does), so the gate stays here.
var telemetryStartupOnce sync.Once
// isTestMode detects if the code is running in a test environment.
func isTestMode() bool {
if os.Getenv("SUPPRESS_DIAGNOSTIC_LOGS") == "1" {
@@ -89,7 +95,13 @@ var defaultExcludedURLs = map[string]struct{}{
// - The configured TraefikOidc handler ready to process requests.
// - An error if essential configuration is missing or invalid (e.g., short encryption key).
func New(ctx context.Context, next http.Handler, config *Config, name string) (http.Handler, error) {
sendTelemetry(pluginVersion)
telemetryStartupOnce.Do(func() {
// Only stamped release builds phone home; dev/local/test builds keep the
// devPluginVersion sentinel (see version.go) and stay silent.
if traefikoidcPluginVersion != devPluginVersion {
telemetry.Send("traefikoidc", traefikoidcPluginVersion)
}
})
return NewWithContext(ctx, config, next, name)
}
-142
View File
@@ -1,142 +0,0 @@
package traefikoidc
import (
"bytes"
"context"
"net/http"
"os"
"strconv"
"strings"
"sync"
"time"
)
// pluginVersion is bumped manually on each release. Keep in sync with the
// most recent git tag (see `git tag --sort=-v:refname | head -1`).
const pluginVersion = "1.0.11"
const (
telemetryProject = "traefikoidc"
telemetryTimeout = 2 * time.Second
)
// telemetryEndpoint is intentionally a var rather than a const so the test
// suite in this package can retarget it at an httptest server. Production
// code never mutates it.
var telemetryEndpoint = "https://oss.raczylo.com/v1/ping"
// telemetryOnce guarantees a single anonymous "plugin loaded" ping per
// process lifetime. Traefik can instantiate a middleware many times per
// process (one per route using the plugin); the sync.Once gate keeps the
// fire-and-forget call from amplifying into many pings.
//
// Reset in tests via `telemetryOnce = sync.Once{}`.
var telemetryOnce sync.Once
// telemetryInflight tracks any background goroutine started by sendTelemetry.
// Tests Wait on it to drain in-flight goroutines before mutating package
// state. Production code never calls Wait — the goroutine is fire-and-forget.
var telemetryInflight sync.WaitGroup
// sendTelemetry queues a single anonymous usage ping on a background
// goroutine and returns right away. Only the first call in a process does
// anything: the whole body runs under telemetryOnce, so later calls are no-ops
// even when the first one was suppressed.
//
// Guarantees to the caller:
//
//   - the HTTP request is never awaited
//   - a panic inside the ping goroutine is recovered and discarded
//   - nothing is returned; every failure is dropped silently
//
// The ping is skipped when telemetryDisabledByEnv reports an opt-out (a truthy
// DO_NOT_TRACK, OSS_TELEMETRY_DISABLED or TRAEFIKOIDC_DISABLE_TELEMETRY), or
// when version does not pass validTelemetryVersion.
//
// Yaegi note: neither generics (atomic.Pointer[T]) nor range-over-int
// (Go 1.22) are used here, so the file stays interpretable by Traefik's Yaegi
// runtime.
func sendTelemetry(version string) {
	telemetryOnce.Do(func() {
		// Opt-out is evaluated before the version check, as before.
		if telemetryDisabledByEnv() || !validTelemetryVersion(version) {
			return
		}

		ping := func() {
			// Deferred calls run last-in-first-out: recover first, then
			// release the in-flight counter that tests wait on.
			defer telemetryInflight.Done()
			defer func() { _ = recover() }()
			doTelemetryPost(version)
		}

		telemetryInflight.Add(1)
		go ping()
	})
}
// telemetryDisabledByEnv reports whether any of the opt-out environment
// variables holds a truthy value. Values are trimmed and lower-cased before
// being compared against "1", "true", "yes" and "on"; anything else
// (including an unset variable) does not disable telemetry.
func telemetryDisabledByEnv() bool {
	optOutVars := [...]string{
		"DO_NOT_TRACK",
		"OSS_TELEMETRY_DISABLED",
		"TRAEFIKOIDC_DISABLE_TELEMETRY",
	}
	truthyValues := [...]string{"1", "true", "yes", "on"}

	for _, name := range optOutVars {
		val := strings.ToLower(strings.TrimSpace(os.Getenv(name)))
		for _, want := range truthyValues {
			if val == want {
				return true
			}
		}
	}
	return false
}
// validTelemetryVersion reports whether v is 1 to 32 bytes long and consists
// solely of ASCII letters, digits, '.', '+', '_' and '-'. That is the same
// language as the server-side regex ^[A-Za-z0-9.+_-]{1,32}$, checked with a
// byte scan instead of the regexp package.
//
// Yaegi note: the character classes are tested with plain if statements on
// purpose. A `switch { case A, B, C: }` would read nicer, but some Yaegi
// releases mis-evaluate comma-separated case expressions in switch-true
// blocks and would reject every input.
func validTelemetryVersion(v string) bool {
	if v == "" || len(v) > 32 {
		return false
	}
	for _, b := range []byte(v) {
		if b >= 'A' && b <= 'Z' {
			continue
		}
		if b >= 'a' && b <= 'z' {
			continue
		}
		if b >= '0' && b <= '9' {
			continue
		}
		if b == '.' || b == '+' || b == '_' || b == '-' {
			continue
		}
		return false
	}
	return true
}
// doTelemetryPost sends one POST carrying {"project","version","ts"} to
// telemetryEndpoint and discards the outcome. The JSON is assembled by hand:
// the project name is a constant and version has already been restricted to
// an ASCII allowlist by the caller, so nothing in it needs escaping.
//
// The request is bounded by telemetryTimeout twice over (request context and
// client timeout). Any error, and any response status, is ignored; the
// response body is only closed.
func doTelemetryPost(version string) {
	payload := []byte(`{"project":"` + telemetryProject + `","version":"` + version + `","ts":`)
	payload = strconv.AppendInt(payload, time.Now().Unix(), 10)
	payload = append(payload, '}')

	ctx, cancel := context.WithTimeout(context.Background(), telemetryTimeout)
	defer cancel()

	req, err := http.NewRequestWithContext(ctx, http.MethodPost, telemetryEndpoint, bytes.NewReader(payload))
	if err != nil {
		return
	}
	req.Header.Set("Content-Type", "application/json")

	httpClient := &http.Client{Timeout: telemetryTimeout}
	if resp, doErr := httpClient.Do(req); doErr == nil {
		_ = resp.Body.Close()
	}
}
-167
View File
@@ -1,167 +0,0 @@
package traefikoidc
import (
"encoding/json"
"io"
"net/http"
"net/http/httptest"
"strings"
"sync"
"sync/atomic"
"testing"
"time"
)
// resetTelemetryState restores package-level mutable state so tests do not
// contaminate one another. The cleanup waits for any in-flight ping goroutine
// to finish before restoring telemetryEndpoint — without that drain step the
// goroutine and the cleanup would race on the var.
func resetTelemetryState(t *testing.T) {
t.Helper()
telemetryOnce = sync.Once{}
prev := telemetryEndpoint
t.Cleanup(func() {
telemetryInflight.Wait()
telemetryEndpoint = prev
telemetryOnce = sync.Once{}
})
}
// newTelemetryServer starts an httptest server that answers every request
// with status, points telemetryEndpoint at it, and registers the server's
// shutdown as a test cleanup. It returns a pointer to the request counter
// (read it with atomic.LoadInt32) and an accessor for the body of the most
// recent request.
func newTelemetryServer(t *testing.T, status int) (hits *int32, lastBody func() string) {
	t.Helper()

	var (
		requests int32
		bodyMu   sync.Mutex
		received string
	)

	handler := func(w http.ResponseWriter, r *http.Request) {
		atomic.AddInt32(&requests, 1)

		raw, _ := io.ReadAll(r.Body) // read errors are ignored; the tests only inspect what arrived
		_ = r.Body.Close()

		bodyMu.Lock()
		received = string(raw)
		bodyMu.Unlock()

		w.WriteHeader(status)
	}

	srv := httptest.NewServer(http.HandlerFunc(handler))
	telemetryEndpoint = srv.URL
	t.Cleanup(srv.Close)

	hits = &requests
	lastBody = func() string {
		bodyMu.Lock()
		defer bodyMu.Unlock()
		return received
	}
	return hits, lastBody
}
// TestValidTelemetryVersion checks validTelemetryVersion against a table of
// versions that must be accepted followed by ones that must be rejected
// (empty, containing a space or ';', and one byte over the 32-byte limit).
func TestValidTelemetryVersion(t *testing.T) {
	cases := []struct {
		version string
		want    bool
	}{
		{"1.2.3", true},
		{"1.4.0-beta1", true},
		{"2.0", true},
		{"v1.0.0", true},
		{"1.0.0+meta", true},
		{"dev", true},
		{"", false},
		{"has space", false},
		{"semi;colon", false},
		{strings.Repeat("1", 33), false},
	}

	for _, tc := range cases {
		got := validTelemetryVersion(tc.version)
		if tc.want && !got {
			t.Errorf("validTelemetryVersion(%q) = false, want true", tc.version)
		}
		if !tc.want && got {
			t.Errorf("validTelemetryVersion(%q) = true, want false", tc.version)
		}
	}
}
// TestTelemetryDisabledByEnv verifies that each opt-out variable set to "1"
// disables telemetry on its own, and that the falsy spellings "0", "false"
// and "no" leave it enabled.
func TestTelemetryDisabledByEnv(t *testing.T) {
	optOutVars := []string{"DO_NOT_TRACK", "OSS_TELEMETRY_DISABLED", "TRAEFIKOIDC_DISABLE_TELEMETRY"}
	for _, name := range optOutVars {
		t.Run(name, func(t *testing.T) {
			t.Setenv(name, "1")
			if disabled := telemetryDisabledByEnv(); !disabled {
				t.Fatalf("%s=1 should disable", name)
			}
		})
	}

	t.Run("falsy_values_do_not_disable", func(t *testing.T) {
		falsy := []struct{ name, value string }{
			{"DO_NOT_TRACK", "0"},
			{"OSS_TELEMETRY_DISABLED", "false"},
			{"TRAEFIKOIDC_DISABLE_TELEMETRY", "no"},
		}
		for _, env := range falsy {
			t.Setenv(env.name, env.value)
		}
		if telemetryDisabledByEnv() {
			t.Fatal("falsy env values should not disable")
		}
	})
}
// TestSendTelemetry_FiresOnceAcrossManyCalls calls sendTelemetry 50 times and
// expects the test server to see exactly one request whose JSON body carries
// the project name, the version passed in, and a positive timestamp.
func TestSendTelemetry_FiresOnceAcrossManyCalls(t *testing.T) {
	resetTelemetryState(t)
	hits, lastBody := newTelemetryServer(t, http.StatusNoContent)

	for remaining := 50; remaining > 0; remaining-- {
		sendTelemetry("1.2.3")
	}
	telemetryInflight.Wait()

	got := atomic.LoadInt32(hits)
	if got != 1 {
		t.Fatalf("expected exactly 1 hit, got %d", got)
	}

	type ping struct {
		Project string `json:"project"`
		Version string `json:"version"`
		Ts      int64  `json:"ts"`
	}
	var payload ping
	if err := json.Unmarshal([]byte(lastBody()), &payload); err != nil {
		t.Fatalf("server received non-JSON body: %q (err: %v)", lastBody(), err)
	}

	wellFormed := payload.Project == "traefikoidc" && payload.Version == "1.2.3" && payload.Ts > 0
	if !wellFormed {
		t.Fatalf("unexpected payload: %+v", payload)
	}
}
// TestSendTelemetry_RespectsDisableEnv sets DO_NOT_TRACK=1 and expects
// sendTelemetry to leave the test server untouched.
func TestSendTelemetry_RespectsDisableEnv(t *testing.T) {
	resetTelemetryState(t)
	hits, _ := newTelemetryServer(t, http.StatusNoContent)
	t.Setenv("DO_NOT_TRACK", "1")

	sendTelemetry("1.2.3")
	telemetryInflight.Wait()

	got := atomic.LoadInt32(hits)
	if got != 0 {
		t.Fatalf("DO_NOT_TRACK should suppress; got %d hits", got)
	}
}
// TestSendTelemetry_DropsInvalidVersion passes a version containing a space
// and expects no request to reach the test server.
func TestSendTelemetry_DropsInvalidVersion(t *testing.T) {
	resetTelemetryState(t)
	hits, _ := newTelemetryServer(t, http.StatusNoContent)

	sendTelemetry("has space")
	telemetryInflight.Wait()

	got := atomic.LoadInt32(hits)
	if got != 0 {
		t.Fatalf("invalid version should suppress; got %d hits", got)
	}
}
// TestSendTelemetry_DoesNotBlock points the client at a server whose handler
// sleeps for five seconds and asserts that sendTelemetry still returns within
// 50ms. The ping goroutine ends once doTelemetryPost's telemetryTimeout (2s)
// expires, and resetTelemetryState's cleanup waits for that before restoring
// telemetryEndpoint, so the endpoint override below does not race with it.
func TestSendTelemetry_DoesNotBlock(t *testing.T) {
	resetTelemetryState(t)

	stall := func(_ http.ResponseWriter, _ *http.Request) {
		time.Sleep(5 * time.Second)
	}
	hung := httptest.NewServer(http.HandlerFunc(stall))
	t.Cleanup(hung.Close)
	telemetryEndpoint = hung.URL

	began := time.Now()
	sendTelemetry("1.2.3")
	elapsed := time.Since(began)

	if elapsed > 50*time.Millisecond {
		t.Fatalf("sendTelemetry blocked for %v, expected near-instant return", elapsed)
	}
}
// TestSendTelemetry_SurvivesServerError has the test server answer 500 and
// checks that the single ping was still delivered.
func TestSendTelemetry_SurvivesServerError(t *testing.T) {
	resetTelemetryState(t)
	hits, _ := newTelemetryServer(t, http.StatusInternalServerError)

	sendTelemetry("1.2.3")
	telemetryInflight.Wait()

	got := atomic.LoadInt32(hits)
	if got != 1 {
		t.Fatalf("request should still reach server even on 500; got %d hits", got)
	}
}
+34 -8
View File
@@ -603,15 +603,28 @@ func (c *UniversalCache) removeItem(key string, item *CacheItem) {
// evictOldest evicts the oldest item from the cache (must be called with lock held)
func (c *UniversalCache) evictOldest() {
if elem := c.lruList.Back(); elem != nil {
key, _ := elem.Value.(string) // Safe to ignore: cache internal type assertion
if item, exists := c.items[key]; exists {
c.removeItem(key, item)
atomic.AddInt64(&c.evictions, 1)
if c.logger.IsDebug() {
c.logger.Debugf("UniversalCache[%s]: Evicted key=%s", c.config.Type, key)
}
elem := c.lruList.Back()
if elem == nil {
return
}
key, _ := elem.Value.(string) // Safe to ignore: cache internal type assertion
if item, exists := c.items[key]; exists && item.element == elem {
c.removeItem(key, item)
atomic.AddInt64(&c.evictions, 1)
if c.logger.IsDebug() {
c.logger.Debugf("UniversalCache[%s]: Evicted key=%s", c.config.Type, key)
}
return
}
// Defensive forward-progress guard: the back node is dangling — its key is
// absent from c.items, or c.items[key] points at a newer node (a stale
// duplicate). Drop the node directly so an eviction loop
// (`for ... && c.lruList.Len() > 0`) is guaranteed to terminate and can
// never spin holding c.mu.Lock(). With the updateLocalCache replace-in-place
// fix this branch should be unreachable, but it makes the spin impossible.
c.lruList.Remove(elem)
if c.currentSize > 0 {
c.currentSize--
}
}
@@ -944,6 +957,19 @@ func (c *UniversalCache) updateLocalCache(key string, value interface{}, ttl tim
}
now := time.Now()
// Replace any existing entry in place. Without this, a repeat populate of
// the same key (the per-request Get->backend-hit path at line ~359)
// PushFronts a second list node and overwrites c.items[key], orphaning the
// previous node. Orphans inflate currentMemory/currentSize and — once the
// eviction loop deletes the key — leave Back() nodes whose key is absent
// from c.items, so evictOldest() no-ops while lruList.Len()>0 stays true:
// an infinite loop while holding c.mu.Lock(), i.e. the 100%-CPU holder and
// write-lock convoy. setLocal already dedups on this path; this mirrors it.
if existing, ok := c.items[key]; ok {
c.removeItem(key, existing)
}
item := &CacheItem{
Key: key,
Value: value,
+84
View File
@@ -0,0 +1,84 @@
package traefikoidc
import (
"testing"
"time"
)
// newOrphanTestCache returns a Token-type UniversalCache for the orphan tests.
// Automatic cleanup is switched off so only the test itself changes
// lruList/items, MaxSize is set very high so the size limit never triggers,
// and the memory limit is enabled only when maxMem is positive.
func newOrphanTestCache(maxMem int64) *UniversalCache {
	cfg := UniversalCacheConfig{
		Type:              CacheTypeToken,
		DefaultTTL:        time.Hour,
		MaxSize:           1_000_000, // large: keep the size-branch out of the way
		SkipAutoCleanup:   true,
		EnableAutoCleanup: false,
	}
	cfg.MaxMemoryBytes = maxMem
	cfg.EnableMemoryLimit = maxMem > 0

	return NewUniversalCache(cfg)
}
// TestUpdateLocalCache_NoOrphanElements is the direct regression test for the
// orphan bug: populating the SAME key through updateLocalCache several times
// must leave exactly one map entry and exactly one lruList element. Before
// updateLocalCache replaced existing entries in place, every repeat call
// pushed another list node while items stayed at one entry.
func TestUpdateLocalCache_NoOrphanElements(t *testing.T) {
	const (
		key     = "same-key"
		repeats = 5
	)
	c := newOrphanTestCache(0) // memory limit off: isolate the orphan, no eviction

	for i := 0; i < repeats; i++ {
		err := c.updateLocalCache(key, "v", time.Hour)
		if err != nil {
			t.Fatalf("updateLocalCache: %v", err)
		}
	}

	// Snapshot both sizes under the read lock.
	var listLen, itemCount int
	func() {
		c.mu.RLock()
		defer c.mu.RUnlock()
		listLen, itemCount = c.lruList.Len(), len(c.items)
	}()

	if itemCount != 1 {
		t.Fatalf("items: got %d want 1", itemCount)
	}
	if listLen != 1 {
		t.Fatalf("ORPHAN BUG: lruList.Len()=%d but items=%d (one list element per key expected)", listLen, itemCount)
	}
}
// TestUpdateLocalCache_EvictionTerminates is the convoy reproducer: when the
// LRU list holds nodes that c.items does not own, the memory-eviction loop
// must still make forward progress and return. Without a guard, evictOldest()
// no-ops on such a node while lruList.Len()>0 stays true, which is an infinite
// loop under c.mu.Lock() (the 100%-CPU holder + write-lock convoy seen in
// pprof).
//
// Orphans are produced in two ways so the test stays meaningful whether or
// not updateLocalCache dedups:
//
//   - repeated populates of the same key (the historical source of orphans;
//     a no-op once updateLocalCache replaces entries in place), and
//   - nodes pushed straight onto the back of lruList, which exercise
//     evictOldest's forward-progress guard directly: one stale duplicate of a
//     live key and two nodes whose key is absent from c.items.
//
// NOTE(review): the direct injection assumes lruList is a container/list list
// holding string keys, as evictOldest's `elem.Value.(string)` suggests.
func TestUpdateLocalCache_EvictionTerminates(t *testing.T) {
	c := newOrphanTestCache(0) // start with memory limit OFF while seeding
	const key = "same-key"

	// Historical orphan source: same key populated several times.
	for i := 0; i < 3; i++ {
		if err := c.updateLocalCache(key, "v", time.Hour); err != nil {
			t.Fatalf("seed updateLocalCache: %v", err)
		}
	}

	c.mu.Lock()
	// Stale duplicate: key is live, but c.items[key] points at another node.
	c.lruList.PushBack(key)
	// Dangling nodes: key not present in c.items at all.
	c.lruList.PushBack("ghost-key")
	c.lruList.PushBack("ghost-key")
	// Arm the trap: tiny memory limit so the next call enters the eviction loop.
	c.config.MaxMemoryBytes = 1
	c.mu.Unlock()

	done := make(chan struct{})
	go func() {
		defer close(done)
		_ = c.updateLocalCache(key, "v", time.Hour) // triggers the eviction loop
	}()

	select {
	case <-done:
		// Loop made forward progress and returned.
	case <-time.After(2 * time.Second):
		t.Fatal("INFINITE LOOP: eviction loop did not terminate within 2s (orphan whose key was deleted is never removed from lruList)")
	}
}
@@ -0,0 +1 @@
.docs
+36
View File
@@ -0,0 +1,36 @@
version: "2"
run:
timeout: 2m
linters:
default: none
enable:
- bodyclose
- errcheck
- errorlint
- gocritic
- gocyclo
- govet
- ineffassign
- misspell
- prealloc
- revive
- staticcheck
- unconvert
- unused
settings:
gocyclo:
min-complexity: 12
revive:
rules:
- name: var-naming
- name: indent-error-flow
- name: superfluous-else
- name: unused-parameter
- name: redefines-builtin-id
formatters:
enable:
- gofmt
- goimports
+42
View File
@@ -0,0 +1,42 @@
# Configuration for lukaszraczylo/semver-generator.
# Reference: https://github.com/lukaszraczylo/semver-generator
#
# Word matching is fuzzy + case-insensitive. The keywords below mirror the
# Conventional Commits prefixes used in this repo's git history. Same pattern
# as github.com/lukaszraczylo/go-telegram/.semver.yaml.
version: 1
# Respect existing v* tags as the version baseline. semver-generator finds
# the highest existing tag and bumps from there. With no tags yet, the first
# release computes from the empty base.
force:
existing: true
# Skip merge commits and machine-generated traffic that would otherwise
# spuriously bump the version.
blacklist:
- "Merge branch"
- "Merge pull request"
- "Merge remote-tracking branch"
- "go mod tidy"
wording:
patch:
- "fix"
- "chore"
- "docs"
- "test"
- "style"
- "refactor"
- "build"
- "ci"
- "perf"
minor:
- "feat"
major:
# Match only the canonical Conventional Commits trailer. The bare word
# "breaking" is too greedy under semver-generator's fuzzy match — it
# triggers on substrings inside a commit body and wrongly produces a
# major bump.
- "BREAKING CHANGE"
+122
View File
@@ -0,0 +1,122 @@
# oss-telemetry
A tiny Go client that fires one anonymous "this binary started" ping at a
central ingest endpoint. Designed to be embedded in your own open-source
projects so you can see approximate adoption and version spread without
collecting anything that could identify a user.
This is the **client library only**. The ingest endpoint, server-side
deduplication, rate limiting, and metrics are out of scope here.
## What it sends
A single HTTP `POST` per call to `Send`:
```json
{
"project": "my-tool",
"version": "1.2.3",
"ts": 1747782200
}
```
No identifiers, no IP, no machine info, no user data. The server dedupes
incoming requests; the client just fires and forgets.
## Failproof by design
- Never blocks the caller — work runs in a goroutine.
- Never panics — the goroutine recovers internally.
- Never returns errors — bad input and network failures are silently dropped.
- Never retries, never persists state, never reads back.
- 2-second hard timeout on every request.
- Zero third-party dependencies (Go stdlib only).
The endpoint is hardcoded and not overridable from consuming code, by design.
## Install
```bash
go get github.com/lukaszraczylo/oss-telemetry
```
Requires Go 1.22+.
## Usage
```go
package main
import (
"time"
telemetry "github.com/lukaszraczylo/oss-telemetry"
)
const version = "1.2.3"
func main() {
telemetry.Send("my-tool", version)
// ... your program runs ...
// Only needed for short-lived CLIs that may exit before the goroutine
// finishes its POST. Long-running services do not need this.
telemetry.Wait(2 * time.Second)
}
```
Call `Send` once at boot. Calling it more often just sends more pings; the
server deduplicates.
## Disabling telemetry
If you ship a binary that imports this library, link your users to this
section (`https://github.com/lukaszraczylo/oss-telemetry#disabling-telemetry`)
so they can find the opt-out paths.
Any one of these turns it off:
| Mechanism | How |
| ---------------------------------------- | ---------------------------------------------------------------- |
| Universal opt-out | `DO_NOT_TRACK=1` |
| Library-wide opt-out | `OSS_TELEMETRY_DISABLED=1` |
| Project-specific opt-out | `<UPPER_PROJECT>_DISABLE_TELEMETRY=1` |
| Programmatic (e.g. behind a `--no-telemetry` flag) | `telemetry.Disable()` before the first `Send` |
Project-specific env var derivation: uppercase the project name and replace
`-` with `_`. For `my-tool` the variable is `MY_TOOL_DISABLE_TELEMETRY`.
Truthy values: `1`, `true`, `yes`, `on` (case-insensitive). Anything else is
treated as "not set".
## Validation rules (silently dropped if violated)
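- `project`: matches `^[a-z0-9-]+$`, length 1–64.
- `version`: SemVer-shaped, length 1–32: `MAJOR[.MINOR[.PATCH]]`, optionally followed by `-prerelease` and/or `+build`, each a non-empty run of `[0-9A-Za-z.-]`. A leading `v`/`V` is accepted and stripped before sending. Free-form strings such as `dev` are dropped.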
Bad input is a no-op — the library never logs, never errors, never crashes.
## API
```go
// Fire a single ping in the background. Returns immediately.
func Send(project, version string)
// Suppress all subsequent Send calls in this process. Idempotent.
func Disable()
// Block until in-flight pings complete or timeout elapses, whichever first.
// Useful for short-lived CLI processes.
func Wait(timeout time.Duration)
```
## Testing
```bash
go test -race ./...
```
## License
Pick one before publishing. None bundled.
+367
View File
@@ -0,0 +1,367 @@
// Package telemetry sends anonymous usage pings for open-source Go projects.
//
// Wire format (POST application/json):
//
// {"project":"<name>","version":"<ver>","ts":<unix-seconds>}
//
// Design contract (failproof):
// - never blocks the caller (work happens in a goroutine)
// - never panics (background goroutine recovers internally)
// - never returns errors (silently no-ops on bad input or network failure)
// - never retries, never deduplicates, never persists state — the client
// fires a single ping and forgets; the server is responsible for
// deduplication, abuse protection, and aggregation
//
// Typical usage at program startup:
//
// telemetry.Send("my-tool", "1.2.3")
//
// For short-lived CLI processes that may exit before the goroutine finishes:
//
// telemetry.Send("my-tool", "1.2.3")
// defer telemetry.Wait(2 * time.Second)
//
// Disablement (any one of these suppresses pings):
// - environment variable DO_NOT_TRACK=1
// - environment variable OSS_TELEMETRY_DISABLED=1
// - environment variable <UPPER_PROJECT>_DISABLE_TELEMETRY=1
// (project name uppercased, dashes replaced with underscores)
// - calling telemetry.Disable() at runtime
package telemetry
import (
"bytes"
"context"
"net/http"
"os"
"runtime/debug"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
)
const (
defaultEndpoint = "https://oss.raczylo.com/v1/ping"
httpTimeout = 2 * time.Second
maxProjectLen = 64
maxVersionLen = 32
)
// Yaegi note: this package is consumed by the traefikoidc Traefik plugin, which
// Traefik interprets with Yaegi (it vendors and interprets dependency source).
// It therefore avoids generic stdlib types (atomic.Pointer[T], atomic.Bool) and
// range-over-int (Go 1.22), which some Traefik/Yaegi runtimes cannot interpret.
// Endpoint mutation uses a mutex-guarded string; the disabled flag uses the
// function-based sync/atomic int32 API (atomic.LoadInt32/StoreInt32).
var (
// endpointURL holds the ingest URL. Production code never mutates it; the
// setter exists only so the test suite can retarget it at httptest servers
// while goroutines started by Send are still in flight.
endpointMu sync.RWMutex
endpointURL = defaultEndpoint
disabled int32 // 0 = enabled, 1 = disabled; accessed via sync/atomic only
inflight sync.WaitGroup
client = &http.Client{Timeout: httpTimeout}
)
// currentEndpoint returns the ingest URL, read under endpointMu's read lock.
func currentEndpoint() string {
	endpointMu.RLock()
	u := endpointURL
	endpointMu.RUnlock()
	return u
}
// setEndpointURL replaces the ingest URL while holding endpointMu's write
// lock, so it is safe against concurrent currentEndpoint calls.
func setEndpointURL(u string) {
	endpointMu.Lock()
	defer endpointMu.Unlock()
	endpointURL = u
}
// Send starts one anonymous telemetry ping on a background goroutine and
// returns immediately. It reports nothing back: a call is silently dropped
// when telemetry was turned off via Disable, when an opt-out environment
// variable is set, or when project or version fail validation. A panic in the
// background goroutine is recovered and discarded.
//
// version must have the SemVer-ish shape accepted by validVersion. A leading
// "v" or "V" is allowed and removed before transmission, so "v1.2.3" and
// "1.2.3" both go out as "1.2.3".
//
// Call it once at program startup. Every call that passes the checks sends
// another ping; deduplication is left to the server.
func Send(project, version string) {
	// Checked in this order: runtime switch, environment, then input shape.
	skip := atomic.LoadInt32(&disabled) != 0 ||
		isDisabledByEnv(project) ||
		!validProject(project) ||
		!validVersion(version)
	if skip {
		return
	}

	wireVersion := normalizeVersion(version)

	inflight.Add(1)
	go func() {
		// Deferred calls run last-in-first-out: recover first, then release
		// the in-flight counter that Wait observes.
		defer inflight.Done()
		defer func() { _ = recover() }()
		dispatch(project, wireVersion)
	}()
}
// SendForModule is the recommended call form for Go libraries: it resolves
// the version automatically from Go's build info for the given module path
// so consumers do not need to maintain a hand-bumped version constant in
// source. Behaviour and contract are otherwise identical to [Send].
//
// Resolution order:
//
// 1. debug.ReadBuildInfo Deps entry for modulePath (authoritative when the
// library is consumed via go.mod);
// 2. debug.ReadBuildInfo Main when the library is itself the main module
// (e.g. running its own tests or examples);
// 3. fallback parameter, used only when build info is unavailable or
// unhelpful (replace directives, detached `go run`, ldflag override).
//
// Any leading "v" reported by build info is stripped to match the canonical
// wire form. Empty / "(devel)" build versions are skipped in favour of the
// next resolution source. Typical usage:
//
// telemetry.SendForModule("my-tool", "github.com/me/my-tool", "0.0.0-dev")
func SendForModule(project, modulePath, fallback string) {
Send(project, ResolveModuleVersion(modulePath, fallback))
}
// ResolveModuleVersion returns the version SendForModule would report for
// modulePath without sending anything, e.g. for logging. It scans the build
// info dependencies for modulePath, then the main module, and returns the
// first usable version with a leading "v" removed. If build info cannot be
// read or yields nothing usable, fallback is returned unchanged.
func ResolveModuleVersion(modulePath, fallback string) string {
	info, ok := debug.ReadBuildInfo()
	if !ok {
		return fallback
	}

	for _, dep := range info.Deps {
		if dep == nil || dep.Path != modulePath {
			continue
		}
		if isUsableBuildVersion(dep.Version) {
			return strings.TrimPrefix(dep.Version, "v")
		}
	}

	mainModule := info.Main
	if mainModule.Path == modulePath && isUsableBuildVersion(mainModule.Version) {
		return strings.TrimPrefix(mainModule.Version, "v")
	}
	return fallback
}
// isUsableBuildVersion reports whether v is a real version string, i.e.
// neither empty nor the "(devel)" placeholder.
func isUsableBuildVersion(v string) bool {
	placeholder := v == "" || v == "(devel)"
	return !placeholder
}
// Disable suppresses all subsequent Send calls in this process by setting the
// package-level disabled flag, which Send checks before anything else.
// Idempotent and safe to call from any goroutine (the flag is written with
// sync/atomic). Pings already started by an earlier Send are not cancelled.
func Disable() {
atomic.StoreInt32(&disabled, 1)
}
// Wait returns once every in-flight ping has finished or once timeout has
// passed, whichever happens first. Short-lived CLI programs can call it
// before exiting so the background POST gets a chance to complete.
//
// A zero or negative timeout makes Wait return immediately.
func Wait(timeout time.Duration) {
	if timeout <= 0 {
		return
	}

	drained := make(chan struct{})
	go func() {
		defer close(drained)
		inflight.Wait()
	}()

	expired := time.After(timeout)
	select {
	case <-expired:
	case <-drained:
	}
}
// dispatch performs the actual POST of one ping to the current endpoint. The
// request is bound to a context that expires after httpTimeout. Errors and
// the response status are ignored; the response body is only closed.
func dispatch(project, version string) {
	payload := buildPayload(project, version, time.Now().Unix())

	ctx, cancel := context.WithTimeout(context.Background(), httpTimeout)
	defer cancel()

	req, err := http.NewRequestWithContext(ctx, http.MethodPost, currentEndpoint(), bytes.NewReader(payload))
	if err != nil {
		return
	}
	req.Header.Set("Content-Type", "application/json")

	if resp, doErr := client.Do(req); doErr == nil {
		_ = resp.Body.Close()
	}
}
// buildPayload renders the ping as
// {"project":"<project>","version":"<version>","ts":<ts>} without using
// encoding/json. project and version are inserted verbatim, which is only
// safe because the validators admit no character that needs JSON escaping.
func buildPayload(project, version string, ts int64) []byte {
	head := `{"project":"` + project + `","version":"` + version + `","ts":`

	// Room for the head, up to 20 characters of signed int64, and the brace.
	out := make([]byte, 0, len(head)+21)
	out = append(out, head...)
	out = strconv.AppendInt(out, ts, 10)
	return append(out, '}')
}
// validProject reports whether p is a legal project name: 1 to maxProjectLen
// bytes, each a lowercase ASCII letter, a digit, or '-'.
//
// Yaegi note: the character test is an `||` chain on purpose. This package is
// interpreted by Yaegi when loaded through the traefikoidc plugin, and some
// Yaegi releases mis-evaluate comma-separated case expressions in a
// `switch { case A, B, C: }` block, taking the default branch for every
// input. That would reject every project name and silently drop every ping.
func validProject(p string) bool {
	n := len(p)
	if n == 0 || n > maxProjectLen {
		return false
	}
	for i := 0; i < n; i++ {
		c := p[i]
		ok := (c >= 'a' && c <= 'z') ||
			(c >= '0' && c <= '9') ||
			c == '-'
		if !ok {
			return false
		}
	}
	return true
}
// validVersion reports whether v is a SemVer-ish version, optionally prefixed
// with "v" or "V". With the prefix removed the accepted shape is
//
//	MAJOR[.MINOR[.PATCH]] ("-"prerelease)? ("+"build)?
//
// where MAJOR/MINOR/PATCH are runs of ASCII digits and prerelease/build are
// non-empty runs of [0-9A-Za-z.-]. The length limit (maxVersionLen) applies to
// v as given, prefix included. The shape deliberately mirrors the receiver's
// version regex, so strings like "dev" or "git-2026-05-22" are dropped on the
// client instead of being rejected by the server with HTTP 400.
func validVersion(v string) bool {
	if v == "" || len(v) > maxVersionLen {
		return false
	}

	core := v
	if first := v[0]; first == 'v' || first == 'V' {
		core = v[1:]
	}
	return core != "" && checkSemverShape(core)
}
// normalizeVersion returns v without a single leading "v" or "V", if it has
// one, so that "v1.2.3" and "1.2.3" are both transmitted as "1.2.3". This is
// the form stored server-side by the version refresher cron, which strips the
// leading v from release tags as well.
func normalizeVersion(v string) string {
	if strings.HasPrefix(v, "v") || strings.HasPrefix(v, "V") {
		return v[1:]
	}
	return v
}
// checkSemverShape reports whether s, with any "v" prefix already removed,
// has the form MAJOR[.MINOR[.PATCH]][-prerelease][+build]. It walks s left to
// right with a cursor and succeeds only if the cursor ends exactly at the end
// of s, so any trailing or unexpected character makes it fail.
func checkSemverShape(s string) bool {
	pos := 0

	// MAJOR is mandatory.
	if !readDigitRun(s, &pos) {
		return false
	}

	// Up to two further ".digits" groups (MINOR, PATCH).
	for extra := 0; extra < 2; extra++ {
		if pos >= len(s) || s[pos] != '.' {
			break
		}
		pos++
		if !readDigitRun(s, &pos) {
			return false
		}
	}

	// Optional "-prerelease", which stops at a '+' introducing build metadata.
	if pos < len(s) && s[pos] == '-' {
		pos++
		if !readIdentRun(s, &pos, '+') {
			return false
		}
	}

	// Optional "+build", running to the end of the string.
	if pos < len(s) && s[pos] == '+' {
		pos++
		if !readIdentRun(s, &pos, 0) {
			return false
		}
	}

	return pos == len(s)
}
// readDigitRun advances *i past the run of ASCII digits that starts at *i in
// s and reports whether at least one digit was consumed. When it returns
// false, *i is left where it was.
func readDigitRun(s string, i *int) bool {
	pos := *i
	for pos < len(s) && s[pos] >= '0' && s[pos] <= '9' {
		pos++
	}
	consumed := pos > *i
	*i = pos
	return consumed
}
// readIdentRun advances *i over characters from [0-9A-Za-z.-], starting at *i
// in s. It stops at the end of s or just before the byte `stop` (a stop of 0
// means "no stop byte"). It returns false if it hits any other character,
// leaving *i on the offending byte, or if it consumed nothing.
func readIdentRun(s string, i *int, stop byte) bool {
	begin := *i
	pos := begin
	for ; pos < len(s); pos++ {
		ch := s[pos]
		if stop != 0 && ch == stop {
			break
		}
		allowed := (ch >= '0' && ch <= '9') ||
			(ch >= 'A' && ch <= 'Z') ||
			(ch >= 'a' && ch <= 'z') ||
			ch == '.' || ch == '-'
		if !allowed {
			*i = pos
			return false
		}
	}
	*i = pos
	return pos > begin
}
// isDisabledByEnv reports whether an environment variable opts out of
// telemetry: DO_NOT_TRACK, OSS_TELEMETRY_DISABLED, or, when project is not
// empty, the project-specific key built by projectEnvKey. A variable counts
// only if its value is truthy.
func isDisabledByEnv(project string) bool {
	for _, name := range [...]string{"DO_NOT_TRACK", "OSS_TELEMETRY_DISABLED"} {
		if truthy(os.Getenv(name)) {
			return true
		}
	}
	return project != "" && truthy(os.Getenv(projectEnvKey(project)))
}
// projectEnvKey derives the project-specific opt-out variable name,
// "<UPPER_PROJECT>_DISABLE_TELEMETRY": ASCII lowercase letters are
// upper-cased, '-' becomes '_', and every other byte is kept as is. The name
// is built in one pre-sized buffer rather than via chained
// strings.ToUpper(strings.ReplaceAll(...)).
func projectEnvKey(project string) string {
	const suffix = "_DISABLE_TELEMETRY"

	key := make([]byte, len(project), len(project)+len(suffix))
	for i := range key {
		ch := project[i]
		if ch == '-' {
			ch = '_'
		} else if ch >= 'a' && ch <= 'z' {
			ch = ch - 'a' + 'A'
		}
		key[i] = ch
	}
	return string(append(key, suffix...))
}
// truthy reports whether s, after trimming surrounding whitespace and
// lower-casing, is one of the accepted "enabled" spellings: "1", "true",
// "yes" or "on". Everything else, including the empty string, is false.
func truthy(s string) bool {
	normalized := strings.ToLower(strings.TrimSpace(s))
	return normalized == "1" ||
		normalized == "true" ||
		normalized == "yes" ||
		normalized == "on"
}
+3
View File
@@ -24,6 +24,9 @@ github.com/gorilla/securecookie
# github.com/gorilla/sessions v1.3.0
## explicit; go 1.20
github.com/gorilla/sessions
# github.com/lukaszraczylo/oss-telemetry v0.2.3
## explicit; go 1.22
github.com/lukaszraczylo/oss-telemetry
# github.com/pmezard/go-difflib v1.0.0
## explicit
github.com/pmezard/go-difflib/difflib
+17
View File
@@ -0,0 +1,17 @@
package traefikoidc
// devPluginVersion is the placeholder carried by source-tree / local / test
// builds. Telemetry is suppressed while the plugin still reports this sentinel,
// so only stamped release builds emit a "plugin loaded" ping.
//
// ./workflow-prepare.sh anchors its rewrite on the traefikoidcPluginVersion
// name, so release stamping never changes this constant. Its value must stay
// identical to the unstamped default of traefikoidcPluginVersion.
// NOTE(review): presumably the two constants are compared to detect a dev
// build; confirm at the call site before changing either literal.
const devPluginVersion = "0.0.0-dev"
// traefikoidcPluginVersion is the released version of this plugin. It is stamped
// at release time by ./workflow-prepare.sh (invoked by the shared go-release
// workflow before GoReleaser builds and tags), which rewrites the string below
// to the computed semver.
//
// Traefik runs this plugin under Yaegi, where the version cannot be resolved
// from build info at runtime (debug.ReadBuildInfo sees Traefik's build graph,
// not the interpreted plugin). This build-stamped constant is therefore the
// single source of truth for the version reported by anonymous usage telemetry.
//
// Stamping contract with ./workflow-prepare.sh:
//   - The script locates this declaration by the constant's name followed by
//     an equals sign and a double-quoted literal on the same line. Keep the
//     declaration on one line, and update the script if the name changes.
//   - The stamped value is a bare semver-shaped string; a leading "v"/"V" on
//     the supplied version is removed before it is written here.
//   - When the script is given no version (local builds, non-release CI jobs)
//     it exits without rewriting, so the dev placeholder below is kept.
const traefikoidcPluginVersion = "0.0.0-dev"
+67
View File
@@ -0,0 +1,67 @@
#!/usr/bin/env bash
#
# workflow-prepare.sh — stamp the release version into version.go at build time.
#
# The shared go-release workflow (lukaszraczylo/shared-actions go-release.yaml)
# runs this script, if present, from the repository root BEFORE GoReleaser
# builds and tags. Traefik runs this plugin under Yaegi, where the version
# cannot be resolved from build info at runtime, so the released semver must be
# baked into source here.
#
# Version source — first non-empty wins:
#   $VERSION  $VERSION_TAG  $SEMVER  $NEW_VERSION  $RELEASE_VERSION
# A single leading "v"/"V" is stripped.
#
# NOTE: go-release.yaml @main does not yet pass the computed version into this
# step's environment. Add it to the "Run workflow prepare script" step, e.g.:
#   env:
#     VERSION: ${{ needs.version.outputs.version }}   # bare, no leading v
#
# The shared workflow runs this script in its test, version AND release jobs,
# but only the release job has a computed version. So a missing version is a
# no-op (leave the dev sentinel) — NOT a hard failure, otherwise the test/version
# jobs would break. A malformed version that IS provided is a hard error. Wire
# the env only on the release job's prepare step (see the NOTE above).
#
# Exit status: 0 when the version was stamped or no version was provided;
# 1 when the version is malformed, version.go is missing, or stamping failed.
set -euo pipefail

FILE="version.go"
CONST="traefikoidcPluginVersion"

VER="${VERSION:-${VERSION_TAG:-${SEMVER:-${NEW_VERSION:-${RELEASE_VERSION:-}}}}}"

# Strip exactly one leading v/V. Two back-to-back "${VER#v}" / "${VER#V}"
# expansions would also eat the "V" of a malformed "vV1.2.3" and let it pass
# validation as "1.2.3".
case "$VER" in
  [vV]*) VER="${VER#?}" ;;
esac

if [ -z "$VER" ]; then
  if [ "${GITHUB_ACTIONS:-}" = "true" ]; then
    echo "workflow-prepare: WARNING no version provided; leaving ${FILE} at the dev placeholder. If this is the release build, set 'env: VERSION: \${{ needs.version.outputs.version }}' on the release job's prepare step — otherwise the release ships 0.0.0-dev and emits no telemetry." >&2
  else
    echo "workflow-prepare: no version provided; leaving dev placeholder in ${FILE} (local build)"
  fi
  exit 0
fi

# Accept MAJOR[.MINOR[.PATCH]] with optional -prerelease / +build (semver-ish,
# matching the oss-telemetry receiver's validator).
#
# The match is done with bash's [[ =~ ]] against the whole string rather than
# piping through grep: grep matches line by line, so a multi-line value whose
# first line is well-formed would be accepted and then spliced into the sed
# program below. After this check VER contains only [0-9A-Za-z.+-], none of
# which is special in a sed replacement.
semver_re='^[0-9]+(\.[0-9]+){0,2}(-[0-9A-Za-z.-]+)?(\+[0-9A-Za-z.-]+)?$'
if ! [[ "$VER" =~ $semver_re ]]; then
  echo "workflow-prepare: ERROR version '${VER}' is not semver-shaped" >&2
  exit 1
fi

if [ ! -f "$FILE" ]; then
  echo "workflow-prepare: ERROR ${FILE} not found (run from repository root)" >&2
  exit 1
fi

# Rewrite only the value of ${CONST}, anchored on the constant name so the
# sibling devPluginVersion sentinel is left untouched.
tmp="$(mktemp)"
# Remove the scratch file on every exit path, including a failing sed.
trap 'rm -f -- "$tmp"' EXIT
sed -E "s/(${CONST}[[:space:]]*=[[:space:]]*\")[^\"]*(\")/\1${VER}\2/" "$FILE" > "$tmp"
# Copy the contents back instead of mv-ing the temp file over ${FILE}: mktemp
# creates its file readable by the owner only, and mv would carry that mode
# (and ownership) onto version.go.
cat "$tmp" > "$FILE"

# Verify the stamp with a literal comparison. VER may legally contain "+" and
# ".", which are regex operators; interpolating it into a grep -E pattern made
# a correctly stamped "1.2.3+build.5" look like a failure.
stamped_line="$(grep -E "${CONST}[[:space:]]*=[[:space:]]*\"" "$FILE" || true)"
case "$stamped_line" in
  *"\"${VER}\""*) ;;
  *)
    echo "workflow-prepare: ERROR failed to stamp version into ${FILE}" >&2
    exit 1
    ;;
esac

# gofmt is optional; when present, a formatting failure aborts the script.
if command -v gofmt >/dev/null 2>&1; then
  gofmt -w "$FILE"
fi

echo "workflow-prepare: stamped ${CONST} = \"${VER}\" in ${FILE}"