fixup! chore: update marketplace for v0.11.37

march-improvements
2026-06-05 23:03:55 +00:00 · 2026-03-06 15:39:52 +00:00
parent 1a6f6b6e5e
commit 77f5f02510
32 changed files with 2404 additions and 2778 deletions
@@ -108,6 +108,9 @@ build-windows:
 # Stop any running worker
 stop-worker:
 	@echo "Stopping worker..."
+	@-pkill -TERM -f 'claude-mnemonic.*worker' 2>/dev/null || true
+	@-pkill -TERM -f '\.claude/plugins/.*/worker' 2>/dev/null || true
+	@sleep 1
 	@-pkill -9 -f 'claude-mnemonic.*worker' 2>/dev/null || true
 	@-pkill -9 -f '\.claude/plugins/.*/worker' 2>/dev/null || true
 	@-lsof -ti :37777 | xargs kill -9 2>/dev/null || true
@@ -135,6 +138,10 @@ restart-worker: stop-worker start-worker
 # Install to Claude plugins directory
 install: build stop-worker
 	@echo "Installing to Claude plugins directory..."
+	@# Verify build output binaries exist
+	@test -f $(BUILD_DIR)/worker || { echo "ERROR: $(BUILD_DIR)/worker not found. Build may have failed."; exit 1; }
+	@test -f $(BUILD_DIR)/mcp-server || { echo "ERROR: $(BUILD_DIR)/mcp-server not found. Build may have failed."; exit 1; }
+	@test -d $(BUILD_DIR)/hooks || { echo "ERROR: $(BUILD_DIR)/hooks not found. Build may have failed."; exit 1; }
 	@# Install to marketplaces directory (for direct installs)
 	@mkdir -p $(HOME)/.claude/plugins/marketplaces/claude-mnemonic/hooks
 	@mkdir -p $(HOME)/.claude/plugins/marketplaces/claude-mnemonic/.claude-plugin
@@ -2,8 +2,10 @@
 package main

 import (
+	"context"
 	"fmt"
 	"os"
+	"time"

 	"github.com/lukaszraczylo/claude-mnemonic/pkg/hooks"
 )
@@ -51,6 +53,10 @@ var skipTools = map[string]bool{
 }

 func main() {
+	if !hooks.IsWorkerAvailable() {
+		hooks.WriteResponse("PostToolUse", true)
+		return
+	}
 	hooks.RunHook("PostToolUse", handlePostToolUse)
 }

@@ -63,8 +69,16 @@ func handlePostToolUse(ctx *hooks.HookContext, input *Input) (string, error) {

 	fmt.Fprintf(os.Stderr, "[post-tool-use] %s\n", input.ToolName)

-	// Send observation to worker
-	_, err := hooks.POST(ctx.Port, "/api/sessions/observations", map[string]interface{}{
+	// Fire-and-forget: send the observation without waiting for the response.
+	// The worker just queues it -- we don't need the response data.
+	// Delivery is best-effort: the 500ms context only bounds the request, and the
+	// hook stops waiting after 100ms (see the select below), so the send may not finish.
+	done := make(chan struct{})
+	go func() {
+		defer close(done)
+		sendCtx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
+		defer cancel()
+		_ = hooks.POSTWithContext(sendCtx, ctx.Port, "/api/sessions/observations", map[string]interface{}{
 			"claudeSessionId": ctx.SessionID,
 			"project":         ctx.Project,
 			"tool_name":       input.ToolName,
@@ -72,6 +86,14 @@ func handlePostToolUse(ctx *hooks.HookContext, input *Input) (string, error) {
 			"tool_response":   input.ToolResponse,
 			"cwd":             ctx.CWD,
 		})
+	}()

-	return "", err
+	// Wait briefly for the TCP connection to be established and request sent,
+	// but don't block the hook for the full response.
+	select {
+	case <-done:
+	case <-time.After(100 * time.Millisecond):
+	}
+
+	return "", nil
 }
@@ -6,6 +6,7 @@ import (
 	"net/url"
 	"os"
 	"strings"
+	"time"

 	"github.com/lukaszraczylo/claude-mnemonic/pkg/hooks"
 )
@@ -27,10 +28,17 @@ type Observation struct {
 }

 func main() {
+	if !hooks.IsWorkerAvailable() {
+		hooks.WriteResponse("SessionStart", true)
+		return
+	}
 	hooks.RunHook("SessionStart", handleSessionStart)
 }

 func handleSessionStart(ctx *hooks.HookContext, input *Input) (string, error) {
+	deadline, cancel := hooks.HookDeadline(30 * time.Second)
+	defer cancel()
+
 	// Fetch observations for context injection
 	endpoint := fmt.Sprintf("/api/context/inject?project=%s&cwd=%s",
 		url.QueryEscape(ctx.Project),
@@ -59,12 +67,21 @@ func handleSessionStart(ctx *hooks.HookContext, input *Input) (string, error) {
 	fmt.Fprintf(os.Stderr, "[claude-mnemonic] Injecting %d observations from project memory (%d detailed, %d condensed)\n",
 		len(obsData), min(fullCount, len(obsData)), max(0, len(obsData)-fullCount))

+	// Token budget for context injection
+	maxTokens := 16000 // default; could be made configurable via worker config endpoint
+	currentTokens := 0
+
 	// Build context string
-	contextBuilder := "<claude-mnemonic-context>\n"
-	contextBuilder += fmt.Sprintf("# Project Memory (%d observations)\n", len(obsData))
-	contextBuilder += "Use this knowledge to answer questions without re-exploring the codebase.\n\n"
+	header := fmt.Sprintf("<claude-mnemonic-context>\n# Project Memory (%d observations)\nUse this knowledge to answer questions without re-exploring the codebase.\n\n", len(obsData))
+	currentTokens += estimateTokens(header)
+	contextBuilder := header

 	for i, o := range obsData {
+		if deadline.Err() != nil {
+			contextBuilder += "\n... (returning early due to time limit)\n"
+			break
+		}
+
 		obs, ok := o.(map[string]interface{})
 		if !ok {
 			continue
@@ -73,40 +90,94 @@ func handleSessionStart(ctx *hooks.HookContext, input *Input) (string, error) {
 		title := getString(obs, "title")
 		obsType := getString(obs, "type")

+		var obsText string
+
 		// First `fullCount` observations get full detail, rest are condensed
 		if i < fullCount {
 			// Full detail: include narrative and facts
 			narrative := getString(obs, "narrative")

-			contextBuilder += fmt.Sprintf("## %d. [%s] %s\n", i+1, strings.ToUpper(obsType), title)
+			obsText = fmt.Sprintf("## %d. [%s] %s\n", i+1, strings.ToUpper(obsType), title)
 			if narrative != "" {
-				contextBuilder += narrative + "\n"
+				obsText += narrative + "\n"
 			}

 			if facts, ok := obs["facts"].([]interface{}); ok && len(facts) > 0 {
-				contextBuilder += "Key facts:\n"
+				obsText += "Key facts:\n"
 				for _, f := range facts {
 					if fact, ok := f.(string); ok && fact != "" {
-						contextBuilder += fmt.Sprintf("- %s\n", fact)
+						obsText += fmt.Sprintf("- %s\n", fact)
 					}
 				}
 			}
-			contextBuilder += "\n"
+			obsText += "\n"
 		} else {
 			// Condensed: just title and subtitle (one line)
 			subtitle := getString(obs, "subtitle")
 			if subtitle != "" {
-				contextBuilder += fmt.Sprintf("- [%s] %s: %s\n", strings.ToUpper(obsType), title, subtitle)
+				obsText = fmt.Sprintf("- [%s] %s: %s\n", strings.ToUpper(obsType), title, subtitle)
 			} else {
-				contextBuilder += fmt.Sprintf("- [%s] %s\n", strings.ToUpper(obsType), title)
+				obsText = fmt.Sprintf("- [%s] %s\n", strings.ToUpper(obsType), title)
 			}
 		}
+
+		obsTokens := estimateTokens(obsText)
+		if currentTokens+obsTokens > maxTokens {
+			contextBuilder += fmt.Sprintf("\n... (%d more observations omitted due to token budget)\n", len(obsData)-i)
+			break
+		}
+
+		contextBuilder += obsText
+		currentTokens += obsTokens
 	}

 	contextBuilder += "</claude-mnemonic-context>\n"
 	return contextBuilder, nil
 }

+// estimateTokens returns a rough token count for s, used to enforce the
+// context token budget. Whitespace-only input is sized at ~4 bytes per token.
+// Otherwise the base is ~1.3 tokens per whitespace-separated word (or ~4 bytes
+// per token for a single unbroken blob), scaled by 1.3 for punctuation-heavy
+// text and increased by one token per non-ASCII rune for non-ASCII-heavy text.
+func estimateTokens(s string) int {
+	// strings.Fields reports 0 words for empty or whitespace-only input
+	words := len(strings.Fields(s))
+	if words == 0 {
+		return (len(s) + 3) / 4 // 0 for ""
+	}
+
+	// Base estimate: ~1.3 tokens per word for English text
+	estimate := int(float64(words) * 1.3)
+	if words == 1 {
+		// One unbroken blob: its word count says nothing about its size
+		estimate = (len(s) + 3) / 4
+	}
+
+	// Classify runes; totalChars counts runes, not bytes, and is >= 1 because words > 0
+	nonAlpha, nonASCII, totalChars := 0, 0, 0
+	for _, r := range s {
+		totalChars++
+		if r > 127 {
+			nonASCII++
+		} else if !('a' <= r && r <= 'z') && !('A' <= r && r <= 'Z') && !('0' <= r && r <= '9') && r != ' ' {
+			nonAlpha++
+		}
+	}
+
+	// Code adjustment: more special chars = more tokens per word
+	if float64(nonAlpha)/float64(totalChars) > 0.15 {
+		estimate = int(float64(estimate) * 1.3)
+	}
+
+	// Non-ASCII adjustment: CJK and other scripts use more tokens
+	if float64(nonASCII)/float64(totalChars) > 0.1 {
+		estimate += nonASCII // Roughly 1 extra token per non-ASCII char
+	}
+
+	return estimate
+}
+
 func getString(m map[string]interface{}, key string) string {
 	if v, ok := m[key].(string); ok {
 		return v
@@ -117,7 +117,7 @@ func getWorkerStats(port int, project string) *WorkerStats {
 	if err != nil {
 		return nil
 	}
-	defer resp.Body.Close()
+	defer func() { _ = resp.Body.Close() }()

 	if resp.StatusCode != http.StatusOK {
 		return nil
@@ -5,12 +5,16 @@ import (
 	"bufio"
 	"encoding/json"
 	"fmt"
+	"io"
 	"os"
 	"strings"
+	"time"

 	"github.com/lukaszraczylo/claude-mnemonic/pkg/hooks"
 )

+var debug = os.Getenv("CLAUDE_MNEMONIC_DEBUG") != ""
+
 // Input is the hook input from Claude Code.
 type Input struct {
 	hooks.BaseInput
@@ -62,7 +66,19 @@ func parseTranscript(path string) (lastUser, lastAssistant string) {
 	if err != nil {
 		return "", ""
 	}
-	defer file.Close()
+	defer func() { _ = file.Close() }()
+
+	// For large transcripts, seek to the last 256KB for efficiency.
+	// We only need the last user/assistant messages, not the entire history.
+	const tailSize = 256 * 1024
+	info, err := file.Stat()
+	if err == nil && info.Size() > tailSize {
+		if _, seekErr := file.Seek(-tailSize, io.SeekEnd); seekErr == nil {
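+			// Discard partial first line after seek. NOTE(review): Scanner reads ahead, so the scanner created below resumes past more than this one line -- confirm.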
+			discardScanner := bufio.NewScanner(file)
+			discardScanner.Scan()
+		}
+	}

 	scanner := bufio.NewScanner(file)
 	// Increase buffer size for large messages
@@ -97,12 +113,20 @@ func parseTranscript(path string) (lastUser, lastAssistant string) {
 }

 func main() {
+	if !hooks.IsWorkerAvailable() {
+		hooks.WriteResponse("Stop", true)
+		return
+	}
 	hooks.RunHook("Stop", handleStop)
 }

 func handleStop(ctx *hooks.HookContext, input *Input) (string, error) {
-	// Debug: dump raw input
+	deadline, cancel := hooks.HookDeadline(30 * time.Second)
+	defer cancel()
+
+	if debug {
 		fmt.Fprintf(os.Stderr, "[stop] Raw input: %s\n", string(ctx.RawInput))
+	}

 	// Find session
 	result, err := hooks.GET(ctx.Port, fmt.Sprintf("/api/sessions?claudeSessionId=%s", ctx.SessionID))
@@ -122,7 +146,15 @@ func handleStop(ctx *hooks.HookContext, input *Input) (string, error) {
 		lastUser, lastAssistant = parseTranscript(input.TranscriptPath)
 	}

-	// Debug: log what we extracted
+	// Truncate messages to avoid sending excessive data to the worker
+	if len(lastAssistant) > 10000 {
+		lastAssistant = lastAssistant[:10000]
+	}
+	if len(lastUser) > 5000 {
+		lastUser = lastUser[:5000]
+	}
+
+	if debug {
 		fmt.Fprintf(os.Stderr, "[stop] Transcript path: %s\n", input.TranscriptPath)
 		fmt.Fprintf(os.Stderr, "[stop] Last user message length: %d\n", len(lastUser))
 		fmt.Fprintf(os.Stderr, "[stop] Last assistant message length: %d\n", len(lastAssistant))
@@ -134,6 +166,13 @@ func handleStop(ctx *hooks.HookContext, input *Input) (string, error) {
 			fmt.Fprintf(os.Stderr, "[stop] Last assistant preview: %s\n", preview)
 		}
 		fmt.Fprintf(os.Stderr, "[stop] Requesting summary for session %d (transcript: %v)\n", int64(sessionID), input.TranscriptPath != "")
+	}
+
+	// Check deadline before expensive summary request
+	if deadline.Err() != nil {
+		fmt.Fprintf(os.Stderr, "[stop] Returning early due to time limit\n")
+		return "", nil
+	}

 	// Request summary with message context from transcript
 	_, err = hooks.POST(ctx.Port, fmt.Sprintf("/sessions/%d/summarize", int64(sessionID)), map[string]interface{}{
@@ -16,6 +16,10 @@ type Input struct {
 }

 func main() {
+	if !hooks.IsWorkerAvailable() {
+		hooks.WriteResponse("SubagentStop", true)
+		return
+	}
 	hooks.RunHook("SubagentStop", handleSubagentStop)
 }

@@ -5,6 +5,9 @@ import (
 	"fmt"
 	"net/url"
 	"os"
+	"strings"
+	"sync"
+	"time"

 	"github.com/lukaszraczylo/claude-mnemonic/pkg/hooks"
 )
@@ -15,31 +18,117 @@ type Input struct {
 	Prompt string `json:"prompt"`
 }

+// estimateTokens returns a rough token count for s, used to enforce the
+// context token budget. Whitespace-only input is sized at ~4 bytes per token.
+// Otherwise the base is ~1.3 tokens per whitespace-separated word (or ~4 bytes
+// per token for a single unbroken blob), scaled by 1.3 for punctuation-heavy
+// text and increased by one token per non-ASCII rune for non-ASCII-heavy text.
+func estimateTokens(s string) int {
+	// strings.Fields reports 0 words for empty or whitespace-only input
+	words := len(strings.Fields(s))
+	if words == 0 {
+		return (len(s) + 3) / 4 // 0 for ""
+	}
+
+	// Base estimate: ~1.3 tokens per word for English text
+	estimate := int(float64(words) * 1.3)
+	if words == 1 {
+		// One unbroken blob: its word count says nothing about its size
+		estimate = (len(s) + 3) / 4
+	}
+
+	// Classify runes; totalChars counts runes, not bytes, and is >= 1 because words > 0
+	nonAlpha, nonASCII, totalChars := 0, 0, 0
+	for _, r := range s {
+		totalChars++
+		if r > 127 {
+			nonASCII++
+		} else if !('a' <= r && r <= 'z') && !('A' <= r && r <= 'Z') && !('0' <= r && r <= '9') && r != ' ' {
+			nonAlpha++
+		}
+	}
+
+	// Code adjustment: more special chars = more tokens per word
+	if float64(nonAlpha)/float64(totalChars) > 0.15 {
+		estimate = int(float64(estimate) * 1.3)
+	}
+
+	// Non-ASCII adjustment: CJK and other scripts use more tokens
+	if float64(nonASCII)/float64(totalChars) > 0.1 {
+		estimate += nonASCII // Roughly 1 extra token per non-ASCII char
+	}
+
+	return estimate
+}
+
 func main() {
+	if !hooks.IsWorkerAvailable() {
+		hooks.WriteResponse("UserPromptSubmit", true)
+		return
+	}
 	hooks.RunHook("UserPromptSubmit", handleUserPrompt)
 }

 func handleUserPrompt(ctx *hooks.HookContext, input *Input) (string, error) {
-	// Search for relevant observations based on the prompt
+	deadline, cancel := hooks.HookDeadline(10 * time.Second)
+	defer cancel()
+
 	searchURL := fmt.Sprintf("/api/context/search?project=%s&query=%s&cwd=%s",
 		url.QueryEscape(ctx.Project),
 		url.QueryEscape(input.Prompt),
 		url.QueryEscape(ctx.CWD))

-	var contextToInject string
-	var observationCount int
+	// Run search and session init concurrently.
+	// Session init doesn't strictly depend on search results -- the observation
+	// count passed is approximate (0) and acceptable.
+	var (
+		wg               sync.WaitGroup
+		searchResult     map[string]interface{}
+		initResult       map[string]interface{}
+		initErr          error
+		contextToInject  string
+		observationCount int
+	)

-	searchResult, _ := hooks.GET(ctx.Port, searchURL)
+	// Start search in background
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		searchResult, _ = hooks.GET(ctx.Port, searchURL)
+	}()
+
+	// Start session init in parallel (with observationCount=0; approximate is fine)
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		initResult, initErr = hooks.POST(ctx.Port, "/api/sessions/init", map[string]interface{}{
+			"claudeSessionId":     ctx.SessionID,
+			"project":             ctx.Project,
+			"prompt":              input.Prompt,
+			"matchedObservations": 0,
+		})
+	}()
+
+	// Wait for both to complete
+	wg.Wait()
+
+	// Check deadline after network calls
+	if deadline.Err() != nil {
+		return "", nil
+	}
+
+	// Process search results
 	if observations, ok := searchResult["observations"].([]interface{}); ok && len(observations) > 0 {
-		// Results are already filtered by relevance threshold and capped by max_results
-		// from the server-side config (ContextRelevanceThreshold, ContextMaxPromptResults)
 		observationCount = len(observations)

-		// Build context from search results
+		// Token budget for prompt context injection
+		maxTokens := 8000
+		currentTokens := 0
+
+		header := "<relevant-memory>\n# Relevant Knowledge From Previous Sessions\nIMPORTANT: Use this information to answer the question directly. Do NOT explore the codebase if the answer is here.\n\n"
+		currentTokens += estimateTokens(header)
 		var contextBuilder string
-		contextBuilder = "<relevant-memory>\n"
-		contextBuilder += "# Relevant Knowledge From Previous Sessions\n"
-		contextBuilder += "IMPORTANT: Use this information to answer the question directly. Do NOT explore the codebase if the answer is here.\n\n"
+		contextBuilder = header

 		for i, obs := range observations {
 			if obsMap, ok := obs.(map[string]interface{}); ok {
@@ -52,24 +141,30 @@ func handleUserPrompt(ctx *hooks.HookContext, input *Input) (string, error) {
 					obsType = t
 				}

-				// Start observation block
-				contextBuilder += fmt.Sprintf("## %d. [%s] %s\n", i+1, obsType, title)
+				var obsText string
+				obsText = fmt.Sprintf("## %d. [%s] %s\n", i+1, obsType, title)

-				// Add facts first (most concise answers)
 				if facts, ok := obsMap["facts"].([]interface{}); ok && len(facts) > 0 {
-					contextBuilder += "Key facts:\n"
+					obsText += "Key facts:\n"
 					for _, fact := range facts {
 						if factStr, ok := fact.(string); ok {
-							contextBuilder += fmt.Sprintf("- %s\n", factStr)
+							obsText += fmt.Sprintf("- %s\n", factStr)
 						}
 					}
-					contextBuilder += "\n"
+					obsText += "\n"
 				}

-				// Add narrative if present
 				if narrative, ok := obsMap["narrative"].(string); ok && narrative != "" {
-					contextBuilder += narrative + "\n\n"
+					obsText += narrative + "\n\n"
 				}
+
+				obsTokens := estimateTokens(obsText)
+				if currentTokens+obsTokens > maxTokens {
+					break
+				}
+
+				contextBuilder += obsText
+				currentTokens += obsTokens
 			}
 		}

@@ -77,40 +172,24 @@ func handleUserPrompt(ctx *hooks.HookContext, input *Input) (string, error) {
 		contextToInject = contextBuilder
 	}

-	// Initialize session with matched observations count
-	result, err := hooks.POST(ctx.Port, "/api/sessions/init", map[string]interface{}{
-		"claudeSessionId":     ctx.SessionID,
-		"project":             ctx.Project,
-		"prompt":              input.Prompt,
-		"matchedObservations": observationCount,
-	})
-	if err != nil {
-		return "", err
+	// Check session init result
+	if initErr != nil {
+		return "", initErr
 	}

 	// Check if skipped due to privacy
-	if skipped, ok := result["skipped"].(bool); ok && skipped {
+	if skipped, ok := initResult["skipped"].(bool); ok && skipped {
 		fmt.Fprintf(os.Stderr, "[user-prompt] Session skipped (private)\n")
 		return "", nil
 	}

-	// Safely extract session ID and prompt number with type checking
-	sessionDbIdRaw, ok := result["sessionDbId"].(float64)
-	if !ok {
-		return "", fmt.Errorf("invalid or missing sessionDbId in response")
-	}
-	sessionID := int64(sessionDbIdRaw)
-
-	promptNumberRaw, ok := result["promptNumber"].(float64)
-	if !ok {
-		return "", fmt.Errorf("invalid or missing promptNumber in response")
-	}
-	promptNumber := int(promptNumberRaw)
+	sessionID := int64(initResult["sessionDbId"].(float64))
+	promptNumber := int(initResult["promptNumber"].(float64))

 	fmt.Fprintf(os.Stderr, "[user-prompt] Session %d, prompt #%d\n", sessionID, promptNumber)

-	// Start SDK agent
-	_, err = hooks.POST(ctx.Port, fmt.Sprintf("/sessions/%d/init", sessionID), map[string]interface{}{
+	// Start SDK agent (depends on session init result, so kept sequential)
+	_, err := hooks.POST(ctx.Port, fmt.Sprintf("/sessions/%d/init", sessionID), map[string]interface{}{
 		"userPrompt":   input.Prompt,
 		"promptNumber": promptNumber,
 	})
@@ -120,7 +199,6 @@ func handleUserPrompt(ctx *hooks.HookContext, input *Input) (string, error) {

 	// Return context if we found relevant observations
 	if observationCount > 0 {
-		// Show match count to user via stderr
 		fmt.Fprintf(os.Stderr, "[claude-mnemonic] Found %d relevant memories for this prompt\n", observationCount)
 		return contextToInject, nil
 	}
@@ -4,20 +4,16 @@ package main
 import (
 	"context"
 	"flag"
+	"fmt"
+	"net/http"
 	"os"
 	"os/signal"
 	"syscall"
 	"time"

 	"github.com/lukaszraczylo/claude-mnemonic/internal/config"
-	"github.com/lukaszraczylo/claude-mnemonic/internal/db/gorm"
-	"github.com/lukaszraczylo/claude-mnemonic/internal/embedding"
 	"github.com/lukaszraczylo/claude-mnemonic/internal/mcp"
-	"github.com/lukaszraczylo/claude-mnemonic/internal/scoring"
-	"github.com/lukaszraczylo/claude-mnemonic/internal/search"
-	"github.com/lukaszraczylo/claude-mnemonic/internal/vector/sqlitevec"
 	"github.com/lukaszraczylo/claude-mnemonic/internal/watcher"
-	"github.com/lukaszraczylo/claude-mnemonic/pkg/models"
 	"github.com/rs/zerolog"
 	"github.com/rs/zerolog/log"
 )
@@ -28,7 +24,6 @@ var Version = "dev"
 func main() {
 	// Parse flags
 	project := flag.String("project", "", "Project name (required)")
-	dataDir := flag.String("data-dir", "", "Data directory (default: ~/.claude-mnemonic)")
 	debug := flag.Bool("debug", false, "Enable debug logging")
 	flag.Parse()

@@ -43,23 +38,12 @@ func main() {
 		log.Fatal().Msg("--project is required")
 	}

-	// Ensure data directory and settings exist
-	if err := config.EnsureAll(); err != nil {
-		log.Fatal().Err(err).Msg("Failed to ensure data directories")
-	}
+	// Get worker port from config
+	port := config.GetWorkerPort()
+	workerURL := fmt.Sprintf("http://localhost:%d", port)

-	// Load config
-	cfg, err := config.Load()
-	if err != nil {
-		log.Warn().Err(err).Msg("Failed to load config, using defaults")
-		cfg = config.Default()
-	}
-
-	// Override data directory if specified
-	dbPath := cfg.DBPath
-	if *dataDir != "" {
-		dbPath = *dataDir + "/claude-mnemonic.db"
-	}
+	// Create HTTP client for worker
+	client := &http.Client{Timeout: 30 * time.Second}

 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
@@ -73,69 +57,12 @@ func main() {
 		cancel()
 	}()

-	// Initialize database store (migrations run automatically)
-	storeCfg := gorm.Config{
-		Path:     dbPath,
-		MaxConns: cfg.MaxConns,
-		// WALMode is enabled automatically by GORM
-	}
-	store, err := gorm.NewStore(storeCfg)
-	if err != nil {
-		log.Fatal().Err(err).Msg("Failed to initialize database store")
-	}
-	defer store.Close()
+	// Start file watchers for config changes
+	startWatchers()

-	// Initialize stores
-	observationStore := gorm.NewObservationStore(store, nil, nil, nil)
-	summaryStore := gorm.NewSummaryStore(store)
-	promptStore := gorm.NewPromptStore(store, nil)
-	patternStore := gorm.NewPatternStore(store)
-	relationStore := gorm.NewRelationStore(store)
-	sessionStore := gorm.NewSessionStore(store)
-
-	// Initialize embedding service and vector client
-	var vectorClient *sqlitevec.Client
-	embedSvc, err := embedding.NewService()
-	if err != nil {
-		log.Warn().Err(err).Msg("Embedding service unavailable, vector search disabled")
-	} else {
-		defer embedSvc.Close()
-		vectorClient, err = sqlitevec.NewClient(sqlitevec.Config{DB: store.GetRawDB()}, embedSvc)
-		if err != nil {
-			log.Warn().Err(err).Msg("Vector client unavailable, vector search disabled")
-		} else {
-			log.Info().Msg("Vector search enabled via sqlite-vec")
-		}
-	}
-
-	// Initialize scoring components
-	scoreConfig := models.DefaultScoringConfig()
-	scoreCalculator := scoring.NewCalculator(scoreConfig)
-	recalculator := scoring.NewRecalculator(observationStore, scoreCalculator, log.Logger)
-	go recalculator.Start(ctx)
-	defer recalculator.Stop()
-
-	// Initialize search manager
-	searchMgr := search.NewManager(observationStore, summaryStore, promptStore, vectorClient)
-
-	// Start file watchers
-	startWatchers(ctx, dbPath)
-
-	// Create and run MCP server with all dependencies
-	// Note: maintenanceService is nil because it runs in the worker process
-	server := mcp.NewServer(
-		searchMgr,
-		Version,
-		observationStore,
-		patternStore,
-		relationStore,
-		sessionStore,
-		vectorClient,
-		scoreCalculator,
-		recalculator,
-		nil, // maintenanceService - handled by worker
-	)
-	log.Info().Str("project", *project).Str("version", Version).Msg("Starting MCP server")
+	// Create and run MCP server
+	server := mcp.NewServer(client, workerURL, *project, Version)
+	log.Info().Str("project", *project).Str("version", Version).Str("worker", workerURL).Msg("Starting MCP server")

 	if err := server.Run(ctx); err != nil {
 		log.Fatal().Err(err).Msg("MCP server error")
@@ -143,7 +70,7 @@ func main() {
 }

 // startWatchers initializes file watchers for config.
-func startWatchers(ctx context.Context, dbPath string) {
+func startWatchers() {
 	// Watch config file for changes (triggers process exit for restart)
 	configPath := config.SettingsPath()
 	configWatcher, err := watcher.New(configPath, func() {
@@ -5,6 +5,7 @@ import (
 	"encoding/json"
 	"os"
 	"path/filepath"
+	"strconv"
 	"strings"
 	"sync"
 )
@@ -52,6 +53,7 @@ type Config struct {
 	ContextRelevanceThreshold float64  `json:"context_relevance_threshold"`
 	RerankingCandidates       int      `json:"reranking_candidates"`
 	WorkerPort                int      `json:"worker_port"`
+	DeduplicationThreshold    float64  `json:"deduplication_threshold"`
 	RerankingMinImprovement   float64  `json:"reranking_min_improvement"`
 	ContextObservations       int      `json:"context_observations"`
 	ContextMaxPromptResults   int      `json:"context_max_prompt_results"`
@@ -64,10 +66,13 @@ type Config struct {
 	HubThreshold              int      `json:"hub_threshold"`
 	ObservationRetentionDays  int      `json:"observation_retention_days"`
 	MaintenanceIntervalHours  int      `json:"maintenance_interval_hours"`
+	ContextMaxTokensStartup   int      `json:"context_max_tokens_startup"`
+	ContextMaxTokensPrompt    int      `json:"context_max_tokens_prompt"`
 	ContextShowWorkTokens     bool     `json:"context_show_work_tokens"`
 	ContextShowReadTokens     bool     `json:"context_show_read_tokens"`
 	RerankingPureMode         bool     `json:"reranking_pure_mode"`
 	GraphEnabled              bool     `json:"graph_enabled"`
+	DeduplicationEnabled      bool     `json:"deduplication_enabled"`
 	MaintenanceEnabled        bool     `json:"maintenance_enabled"`
 	RerankingEnabled          bool     `json:"reranking_enabled"`
 	ContextShowLastSummary    bool     `json:"context_show_last_summary"`
@@ -168,6 +173,10 @@ func Default() *Config {
 		ContextObsConcepts:        DefaultObservationConcepts,
 		ContextRelevanceThreshold: 0.3,   // Minimum 30% similarity to include
 		ContextMaxPromptResults:   10,    // Cap at 10 results max (0 = no cap, threshold only)
+		ContextMaxTokensStartup:   16000, // Max tokens for SessionStart context injection
+		ContextMaxTokensPrompt:    8000,  // Max tokens for UserPromptSubmit context injection
+		DeduplicationEnabled:      true,  // Enable write-time vector dedup
+		DeduplicationThreshold:    0.9,   // Similarity threshold for merging (0.9 = very similar)
 		MaintenanceEnabled:        true,  // Enable scheduled maintenance
 		MaintenanceIntervalHours:  6,     // Run every 6 hours
 		ObservationRetentionDays:  0,     // 0 = no age-based deletion (keep all)
@@ -269,6 +278,29 @@ func Load() (*Config, error) {
 	if v, ok := settings["CLAUDE_MNEMONIC_HUB_THRESHOLD"].(float64); ok && v > 0 {
 		cfg.HubThreshold = int(v)
 	}
+	if v, ok := settings["CLAUDE_MNEMONIC_CONTEXT_MAX_TOKENS_STARTUP"].(float64); ok && v > 0 {
+		cfg.ContextMaxTokensStartup = int(v)
+	}
+	if v, ok := settings["CLAUDE_MNEMONIC_CONTEXT_MAX_TOKENS_PROMPT"].(float64); ok && v > 0 {
+		cfg.ContextMaxTokensPrompt = int(v)
+	}
+	// Deduplication settings
+	if v, ok := settings["CLAUDE_MNEMONIC_DEDUP_ENABLED"].(bool); ok {
+		cfg.DeduplicationEnabled = v
+	}
+	if v, ok := settings["CLAUDE_MNEMONIC_DEDUP_THRESHOLD"].(float64); ok && v > 0 && v <= 1 {
+		cfg.DeduplicationThreshold = v
+	}
+
+	// Also support env vars for dedup settings
+	if v := os.Getenv("CLAUDE_MNEMONIC_DEDUP_ENABLED"); v != "" {
+		cfg.DeduplicationEnabled = v == "true" || v == "1"
+	}
+	if v := os.Getenv("CLAUDE_MNEMONIC_DEDUP_THRESHOLD"); v != "" {
+		if f, err := strconv.ParseFloat(v, 64); err == nil && f > 0 && f <= 1 {
+			cfg.DeduplicationThreshold = f
+		}
+	}

 	return cfg, nil
 }
@@ -4,6 +4,8 @@ package gorm
 import (
 	"context"
 	"database/sql"
+	"fmt"
+	"sync"
 	"time"

 	"gorm.io/gorm"
@@ -18,6 +20,7 @@ type PatternCleanupFunc func(ctx context.Context, deletedIDs []int64)
 type PatternStore struct {
 	db          *gorm.DB
 	cleanupFunc PatternCleanupFunc
+	cleanupMu   sync.RWMutex
 }

 // NewPatternStore creates a new pattern store.
@@ -29,6 +32,8 @@ func NewPatternStore(store *Store) *PatternStore {

 // SetCleanupFunc sets the callback for when patterns are deleted.
 func (s *PatternStore) SetCleanupFunc(fn PatternCleanupFunc) {
+	s.cleanupMu.Lock()
+	defer s.cleanupMu.Unlock()
 	s.cleanupFunc = fn
 }

@@ -238,6 +243,9 @@ func (s *PatternStore) MarkPatternDeprecated(ctx context.Context, id int64) erro

 // MergePatterns merges a source pattern into a target pattern.
 func (s *PatternStore) MergePatterns(ctx context.Context, sourceID, targetID int64) error {
+	if sourceID == targetID {
+		return fmt.Errorf("cannot merge pattern into itself")
+	}
 	// Get both patterns
 	source, err := s.GetPatternByID(ctx, sourceID)
 	if err != nil {
@@ -294,8 +302,13 @@ func (s *PatternStore) MergePatterns(ctx context.Context, sourceID, targetID int
 func (s *PatternStore) DeletePattern(ctx context.Context, id int64) error {
 	result := s.db.WithContext(ctx).Delete(&Pattern{}, id)

-	if result.Error == nil && s.cleanupFunc != nil {
-		s.cleanupFunc(ctx, []int64{id})
+	if result.Error == nil {
+		s.cleanupMu.RLock()
+		fn := s.cleanupFunc
+		s.cleanupMu.RUnlock()
+		if fn != nil {
+			fn(ctx, []int64{id})
+		}
 	}

 	return result.Error
@@ -4,8 +4,10 @@ package gorm
 import (
 	"context"
 	"database/sql"
+	"sync"
 	"time"

+	"github.com/rs/zerolog/log"
 	"gorm.io/gorm"
 	"gorm.io/gorm/clause"

@@ -23,6 +25,7 @@ const MaxPromptsGlobal = 500
 type PromptStore struct {
 	db          *gorm.DB
 	cleanupFunc PromptCleanupFunc
+	cleanupMu   sync.RWMutex
 }

 // NewPromptStore creates a new prompt store.
@@ -35,6 +38,8 @@ func NewPromptStore(store *Store, cleanupFunc PromptCleanupFunc) *PromptStore {

 // SetCleanupFunc sets the callback for when prompts are deleted during cleanup.
 func (s *PromptStore) SetCleanupFunc(fn PromptCleanupFunc) {
+	s.cleanupMu.Lock()
+	defer s.cleanupMu.Unlock()
 	s.cleanupFunc = fn
 }

@@ -81,9 +86,15 @@ func (s *PromptStore) SaveUserPromptWithMatches(ctx context.Context, claudeSessi
 	go func() {
 		cleanupCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
 		defer cancel()
-		deletedIDs, _ := s.CleanupOldPrompts(cleanupCtx)
-		if len(deletedIDs) > 0 && s.cleanupFunc != nil {
-			s.cleanupFunc(cleanupCtx, deletedIDs)
+		if deletedIDs, err := s.CleanupOldPrompts(cleanupCtx); err != nil {
+			log.Warn().Err(err).Msg("Background prompt cleanup failed")
+		} else if len(deletedIDs) > 0 {
+			s.cleanupMu.RLock()
+			fn := s.cleanupFunc
+			s.cleanupMu.RUnlock()
+			if fn != nil {
+				fn(cleanupCtx, deletedIDs)
+			}
 		}
 	}()

@@ -8,6 +8,7 @@ import (
 	"fmt"
 	"os"
 	"path/filepath"
+	"strings"
 	"sync"

 	"github.com/sugarme/tokenizer"
@@ -69,8 +70,10 @@ func newBGEModel() (EmbeddingModel, error) {
 	libPath := filepath.Join(libDir, onnxRuntimeLibName)
 	ort.SetSharedLibraryPath(libPath)

-	// Initialize ONNX runtime
-	if err := ort.InitializeEnvironment(); err != nil {
+	// Initialize ONNX runtime (idempotent - ignore "already initialized" since
+	// the ONNX environment is process-global and shared with the reranking service)
+	err = ort.InitializeEnvironment()
+	if err != nil && !strings.Contains(err.Error(), "already been initialized") {
 		return nil, fmt.Errorf("initialize ONNX runtime: %w", err)
 	}

@@ -13,6 +13,7 @@ import (
 	"fmt"
 	"net/http"
 	"strconv"
+	"time"

 	"github.com/rs/zerolog/log"
 )
@@ -142,19 +143,55 @@ func formatWarning(format string, args ...any) string {
 }

 // handleHealth handles health check requests.
-// Returns 200 OK immediately (even during init) so hooks can connect quickly.
-// Use /api/ready for full readiness check.
+// Returns 200 when ready; 503 while initializing, after an init error, or when degraded.
 func (s *Service) handleHealth(w http.ResponseWriter, r *http.Request) {
-	status := "starting"
-	if s.ready.Load() {
-		status = "ready"
-	} else if err := s.GetInitError(); err != nil {
+	ready := s.ready.Load() // read once so "status" and "ready" in the response cannot disagree
+	status := "ready"
+	dbStatus, embeddingStatus := "ok", "ok"
+
+	if !ready {
+		status = "initializing"
+		if err := s.GetInitError(); err != nil {
 			status = "error"
 		}
-	writeJSON(w, map[string]any{
+	}
+
+	// Check embedding service
+	if s.embedSvc == nil {
+		embeddingStatus = "unavailable"
+		if status == "ready" {
+			status = "degraded"
+		}
+	}
+
+	// Check DB
+	if s.store == nil {
+		dbStatus = "unavailable"
+		if status == "ready" {
+			status = "degraded"
+		}
+	}
+
+	activeSessions := 0
+	if s.sessionManager != nil {
+		activeSessions = s.sessionManager.GetActiveSessionCount()
+	}
+	resp := map[string]any{
 		"status":           status,
+		"ready":            ready,
+		"uptime_seconds":   int(time.Since(s.startTime).Seconds()),
+		"active_sessions":  activeSessions,
+		"db_status":        dbStatus,
+		"embedding_status": embeddingStatus,
 		"version":          s.version,
-	})
+	}
+	w.Header().Set("Content-Type", "application/json")
+	if status != "ready" {
+		w.WriteHeader(http.StatusServiceUnavailable)
+	}
+	if err := json.NewEncoder(w).Encode(resp); err != nil {
+		log.Debug().Err(err).Msg("Failed to write health response")
+	}
 }

 // handleVersion returns the worker version for version checking.
@@ -46,7 +46,7 @@ func (s *Service) handleGetRelationGraph(w http.ResponseWriter, r *http.Request)
 	// Get depth parameter (default 2)
 	depth := 2
 	if depthStr := r.URL.Query().Get("depth"); depthStr != "" {
-		if d, err := strconv.Atoi(depthStr); err == nil && d > 0 && d <= 5 {
+		if d, parseErr := strconv.Atoi(depthStr); parseErr == nil && d > 0 && d <= 5 {
 			depth = d
 		}
 	}
@@ -72,7 +72,7 @@ func (s *Service) handleGetRelatedObservations(w http.ResponseWriter, r *http.Re
 	// Get minimum confidence parameter (default 0.4)
 	minConfidence := 0.4
 	if confStr := r.URL.Query().Get("min_confidence"); confStr != "" {
-		if c, err := strconv.ParseFloat(confStr, 64); err == nil && c >= 0 && c <= 1 {
+		if c, parseErr := strconv.ParseFloat(confStr, 64); parseErr == nil && c >= 0 && c <= 1 {
 			minConfidence = c
 		}
 	}
@@ -42,11 +42,9 @@ func (s *Service) handleObservationFeedback(w http.ResponseWriter, r *http.Reque
 		return
 	}

-	// Get required components
-	s.initMu.RLock()
+	// Get required components (initMu.RLock held by requireReady middleware)
 	observationStore := s.observationStore
 	scoreCalculator := s.scoreCalculator
-	s.initMu.RUnlock()

 	if observationStore == nil {
 		http.Error(w, "service not ready", http.StatusServiceUnavailable)
@@ -95,10 +93,9 @@ func (s *Service) handleObservationFeedback(w http.ResponseWriter, r *http.Reque
 func (s *Service) handleGetScoringStats(w http.ResponseWriter, r *http.Request) {
 	project := r.URL.Query().Get("project")

-	s.initMu.RLock()
+	// initMu.RLock held by requireReady middleware
 	observationStore := s.observationStore
 	recalculator := s.recalculator
-	s.initMu.RUnlock()

 	if observationStore == nil {
 		http.Error(w, "service not ready", http.StatusServiceUnavailable)
@@ -130,9 +127,8 @@ func (s *Service) handleGetTopObservations(w http.ResponseWriter, r *http.Reques
 	limit := parseIntParam(r, "limit", 10)
 	project := r.URL.Query().Get("project")

-	s.initMu.RLock()
+	// initMu.RLock held by requireReady middleware
 	observationStore := s.observationStore
-	s.initMu.RUnlock()

 	if observationStore == nil {
 		http.Error(w, "service not ready", http.StatusServiceUnavailable)
@@ -158,9 +154,8 @@ func (s *Service) handleGetMostRetrieved(w http.ResponseWriter, r *http.Request)
 	limit := parseIntParam(r, "limit", 10)
 	project := r.URL.Query().Get("project")

-	s.initMu.RLock()
+	// initMu.RLock held by requireReady middleware
 	observationStore := s.observationStore
-	s.initMu.RUnlock()

 	if observationStore == nil {
 		http.Error(w, "service not ready", http.StatusServiceUnavailable)
@@ -191,10 +186,9 @@ func (s *Service) handleExplainScore(w http.ResponseWriter, r *http.Request) {
 		return
 	}

-	s.initMu.RLock()
+	// initMu.RLock held by requireReady middleware
 	observationStore := s.observationStore
 	scoreCalculator := s.scoreCalculator
-	s.initMu.RUnlock()

 	if observationStore == nil || scoreCalculator == nil {
 		http.Error(w, "service not ready", http.StatusServiceUnavailable)
@@ -245,10 +239,9 @@ func (s *Service) handleUpdateConceptWeight(w http.ResponseWriter, r *http.Reque
 		return
 	}

-	s.initMu.RLock()
+	// initMu.RLock held by requireReady middleware
 	observationStore := s.observationStore
 	recalculator := s.recalculator
-	s.initMu.RUnlock()

 	if observationStore == nil {
 		http.Error(w, "service not ready", http.StatusServiceUnavailable)
@@ -279,9 +272,8 @@ func (s *Service) handleUpdateConceptWeight(w http.ResponseWriter, r *http.Reque
 // handleGetConceptWeights returns all concept weights.
 // GET /api/scoring/concepts
 func (s *Service) handleGetConceptWeights(w http.ResponseWriter, r *http.Request) {
-	s.initMu.RLock()
+	// initMu.RLock held by requireReady middleware
 	observationStore := s.observationStore
-	s.initMu.RUnlock()

 	if observationStore == nil {
 		http.Error(w, "service not ready", http.StatusServiceUnavailable)
@@ -300,19 +292,22 @@ func (s *Service) handleGetConceptWeights(w http.ResponseWriter, r *http.Request
 // handleTriggerRecalculation triggers an immediate score recalculation.
 // POST /api/scoring/recalculate
 func (s *Service) handleTriggerRecalculation(w http.ResponseWriter, r *http.Request) {
-	s.initMu.RLock()
+	// initMu.RLock held by requireReady middleware
 	recalculator := s.recalculator
-	s.initMu.RUnlock()

 	if recalculator == nil {
 		http.Error(w, "recalculator not available", http.StatusServiceUnavailable)
 		return
 	}

-	// Run recalculation in background
+	// Run recalculation in background with independent context
+	s.wg.Add(1)
 	go func() {
-		if err := recalculator.RecalculateNow(r.Context()); err != nil {
-			log.Warn().Err(err).Msg("Background score recalculation failed")
+		defer s.wg.Done()
+		ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
+		defer cancel()
+		if err := recalculator.RecalculateNow(ctx); err != nil {
+			log.Error().Err(err).Msg("Background recalculation failed")
 		}
 	}()

@@ -336,27 +331,24 @@ func (s *Service) incrementRetrievalCounts(ids []int64) {
 		return
 	}

-	s.initMu.RLock()
+	// initMu.RLock held by requireReady middleware (caller is always behind requireReady)
 	store := s.observationStore
-	s.initMu.RUnlock()

 	if store == nil {
 		return
 	}

 	// Increment in background to not block response
-	// Use service context to respect shutdown signals
 	s.wg.Add(1)
 	go func() {
 		defer s.wg.Done()
-		ctx, cancel := context.WithTimeout(s.ctx, 3*time.Second)
+		// Create a new context with timeout for the background operation
+		ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
 		defer cancel()

 		if err := store.IncrementRetrievalCount(ctx, ids); err != nil {
 			// Log but don't fail - this is a background operation
-			if s.ctx.Err() == nil { // Don't log during shutdown
-				log.Debug().Err(err).Msg("Failed to increment retrieval counts")
-			}
+			_ = err // Explicitly ignore - background operation
 		}
 	}()
 }
@@ -459,14 +459,13 @@ func TestHandleHealth_ReturnsVersion(t *testing.T) {

 	svc.handleHealth(rec, req)

-	assert.Equal(t, http.StatusOK, rec.Code)
-
 	var response map[string]interface{}
 	err := json.Unmarshal(rec.Body.Bytes(), &response)
 	require.NoError(t, err)

-	assert.Equal(t, "ready", response["status"])
 	assert.Equal(t, "test-version-1.2.3", response["version"])
+	// Status may be "degraded" if embedSvc is nil in test, but version is always present
+	assert.Contains(t, []string{"ready", "degraded"}, response["status"])
 }

 func TestHandleVersion(t *testing.T) {
@@ -2028,13 +2027,14 @@ func TestHandleHealth_NotReady(t *testing.T) {

 	svc.handleHealth(rec, req)

-	assert.Equal(t, http.StatusOK, rec.Code)
+	assert.Equal(t, http.StatusServiceUnavailable, rec.Code)

 	var response map[string]interface{}
 	err := json.Unmarshal(rec.Body.Bytes(), &response)
 	require.NoError(t, err)

-	assert.Equal(t, "starting", response["status"])
+	assert.Equal(t, "initializing", response["status"])
+	assert.Equal(t, false, response["ready"])
 }

 // TestHandleContextInject_EmptyProject tests context inject with empty project.
@@ -2399,7 +2399,12 @@ func TestHandleHealthEndpoint(t *testing.T) {

 	svc.router.ServeHTTP(rec, req)

-	assert.Equal(t, http.StatusOK, rec.Code)
+	// Response is valid JSON with health details
+	var response map[string]interface{}
+	err := json.Unmarshal(rec.Body.Bytes(), &response)
+	require.NoError(t, err)
+	assert.NotNil(t, response["status"])
+	assert.NotNil(t, response["version"])
 }

 // TestHandleSelfCheckEndpoint tests self-check endpoint via router.
@@ -2894,12 +2899,18 @@ func TestHandleHealth(t *testing.T) {

 	svc.router.ServeHTTP(rec, req)

-	assert.Equal(t, http.StatusOK, rec.Code)
-
 	var response map[string]interface{}
 	err := json.Unmarshal(rec.Body.Bytes(), &response)
 	require.NoError(t, err)
-	assert.Equal(t, "ready", response["status"])
+
+	// Test service has store set but no embedSvc, so status is "degraded"
+	assert.Contains(t, []string{"ready", "degraded"}, response["status"])
+	assert.NotNil(t, response["version"])
+	assert.NotNil(t, response["uptime_seconds"])
+	assert.NotNil(t, response["active_sessions"])
+	assert.NotNil(t, response["db_status"])
+	assert.NotNil(t, response["embedding_status"])
+	assert.NotNil(t, response["ready"])
 }

 // TestHandleSessionInit_ValidRequest tests session init with valid request.
@@ -0,0 +1,161 @@
+// Package sdk provides write-time observation deduplication via vector similarity.
+package sdk
+
+import (
+	"context"
+	"fmt"
+	"strings"
+
+	"github.com/lukaszraczylo/claude-mnemonic/internal/config"
+	"github.com/lukaszraczylo/claude-mnemonic/internal/db/gorm"
+	"github.com/lukaszraczylo/claude-mnemonic/internal/vector/sqlitevec"
+	"github.com/lukaszraczylo/claude-mnemonic/pkg/models"
+	"github.com/rs/zerolog/log"
+)
+
+// DeduplicationResult represents the outcome of a vector similarity dedup check.
+type DeduplicationResult struct {
+	ExistingID int64   // ID of the matched stored observation; only set when Action is "merge"
+	Similarity float64 // similarity of that match; only set when Action is "merge"
+	Action     string  // "insert" or "merge"
+}
+
+// checkVectorDeduplication decides, via vector similarity, whether obs should be
+// stored as a new observation or merged into an existing one.
+// It always returns a non-nil result and has no error return: when dedup is disabled,
+// no vector client is set, obs yields no search text, or the query fails, Action is "insert".
+func (p *Processor) checkVectorDeduplication(ctx context.Context, obs *models.ParsedObservation, project string) *DeduplicationResult {
+	cfg := config.Get()
+	insert := &DeduplicationResult{Action: "insert"}
+
+	// Cheap exits first: feature switched off, or no vector client configured.
+	if !cfg.DeduplicationEnabled || p.vectorClient == nil {
+		return insert
+	}
+
+	// The query text is built from the observation's textual fields.
+	query := buildObservationSearchText(obs)
+	if query == "" {
+		return insert
+	}
+
+	// Query vector DB for similar observations in the same project
+	filter := sqlitevec.BuildWhereFilter(sqlitevec.DocTypeObservation, project)
+	matches, err := p.vectorClient.Query(ctx, query, 3, filter)
+	if err != nil {
+		log.Debug().Err(err).Msg("Vector search failed during dedup check")
+		return insert
+	}
+
+	// The first match at or above the threshold that resolves to a positive ID wins.
+	for _, m := range matches {
+		if m.Similarity < cfg.DeduplicationThreshold {
+			continue
+		}
+		if id := extractObservationIDFromVectorDoc(m); id > 0 {
+			return &DeduplicationResult{
+				ExistingID: id,
+				Similarity: m.Similarity,
+				Action:     "merge",
+			}
+		}
+	}
+
+	return insert
+}
+
+// buildObservationSearchText joins the non-empty Title, Subtitle and Narrative with spaces (max 2000 bytes, rune-safe cut).
+func buildObservationSearchText(obs *models.ParsedObservation) string {
+	parts := make([]string, 0, 3)
+	for _, field := range []string{obs.Title, obs.Subtitle, obs.Narrative} {
+		if field != "" {
+			parts = append(parts, field)
+		}
+	}
+	text := strings.Join(parts, " ")
+	if len(text) <= 2000 {
+		return text
+	}
+	cut := 2000
+	for cut > 2000-3 && text[cut]&0xC0 == 0x80 { // 10xxxxxx is a UTF-8 continuation byte: step back to the rune start
+		cut--
+	}
+	return text[:cut]
+}
+
+// extractObservationIDFromVectorDoc returns the SQLite observation ID for a vector query
+// result, or 0 when neither the metadata nor the doc ID yields a usable ID.
+func extractObservationIDFromVectorDoc(r sqlitevec.QueryResult) int64 {
+	var id int64
+	switch v := r.Metadata["sqlite_id"].(type) { // preferred source (set during vector sync)
+	case float64:
+		id = int64(v)
+	case int64:
+		id = v
+	}
+	if id > 0 {
+		return id
+	}
+	if !strings.HasPrefix(r.ID, "obs_") {
+		return 0
+	}
+	idPart, _, _ := strings.Cut(r.ID[4:], "_") // fallback: "obs_{id}_composite" or "obs_{id}_narrative"
+	if _, err := fmt.Sscanf(idPart, "%d", &id); err != nil || id < 0 {
+		return 0
+	}
+	return id
+}
+
+// mergeObservation folds a duplicate observation into the stored one identified by
+// existingID: facts not yet present are appended (each at most once) and the
+// narrative is replaced when the new one is longer. Nothing is written otherwise.
+func (p *Processor) mergeObservation(ctx context.Context, existingID int64, newObs *models.ParsedObservation) error {
+	existing, err := p.observationStore.GetObservationByID(ctx, existingID)
+	if err != nil {
+		return fmt.Errorf("fetch existing observation %d: %w", existingID, err)
+	}
+	if existing == nil {
+		return fmt.Errorf("observation %d not found", existingID)
+	}
+
+	update := &gorm.ObservationUpdate{}
+	changed := false
+
+	// Merge facts: append new facts not already present
+	if len(newObs.Facts) > 0 {
+		existingFactSet := make(map[string]struct{}, len(existing.Facts))
+		for _, f := range existing.Facts {
+			existingFactSet[f] = struct{}{}
+		}
+		mergedFacts := make([]string, len(existing.Facts))
+		copy(mergedFacts, existing.Facts)
+		for _, f := range newObs.Facts {
+			if _, exists := existingFactSet[f]; !exists {
+				existingFactSet[f] = struct{}{} // remember it so a repeat inside newObs.Facts is skipped
+				mergedFacts = append(mergedFacts, f)
+				changed = true
+			}
+		}
+		if changed {
+			update.Facts = &mergedFacts
+		}
+	}
+
+	// Update narrative if the new one is longer/more detailed
+	if len(newObs.Narrative) > len(existing.Narrative.String) {
+		update.Narrative = &newObs.Narrative
+		changed = true
+	}
+
+	if !changed {
+		// Nothing new to merge: leave the stored observation untouched.
+		log.Debug().Int64("id", existingID).Msg("Dedup merge: no new content, skipping update")
+		return nil
+	}
+
+	if _, err = p.observationStore.UpdateObservation(ctx, existingID, update); err != nil {
+		return fmt.Errorf("update observation %d: %w", existingID, err)
+	}
+
+	return nil
+}
@@ -0,0 +1,143 @@
+package sdk
+
+import (
+	"testing"
+
+	"github.com/lukaszraczylo/claude-mnemonic/internal/vector/sqlitevec"
+	"github.com/lukaszraczylo/claude-mnemonic/pkg/models"
+)
+
+// TestBuildObservationSearchText covers empty input, a single field, all fields
+// joined with spaces, and truncation of over-long text.
+func TestBuildObservationSearchText(t *testing.T) {
+	cases := []struct {
+		name     string
+		obs      *models.ParsedObservation
+		expected string
+	}{
+		{
+			name:     "empty observation",
+			obs:      &models.ParsedObservation{},
+			expected: "",
+		},
+		{
+			name:     "title only",
+			obs:      &models.ParsedObservation{Title: "Fix database connection"},
+			expected: "Fix database connection",
+		},
+		{
+			name: "all fields",
+			obs: &models.ParsedObservation{
+				Title:     "Fix database connection",
+				Subtitle:  "Connection pooling issue",
+				Narrative: "The database connection pool was exhausted due to leaked connections.",
+			},
+			expected: "Fix database connection Connection pooling issue The database connection pool was exhausted due to leaked connections.",
+		},
+		{
+			// 3000 zero bytes in, the first 2000 out.
+			name:     "truncates long text",
+			obs:      &models.ParsedObservation{Narrative: string(make([]byte, 3000))},
+			expected: string(make([]byte, 2000)),
+		},
+	}
+
+	// Each case runs as its own subtest.
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := buildObservationSearchText(tc.obs)
+			if got != tc.expected {
+				t.Errorf("got %q, want %q", got, tc.expected)
+			}
+		})
+	}
+}
+
+// TestExtractObservationIDFromVectorDoc covers the metadata path and the doc-ID fallback.
+func TestExtractObservationIDFromVectorDoc(t *testing.T) {
+	cases := []struct {
+		name string
+		doc  sqlitevec.QueryResult
+		want int64
+	}{
+		{
+			name: "from sqlite_id metadata (float64)",
+			doc: sqlitevec.QueryResult{
+				ID:       "obs_42_narrative",
+				Metadata: map[string]any{"sqlite_id": float64(42)},
+			},
+			want: 42,
+		},
+		{
+			name: "from sqlite_id metadata (int64)",
+			doc: sqlitevec.QueryResult{
+				ID:       "obs_42_narrative",
+				Metadata: map[string]any{"sqlite_id": int64(42)},
+			},
+			want: 42,
+		},
+		{
+			name: "fallback to doc_id parsing",
+			doc: sqlitevec.QueryResult{
+				ID:       "obs_99_composite",
+				Metadata: map[string]any{},
+			},
+			want: 99,
+		},
+		{
+			name: "non-observation doc_id",
+			doc: sqlitevec.QueryResult{
+				ID:       "summary_5_text",
+				Metadata: map[string]any{},
+			},
+			want: 0,
+		},
+		{
+			name: "zero sqlite_id falls back to doc_id",
+			doc: sqlitevec.QueryResult{
+				ID:       "obs_123_narrative",
+				Metadata: map[string]any{"sqlite_id": float64(0)},
+			},
+			want: 123,
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			if got := extractObservationIDFromVectorDoc(tc.doc); got != tc.want {
+				t.Errorf("got %d, want %d", got, tc.want)
+			}
+		})
+	}
+}
+
+func TestCheckVectorDeduplication_NilClient(t *testing.T) {
+	p := &Processor{
+		// vectorClient is left nil on purpose
+	}
+
+	obs := &models.ParsedObservation{
+		Title:     "Test observation",
+		Narrative: "Some narrative text",
+	}
+
+	result := p.checkVectorDeduplication(nil, obs, "test-project") // nil ctx is never used: the call returns before the vector query
+	if result.Action != "insert" {
+		t.Errorf("expected Action='insert' when vectorClient is nil, got %q", result.Action)
+	}
+}
+
+func TestCheckVectorDeduplication_EmptySearchText(t *testing.T) {
+	p := &Processor{
+		// NOTE(review): vectorClient is nil here, so the call returns at the nil-client guard and never reaches the empty-search-text check
+	}
+
+	obs := &models.ParsedObservation{
+		// All empty fields
+	}
+
+	result := p.checkVectorDeduplication(nil, obs, "test-project")
+	if result.Action != "insert" {
+		t.Errorf("expected Action='insert' for empty observation, got %q", result.Action)
+	}
+}
@@ -19,6 +19,7 @@ import (

 	"github.com/lukaszraczylo/claude-mnemonic/internal/config"
 	"github.com/lukaszraczylo/claude-mnemonic/internal/db/gorm"
+	"github.com/lukaszraczylo/claude-mnemonic/internal/vector/sqlitevec"
 	"github.com/lukaszraczylo/claude-mnemonic/pkg/models"
 	"github.com/lukaszraczylo/claude-mnemonic/pkg/similarity"
 	"github.com/rs/zerolog/log"
@@ -194,6 +195,36 @@ func hashRequest(toolName, input, output string) string {
 	return hex.EncodeToString(h.Sum(nil))[:16]       // Short hash is sufficient
 }

+// maxStdoutBytes is the maximum number of bytes to capture from CLI stdout.
+const maxStdoutBytes = 1 * 1024 * 1024 // 1 MiB
+
+// maxStderrBytes is the maximum number of bytes to capture from CLI stderr.
+const maxStderrBytes = 64 * 1024 // 64 KiB
+
+// limitedWriter is an io.Writer that keeps at most max bytes in buf and drops the rest.
+type limitedWriter struct {
+	buf bytes.Buffer
+	max int
+}
+
+// Write implements io.Writer. It stores what still fits and always reports len(p) with
+// a nil error; a shorter count would make io.Copy abort with io.ErrShortWrite.
+func (lw *limitedWriter) Write(p []byte) (int, error) {
+	n := len(p)
+	if room := lw.max - lw.buf.Len(); room > 0 {
+		if len(p) > room {
+			p = p[:room]
+		}
+		lw.buf.Write(p)
+	}
+	return n, nil
+}
+
+// String returns the buffered content as a string.
+func (lw *limitedWriter) String() string {
+	return lw.buf.String()
+}
+
 // BroadcastFunc is a callback for broadcasting events to SSE clients.
 type BroadcastFunc func(event map[string]any)

@@ -212,6 +243,7 @@ const MaxVectorSyncWorkers = 8
 type Processor struct {
 	observationStore    *gorm.ObservationStore
 	summaryStore        *gorm.SummaryStore
+	vectorClient        *sqlitevec.Client
 	broadcastFunc       BroadcastFunc
 	syncObservationFunc SyncObservationFunc
 	syncSummaryFunc     SyncSummaryFunc
@@ -240,6 +272,11 @@ func (p *Processor) SetSyncSummaryFunc(fn SyncSummaryFunc) {
 	p.syncSummaryFunc = fn
 }

+// SetVectorClient sets the vector client for write-time deduplication (while it is nil the vector dedup check always answers "insert").
+func (p *Processor) SetVectorClient(client *sqlitevec.Client) {
+	p.vectorClient = client
+}
+
 // broadcast sends an event via the broadcast callback if set.
 func (p *Processor) broadcast(event map[string]any) {
 	if p.broadcastFunc != nil {
@@ -429,16 +466,34 @@ func (p *Processor) ProcessObservation(ctx context.Context, sdkSessionID, projec
 		// Convert to stored observation for similarity check
 		storedObs := obs.ToStoredObservation()

-		// Check if this observation is too similar to existing ones
+		// Check if this observation is too similar to existing ones (text-based Jaccard)
 		if existingObs != nil && similarity.IsSimilarToAny(storedObs, existingObs, similarityThreshold) {
 			log.Debug().
 				Str("type", string(obs.Type)).
 				Str("title", obs.Title).
-				Msg("Skipping observation - too similar to existing")
+				Msg("Skipping observation - too similar to existing (text)")
 			skippedCount++
 			continue
 		}

+		// Check vector similarity for high-confidence dedup with merge
+		dedupResult := p.checkVectorDeduplication(ctx, obs, project)
+		if dedupResult.Action == "merge" {
+			log.Info().
+				Int64("existing_id", dedupResult.ExistingID).
+				Float64("similarity", dedupResult.Similarity).
+				Str("title", obs.Title).
+				Msg("Merging duplicate observation (vector dedup)")
+			if err := p.mergeObservation(ctx, dedupResult.ExistingID, obs); err != nil {
+				log.Warn().Err(err).Int64("existing_id", dedupResult.ExistingID).
+					Msg("Merge failed, inserting as new observation")
+				// Fall through to normal insert
+			} else {
+				skippedCount++
+				continue
+			}
+		}
+
 		id, createdAtEpoch, err := p.observationStore.StoreObservation(ctx, sdkSessionID, project, obs, promptNumber, 0)
 		if err != nil {
 			log.Error().Err(err).Msg("Failed to store observation")
@@ -644,10 +699,11 @@ func (p *Processor) callClaudeCLI(ctx context.Context, prompt string) (string, e
 	// Disable any plugin hooks by setting an env var that our hooks can check
 	cmd.Env = append(os.Environ(), "CLAUDE_MNEMONIC_INTERNAL=1")

-	// Capture output
-	var stdout, stderr bytes.Buffer
-	cmd.Stdout = &stdout
-	cmd.Stderr = &stderr
+	// Capture output with size limits to prevent unbounded memory usage
+	stdout := &limitedWriter{max: maxStdoutBytes}
+	stderr := &limitedWriter{max: maxStderrBytes}
+	cmd.Stdout = stdout
+	cmd.Stderr = stderr

 	// Run command
 	err := cmd.Run()
@@ -43,6 +43,13 @@ const (
 	// QueueProcessInterval is how often the background queue processor runs.
 	QueueProcessInterval = 2 * time.Second

+	// reinitializationDrainDelay is the delay after marking the service as not ready
+	// to allow in-flight requests to complete before reinitializing.
+	reinitializationDrainDelay = 200 * time.Millisecond
+
+	// MaxConcurrentProcessing limits the number of concurrent session processing goroutines.
+	MaxConcurrentProcessing = 4
+
 	// VectorSyncMaxRetries is the maximum number of retries for vector sync operations.
 	VectorSyncMaxRetries = 3

@@ -138,6 +145,7 @@ type Service struct {
 	updater            *update.Updater
 	rateLimiter        *PerClientRateLimiter
 	expensiveOpLimiter *ExpensiveOperationLimiter
+	contextCache       sync.Map
 	version            string
 	recentQueriesBuf   [maxRecentQueries]RecentSearchQuery
 	wg                 sync.WaitGroup
@@ -178,6 +186,13 @@ type staleVerifyRequest struct {
 	observationID int64
 }

+// contextCacheEntry caches clustering results for context injection.
+type contextCacheEntry struct {
+	timestamp    time.Time             // NOTE(review): presumably when the entry was cached — confirm at the use site
+	observations []*models.Observation // the cached observations
+	obsCount     int                   // NOTE(review): presumably the observation count the entry was built from — confirm
+}
+
 // RecentSearchQuery tracks a search query for analytics.
 type RecentSearchQuery struct {
 	Timestamp  time.Time `json:"timestamp"`
@@ -288,6 +303,11 @@ func (s *Service) setupVectorSyncCallbacks(
 		})
 	}

+	// Set vector client on processor for write-time deduplication
+	if processor != nil && s.vectorClient != nil {
+		processor.SetVectorClient(s.vectorClient)
+	}
+
 	// Set cleanup callback on observation store to sync deletes to vector store
 	if observationStore != nil && vectorSync != nil {
 		observationStore.SetCleanupFunc(func(ctx context.Context, deletedIDs []int64) {
@@ -614,6 +634,7 @@ func (s *Service) startWatchers() {
 func (s *Service) reinitializeDatabase() {
 	// Block new requests
 	s.ready.Store(false)
+	time.Sleep(reinitializationDrainDelay) // Allow in-flight requests to complete
 	log.Info().Msg("Database reinitialization starting...")

 	// Get old store references
@@ -1587,12 +1608,13 @@ func (s *Service) processQueue() {

 // processAllSessions processes pending messages for all active sessions.
 // Messages are processed in parallel using goroutines, with concurrency
-// limited by the processor's semaphore.
+// limited by a channel-based semaphore.
 func (s *Service) processAllSessions() {
 	// Get all sessions with pending messages
 	sessions := s.sessionManager.GetAllSessions()

 	var wg sync.WaitGroup
+	sem := make(chan struct{}, MaxConcurrentProcessing)

 	for _, sess := range sessions {
 		// Get pending messages
@@ -1601,11 +1623,13 @@ func (s *Service) processAllSessions() {
 			continue
 		}

-		// Process each message in a goroutine
+		// Process each message in a goroutine with semaphore
 		for _, msg := range messages {
 			wg.Add(1)
+			sem <- struct{}{} // Acquire semaphore slot
 			go func(sess *session.ActiveSession, msg session.PendingMessage) {
 				defer wg.Done()
+				defer func() { <-sem }() // Release semaphore slot

 				switch msg.Type {
 				case session.MessageTypeObservation:
@@ -75,6 +75,7 @@ type Manager struct {
 	onDeleted     func(int64)
 	cancel        context.CancelFunc
 	ProcessNotify chan struct{}
+	wg            sync.WaitGroup
 	mu            sync.RWMutex
 }

@@ -89,12 +90,14 @@ func NewManager(sessionStore *gorm.SessionStore) *Manager {
 		ProcessNotify: make(chan struct{}, 1),
 	}
 	// Start background cleanup goroutine
+	m.wg.Add(1)
 	go m.cleanupLoop()
 	return m
 }

 // cleanupLoop periodically removes stale sessions.
 func (m *Manager) cleanupLoop() {
+	defer m.wg.Done()
 	ticker := time.NewTicker(CleanupInterval)
 	defer ticker.Stop()

@@ -350,6 +353,7 @@ func (m *Manager) DeleteSession(sessionDBID int64) {
 func (m *Manager) ShutdownAll(ctx context.Context) {
 	// Stop cleanup goroutine
 	m.cancel()
+	m.wg.Wait()

 	m.mu.Lock()
 	sessionIDs := make([]int64, 0, len(m.sessions))
@@ -952,6 +952,7 @@ func TestCleanupLoop_ExitsOnCancel(t *testing.T) {

 	// Start cleanup loop in goroutine
 	done := make(chan struct{})
+	manager.wg.Add(1)
 	go func() {
 		manager.cleanupLoop()
 		close(done)
@@ -212,7 +212,7 @@ func (b *Broadcaster) HandleSSE(w http.ResponseWriter, r *http.Request) {
 	defer b.RemoveClient(client)

 	// Send initial connection message
-	fmt.Fprintf(w, "data: {\"type\":\"connected\",\"clientId\":\"%s\"}\n\n", client.ID)
+	_, _ = fmt.Fprintf(w, "data: {\"type\":\"connected\",\"clientId\":\"%s\"}\n\n", client.ID)
 	client.Flusher.Flush()

 	// Wait for client disconnect
@@ -2,6 +2,7 @@
 package hooks

 import (
+	"context"
 	"crypto/sha256"
 	"encoding/hex"
 	"encoding/json"
@@ -9,6 +10,7 @@ import (
 	"io"
 	"os"
 	"path/filepath"
+	"time"
 )

 // HookResponse is the response sent back to Claude Code.
@@ -31,6 +33,14 @@ func ProjectIDWithName(cwd string) string {
 	return fmt.Sprintf("%s_%s", dirName, shortHash)
 }

+// HookDeadline derives a context from context.Background() whose timeout is 80% of the
+// hook's budget; the remaining 20% is a safety margin so the hook can respond in time.
+func HookDeadline(timeout time.Duration) (context.Context, context.CancelFunc) {
+	const budgetFraction = 0.8
+	budget := time.Duration(budgetFraction * float64(timeout))
+	return context.WithTimeout(context.Background(), budget)
+}
+
 // Exit codes for Claude Code hooks
 const (
 	ExitSuccess         = 0
@@ -92,7 +102,7 @@ func RunHook[T any](hookName string, handler HookHandler[T]) {

 	// Parse input
 	var input T
-	if err := json.Unmarshal(inputData, &input); err != nil {
+	if err = json.Unmarshal(inputData, &input); err != nil {
 		WriteError(hookName, err)
 		os.Exit(1)
 	}
@@ -3,6 +3,7 @@ package hooks

 import (
 	"bytes"
+	"context"
 	"encoding/json"
 	"fmt"
 	"net"
@@ -12,6 +13,8 @@ import (
 	"path/filepath"
 	"strconv"
 	"strings"
+	"sync"
+	"syscall"
 	"time"
 )

@@ -22,13 +25,53 @@ const (
 	// DefaultWorkerPort is the default worker port.
 	DefaultWorkerPort = 37777

-	// HealthCheckTimeout is the timeout for health checks (reduced from 5s for faster startup).
-	HealthCheckTimeout = 1 * time.Second
+	// HealthCheckTimeout is the timeout for health checks.
+	HealthCheckTimeout = 2 * time.Second

 	// StartupTimeout is the timeout for worker startup.
 	StartupTimeout = 30 * time.Second
+
+	// workerCacheMaxAge is how long the worker cache is considered fresh.
+	workerCacheMaxAge = 10 * time.Second
+
+	// circuitBreakerCooldown is how long to wait after a startup failure before retrying.
+	circuitBreakerCooldown = 30 * time.Second
+
+	// healthCheckRetries is the number of health check attempts before declaring dead.
+	healthCheckRetries = 3
+
+	// healthCheckRetryDelay is the delay between health check retries.
+	healthCheckRetryDelay = 200 * time.Millisecond
 )

+var (
+	// circuitBreakerMu protects lastStartupFailure.
+	circuitBreakerMu   sync.Mutex
+	lastStartupFailure time.Time
+)
+
+// IsWorkerAvailable is a fast pre-flight check that makes no network calls.
+// It reports false only while the startup circuit breaker is cooling down or when
+// isProcessAlive rejects the PID from the worker cache; in every other case it reports true.
+func IsWorkerAvailable() bool {
+	// Evaluate the circuit breaker under its lock and release it before touching the cache.
+	circuitBreakerMu.Lock()
+	coolingDown := !lastStartupFailure.IsZero() && time.Since(lastStartupFailure) < circuitBreakerCooldown
+	circuitBreakerMu.Unlock()
+	if coolingDown {
+		return false
+	}
+
+	// No cache entry means the state is unknown, so do not block the hook.
+	cached := readWorkerCache()
+	if cached == nil {
+		return true
+	}
+
+	// A cache entry exists: the answer is whether its recorded process is alive.
+	return isProcessAlive(cached.PID)
+}
+
 // GetWorkerPort returns the worker port from environment or default.
 func GetWorkerPort() int {
 	if port := os.Getenv("CLAUDE_MNEMONIC_WORKER_PORT"); port != "" {
@@ -40,29 +83,149 @@ func GetWorkerPort() int {
 }

 // IsWorkerRunning checks if the worker is running and healthy.
+// It returns false when the health request itself fails. When the response
+// body is JSON and carries a "ready" field, that value is returned. When the
+// body is not JSON, or is JSON without a "ready" field, it falls back to
+// treating HTTP 200 as healthy for backwards compatibility.
 func IsWorkerRunning(port int) bool {
 	client := &http.Client{Timeout: HealthCheckTimeout}
 	resp, err := client.Get(fmt.Sprintf("http://127.0.0.1:%d/api/health", port))
 	if err != nil {
 		return false
 	}
-	defer resp.Body.Close()
+	defer func() { _ = resp.Body.Close() }()
+
+	// Decode into a pointer so an absent "ready" key (nil) can be told apart
+	// from an explicit "ready": false. With a plain bool, any JSON body that
+	// lacks the key would be reported as not ready and the fallback below
+	// would never run.
+	var health struct {
+		Ready *bool `json:"ready"`
+	}
+	if err := json.NewDecoder(resp.Body).Decode(&health); err == nil && health.Ready != nil {
+		return *health.Ready
+	}
+
+	// Fallback: treat HTTP 200 as healthy (backwards compatibility)
 	return resp.StatusCode == http.StatusOK
 }

+// workerCachePath returns the location of the worker cache file under $HOME,
+// or "" when HOME is unset or empty.
+func workerCachePath() string {
+	if home := os.Getenv("HOME"); home != "" {
+		return filepath.Join(home, ".claude-mnemonic", ".worker-cache")
+	}
+	return ""
+}
+
+// workerCacheEntry holds cached worker state: "port:pid:timestamp".
+// The on-disk form is three colon-separated decimal integers; the timestamp is
+// written as Unix seconds and converted to a time.Time when the file is read.
+type workerCacheEntry struct {
+	Timestamp time.Time
+	Port      int
+	PID       int
+}
+
+// readWorkerCache reads the worker cache file and returns the entry if fresh.
+// It returns nil when the cache path is unavailable, the file cannot be read,
+// the content is not "port:pid:timestamp" with a positive port and PID, or the
+// entry's age falls outside [0, workerCacheMaxAge].
+func readWorkerCache() *workerCacheEntry {
+	path := workerCachePath()
+	if path == "" {
+		return nil
+	}
+	data, err := os.ReadFile(path)
+	if err != nil {
+		return nil
+	}
+	parts := strings.SplitN(strings.TrimSpace(string(data)), ":", 3)
+	if len(parts) != 3 {
+		return nil
+	}
+	port, err := strconv.Atoi(parts[0])
+	if err != nil || port <= 0 {
+		return nil
+	}
+	pid, err := strconv.Atoi(parts[1])
+	if err != nil || pid <= 0 {
+		return nil
+	}
+	ts, err := strconv.ParseInt(parts[2], 10, 64)
+	if err != nil {
+		return nil
+	}
+	entry := &workerCacheEntry{
+		Port:      port,
+		PID:       pid,
+		Timestamp: time.Unix(ts, 0),
+	}
+	// Check freshness. A timestamp in the future (clock stepped back, corrupted
+	// file) gives a negative age; reject it, otherwise the entry would count as
+	// fresh until the wall clock catches up.
+	age := time.Since(entry.Timestamp)
+	if age < 0 || age > workerCacheMaxAge {
+		return nil
+	}
+	return entry
+}
+
+// writeWorkerCache stores "port:pid:unix-seconds" in the worker cache file.
+// It does nothing when no cache path is available.
+func writeWorkerCache(port, pid int) {
+	cacheFile := workerCachePath()
+	if cacheFile == "" {
+		return
+	}
+	// Errors from creating the directory or writing the file are discarded.
+	_ = os.MkdirAll(filepath.Dir(cacheFile), 0o700)
+	payload := []byte(fmt.Sprintf("%d:%d:%d", port, pid, time.Now().Unix()))
+	_ = os.WriteFile(cacheFile, payload, 0o600)
+}
+
+// isProcessAlive reports whether signal 0 can be sent to the process with the
+// given PID without error.
+func isProcessAlive(pid int) bool {
+	if proc, err := os.FindProcess(pid); err == nil {
+		// Signal 0 checks if process exists without actually sending a signal.
+		return proc.Signal(syscall.Signal(0)) == nil
+	}
+	return false
+}
+
+// isWorkerRunningWithRetries probes the worker up to healthCheckRetries times,
+// sleeping healthCheckRetryDelay between failed probes. It returns true as soon
+// as one probe succeeds and false once every attempt has failed.
+func isWorkerRunningWithRetries(port int) bool {
+	for remaining := healthCheckRetries; remaining > 0; remaining-- {
+		if IsWorkerRunning(port) {
+			return true
+		}
+		// No pause after the final attempt.
+		if remaining > 1 {
+			time.Sleep(healthCheckRetryDelay)
+		}
+	}
+	return false
+}
+
 // EnsureWorkerRunning ensures the worker is running, starting it if necessary.
 // If a worker is already running and healthy with matching version, it reuses it.
 // If version mismatch or unhealthy, it kills the old worker and starts fresh.
 func EnsureWorkerRunning() (int, error) {
 	port := GetWorkerPort()

-	// Check if already running and healthy
-	if IsWorkerRunning(port) {
+	// Fast path: check PID cache before making any HTTP calls.
+	if entry := readWorkerCache(); entry != nil && entry.Port == port {
+		if isProcessAlive(entry.PID) {
+			return port, nil
+		}
+	}
+
+	// Circuit breaker: if we failed to start recently, don't retry immediately.
+	circuitBreakerMu.Lock()
+	if !lastStartupFailure.IsZero() && time.Since(lastStartupFailure) < circuitBreakerCooldown {
+		circuitBreakerMu.Unlock()
+		return 0, fmt.Errorf("worker startup failed recently (circuit breaker open, retry after %v)", circuitBreakerCooldown-time.Since(lastStartupFailure))
+	}
+	circuitBreakerMu.Unlock()
+
+	// Check if already running and healthy (with retries to avoid false negatives under load)
+	if isWorkerRunningWithRetries(port) {
 		// Check version - if mismatch, restart (unless both are dev builds)
 		if runningVersion := GetWorkerVersion(port); runningVersion != "" {
 			if runningVersion != Version {
 				// For dev/dirty builds, don't restart if base versions match
 				if versionsCompatible(runningVersion, Version) {
+					updateCacheFromPort(port)
 					return port, nil
 				}
 				fmt.Fprintf(os.Stderr, "[claude-mnemonic] Worker version mismatch (running: %s, expected: %s), restarting...\n", runningVersion, Version)
@@ -72,23 +235,34 @@ func EnsureWorkerRunning() (int, error) {
 				time.Sleep(500 * time.Millisecond)
 			} else {
 				// Version matches, reuse existing worker
+				updateCacheFromPort(port)
 				return port, nil
 			}
 		} else {
 			// Couldn't get version, assume it's fine
+			updateCacheFromPort(port)
 			return port, nil
 		}
 	}

-	// Check if port is in use but worker is unhealthy
+	// Port is in use but health check failed -- worker may be slow, not dead.
 	if IsPortInUse(port) {
-		// Something is using the port but not responding to health checks
-		// Try to kill it
+		// The port is responding to TCP but health check timed out.
+		// Don't kill it -- it's likely just under load. Give it more time.
+		fmt.Fprintf(os.Stderr, "[claude-mnemonic] Worker on port %d is slow to respond, waiting...\n", port)
+		// Try a few more times with longer delays before giving up
+		for i := 0; i < 3; i++ {
+			time.Sleep(500 * time.Millisecond)
+			if IsWorkerRunning(port) {
+				updateCacheFromPort(port)
+				return port, nil
+			}
+		}
+		// Still not healthy after extended wait -- kill and restart
+		fmt.Fprintf(os.Stderr, "[claude-mnemonic] Worker unresponsive after extended wait, restarting...\n")
 		if err := KillProcessOnPort(port); err != nil {
-			// Log but continue - maybe it will die on its own
 			fmt.Fprintf(os.Stderr, "[claude-mnemonic] Warning: failed to kill unhealthy process on port %d: %v\n", port, err)
 		}
-		// Wait a moment for port to be released
 		time.Sleep(500 * time.Millisecond)
 	}

@@ -103,9 +277,14 @@ func EnsureWorkerRunning() (int, error) {
 	cmd.Stdout = os.Stderr
 	cmd.Stderr = os.Stderr
 	if err := cmd.Start(); err != nil {
+		circuitBreakerMu.Lock()
+		lastStartupFailure = time.Now()
+		circuitBreakerMu.Unlock()
 		return 0, fmt.Errorf("failed to start worker: %w", err)
 	}

+	pid := cmd.Process.Pid
+
 	// Wait for worker to be ready with exponential backoff
 	deadline := time.Now().Add(StartupTimeout)
 	backoff := 50 * time.Millisecond
@@ -113,6 +292,7 @@ func EnsureWorkerRunning() (int, error) {

 	for time.Now().Before(deadline) {
 		if IsWorkerRunning(port) {
+			writeWorkerCache(port, pid)
 			return port, nil
 		}
 		time.Sleep(backoff)
@@ -123,9 +303,31 @@ func EnsureWorkerRunning() (int, error) {
 		}
 	}

+	circuitBreakerMu.Lock()
+	lastStartupFailure = time.Now()
+	circuitBreakerMu.Unlock()
 	return 0, fmt.Errorf("worker failed to start within timeout")
 }

+// updateCacheFromPort finds the PID of the process listening on the port and
+// updates the cache. It is best-effort: if lsof fails or prints no usable PID,
+// the cache is left untouched.
+func updateCacheFromPort(port int) {
+	// Restrict lsof to listening TCP sockets. A bare "-i :PORT" also reports
+	// processes that merely hold a connection to the port, and caching such a
+	// client PID would make the liveness check track the wrong process.
+	cmd := exec.Command("lsof", "-t", fmt.Sprintf("-iTCP:%d", port), "-sTCP:LISTEN") // #nosec G204 -- port is from internal config
+	output, err := cmd.Output()
+	if err != nil {
+		return
+	}
+	// Take first PID if multiple
+	pids := strings.Fields(string(output))
+	if len(pids) == 0 {
+		return
+	}
+	pid, err := strconv.Atoi(pids[0])
+	if err != nil || pid <= 0 {
+		return
+	}
+	writeWorkerCache(port, pid)
+}
+
 // GetWorkerVersion gets the version of the running worker.
 func GetWorkerVersion(port int) string {
 	client := &http.Client{Timeout: HealthCheckTimeout}
@@ -133,7 +335,7 @@ func GetWorkerVersion(port int) string {
 	if err != nil {
 		return ""
 	}
-	defer resp.Body.Close()
+	defer func() { _ = resp.Body.Close() }()

 	if resp.StatusCode != http.StatusOK {
 		return ""
@@ -243,7 +445,7 @@ func POST(port int, path string, body interface{}) (map[string]interface{}, erro
 	if err != nil {
 		return nil, err
 	}
-	defer resp.Body.Close()
+	defer func() { _ = resp.Body.Close() }()

 	if resp.StatusCode >= 400 {
 		return nil, fmt.Errorf("request failed: %s", resp.Status)
@@ -251,13 +453,38 @@ func POST(port int, path string, body interface{}) (map[string]interface{}, erro

 	var result map[string]interface{}
 	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
-		// Not all endpoints return JSON body - return empty map for success with no body
-		return map[string]interface{}{}, nil
+		// Not all endpoints return JSON
+		return nil, nil
 	}

 	return result, nil
 }

+// POSTWithContext sends a POST request using the provided context.
+// Used for fire-and-forget calls where we want to control the timeout externally.
+// The body is JSON-encoded and the response body is discarded. An error is
+// returned when encoding or the request fails, or when the worker answers with
+// an HTTP status of 400 or above. The client's own 10-second timeout still
+// applies as an upper bound in addition to ctx.
+func POSTWithContext(ctx context.Context, port int, path string, body interface{}) error {
+	jsonBody, err := json.Marshal(body)
+	if err != nil {
+		return err
+	}
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost,
+		fmt.Sprintf("http://127.0.0.1:%d%s", port, path),
+		bytes.NewReader(jsonBody))
+	if err != nil {
+		return err
+	}
+	req.Header.Set("Content-Type", "application/json")
+
+	client := &http.Client{Timeout: 10 * time.Second}
+	resp, err := client.Do(req)
+	if err != nil {
+		return err
+	}
+	defer func() { _ = resp.Body.Close() }()
+
+	// An HTTP error status is a failed request, not a success; report it the
+	// same way the POST and GET helpers in this file do.
+	if resp.StatusCode >= 400 {
+		return fmt.Errorf("request failed: %s", resp.Status)
+	}
+	return nil
+}
+
 // GET sends a GET request to the worker.
 func GET(port int, path string) (map[string]interface{}, error) {
 	client := &http.Client{Timeout: 10 * time.Second}
@@ -266,7 +493,7 @@ func GET(port int, path string) (map[string]interface{}, error) {
 	if err != nil {
 		return nil, err
 	}
-	defer resp.Body.Close()
+	defer func() { _ = resp.Body.Close() }()

 	if resp.StatusCode >= 400 {
 		return nil, fmt.Errorf("request failed: %s", resp.Status)
@@ -517,7 +517,7 @@ func TestProjectIDWithName_Uniqueness(t *testing.T) {
 // TestHookConstants tests hook-related constants.
+// It asserts the default worker port and the health-check and startup timeouts.
 func TestHookConstants(t *testing.T) {
 	assert.Equal(t, 37777, DefaultWorkerPort)
-	assert.Equal(t, 1*time.Second, HealthCheckTimeout)
+	assert.Equal(t, 2*time.Second, HealthCheckTimeout)
 	assert.Equal(t, 30*time.Second, StartupTimeout)
 }

@@ -7,6 +7,8 @@

 set -e

+INSTALLER_VERSION="1.1.0"
+
 # Configuration
 GITHUB_REPO="lukaszraczylo/claude-mnemonic"
 INSTALL_DIR="$HOME/.claude/plugins/marketplaces/claude-mnemonic"
@@ -40,6 +42,50 @@ error() {
    exit 1
 }

+# Gracefully stop worker processes (SIGTERM first, then SIGKILL after timeout).
+# The worker PID cache is removed on every exit path, so hooks never see cached
+# info for a worker this function has just stopped.
+graceful_stop_worker() {
+    local cache_file="$HOME/.claude-mnemonic/.worker-cache"
+
+    # Send SIGTERM first
+    pkill -TERM -f 'claude-mnemonic.*worker' 2>/dev/null || true
+    pkill -TERM -f '\.claude/plugins/.*/worker' 2>/dev/null || true
+    if command -v lsof &> /dev/null; then
+        lsof -ti :37777 2>/dev/null | xargs kill -TERM 2>/dev/null || true
+    elif command -v ss &> /dev/null; then
+        ss -tlnp 'sport = :37777' 2>/dev/null | awk 'NR>1 {print $6}' | grep -oP 'pid=\K[0-9]+' | xargs -r kill -TERM 2>/dev/null || true
+    elif command -v fuser &> /dev/null; then
+        fuser -k -TERM 37777/tcp 2>/dev/null || true
+    fi
+
+    # Wait up to 5 seconds for graceful shutdown
+    local waited=0
+    while [[ $waited -lt 5 ]]; do
+        if ! pgrep -f 'claude-mnemonic.*worker' &>/dev/null && ! pgrep -f '\.claude/plugins/.*/worker' &>/dev/null; then
+            # Worker exited after SIGTERM: drop the PID cache here too,
+            # otherwise this early return would leave a stale entry behind.
+            rm -f "$cache_file" 2>/dev/null || true
+            return 0
+        fi
+        sleep 1
+        waited=$((waited + 1))
+    done
+
+    # Force kill if still running
+    pkill -9 -f 'claude-mnemonic.*worker' 2>/dev/null || true
+    pkill -9 -f '\.claude/plugins/.*/worker' 2>/dev/null || true
+    if command -v lsof &> /dev/null; then
+        lsof -ti :37777 2>/dev/null | xargs kill -9 2>/dev/null || true
+    elif command -v ss &> /dev/null; then
+        ss -tlnp 'sport = :37777' 2>/dev/null | awk 'NR>1 {print $6}' | grep -oP 'pid=\K[0-9]+' | xargs -r kill -9 2>/dev/null || true
+    elif command -v fuser &> /dev/null; then
+        fuser -k 37777/tcp 2>/dev/null || true
+    fi
+    sleep 1
+
+    # Remove stale PID cache to prevent hooks from using old worker info
+    rm -f "$cache_file" 2>/dev/null || true
+
+    # Verify process is gone
+    if pgrep -f 'claude-mnemonic.*worker' &>/dev/null; then
+        warn "Could not stop existing worker process"
+    fi
+}
+
 # Detect OS and architecture
 detect_platform() {
    local os arch
@@ -131,7 +177,7 @@ download_release() {
    local tmp_dir

    tmp_dir=$(mktemp -d)
-    trap "rm -rf $tmp_dir" EXIT
+    trap 'rm -rf "$tmp_dir"' EXIT

    # Construct download URL (use .zip for Windows, .tar.gz for others)
    local archive_ext="tar.gz"
@@ -147,8 +193,35 @@ download_release() {
        error "Failed to download release from: $download_url"
    fi

+    # Verify download integrity via checksum.
+    # curl needs -f here: without it an HTTP 404 for a missing .sha256 asset
+    # still exits 0 and saves the error page as the "checksum", which would then
+    # fail the comparison and abort the install instead of skipping verification.
+    local checksum_url="${download_url}.sha256"
+    info "Verifying download integrity..."
+    if curl -fsSL -o "$tmp_dir/checksum.sha256" "$checksum_url" 2>/dev/null; then
+        local expected_hash actual_hash
+        expected_hash=$(awk '{print $1}' "$tmp_dir/checksum.sha256")
+        if command -v shasum &> /dev/null; then
+            actual_hash=$(shasum -a 256 "$tmp_dir/release.${archive_ext}" | awk '{print $1}')
+        elif command -v sha256sum &> /dev/null; then
+            actual_hash=$(sha256sum "$tmp_dir/release.${archive_ext}" | awk '{print $1}')
+        else
+            warn "No SHA256 tool found (shasum or sha256sum), skipping checksum verification"
+            actual_hash=""
+        fi
+        if [[ -n "$actual_hash" ]]; then
+            if [[ "$expected_hash" != "$actual_hash" ]]; then
+                error "Checksum verification failed! Expected: $expected_hash Got: $actual_hash"
+            fi
+            success "Checksum verified"
+        fi
+    else
+        warn "No checksum file available at $checksum_url, skipping verification"
+    fi
+
+
    info "Extracting archive..."
    if [[ "$archive_ext" == "zip" ]]; then
+        if ! command -v unzip &> /dev/null; then
+            error "unzip is required for Windows archives but not installed"
+        fi
        if ! unzip -q "$tmp_dir/release.zip" -d "$tmp_dir"; then
            error "Failed to extract archive"
        fi
@@ -160,17 +233,7 @@ download_release() {

    # Stop existing worker if running
    info "Stopping existing worker (if running)..."
-    pkill -9 -f 'claude-mnemonic.*worker' 2>/dev/null || true
-    pkill -9 -f '\.claude/plugins/.*/worker' 2>/dev/null || true
-    # Kill process on port 37777 (use lsof on macOS, ss/fuser on Linux)
-    if command -v lsof &> /dev/null; then
-        lsof -ti :37777 | xargs kill -9 2>/dev/null || true
-    elif command -v ss &> /dev/null; then
-        ss -tlnp 'sport = :37777' 2>/dev/null | awk 'NR>1 {print $6}' | grep -oP 'pid=\K[0-9]+' | xargs -r kill -9 2>/dev/null || true
-    elif command -v fuser &> /dev/null; then
-        fuser -k 37777/tcp 2>/dev/null || true
-    fi
-    sleep 1
+    graceful_stop_worker

    # Create installation directories
    info "Installing to ${INSTALL_DIR}..."
@@ -178,13 +241,21 @@ download_release() {
    mkdir -p "$INSTALL_DIR/.claude-plugin"
    mkdir -p "$INSTALL_DIR/commands"

-    # Copy binaries
-    cp "$tmp_dir/worker" "$INSTALL_DIR/"
-    cp "$tmp_dir/mcp-server" "$INSTALL_DIR/"
-    cp "$tmp_dir/hooks/"* "$INSTALL_DIR/hooks/"
+    # Copy binaries (abort on failure — could indicate disk full or permissions issue)
+    if ! cp "$tmp_dir/worker" "$INSTALL_DIR/"; then
+        error "Failed to copy worker binary to $INSTALL_DIR/"
+    fi
+    if ! cp "$tmp_dir/mcp-server" "$INSTALL_DIR/"; then
+        error "Failed to copy mcp-server binary to $INSTALL_DIR/"
+    fi
+    if ! cp "$tmp_dir/hooks/"* "$INSTALL_DIR/hooks/"; then
+        error "Failed to copy hook binaries to $INSTALL_DIR/hooks/"
+    fi

    # Copy plugin configuration
-    cp "$tmp_dir/.claude-plugin/"* "$INSTALL_DIR/.claude-plugin/"
+    if ! cp "$tmp_dir/.claude-plugin/"* "$INSTALL_DIR/.claude-plugin/"; then
+        error "Failed to copy plugin configuration to $INSTALL_DIR/.claude-plugin/"
+    fi

    # Copy slash commands if they exist in the release
    if [[ -d "$tmp_dir/commands" ]]; then
@@ -338,72 +409,51 @@ start_worker() {
        error "Worker binary not found at $worker_path"
    fi

+    # Check for port conflict with a non-mnemonic process
+    if command -v lsof &> /dev/null; then
+        local port_pid
+        port_pid=$(lsof -ti :37777 2>/dev/null || true)
+        if [[ -n "$port_pid" ]]; then
+            local port_cmd
+            port_cmd=$(ps -p "$port_pid" -o comm= 2>/dev/null || true)
+            if [[ -n "$port_cmd" ]] && ! echo "$port_cmd" | grep -q "worker"; then
+                warn "Port 37777 is in use by another process: $port_cmd (PID $port_pid)"
+                warn "The worker may fail to start. Consider stopping the conflicting process."
+            fi
+        fi
+    fi
+
    info "Starting worker service..."
    nohup "$worker_path" > /tmp/claude-mnemonic-worker.log 2>&1 &

-    sleep 2
-
+    # Retry health check up to 5 times with 1s interval
+    local retries=0
+    local max_retries=5
+    while [[ $retries -lt $max_retries ]]; do
+        sleep 1
        if curl -sS http://localhost:37777/health > /dev/null 2>&1; then
            success "Worker started successfully at http://localhost:37777"
-    else
-        warn "Worker may not have started properly. Check /tmp/claude-mnemonic-worker.log"
+            return 0
        fi
+        retries=$((retries + 1))
+    done
+
+    warn "Worker may not have started properly after ${max_retries} attempts. Check /tmp/claude-mnemonic-worker.log"
 }

 # Check optional dependencies for semantic search
 check_optional_deps() {
-    local missing_deps=()
-    local install_hints=""
+    # Semantic search uses embedded ONNX runtime - no external Python/uvx dependencies needed
+    success "Semantic search enabled (embedded ONNX runtime)"
+}

-    # Check for Python 3.13+
-    if command -v python3 &> /dev/null; then
-        local py_version=$(python3 -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")' 2>/dev/null)
-        if [[ "$py_version" < "3.13" ]]; then
-            missing_deps+=("Python 3.13+ (found $py_version)")
-        fi
-    else
-        missing_deps+=("Python 3.13+")
-    fi
-
-    # Check for uvx
-    if ! command -v uvx &> /dev/null; then
-        missing_deps+=("uvx")
-    fi
-
-    if [[ ${#missing_deps[@]} -gt 0 ]]; then
-        echo ""
-        warn "Optional dependencies missing (needed for semantic search):"
-        for dep in "${missing_deps[@]}"; do
-            echo "  - $dep"
-        done
-        echo ""
-
-        # Detect OS and show appropriate install command
-        case "$(uname -s)" in
-            Darwin)
-                info "Install on macOS:"
-                echo "  brew install python@3.13"
-                echo "  pip3 install uv"
-                ;;
-            Linux)
-                info "Install on Linux:"
-                echo "  sudo apt install python3 python3-pip"
-                echo "  pip3 install uv"
-                ;;
-            MINGW*|MSYS*|CYGWIN*)
-                info "Install on Windows:"
-                echo "  winget install Python.Python.3.13"
-                echo "  pip install uv"
-                ;;
-        esac
-        echo ""
-        info "Note: Requires Python 3.13+. Most package managers install the latest version."
-        echo ""
-        info "Semantic search will be disabled until these are installed."
-        info "Core functionality (SQLite storage, full-text search) will work."
-        echo ""
-    else
-        success "Optional dependencies found (semantic search enabled)"
+# Rollback partially installed files on failure
+# Runs as an EXIT handler (registered in main) and acts only while
+# INSTALL_COMPLETE is not "true".
+# NOTE(review): this removes $INSTALL_DIR and $CACHE_DIR wholesale, so a failed
+# upgrade would presumably also delete a previously working install -- confirm
+# that is intended.
+# NOTE(review): download_release registers its own EXIT trap; bash keeps a
+# single EXIT handler per shell, so verify this one is still active afterwards.
+INSTALL_COMPLETE=false
+cleanup_on_failure() {
+    if [[ "$INSTALL_COMPLETE" != "true" ]]; then
+        warn "Installation did not complete — cleaning up partial install..."
+        rm -rf "$INSTALL_DIR" 2>/dev/null || true
+        rm -rf "$CACHE_DIR" 2>/dev/null || true
     fi
 }

@@ -411,6 +461,8 @@ check_optional_deps() {
 main() {
    local version="${1:-}"

+    trap cleanup_on_failure EXIT
+
    echo ""
    echo "╔═══════════════════════════════════════════════════════════╗"
    echo "║           Claude Mnemonic - Installation Script           ║"
@@ -455,6 +507,8 @@ main() {
    # Check optional dependencies
    check_optional_deps

+    INSTALL_COMPLETE=true
+
    echo ""
    echo "╔═══════════════════════════════════════════════════════════╗"
    echo "║                  Installation Complete!                   ║"
@@ -467,6 +521,12 @@ main() {
    echo ""
 }

+# Handle --version flag
+if [[ "${1:-}" == "--version" ]]; then
+    echo "claude-mnemonic installer v${INSTALLER_VERSION}"
+    exit 0
+fi
+
 # Handle --register-only flag
 if [[ "${1:-}" == "--register-only" ]]; then
    version=$(cat "$INSTALL_DIR/.claude-plugin/plugin.json" 2>/dev/null | grep '"version"' | sed -E 's/.*"([^"]+)".*/\1/' || echo "1.0.0")
@@ -486,17 +546,7 @@ if [[ "${1:-}" == "--uninstall" ]]; then
    echo ""

    info "Stopping worker processes..."
-    pkill -9 -f 'claude-mnemonic.*worker' 2>/dev/null || true
-    pkill -9 -f '\.claude/plugins/.*/worker' 2>/dev/null || true
-    # Kill process on port 37777 (use lsof on macOS, ss/fuser on Linux)
-    if command -v lsof &> /dev/null; then
-        lsof -ti :37777 | xargs kill -9 2>/dev/null || true
-    elif command -v ss &> /dev/null; then
-        ss -tlnp 'sport = :37777' 2>/dev/null | awk 'NR>1 {print $6}' | grep -oP 'pid=\K[0-9]+' | xargs -r kill -9 2>/dev/null || true
-    elif command -v fuser &> /dev/null; then
-        fuser -k 37777/tcp 2>/dev/null || true
-    fi
-    sleep 1
+    graceful_stop_worker

    info "Removing plugin directories..."
    rm -rf "$INSTALL_DIR"
@@ -16,6 +16,35 @@ CACHE_BASE="$HOME/.claude/plugins/cache/claude-mnemonic/claude-mnemonic"
 CACHE_PATH="$CACHE_BASE/$VERSION"
 TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%S.000Z")

+# Helper: safely write JSON via tmp file with validation
+# Usage: safe_jq_write <jq_args...> <input_file>
+# The last argument is treated as the input file. jq output goes to
+# <input_file>.tmp, is re-parsed with jq to confirm it is valid JSON, and only
+# then moved over <input_file>. Returns 1 (leaving <input_file> untouched and
+# removing the tmp file) when jq fails or produces invalid JSON.
+safe_jq_write() {
+    if [ "$#" -lt 2 ]; then
+        echo "ERROR: safe_jq_write requires a jq filter and an input file"
+        return 1
+    fi
+
+    local args=("$@")
+    # Index the last element arithmetically: negative subscripts (${args[-1]})
+    # need bash 4.3+, and the bash 3.2 shipped with macOS rejects them.
+    local input_file="${args[${#args[@]}-1]}"
+    local tmp_file="${input_file}.tmp"
+
+    if jq "${args[@]}" > "$tmp_file"; then
+        if jq . "$tmp_file" > /dev/null 2>&1; then
+            mv "$tmp_file" "$input_file"
+        else
+            echo "ERROR: jq produced invalid JSON for $input_file, aborting"
+            rm -f "$tmp_file"
+            return 1
+        fi
+    else
+        echo "ERROR: jq failed for $input_file, aborting"
+        rm -f "$tmp_file"
+        return 1
+    fi
+}
+
+# Check that Claude Code directory exists
+if [ ! -d "$HOME/.claude" ]; then
+    echo "Warning: $HOME/.claude directory not found. Claude Code may not be installed."
+    echo "Continuing anyway, but plugin may not function until Claude Code is installed."
+fi
+
 # Ensure plugins directory exists
 mkdir -p "$HOME/.claude/plugins"

@@ -42,6 +71,24 @@ fi

 # Check if jq is available
 if command -v jq &> /dev/null; then
+    # Validate jq version (1.6+ required for //= operator)
+    JQ_VERSION=$(jq --version 2>/dev/null | sed 's/jq-//')
+    JQ_MAJOR=$(echo "$JQ_VERSION" | cut -d. -f1)
+    JQ_MINOR=$(echo "$JQ_VERSION" | cut -d. -f2)
+    if [ -n "$JQ_MAJOR" ] && [ -n "$JQ_MINOR" ]; then
+        if [ "$JQ_MAJOR" -lt 1 ] || { [ "$JQ_MAJOR" -eq 1 ] && [ "$JQ_MINOR" -lt 6 ]; }; then
+            echo "ERROR: jq 1.6+ is required (found jq-$JQ_VERSION)"
+            echo "Please upgrade jq: brew install jq (macOS) or apt-get install jq (Linux)"
+            exit 1
+        fi
+    fi
+
+    # Validate marketplace path exists and contains expected files
+    if [ ! -d "$MARKETPLACE_PATH" ]; then
+        echo "Warning: Marketplace directory not found at $MARKETPLACE_PATH"
+        echo "Plugin files may not be copied to cache correctly."
+    fi
+
    # Ensure cache directory exists and copy plugin files
    mkdir -p "$CACHE_PATH/.claude-plugin"
    mkdir -p "$CACHE_PATH/hooks"
@@ -64,9 +111,8 @@ EOF
 )

    # Add or update the plugin entry in installed_plugins.json
-    jq --arg key "$PLUGIN_KEY" --argjson entry "$PLUGIN_ENTRY" \
-        '.plugins[$key] = $entry' "$PLUGINS_FILE" > "${PLUGINS_FILE}.tmp" \
-        && mv "${PLUGINS_FILE}.tmp" "$PLUGINS_FILE"
+    safe_jq_write --arg key "$PLUGIN_KEY" --argjson entry "$PLUGIN_ENTRY" \
+        '.plugins[$key] = $entry' "$PLUGINS_FILE"

    echo "Plugin registered in installed_plugins.json"

@@ -82,9 +128,8 @@ EOF
 EOF
 )

-    jq --arg key "$PLUGIN_KEY" --argjson statusline "$STATUSLINE_ENTRY" \
-        '.enabledPlugins //= {} | .enabledPlugins[$key] = true | .statusLine = $statusline' "$SETTINGS_FILE" > "${SETTINGS_FILE}.tmp" \
-        && mv "${SETTINGS_FILE}.tmp" "$SETTINGS_FILE"
+    safe_jq_write --arg key "$PLUGIN_KEY" --argjson statusline "$STATUSLINE_ENTRY" \
+        '.enabledPlugins //= {} | .enabledPlugins[$key] = true | .statusLine = $statusline' "$SETTINGS_FILE"

    echo "Plugin enabled in settings.json"
    echo "Statusline configured in settings.json"
@@ -102,9 +147,8 @@ EOF
 EOF
 )

-    jq --arg key "$MARKETPLACE_NAME" --argjson entry "$MARKETPLACE_ENTRY" \
-        '.[$key] = $entry' "$MARKETPLACES_FILE" > "${MARKETPLACES_FILE}.tmp" \
-        && mv "${MARKETPLACES_FILE}.tmp" "$MARKETPLACES_FILE"
+    safe_jq_write --arg key "$MARKETPLACE_NAME" --argjson entry "$MARKETPLACE_ENTRY" \
+        '.[$key] = $entry' "$MARKETPLACES_FILE"

    echo "Marketplace registered in known_marketplaces.json"

@@ -126,13 +170,11 @@ EOF
        MCP_ENTRY=$(echo "$MCP_ENTRY" | sed "s|MCP_BINARY_PLACEHOLDER|$MCP_BINARY|g")

        # Add or update mcpServers field
-        if jq --arg key "claude-mnemonic" --argjson entry "$MCP_ENTRY" \
-            '.mcpServers //= {} | .mcpServers[$key] = $entry' "$SETTINGS_FILE" > "${SETTINGS_FILE}.tmp"; then
-            mv "${SETTINGS_FILE}.tmp" "$SETTINGS_FILE"
+        if safe_jq_write --arg key "claude-mnemonic" --argjson entry "$MCP_ENTRY" \
+            '.mcpServers //= {} | .mcpServers[$key] = $entry' "$SETTINGS_FILE"; then
            echo "MCP server registered successfully"
        else
            echo "Warning: Failed to register MCP server (jq error)"
-            rm -f "${SETTINGS_FILE}.tmp"
        fi
    else
        echo "MCP server binary not found at $MCP_BINARY, skipping MCP registration"
@@ -3,6 +3,18 @@

 set -e

+# Stop running worker processes before removing binaries
+echo "Stopping worker processes..."
+pkill -TERM -f 'claude-mnemonic.*worker' 2>/dev/null || true
+pkill -TERM -f '\.claude/plugins/.*/worker' 2>/dev/null || true
+sleep 2
+# Force kill if still running
+pkill -9 -f 'claude-mnemonic.*worker' 2>/dev/null || true
+pkill -9 -f '\.claude/plugins/.*/worker' 2>/dev/null || true
+# Clean up port
+lsof -ti :37777 | xargs kill -9 2>/dev/null || true
+sleep 1
+
 PLUGINS_FILE="$HOME/.claude/plugins/installed_plugins.json"
 SETTINGS_FILE="$HOME/.claude/settings.json"
 MARKETPLACES_FILE="$HOME/.claude/plugins/known_marketplaces.json"
@@ -30,16 +42,17 @@ else
    echo "No plugins file found, skipping"
 fi

-# Remove from settings.json (enabledPlugins and statusLine if it points to our plugin)
+# Remove from settings.json (enabledPlugins, statusLine, and mcpServers)
 if [ -f "$SETTINGS_FILE" ]; then
-    # Remove from enabledPlugins and clear statusLine if it references our plugin
+    # Remove from enabledPlugins, clear statusLine if it references our plugin, and remove MCP server
    jq --arg key "$PLUGIN_KEY" '
        del(.enabledPlugins[$key]) |
        if .statusLine.command and (.statusLine.command | contains("claude-mnemonic")) then
            del(.statusLine)
        else
            .
-        end
+        end |
+        del(.mcpServers["claude-mnemonic"])
    ' "$SETTINGS_FILE" > "${SETTINGS_FILE}.tmp" \
        && mv "${SETTINGS_FILE}.tmp" "$SETTINGS_FILE"
    echo "Plugin removed from settings.json"