mirror of
https://github.com/lukaszraczylo/claude-mnemonic.git
synced 2026-06-05 23:03:55 +00:00
fix: address 15 additional hang vectors found during deep audit (#45)
MCP server (5 fixes):
- Move semaphore acquisition inside goroutine so main loop stays
responsive when all slots are taken
- Add 10s write timeout to sendResponse to prevent pipe deadlock
when Claude Code pauses reading stdout
- Send fallback JSON-RPC error when json.Marshal fails instead of
silently swallowing the error and leaving caller waiting forever
- Silence unknown notification methods (req.ID == nil) instead of
sending unsolicited error responses that may desync the host
- Return MCP isError content for tool failures instead of top-level
JSON-RPC error, matching the MCP specification
Vector/embedding (3 fixes):
- Move EmbedBatchWithContext call before writeMu.Lock in AddDocuments
so ONNX inference runs outside the write lock
- Replace singleflight.Do with DoChan + ctx select in both
getOrComputeEmbedding and UnifiedSearch so callers can bail out
independently when their context expires
- Add activeQueries atomic counter; skip cache warming when user
queries are in-flight; reduce warming timeout from 5s to 2s
Hooks (4 fixes):
- Cap EnsureWorkerRunning to 15s hard deadline with context; reduce
StartupTimeout from 30s to 10s; reduce port-in-use retries
- Fix nil dereference panic in user-prompt hook when initResult is
nil (non-JSON worker response); use comma-ok assertions
- Use package-level hookClient/healthClient with DisableKeepAlives
to prevent FD leaks in short-lived hook processes
- Set SysProcAttr{Setpgid: true} to detach worker from hook process
group, preventing kill-cascade from Claude Code
Worker/DB (3 fixes):
- Replace os.Exit(0) in MCP config watcher with context cancellation
for clean protocol shutdown
- Add 60s context.WithTimeout around ProcessObservation calls in
processAllSessions to prevent hung CLI subprocesses from blocking
the queue processor forever
- Set explicit PRAGMA wal_autocheckpoint=1000 and add PASSIVE WAL
checkpoint to Optimize() to prevent checkpoint stalls
Adds 20+ regression tests across all fix areas.
This commit is contained in:
@@ -99,8 +99,9 @@ func NewStore(cfg Config) (*Store, error) {
|
||||
"PRAGMA synchronous=NORMAL",
|
||||
"PRAGMA cache_size=-64000", // 64MB cache (negative = KB)
|
||||
"PRAGMA temp_store=MEMORY", // Store temp tables in memory
|
||||
"PRAGMA mmap_size=268435456", // 256MB memory-mapped I/O
|
||||
"PRAGMA page_size=4096", // 4KB pages (optimal for most systems)
|
||||
"PRAGMA mmap_size=268435456", // 256MB memory-mapped I/O
|
||||
"PRAGMA page_size=4096", // 4KB pages (optimal for most systems)
|
||||
"PRAGMA wal_autocheckpoint=1000", // Explicit default; checkpoint every 1000 WAL frames
|
||||
}
|
||||
for _, pragma := range pragmas {
|
||||
if _, err := sqlDB.Exec(pragma); err != nil {
|
||||
@@ -192,6 +193,11 @@ func (s *Store) Optimize(ctx context.Context) error {
|
||||
log.Warn().Err(err).Msg("PRAGMA optimize failed (non-fatal)")
|
||||
}
|
||||
|
||||
// Passive WAL checkpoint — doesn't block readers/writers
|
||||
if _, err := s.sqlDB.ExecContext(ctx, "PRAGMA wal_checkpoint(PASSIVE)"); err != nil {
|
||||
log.Warn().Err(err).Msg("WAL checkpoint failed (non-fatal)")
|
||||
}
|
||||
|
||||
log.Info().Dur("duration", time.Since(start)).Msg("Database optimization complete")
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
package gorm
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
@@ -150,3 +151,91 @@ func TestMigrationIdempotency(t *testing.T) {
|
||||
|
||||
t.Logf("✅ Migrations are idempotent")
|
||||
}
|
||||
|
||||
func TestWALAutocheckpoint(t *testing.T) {
|
||||
tmpDir, err := os.MkdirTemp("", "gorm_wal_checkpoint_*")
|
||||
if err != nil {
|
||||
t.Fatalf("create temp dir: %v", err)
|
||||
}
|
||||
defer os.RemoveAll(tmpDir)
|
||||
|
||||
dbPath := filepath.Join(tmpDir, "test.db")
|
||||
store, err := NewStore(Config{
|
||||
Path: dbPath,
|
||||
MaxConns: 2,
|
||||
LogLevel: logger.Silent,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("NewStore failed: %v", err)
|
||||
}
|
||||
defer store.Close()
|
||||
|
||||
// Verify wal_autocheckpoint is set to 1000
|
||||
var checkpoint int
|
||||
err = store.GetRawDB().QueryRow("PRAGMA wal_autocheckpoint").Scan(&checkpoint)
|
||||
if err != nil {
|
||||
t.Fatalf("query wal_autocheckpoint: %v", err)
|
||||
}
|
||||
if checkpoint != 1000 {
|
||||
t.Errorf("expected wal_autocheckpoint=1000, got %d", checkpoint)
|
||||
}
|
||||
}
|
||||
|
||||
func TestOptimize_RunsWALCheckpoint(t *testing.T) {
|
||||
tmpDir, err := os.MkdirTemp("", "gorm_optimize_*")
|
||||
if err != nil {
|
||||
t.Fatalf("create temp dir: %v", err)
|
||||
}
|
||||
defer os.RemoveAll(tmpDir)
|
||||
|
||||
dbPath := filepath.Join(tmpDir, "test.db")
|
||||
store, err := NewStore(Config{
|
||||
Path: dbPath,
|
||||
MaxConns: 2,
|
||||
LogLevel: logger.Silent,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("NewStore failed: %v", err)
|
||||
}
|
||||
defer store.Close()
|
||||
|
||||
// Insert some data to generate WAL frames
|
||||
_, err = store.GetRawDB().Exec("INSERT INTO observations (sdk_session_id, title, scope, project, type, created_at, created_at_epoch) VALUES ('test-sess', 'test data', 'project', '/tmp/test', 'decision', '2026-01-01T00:00:00Z', 1735689600)")
|
||||
if err != nil {
|
||||
t.Fatalf("insert test data: %v", err)
|
||||
}
|
||||
|
||||
// Optimize should succeed (includes PASSIVE WAL checkpoint)
|
||||
err = store.Optimize(context.Background())
|
||||
if err != nil {
|
||||
t.Fatalf("Optimize failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestOptimize_RespectsContextCancellation(t *testing.T) {
|
||||
tmpDir, err := os.MkdirTemp("", "gorm_optimize_cancel_*")
|
||||
if err != nil {
|
||||
t.Fatalf("create temp dir: %v", err)
|
||||
}
|
||||
defer os.RemoveAll(tmpDir)
|
||||
|
||||
dbPath := filepath.Join(tmpDir, "test.db")
|
||||
store, err := NewStore(Config{
|
||||
Path: dbPath,
|
||||
MaxConns: 2,
|
||||
LogLevel: logger.Silent,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("NewStore failed: %v", err)
|
||||
}
|
||||
defer store.Close()
|
||||
|
||||
// Already-cancelled context should cause Optimize to fail
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
cancel()
|
||||
|
||||
err = store.Optimize(ctx)
|
||||
if err == nil {
|
||||
t.Error("expected error with cancelled context, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user