// Package expansion provides context-aware query expansion for improved search recall.
package expansion

import (
	"context"
	"math"
	"regexp"
	"strings"
	"sync"

	"github.com/lukaszraczylo/claude-mnemonic/internal/embedding"
	"github.com/rs/zerolog/log"
)

// QueryIntent represents the detected intent of a query.
type QueryIntent string

const (
	// IntentQuestion indicates a question-type query (how, why, what, etc.)
	IntentQuestion QueryIntent = "question"
	// IntentError indicates an error/debugging query
	IntentError QueryIntent = "error"
	// IntentImplementation indicates an implementation/coding query
	IntentImplementation QueryIntent = "implementation"
	// IntentArchitecture indicates an architecture/design query
	IntentArchitecture QueryIntent = "architecture"
	// IntentGeneral indicates a general lookup query
	IntentGeneral QueryIntent = "general"
)

// ExpandedQuery represents a query variant with metadata.
type ExpandedQuery struct {
	Query  string      `json:"query"`
	Source string      `json:"source"`
	Intent QueryIntent `json:"intent"`
	Weight float64     `json:"weight"`
}

// Expander provides context-aware query expansion.
type Expander struct {
	embedSvc       *embedding.Service
	intentPatterns map[QueryIntent][]*regexp.Regexp
	vocabulary     []VocabEntry
	vocabVectors   [][]float32
	vocabMu        sync.RWMutex
}

// VocabEntry represents a vocabulary term from observations.
type VocabEntry struct {
	Term   string
	Source string
	Weight float64
}

// Config holds expander configuration.
type Config struct {
	// MaxExpansions limits the number of expanded queries returned
	MaxExpansions int
	// MinSimilarity is the minimum similarity score for vocabulary expansion
	MinSimilarity float64
	// EnableVocabularyExpansion enables finding related terms from observations
	EnableVocabularyExpansion bool
}

// DefaultConfig returns sensible default configuration.
func DefaultConfig() Config {
	return Config{
		MaxExpansions:             4,
		MinSimilarity:             0.5,
		EnableVocabularyExpansion: true,
	}
}

// NewExpander creates a new query expander.
func NewExpander(embedSvc *embedding.Service) *Expander { e := &Expander{ embedSvc: embedSvc, intentPatterns: buildIntentPatterns(), } return e } // buildIntentPatterns creates regex patterns for intent detection. func buildIntentPatterns() map[QueryIntent][]*regexp.Regexp { patterns := make(map[QueryIntent][]*regexp.Regexp) // Question patterns patterns[IntentQuestion] = []*regexp.Regexp{ regexp.MustCompile(`(?i)^(how|why|what|when|where|which|who)\b`), regexp.MustCompile(`(?i)\?$`), regexp.MustCompile(`(?i)\b(explain|describe|understand)\b`), } // Error/debugging patterns patterns[IntentError] = []*regexp.Regexp{ regexp.MustCompile(`(?i)\b(error|bug|issue|problem|fail|crash|exception|panic)\b`), regexp.MustCompile(`(?i)\b(fix|debug|troubleshoot|resolve)\b`), regexp.MustCompile(`(?i)\b(doesn't work|not working|broken)\b`), } // Implementation patterns patterns[IntentImplementation] = []*regexp.Regexp{ regexp.MustCompile(`(?i)\b(implement|add|create|build|write|code)\b`), regexp.MustCompile(`(?i)\b(function|method|handler|endpoint|api)\b`), regexp.MustCompile(`(?i)\b(feature|functionality)\b`), } // Architecture patterns patterns[IntentArchitecture] = []*regexp.Regexp{ regexp.MustCompile(`(?i)\b(architecture|design|pattern|structure)\b`), regexp.MustCompile(`(?i)\b(component|module|layer|service)\b`), regexp.MustCompile(`(?i)\b(flow|pipeline|workflow)\b`), } return patterns } // DetectIntent analyzes a query to determine its intent. func (e *Expander) DetectIntent(query string) QueryIntent { query = strings.TrimSpace(query) if query == "" { return IntentGeneral } // Check patterns in priority order intentOrder := []QueryIntent{IntentError, IntentQuestion, IntentImplementation, IntentArchitecture} for _, intent := range intentOrder { patterns := e.intentPatterns[intent] for _, pattern := range patterns { if pattern.MatchString(query) { return intent } } } return IntentGeneral } // Expand generates expanded query variants based on the original query. 
func (e *Expander) Expand(ctx context.Context, query string, cfg Config) []ExpandedQuery { query = strings.TrimSpace(query) if query == "" { return nil } intent := e.DetectIntent(query) expansions := make([]ExpandedQuery, 0, cfg.MaxExpansions) // Always include the original query with highest weight expansions = append(expansions, ExpandedQuery{ Query: query, Weight: 1.0, Source: "original", Intent: intent, }) // Generate intent-based expansions intentExpansions := e.expandByIntent(query, intent) expansions = append(expansions, intentExpansions...) // Generate vocabulary-based expansions if enabled and we have vocabulary if cfg.EnableVocabularyExpansion && e.embedSvc != nil && len(e.vocabulary) > 0 { vocabExpansions := e.expandByVocabulary(ctx, query, cfg.MinSimilarity) expansions = append(expansions, vocabExpansions...) } // Deduplicate and limit expansions = deduplicateExpansions(expansions) if len(expansions) > cfg.MaxExpansions { expansions = expansions[:cfg.MaxExpansions] } log.Debug(). Str("query", truncate(query, 50)). Str("intent", string(intent)). Int("expansions", len(expansions)). Msg("Query expanded") return expansions } // expandByIntent generates expansions based on detected query intent. 
func (e *Expander) expandByIntent(query string, intent QueryIntent) []ExpandedQuery { var expansions []ExpandedQuery // Extract key terms from query for context-aware expansion keyTerms := extractKeyTerms(query) switch intent { case IntentQuestion: // For questions, create a declarative variant declarative := makeDeclarative(query) if declarative != query { expansions = append(expansions, ExpandedQuery{ Query: declarative, Weight: 0.85, Source: "intent:declarative", Intent: intent, }) } case IntentError: // For errors, expand with solution-oriented terms if len(keyTerms) > 0 { solutionQuery := strings.Join(keyTerms, " ") + " solution fix" expansions = append(expansions, ExpandedQuery{ Query: solutionQuery, Weight: 0.8, Source: "intent:solution", Intent: intent, }) } case IntentImplementation: // For implementation queries, focus on the what/how if len(keyTerms) > 0 { howQuery := "how " + strings.Join(keyTerms, " ") expansions = append(expansions, ExpandedQuery{ Query: howQuery, Weight: 0.75, Source: "intent:how", Intent: intent, }) } case IntentArchitecture: // For architecture queries, expand with design context if len(keyTerms) > 0 { designQuery := strings.Join(keyTerms, " ") + " design structure" expansions = append(expansions, ExpandedQuery{ Query: designQuery, Weight: 0.75, Source: "intent:design", Intent: intent, }) } case IntentGeneral: // For general queries, try noun phrase extraction // No additional expansion - rely on vocabulary expansion } return expansions } // expandByVocabulary finds similar terms from the observation vocabulary. 
func (e *Expander) expandByVocabulary(ctx context.Context, query string, minSimilarity float64) []ExpandedQuery { e.vocabMu.RLock() defer e.vocabMu.RUnlock() if len(e.vocabulary) == 0 || e.embedSvc == nil { return nil } // Embed the query queryEmb, err := e.embedSvc.Embed(query) if err != nil { log.Warn().Err(err).Msg("Failed to embed query for vocabulary expansion") return nil } // Find similar vocabulary terms type scoredTerm struct { entry VocabEntry score float64 } var similar []scoredTerm for i, entry := range e.vocabulary { if i >= len(e.vocabVectors) { break } score := cosineSimilarity(queryEmb, e.vocabVectors[i]) if score >= minSimilarity { similar = append(similar, scoredTerm{entry: entry, score: score}) } } if len(similar) == 0 { return nil } // Sort by score (descending) using bubble sort for i := 0; i < len(similar)-1; i++ { for j := i + 1; j < len(similar); j++ { if similar[j].score > similar[i].score { similar[i], similar[j] = similar[j], similar[i] } } } // Create expansion by combining top similar terms with query var expansions []ExpandedQuery if len(similar) > 0 { // Take top 2 similar terms and combine with original key terms keyTerms := extractKeyTerms(query) for i := 0; i < min(2, len(similar)); i++ { term := similar[i].entry.Term // Don't add if term is already in query if strings.Contains(strings.ToLower(query), strings.ToLower(term)) { continue } combinedQuery := strings.Join(keyTerms, " ") + " " + term expansions = append(expansions, ExpandedQuery{ Query: combinedQuery, Weight: 0.7 * similar[i].score * similar[i].entry.Weight, Source: "vocabulary:" + term, Intent: IntentGeneral, }) } } return expansions } // Helper functions // extractKeyTerms extracts meaningful terms from a query. 
//
// The query is lower-cased and split on whitespace. Surrounding punctuation is
// stripped from each word; words shorter than two bytes and words listed in
// keyTermStopWords are dropped. Order is preserved and repeated words are
// kept. The result is nil when no word survives.
func extractKeyTerms(query string) []string {
	var terms []string
	for _, word := range strings.Fields(strings.ToLower(query)) {
		// Remove punctuation from both ends of the word.
		word = strings.Trim(word, ".,?!;:'\"()[]{}")
		if len(word) < 2 {
			continue
		}
		if _, stop := keyTermStopWords[word]; stop {
			continue
		}
		terms = append(terms, word)
	}
	return terms
}

// keyTermStopWords is the set of common words extractKeyTerms filters out.
// It is built once at package initialisation and never modified afterwards.
var keyTermStopWords = map[string]struct{}{
	"a": {}, "an": {}, "the": {}, "is": {}, "are": {}, "was": {}, "were": {},
	"be": {}, "been": {}, "being": {}, "have": {}, "has": {}, "had": {},
	"do": {}, "does": {}, "did": {}, "will": {}, "would": {}, "could": {},
	"should": {}, "may": {}, "might": {}, "must": {}, "can": {},
	"i": {}, "me": {}, "my": {}, "we": {}, "our": {}, "you": {}, "your": {},
	"it": {}, "its": {}, "this": {}, "that": {}, "these": {}, "those": {},
	"what": {}, "which": {}, "who": {}, "whom": {}, "how": {}, "why": {},
	"when": {}, "where": {}, "to": {}, "for": {}, "with": {}, "about": {},
	"from": {}, "in": {}, "on": {}, "at": {}, "by": {}, "of": {},
	"and": {}, "or": {}, "but": {}, "if": {}, "then": {},
}

// makeDeclarative converts a question to a declarative statement.
//
// Surrounding whitespace and any trailing question marks are removed. If the
// remainder starts, case-insensitively, with one of a fixed set of question
// lead-ins ("how do i ", "what is ", ...), that lead-in is removed as well.
// The result is never empty for a non-empty input: a query consisting only of
// question marks is returned trimmed but otherwise unchanged, so callers that
// compare the result with their input see no change.
func makeDeclarative(query string) string {
	trimmed := strings.TrimSpace(query)

	// Strip trailing question marks together with any whitespace before them.
	stripped := trimmed
	for strings.HasSuffix(stripped, "?") {
		stripped = strings.TrimSpace(strings.TrimSuffix(stripped, "?"))
	}
	if stripped == "" {
		return trimmed
	}

	// Question lead-ins, checked in order.
	leadIns := [...]string{
		"how do i ",
		"how to ",
		"how does ",
		"how is ",
		"what is ",
		"what are ",
		"why does ",
		"why is ",
		"where is ",
		"where are ",
		"when does ",
		"when is ",
	}
	for _, leadIn := range leadIns {
		// Compare against the query's own leading bytes so the cut below
		// uses an offset that is valid for the string being sliced.
		if len(stripped) > len(leadIn) && strings.EqualFold(stripped[:len(leadIn)], leadIn) {
			return strings.TrimSpace(stripped[len(leadIn):])
		}
	}
	return stripped
}

// deduplicateExpansions removes duplicate queries while preserving order.
func deduplicateExpansions(expansions []ExpandedQuery) []ExpandedQuery { seen := make(map[string]bool) result := make([]ExpandedQuery, 0, len(expansions)) for _, exp := range expansions { normalized := strings.ToLower(strings.TrimSpace(exp.Query)) if !seen[normalized] { seen[normalized] = true result = append(result, exp) } } return result } // cosineSimilarity computes cosine similarity between two vectors. func cosineSimilarity(a, b []float32) float64 { if len(a) != len(b) || len(a) == 0 { return 0 } var dot, normA, normB float64 for i := range a { dot += float64(a[i]) * float64(b[i]) normA += float64(a[i]) * float64(a[i]) normB += float64(b[i]) * float64(b[i]) } if normA == 0 || normB == 0 { return 0 } return dot / (sqrt(normA) * sqrt(normB)) } // sqrt is a simple square root implementation. func sqrt(x float64) float64 { if x <= 0 { return 0 } z := x for i := 0; i < 10; i++ { z = (z + x/z) / 2 } return z } // truncate truncates a string to maxLen characters. func truncate(s string, maxLen int) string { if len(s) <= maxLen { return s } return s[:maxLen] + "..." }