Files
filepuff-mcp/internal/util/regex_cache.go
T

142 lines
3.8 KiB
Go

// Package util provides shared utility functions and caches.
package util
import (
"fmt"
"regexp"
"sync"
"sync/atomic"
)
const (
// MaxPatternLength is the longest pattern ValidatePattern accepts.
// The limit is compared against len(pattern), so it is measured in bytes,
// not runes. Bounding the length guards against memory issues from
// extremely long patterns.
MaxPatternLength = 1000
// MaxCacheSize is the number of cached patterns at which CompileRegex
// clears the whole cache before storing a new entry. This keeps the cache
// from growing without bound.
MaxCacheSize = 10000
)
// Package-level state backing the compiled-regex cache used by CompileRegex.
// Caching avoids recompiling repeated patterns (the original note here cites
// a 10-50x speedup for repeated patterns; that figure is not verified in this
// file).
var (
// regexCache maps a pattern string to its compiled *regexp.Regexp.
// sync.Map is safe for concurrent use without additional locking.
regexCache sync.Map // string -> *regexp.Regexp
// cacheSize counts the entries held in regexCache. CompileRegex increments
// it when it inserts a new entry and ClearRegexCache brings it back down.
cacheSize atomic.Int64
)
// RegexError describes why a pattern was rejected, either by validation or
// by the regexp compiler.
type RegexError struct {
	// Pattern is the offending pattern (callers in this file pass a
	// truncated copy suitable for display).
	Pattern string
	// Reason is a short human-readable explanation of the failure.
	Reason string
	// Err is the underlying cause, if any; nil for pure validation failures.
	Err error
}

// Error implements the error interface. The underlying cause is appended to
// the message only when one is present.
func (e *RegexError) Error() string {
	if e.Err == nil {
		return fmt.Sprintf("regex error for pattern %q: %s", e.Pattern, e.Reason)
	}
	return fmt.Sprintf("regex error for pattern %q: %s: %v", e.Pattern, e.Reason, e.Err)
}

// Unwrap exposes the underlying cause so errors.Is and errors.As can
// inspect it. It returns nil when there is no wrapped error.
func (e *RegexError) Unwrap() error {
	return e.Err
}
// ValidatePattern performs a cheap safety check on a regex pattern before it
// is compiled.
//
// The only check is on length: a pattern longer than MaxPatternLength bytes
// is rejected with a *RegexError; anything else yields nil. No check for
// catastrophic backtracking is made, because Go's regexp package matches in
// time linear in the input size. The length cap exists to limit memory use
// during compilation.
func ValidatePattern(pattern string) error {
	size := len(pattern)
	if size <= MaxPatternLength {
		return nil
	}
	return &RegexError{
		Pattern: truncatePattern(pattern),
		Reason:  fmt.Sprintf("pattern too long (%d chars, max %d)", size, MaxPatternLength),
	}
}
// CompileRegex returns a compiled regexp for pattern, reusing a previously
// compiled instance from the package-level cache when one exists.
//
// The pattern is validated with ValidatePattern first. On a cache miss it is
// compiled and published with LoadOrStore, so when several goroutines compile
// the same pattern at once they all end up sharing a single instance. If the
// cache has reached MaxCacheSize it is cleared before the new entry is stored.
//
// It returns a *RegexError when validation fails or the pattern does not
// compile.
func CompileRegex(pattern string) (*regexp.Regexp, error) {
	if verr := ValidatePattern(pattern); verr != nil {
		return nil, verr
	}

	// Fast path: the pattern has already been compiled.
	if hit, found := regexCache.Load(pattern); found {
		return hit.(*regexp.Regexp), nil
	}

	compiled, compileErr := regexp.Compile(pattern)
	if compileErr != nil {
		return nil, &RegexError{
			Pattern: truncatePattern(pattern),
			Reason:  "invalid regex syntax",
			Err:     compileErr,
		}
	}

	// Keep the cache bounded: drop everything once the limit is reached.
	if cacheSize.Load() >= MaxCacheSize {
		ClearRegexCache()
	}

	// Publish our instance unless another goroutine won the race, in which
	// case theirs is returned and ours is discarded.
	winner, existed := regexCache.LoadOrStore(pattern, compiled)
	if existed {
		return winner.(*regexp.Regexp), nil
	}
	cacheSize.Add(1)
	return winner.(*regexp.Regexp), nil
}
// CompileRegexUncached validates and compiles pattern without consulting or
// populating the cache. Use it for one-off patterns that would otherwise
// take up cache space.
//
// It returns a *RegexError when validation fails or the pattern does not
// compile.
func CompileRegexUncached(pattern string) (*regexp.Regexp, error) {
	if verr := ValidatePattern(pattern); verr != nil {
		return nil, verr
	}

	compiled, compileErr := regexp.Compile(pattern)
	if compileErr != nil {
		wrapped := &RegexError{
			Pattern: truncatePattern(pattern),
			Reason:  "invalid regex syntax",
			Err:     compileErr,
		}
		return nil, wrapped
	}
	return compiled, nil
}
// ClearRegexCache removes every entry currently visible in the regex cache.
// Useful for testing or when memory usage needs to be reduced.
//
// The size counter is decremented once per entry that this call actually
// removes, rather than being reset to zero. A blanket reset would discard
// the increments of entries inserted concurrently while the clear is in
// progress, leaving the counter lower than the real number of cached
// patterns. With no concurrent writers the counter ends at zero, as before.
// While insertions are in flight the counter may be momentarily off by the
// number of those insertions.
func ClearRegexCache() {
	regexCache.Range(func(key, _ any) bool {
		// LoadAndDelete reports whether this call removed the key, so
		// concurrent clears never double-count the same entry.
		if _, removed := regexCache.LoadAndDelete(key); removed {
			cacheSize.Add(-1)
		}
		return true
	})
}
// CacheStats reports the value of the cache-size counter, i.e. the number of
// patterns the cache is tracking at the moment of the call.
func CacheStats() int64 {
	count := cacheSize.Load()
	return count
}
// truncatePattern shortens a pattern for display in error messages.
//
// Patterns of at most 50 bytes are returned unchanged. Longer patterns are
// cut to at most 47 bytes and suffixed with "...". The cut is moved back to
// the nearest rune boundary so a multi-byte UTF-8 character is never split,
// which would otherwise put invalid UTF-8 into the message.
func truncatePattern(pattern string) string {
	const (
		maxDisplay = 50
		ellipsis   = "..."
		keep       = maxDisplay - len(ellipsis)
	)
	if len(pattern) <= maxDisplay {
		return pattern
	}

	// Ranging over a string yields the byte offset at which each rune
	// starts; remember the last such offset that does not exceed keep.
	cut := 0
	for start := range pattern {
		if start > keep {
			break
		}
		cut = start
	}
	return pattern[:cut] + ellipsis
}