Files
filepuff-mcp/pkg/fuzzy/fuzzy_test.go
T
2026-01-18 18:40:26 +00:00

276 lines
6.1 KiB
Go

package fuzzy
import (
"testing"
)
// TestLevenshteinDistance runs levenshteinDistance over a table of string
// pairs and reports every pair whose result differs from the expected edit
// distance.
func TestLevenshteinDistance(t *testing.T) {
	type distanceCase struct {
		s1, s2   string
		expected int
	}
	cases := []distanceCase{
		// Empty inputs: the expected distance is the length of the other string.
		{s1: "", s2: "", expected: 0},
		{s1: "", s2: "abc", expected: 3},
		{s1: "abc", s2: "", expected: 3},
		// Identical strings, then a single substitution.
		{s1: "abc", s2: "abc", expected: 0},
		{s1: "abc", s2: "abd", expected: 1},
		// Well-known example pairs.
		{s1: "kitten", s2: "sitting", expected: 3},
		{s1: "saturday", s2: "sunday", expected: 3},
		{s1: "book", s2: "back", expected: 2},
		// "é" occupies two bytes in UTF-8, so a distance of 1 only holds when
		// the strings are compared rune by rune rather than byte by byte.
		{s1: "café", s2: "cafe", expected: 1},
	}
	for _, c := range cases {
		if d := levenshteinDistance(c.s1, c.s2); d != c.expected {
			t.Errorf("levenshteinDistance(%q, %q) = %d, want %d", c.s1, c.s2, d, c.expected)
		}
	}
}
// TestDamerauLevenshteinDistance checks DamerauLevenshteinDistance on a small
// table covering identical strings, an adjacent transposition, the "ca"/"abc"
// pair, and an empty input.
func TestDamerauLevenshteinDistance(t *testing.T) {
	type distanceCase struct {
		s1, s2   string
		expected int
	}
	cases := []distanceCase{
		{s1: "abc", s2: "abc", expected: 0},
		// Swapping the adjacent "b" and "c" counts as a single edit.
		{s1: "abc", s2: "acb", expected: 1},
		// Expecting 3 matches the restricted (optimal string alignment)
		// variant, e.g. delete "c", insert "b", insert "c". The unrestricted
		// Damerau-Levenshtein distance for this pair would be 2 (transpose,
		// then insert "b").
		// NOTE(review): confirm the restricted variant is the intended one.
		{s1: "ca", s2: "abc", expected: 3},
		{s1: "", s2: "abc", expected: 3},
	}
	for _, c := range cases {
		if d := DamerauLevenshteinDistance(c.s1, c.s2); d != c.expected {
			t.Errorf("DamerauLevenshteinDistance(%q, %q) = %d, want %d", c.s1, c.s2, d, c.expected)
		}
	}
}
// TestJaroWinklerSimilarity checks that JaroWinklerSimilarity falls inside an
// expected [minScore, maxScore] window for each pair.
//
// A lower bound alone cannot catch a result that is too high: the
// empty-versus-non-empty case would pass for any non-negative value, and a
// score above 1.0 would go unnoticed. Each case therefore also carries an
// upper bound.
func TestJaroWinklerSimilarity(t *testing.T) {
	tests := []struct {
		s1       string
		s2       string
		minScore float64 // Lowest acceptable similarity
		maxScore float64 // Highest acceptable similarity
	}{
		{"", "", 1.0, 1.0},
		{"abc", "abc", 1.0, 1.0},
		{"martha", "marhta", 0.96, 1.0},  // High similarity for transposition
		{"dixon", "dicksonx", 0.76, 1.0}, // Moderate similarity
		{"", "abc", 0.0, 0.0},            // Nothing in common: must be exactly zero
	}
	for _, tt := range tests {
		got := JaroWinklerSimilarity(tt.s1, tt.s2)
		if got < tt.minScore {
			t.Errorf("JaroWinklerSimilarity(%q, %q) = %.2f, want >= %.2f", tt.s1, tt.s2, got, tt.minScore)
		}
		if got > tt.maxScore {
			t.Errorf("JaroWinklerSimilarity(%q, %q) = %.2f, want <= %.2f", tt.s1, tt.s2, got, tt.maxScore)
		}
	}
}
// TestMatcher_Match runs several queries against a fixed candidate list and
// checks, for each query, that enough matches come back and that the results
// are ordered by non-increasing Score.
func TestMatcher_Match(t *testing.T) {
	m := New(2) // Allow edit distance up to 2
	candidates := []string{
		"getUserName",
		"getUsername",
		"get_user_name",
		"getUserId",
		"setUserName",
		"findUser",
		"userName",
		"usernameField",
	}
	tests := []struct {
		query     string
		topMatch  string
		expectMin int
	}{
		{
			query:     "getUserName",
			expectMin: 3, // Exact + similar variants
			topMatch:  "getUserName",
		},
		{
			query:     "getuser",
			expectMin: 2, // Should match getUserName, getUsername at minimum
			topMatch:  "getUserName",
		},
		{
			query:     "username",
			expectMin: 2, // Case-insensitive matches
			topMatch:  "userName",
		},
	}
	for _, tt := range tests {
		matches := m.Match(tt.query, candidates)
		if len(matches) < tt.expectMin {
			t.Errorf("Match(%q) returned %d matches, want at least %d", tt.query, len(matches), tt.expectMin)
		}
		// Compare every adjacent pair: checking only the first and last
		// element would miss results that are out of order in the middle.
		for i := 1; i < len(matches); i++ {
			if matches[i-1].Score < matches[i].Score {
				t.Errorf("Match(%q) results not sorted by score", tt.query)
				break
			}
		}
		// NOTE(review): topMatch was previously never read. Whether these
		// rankings hold depends on scoring details not visible from this
		// test, so a mismatch is logged (visible with -v) instead of failing;
		// promote to t.Errorf once the expected ranking is confirmed.
		if len(matches) > 0 && matches[0].Text != tt.topMatch {
			t.Logf("Match(%q) top result = %q, expected %q", tt.query, matches[0].Text, tt.topMatch)
		}
	}
}
// TestMatcher_EmptyQuery verifies that matching an empty query against a
// non-empty candidate list yields a nil result.
func TestMatcher_EmptyQuery(t *testing.T) {
	matcher := New(2)
	// The check is deliberately against nil, not just zero length.
	if got := matcher.Match("", []string{"test", "example"}); got != nil {
		t.Errorf("Match with empty query should return nil, got %v", got)
	}
}
// TestMatcher_PrefixBonus verifies that a candidate starting with the query
// ranks ahead of a candidate that does not start with it.
func TestMatcher_PrefixBonus(t *testing.T) {
	matcher := New(2)
	ranked := matcher.Match("get", []string{
		"getUserName",  // begins with the query
		"findUserName", // does not begin with the query
	})
	if len(ranked) == 0 {
		t.Fatal("Expected at least one match")
	}
	// The candidate with the query as its prefix must come out on top.
	if top := ranked[0].Text; top != "getUserName" {
		t.Errorf("Expected prefix match to rank first, got %q", top)
	}
}
// TestMatcher_ExactMatchBonus verifies that a candidate identical to the query
// ranks first and that its score reaches the 2.0 threshold this test
// associates with the exact match bonus.
func TestMatcher_ExactMatchBonus(t *testing.T) {
	matcher := New(2)
	ranked := matcher.Match("test", []string{"test", "testing", "tester"})
	if len(ranked) == 0 {
		t.Fatal("Expected at least one match")
	}
	// Both checks are non-fatal so a single run reports rank and score issues.
	if top := ranked[0].Text; top != "test" {
		t.Errorf("Expected exact match to rank first, got %q", top)
	}
	if score := ranked[0].Score; score < 2.0 {
		t.Errorf("Exact match score too low: %.2f", score)
	}
}
// TestContainsWordBoundary checks containsWordBoundary against identifiers in
// camelCase and snake_case, plus an empty text.
func TestContainsWordBoundary(t *testing.T) {
	type boundaryCase struct {
		text, query string
		expected    bool
	}
	cases := []boundaryCase{
		// Query sits at the very start of the text.
		{text: "getUserName", query: "get", expected: true},
		// Query follows a lowercase->uppercase transition. The query is
		// lowercase while the text has "User", so this case also relies on
		// case-insensitive comparison.
		{text: "getUserName", query: "user", expected: true},
		// Query follows an underscore.
		{text: "get_user_name", query: "user", expected: true},
		// Query follows a lowercase->uppercase transition.
		{text: "getUserName", query: "Name", expected: true},
		// Query appears only in the middle of a word.
		{text: "getUserName", query: "ser", expected: false},
		// Empty text never matches.
		{text: "", query: "test", expected: false},
	}
	for _, c := range cases {
		if ok := containsWordBoundary(c.text, c.query); ok != c.expected {
			t.Errorf("containsWordBoundary(%q, %q) = %v, want %v", c.text, c.query, ok, c.expected)
		}
	}
}
// TestMatcher_UnicodeHandling verifies that an ASCII query can fuzzy-match a
// candidate containing a non-ASCII character: "cafe" must return "café" with
// a Distance of at most 2.
func TestMatcher_UnicodeHandling(t *testing.T) {
	matcher := New(2)
	results := matcher.Match("cafe", []string{"café", "resume", "naïve"})
	if len(results) == 0 {
		// Non-fatal: the lookup below still runs and reports separately.
		t.Error("Expected matches for Unicode strings")
	}
	// Scan for the accented candidate within the allowed distance.
	seen := false
	for i := 0; i < len(results) && !seen; i++ {
		seen = results[i].Text == "café" && results[i].Distance <= 2
	}
	if !seen {
		t.Error("Failed to fuzzy match Unicode string 'café'")
	}
}
func BenchmarkLevenshteinDistance(b *testing.B) {
s1 := "the quick brown fox jumps over the lazy dog"
s2 := "the quikc brown fox jumps ovver the lazy dog"
b.ResetTimer()
for i := range b.N {
_ = levenshteinDistance(s1, s2)
_ = i // use i to avoid unused warning
}
}
func BenchmarkDamerauLevenshteinDistance(b *testing.B) {
s1 := "the quick brown fox jumps over the lazy dog"
s2 := "the quikc brown fox jumps ovver the lazy dog"
b.ResetTimer()
for i := range b.N {
_ = DamerauLevenshteinDistance(s1, s2)
_ = i
}
}
func BenchmarkJaroWinklerSimilarity(b *testing.B) {
s1 := "martha"
s2 := "marhta"
b.ResetTimer()
for i := range b.N {
_ = JaroWinklerSimilarity(s1, s2)
_ = i
}
}
// BenchmarkMatcher_Match measures Matcher.Match for the query "getuser"
// against a fixed list of twelve identifier-like candidates.
func BenchmarkMatcher_Match(b *testing.B) {
	m := New(2)
	candidates := []string{
		"getUserName", "getUsername", "get_user_name", "getUserId",
		"setUserName", "findUser", "userName", "usernameField",
		"userAccount", "accountUser", "userProfile", "profileUser",
	}
	// Exclude matcher and candidate construction from the measurement.
	b.ResetTimer()
	// Ranging over b.N without a loop variable removes the need for a
	// throwaway `_ = i` assignment.
	for range b.N {
		_ = m.Match("getuser", candidates)
	}
}