Optimize exact match by applying the same trick used for fuzzy match
This commit is contained in:
parent
69aa2fea68
commit
8db3345c2f
@ -274,6 +274,41 @@ func trySkip(input *util.Chars, caseSensitive bool, b byte, from int) int {
|
|||||||
return from + idx
|
return from + idx
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// isAscii reports whether every element of runes is a 7-bit ASCII code point,
// i.e. lies in the range [0, utf8.RuneSelf). A nil or empty slice yields true.
func isAscii(runes []rune) bool {
	for _, r := range runes {
		// rune is a signed 32-bit integer, so a negative value would pass a
		// bare "r >= utf8.RuneSelf" test even though it is not a valid code
		// point. Reject it explicitly so that a later byte(r) conversion can
		// never silently truncate a non-ASCII value.
		if r < 0 || r >= utf8.RuneSelf {
			return false
		}
	}
	return true
}
|
||||||
|
|
||||||
|
func asciiFuzzyIndex(input *util.Chars, pattern []rune, caseSensitive bool) int {
|
||||||
|
// Can't determine
|
||||||
|
if !input.IsBytes() {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// Not possible
|
||||||
|
if !isAscii(pattern) {
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
|
||||||
|
firstIdx, idx := 0, 0
|
||||||
|
for pidx := 0; pidx < len(pattern); pidx++ {
|
||||||
|
idx = trySkip(input, caseSensitive, byte(pattern[pidx]), idx)
|
||||||
|
if idx < 0 {
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
if pidx == 0 && idx > 0 {
|
||||||
|
// Step back to find the right bonus point
|
||||||
|
firstIdx = idx - 1
|
||||||
|
}
|
||||||
|
idx++
|
||||||
|
}
|
||||||
|
return firstIdx
|
||||||
|
}
|
||||||
|
|
||||||
func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) {
|
func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) {
|
||||||
// Assume that pattern is given in lowercase if case-insensitive.
|
// Assume that pattern is given in lowercase if case-insensitive.
|
||||||
// First check if there's a match and calculate bonus for each position.
|
// First check if there's a match and calculate bonus for each position.
|
||||||
@ -302,30 +337,15 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.C
|
|||||||
offset32, T := alloc32(offset32, slab, N, false)
|
offset32, T := alloc32(offset32, slab, N, false)
|
||||||
|
|
||||||
// Phase 1. Optimized search for ASCII string
|
// Phase 1. Optimized search for ASCII string
|
||||||
firstIdx := 0
|
idx := asciiFuzzyIndex(&input, pattern, caseSensitive)
|
||||||
if input.IsBytes() {
|
|
||||||
idx := 0
|
|
||||||
for pidx := 0; pidx < M; pidx++ {
|
|
||||||
// Not possible
|
|
||||||
if pattern[pidx] >= utf8.RuneSelf {
|
|
||||||
return Result{-1, -1, 0}, nil
|
|
||||||
}
|
|
||||||
idx = trySkip(&input, caseSensitive, byte(pattern[pidx]), idx)
|
|
||||||
if idx < 0 {
|
if idx < 0 {
|
||||||
return Result{-1, -1, 0}, nil
|
return Result{-1, -1, 0}, nil
|
||||||
}
|
}
|
||||||
if pidx == 0 && idx > 0 {
|
|
||||||
// Step back to find the right bonus point
|
|
||||||
firstIdx = idx - 1
|
|
||||||
}
|
|
||||||
idx++
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Phase 2. Calculate bonus for each point
|
// Phase 2. Calculate bonus for each point
|
||||||
pidx, lastIdx, prevClass := 0, 0, charNonWord
|
pidx, lastIdx, prevClass := 0, 0, charNonWord
|
||||||
input.CopyRunes(T)
|
input.CopyRunes(T)
|
||||||
for idx := firstIdx; idx < N; idx++ {
|
for ; idx < N; idx++ {
|
||||||
char := T[idx]
|
char := T[idx]
|
||||||
var class charClass
|
var class charClass
|
||||||
if char <= unicode.MaxASCII {
|
if char <= unicode.MaxASCII {
|
||||||
@ -657,6 +677,10 @@ func ExactMatchNaive(caseSensitive bool, normalize bool, forward bool, text util
|
|||||||
return Result{-1, -1, 0}, nil
|
return Result{-1, -1, 0}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if asciiFuzzyIndex(&text, pattern, caseSensitive) < 0 {
|
||||||
|
return Result{-1, -1, 0}, nil
|
||||||
|
}
|
||||||
|
|
||||||
// For simplicity, only look at the bonus at the first character position
|
// For simplicity, only look at the bonus at the first character position
|
||||||
pidx := 0
|
pidx := 0
|
||||||
bestPos, bonus, bestBonus := -1, int16(0), int16(-1)
|
bestPos, bonus, bestBonus := -1, int16(0), int16(-1)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user