Minor optimization of FuzzyMatchV2
Calculate the first row of the score matrix during phase 2
This commit is contained in:
parent
6aae12288e
commit
941b0a0ff7
@ -360,17 +360,20 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input *util.
|
|||||||
// Reuse pre-allocated integer slice to avoid unnecessary sweeping of garbage
|
// Reuse pre-allocated integer slice to avoid unnecessary sweeping of garbage
|
||||||
offset16 := 0
|
offset16 := 0
|
||||||
offset32 := 0
|
offset32 := 0
|
||||||
|
offset16, H0 := alloc16(offset16, slab, N)
|
||||||
|
offset16, C0 := alloc16(offset16, slab, N)
|
||||||
// Bonus point for each position
|
// Bonus point for each position
|
||||||
offset16, B := alloc16(offset16, slab, N)
|
offset16, B := alloc16(offset16, slab, N)
|
||||||
// The first occurrence of each character in the pattern
|
// The first occurrence of each character in the pattern
|
||||||
offset32, F := alloc32(offset32, slab, M)
|
offset32, F := alloc32(offset32, slab, M)
|
||||||
// Rune array
|
// Rune array
|
||||||
offset32, T := alloc32(offset32, slab, N)
|
offset32, T := alloc32(offset32, slab, N)
|
||||||
|
input.CopyRunes(T)
|
||||||
|
|
||||||
// Phase 2. Calculate bonus for each point
|
// Phase 2. Calculate bonus for each point
|
||||||
pidx, lastIdx, prevClass := 0, 0, charNonWord
|
maxScore, maxScorePos := int16(0), 0
|
||||||
input.CopyRunes(T)
|
pidx, lastIdx := 0, 0
|
||||||
for ; idx < N; idx++ {
|
for pchar0, prevClass, inGap := pattern[0], charNonWord, false; idx < N; idx++ {
|
||||||
char := T[idx]
|
char := T[idx]
|
||||||
var class charClass
|
var class charClass
|
||||||
if char <= unicode.MaxASCII {
|
if char <= unicode.MaxASCII {
|
||||||
@ -392,51 +395,73 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input *util.
|
|||||||
}
|
}
|
||||||
|
|
||||||
T[idx] = char
|
T[idx] = char
|
||||||
B[idx] = bonusFor(prevClass, class)
|
bonus := bonusFor(prevClass, class)
|
||||||
|
B[idx] = bonus
|
||||||
prevClass = class
|
prevClass = class
|
||||||
|
|
||||||
|
if char == pattern[util.Min(pidx, M-1)] {
|
||||||
if pidx < M {
|
if pidx < M {
|
||||||
if char == pattern[pidx] {
|
|
||||||
lastIdx = idx
|
|
||||||
F[pidx] = int32(idx)
|
F[pidx] = int32(idx)
|
||||||
pidx++
|
pidx++
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
if char == pattern[M-1] {
|
|
||||||
lastIdx = idx
|
lastIdx = idx
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if char == pchar0 {
|
||||||
|
score := scoreMatch + bonus*bonusFirstCharMultiplier
|
||||||
|
H0[idx] = score
|
||||||
|
C0[idx] = 1
|
||||||
|
if M == 1 && (forward && score > maxScore || !forward && score >= maxScore) {
|
||||||
|
maxScore, maxScorePos = score, idx
|
||||||
|
if forward && bonus == bonusBoundary {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
inGap = false
|
||||||
|
} else {
|
||||||
|
if idx == 0 {
|
||||||
|
H0[idx] = 0
|
||||||
|
} else if inGap {
|
||||||
|
H0[idx] = util.Max16(H0[idx-1]+scoreGapExtention, 0)
|
||||||
|
} else {
|
||||||
|
H0[idx] = util.Max16(H0[idx-1]+scoreGapStart, 0)
|
||||||
|
}
|
||||||
|
C0[idx] = 0
|
||||||
|
inGap = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if pidx != M {
|
if pidx != M {
|
||||||
return Result{-1, -1, 0}, nil
|
return Result{-1, -1, 0}, nil
|
||||||
}
|
}
|
||||||
if M == 1 && B[F[0]] == bonusBoundary {
|
if M == 1 {
|
||||||
p := int(F[0])
|
result := Result{maxScorePos, maxScorePos + 1, int(maxScore)}
|
||||||
result := Result{p, p + 1, scoreMatch + bonusBoundary*bonusFirstCharMultiplier}
|
|
||||||
if !withPos {
|
if !withPos {
|
||||||
return result, nil
|
return result, nil
|
||||||
}
|
}
|
||||||
pos := []int{p}
|
pos := []int{maxScorePos}
|
||||||
return result, &pos
|
return result, &pos
|
||||||
}
|
}
|
||||||
|
|
||||||
// Phase 3. Fill in score matrix (H)
|
// Phase 3. Fill in score matrix (H)
|
||||||
// Unlike the original algorithm, we do not allow omission.
|
// Unlike the original algorithm, we do not allow omission.
|
||||||
width := lastIdx - int(F[0]) + 1
|
f0 := int(F[0])
|
||||||
|
width := lastIdx - f0 + 1
|
||||||
offset16, H := alloc16(offset16, slab, width*M)
|
offset16, H := alloc16(offset16, slab, width*M)
|
||||||
|
copy(H, H0[f0:lastIdx+1])
|
||||||
|
|
||||||
// Possible length of consecutive chunk at each position.
|
// Possible length of consecutive chunk at each position.
|
||||||
offset16, C := alloc16(offset16, slab, width*M)
|
offset16, C := alloc16(offset16, slab, width*M)
|
||||||
|
copy(C, C0[f0:lastIdx+1])
|
||||||
|
|
||||||
maxScore, maxScorePos := int16(0), 0
|
for i := 1; i < M; i++ {
|
||||||
for i := 0; i < M; i++ {
|
|
||||||
I := i * width
|
I := i * width
|
||||||
|
f := int(F[i])
|
||||||
inGap := false
|
inGap := false
|
||||||
for j := int(F[i]); j <= lastIdx; j++ {
|
for j := f; j <= lastIdx; j++ {
|
||||||
j0 := j - int(F[0])
|
j0 := j - f0
|
||||||
var s1, s2, consecutive int16
|
var s1, s2, consecutive int16
|
||||||
|
|
||||||
if j > int(F[i]) {
|
if j > f {
|
||||||
if inGap {
|
if inGap {
|
||||||
s2 = H[I+j0-1] + scoreGapExtention
|
s2 = H[I+j0-1] + scoreGapExtention
|
||||||
} else {
|
} else {
|
||||||
@ -445,14 +470,8 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input *util.
|
|||||||
}
|
}
|
||||||
|
|
||||||
if pattern[i] == T[j] {
|
if pattern[i] == T[j] {
|
||||||
var diag int16
|
s1 = H[I-width+j0-1] + scoreMatch
|
||||||
if i > 0 && j0 > 0 {
|
|
||||||
diag = H[I-width+j0-1]
|
|
||||||
}
|
|
||||||
s1 = diag + scoreMatch
|
|
||||||
b := B[j]
|
b := B[j]
|
||||||
if i > 0 {
|
|
||||||
// j > 0 if i > 0
|
|
||||||
consecutive = C[I-width+j0-1] + 1
|
consecutive = C[I-width+j0-1] + 1
|
||||||
// Break consecutive chunk
|
// Break consecutive chunk
|
||||||
if b == bonusBoundary {
|
if b == bonusBoundary {
|
||||||
@ -460,10 +479,6 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input *util.
|
|||||||
} else if consecutive > 1 {
|
} else if consecutive > 1 {
|
||||||
b = util.Max16(b, util.Max16(bonusConsecutive, B[j-int(consecutive)+1]))
|
b = util.Max16(b, util.Max16(bonusConsecutive, B[j-int(consecutive)+1]))
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
consecutive = 1
|
|
||||||
b *= bonusFirstCharMultiplier
|
|
||||||
}
|
|
||||||
if s1+b < s2 {
|
if s1+b < s2 {
|
||||||
s1 += B[j]
|
s1 += B[j]
|
||||||
consecutive = 0
|
consecutive = 0
|
||||||
@ -488,14 +503,14 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input *util.
|
|||||||
|
|
||||||
// Phase 4. (Optional) Backtrace to find character positions
|
// Phase 4. (Optional) Backtrace to find character positions
|
||||||
pos := posArray(withPos, M)
|
pos := posArray(withPos, M)
|
||||||
j := int(F[0])
|
j := f0
|
||||||
if withPos {
|
if withPos {
|
||||||
i := M - 1
|
i := M - 1
|
||||||
j = maxScorePos
|
j = maxScorePos
|
||||||
preferMatch := true
|
preferMatch := true
|
||||||
for {
|
for {
|
||||||
I := i * width
|
I := i * width
|
||||||
j0 := j - int(F[0])
|
j0 := j - f0
|
||||||
s := H[I+j0]
|
s := H[I+j0]
|
||||||
|
|
||||||
var s1, s2 int16
|
var s1, s2 int16
|
||||||
|
Loading…
x
Reference in New Issue
Block a user