Normalize pattern string before passing it to Algo function
This commit is contained in:
parent
45793d75c2
commit
a16d8f66a9
@ -246,21 +246,9 @@ func normalizeRune(r rune) rune {
|
|||||||
return r
|
return r
|
||||||
}
|
}
|
||||||
|
|
||||||
func normalizeRunes(runes []rune) []rune {
|
// Algo functions make two assumptions
|
||||||
ret := make([]rune, len(runes))
|
// 1. "pattern" is given in lowercase if "caseSensitive" is false
|
||||||
copy(ret, runes)
|
// 2. "pattern" is already normalized if "normalize" is true
|
||||||
for idx, r := range runes {
|
|
||||||
if r < 0x00C0 || r > 0x2184 {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
n := normalized[r]
|
|
||||||
if n > 0 {
|
|
||||||
ret[idx] = normalized[r]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return ret
|
|
||||||
}
|
|
||||||
|
|
||||||
type Algo func(caseSensitive bool, normalize bool, forward bool, input util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int)
|
type Algo func(caseSensitive bool, normalize bool, forward bool, input util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int)
|
||||||
|
|
||||||
func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) {
|
func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) {
|
||||||
@ -283,10 +271,6 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.C
|
|||||||
return FuzzyMatchV1(caseSensitive, normalize, forward, input, pattern, withPos, slab)
|
return FuzzyMatchV1(caseSensitive, normalize, forward, input, pattern, withPos, slab)
|
||||||
}
|
}
|
||||||
|
|
||||||
if normalize {
|
|
||||||
pattern = normalizeRunes(pattern)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Reuse pre-allocated integer slice to avoid unnecessary sweeping of garbages
|
// Reuse pre-allocated integer slice to avoid unnecessary sweeping of garbages
|
||||||
offset16 := 0
|
offset16 := 0
|
||||||
offset32 := 0
|
offset32 := 0
|
||||||
@ -539,10 +523,6 @@ func FuzzyMatchV1(caseSensitive bool, normalize bool, forward bool, text util.Ch
|
|||||||
lenRunes := text.Length()
|
lenRunes := text.Length()
|
||||||
lenPattern := len(pattern)
|
lenPattern := len(pattern)
|
||||||
|
|
||||||
if normalize {
|
|
||||||
pattern = normalizeRunes(pattern)
|
|
||||||
}
|
|
||||||
|
|
||||||
for index := 0; index < lenRunes; index++ {
|
for index := 0; index < lenRunes; index++ {
|
||||||
char := text.Get(indexAt(index, lenRunes, forward))
|
char := text.Get(indexAt(index, lenRunes, forward))
|
||||||
// This is considerably faster than blindly applying strings.ToLower to the
|
// This is considerably faster than blindly applying strings.ToLower to the
|
||||||
@ -626,10 +606,6 @@ func ExactMatchNaive(caseSensitive bool, normalize bool, forward bool, text util
|
|||||||
return Result{-1, -1, 0}, nil
|
return Result{-1, -1, 0}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if normalize {
|
|
||||||
pattern = normalizeRunes(pattern)
|
|
||||||
}
|
|
||||||
|
|
||||||
// For simplicity, only look at the bonus at the first character position
|
// For simplicity, only look at the bonus at the first character position
|
||||||
pidx := 0
|
pidx := 0
|
||||||
bestPos, bonus, bestBonus := -1, int16(0), int16(-1)
|
bestPos, bonus, bestBonus := -1, int16(0), int16(-1)
|
||||||
@ -693,10 +669,6 @@ func PrefixMatch(caseSensitive bool, normalize bool, forward bool, text util.Cha
|
|||||||
return Result{-1, -1, 0}, nil
|
return Result{-1, -1, 0}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if normalize {
|
|
||||||
pattern = normalizeRunes(pattern)
|
|
||||||
}
|
|
||||||
|
|
||||||
for index, r := range pattern {
|
for index, r := range pattern {
|
||||||
char := text.Get(index)
|
char := text.Get(index)
|
||||||
if !caseSensitive {
|
if !caseSensitive {
|
||||||
@ -726,10 +698,6 @@ func SuffixMatch(caseSensitive bool, normalize bool, forward bool, text util.Cha
|
|||||||
return Result{-1, -1, 0}, nil
|
return Result{-1, -1, 0}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if normalize {
|
|
||||||
pattern = normalizeRunes(pattern)
|
|
||||||
}
|
|
||||||
|
|
||||||
for index, r := range pattern {
|
for index, r := range pattern {
|
||||||
char := text.Get(index + diff)
|
char := text.Get(index + diff)
|
||||||
if !caseSensitive {
|
if !caseSensitive {
|
||||||
|
@ -406,3 +406,19 @@ var normalized map[rune]rune = map[rune]rune{
|
|||||||
0x028F: 'Y', // , LATIN LETTER SMALL CAPITAL
|
0x028F: 'Y', // , LATIN LETTER SMALL CAPITAL
|
||||||
0x1D22: 'Z', // , LATIN LETTER SMALL CAPITAL
|
0x1D22: 'Z', // , LATIN LETTER SMALL CAPITAL
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NormalizeRunes normalizes latin script letters
|
||||||
|
func NormalizeRunes(runes []rune) []rune {
|
||||||
|
ret := make([]rune, len(runes))
|
||||||
|
copy(ret, runes)
|
||||||
|
for idx, r := range runes {
|
||||||
|
if r < 0x00C0 || r > 0x2184 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
n := normalized[r]
|
||||||
|
if n > 0 {
|
||||||
|
ret[idx] = normalized[r]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ret
|
||||||
|
}
|
||||||
|
@ -95,7 +95,7 @@ func BuildPattern(fuzzy bool, fuzzyAlgo algo.Algo, extended bool, caseMode Case,
|
|||||||
termSets := []termSet{}
|
termSets := []termSet{}
|
||||||
|
|
||||||
if extended {
|
if extended {
|
||||||
termSets = parseTerms(fuzzy, caseMode, asString)
|
termSets = parseTerms(fuzzy, caseMode, normalize, asString)
|
||||||
Loop:
|
Loop:
|
||||||
for _, termSet := range termSets {
|
for _, termSet := range termSets {
|
||||||
for idx, term := range termSet {
|
for idx, term := range termSet {
|
||||||
@ -140,7 +140,7 @@ func BuildPattern(fuzzy bool, fuzzyAlgo algo.Algo, extended bool, caseMode Case,
|
|||||||
return ptr
|
return ptr
|
||||||
}
|
}
|
||||||
|
|
||||||
func parseTerms(fuzzy bool, caseMode Case, str string) []termSet {
|
func parseTerms(fuzzy bool, caseMode Case, normalize bool, str string) []termSet {
|
||||||
tokens := _splitRegex.Split(str, -1)
|
tokens := _splitRegex.Split(str, -1)
|
||||||
sets := []termSet{}
|
sets := []termSet{}
|
||||||
set := termSet{}
|
set := termSet{}
|
||||||
@ -196,10 +196,14 @@ func parseTerms(fuzzy bool, caseMode Case, str string) []termSet {
|
|||||||
sets = append(sets, set)
|
sets = append(sets, set)
|
||||||
set = termSet{}
|
set = termSet{}
|
||||||
}
|
}
|
||||||
|
textRunes := []rune(text)
|
||||||
|
if normalize {
|
||||||
|
textRunes = algo.NormalizeRunes(textRunes)
|
||||||
|
}
|
||||||
set = append(set, term{
|
set = append(set, term{
|
||||||
typ: typ,
|
typ: typ,
|
||||||
inv: inv,
|
inv: inv,
|
||||||
text: []rune(text),
|
text: textRunes,
|
||||||
caseSensitive: caseSensitive,
|
caseSensitive: caseSensitive,
|
||||||
origText: origText})
|
origText: origText})
|
||||||
switchSet = true
|
switchSet = true
|
||||||
|
@ -15,7 +15,7 @@ func init() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestParseTermsExtended(t *testing.T) {
|
func TestParseTermsExtended(t *testing.T) {
|
||||||
terms := parseTerms(true, CaseSmart,
|
terms := parseTerms(true, CaseSmart, false,
|
||||||
"| aaa 'bbb ^ccc ddd$ !eee !'fff !^ggg !hhh$ | ^iii$ ^xxx | 'yyy | | zzz$ | !ZZZ |")
|
"| aaa 'bbb ^ccc ddd$ !eee !'fff !^ggg !hhh$ | ^iii$ ^xxx | 'yyy | | zzz$ | !ZZZ |")
|
||||||
if len(terms) != 9 ||
|
if len(terms) != 9 ||
|
||||||
terms[0][0].typ != termFuzzy || terms[0][0].inv ||
|
terms[0][0].typ != termFuzzy || terms[0][0].inv ||
|
||||||
@ -50,7 +50,7 @@ func TestParseTermsExtended(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestParseTermsExtendedExact(t *testing.T) {
|
func TestParseTermsExtendedExact(t *testing.T) {
|
||||||
terms := parseTerms(false, CaseSmart,
|
terms := parseTerms(false, CaseSmart, false,
|
||||||
"aaa 'bbb ^ccc ddd$ !eee !'fff !^ggg !hhh$")
|
"aaa 'bbb ^ccc ddd$ !eee !'fff !^ggg !hhh$")
|
||||||
if len(terms) != 8 ||
|
if len(terms) != 8 ||
|
||||||
terms[0][0].typ != termExact || terms[0][0].inv || len(terms[0][0].text) != 3 ||
|
terms[0][0].typ != termExact || terms[0][0].inv || len(terms[0][0].text) != 3 ||
|
||||||
@ -66,7 +66,7 @@ func TestParseTermsExtendedExact(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestParseTermsEmpty(t *testing.T) {
|
func TestParseTermsEmpty(t *testing.T) {
|
||||||
terms := parseTerms(true, CaseSmart, "' $ ^ !' !^ !$")
|
terms := parseTerms(true, CaseSmart, false, "' $ ^ !' !^ !$")
|
||||||
if len(terms) != 0 {
|
if len(terms) != 0 {
|
||||||
t.Errorf("%s", terms)
|
t.Errorf("%s", terms)
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user