Normalize pattern string before passing it to Algo function

This commit is contained in:
Junegunn Choi 2017-01-09 09:52:17 +09:00
parent 45793d75c2
commit a16d8f66a9
No known key found for this signature in database
GPG Key ID: 254BC280FEF9C627
4 changed files with 29 additions and 41 deletions

View File

@ -246,21 +246,9 @@ func normalizeRune(r rune) rune {
return r return r
} }
func normalizeRunes(runes []rune) []rune { // Algo functions make two assumptions
ret := make([]rune, len(runes)) // 1. "pattern" is given in lowercase if "caseSensitive" is false
copy(ret, runes) // 2. "pattern" is already normalized if "normalize" is true
for idx, r := range runes {
if r < 0x00C0 || r > 0x2184 {
continue
}
n := normalized[r]
if n > 0 {
ret[idx] = normalized[r]
}
}
return ret
}
type Algo func(caseSensitive bool, normalize bool, forward bool, input util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) type Algo func(caseSensitive bool, normalize bool, forward bool, input util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int)
func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) { func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) {
@ -283,10 +271,6 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.C
return FuzzyMatchV1(caseSensitive, normalize, forward, input, pattern, withPos, slab) return FuzzyMatchV1(caseSensitive, normalize, forward, input, pattern, withPos, slab)
} }
if normalize {
pattern = normalizeRunes(pattern)
}
// Reuse pre-allocated integer slice to avoid unnecessary sweeping of garbages // Reuse pre-allocated integer slice to avoid unnecessary sweeping of garbages
offset16 := 0 offset16 := 0
offset32 := 0 offset32 := 0
@ -539,10 +523,6 @@ func FuzzyMatchV1(caseSensitive bool, normalize bool, forward bool, text util.Ch
lenRunes := text.Length() lenRunes := text.Length()
lenPattern := len(pattern) lenPattern := len(pattern)
if normalize {
pattern = normalizeRunes(pattern)
}
for index := 0; index < lenRunes; index++ { for index := 0; index < lenRunes; index++ {
char := text.Get(indexAt(index, lenRunes, forward)) char := text.Get(indexAt(index, lenRunes, forward))
// This is considerably faster than blindly applying strings.ToLower to the // This is considerably faster than blindly applying strings.ToLower to the
@ -626,10 +606,6 @@ func ExactMatchNaive(caseSensitive bool, normalize bool, forward bool, text util
return Result{-1, -1, 0}, nil return Result{-1, -1, 0}, nil
} }
if normalize {
pattern = normalizeRunes(pattern)
}
// For simplicity, only look at the bonus at the first character position // For simplicity, only look at the bonus at the first character position
pidx := 0 pidx := 0
bestPos, bonus, bestBonus := -1, int16(0), int16(-1) bestPos, bonus, bestBonus := -1, int16(0), int16(-1)
@ -693,10 +669,6 @@ func PrefixMatch(caseSensitive bool, normalize bool, forward bool, text util.Cha
return Result{-1, -1, 0}, nil return Result{-1, -1, 0}, nil
} }
if normalize {
pattern = normalizeRunes(pattern)
}
for index, r := range pattern { for index, r := range pattern {
char := text.Get(index) char := text.Get(index)
if !caseSensitive { if !caseSensitive {
@ -726,10 +698,6 @@ func SuffixMatch(caseSensitive bool, normalize bool, forward bool, text util.Cha
return Result{-1, -1, 0}, nil return Result{-1, -1, 0}, nil
} }
if normalize {
pattern = normalizeRunes(pattern)
}
for index, r := range pattern { for index, r := range pattern {
char := text.Get(index + diff) char := text.Get(index + diff)
if !caseSensitive { if !caseSensitive {

View File

@ -406,3 +406,19 @@ var normalized map[rune]rune = map[rune]rune{
0x028F: 'Y', // , LATIN LETTER SMALL CAPITAL 0x028F: 'Y', // , LATIN LETTER SMALL CAPITAL
0x1D22: 'Z', // , LATIN LETTER SMALL CAPITAL 0x1D22: 'Z', // , LATIN LETTER SMALL CAPITAL
} }
// NormalizeRunes returns a copy of runes in which every rune that has an
// entry in the normalized table is replaced by the value it maps to. Runes
// without an entry are copied through unchanged. The input slice is never
// modified; the result always has the same length as the input.
func NormalizeRunes(runes []rune) []rune {
	ret := make([]rune, len(runes))
	copy(ret, runes)
	for idx, r := range runes {
		// Cheap range guard that avoids a map lookup for runes outside
		// 0x00C0..0x2184 (presumably the span of keys in the table).
		if r < 0x00C0 || r > 0x2184 {
			continue
		}
		// Look the rune up once; a missing key yields the zero rune, which
		// is treated as "no replacement".
		if n := normalized[r]; n > 0 {
			ret[idx] = n
		}
	}
	return ret
}

View File

@ -95,7 +95,7 @@ func BuildPattern(fuzzy bool, fuzzyAlgo algo.Algo, extended bool, caseMode Case,
termSets := []termSet{} termSets := []termSet{}
if extended { if extended {
termSets = parseTerms(fuzzy, caseMode, asString) termSets = parseTerms(fuzzy, caseMode, normalize, asString)
Loop: Loop:
for _, termSet := range termSets { for _, termSet := range termSets {
for idx, term := range termSet { for idx, term := range termSet {
@ -140,7 +140,7 @@ func BuildPattern(fuzzy bool, fuzzyAlgo algo.Algo, extended bool, caseMode Case,
return ptr return ptr
} }
func parseTerms(fuzzy bool, caseMode Case, str string) []termSet { func parseTerms(fuzzy bool, caseMode Case, normalize bool, str string) []termSet {
tokens := _splitRegex.Split(str, -1) tokens := _splitRegex.Split(str, -1)
sets := []termSet{} sets := []termSet{}
set := termSet{} set := termSet{}
@ -196,10 +196,14 @@ func parseTerms(fuzzy bool, caseMode Case, str string) []termSet {
sets = append(sets, set) sets = append(sets, set)
set = termSet{} set = termSet{}
} }
textRunes := []rune(text)
if normalize {
textRunes = algo.NormalizeRunes(textRunes)
}
set = append(set, term{ set = append(set, term{
typ: typ, typ: typ,
inv: inv, inv: inv,
text: []rune(text), text: textRunes,
caseSensitive: caseSensitive, caseSensitive: caseSensitive,
origText: origText}) origText: origText})
switchSet = true switchSet = true

View File

@ -15,7 +15,7 @@ func init() {
} }
func TestParseTermsExtended(t *testing.T) { func TestParseTermsExtended(t *testing.T) {
terms := parseTerms(true, CaseSmart, terms := parseTerms(true, CaseSmart, false,
"| aaa 'bbb ^ccc ddd$ !eee !'fff !^ggg !hhh$ | ^iii$ ^xxx | 'yyy | | zzz$ | !ZZZ |") "| aaa 'bbb ^ccc ddd$ !eee !'fff !^ggg !hhh$ | ^iii$ ^xxx | 'yyy | | zzz$ | !ZZZ |")
if len(terms) != 9 || if len(terms) != 9 ||
terms[0][0].typ != termFuzzy || terms[0][0].inv || terms[0][0].typ != termFuzzy || terms[0][0].inv ||
@ -50,7 +50,7 @@ func TestParseTermsExtended(t *testing.T) {
} }
func TestParseTermsExtendedExact(t *testing.T) { func TestParseTermsExtendedExact(t *testing.T) {
terms := parseTerms(false, CaseSmart, terms := parseTerms(false, CaseSmart, false,
"aaa 'bbb ^ccc ddd$ !eee !'fff !^ggg !hhh$") "aaa 'bbb ^ccc ddd$ !eee !'fff !^ggg !hhh$")
if len(terms) != 8 || if len(terms) != 8 ||
terms[0][0].typ != termExact || terms[0][0].inv || len(terms[0][0].text) != 3 || terms[0][0].typ != termExact || terms[0][0].inv || len(terms[0][0].text) != 3 ||
@ -66,7 +66,7 @@ func TestParseTermsExtendedExact(t *testing.T) {
} }
func TestParseTermsEmpty(t *testing.T) { func TestParseTermsEmpty(t *testing.T) {
terms := parseTerms(true, CaseSmart, "' $ ^ !' !^ !$") terms := parseTerms(true, CaseSmart, false, "' $ ^ !' !^ !$")
if len(terms) != 0 { if len(terms) != 0 {
t.Errorf("%s", terms) t.Errorf("%s", terms)
} }