Normalize pattern string before passing it to Algo function

2017-01-09 09:52:17 +09:00 · 2017-01-09 09:52:17 +09:00 · a16d8f66a9
commit a16d8f66a9
parent 45793d75c2
4 changed files with 29 additions and 41 deletions
--- a/src/algo/algo.go
+++ b/src/algo/algo.go
@ -246,21 +246,9 @@ func normalizeRune(r rune) rune {
 	return r
 }
-func normalizeRunes(runes []rune) []rune {
+// Algo functions make two assumptions
-	ret := make([]rune, len(runes))
+// 1. "pattern" is given in lowercase if "caseSensitive" is false
-	copy(ret, runes)
+// 2. "pattern" is already normalized if "normalize" is true
 	for idx, r := range runes {
 		if r < 0x00C0 || r > 0x2184 {
 			continue
 		}
 		n := normalized[r]
 		if n > 0 {
 			ret[idx] = normalized[r]
 		}
 	}
 	return ret
 }
 type Algo func(caseSensitive bool, normalize bool, forward bool, input util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int)
 func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) {
@ -283,10 +271,6 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.C
 		return FuzzyMatchV1(caseSensitive, normalize, forward, input, pattern, withPos, slab)
 	}
 	if normalize {
 		pattern = normalizeRunes(pattern)
 	}
 	// Reuse pre-allocated integer slice to avoid unnecessary sweeping of garbage
 	offset16 := 0
 	offset32 := 0
@ -539,10 +523,6 @@ func FuzzyMatchV1(caseSensitive bool, normalize bool, forward bool, text util.Ch
 	lenRunes := text.Length()
 	lenPattern := len(pattern)
 	if normalize {
 		pattern = normalizeRunes(pattern)
 	}
 	for index := 0; index < lenRunes; index++ {
 		char := text.Get(indexAt(index, lenRunes, forward))
 		// This is considerably faster than blindly applying strings.ToLower to the
@ -626,10 +606,6 @@ func ExactMatchNaive(caseSensitive bool, normalize bool, forward bool, text util
 		return Result{-1, -1, 0}, nil
 	}
 	if normalize {
 		pattern = normalizeRunes(pattern)
 	}
 	// For simplicity, only look at the bonus at the first character position
 	pidx := 0
 	bestPos, bonus, bestBonus := -1, int16(0), int16(-1)
@ -693,10 +669,6 @@ func PrefixMatch(caseSensitive bool, normalize bool, forward bool, text util.Cha
 		return Result{-1, -1, 0}, nil
 	}
 	if normalize {
 		pattern = normalizeRunes(pattern)
 	}
 	for index, r := range pattern {
 		char := text.Get(index)
 		if !caseSensitive {
@ -726,10 +698,6 @@ func SuffixMatch(caseSensitive bool, normalize bool, forward bool, text util.Cha
 		return Result{-1, -1, 0}, nil
 	}
 	if normalize {
 		pattern = normalizeRunes(pattern)
 	}
 	for index, r := range pattern {
 		char := text.Get(index + diff)
 		if !caseSensitive {
--- a/src/algo/normalize.go
+++ b/src/algo/normalize.go
@ -406,3 +406,19 @@ var normalized map[rune]rune = map[rune]rune{
 	0x028F: 'Y', // , LATIN LETTER SMALL CAPITAL
 	0x1D22: 'Z', // , LATIN LETTER SMALL CAPITAL
 }
 // NormalizeRunes returns a copy of runes in which every rune in the range
 // 0x00C0-0x2184 that has a positive entry in the normalized table is replaced
 // by its mapped value. All other runes are copied through unchanged. The
 // input slice is never modified and the result always has the same length.
 func NormalizeRunes(runes []rune) []rune {
 	ret := make([]rune, len(runes))
 	copy(ret, runes)
 	for idx, r := range runes {
 		// Runes outside this range are skipped without consulting the table.
 		if r < 0x00C0 || r > 0x2184 {
 			continue
 		}
 		// A missing key yields the zero rune, so n > 0 doubles as the
 		// presence check; reuse n rather than looking the key up again.
 		if n := normalized[r]; n > 0 {
 			ret[idx] = n
 		}
 	}
 	return ret
 }
--- a/src/pattern.go
+++ b/src/pattern.go
@ -95,7 +95,7 @@ func BuildPattern(fuzzy bool, fuzzyAlgo algo.Algo, extended bool, caseMode Case,
 	termSets := []termSet{}
 	if extended {
-		termSets = parseTerms(fuzzy, caseMode, asString)
+		termSets = parseTerms(fuzzy, caseMode, normalize, asString)
 	Loop:
 		for _, termSet := range termSets {
 			for idx, term := range termSet {
@ -140,7 +140,7 @@ func BuildPattern(fuzzy bool, fuzzyAlgo algo.Algo, extended bool, caseMode Case,
 	return ptr
 }
-func parseTerms(fuzzy bool, caseMode Case, str string) []termSet {
+func parseTerms(fuzzy bool, caseMode Case, normalize bool, str string) []termSet {
 	tokens := _splitRegex.Split(str, -1)
 	sets := []termSet{}
 	set := termSet{}
@ -196,10 +196,14 @@ func parseTerms(fuzzy bool, caseMode Case, str string) []termSet {
 				sets = append(sets, set)
 				set = termSet{}
 			}
 			textRunes := []rune(text)
 			if normalize {
 				textRunes = algo.NormalizeRunes(textRunes)
 			}
 			set = append(set, term{
 				typ:           typ,
 				inv:           inv,
-				text:          []rune(text),
+				text:          textRunes,
 				caseSensitive: caseSensitive,
 				origText:      origText})
 			switchSet = true
--- a/src/pattern_test.go
+++ b/src/pattern_test.go
@ -15,7 +15,7 @@ func init() {
 }
 func TestParseTermsExtended(t *testing.T) {
-	terms := parseTerms(true, CaseSmart,
+	terms := parseTerms(true, CaseSmart, false,
 		"| aaa 'bbb ^ccc ddd$ !eee !'fff !^ggg !hhh$ | ^iii$ ^xxx | 'yyy | | zzz$ | !ZZZ |")
 	if len(terms) != 9 ||
 		terms[0][0].typ != termFuzzy || terms[0][0].inv ||
@ -50,7 +50,7 @@ func TestParseTermsExtended(t *testing.T) {
 }
 func TestParseTermsExtendedExact(t *testing.T) {
-	terms := parseTerms(false, CaseSmart,
+	terms := parseTerms(false, CaseSmart, false,
 		"aaa 'bbb ^ccc ddd$ !eee !'fff !^ggg !hhh$")
 	if len(terms) != 8 ||
 		terms[0][0].typ != termExact || terms[0][0].inv || len(terms[0][0].text) != 3 ||
@ -66,7 +66,7 @@ func TestParseTermsExtendedExact(t *testing.T) {
 }
 func TestParseTermsEmpty(t *testing.T) {
-	terms := parseTerms(true, CaseSmart, "' $ ^ !' !^ !$")
+	terms := parseTerms(true, CaseSmart, false, "' $ ^ !' !^ !$")
 	if len(terms) != 0 {
 		t.Errorf("%s", terms)
 	}