diff --git a/src/algo/algo.go b/src/algo/algo.go index 2a3bc9d..ac6c66d 100644 --- a/src/algo/algo.go +++ b/src/algo/algo.go @@ -246,21 +246,9 @@ func normalizeRune(r rune) rune { return r } -func normalizeRunes(runes []rune) []rune { - ret := make([]rune, len(runes)) - copy(ret, runes) - for idx, r := range runes { - if r < 0x00C0 || r > 0x2184 { - continue - } - n := normalized[r] - if n > 0 { - ret[idx] = normalized[r] - } - } - return ret -} - +// Algo functions make two assumptions +// 1. "pattern" is given in lowercase if "caseSensitive" is false +// 2. "pattern" is already normalized if "normalize" is true type Algo func(caseSensitive bool, normalize bool, forward bool, input util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) { @@ -283,10 +271,6 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.C return FuzzyMatchV1(caseSensitive, normalize, forward, input, pattern, withPos, slab) } - if normalize { - pattern = normalizeRunes(pattern) - } - // Reuse pre-allocated integer slice to avoid unnecessary sweeping of garbages offset16 := 0 offset32 := 0 @@ -539,10 +523,6 @@ func FuzzyMatchV1(caseSensitive bool, normalize bool, forward bool, text util.Ch lenRunes := text.Length() lenPattern := len(pattern) - if normalize { - pattern = normalizeRunes(pattern) - } - for index := 0; index < lenRunes; index++ { char := text.Get(indexAt(index, lenRunes, forward)) // This is considerably faster than blindly applying strings.ToLower to the @@ -626,10 +606,6 @@ func ExactMatchNaive(caseSensitive bool, normalize bool, forward bool, text util return Result{-1, -1, 0}, nil } - if normalize { - pattern = normalizeRunes(pattern) - } - // For simplicity, only look at the bonus at the first character position pidx := 0 bestPos, bonus, bestBonus := -1, int16(0), int16(-1) @@ -693,10 +669,6 @@ func PrefixMatch(caseSensitive bool, normalize bool, forward bool, text util.Cha return Result{-1, -1, 0}, nil } - if normalize { - pattern = normalizeRunes(pattern) - } - for index, r := range pattern { char := text.Get(index) if !caseSensitive { @@ -726,10 +698,6 @@ func SuffixMatch(caseSensitive bool, normalize bool, forward bool, text util.Cha return Result{-1, -1, 0}, nil } - if normalize { - pattern = normalizeRunes(pattern) - } - for index, r := range pattern { char := text.Get(index + diff) if !caseSensitive { diff --git a/src/algo/normalize.go b/src/algo/normalize.go index 1168a64..7a49644 100644 --- a/src/algo/normalize.go +++ b/src/algo/normalize.go @@ -406,3 +406,19 @@ var normalized map[rune]rune = map[rune]rune{ 0x028F: 'Y', // , LATIN LETTER SMALL CAPITAL 0x1D22: 'Z', // , LATIN LETTER SMALL CAPITAL } + +// NormalizeRunes normalizes latin script letters +func NormalizeRunes(runes []rune) []rune { + ret := make([]rune, len(runes)) + copy(ret, runes) + for idx, r := range runes { + if r < 0x00C0 || r > 0x2184 { + continue + } + n := normalized[r] + if n > 0 { + ret[idx] = normalized[r] + } + } + return ret +} diff --git a/src/pattern.go b/src/pattern.go index 8f1d9bc..731104f 100644 --- a/src/pattern.go +++ b/src/pattern.go @@ -95,7 +95,7 @@ func BuildPattern(fuzzy bool, fuzzyAlgo algo.Algo, extended bool, caseMode Case, termSets := []termSet{} if extended { - termSets = parseTerms(fuzzy, caseMode, asString) + termSets = parseTerms(fuzzy, caseMode, normalize, asString) Loop: for _, termSet := range termSets { for idx, term := range termSet { @@ -140,7 +140,7 @@ func BuildPattern(fuzzy bool, fuzzyAlgo algo.Algo, extended bool, caseMode Case, return ptr } -func parseTerms(fuzzy bool, caseMode Case, str string) []termSet { +func parseTerms(fuzzy bool, caseMode Case, normalize bool, str string) []termSet { tokens := _splitRegex.Split(str, -1) sets := []termSet{} set := termSet{} @@ -196,10 +196,14 @@ func parseTerms(fuzzy bool, caseMode Case, str string) []termSet { sets = append(sets, set) set = termSet{} } + textRunes := []rune(text) + if normalize { + textRunes = algo.NormalizeRunes(textRunes) + } set = append(set, term{ typ: typ, inv: inv, - text: []rune(text), + text: textRunes, caseSensitive: caseSensitive, origText: origText}) switchSet = true diff --git a/src/pattern_test.go b/src/pattern_test.go index 66c0041..ea0082f 100644 --- a/src/pattern_test.go +++ b/src/pattern_test.go @@ -15,7 +15,7 @@ func init() { } func TestParseTermsExtended(t *testing.T) { - terms := parseTerms(true, CaseSmart, + terms := parseTerms(true, CaseSmart, false, "| aaa 'bbb ^ccc ddd$ !eee !'fff !^ggg !hhh$ | ^iii$ ^xxx | 'yyy | | zzz$ | !ZZZ |") if len(terms) != 9 || terms[0][0].typ != termFuzzy || terms[0][0].inv || @@ -50,7 +50,7 @@ func TestParseTermsExtended(t *testing.T) { } func TestParseTermsExtendedExact(t *testing.T) { - terms := parseTerms(false, CaseSmart, + terms := parseTerms(false, CaseSmart, false, "aaa 'bbb ^ccc ddd$ !eee !'fff !^ggg !hhh$") if len(terms) != 8 || terms[0][0].typ != termExact || terms[0][0].inv || len(terms[0][0].text) != 3 || @@ -66,7 +66,7 @@ func TestParseTermsExtendedExact(t *testing.T) { } func TestParseTermsEmpty(t *testing.T) { - terms := parseTerms(true, CaseSmart, "' $ ^ !' !^ !$") + terms := parseTerms(true, CaseSmart, false, "' $ ^ !' !^ !$") if len(terms) != 0 { t.Errorf("%s", terms) }