[perf] Avoid allocating rune array for ascii string

In the best case (all ascii), this reduces the memory footprint by 60%
and the response time by 15% to 20%. In the worst case (every line has
non-ascii characters), 3 to 4% overhead is observed.
This commit is contained in:
Junegunn Choi 2016-08-14 00:39:44 +09:00
parent 822b86942c
commit 1d4057c209
No known key found for this signature in database
GPG Key ID: 254BC280FEF9C627
16 changed files with 303 additions and 158 deletions

View File

@ -15,11 +15,18 @@ import (
* In short: They try to do as little work as possible. * In short: They try to do as little work as possible.
*/ */
func runeAt(runes []rune, index int, max int, forward bool) rune { func indexAt(index int, max int, forward bool) int {
if forward { if forward {
return runes[index] return index
} }
return runes[max-index-1] return max - index - 1
}
func runeAt(text util.Chars, index int, max int, forward bool) rune {
if forward {
return text.Get(index)
}
return text.Get(max - index - 1)
} }
// Result conatins the results of running a match function. // Result conatins the results of running a match function.
@ -42,14 +49,14 @@ const (
charNumber charNumber
) )
func evaluateBonus(caseSensitive bool, runes []rune, pattern []rune, sidx int, eidx int) int32 { func evaluateBonus(caseSensitive bool, text util.Chars, pattern []rune, sidx int, eidx int) int32 {
var bonus int32 var bonus int32
pidx := 0 pidx := 0
lenPattern := len(pattern) lenPattern := len(pattern)
consecutive := false consecutive := false
prevClass := charNonWord prevClass := charNonWord
for index := 0; index < eidx; index++ { for index := 0; index < eidx; index++ {
char := runes[index] char := text.Get(index)
var class charClass var class charClass
if unicode.IsLower(char) { if unicode.IsLower(char) {
class = charLower class = charLower
@ -107,7 +114,7 @@ func evaluateBonus(caseSensitive bool, runes []rune, pattern []rune, sidx int, e
} }
// FuzzyMatch performs fuzzy-match // FuzzyMatch performs fuzzy-match
func FuzzyMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) Result { func FuzzyMatch(caseSensitive bool, forward bool, text util.Chars, pattern []rune) Result {
if len(pattern) == 0 { if len(pattern) == 0 {
return Result{0, 0, 0} return Result{0, 0, 0}
} }
@ -125,11 +132,11 @@ func FuzzyMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune)
sidx := -1 sidx := -1
eidx := -1 eidx := -1
lenRunes := len(runes) lenRunes := text.Length()
lenPattern := len(pattern) lenPattern := len(pattern)
for index := range runes { for index := 0; index < lenRunes; index++ {
char := runeAt(runes, index, lenRunes, forward) char := runeAt(text, index, lenRunes, forward)
// This is considerably faster than blindly applying strings.ToLower to the // This is considerably faster than blindly applying strings.ToLower to the
// whole string // whole string
if !caseSensitive { if !caseSensitive {
@ -142,7 +149,7 @@ func FuzzyMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune)
char = unicode.To(unicode.LowerCase, char) char = unicode.To(unicode.LowerCase, char)
} }
} }
pchar := runeAt(pattern, pidx, lenPattern, forward) pchar := pattern[indexAt(pidx, lenPattern, forward)]
if char == pchar { if char == pchar {
if sidx < 0 { if sidx < 0 {
sidx = index sidx = index
@ -157,7 +164,7 @@ func FuzzyMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune)
if sidx >= 0 && eidx >= 0 { if sidx >= 0 && eidx >= 0 {
pidx-- pidx--
for index := eidx - 1; index >= sidx; index-- { for index := eidx - 1; index >= sidx; index-- {
char := runeAt(runes, index, lenRunes, forward) char := runeAt(text, index, lenRunes, forward)
if !caseSensitive { if !caseSensitive {
if char >= 'A' && char <= 'Z' { if char >= 'A' && char <= 'Z' {
char += 32 char += 32
@ -166,7 +173,7 @@ func FuzzyMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune)
} }
} }
pchar := runeAt(pattern, pidx, lenPattern, forward) pchar := pattern[indexAt(pidx, lenPattern, forward)]
if char == pchar { if char == pchar {
if pidx--; pidx < 0 { if pidx--; pidx < 0 {
sidx = index sidx = index
@ -182,7 +189,7 @@ func FuzzyMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune)
} }
return Result{int32(sidx), int32(eidx), return Result{int32(sidx), int32(eidx),
evaluateBonus(caseSensitive, runes, pattern, sidx, eidx)} evaluateBonus(caseSensitive, text, pattern, sidx, eidx)}
} }
return Result{-1, -1, 0} return Result{-1, -1, 0}
} }
@ -194,12 +201,12 @@ func FuzzyMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune)
// //
// We might try to implement better algorithms in the future: // We might try to implement better algorithms in the future:
// http://en.wikipedia.org/wiki/String_searching_algorithm // http://en.wikipedia.org/wiki/String_searching_algorithm
func ExactMatchNaive(caseSensitive bool, forward bool, runes []rune, pattern []rune) Result { func ExactMatchNaive(caseSensitive bool, forward bool, text util.Chars, pattern []rune) Result {
if len(pattern) == 0 { if len(pattern) == 0 {
return Result{0, 0, 0} return Result{0, 0, 0}
} }
lenRunes := len(runes) lenRunes := text.Length()
lenPattern := len(pattern) lenPattern := len(pattern)
if lenRunes < lenPattern { if lenRunes < lenPattern {
@ -208,7 +215,7 @@ func ExactMatchNaive(caseSensitive bool, forward bool, runes []rune, pattern []r
pidx := 0 pidx := 0
for index := 0; index < lenRunes; index++ { for index := 0; index < lenRunes; index++ {
char := runeAt(runes, index, lenRunes, forward) char := runeAt(text, index, lenRunes, forward)
if !caseSensitive { if !caseSensitive {
if char >= 'A' && char <= 'Z' { if char >= 'A' && char <= 'Z' {
char += 32 char += 32
@ -216,7 +223,7 @@ func ExactMatchNaive(caseSensitive bool, forward bool, runes []rune, pattern []r
char = unicode.To(unicode.LowerCase, char) char = unicode.To(unicode.LowerCase, char)
} }
} }
pchar := runeAt(pattern, pidx, lenPattern, forward) pchar := pattern[indexAt(pidx, lenPattern, forward)]
if pchar == char { if pchar == char {
pidx++ pidx++
if pidx == lenPattern { if pidx == lenPattern {
@ -229,7 +236,7 @@ func ExactMatchNaive(caseSensitive bool, forward bool, runes []rune, pattern []r
eidx = lenRunes - (index - lenPattern + 1) eidx = lenRunes - (index - lenPattern + 1)
} }
return Result{int32(sidx), int32(eidx), return Result{int32(sidx), int32(eidx),
evaluateBonus(caseSensitive, runes, pattern, sidx, eidx)} evaluateBonus(caseSensitive, text, pattern, sidx, eidx)}
} }
} else { } else {
index -= pidx index -= pidx
@ -240,13 +247,13 @@ func ExactMatchNaive(caseSensitive bool, forward bool, runes []rune, pattern []r
} }
// PrefixMatch performs prefix-match // PrefixMatch performs prefix-match
func PrefixMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) Result { func PrefixMatch(caseSensitive bool, forward bool, text util.Chars, pattern []rune) Result {
if len(runes) < len(pattern) { if text.Length() < len(pattern) {
return Result{-1, -1, 0} return Result{-1, -1, 0}
} }
for index, r := range pattern { for index, r := range pattern {
char := runes[index] char := text.Get(index)
if !caseSensitive { if !caseSensitive {
char = unicode.ToLower(char) char = unicode.ToLower(char)
} }
@ -256,20 +263,19 @@ func PrefixMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune)
} }
lenPattern := len(pattern) lenPattern := len(pattern)
return Result{0, int32(lenPattern), return Result{0, int32(lenPattern),
evaluateBonus(caseSensitive, runes, pattern, 0, lenPattern)} evaluateBonus(caseSensitive, text, pattern, 0, lenPattern)}
} }
// SuffixMatch performs suffix-match // SuffixMatch performs suffix-match
func SuffixMatch(caseSensitive bool, forward bool, input []rune, pattern []rune) Result { func SuffixMatch(caseSensitive bool, forward bool, text util.Chars, pattern []rune) Result {
runes := util.TrimRight(input) trimmedLen := text.Length() - text.TrailingWhitespaces()
trimmedLen := len(runes)
diff := trimmedLen - len(pattern) diff := trimmedLen - len(pattern)
if diff < 0 { if diff < 0 {
return Result{-1, -1, 0} return Result{-1, -1, 0}
} }
for index, r := range pattern { for index, r := range pattern {
char := runes[index+diff] char := text.Get(index + diff)
if !caseSensitive { if !caseSensitive {
char = unicode.ToLower(char) char = unicode.ToLower(char)
} }
@ -281,16 +287,16 @@ func SuffixMatch(caseSensitive bool, forward bool, input []rune, pattern []rune)
sidx := trimmedLen - lenPattern sidx := trimmedLen - lenPattern
eidx := trimmedLen eidx := trimmedLen
return Result{int32(sidx), int32(eidx), return Result{int32(sidx), int32(eidx),
evaluateBonus(caseSensitive, runes, pattern, sidx, eidx)} evaluateBonus(caseSensitive, text, pattern, sidx, eidx)}
} }
// EqualMatch performs equal-match // EqualMatch performs equal-match
func EqualMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) Result { func EqualMatch(caseSensitive bool, forward bool, text util.Chars, pattern []rune) Result {
// Note: EqualMatch always return a zero bonus. // Note: EqualMatch always return a zero bonus.
if len(runes) != len(pattern) { if text.Length() != len(pattern) {
return Result{-1, -1, 0} return Result{-1, -1, 0}
} }
runesStr := string(runes) runesStr := text.ToString()
if !caseSensitive { if !caseSensitive {
runesStr = strings.ToLower(runesStr) runesStr = strings.ToLower(runesStr)
} }

View File

@ -3,13 +3,15 @@ package algo
import ( import (
"strings" "strings"
"testing" "testing"
"github.com/junegunn/fzf/src/util"
) )
func assertMatch(t *testing.T, fun func(bool, bool, []rune, []rune) Result, caseSensitive, forward bool, input, pattern string, sidx int32, eidx int32, bonus int32) { func assertMatch(t *testing.T, fun func(bool, bool, util.Chars, []rune) Result, caseSensitive, forward bool, input, pattern string, sidx int32, eidx int32, bonus int32) {
if !caseSensitive { if !caseSensitive {
pattern = strings.ToLower(pattern) pattern = strings.ToLower(pattern)
} }
res := fun(caseSensitive, forward, []rune(input), []rune(pattern)) res := fun(caseSensitive, forward, util.RunesToChars([]rune(input)), []rune(pattern))
if res.Start != sidx { if res.Start != sidx {
t.Errorf("Invalid start index: %d (expected: %d, %s / %s)", res.Start, sidx, input, pattern) t.Errorf("Invalid start index: %d (expected: %d, %s / %s)", res.Start, sidx, input, pattern)
} }

View File

@ -3,6 +3,8 @@ package fzf
import ( import (
"fmt" "fmt"
"testing" "testing"
"github.com/junegunn/fzf/src/util"
) )
func TestChunkList(t *testing.T) { func TestChunkList(t *testing.T) {
@ -10,7 +12,7 @@ func TestChunkList(t *testing.T) {
sortCriteria = []criterion{byMatchLen, byLength} sortCriteria = []criterion{byMatchLen, byLength}
cl := NewChunkList(func(s []byte, i int) *Item { cl := NewChunkList(func(s []byte, i int) *Item {
return &Item{text: []rune(string(s)), rank: buildEmptyRank(int32(i * 2))} return &Item{text: util.ToChars(s), rank: buildEmptyRank(int32(i * 2))}
}) })
// Snapshot // Snapshot
@ -42,8 +44,8 @@ func TestChunkList(t *testing.T) {
last := func(arr [5]int32) int32 { last := func(arr [5]int32) int32 {
return arr[len(arr)-1] return arr[len(arr)-1]
} }
if string((*chunk1)[0].text) != "hello" || last((*chunk1)[0].rank) != 0 || if (*chunk1)[0].text.ToString() != "hello" || last((*chunk1)[0].rank) != 0 ||
string((*chunk1)[1].text) != "world" || last((*chunk1)[1].rank) != 2 { (*chunk1)[1].text.ToString() != "world" || last((*chunk1)[1].rank) != 2 {
t.Error("Invalid data") t.Error("Invalid data")
} }
if chunk1.IsFull() { if chunk1.IsFull() {

View File

@ -63,29 +63,29 @@ func Run(opts *Options) {
eventBox := util.NewEventBox() eventBox := util.NewEventBox()
// ANSI code processor // ANSI code processor
ansiProcessor := func(data []byte) ([]rune, []ansiOffset) { ansiProcessor := func(data []byte) (util.Chars, []ansiOffset) {
return util.BytesToRunes(data), nil return util.ToChars(data), nil
} }
ansiProcessorRunes := func(data []rune) ([]rune, []ansiOffset) { ansiProcessorRunes := func(data []rune) (util.Chars, []ansiOffset) {
return data, nil return util.RunesToChars(data), nil
} }
if opts.Ansi { if opts.Ansi {
if opts.Theme != nil { if opts.Theme != nil {
var state *ansiState var state *ansiState
ansiProcessor = func(data []byte) ([]rune, []ansiOffset) { ansiProcessor = func(data []byte) (util.Chars, []ansiOffset) {
trimmed, offsets, newState := extractColor(string(data), state, nil) trimmed, offsets, newState := extractColor(string(data), state, nil)
state = newState state = newState
return []rune(trimmed), offsets return util.RunesToChars([]rune(trimmed)), offsets
} }
} else { } else {
// When color is disabled but ansi option is given, // When color is disabled but ansi option is given,
// we simply strip out ANSI codes from the input // we simply strip out ANSI codes from the input
ansiProcessor = func(data []byte) ([]rune, []ansiOffset) { ansiProcessor = func(data []byte) (util.Chars, []ansiOffset) {
trimmed, _, _ := extractColor(string(data), nil, nil) trimmed, _, _ := extractColor(string(data), nil, nil)
return []rune(trimmed), nil return util.RunesToChars([]rune(trimmed)), nil
} }
} }
ansiProcessorRunes = func(data []rune) ([]rune, []ansiOffset) { ansiProcessorRunes = func(data []rune) (util.Chars, []ansiOffset) {
return ansiProcessor([]byte(string(data))) return ansiProcessor([]byte(string(data)))
} }
} }
@ -100,29 +100,30 @@ func Run(opts *Options) {
eventBox.Set(EvtHeader, header) eventBox.Set(EvtHeader, header)
return nil return nil
} }
runes, colors := ansiProcessor(data) chars, colors := ansiProcessor(data)
return &Item{ return &Item{
text: runes, text: chars,
colors: colors, colors: colors,
rank: buildEmptyRank(int32(index))} rank: buildEmptyRank(int32(index))}
}) })
} else { } else {
chunkList = NewChunkList(func(data []byte, index int) *Item { chunkList = NewChunkList(func(data []byte, index int) *Item {
runes := util.BytesToRunes(data) chars := util.ToChars(data)
tokens := Tokenize(runes, opts.Delimiter) tokens := Tokenize(chars, opts.Delimiter)
trans := Transform(tokens, opts.WithNth) trans := Transform(tokens, opts.WithNth)
if len(header) < opts.HeaderLines { if len(header) < opts.HeaderLines {
header = append(header, string(joinTokens(trans))) header = append(header, string(joinTokens(trans)))
eventBox.Set(EvtHeader, header) eventBox.Set(EvtHeader, header)
return nil return nil
} }
textRunes := joinTokens(trans)
item := Item{ item := Item{
text: joinTokens(trans), text: util.RunesToChars(textRunes),
origText: &runes, origText: &data,
colors: nil, colors: nil,
rank: buildEmptyRank(int32(index))} rank: buildEmptyRank(int32(index))}
trimmed, colors := ansiProcessorRunes(item.text) trimmed, colors := ansiProcessorRunes(textRunes)
item.text = trimmed item.text = trimmed
item.colors = colors item.colors = colors
return &item return &item
@ -170,7 +171,7 @@ func Run(opts *Options) {
func(runes []byte) bool { func(runes []byte) bool {
item := chunkList.trans(runes, 0) item := chunkList.trans(runes, 0)
if item != nil && pattern.MatchItem(item) { if item != nil && pattern.MatchItem(item) {
fmt.Println(string(item.text)) fmt.Println(item.text.ToString())
found = true found = true
} }
return false return false

View File

@ -4,6 +4,7 @@ import (
"math" "math"
"github.com/junegunn/fzf/src/curses" "github.com/junegunn/fzf/src/curses"
"github.com/junegunn/fzf/src/util"
) )
// Offset holds three 32-bit integers denoting the offsets of a matched substring // Offset holds three 32-bit integers denoting the offsets of a matched substring
@ -17,8 +18,8 @@ type colorOffset struct {
// Item represents each input line // Item represents each input line
type Item struct { type Item struct {
text []rune text util.Chars
origText *[]rune origText *[]byte
transformed []Token transformed []Token
offsets []Offset offsets []Offset
colors []ansiOffset colors []ansiOffset
@ -91,12 +92,14 @@ func (item *Item) Rank(cache bool) [5]int32 {
// If offsets is empty, lenSum will be 0, but we don't care // If offsets is empty, lenSum will be 0, but we don't care
val = int32(lenSum) val = int32(lenSum)
} else { } else {
val = int32(len(item.text)) val = int32(item.text.Length())
} }
case byBegin: case byBegin:
// We can't just look at item.offsets[0][0] because it can be an inverse term // We can't just look at item.offsets[0][0] because it can be an inverse term
whitePrefixLen := 0 whitePrefixLen := 0
for idx, r := range item.text { numChars := item.text.Length()
for idx := 0; idx < numChars; idx++ {
r := item.text.Get(idx)
whitePrefixLen = idx whitePrefixLen = idx
if idx == minBegin || r != ' ' && r != '\t' { if idx == minBegin || r != ' ' && r != '\t' {
break break
@ -105,7 +108,7 @@ func (item *Item) Rank(cache bool) [5]int32 {
val = int32(minBegin - whitePrefixLen) val = int32(minBegin - whitePrefixLen)
case byEnd: case byEnd:
if prevEnd > 0 { if prevEnd > 0 {
val = int32(1 + len(item.text) - prevEnd) val = int32(1 + item.text.Length() - prevEnd)
} else { } else {
// Empty offsets due to inverse terms. // Empty offsets due to inverse terms.
val = 1 val = 1
@ -134,7 +137,7 @@ func (item *Item) StringPtr(stripAnsi bool) *string {
orig := string(*item.origText) orig := string(*item.origText)
return &orig return &orig
} }
str := string(item.text) str := item.text.ToString()
return &str return &str
} }

View File

@ -6,6 +6,7 @@ import (
"testing" "testing"
"github.com/junegunn/fzf/src/curses" "github.com/junegunn/fzf/src/curses"
"github.com/junegunn/fzf/src/util"
) )
func TestOffsetSort(t *testing.T) { func TestOffsetSort(t *testing.T) {
@ -44,13 +45,13 @@ func TestItemRank(t *testing.T) {
sortCriteria = []criterion{byMatchLen, byLength} sortCriteria = []criterion{byMatchLen, byLength}
strs := [][]rune{[]rune("foo"), []rune("foobar"), []rune("bar"), []rune("baz")} strs := [][]rune{[]rune("foo"), []rune("foobar"), []rune("bar"), []rune("baz")}
item1 := Item{text: strs[0], offsets: []Offset{}, rank: [5]int32{0, 0, 0, 0, 1}} item1 := Item{text: util.RunesToChars(strs[0]), offsets: []Offset{}, rank: [5]int32{0, 0, 0, 0, 1}}
rank1 := item1.Rank(true) rank1 := item1.Rank(true)
if rank1[0] != math.MaxInt32 || rank1[1] != 3 || rank1[4] != 1 { if rank1[0] != math.MaxInt32 || rank1[1] != 3 || rank1[4] != 1 {
t.Error(item1.Rank(true)) t.Error(item1.Rank(true))
} }
// Only differ in index // Only differ in index
item2 := Item{text: strs[0], offsets: []Offset{}} item2 := Item{text: util.RunesToChars(strs[0]), offsets: []Offset{}}
items := []*Item{&item1, &item2} items := []*Item{&item1, &item2}
sort.Sort(ByRelevance(items)) sort.Sort(ByRelevance(items))
@ -66,10 +67,10 @@ func TestItemRank(t *testing.T) {
} }
// Sort by relevance // Sort by relevance
item3 := Item{text: strs[1], rank: [5]int32{0, 0, 0, 0, 2}, offsets: []Offset{Offset{1, 3}, Offset{5, 7}}} item3 := Item{text: util.RunesToChars(strs[1]), rank: [5]int32{0, 0, 0, 0, 2}, offsets: []Offset{Offset{1, 3}, Offset{5, 7}}}
item4 := Item{text: strs[1], rank: [5]int32{0, 0, 0, 0, 2}, offsets: []Offset{Offset{1, 2}, Offset{6, 7}}} item4 := Item{text: util.RunesToChars(strs[1]), rank: [5]int32{0, 0, 0, 0, 2}, offsets: []Offset{Offset{1, 2}, Offset{6, 7}}}
item5 := Item{text: strs[2], rank: [5]int32{0, 0, 0, 0, 2}, offsets: []Offset{Offset{1, 3}, Offset{5, 7}}} item5 := Item{text: util.RunesToChars(strs[2]), rank: [5]int32{0, 0, 0, 0, 2}, offsets: []Offset{Offset{1, 3}, Offset{5, 7}}}
item6 := Item{text: strs[2], rank: [5]int32{0, 0, 0, 0, 2}, offsets: []Offset{Offset{1, 2}, Offset{6, 7}}} item6 := Item{text: util.RunesToChars(strs[2]), rank: [5]int32{0, 0, 0, 0, 2}, offsets: []Offset{Offset{1, 2}, Offset{6, 7}}}
items = []*Item{&item1, &item2, &item3, &item4, &item5, &item6} items = []*Item{&item1, &item2, &item3, &item4, &item5, &item6}
sort.Sort(ByRelevance(items)) sort.Sort(ByRelevance(items))
if items[0] != &item6 || items[1] != &item4 || if items[0] != &item6 || items[1] != &item4 ||

View File

@ -5,6 +5,8 @@ import (
"math/rand" "math/rand"
"sort" "sort"
"testing" "testing"
"github.com/junegunn/fzf/src/util"
) )
func assert(t *testing.T, cond bool, msg ...string) { func assert(t *testing.T, cond bool, msg ...string) {
@ -22,7 +24,7 @@ func randItem() *Item {
offsets[idx] = Offset{sidx, eidx} offsets[idx] = Offset{sidx, eidx}
} }
return &Item{ return &Item{
text: []rune(str), text: util.RunesToChars([]rune(str)),
rank: buildEmptyRank(rand.Int31()), rank: buildEmptyRank(rand.Int31()),
offsets: offsets} offsets: offsets}
} }

View File

@ -5,6 +5,7 @@ import (
"testing" "testing"
"github.com/junegunn/fzf/src/curses" "github.com/junegunn/fzf/src/curses"
"github.com/junegunn/fzf/src/util"
) )
func TestDelimiterRegex(t *testing.T) { func TestDelimiterRegex(t *testing.T) {
@ -42,24 +43,24 @@ func TestDelimiterRegex(t *testing.T) {
func TestDelimiterRegexString(t *testing.T) { func TestDelimiterRegexString(t *testing.T) {
delim := delimiterRegexp("*") delim := delimiterRegexp("*")
tokens := Tokenize([]rune("-*--*---**---"), delim) tokens := Tokenize(util.RunesToChars([]rune("-*--*---**---")), delim)
if delim.regex != nil || if delim.regex != nil ||
string(tokens[0].text) != "-*" || tokens[0].text.ToString() != "-*" ||
string(tokens[1].text) != "--*" || tokens[1].text.ToString() != "--*" ||
string(tokens[2].text) != "---*" || tokens[2].text.ToString() != "---*" ||
string(tokens[3].text) != "*" || tokens[3].text.ToString() != "*" ||
string(tokens[4].text) != "---" { tokens[4].text.ToString() != "---" {
t.Errorf("%s %s %d", delim, tokens, len(tokens)) t.Errorf("%s %s %d", delim, tokens, len(tokens))
} }
} }
func TestDelimiterRegexRegex(t *testing.T) { func TestDelimiterRegexRegex(t *testing.T) {
delim := delimiterRegexp("--\\*") delim := delimiterRegexp("--\\*")
tokens := Tokenize([]rune("-*--*---**---"), delim) tokens := Tokenize(util.RunesToChars([]rune("-*--*---**---")), delim)
if delim.str != nil || if delim.str != nil ||
string(tokens[0].text) != "-*--*" || tokens[0].text.ToString() != "-*--*" ||
string(tokens[1].text) != "---*" || tokens[1].text.ToString() != "---*" ||
string(tokens[2].text) != "*---" { tokens[2].text.ToString() != "*---" {
t.Errorf("%s %d", tokens, len(tokens)) t.Errorf("%s %d", tokens, len(tokens))
} }
} }

View File

@ -49,7 +49,7 @@ type Pattern struct {
cacheable bool cacheable bool
delimiter Delimiter delimiter Delimiter
nth []Range nth []Range
procFun map[termType]func(bool, bool, []rune, []rune) algo.Result procFun map[termType]func(bool, bool, util.Chars, []rune) algo.Result
} }
var ( var (
@ -125,7 +125,7 @@ func BuildPattern(fuzzy bool, extended bool, caseMode Case, forward bool,
cacheable: cacheable, cacheable: cacheable,
nth: nth, nth: nth,
delimiter: delimiter, delimiter: delimiter,
procFun: make(map[termType]func(bool, bool, []rune, []rune) algo.Result)} procFun: make(map[termType]func(bool, bool, util.Chars, []rune) algo.Result)}
ptr.procFun[termFuzzy] = algo.FuzzyMatch ptr.procFun[termFuzzy] = algo.FuzzyMatch
ptr.procFun[termEqual] = algo.EqualMatch ptr.procFun[termEqual] = algo.EqualMatch
@ -361,13 +361,13 @@ func (p *Pattern) prepareInput(item *Item) []Token {
tokens := Tokenize(item.text, p.delimiter) tokens := Tokenize(item.text, p.delimiter)
ret = Transform(tokens, p.nth) ret = Transform(tokens, p.nth)
} else { } else {
ret = []Token{Token{text: item.text, prefixLength: 0, trimLength: util.TrimLen(item.text)}} ret = []Token{Token{text: item.text, prefixLength: 0, trimLength: item.text.TrimLength()}}
} }
item.transformed = ret item.transformed = ret
return ret return ret
} }
func (p *Pattern) iter(pfun func(bool, bool, []rune, []rune) algo.Result, func (p *Pattern) iter(pfun func(bool, bool, util.Chars, []rune) algo.Result,
tokens []Token, caseSensitive bool, forward bool, pattern []rune) (Offset, int32) { tokens []Token, caseSensitive bool, forward bool, pattern []rune) (Offset, int32) {
for _, part := range tokens { for _, part := range tokens {
prefixLength := int32(part.prefixLength) prefixLength := int32(part.prefixLength)

View File

@ -5,6 +5,7 @@ import (
"testing" "testing"
"github.com/junegunn/fzf/src/algo" "github.com/junegunn/fzf/src/algo"
"github.com/junegunn/fzf/src/util"
) )
func TestParseTermsExtended(t *testing.T) { func TestParseTermsExtended(t *testing.T) {
@ -71,7 +72,7 @@ func TestExact(t *testing.T) {
pattern := BuildPattern(true, true, CaseSmart, true, pattern := BuildPattern(true, true, CaseSmart, true,
[]Range{}, Delimiter{}, []rune("'abc")) []Range{}, Delimiter{}, []rune("'abc"))
res := algo.ExactMatchNaive( res := algo.ExactMatchNaive(
pattern.caseSensitive, pattern.forward, []rune("aabbcc abc"), pattern.termSets[0][0].text) pattern.caseSensitive, pattern.forward, util.RunesToChars([]rune("aabbcc abc")), pattern.termSets[0][0].text)
if res.Start != 7 || res.End != 10 { if res.Start != 7 || res.End != 10 {
t.Errorf("%s / %d / %d", pattern.termSets, res.Start, res.End) t.Errorf("%s / %d / %d", pattern.termSets, res.Start, res.End)
} }
@ -84,7 +85,7 @@ func TestEqual(t *testing.T) {
match := func(str string, sidxExpected int32, eidxExpected int32) { match := func(str string, sidxExpected int32, eidxExpected int32) {
res := algo.EqualMatch( res := algo.EqualMatch(
pattern.caseSensitive, pattern.forward, []rune(str), pattern.termSets[0][0].text) pattern.caseSensitive, pattern.forward, util.RunesToChars([]rune(str)), pattern.termSets[0][0].text)
if res.Start != sidxExpected || res.End != eidxExpected { if res.Start != sidxExpected || res.End != eidxExpected {
t.Errorf("%s / %d / %d", pattern.termSets, res.Start, res.End) t.Errorf("%s / %d / %d", pattern.termSets, res.Start, res.End)
} }
@ -120,20 +121,20 @@ func TestCaseSensitivity(t *testing.T) {
func TestOrigTextAndTransformed(t *testing.T) { func TestOrigTextAndTransformed(t *testing.T) {
pattern := BuildPattern(true, true, CaseSmart, true, []Range{}, Delimiter{}, []rune("jg")) pattern := BuildPattern(true, true, CaseSmart, true, []Range{}, Delimiter{}, []rune("jg"))
tokens := Tokenize([]rune("junegunn"), Delimiter{}) tokens := Tokenize(util.RunesToChars([]rune("junegunn")), Delimiter{})
trans := Transform(tokens, []Range{Range{1, 1}}) trans := Transform(tokens, []Range{Range{1, 1}})
origRunes := []rune("junegunn.choi") origBytes := []byte("junegunn.choi")
for _, extended := range []bool{false, true} { for _, extended := range []bool{false, true} {
chunk := Chunk{ chunk := Chunk{
&Item{ &Item{
text: []rune("junegunn"), text: util.RunesToChars([]rune("junegunn")),
origText: &origRunes, origText: &origBytes,
transformed: trans}, transformed: trans},
} }
pattern.extended = extended pattern.extended = extended
matches := pattern.matchChunk(&chunk) matches := pattern.matchChunk(&chunk)
if string(matches[0].text) != "junegunn" || string(*matches[0].origText) != "junegunn.choi" || if matches[0].text.ToString() != "junegunn" || string(*matches[0].origText) != "junegunn.choi" ||
matches[0].offsets[0][0] != 0 || matches[0].offsets[0][1] != 5 || matches[0].offsets[0][0] != 0 || matches[0].offsets[0][1] != 5 ||
!reflect.DeepEqual(matches[0].transformed, trans) { !reflect.DeepEqual(matches[0].transformed, trans) {
t.Error("Invalid match result", matches) t.Error("Invalid match result", matches)

View File

@ -564,7 +564,7 @@ func (t *Terminal) printHeader() {
trimmed, colors, newState := extractColor(lineStr, state, nil) trimmed, colors, newState := extractColor(lineStr, state, nil)
state = newState state = newState
item := &Item{ item := &Item{
text: []rune(trimmed), text: util.RunesToChars([]rune(trimmed)),
colors: colors, colors: colors,
rank: buildEmptyRank(0)} rank: buildEmptyRank(0)}
@ -663,8 +663,8 @@ func (t *Terminal) printHighlighted(item *Item, bold bool, col1 int, col2 int, c
} }
// Overflow // Overflow
text := make([]rune, len(item.text)) text := make([]rune, item.text.Length())
copy(text, item.text) copy(text, item.text.ToRunes())
offsets := item.colorOffsets(col2, bold, current) offsets := item.colorOffsets(col2, bold, current)
maxWidth := t.window.Width - 3 maxWidth := t.window.Width - 3
maxe = util.Constrain(maxe+util.Min(maxWidth/2-2, t.hscrollOff), 0, len(text)) maxe = util.Constrain(maxe+util.Min(maxWidth/2-2, t.hscrollOff), 0, len(text))

View File

@ -18,7 +18,7 @@ type Range struct {
// Token contains the tokenized part of the strings and its prefix length // Token contains the tokenized part of the strings and its prefix length
type Token struct { type Token struct {
text []rune text util.Chars
prefixLength int prefixLength int
trimLength int trimLength int
} }
@ -75,15 +75,15 @@ func ParseRange(str *string) (Range, bool) {
return newRange(n, n), true return newRange(n, n), true
} }
func withPrefixLengths(tokens [][]rune, begin int) []Token { func withPrefixLengths(tokens []util.Chars, begin int) []Token {
ret := make([]Token, len(tokens)) ret := make([]Token, len(tokens))
prefixLength := begin prefixLength := begin
for idx, token := range tokens { for idx, token := range tokens {
// Need to define a new local variable instead of the reused token to take // Need to define a new local variable instead of the reused token to take
// the pointer to it // the pointer to it
ret[idx] = Token{token, prefixLength, util.TrimLen(token)} ret[idx] = Token{token, prefixLength, token.TrimLength()}
prefixLength += len(token) prefixLength += token.Length()
} }
return ret return ret
} }
@ -94,13 +94,15 @@ const (
awkWhite awkWhite
) )
func awkTokenizer(input []rune) ([][]rune, int) { func awkTokenizer(input util.Chars) ([]util.Chars, int) {
// 9, 32 // 9, 32
ret := [][]rune{} ret := []util.Chars{}
str := []rune{} str := []rune{}
prefixLength := 0 prefixLength := 0
state := awkNil state := awkNil
for _, r := range input { numChars := input.Length()
for idx := 0; idx < numChars; idx++ {
r := input.Get(idx)
white := r == 9 || r == 32 white := r == 9 || r == 32
switch state { switch state {
case awkNil: case awkNil:
@ -119,34 +121,34 @@ func awkTokenizer(input []rune) ([][]rune, int) {
if white { if white {
str = append(str, r) str = append(str, r)
} else { } else {
ret = append(ret, str) ret = append(ret, util.RunesToChars(str))
state = awkBlack state = awkBlack
str = []rune{r} str = []rune{r}
} }
} }
} }
if len(str) > 0 { if len(str) > 0 {
ret = append(ret, str) ret = append(ret, util.RunesToChars(str))
} }
return ret, prefixLength return ret, prefixLength
} }
// Tokenize tokenizes the given string with the delimiter // Tokenize tokenizes the given string with the delimiter
func Tokenize(runes []rune, delimiter Delimiter) []Token { func Tokenize(text util.Chars, delimiter Delimiter) []Token {
if delimiter.str == nil && delimiter.regex == nil { if delimiter.str == nil && delimiter.regex == nil {
// AWK-style (\S+\s*) // AWK-style (\S+\s*)
tokens, prefixLength := awkTokenizer(runes) tokens, prefixLength := awkTokenizer(text)
return withPrefixLengths(tokens, prefixLength) return withPrefixLengths(tokens, prefixLength)
} }
var tokens []string var tokens []string
if delimiter.str != nil { if delimiter.str != nil {
tokens = strings.Split(string(runes), *delimiter.str) tokens = strings.Split(text.ToString(), *delimiter.str)
for i := 0; i < len(tokens)-1; i++ { for i := 0; i < len(tokens)-1; i++ {
tokens[i] = tokens[i] + *delimiter.str tokens[i] = tokens[i] + *delimiter.str
} }
} else if delimiter.regex != nil { } else if delimiter.regex != nil {
str := string(runes) str := text.ToString()
for len(str) > 0 { for len(str) > 0 {
loc := delimiter.regex.FindStringIndex(str) loc := delimiter.regex.FindStringIndex(str)
if loc == nil { if loc == nil {
@ -157,9 +159,9 @@ func Tokenize(runes []rune, delimiter Delimiter) []Token {
str = str[last:] str = str[last:]
} }
} }
asRunes := make([][]rune, len(tokens)) asRunes := make([]util.Chars, len(tokens))
for i, token := range tokens { for i, token := range tokens {
asRunes[i] = []rune(token) asRunes[i] = util.RunesToChars([]rune(token))
} }
return withPrefixLengths(asRunes, 0) return withPrefixLengths(asRunes, 0)
} }
@ -167,7 +169,7 @@ func Tokenize(runes []rune, delimiter Delimiter) []Token {
func joinTokens(tokens []Token) []rune { func joinTokens(tokens []Token) []rune {
ret := []rune{} ret := []rune{}
for _, token := range tokens { for _, token := range tokens {
ret = append(ret, token.text...) ret = append(ret, token.text.ToRunes()...)
} }
return ret return ret
} }
@ -175,7 +177,7 @@ func joinTokens(tokens []Token) []rune {
func joinTokensAsRunes(tokens []Token) []rune { func joinTokensAsRunes(tokens []Token) []rune {
ret := []rune{} ret := []rune{}
for _, token := range tokens { for _, token := range tokens {
ret = append(ret, token.text...) ret = append(ret, token.text.ToRunes()...)
} }
return ret return ret
} }
@ -197,7 +199,7 @@ func Transform(tokens []Token, withNth []Range) []Token {
} }
if idx >= 1 && idx <= numTokens { if idx >= 1 && idx <= numTokens {
minIdx = idx - 1 minIdx = idx - 1
part = append(part, tokens[idx-1].text...) part = append(part, tokens[idx-1].text.ToRunes()...)
} }
} }
} else { } else {
@ -224,7 +226,7 @@ func Transform(tokens []Token, withNth []Range) []Token {
minIdx = util.Max(0, begin-1) minIdx = util.Max(0, begin-1)
for idx := begin; idx <= end; idx++ { for idx := begin; idx <= end; idx++ {
if idx >= 1 && idx <= numTokens { if idx >= 1 && idx <= numTokens {
part = append(part, tokens[idx-1].text...) part = append(part, tokens[idx-1].text.ToRunes()...)
} }
} }
} }
@ -234,7 +236,7 @@ func Transform(tokens []Token, withNth []Range) []Token {
} else { } else {
prefixLength = 0 prefixLength = 0
} }
transTokens[idx] = Token{part, prefixLength, util.TrimLen(part)} transTokens[idx] = Token{util.RunesToChars(part), prefixLength, util.TrimLen(part)}
} }
return transTokens return transTokens
} }

View File

@ -1,6 +1,10 @@
package fzf package fzf
import "testing" import (
"testing"
"github.com/junegunn/fzf/src/util"
)
func TestParseRange(t *testing.T) { func TestParseRange(t *testing.T) {
{ {
@ -43,23 +47,23 @@ func TestParseRange(t *testing.T) {
func TestTokenize(t *testing.T) { func TestTokenize(t *testing.T) {
// AWK-style // AWK-style
input := " abc: def: ghi " input := " abc: def: ghi "
tokens := Tokenize([]rune(input), Delimiter{}) tokens := Tokenize(util.RunesToChars([]rune(input)), Delimiter{})
if string(tokens[0].text) != "abc: " || tokens[0].prefixLength != 2 || tokens[0].trimLength != 4 { if tokens[0].text.ToString() != "abc: " || tokens[0].prefixLength != 2 || tokens[0].trimLength != 4 {
t.Errorf("%s", tokens) t.Errorf("%s", tokens)
} }
// With delimiter // With delimiter
tokens = Tokenize([]rune(input), delimiterRegexp(":")) tokens = Tokenize(util.RunesToChars([]rune(input)), delimiterRegexp(":"))
if string(tokens[0].text) != " abc:" || tokens[0].prefixLength != 0 || tokens[0].trimLength != 4 { if tokens[0].text.ToString() != " abc:" || tokens[0].prefixLength != 0 || tokens[0].trimLength != 4 {
t.Errorf("%s", tokens) t.Errorf("%s", tokens)
} }
// With delimiter regex // With delimiter regex
tokens = Tokenize([]rune(input), delimiterRegexp("\\s+")) tokens = Tokenize(util.RunesToChars([]rune(input)), delimiterRegexp("\\s+"))
if string(tokens[0].text) != " " || tokens[0].prefixLength != 0 || tokens[0].trimLength != 0 || if tokens[0].text.ToString() != " " || tokens[0].prefixLength != 0 || tokens[0].trimLength != 0 ||
string(tokens[1].text) != "abc: " || tokens[1].prefixLength != 2 || tokens[1].trimLength != 4 || tokens[1].text.ToString() != "abc: " || tokens[1].prefixLength != 2 || tokens[1].trimLength != 4 ||
string(tokens[2].text) != "def: " || tokens[2].prefixLength != 8 || tokens[2].trimLength != 4 || tokens[2].text.ToString() != "def: " || tokens[2].prefixLength != 8 || tokens[2].trimLength != 4 ||
string(tokens[3].text) != "ghi " || tokens[3].prefixLength != 14 || tokens[3].trimLength != 3 { tokens[3].text.ToString() != "ghi " || tokens[3].prefixLength != 14 || tokens[3].trimLength != 3 {
t.Errorf("%s", tokens) t.Errorf("%s", tokens)
} }
} }
@ -67,7 +71,7 @@ func TestTokenize(t *testing.T) {
func TestTransform(t *testing.T) { func TestTransform(t *testing.T) {
input := " abc: def: ghi: jkl" input := " abc: def: ghi: jkl"
{ {
tokens := Tokenize([]rune(input), Delimiter{}) tokens := Tokenize(util.RunesToChars([]rune(input)), Delimiter{})
{ {
ranges := splitNth("1,2,3") ranges := splitNth("1,2,3")
tx := Transform(tokens, ranges) tx := Transform(tokens, ranges)
@ -80,25 +84,25 @@ func TestTransform(t *testing.T) {
tx := Transform(tokens, ranges) tx := Transform(tokens, ranges)
if string(joinTokens(tx)) != "abc: def: ghi: def: ghi: jklabc: " || if string(joinTokens(tx)) != "abc: def: ghi: def: ghi: jklabc: " ||
len(tx) != 4 || len(tx) != 4 ||
string(tx[0].text) != "abc: def: " || tx[0].prefixLength != 2 || tx[0].text.ToString() != "abc: def: " || tx[0].prefixLength != 2 ||
string(tx[1].text) != "ghi: " || tx[1].prefixLength != 14 || tx[1].text.ToString() != "ghi: " || tx[1].prefixLength != 14 ||
string(tx[2].text) != "def: ghi: jkl" || tx[2].prefixLength != 8 || tx[2].text.ToString() != "def: ghi: jkl" || tx[2].prefixLength != 8 ||
string(tx[3].text) != "abc: " || tx[3].prefixLength != 2 { tx[3].text.ToString() != "abc: " || tx[3].prefixLength != 2 {
t.Errorf("%s", tx) t.Errorf("%s", tx)
} }
} }
} }
{ {
tokens := Tokenize([]rune(input), delimiterRegexp(":")) tokens := Tokenize(util.RunesToChars([]rune(input)), delimiterRegexp(":"))
{ {
ranges := splitNth("1..2,3,2..,1") ranges := splitNth("1..2,3,2..,1")
tx := Transform(tokens, ranges) tx := Transform(tokens, ranges)
if string(joinTokens(tx)) != " abc: def: ghi: def: ghi: jkl abc:" || if string(joinTokens(tx)) != " abc: def: ghi: def: ghi: jkl abc:" ||
len(tx) != 4 || len(tx) != 4 ||
string(tx[0].text) != " abc: def:" || tx[0].prefixLength != 0 || tx[0].text.ToString() != " abc: def:" || tx[0].prefixLength != 0 ||
string(tx[1].text) != " ghi:" || tx[1].prefixLength != 12 || tx[1].text.ToString() != " ghi:" || tx[1].prefixLength != 12 ||
string(tx[2].text) != " def: ghi: jkl" || tx[2].prefixLength != 6 || tx[2].text.ToString() != " def: ghi: jkl" || tx[2].prefixLength != 6 ||
string(tx[3].text) != " abc:" || tx[3].prefixLength != 0 { tx[3].text.ToString() != " abc:" || tx[3].prefixLength != 0 {
t.Errorf("%s", tx) t.Errorf("%s", tx)
} }
} }

113
src/util/chars.go Normal file
View File

@ -0,0 +1,113 @@
package util
import (
"unicode/utf8"
)
type Chars struct {
runes []rune
bytes []byte
}
// ToChars converts byte array into rune array
func ToChars(bytea []byte) Chars {
var runes []rune
ascii := true
numBytes := len(bytea)
for i := 0; i < numBytes; {
if bytea[i] < utf8.RuneSelf {
if !ascii {
runes = append(runes, rune(bytea[i]))
}
i++
} else {
if ascii {
ascii = false
runes = make([]rune, i, numBytes)
for j := 0; j < i; j++ {
runes[j] = rune(bytea[j])
}
}
r, sz := utf8.DecodeRune(bytea[i:])
i += sz
runes = append(runes, r)
}
}
if ascii {
return Chars{bytes: bytea}
}
return Chars{runes: runes}
}
func RunesToChars(runes []rune) Chars {
return Chars{runes: runes}
}
func (chars *Chars) Get(i int) rune {
if chars.runes != nil {
return chars.runes[i]
}
return rune(chars.bytes[i])
}
func (chars *Chars) Length() int {
if chars.runes != nil {
return len(chars.runes)
}
return len(chars.bytes)
}
// TrimLength returns the length after trimming leading and trailing whitespaces
func (chars *Chars) TrimLength() int {
var i int
len := chars.Length()
for i = len - 1; i >= 0; i-- {
char := chars.Get(i)
if char != ' ' && char != '\t' {
break
}
}
// Completely empty
if i < 0 {
return 0
}
var j int
for j = 0; j < len; j++ {
char := chars.Get(j)
if char != ' ' && char != '\t' {
break
}
}
return i - j + 1
}
func (chars *Chars) TrailingWhitespaces() int {
whitespaces := 0
for i := chars.Length() - 1; i >= 0; i-- {
char := chars.Get(i)
if char != ' ' && char != '\t' {
break
}
whitespaces++
}
return whitespaces
}
func (chars *Chars) ToString() string {
if chars.runes != nil {
return string(chars.runes)
}
return string(chars.bytes)
}
func (chars *Chars) ToRunes() []rune {
if chars.runes != nil {
return chars.runes
}
runes := make([]rune, len(chars.bytes))
for idx, b := range chars.bytes {
runes[idx] = rune(b)
}
return runes
}

36
src/util/chars_test.go Normal file
View File

@ -0,0 +1,36 @@
package util
import "testing"
func TestToCharsNil(t *testing.T) {
bs := Chars{bytes: []byte{}}
if bs.bytes == nil || bs.runes != nil {
t.Error()
}
rs := RunesToChars([]rune{})
if rs.bytes != nil || rs.runes == nil {
t.Error()
}
}
func TestToCharsAscii(t *testing.T) {
chars := ToChars([]byte("foobar"))
if chars.ToString() != "foobar" || chars.runes != nil {
t.Error()
}
}
func TestCharsLength(t *testing.T) {
chars := ToChars([]byte("\tabc한글 "))
if chars.Length() != 8 || chars.TrimLength() != 5 {
t.Error()
}
}
func TestCharsToString(t *testing.T) {
text := "\tabc한글 "
chars := ToChars([]byte(text))
if chars.ToString() != text {
t.Error()
}
}

View File

@ -7,7 +7,6 @@ import (
"os" "os"
"os/exec" "os/exec"
"time" "time"
"unicode/utf8"
) )
// Max returns the largest integer // Max returns the largest integer
@ -84,34 +83,6 @@ func IsTty() bool {
return int(C.isatty(C.int(os.Stdin.Fd()))) != 0 return int(C.isatty(C.int(os.Stdin.Fd()))) != 0
} }
// TrimRight returns rune array with trailing white spaces cut off
func TrimRight(runes []rune) []rune {
var i int
for i = len(runes) - 1; i >= 0; i-- {
char := runes[i]
if char != ' ' && char != '\t' {
break
}
}
return runes[0 : i+1]
}
// BytesToRunes converts byte array into rune array
func BytesToRunes(bytea []byte) []rune {
runes := make([]rune, 0, len(bytea))
for i := 0; i < len(bytea); {
if bytea[i] < utf8.RuneSelf {
runes = append(runes, rune(bytea[i]))
i++
} else {
r, sz := utf8.DecodeRune(bytea[i:])
i += sz
runes = append(runes, r)
}
}
return runes
}
// TrimLen returns the length of trimmed rune array // TrimLen returns the length of trimmed rune array
func TrimLen(runes []rune) int { func TrimLen(runes []rune) int {
var i int var i int