Avoid unconditionally storing input as runes

When --with-nth is used, fzf used to preprocess each line and store the
result as a rune array, which was wasteful if the line only contained
ASCII characters.
Junegunn Choi 2017-07-20 02:44:30 +09:00
parent bc9d2abdb6
commit c9f16b6430
9 changed files with 44 additions and 127 deletions
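
The idea: keep each line as its raw bytes when it contains only ASCII, and
fall back to a rune slice only when a multi-byte character shows up. A
minimal sketch of that technique (a simplified stand-in for fzf's
util.Chars, not its actual implementation):

package main

import (
    "fmt"
    "unicode/utf8"
)

// chars is a hypothetical dual-representation string: raw bytes for pure
// ASCII input, a rune slice only when multi-byte characters are present.
type chars struct {
    bytes []byte // used when every byte is ASCII
    runes []rune // used only when non-ASCII input is seen
}

func toChars(data []byte) chars {
    for _, b := range data {
        if b >= utf8.RuneSelf {
            // Non-ASCII byte found: pay the rune conversion.
            return chars{runes: []rune(string(data))}
        }
    }
    // ASCII fast path: one byte per character, no conversion needed.
    return chars{bytes: data}
}

func (c chars) length() int {
    if c.runes != nil {
        return len(c.runes)
    }
    return len(c.bytes)
}

func main() {
    fmt.Println(toChars([]byte("hello")).length()) // 5, stored as bytes
    fmt.Println(toChars([]byte("héllo")).length()) // 5, stored as runes
}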

src/core.go

@@ -63,9 +63,6 @@ func Run(opts *Options, revision string) {
     ansiProcessor := func(data []byte) (util.Chars, *[]ansiOffset) {
         return util.ToChars(data), nil
     }
-    ansiProcessorRunes := func(data []rune) (util.Chars, *[]ansiOffset) {
-        return util.RunesToChars(data), nil
-    }
     if opts.Ansi {
         if opts.Theme != nil {
             var state *ansiState
@@ -82,9 +79,6 @@ func Run(opts *Options, revision string) {
                 return util.RunesToChars([]rune(trimmed)), nil
             }
         }
-        ansiProcessorRunes = func(data []rune) (util.Chars, *[]ansiOffset) {
-            return ansiProcessor([]byte(string(data)))
-        }
     }

     // Chunk list
@@ -103,15 +97,15 @@ func Run(opts *Options, revision string) {
         })
     } else {
         chunkList = NewChunkList(func(data []byte, index int) Item {
-            tokens := Tokenize(util.ToChars(data), opts.Delimiter)
+            tokens := Tokenize(string(data), opts.Delimiter)
             trans := Transform(tokens, opts.WithNth)
+            transformed := joinTokens(trans)
             if len(header) < opts.HeaderLines {
-                header = append(header, string(joinTokens(trans)))
+                header = append(header, transformed)
                 eventBox.Set(EvtHeader, header)
                 return nilItem
             }
-            textRunes := joinTokens(trans)
-            trimmed, colors := ansiProcessorRunes(textRunes)
+            trimmed, colors := ansiProcessor([]byte(transformed))
             trimmed.Index = int32(index)
             return Item{text: trimmed, colors: colors, origText: &data}
         })
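
With this change the --with-nth preprocessing stays in plain strings end to
end: Tokenize splits the line, Transform selects the requested fields,
joinTokens concatenates them, and only the joined result goes through the
byte-based ansiProcessor. A rough sketch of that flow (hypothetical
preprocess helper, simplified stand-ins for the real functions):

package main

import (
    "bytes"
    "fmt"
    "strings"
)

// preprocess mimics the new ingestion path: all intermediate work happens
// on strings, and the byte-vs-rune decision (util.ToChars in fzf) is
// deferred until the very end, made once per line.
func preprocess(line string, nth int) string {
    tokens := strings.SplitAfter(line, ":") // stand-in for Tokenize
    if nth < len(tokens) {
        tokens = tokens[:nth] // stand-in for Transform (--with-nth 1..nth)
    }
    var joined bytes.Buffer // same technique as the new joinTokens
    for _, tok := range tokens {
        joined.WriteString(tok)
    }
    return joined.String() // fzf converts this once via util.ToChars
}

func main() {
    fmt.Println(preprocess("user:x:1000:1000:home", 2)) // user:x:
}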

src/options_test.go

@@ -6,7 +6,6 @@ import (
     "testing"

     "github.com/junegunn/fzf/src/tui"
-    "github.com/junegunn/fzf/src/util"
 )

 func TestDelimiterRegex(t *testing.T) {
@@ -44,7 +43,7 @@ func TestDelimiterRegex(t *testing.T) {

 func TestDelimiterRegexString(t *testing.T) {
     delim := delimiterRegexp("*")
-    tokens := Tokenize(util.RunesToChars([]rune("-*--*---**---")), delim)
+    tokens := Tokenize("-*--*---**---", delim)
     if delim.regex != nil ||
         tokens[0].text.ToString() != "-*" ||
         tokens[1].text.ToString() != "--*" ||
@@ -57,7 +56,7 @@ func TestDelimiterRegexString(t *testing.T) {

 func TestDelimiterRegexRegex(t *testing.T) {
     delim := delimiterRegexp("--\\*")
-    tokens := Tokenize(util.RunesToChars([]rune("-*--*---**---")), delim)
+    tokens := Tokenize("-*--*---**---", delim)
     if delim.str != nil ||
         tokens[0].text.ToString() != "-*--*" ||
         tokens[1].text.ToString() != "---*" ||

src/pattern.go

@@ -362,7 +362,7 @@ func (p *Pattern) prepareInput(item *Item) []Token {
         return *item.transformed
     }

-    tokens := Tokenize(item.text, p.delimiter)
+    tokens := Tokenize(item.text.ToString(), p.delimiter)
     ret := Transform(tokens, p.nth)
     item.transformed = &ret
     return ret

src/pattern_test.go

@@ -133,7 +133,7 @@ func TestCaseSensitivity(t *testing.T) {

 func TestOrigTextAndTransformed(t *testing.T) {
     pattern := BuildPattern(true, algo.FuzzyMatchV2, true, CaseSmart, false, true, true, []Range{}, Delimiter{}, []rune("jg"))
-    tokens := Tokenize(util.RunesToChars([]rune("junegunn")), Delimiter{})
+    tokens := Tokenize("junegunn", Delimiter{})
     trans := Transform(tokens, []Range{Range{1, 1}})

     origBytes := []byte("junegunn.choi")

src/terminal.go

@@ -1174,7 +1174,7 @@ func replacePlaceholder(template string, stripAnsi bool, delimiter Delimiter, fo
     for idx, item := range items {
         chars := util.RunesToChars([]rune(item.AsString(stripAnsi)))
-        tokens := Tokenize(chars, delimiter)
+        tokens := Tokenize(chars.ToString(), delimiter)
         trans := Transform(tokens, ranges)
         str := string(joinTokens(trans))
         if delimiter.str != nil {

src/tokenizer.go

@@ -1,6 +1,7 @@
 package fzf

 import (
+    "bytes"
     "regexp"
     "strconv"
     "strings"
@@ -74,14 +75,14 @@ func ParseRange(str *string) (Range, bool) {
     return newRange(n, n), true
 }

-func withPrefixLengths(tokens []util.Chars, begin int) []Token {
+func withPrefixLengths(tokens []string, begin int) []Token {
     ret := make([]Token, len(tokens))

     prefixLength := begin
-    for idx, token := range tokens {
-        // NOTE: &tokens[idx] instead of &tokens
-        ret[idx] = Token{&tokens[idx], int32(prefixLength)}
-        prefixLength += token.Length()
+    for idx := range tokens {
+        chars := util.ToChars([]byte(tokens[idx]))
+        ret[idx] = Token{&chars, int32(prefixLength)}
+        prefixLength += chars.Length()
     }
     return ret
 }
@@ -92,16 +93,15 @@ const (
     awkWhite
 )

-func awkTokenizer(input util.Chars) ([]util.Chars, int) {
+func awkTokenizer(input string) ([]string, int) {
     // 9, 32
-    ret := []util.Chars{}
+    ret := []string{}
     prefixLength := 0
     state := awkNil
-    numChars := input.Length()
     begin := 0
     end := 0
-    for idx := 0; idx < numChars; idx++ {
-        r := input.Get(idx)
+    for idx := 0; idx < len(input); idx++ {
+        r := input[idx]
         white := r == 9 || r == 32
         switch state {
         case awkNil:
@@ -119,19 +119,19 @@ func awkTokenizer(input util.Chars) ([]util.Chars, int) {
             if white {
                 end = idx + 1
             } else {
-                ret = append(ret, input.Slice(begin, end))
+                ret = append(ret, input[begin:end])
                 state, begin, end = awkBlack, idx, idx+1
             }
         }
     }
     if begin < end {
-        ret = append(ret, input.Slice(begin, end))
+        ret = append(ret, input[begin:end])
     }
     return ret, prefixLength
 }

 // Tokenize tokenizes the given string with the delimiter
-func Tokenize(text util.Chars, delimiter Delimiter) []Token {
+func Tokenize(text string, delimiter Delimiter) []Token {
     if delimiter.str == nil && delimiter.regex == nil {
         // AWK-style (\S+\s*)
         tokens, prefixLength := awkTokenizer(text)
@@ -139,36 +139,31 @@ func Tokenize(text util.Chars, delimiter Delimiter) []Token {
     }

     if delimiter.str != nil {
-        return withPrefixLengths(text.Split(*delimiter.str), 0)
+        return withPrefixLengths(strings.SplitAfter(text, *delimiter.str), 0)
     }

     // FIXME performance
     var tokens []string
     if delimiter.regex != nil {
-        str := text.ToString()
-        for len(str) > 0 {
-            loc := delimiter.regex.FindStringIndex(str)
+        for len(text) > 0 {
+            loc := delimiter.regex.FindStringIndex(text)
             if loc == nil {
-                loc = []int{0, len(str)}
+                loc = []int{0, len(text)}
             }
             last := util.Max(loc[1], 1)
-            tokens = append(tokens, str[:last])
-            str = str[last:]
+            tokens = append(tokens, text[:last])
+            text = text[last:]
         }
     }
-    asRunes := make([]util.Chars, len(tokens))
-    for i, token := range tokens {
-        asRunes[i] = util.RunesToChars([]rune(token))
-    }
-    return withPrefixLengths(asRunes, 0)
+    return withPrefixLengths(tokens, 0)
 }

-func joinTokens(tokens []Token) []rune {
-    ret := []rune{}
+func joinTokens(tokens []Token) string {
+    var output bytes.Buffer
     for _, token := range tokens {
-        ret = append(ret, token.text.ToRunes()...)
+        output.WriteString(token.text.ToString())
     }
-    return ret
+    return output.String()
 }

 // Transform is used to transform the input when --with-nth option is given
@@ -181,7 +176,7 @@ func Transform(tokens []Token, withNth []Range) []Token {
         if r.begin == r.end {
             idx := r.begin
             if idx == rangeEllipsis {
-                chars := util.RunesToChars(joinTokens(tokens))
+                chars := util.ToChars([]byte(joinTokens(tokens)))
                 parts = append(parts, &chars)
             } else {
                 if idx < 0 {
@@ -224,15 +219,15 @@ func Transform(tokens []Token, withNth []Range) []Token {
     var merged util.Chars
     switch len(parts) {
     case 0:
-        merged = util.RunesToChars([]rune{})
+        merged = util.ToChars([]byte{})
     case 1:
         merged = *parts[0]
     default:
-        runes := []rune{}
+        var output bytes.Buffer
         for _, part := range parts {
-            runes = append(runes, part.ToRunes()...)
+            output.WriteString(part.ToString())
         }
-        merged = util.RunesToChars(runes)
+        merged = util.ToChars([]byte(output.String()))
     }

     var prefixLength int32
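
The AWK-style tokenizer keeps the whitespace that follows each token and
records the offset at which the token's text begins (hence "abc:  " with
prefixLength 2 in the tests below). A standalone sketch of that rule,
assuming, as awkTokenizer does, that only tab (9) and space (32) count as
whitespace:

package main

import "fmt"

// awkSplit re-implements the AWK-style rule from awkTokenizer above:
// a token is a run of non-whitespace plus the whitespace that follows it,
// and each token records the offset where its text begins.
func awkSplit(input string) (tokens []string, offsets []int) {
    white := func(b byte) bool { return b == 9 || b == 32 }
    i := 0
    for i < len(input) && white(input[i]) {
        i++ // leading whitespace belongs to no token; it only shifts offsets
    }
    for i < len(input) {
        begin := i
        for i < len(input) && !white(input[i]) {
            i++ // the non-whitespace body of the token
        }
        for i < len(input) && white(input[i]) {
            i++ // trailing whitespace stays attached to the token
        }
        tokens = append(tokens, input[begin:i])
        offsets = append(offsets, begin)
    }
    return tokens, offsets
}

func main() {
    tokens, offsets := awkSplit("  abc:  def:  ghi  ")
    fmt.Printf("%q\n", tokens) // ["abc:  " "def:  " "ghi  "]
    fmt.Println(offsets)       // [2 8 14]
}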

src/tokenizer_test.go

@@ -2,8 +2,6 @@ package fzf

 import (
     "testing"
-
-    "github.com/junegunn/fzf/src/util"
 )

 func TestParseRange(t *testing.T) {
@@ -47,19 +45,19 @@ func TestParseRange(t *testing.T) {
 func TestTokenize(t *testing.T) {
     // AWK-style
     input := "  abc:  def:  ghi  "
-    tokens := Tokenize(util.RunesToChars([]rune(input)), Delimiter{})
+    tokens := Tokenize(input, Delimiter{})
     if tokens[0].text.ToString() != "abc:  " || tokens[0].prefixLength != 2 {
         t.Errorf("%s", tokens)
     }

     // With delimiter
-    tokens = Tokenize(util.RunesToChars([]rune(input)), delimiterRegexp(":"))
+    tokens = Tokenize(input, delimiterRegexp(":"))
     if tokens[0].text.ToString() != "  abc:" || tokens[0].prefixLength != 0 {
-        t.Errorf("%s", tokens)
+        t.Error(tokens[0].text.ToString(), tokens[0].prefixLength)
     }

     // With delimiter regex
-    tokens = Tokenize(util.RunesToChars([]rune(input)), delimiterRegexp("\\s+"))
+    tokens = Tokenize(input, delimiterRegexp("\\s+"))
     if tokens[0].text.ToString() != "  " || tokens[0].prefixLength != 0 ||
         tokens[1].text.ToString() != "abc:  " || tokens[1].prefixLength != 2 ||
         tokens[2].text.ToString() != "def:  " || tokens[2].prefixLength != 8 ||
@@ -71,7 +69,7 @@ func TestTokenize(t *testing.T) {
 func TestTransform(t *testing.T) {
     input := "  abc:  def:  ghi:  jkl"
     {
-        tokens := Tokenize(util.RunesToChars([]rune(input)), Delimiter{})
+        tokens := Tokenize(input, Delimiter{})
         {
             ranges := splitNth("1,2,3")
             tx := Transform(tokens, ranges)
@@ -93,7 +91,7 @@ func TestTransform(t *testing.T) {
         }
     }
     {
-        tokens := Tokenize(util.RunesToChars([]rune(input)), delimiterRegexp(":"))
+        tokens := Tokenize(input, delimiterRegexp(":"))
         {
             ranges := splitNth("1..2,3,2..,1")
             tx := Transform(tokens, ranges)

src/util/chars.go

@@ -157,46 +157,3 @@ func (chars *Chars) CopyRunes(dest []rune) {
     }
     return
 }
-
-func (chars *Chars) Slice(b int, e int) Chars {
-    if runes := chars.optionalRunes(); runes != nil {
-        return RunesToChars(runes[b:e])
-    }
-    return Chars{slice: chars.slice[b:e], inBytes: true}
-}
-
-func (chars *Chars) Split(delimiter string) []Chars {
-    delim := []rune(delimiter)
-    numChars := chars.Length()
-    numDelim := len(delim)
-    begin := 0
-    ret := make([]Chars, 0, 1)
-
-    for index := 0; index < numChars; {
-        if index+numDelim <= numChars {
-            match := true
-            for off, d := range delim {
-                if chars.Get(index+off) != d {
-                    match = false
-                    break
-                }
-            }
-            // Found the delimiter
-            if match {
-                incr := Max(numDelim, 1)
-                ret = append(ret, chars.Slice(begin, index+incr))
-                index += incr
-                begin = index
-                continue
-            }
-        } else {
-            // Impossible to find the delimiter in the remaining substring
-            break
-        }
-        index++
-    }
-    if begin < numChars || len(ret) == 0 {
-        ret = append(ret, chars.Slice(begin, numChars))
-    }
-    return ret
-}
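
Chars.Split can go away because the string-delimiter branch of Tokenize now
uses strings.SplitAfter. The two agree on the common cases but not on every
edge: after a trailing delimiter, SplitAfter emits a final empty string,
which the removed implementation did not (compare the deleted TestSplit
expectations below). A quick check:

package main

import (
    "fmt"
    "strings"
)

func main() {
    // strings.SplitAfter keeps the delimiter attached to each piece, just
    // like the removed Chars.Split did:
    fmt.Printf("%q\n", strings.SplitAfter("abc:def::", ":"))
    // ["abc:" "def:" ":" ""]
    //
    // Note the trailing "": the deleted Chars.Split returned only
    // ["abc:" "def:" ":"] for this input.
}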

src/util/chars_test.go

@@ -44,29 +44,3 @@ func TestTrimLength(t *testing.T) {
     check(" h o ", 5)
     check(" ", 0)
 }
-
-func TestSplit(t *testing.T) {
-    check := func(str string, delim string, tokens ...string) {
-        input := ToChars([]byte(str))
-        result := input.Split(delim)
-        if len(result) != len(tokens) {
-            t.Errorf(
-                "Invalid Split result for '%s': %d tokens found (expected %d): %s",
-                str, len(result), len(tokens), result)
-        }
-        for idx, token := range tokens {
-            if result[idx].ToString() != token {
-                t.Errorf("Invalid Split result for '%s': %s (expected %s)",
-                    str, result[idx].ToString(), token)
-            }
-        }
-    }
-    check("abc:def::", ":", "abc:", "def:", ":")
-    check("abc:def::", "-", "abc:def::")
-    check("abc", "", "a", "b", "c")
-    check("abc", "a", "a", "bc")
-    check("abc", "ab", "ab", "c")
-    check("abc", "abc", "abc")
-    check("abc", "abcd", "abc")
-    check("", "abcd", "")
-}
}