Avoid unconditionally storing input as runes

When --with-nth is used, fzf used to preprocess each line and store the
result as a rune array, which was wasteful when the line contained only
ASCII characters.
Junegunn Choi 2017-07-20 02:44:30 +09:00
parent bc9d2abdb6
commit c9f16b6430
9 changed files with 44 additions and 127 deletions
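
The gist of the optimization, in miniature: keep a line as its raw bytes while it is pure ASCII, and only fall back to a rune slice when a multi-byte character forces per-rune indexing. Below is a minimal standalone sketch of that idea; chars and toChars are hypothetical stand-ins, not fzf's code (the real mechanism is util.Chars/util.ToChars, which appear throughout the diffs below):

package main

import (
    "fmt"
    "unicode/utf8"
)

// chars mimics the bytes-or-runes split: exactly one field is set.
type chars struct {
    bytes []byte // ASCII-only input, stored as-is
    runes []rune // input containing at least one multi-byte character
}

func toChars(data []byte) chars {
    for _, b := range data {
        if b >= utf8.RuneSelf { // first non-ASCII byte: decode once
            return chars{runes: []rune(string(data))}
        }
    }
    return chars{bytes: data} // fast path: no decoding, no extra allocation
}

func (c chars) length() int {
    if c.runes != nil {
        return len(c.runes)
    }
    return len(c.bytes)
}

func main() {
    fmt.Println(toChars([]byte("ascii only")).length()) // 10, byte-backed
    fmt.Println(toChars([]byte("héllo")).length())      // 5, rune-backed
}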

src/core.go

@@ -63,9 +63,6 @@ func Run(opts *Options, revision string) {
     ansiProcessor := func(data []byte) (util.Chars, *[]ansiOffset) {
         return util.ToChars(data), nil
     }
-    ansiProcessorRunes := func(data []rune) (util.Chars, *[]ansiOffset) {
-        return util.RunesToChars(data), nil
-    }
     if opts.Ansi {
         if opts.Theme != nil {
             var state *ansiState
@@ -82,9 +79,6 @@ func Run(opts *Options, revision string) {
                 return util.RunesToChars([]rune(trimmed)), nil
             }
         }
-        ansiProcessorRunes = func(data []rune) (util.Chars, *[]ansiOffset) {
-            return ansiProcessor([]byte(string(data)))
-        }
     }

     // Chunk list
@@ -103,15 +97,15 @@ func Run(opts *Options, revision string) {
         })
     } else {
         chunkList = NewChunkList(func(data []byte, index int) Item {
-            tokens := Tokenize(util.ToChars(data), opts.Delimiter)
+            tokens := Tokenize(string(data), opts.Delimiter)
             trans := Transform(tokens, opts.WithNth)
+            transformed := joinTokens(trans)
             if len(header) < opts.HeaderLines {
-                header = append(header, string(joinTokens(trans)))
+                header = append(header, transformed)
                 eventBox.Set(EvtHeader, header)
                 return nilItem
             }
-            textRunes := joinTokens(trans)
-            trimmed, colors := ansiProcessorRunes(textRunes)
+            trimmed, colors := ansiProcessor([]byte(transformed))
             trimmed.Index = int32(index)
             return Item{text: trimmed, colors: colors, origText: &data}
         })
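
Both deletions above remove a rune round-trip: ansiProcessorRunes took []rune input, and the old --with-nth path joined tokens into a []rune only to convert it back. A rough way to see the per-line cost being avoided (a hypothetical micro-benchmark, not part of the repo):

package fzf_test

import "testing"

var line = []byte("some typical input line: /usr/share/dict/words:4242")

// Old shape: every line is decoded into a fresh []rune before processing.
func BenchmarkRuneConversion(b *testing.B) {
    for i := 0; i < b.N; i++ {
        _ = []rune(string(line))
    }
}

// New shape: a single byte scan decides whether decoding is needed at all.
func BenchmarkASCIIScan(b *testing.B) {
    for i := 0; i < b.N; i++ {
        ascii := true
        for _, c := range line {
            if c >= 0x80 {
                ascii = false
                break
            }
        }
        _ = ascii
    }
}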

src/options_test.go

@@ -6,7 +6,6 @@ import (
     "testing"

     "github.com/junegunn/fzf/src/tui"
-    "github.com/junegunn/fzf/src/util"
 )

 func TestDelimiterRegex(t *testing.T) {
@@ -44,7 +43,7 @@ func TestDelimiterRegex(t *testing.T) {

 func TestDelimiterRegexString(t *testing.T) {
     delim := delimiterRegexp("*")
-    tokens := Tokenize(util.RunesToChars([]rune("-*--*---**---")), delim)
+    tokens := Tokenize("-*--*---**---", delim)
     if delim.regex != nil ||
         tokens[0].text.ToString() != "-*" ||
         tokens[1].text.ToString() != "--*" ||
@@ -57,7 +56,7 @@ func TestDelimiterRegexString(t *testing.T) {

 func TestDelimiterRegexRegex(t *testing.T) {
     delim := delimiterRegexp("--\\*")
-    tokens := Tokenize(util.RunesToChars([]rune("-*--*---**---")), delim)
+    tokens := Tokenize("-*--*---**---", delim)
     if delim.str != nil ||
         tokens[0].text.ToString() != "-*--*" ||
         tokens[1].text.ToString() != "---*" ||

src/pattern.go

@@ -362,7 +362,7 @@ func (p *Pattern) prepareInput(item *Item) []Token {
         return *item.transformed
     }

-    tokens := Tokenize(item.text, p.delimiter)
+    tokens := Tokenize(item.text.ToString(), p.delimiter)
     ret := Transform(tokens, p.nth)
     item.transformed = &ret
     return ret

src/pattern_test.go

@@ -133,7 +133,7 @@ func TestCaseSensitivity(t *testing.T) {

 func TestOrigTextAndTransformed(t *testing.T) {
     pattern := BuildPattern(true, algo.FuzzyMatchV2, true, CaseSmart, false, true, true, []Range{}, Delimiter{}, []rune("jg"))
-    tokens := Tokenize(util.RunesToChars([]rune("junegunn")), Delimiter{})
+    tokens := Tokenize("junegunn", Delimiter{})
     trans := Transform(tokens, []Range{Range{1, 1}})

     origBytes := []byte("junegunn.choi")

src/terminal.go

@@ -1174,7 +1174,7 @@ func replacePlaceholder(template string, stripAnsi bool, delimiter Delimiter, fo

     for idx, item := range items {
         chars := util.RunesToChars([]rune(item.AsString(stripAnsi)))
-        tokens := Tokenize(chars, delimiter)
+        tokens := Tokenize(chars.ToString(), delimiter)
         trans := Transform(tokens, ranges)
         str := string(joinTokens(trans))
         if delimiter.str != nil {

src/tokenizer.go

@@ -1,6 +1,7 @@
 package fzf

 import (
+    "bytes"
     "regexp"
     "strconv"
     "strings"
@@ -74,14 +75,14 @@ func ParseRange(str *string) (Range, bool) {
     return newRange(n, n), true
 }

-func withPrefixLengths(tokens []util.Chars, begin int) []Token {
+func withPrefixLengths(tokens []string, begin int) []Token {
     ret := make([]Token, len(tokens))

     prefixLength := begin
-    for idx, token := range tokens {
-        // NOTE: &tokens[idx] instead of &tokens
-        ret[idx] = Token{&tokens[idx], int32(prefixLength)}
-        prefixLength += token.Length()
+    for idx := range tokens {
+        chars := util.ToChars([]byte(tokens[idx]))
+        ret[idx] = Token{&chars, int32(prefixLength)}
+        prefixLength += chars.Length()
     }
     return ret
 }
@@ -92,16 +93,15 @@ const (
     awkWhite
 )

-func awkTokenizer(input util.Chars) ([]util.Chars, int) {
+func awkTokenizer(input string) ([]string, int) {
     // 9, 32
-    ret := []util.Chars{}
+    ret := []string{}
     prefixLength := 0
     state := awkNil
-    numChars := input.Length()
     begin := 0
     end := 0
-    for idx := 0; idx < numChars; idx++ {
-        r := input.Get(idx)
+    for idx := 0; idx < len(input); idx++ {
+        r := input[idx]
         white := r == 9 || r == 32
         switch state {
         case awkNil:
@@ -119,19 +119,19 @@ func awkTokenizer(input util.Chars) ([]util.Chars, int) {
             if white {
                 end = idx + 1
             } else {
-                ret = append(ret, input.Slice(begin, end))
+                ret = append(ret, input[begin:end])
                 state, begin, end = awkBlack, idx, idx+1
             }
         }
     }
     if begin < end {
-        ret = append(ret, input.Slice(begin, end))
+        ret = append(ret, input[begin:end])
     }
     return ret, prefixLength
 }

 // Tokenize tokenizes the given string with the delimiter
-func Tokenize(text util.Chars, delimiter Delimiter) []Token {
+func Tokenize(text string, delimiter Delimiter) []Token {
     if delimiter.str == nil && delimiter.regex == nil {
         // AWK-style (\S+\s*)
         tokens, prefixLength := awkTokenizer(text)
@@ -139,36 +139,31 @@ func Tokenize(text util.Chars, delimiter Delimiter) []Token {
     }

     if delimiter.str != nil {
-        return withPrefixLengths(text.Split(*delimiter.str), 0)
+        return withPrefixLengths(strings.SplitAfter(text, *delimiter.str), 0)
     }

     // FIXME performance
     var tokens []string
     if delimiter.regex != nil {
-        str := text.ToString()
-        for len(str) > 0 {
-            loc := delimiter.regex.FindStringIndex(str)
+        for len(text) > 0 {
+            loc := delimiter.regex.FindStringIndex(text)
             if loc == nil {
-                loc = []int{0, len(str)}
+                loc = []int{0, len(text)}
             }
             last := util.Max(loc[1], 1)
-            tokens = append(tokens, str[:last])
-            str = str[last:]
+            tokens = append(tokens, text[:last])
+            text = text[last:]
         }
     }
-    asRunes := make([]util.Chars, len(tokens))
-    for i, token := range tokens {
-        asRunes[i] = util.RunesToChars([]rune(token))
-    }
-    return withPrefixLengths(asRunes, 0)
+    return withPrefixLengths(tokens, 0)
 }

-func joinTokens(tokens []Token) []rune {
-    ret := []rune{}
+func joinTokens(tokens []Token) string {
+    var output bytes.Buffer
     for _, token := range tokens {
-        ret = append(ret, token.text.ToRunes()...)
+        output.WriteString(token.text.ToString())
     }
-    return ret
+    return output.String()
 }

 // Transform is used to transform the input when --with-nth option is given
@@ -181,7 +176,7 @@ func Transform(tokens []Token, withNth []Range) []Token {
         if r.begin == r.end {
             idx := r.begin
             if idx == rangeEllipsis {
-                chars := util.RunesToChars(joinTokens(tokens))
+                chars := util.ToChars([]byte(joinTokens(tokens)))
                 parts = append(parts, &chars)
             } else {
                 if idx < 0 {
@@ -224,15 +219,15 @@ func Transform(tokens []Token, withNth []Range) []Token {
         var merged util.Chars
         switch len(parts) {
         case 0:
-            merged = util.RunesToChars([]rune{})
+            merged = util.ToChars([]byte{})
         case 1:
             merged = *parts[0]
         default:
-            runes := []rune{}
+            var output bytes.Buffer
             for _, part := range parts {
-                runes = append(runes, part.ToRunes()...)
+                output.WriteString(part.ToString())
             }
-            merged = util.RunesToChars(runes)
+            merged = util.ToChars([]byte(output.String()))
         }

         var prefixLength int32
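
One detail worth noting in the awkTokenizer change: indexing the string byte by byte (input[idx]) stays correct for UTF-8 input, because the only delimiters tested are TAB (9) and SPACE (32), and every byte of a multi-byte UTF-8 sequence is >= 0x80. A standalone sketch of the same scan (splitAwk is a hypothetical stand-in mirroring the new code; the awkNil branch is inferred, since the diff does not show it):

package main

import "fmt"

const (
    awkNil   = iota // still inside the leading whitespace
    awkBlack        // inside a word
    awkWhite        // whitespace trailing a word
)

// splitAwk emulates awk-style tokenization: tokens match \S+\s*, and
// prefix is the length of the leading whitespace.
func splitAwk(input string) (tokens []string, prefix int) {
    state, begin, end := awkNil, 0, 0
    for idx := 0; idx < len(input); idx++ {
        white := input[idx] == 9 || input[idx] == 32 // byte compare; safe for UTF-8
        switch state {
        case awkNil:
            if white {
                prefix++
            } else {
                state, begin, end = awkBlack, idx, idx+1
            }
        case awkBlack:
            end = idx + 1
            if white {
                state = awkWhite
            }
        case awkWhite:
            if white {
                end = idx + 1
            } else {
                tokens = append(tokens, input[begin:end])
                state, begin, end = awkBlack, idx, idx+1
            }
        }
    }
    if begin < end {
        tokens = append(tokens, input[begin:end])
    }
    return tokens, prefix
}

func main() {
    tokens, prefix := splitAwk("  abc:  def:  ghi  ")
    fmt.Printf("%q %d\n", tokens, prefix) // ["abc:  " "def:  " "ghi  "] 2
}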

src/tokenizer_test.go

@@ -2,8 +2,6 @@ package fzf

 import (
     "testing"
-
-    "github.com/junegunn/fzf/src/util"
 )

 func TestParseRange(t *testing.T) {
@@ -47,19 +45,19 @@ func TestParseRange(t *testing.T) {
 func TestTokenize(t *testing.T) {
     // AWK-style
     input := "  abc:  def:  ghi  "
-    tokens := Tokenize(util.RunesToChars([]rune(input)), Delimiter{})
+    tokens := Tokenize(input, Delimiter{})
     if tokens[0].text.ToString() != "abc:  " || tokens[0].prefixLength != 2 {
         t.Errorf("%s", tokens)
     }

     // With delimiter
-    tokens = Tokenize(util.RunesToChars([]rune(input)), delimiterRegexp(":"))
+    tokens = Tokenize(input, delimiterRegexp(":"))
     if tokens[0].text.ToString() != "  abc:" || tokens[0].prefixLength != 0 {
-        t.Errorf("%s", tokens)
+        t.Error(tokens[0].text.ToString(), tokens[0].prefixLength)
     }

     // With delimiter regex
-    tokens = Tokenize(util.RunesToChars([]rune(input)), delimiterRegexp("\\s+"))
+    tokens = Tokenize(input, delimiterRegexp("\\s+"))
     if tokens[0].text.ToString() != "  " || tokens[0].prefixLength != 0 ||
         tokens[1].text.ToString() != "abc:  " || tokens[1].prefixLength != 2 ||
         tokens[2].text.ToString() != "def:  " || tokens[2].prefixLength != 8 ||
@@ -71,7 +69,7 @@ func TestTokenize(t *testing.T) {
 func TestTransform(t *testing.T) {
     input := "  abc:  def:  ghi:  jkl"
     {
-        tokens := Tokenize(util.RunesToChars([]rune(input)), Delimiter{})
+        tokens := Tokenize(input, Delimiter{})
         {
             ranges := splitNth("1,2,3")
             tx := Transform(tokens, ranges)
@@ -93,7 +91,7 @@ func TestTransform(t *testing.T) {
         }
     }
     {
-        tokens := Tokenize(util.RunesToChars([]rune(input)), delimiterRegexp(":"))
+        tokens = Tokenize(input, delimiterRegexp(":"))
         {
             ranges := splitNth("1..2,3,2..,1")
             tx := Transform(tokens, ranges)

src/util/chars.go

@@ -157,46 +157,3 @@ func (chars *Chars) CopyRunes(dest []rune) {
     }
     return
 }
-
-func (chars *Chars) Slice(b int, e int) Chars {
-    if runes := chars.optionalRunes(); runes != nil {
-        return RunesToChars(runes[b:e])
-    }
-    return Chars{slice: chars.slice[b:e], inBytes: true}
-}
-
-func (chars *Chars) Split(delimiter string) []Chars {
-    delim := []rune(delimiter)
-    numChars := chars.Length()
-    numDelim := len(delim)
-    begin := 0
-    ret := make([]Chars, 0, 1)
-
-    for index := 0; index < numChars; {
-        if index+numDelim <= numChars {
-            match := true
-            for off, d := range delim {
-                if chars.Get(index+off) != d {
-                    match = false
-                    break
-                }
-            }
-            // Found the delimiter
-            if match {
-                incr := Max(numDelim, 1)
-                ret = append(ret, chars.Slice(begin, index+incr))
-                index += incr
-                begin = index
-                continue
-            }
-        } else {
-            // Impossible to find the delimiter in the remaining substring
-            break
-        }
-        index++
-    }
-    if begin < numChars || len(ret) == 0 {
-        ret = append(ret, chars.Slice(begin, numChars))
-    }
-    return ret
-}

src/util/chars_test.go

@@ -44,29 +44,3 @@ func TestTrimLength(t *testing.T) {
     check(" h o ", 5)
     check(" ", 0)
 }
-
-func TestSplit(t *testing.T) {
-    check := func(str string, delim string, tokens ...string) {
-        input := ToChars([]byte(str))
-        result := input.Split(delim)
-        if len(result) != len(tokens) {
-            t.Errorf(
-                "Invalid Split result for '%s': %d tokens found (expected %d): %s",
-                str, len(result), len(tokens), result)
-        }
-        for idx, token := range tokens {
-            if result[idx].ToString() != token {
-                t.Errorf("Invalid Split result for '%s': %s (expected %s)",
-                    str, result[idx].ToString(), token)
-            }
-        }
-    }
-    check("abc:def::", ":", "abc:", "def:", ":")
-    check("abc:def::", "-", "abc:def::")
-    check("abc", "", "a", "b", "c")
-    check("abc", "a", "a", "bc")
-    check("abc", "ab", "ab", "c")
-    check("abc", "abc", "abc")
-    check("abc", "abcd", "abc")
-    check("", "abcd", "")
-}
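
These cases are now handled by strings.SplitAfter in tokenizer.go above, which likewise keeps each delimiter attached to the preceding token. The one difference visible against the removed expectations: when the input ends with the delimiter, SplitAfter yields a trailing empty string where Chars.Split did not. A quick comparison:

package main

import (
    "fmt"
    "strings"
)

func main() {
    // The removed TestSplit cases, run through strings.SplitAfter instead.
    fmt.Printf("%q\n", strings.SplitAfter("abc:def::", ":")) // ["abc:" "def:" ":" ""], old Split had no trailing ""
    fmt.Printf("%q\n", strings.SplitAfter("abc:def::", "-")) // ["abc:def::"]
    fmt.Printf("%q\n", strings.SplitAfter("abc", "ab"))      // ["ab" "c"]
    fmt.Printf("%q\n", strings.SplitAfter("", "abcd"))       // [""]
}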