Avoid unconditionally storing input as runes
When --with-nth is used, fzf used to preprocess each line and store the result as a rune array, which was wasteful when the line contained only ASCII characters.
parent bc9d2abdb6
commit c9f16b6430
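Context for the diff below: util.Chars is fzf's dual representation of a line — it can hold either the original bytes or a decoded rune slice (the inBytes flag is visible in the Chars.Slice method removed at the bottom of this diff). A minimal standalone sketch of the idea, with simplified names, not fzf's actual implementation:

package main

import (
    "fmt"
    "unicode/utf8"
)

// chars is a simplified stand-in for fzf's util.Chars: keep the raw bytes
// when the line is pure ASCII, decode to runes only when a multi-byte
// character forces it.
type chars struct {
    bytes []byte // set when the input is ASCII-only
    runes []rune // set otherwise
}

func toChars(data []byte) chars {
    for _, b := range data {
        if b >= utf8.RuneSelf {
            // Non-ASCII byte found: pay the rune-conversion cost.
            return chars{runes: []rune(string(data))}
        }
    }
    // ASCII fast path: one byte per character, no rune allocation.
    return chars{bytes: data}
}

func (c chars) length() int {
    if c.runes != nil {
        return len(c.runes)
    }
    return len(c.bytes)
}

func main() {
    fmt.Println(toChars([]byte("hello")).length()) // 5, stored as bytes
    fmt.Println(toChars([]byte("héllo")).length()) // 5, stored as runes
}

Before this commit, the --with-nth path bypassed that fast path by tokenizing into util.Chars up front and joining tokens as []rune; the diff reworks Tokenize, Transform, and joinTokens to operate on plain strings so the byte representation survives until matching needs it.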
src/core.go
@@ -63,9 +63,6 @@ func Run(opts *Options, revision string) {
     ansiProcessor := func(data []byte) (util.Chars, *[]ansiOffset) {
         return util.ToChars(data), nil
     }
-    ansiProcessorRunes := func(data []rune) (util.Chars, *[]ansiOffset) {
-        return util.RunesToChars(data), nil
-    }
     if opts.Ansi {
         if opts.Theme != nil {
             var state *ansiState
@@ -82,9 +79,6 @@ func Run(opts *Options, revision string) {
                 return util.RunesToChars([]rune(trimmed)), nil
             }
         }
-        ansiProcessorRunes = func(data []rune) (util.Chars, *[]ansiOffset) {
-            return ansiProcessor([]byte(string(data)))
-        }
     }

     // Chunk list
@@ -103,15 +97,15 @@ func Run(opts *Options, revision string) {
         })
     } else {
         chunkList = NewChunkList(func(data []byte, index int) Item {
-            tokens := Tokenize(util.ToChars(data), opts.Delimiter)
+            tokens := Tokenize(string(data), opts.Delimiter)
             trans := Transform(tokens, opts.WithNth)
+            transformed := joinTokens(trans)
             if len(header) < opts.HeaderLines {
-                header = append(header, string(joinTokens(trans)))
+                header = append(header, transformed)
                 eventBox.Set(EvtHeader, header)
                 return nilItem
             }
-            textRunes := joinTokens(trans)
-            trimmed, colors := ansiProcessorRunes(textRunes)
+            trimmed, colors := ansiProcessor([]byte(transformed))
             trimmed.Index = int32(index)
             return Item{text: trimmed, colors: colors, origText: &data}
         })
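The deleted ansiProcessorRunes existed only to feed rune slices back through the byte-based processor. After the change, the --with-nth branch above keeps everything as strings and converts to []byte exactly once. A rough standalone illustration of the cost difference on an ASCII line (not fzf code):

package main

import "fmt"

func main() {
    line := "src/core.go: some ASCII-only input line"

    // Old shape: every line materialized as runes before storage.
    runes := []rune(line) // O(n) decode, 4 bytes per character

    // New shape: keep the string/byte form; decode only when needed.
    bytes := []byte(line) // plain copy, 1 byte per character here

    // Same character count for ASCII, but the rune slice occupies
    // roughly four times the memory.
    fmt.Println(len(runes) == len(bytes)) // true
}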
src/options_test.go
@@ -6,7 +6,6 @@ import (
     "testing"

     "github.com/junegunn/fzf/src/tui"
-    "github.com/junegunn/fzf/src/util"
 )

 func TestDelimiterRegex(t *testing.T) {
@@ -44,7 +43,7 @@ func TestDelimiterRegex(t *testing.T) {

 func TestDelimiterRegexString(t *testing.T) {
     delim := delimiterRegexp("*")
-    tokens := Tokenize(util.RunesToChars([]rune("-*--*---**---")), delim)
+    tokens := Tokenize("-*--*---**---", delim)
     if delim.regex != nil ||
         tokens[0].text.ToString() != "-*" ||
         tokens[1].text.ToString() != "--*" ||
@@ -57,7 +56,7 @@ func TestDelimiterRegexString(t *testing.T) {

 func TestDelimiterRegexRegex(t *testing.T) {
     delim := delimiterRegexp("--\\*")
-    tokens := Tokenize(util.RunesToChars([]rune("-*--*---**---")), delim)
+    tokens := Tokenize("-*--*---**---", delim)
     if delim.str != nil ||
         tokens[0].text.ToString() != "-*--*" ||
         tokens[1].text.ToString() != "---*" ||
src/pattern.go
@@ -362,7 +362,7 @@ func (p *Pattern) prepareInput(item *Item) []Token {
         return *item.transformed
     }

-    tokens := Tokenize(item.text, p.delimiter)
+    tokens := Tokenize(item.text.ToString(), p.delimiter)
     ret := Transform(tokens, p.nth)
     item.transformed = &ret
     return ret
src/pattern_test.go
@@ -133,7 +133,7 @@ func TestCaseSensitivity(t *testing.T) {

 func TestOrigTextAndTransformed(t *testing.T) {
     pattern := BuildPattern(true, algo.FuzzyMatchV2, true, CaseSmart, false, true, true, []Range{}, Delimiter{}, []rune("jg"))
-    tokens := Tokenize(util.RunesToChars([]rune("junegunn")), Delimiter{})
+    tokens := Tokenize("junegunn", Delimiter{})
     trans := Transform(tokens, []Range{Range{1, 1}})

     origBytes := []byte("junegunn.choi")
src/terminal.go
@@ -1174,7 +1174,7 @@ func replacePlaceholder(template string, stripAnsi bool, delimiter Delimiter, fo
     for idx, item := range items {
         chars := util.RunesToChars([]rune(item.AsString(stripAnsi)))
-        tokens := Tokenize(chars, delimiter)
+        tokens := Tokenize(chars.ToString(), delimiter)
         trans := Transform(tokens, ranges)
         str := string(joinTokens(trans))
         if delimiter.str != nil {
@ -1,6 +1,7 @@
|
||||
package fzf
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
@@ -74,14 +75,14 @@ func ParseRange(str *string) (Range, bool) {
     return newRange(n, n), true
 }

-func withPrefixLengths(tokens []util.Chars, begin int) []Token {
+func withPrefixLengths(tokens []string, begin int) []Token {
     ret := make([]Token, len(tokens))

     prefixLength := begin
-    for idx, token := range tokens {
-        // NOTE: &tokens[idx] instead of &tokens
-        ret[idx] = Token{&tokens[idx], int32(prefixLength)}
-        prefixLength += token.Length()
+    for idx := range tokens {
+        chars := util.ToChars([]byte(tokens[idx]))
+        ret[idx] = Token{&chars, int32(prefixLength)}
+        prefixLength += chars.Length()
     }
     return ret
 }
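withPrefixLengths records, for each token, the character offset at which it starts in the original line; the rewrite computes it from the freshly converted Chars instead of a pre-built rune array. A standalone sketch of the bookkeeping (hypothetical helper, using rune counts the way Chars.Length() does):

package main

import "fmt"

// prefixLengths mirrors the logic of the new withPrefixLengths above:
// each token records the character offset at which it starts in the line.
// (Standalone sketch; fzf wraps each token in util.Chars first.)
func prefixLengths(tokens []string, begin int) []int {
    offsets := make([]int, len(tokens))
    prefixLength := begin
    for idx, token := range tokens {
        offsets[idx] = prefixLength
        prefixLength += len([]rune(token)) // character count, like Chars.Length()
    }
    return offsets
}

func main() {
    // AWK-style tokens of "  abc:  def:" keep trailing whitespace attached,
    // and the first token starts after the 2-space indent.
    fmt.Println(prefixLengths([]string{"abc:  ", "def:"}, 2)) // [2 8]
}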
@@ -92,16 +93,15 @@ const (
     awkWhite
 )

-func awkTokenizer(input util.Chars) ([]util.Chars, int) {
+func awkTokenizer(input string) ([]string, int) {
     // 9, 32
-    ret := []util.Chars{}
+    ret := []string{}
     prefixLength := 0
     state := awkNil
-    numChars := input.Length()
     begin := 0
     end := 0
-    for idx := 0; idx < numChars; idx++ {
-        r := input.Get(idx)
+    for idx := 0; idx < len(input); idx++ {
+        r := input[idx]
         white := r == 9 || r == 32
         switch state {
         case awkNil:
@@ -119,19 +119,19 @@ func awkTokenizer(input util.Chars) ([]util.Chars, int) {
             if white {
                 end = idx + 1
             } else {
-                ret = append(ret, input.Slice(begin, end))
+                ret = append(ret, input[begin:end])
                 state, begin, end = awkBlack, idx, idx+1
             }
         }
     }
     if begin < end {
-        ret = append(ret, input.Slice(begin, end))
+        ret = append(ret, input[begin:end])
     }
     return ret, prefixLength
 }

 // Tokenize tokenizes the given string with the delimiter
-func Tokenize(text util.Chars, delimiter Delimiter) []Token {
+func Tokenize(text string, delimiter Delimiter) []Token {
     if delimiter.str == nil && delimiter.regex == nil {
         // AWK-style (\S+\s*)
         tokens, prefixLength := awkTokenizer(text)
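Indexing a Go string yields raw bytes, so the new awkTokenizer compares bytes against 9 (tab) and 32 (space) directly. That is safe for UTF-8 input: every byte of a multi-byte sequence is 0x80 or above and can never collide with ASCII whitespace. A small standalone demonstration:

package main

import "fmt"

func main() {
    input := "héllo wörld" // multi-byte characters mixed with ASCII spaces

    // Byte-wise scan, as in the new awkTokenizer: only bytes 9 and 32 are
    // treated as whitespace. UTF-8 continuation bytes are >= 0x80, so they
    // can never be mistaken for a tab or a space.
    words := 0
    inWord := false
    for i := 0; i < len(input); i++ {
        white := input[i] == 9 || input[i] == 32
        if !white && !inWord {
            words++
        }
        inWord = !white
    }
    fmt.Println(words) // 2
}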
@@ -139,36 +139,31 @@ func Tokenize(text util.Chars, delimiter Delimiter) []Token {
     }

     if delimiter.str != nil {
-        return withPrefixLengths(text.Split(*delimiter.str), 0)
+        return withPrefixLengths(strings.SplitAfter(text, *delimiter.str), 0)
     }

     // FIXME performance
     var tokens []string
     if delimiter.regex != nil {
-        str := text.ToString()
-        for len(str) > 0 {
-            loc := delimiter.regex.FindStringIndex(str)
+        for len(text) > 0 {
+            loc := delimiter.regex.FindStringIndex(text)
             if loc == nil {
-                loc = []int{0, len(str)}
+                loc = []int{0, len(text)}
             }
             last := util.Max(loc[1], 1)
-            tokens = append(tokens, str[:last])
-            str = str[last:]
+            tokens = append(tokens, text[:last])
+            text = text[last:]
         }
     }
-    asRunes := make([]util.Chars, len(tokens))
-    for i, token := range tokens {
-        asRunes[i] = util.RunesToChars([]rune(token))
-    }
-    return withPrefixLengths(asRunes, 0)
+    return withPrefixLengths(tokens, 0)
 }

-func joinTokens(tokens []Token) []rune {
-    ret := []rune{}
+func joinTokens(tokens []Token) string {
+    var output bytes.Buffer
     for _, token := range tokens {
-        ret = append(ret, token.text.ToRunes()...)
+        output.WriteString(token.text.ToString())
     }
-    return ret
+    return output.String()
 }

 // Transform is used to transform the input when --with-nth option is given
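Note the delimiter-string case: the removed Chars.Split (see the bottom of this diff) kept each delimiter attached to the preceding token, and strings.SplitAfter does the same. The one visible difference is a trailing empty token when the input ends with the delimiter — compare the expectations in the deleted TestSplit:

package main

import (
    "fmt"
    "strings"
)

func main() {
    // The removed Chars.Split returned ["abc:" "def:" ":"] for this input
    // (per the deleted TestSplit below). strings.SplitAfter yields the same
    // delimiter-attached tokens plus a trailing empty string:
    fmt.Printf("%q\n", strings.SplitAfter("abc:def::", ":"))
    // Output: ["abc:" "def:" ":" ""]
}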
@@ -181,7 +176,7 @@ func Transform(tokens []Token, withNth []Range) []Token {
         if r.begin == r.end {
             idx := r.begin
             if idx == rangeEllipsis {
-                chars := util.RunesToChars(joinTokens(tokens))
+                chars := util.ToChars([]byte(joinTokens(tokens)))
                 parts = append(parts, &chars)
             } else {
                 if idx < 0 {
@@ -224,15 +219,15 @@ func Transform(tokens []Token, withNth []Range) []Token {
         var merged util.Chars
         switch len(parts) {
         case 0:
-            merged = util.RunesToChars([]rune{})
+            merged = util.ToChars([]byte{})
         case 1:
             merged = *parts[0]
         default:
-            runes := []rune{}
+            var output bytes.Buffer
             for _, part := range parts {
-                runes = append(runes, part.ToRunes()...)
+                output.WriteString(part.ToString())
             }
-            merged = util.RunesToChars(runes)
+            merged = util.ToChars([]byte(output.String()))
         }

         var prefixLength int32
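When a range covers multiple tokens, the merged text is now accumulated in a bytes.Buffer and converted to Chars once, instead of growing a rune slice append by append. The pattern in isolation (standalone sketch, not fzf code):

package main

import (
    "bytes"
    "fmt"
)

func main() {
    parts := []string{"def:  ", "ghi:  ", "jkl"}

    // Old pattern: repeated rune-slice appends, a reallocation on each
    // growth, 4 bytes per character.
    runes := []rune{}
    for _, part := range parts {
        runes = append(runes, []rune(part)...)
    }

    // New pattern: write into one amortized buffer, convert once.
    var output bytes.Buffer
    for _, part := range parts {
        output.WriteString(part)
    }

    fmt.Println(string(runes) == output.String()) // true
}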
src/tokenizer_test.go
@@ -2,8 +2,6 @@ package fzf

 import (
     "testing"
-
-    "github.com/junegunn/fzf/src/util"
 )

 func TestParseRange(t *testing.T) {
@@ -47,19 +45,19 @@ func TestParseRange(t *testing.T) {
 func TestTokenize(t *testing.T) {
     // AWK-style
     input := "  abc:  def:  ghi  "
-    tokens := Tokenize(util.RunesToChars([]rune(input)), Delimiter{})
+    tokens := Tokenize(input, Delimiter{})
     if tokens[0].text.ToString() != "abc:  " || tokens[0].prefixLength != 2 {
         t.Errorf("%s", tokens)
     }

     // With delimiter
-    tokens = Tokenize(util.RunesToChars([]rune(input)), delimiterRegexp(":"))
+    tokens = Tokenize(input, delimiterRegexp(":"))
     if tokens[0].text.ToString() != "  abc:" || tokens[0].prefixLength != 0 {
-        t.Errorf("%s", tokens)
+        t.Error(tokens[0].text.ToString(), tokens[0].prefixLength)
     }

     // With delimiter regex
-    tokens = Tokenize(util.RunesToChars([]rune(input)), delimiterRegexp("\\s+"))
+    tokens = Tokenize(input, delimiterRegexp("\\s+"))
     if tokens[0].text.ToString() != "  " || tokens[0].prefixLength != 0 ||
         tokens[1].text.ToString() != "abc:  " || tokens[1].prefixLength != 2 ||
         tokens[2].text.ToString() != "def:  " || tokens[2].prefixLength != 8 ||
@@ -71,7 +69,7 @@ func TestTokenize(t *testing.T) {
 func TestTransform(t *testing.T) {
     input := "  abc:  def:  ghi:  jkl"
     {
-        tokens := Tokenize(util.RunesToChars([]rune(input)), Delimiter{})
+        tokens := Tokenize(input, Delimiter{})
         {
             ranges := splitNth("1,2,3")
             tx := Transform(tokens, ranges)
@@ -93,7 +91,7 @@ func TestTransform(t *testing.T) {
         }
     }
     {
-        tokens := Tokenize(util.RunesToChars([]rune(input)), delimiterRegexp(":"))
+        tokens := Tokenize(input, delimiterRegexp(":"))
         {
             ranges := splitNth("1..2,3,2..,1")
             tx := Transform(tokens, ranges)
src/util/chars.go
@@ -157,46 +157,3 @@ func (chars *Chars) CopyRunes(dest []rune) {
     }
     return
 }
-
-func (chars *Chars) Slice(b int, e int) Chars {
-    if runes := chars.optionalRunes(); runes != nil {
-        return RunesToChars(runes[b:e])
-    }
-    return Chars{slice: chars.slice[b:e], inBytes: true}
-}
-
-func (chars *Chars) Split(delimiter string) []Chars {
-    delim := []rune(delimiter)
-    numChars := chars.Length()
-    numDelim := len(delim)
-    begin := 0
-    ret := make([]Chars, 0, 1)
-
-    for index := 0; index < numChars; {
-        if index+numDelim <= numChars {
-            match := true
-            for off, d := range delim {
-                if chars.Get(index+off) != d {
-                    match = false
-                    break
-                }
-            }
-            // Found the delimiter
-            if match {
-                incr := Max(numDelim, 1)
-                ret = append(ret, chars.Slice(begin, index+incr))
-                index += incr
-                begin = index
-                continue
-            }
-        } else {
-            // Impossible to find the delimiter in the remaining substring
-            break
-        }
-        index++
-    }
-    if begin < numChars || len(ret) == 0 {
-        ret = append(ret, chars.Slice(begin, numChars))
-    }
-    return ret
-}
src/util/chars_test.go
@@ -44,29 +44,3 @@ func TestTrimLength(t *testing.T) {
     check(" h   o ", 5)
     check(" ", 0)
 }
-
-func TestSplit(t *testing.T) {
-    check := func(str string, delim string, tokens ...string) {
-        input := ToChars([]byte(str))
-        result := input.Split(delim)
-        if len(result) != len(tokens) {
-            t.Errorf(
-                "Invalid Split result for '%s': %d tokens found (expected %d): %s",
-                str, len(result), len(tokens), result)
-        }
-        for idx, token := range tokens {
-            if result[idx].ToString() != token {
-                t.Errorf("Invalid Split result for '%s': %s (expected %s)",
-                    str, result[idx].ToString(), token)
-            }
-        }
-    }
-    check("abc:def::", ":", "abc:", "def:", ":")
-    check("abc:def::", "-", "abc:def::")
-    check("abc", "", "a", "b", "c")
-    check("abc", "a", "a", "bc")
-    check("abc", "ab", "ab", "c")
-    check("abc", "abc", "abc")
-    check("abc", "abcd", "abc")
-    check("", "abcd", "")
-}