Avoid unconditionally storing input as runes
When --with-nth is used, fzf used to preprocess each line and store the result as a rune array, which was wasteful if the line contained only ASCII characters.
This commit is contained in:
parent
bc9d2abdb6
commit
c9f16b6430
14
src/core.go
14
src/core.go
@ -63,9 +63,6 @@ func Run(opts *Options, revision string) {
|
|||||||
ansiProcessor := func(data []byte) (util.Chars, *[]ansiOffset) {
|
ansiProcessor := func(data []byte) (util.Chars, *[]ansiOffset) {
|
||||||
return util.ToChars(data), nil
|
return util.ToChars(data), nil
|
||||||
}
|
}
|
||||||
ansiProcessorRunes := func(data []rune) (util.Chars, *[]ansiOffset) {
|
|
||||||
return util.RunesToChars(data), nil
|
|
||||||
}
|
|
||||||
if opts.Ansi {
|
if opts.Ansi {
|
||||||
if opts.Theme != nil {
|
if opts.Theme != nil {
|
||||||
var state *ansiState
|
var state *ansiState
|
||||||
@ -82,9 +79,6 @@ func Run(opts *Options, revision string) {
|
|||||||
return util.RunesToChars([]rune(trimmed)), nil
|
return util.RunesToChars([]rune(trimmed)), nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ansiProcessorRunes = func(data []rune) (util.Chars, *[]ansiOffset) {
|
|
||||||
return ansiProcessor([]byte(string(data)))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Chunk list
|
// Chunk list
|
||||||
@ -103,15 +97,15 @@ func Run(opts *Options, revision string) {
|
|||||||
})
|
})
|
||||||
} else {
|
} else {
|
||||||
chunkList = NewChunkList(func(data []byte, index int) Item {
|
chunkList = NewChunkList(func(data []byte, index int) Item {
|
||||||
tokens := Tokenize(util.ToChars(data), opts.Delimiter)
|
tokens := Tokenize(string(data), opts.Delimiter)
|
||||||
trans := Transform(tokens, opts.WithNth)
|
trans := Transform(tokens, opts.WithNth)
|
||||||
|
transformed := joinTokens(trans)
|
||||||
if len(header) < opts.HeaderLines {
|
if len(header) < opts.HeaderLines {
|
||||||
header = append(header, string(joinTokens(trans)))
|
header = append(header, transformed)
|
||||||
eventBox.Set(EvtHeader, header)
|
eventBox.Set(EvtHeader, header)
|
||||||
return nilItem
|
return nilItem
|
||||||
}
|
}
|
||||||
textRunes := joinTokens(trans)
|
trimmed, colors := ansiProcessor([]byte(transformed))
|
||||||
trimmed, colors := ansiProcessorRunes(textRunes)
|
|
||||||
trimmed.Index = int32(index)
|
trimmed.Index = int32(index)
|
||||||
return Item{text: trimmed, colors: colors, origText: &data}
|
return Item{text: trimmed, colors: colors, origText: &data}
|
||||||
})
|
})
|
||||||
|
@ -6,7 +6,6 @@ import (
|
|||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/junegunn/fzf/src/tui"
|
"github.com/junegunn/fzf/src/tui"
|
||||||
"github.com/junegunn/fzf/src/util"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestDelimiterRegex(t *testing.T) {
|
func TestDelimiterRegex(t *testing.T) {
|
||||||
@ -44,7 +43,7 @@ func TestDelimiterRegex(t *testing.T) {
|
|||||||
|
|
||||||
func TestDelimiterRegexString(t *testing.T) {
|
func TestDelimiterRegexString(t *testing.T) {
|
||||||
delim := delimiterRegexp("*")
|
delim := delimiterRegexp("*")
|
||||||
tokens := Tokenize(util.RunesToChars([]rune("-*--*---**---")), delim)
|
tokens := Tokenize("-*--*---**---", delim)
|
||||||
if delim.regex != nil ||
|
if delim.regex != nil ||
|
||||||
tokens[0].text.ToString() != "-*" ||
|
tokens[0].text.ToString() != "-*" ||
|
||||||
tokens[1].text.ToString() != "--*" ||
|
tokens[1].text.ToString() != "--*" ||
|
||||||
@ -57,7 +56,7 @@ func TestDelimiterRegexString(t *testing.T) {
|
|||||||
|
|
||||||
func TestDelimiterRegexRegex(t *testing.T) {
|
func TestDelimiterRegexRegex(t *testing.T) {
|
||||||
delim := delimiterRegexp("--\\*")
|
delim := delimiterRegexp("--\\*")
|
||||||
tokens := Tokenize(util.RunesToChars([]rune("-*--*---**---")), delim)
|
tokens := Tokenize("-*--*---**---", delim)
|
||||||
if delim.str != nil ||
|
if delim.str != nil ||
|
||||||
tokens[0].text.ToString() != "-*--*" ||
|
tokens[0].text.ToString() != "-*--*" ||
|
||||||
tokens[1].text.ToString() != "---*" ||
|
tokens[1].text.ToString() != "---*" ||
|
||||||
|
@ -362,7 +362,7 @@ func (p *Pattern) prepareInput(item *Item) []Token {
|
|||||||
return *item.transformed
|
return *item.transformed
|
||||||
}
|
}
|
||||||
|
|
||||||
tokens := Tokenize(item.text, p.delimiter)
|
tokens := Tokenize(item.text.ToString(), p.delimiter)
|
||||||
ret := Transform(tokens, p.nth)
|
ret := Transform(tokens, p.nth)
|
||||||
item.transformed = &ret
|
item.transformed = &ret
|
||||||
return ret
|
return ret
|
||||||
|
@ -133,7 +133,7 @@ func TestCaseSensitivity(t *testing.T) {
|
|||||||
|
|
||||||
func TestOrigTextAndTransformed(t *testing.T) {
|
func TestOrigTextAndTransformed(t *testing.T) {
|
||||||
pattern := BuildPattern(true, algo.FuzzyMatchV2, true, CaseSmart, false, true, true, []Range{}, Delimiter{}, []rune("jg"))
|
pattern := BuildPattern(true, algo.FuzzyMatchV2, true, CaseSmart, false, true, true, []Range{}, Delimiter{}, []rune("jg"))
|
||||||
tokens := Tokenize(util.RunesToChars([]rune("junegunn")), Delimiter{})
|
tokens := Tokenize("junegunn", Delimiter{})
|
||||||
trans := Transform(tokens, []Range{Range{1, 1}})
|
trans := Transform(tokens, []Range{Range{1, 1}})
|
||||||
|
|
||||||
origBytes := []byte("junegunn.choi")
|
origBytes := []byte("junegunn.choi")
|
||||||
|
@ -1174,7 +1174,7 @@ func replacePlaceholder(template string, stripAnsi bool, delimiter Delimiter, fo
|
|||||||
|
|
||||||
for idx, item := range items {
|
for idx, item := range items {
|
||||||
chars := util.RunesToChars([]rune(item.AsString(stripAnsi)))
|
chars := util.RunesToChars([]rune(item.AsString(stripAnsi)))
|
||||||
tokens := Tokenize(chars, delimiter)
|
tokens := Tokenize(chars.ToString(), delimiter)
|
||||||
trans := Transform(tokens, ranges)
|
trans := Transform(tokens, ranges)
|
||||||
str := string(joinTokens(trans))
|
str := string(joinTokens(trans))
|
||||||
if delimiter.str != nil {
|
if delimiter.str != nil {
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
package fzf
|
package fzf
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
"regexp"
|
"regexp"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
@ -74,14 +75,14 @@ func ParseRange(str *string) (Range, bool) {
|
|||||||
return newRange(n, n), true
|
return newRange(n, n), true
|
||||||
}
|
}
|
||||||
|
|
||||||
func withPrefixLengths(tokens []util.Chars, begin int) []Token {
|
func withPrefixLengths(tokens []string, begin int) []Token {
|
||||||
ret := make([]Token, len(tokens))
|
ret := make([]Token, len(tokens))
|
||||||
|
|
||||||
prefixLength := begin
|
prefixLength := begin
|
||||||
for idx, token := range tokens {
|
for idx := range tokens {
|
||||||
// NOTE: &tokens[idx] instead of &tokens
|
chars := util.ToChars([]byte(tokens[idx]))
|
||||||
ret[idx] = Token{&tokens[idx], int32(prefixLength)}
|
ret[idx] = Token{&chars, int32(prefixLength)}
|
||||||
prefixLength += token.Length()
|
prefixLength += chars.Length()
|
||||||
}
|
}
|
||||||
return ret
|
return ret
|
||||||
}
|
}
|
||||||
@ -92,16 +93,15 @@ const (
|
|||||||
awkWhite
|
awkWhite
|
||||||
)
|
)
|
||||||
|
|
||||||
func awkTokenizer(input util.Chars) ([]util.Chars, int) {
|
func awkTokenizer(input string) ([]string, int) {
|
||||||
// 9, 32
|
// 9, 32
|
||||||
ret := []util.Chars{}
|
ret := []string{}
|
||||||
prefixLength := 0
|
prefixLength := 0
|
||||||
state := awkNil
|
state := awkNil
|
||||||
numChars := input.Length()
|
|
||||||
begin := 0
|
begin := 0
|
||||||
end := 0
|
end := 0
|
||||||
for idx := 0; idx < numChars; idx++ {
|
for idx := 0; idx < len(input); idx++ {
|
||||||
r := input.Get(idx)
|
r := input[idx]
|
||||||
white := r == 9 || r == 32
|
white := r == 9 || r == 32
|
||||||
switch state {
|
switch state {
|
||||||
case awkNil:
|
case awkNil:
|
||||||
@ -119,19 +119,19 @@ func awkTokenizer(input util.Chars) ([]util.Chars, int) {
|
|||||||
if white {
|
if white {
|
||||||
end = idx + 1
|
end = idx + 1
|
||||||
} else {
|
} else {
|
||||||
ret = append(ret, input.Slice(begin, end))
|
ret = append(ret, input[begin:end])
|
||||||
state, begin, end = awkBlack, idx, idx+1
|
state, begin, end = awkBlack, idx, idx+1
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if begin < end {
|
if begin < end {
|
||||||
ret = append(ret, input.Slice(begin, end))
|
ret = append(ret, input[begin:end])
|
||||||
}
|
}
|
||||||
return ret, prefixLength
|
return ret, prefixLength
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tokenize tokenizes the given string with the delimiter
|
// Tokenize tokenizes the given string with the delimiter
|
||||||
func Tokenize(text util.Chars, delimiter Delimiter) []Token {
|
func Tokenize(text string, delimiter Delimiter) []Token {
|
||||||
if delimiter.str == nil && delimiter.regex == nil {
|
if delimiter.str == nil && delimiter.regex == nil {
|
||||||
// AWK-style (\S+\s*)
|
// AWK-style (\S+\s*)
|
||||||
tokens, prefixLength := awkTokenizer(text)
|
tokens, prefixLength := awkTokenizer(text)
|
||||||
@ -139,36 +139,31 @@ func Tokenize(text util.Chars, delimiter Delimiter) []Token {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if delimiter.str != nil {
|
if delimiter.str != nil {
|
||||||
return withPrefixLengths(text.Split(*delimiter.str), 0)
|
return withPrefixLengths(strings.SplitAfter(text, *delimiter.str), 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
// FIXME performance
|
// FIXME performance
|
||||||
var tokens []string
|
var tokens []string
|
||||||
if delimiter.regex != nil {
|
if delimiter.regex != nil {
|
||||||
str := text.ToString()
|
for len(text) > 0 {
|
||||||
for len(str) > 0 {
|
loc := delimiter.regex.FindStringIndex(text)
|
||||||
loc := delimiter.regex.FindStringIndex(str)
|
|
||||||
if loc == nil {
|
if loc == nil {
|
||||||
loc = []int{0, len(str)}
|
loc = []int{0, len(text)}
|
||||||
}
|
}
|
||||||
last := util.Max(loc[1], 1)
|
last := util.Max(loc[1], 1)
|
||||||
tokens = append(tokens, str[:last])
|
tokens = append(tokens, text[:last])
|
||||||
str = str[last:]
|
text = text[last:]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
asRunes := make([]util.Chars, len(tokens))
|
return withPrefixLengths(tokens, 0)
|
||||||
for i, token := range tokens {
|
|
||||||
asRunes[i] = util.RunesToChars([]rune(token))
|
|
||||||
}
|
|
||||||
return withPrefixLengths(asRunes, 0)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func joinTokens(tokens []Token) []rune {
|
func joinTokens(tokens []Token) string {
|
||||||
ret := []rune{}
|
var output bytes.Buffer
|
||||||
for _, token := range tokens {
|
for _, token := range tokens {
|
||||||
ret = append(ret, token.text.ToRunes()...)
|
output.WriteString(token.text.ToString())
|
||||||
}
|
}
|
||||||
return ret
|
return output.String()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Transform is used to transform the input when --with-nth option is given
|
// Transform is used to transform the input when --with-nth option is given
|
||||||
@ -181,7 +176,7 @@ func Transform(tokens []Token, withNth []Range) []Token {
|
|||||||
if r.begin == r.end {
|
if r.begin == r.end {
|
||||||
idx := r.begin
|
idx := r.begin
|
||||||
if idx == rangeEllipsis {
|
if idx == rangeEllipsis {
|
||||||
chars := util.RunesToChars(joinTokens(tokens))
|
chars := util.ToChars([]byte(joinTokens(tokens)))
|
||||||
parts = append(parts, &chars)
|
parts = append(parts, &chars)
|
||||||
} else {
|
} else {
|
||||||
if idx < 0 {
|
if idx < 0 {
|
||||||
@ -224,15 +219,15 @@ func Transform(tokens []Token, withNth []Range) []Token {
|
|||||||
var merged util.Chars
|
var merged util.Chars
|
||||||
switch len(parts) {
|
switch len(parts) {
|
||||||
case 0:
|
case 0:
|
||||||
merged = util.RunesToChars([]rune{})
|
merged = util.ToChars([]byte{})
|
||||||
case 1:
|
case 1:
|
||||||
merged = *parts[0]
|
merged = *parts[0]
|
||||||
default:
|
default:
|
||||||
runes := []rune{}
|
var output bytes.Buffer
|
||||||
for _, part := range parts {
|
for _, part := range parts {
|
||||||
runes = append(runes, part.ToRunes()...)
|
output.WriteString(part.ToString())
|
||||||
}
|
}
|
||||||
merged = util.RunesToChars(runes)
|
merged = util.ToChars([]byte(output.String()))
|
||||||
}
|
}
|
||||||
|
|
||||||
var prefixLength int32
|
var prefixLength int32
|
||||||
|
@ -2,8 +2,6 @@ package fzf
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/junegunn/fzf/src/util"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestParseRange(t *testing.T) {
|
func TestParseRange(t *testing.T) {
|
||||||
@ -47,19 +45,19 @@ func TestParseRange(t *testing.T) {
|
|||||||
func TestTokenize(t *testing.T) {
|
func TestTokenize(t *testing.T) {
|
||||||
// AWK-style
|
// AWK-style
|
||||||
input := " abc: def: ghi "
|
input := " abc: def: ghi "
|
||||||
tokens := Tokenize(util.RunesToChars([]rune(input)), Delimiter{})
|
tokens := Tokenize(input, Delimiter{})
|
||||||
if tokens[0].text.ToString() != "abc: " || tokens[0].prefixLength != 2 {
|
if tokens[0].text.ToString() != "abc: " || tokens[0].prefixLength != 2 {
|
||||||
t.Errorf("%s", tokens)
|
t.Errorf("%s", tokens)
|
||||||
}
|
}
|
||||||
|
|
||||||
// With delimiter
|
// With delimiter
|
||||||
tokens = Tokenize(util.RunesToChars([]rune(input)), delimiterRegexp(":"))
|
tokens = Tokenize(input, delimiterRegexp(":"))
|
||||||
if tokens[0].text.ToString() != " abc:" || tokens[0].prefixLength != 0 {
|
if tokens[0].text.ToString() != " abc:" || tokens[0].prefixLength != 0 {
|
||||||
t.Errorf("%s", tokens)
|
t.Error(tokens[0].text.ToString(), tokens[0].prefixLength)
|
||||||
}
|
}
|
||||||
|
|
||||||
// With delimiter regex
|
// With delimiter regex
|
||||||
tokens = Tokenize(util.RunesToChars([]rune(input)), delimiterRegexp("\\s+"))
|
tokens = Tokenize(input, delimiterRegexp("\\s+"))
|
||||||
if tokens[0].text.ToString() != " " || tokens[0].prefixLength != 0 ||
|
if tokens[0].text.ToString() != " " || tokens[0].prefixLength != 0 ||
|
||||||
tokens[1].text.ToString() != "abc: " || tokens[1].prefixLength != 2 ||
|
tokens[1].text.ToString() != "abc: " || tokens[1].prefixLength != 2 ||
|
||||||
tokens[2].text.ToString() != "def: " || tokens[2].prefixLength != 8 ||
|
tokens[2].text.ToString() != "def: " || tokens[2].prefixLength != 8 ||
|
||||||
@ -71,7 +69,7 @@ func TestTokenize(t *testing.T) {
|
|||||||
func TestTransform(t *testing.T) {
|
func TestTransform(t *testing.T) {
|
||||||
input := " abc: def: ghi: jkl"
|
input := " abc: def: ghi: jkl"
|
||||||
{
|
{
|
||||||
tokens := Tokenize(util.RunesToChars([]rune(input)), Delimiter{})
|
tokens := Tokenize(input, Delimiter{})
|
||||||
{
|
{
|
||||||
ranges := splitNth("1,2,3")
|
ranges := splitNth("1,2,3")
|
||||||
tx := Transform(tokens, ranges)
|
tx := Transform(tokens, ranges)
|
||||||
@ -93,7 +91,7 @@ func TestTransform(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
tokens := Tokenize(util.RunesToChars([]rune(input)), delimiterRegexp(":"))
|
tokens := Tokenize(input, delimiterRegexp(":"))
|
||||||
{
|
{
|
||||||
ranges := splitNth("1..2,3,2..,1")
|
ranges := splitNth("1..2,3,2..,1")
|
||||||
tx := Transform(tokens, ranges)
|
tx := Transform(tokens, ranges)
|
||||||
|
@ -157,46 +157,3 @@ func (chars *Chars) CopyRunes(dest []rune) {
|
|||||||
}
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func (chars *Chars) Slice(b int, e int) Chars {
|
|
||||||
if runes := chars.optionalRunes(); runes != nil {
|
|
||||||
return RunesToChars(runes[b:e])
|
|
||||||
}
|
|
||||||
return Chars{slice: chars.slice[b:e], inBytes: true}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (chars *Chars) Split(delimiter string) []Chars {
|
|
||||||
delim := []rune(delimiter)
|
|
||||||
numChars := chars.Length()
|
|
||||||
numDelim := len(delim)
|
|
||||||
begin := 0
|
|
||||||
ret := make([]Chars, 0, 1)
|
|
||||||
|
|
||||||
for index := 0; index < numChars; {
|
|
||||||
if index+numDelim <= numChars {
|
|
||||||
match := true
|
|
||||||
for off, d := range delim {
|
|
||||||
if chars.Get(index+off) != d {
|
|
||||||
match = false
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Found the delimiter
|
|
||||||
if match {
|
|
||||||
incr := Max(numDelim, 1)
|
|
||||||
ret = append(ret, chars.Slice(begin, index+incr))
|
|
||||||
index += incr
|
|
||||||
begin = index
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Impossible to find the delimiter in the remaining substring
|
|
||||||
break
|
|
||||||
}
|
|
||||||
index++
|
|
||||||
}
|
|
||||||
if begin < numChars || len(ret) == 0 {
|
|
||||||
ret = append(ret, chars.Slice(begin, numChars))
|
|
||||||
}
|
|
||||||
return ret
|
|
||||||
}
|
|
||||||
|
@ -44,29 +44,3 @@ func TestTrimLength(t *testing.T) {
|
|||||||
check(" h o ", 5)
|
check(" h o ", 5)
|
||||||
check(" ", 0)
|
check(" ", 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestSplit(t *testing.T) {
|
|
||||||
check := func(str string, delim string, tokens ...string) {
|
|
||||||
input := ToChars([]byte(str))
|
|
||||||
result := input.Split(delim)
|
|
||||||
if len(result) != len(tokens) {
|
|
||||||
t.Errorf(
|
|
||||||
"Invalid Split result for '%s': %d tokens found (expected %d): %s",
|
|
||||||
str, len(result), len(tokens), result)
|
|
||||||
}
|
|
||||||
for idx, token := range tokens {
|
|
||||||
if result[idx].ToString() != token {
|
|
||||||
t.Errorf("Invalid Split result for '%s': %s (expected %s)",
|
|
||||||
str, result[idx].ToString(), token)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
check("abc:def::", ":", "abc:", "def:", ":")
|
|
||||||
check("abc:def::", "-", "abc:def::")
|
|
||||||
check("abc", "", "a", "b", "c")
|
|
||||||
check("abc", "a", "a", "bc")
|
|
||||||
check("abc", "ab", "ab", "c")
|
|
||||||
check("abc", "abc", "abc")
|
|
||||||
check("abc", "abcd", "abc")
|
|
||||||
check("", "abcd", "")
|
|
||||||
}
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user