Performance tuning - eager rune array conversion

    > wc -l /tmp/list2
     2594098 /tmp/list2

    > time cat /tmp/list2 | fzf-0.10.1-darwin_amd64 -fqwerty > /dev/null

    real    0m5.418s
    user    0m10.990s
    sys     0m1.302s

    > time cat /tmp/list2 | fzf-head -fqwerty > /dev/null

    real    0m4.862s
    user    0m6.619s
    sys     0m0.982s
This commit is contained in:
Junegunn Choi 2015-08-02 14:00:18 +09:00
parent 634670e3ea
commit 0ea66329b8
18 changed files with 163 additions and 154 deletions

View File

@ -16,7 +16,7 @@ import (
*/
// FuzzyMatch performs fuzzy-match
func FuzzyMatch(caseSensitive bool, runes *[]rune, pattern []rune) (int, int) {
func FuzzyMatch(caseSensitive bool, runes []rune, pattern []rune) (int, int) {
if len(pattern) == 0 {
return 0, 0
}
@ -34,7 +34,7 @@ func FuzzyMatch(caseSensitive bool, runes *[]rune, pattern []rune) (int, int) {
sidx := -1
eidx := -1
for index, char := range *runes {
for index, char := range runes {
// This is considerably faster than blindly applying strings.ToLower to the
// whole string
if !caseSensitive {
@ -61,7 +61,7 @@ func FuzzyMatch(caseSensitive bool, runes *[]rune, pattern []rune) (int, int) {
if sidx >= 0 && eidx >= 0 {
pidx--
for index := eidx - 1; index >= sidx; index-- {
char := (*runes)[index]
char := runes[index]
if !caseSensitive {
if char >= 'A' && char <= 'Z' {
char += 32
@ -88,12 +88,12 @@ func FuzzyMatch(caseSensitive bool, runes *[]rune, pattern []rune) (int, int) {
//
// We might try to implement better algorithms in the future:
// http://en.wikipedia.org/wiki/String_searching_algorithm
func ExactMatchNaive(caseSensitive bool, runes *[]rune, pattern []rune) (int, int) {
func ExactMatchNaive(caseSensitive bool, runes []rune, pattern []rune) (int, int) {
if len(pattern) == 0 {
return 0, 0
}
numRunes := len(*runes)
numRunes := len(runes)
plen := len(pattern)
if numRunes < plen {
return -1, -1
@ -101,7 +101,7 @@ func ExactMatchNaive(caseSensitive bool, runes *[]rune, pattern []rune) (int, in
pidx := 0
for index := 0; index < numRunes; index++ {
char := (*runes)[index]
char := runes[index]
if !caseSensitive {
if char >= 'A' && char <= 'Z' {
char += 32
@ -123,13 +123,13 @@ func ExactMatchNaive(caseSensitive bool, runes *[]rune, pattern []rune) (int, in
}
// PrefixMatch performs prefix-match
func PrefixMatch(caseSensitive bool, runes *[]rune, pattern []rune) (int, int) {
if len(*runes) < len(pattern) {
func PrefixMatch(caseSensitive bool, runes []rune, pattern []rune) (int, int) {
if len(runes) < len(pattern) {
return -1, -1
}
for index, r := range pattern {
char := (*runes)[index]
char := runes[index]
if !caseSensitive {
char = unicode.ToLower(char)
}
@ -141,7 +141,7 @@ func PrefixMatch(caseSensitive bool, runes *[]rune, pattern []rune) (int, int) {
}
// SuffixMatch performs suffix-match
func SuffixMatch(caseSensitive bool, input *[]rune, pattern []rune) (int, int) {
func SuffixMatch(caseSensitive bool, input []rune, pattern []rune) (int, int) {
runes := util.TrimRight(input)
trimmedLen := len(runes)
diff := trimmedLen - len(pattern)
@ -161,11 +161,11 @@ func SuffixMatch(caseSensitive bool, input *[]rune, pattern []rune) (int, int) {
return trimmedLen - len(pattern), trimmedLen
}
func EqualMatch(caseSensitive bool, runes *[]rune, pattern []rune) (int, int) {
if len(*runes) != len(pattern) {
func EqualMatch(caseSensitive bool, runes []rune, pattern []rune) (int, int) {
if len(runes) != len(pattern) {
return -1, -1
}
runesStr := string(*runes)
runesStr := string(runes)
if !caseSensitive {
runesStr = strings.ToLower(runesStr)
}

View File

@ -5,12 +5,11 @@ import (
"testing"
)
func assertMatch(t *testing.T, fun func(bool, *[]rune, []rune) (int, int), caseSensitive bool, input string, pattern string, sidx int, eidx int) {
func assertMatch(t *testing.T, fun func(bool, []rune, []rune) (int, int), caseSensitive bool, input string, pattern string, sidx int, eidx int) {
if !caseSensitive {
pattern = strings.ToLower(pattern)
}
runes := []rune(input)
s, e := fun(caseSensitive, &runes, []rune(pattern))
s, e := fun(caseSensitive, []rune(input), []rune(pattern))
if s != sidx {
t.Errorf("Invalid start index: %d (expected: %d, %s / %s)", s, sidx, input, pattern)
}

View File

@ -36,7 +36,7 @@ func init() {
ansiRegex = regexp.MustCompile("\x1b\\[[0-9;]*[mK]")
}
func extractColor(str *string, state *ansiState) (*string, []ansiOffset, *ansiState) {
func extractColor(str string, state *ansiState) (string, []ansiOffset, *ansiState) {
var offsets []ansiOffset
var output bytes.Buffer
@ -45,9 +45,9 @@ func extractColor(str *string, state *ansiState) (*string, []ansiOffset, *ansiSt
}
idx := 0
for _, offset := range ansiRegex.FindAllStringIndex(*str, -1) {
output.WriteString((*str)[idx:offset[0]])
newState := interpretCode((*str)[offset[0]:offset[1]], state)
for _, offset := range ansiRegex.FindAllStringIndex(str, -1) {
output.WriteString(str[idx:offset[0]])
newState := interpretCode(str[offset[0]:offset[1]], state)
if !newState.equals(state) {
if state != nil {
@ -69,7 +69,7 @@ func extractColor(str *string, state *ansiState) (*string, []ansiOffset, *ansiSt
idx = offset[1]
}
rest := (*str)[idx:]
rest := str[idx:]
if len(rest) > 0 {
output.WriteString(rest)
if state != nil {
@ -77,8 +77,7 @@ func extractColor(str *string, state *ansiState) (*string, []ansiOffset, *ansiSt
(&offsets[len(offsets)-1]).offset[1] = int32(utf8.RuneCount(output.Bytes()))
}
}
outputStr := output.String()
return &outputStr, offsets, state
return output.String(), offsets, state
}
func interpretCode(ansiCode string, prevState *ansiState) *ansiState {

View File

@ -17,9 +17,9 @@ func TestExtractColor(t *testing.T) {
var state *ansiState
clean := "\x1b[0m"
check := func(assertion func(ansiOffsets []ansiOffset, state *ansiState)) {
output, ansiOffsets, newState := extractColor(&src, state)
output, ansiOffsets, newState := extractColor(src, state)
state = newState
if *output != "hello world" {
if output != "hello world" {
t.Errorf("Invalid output: {}", output)
}
fmt.Println(src, ansiOffsets, clean)

View File

@ -7,7 +7,7 @@ type Chunk []*Item // >>> []Item
// ItemBuilder is a closure type that builds Item object from a rune slice and
// an integer
type ItemBuilder func(*string, int) *Item
type ItemBuilder func([]rune, int) *Item
// ChunkList is a list of Chunks
type ChunkList struct {
@ -26,7 +26,7 @@ func NewChunkList(trans ItemBuilder) *ChunkList {
trans: trans}
}
func (c *Chunk) push(trans ItemBuilder, data *string, index int) bool {
func (c *Chunk) push(trans ItemBuilder, data []rune, index int) bool {
item := trans(data, index)
if item != nil {
*c = append(*c, item)
@ -53,7 +53,7 @@ func CountItems(cs []*Chunk) int {
}
// Push adds the item to the list
func (cl *ChunkList) Push(data string) bool {
func (cl *ChunkList) Push(data []rune) bool {
cl.mutex.Lock()
defer cl.mutex.Unlock()
@ -62,7 +62,7 @@ func (cl *ChunkList) Push(data string) bool {
cl.chunks = append(cl.chunks, &newChunk)
}
if cl.lastChunk().push(cl.trans, &data, cl.count) {
if cl.lastChunk().push(cl.trans, data, cl.count) {
cl.count++
return true
}

View File

@ -6,7 +6,7 @@ import (
)
func TestChunkList(t *testing.T) {
cl := NewChunkList(func(s *string, i int) *Item {
cl := NewChunkList(func(s []rune, i int) *Item {
return &Item{text: s, rank: Rank{0, 0, uint32(i * 2)}}
})
@ -17,8 +17,8 @@ func TestChunkList(t *testing.T) {
}
// Add some data
cl.Push("hello")
cl.Push("world")
cl.Push([]rune("hello"))
cl.Push([]rune("world"))
// Previously created snapshot should remain the same
if len(snapshot) > 0 {
@ -36,8 +36,8 @@ func TestChunkList(t *testing.T) {
if len(*chunk1) != 2 {
t.Error("Snapshot should contain only two items")
}
if *(*chunk1)[0].text != "hello" || (*chunk1)[0].rank.index != 0 ||
*(*chunk1)[1].text != "world" || (*chunk1)[1].rank.index != 2 {
if string((*chunk1)[0].text) != "hello" || (*chunk1)[0].rank.index != 0 ||
string((*chunk1)[1].text) != "world" || (*chunk1)[1].rank.index != 2 {
t.Error("Invalid data")
}
if chunk1.IsFull() {
@ -46,7 +46,7 @@ func TestChunkList(t *testing.T) {
// Add more data
for i := 0; i < chunkSize*2; i++ {
cl.Push(fmt.Sprintf("item %d", i))
cl.Push([]rune(fmt.Sprintf("item %d", i)))
}
// Previous snapshot should remain the same
@ -64,8 +64,8 @@ func TestChunkList(t *testing.T) {
t.Error("Unexpected number of items")
}
cl.Push("hello")
cl.Push("world")
cl.Push([]rune("hello"))
cl.Push([]rune("world"))
lastChunkCount := len(*snapshot[len(snapshot)-1])
if lastChunkCount != 2 {

View File

@ -63,24 +63,24 @@ func Run(opts *Options) {
eventBox := util.NewEventBox()
// ANSI code processor
ansiProcessor := func(data *string) (*string, []ansiOffset) {
ansiProcessor := func(runes []rune) ([]rune, []ansiOffset) {
// By default, we do nothing
return data, nil
return runes, nil
}
if opts.Ansi {
if opts.Theme != nil {
var state *ansiState
ansiProcessor = func(data *string) (*string, []ansiOffset) {
trimmed, offsets, newState := extractColor(data, state)
ansiProcessor = func(runes []rune) ([]rune, []ansiOffset) {
trimmed, offsets, newState := extractColor(string(runes), state)
state = newState
return trimmed, offsets
return []rune(trimmed), offsets
}
} else {
// When color is disabled but ansi option is given,
// we simply strip out ANSI codes from the input
ansiProcessor = func(data *string) (*string, []ansiOffset) {
trimmed, _, _ := extractColor(data, nil)
return trimmed, nil
ansiProcessor = func(runes []rune) ([]rune, []ansiOffset) {
trimmed, _, _ := extractColor(string(runes), nil)
return []rune(trimmed), nil
}
}
}
@ -89,9 +89,9 @@ func Run(opts *Options) {
var chunkList *ChunkList
header := make([]string, 0, opts.HeaderLines)
if len(opts.WithNth) == 0 {
chunkList = NewChunkList(func(data *string, index int) *Item {
chunkList = NewChunkList(func(data []rune, index int) *Item {
if len(header) < opts.HeaderLines {
header = append(header, *data)
header = append(header, string(data))
eventBox.Set(EvtHeader, header)
return nil
}
@ -103,17 +103,17 @@ func Run(opts *Options) {
rank: Rank{0, 0, uint32(index)}}
})
} else {
chunkList = NewChunkList(func(data *string, index int) *Item {
chunkList = NewChunkList(func(data []rune, index int) *Item {
tokens := Tokenize(data, opts.Delimiter)
trans := Transform(tokens, opts.WithNth)
if len(header) < opts.HeaderLines {
header = append(header, *joinTokens(trans))
header = append(header, string(joinTokens(trans)))
eventBox.Set(EvtHeader, header)
return nil
}
item := Item{
text: joinTokens(trans),
origText: data,
origText: &data,
index: uint32(index),
colors: nil,
rank: Rank{0, 0, uint32(index)}}
@ -128,8 +128,8 @@ func Run(opts *Options) {
// Reader
streamingFilter := opts.Filter != nil && !sort && !opts.Tac && !opts.Sync
if !streamingFilter {
reader := Reader{func(str string) bool {
return chunkList.Push(str)
reader := Reader{func(data []rune) bool {
return chunkList.Push(data)
}, eventBox, opts.ReadZero}
go reader.ReadSource()
}
@ -151,10 +151,10 @@ func Run(opts *Options) {
if streamingFilter {
reader := Reader{
func(str string) bool {
item := chunkList.trans(&str, 0)
func(runes []rune) bool {
item := chunkList.trans(runes, 0)
if item != nil && pattern.MatchItem(item) {
fmt.Println(*item.text)
fmt.Println(string(item.text))
}
return false
}, eventBox, opts.ReadZero}

View File

@ -17,9 +17,9 @@ type colorOffset struct {
// Item represents each input line
type Item struct {
text *string
origText *string
transformed *[]Token
text []rune
origText *[]rune
transformed []Token
index uint32
offsets []Offset
colors []ansiOffset
@ -66,19 +66,19 @@ func (i *Item) Rank(cache bool) Rank {
// It is guaranteed that .transformed is not nil in normal execution
if i.transformed != nil {
lenSum := 0
for _, token := range *i.transformed {
lenSum += len(*token.text)
for _, token := range i.transformed {
lenSum += len(token.text)
}
tiebreak = uint16(lenSum)
} else {
tiebreak = uint16(len(*i.text))
tiebreak = uint16(len(i.text))
}
case byBegin:
// We can't just look at i.offsets[0][0] because it can be an inverse term
tiebreak = uint16(minBegin)
case byEnd:
if prevEnd > 0 {
tiebreak = uint16(1 + len(*i.text) - prevEnd)
tiebreak = uint16(1 + len(i.text) - prevEnd)
} else {
// Empty offsets due to inverse terms.
tiebreak = 1
@ -100,10 +100,12 @@ func (i *Item) AsString() string {
// StringPtr returns the pointer to the original string
func (i *Item) StringPtr() *string {
runes := i.text
if i.origText != nil {
return i.origText
runes = *i.origText
}
return i.text
str := string(runes)
return &str
}
func (item *Item) colorOffsets(color int, bold bool, current bool) []colorOffset {

View File

@ -39,14 +39,14 @@ func TestRankComparison(t *testing.T) {
// Match length, string length, index
func TestItemRank(t *testing.T) {
strs := []string{"foo", "foobar", "bar", "baz"}
item1 := Item{text: &strs[0], index: 1, offsets: []Offset{}}
strs := [][]rune{[]rune("foo"), []rune("foobar"), []rune("bar"), []rune("baz")}
item1 := Item{text: strs[0], index: 1, offsets: []Offset{}}
rank1 := item1.Rank(true)
if rank1.matchlen != 0 || rank1.tiebreak != 3 || rank1.index != 1 {
t.Error(item1.Rank(true))
}
// Only differ in index
item2 := Item{text: &strs[0], index: 0, offsets: []Offset{}}
item2 := Item{text: strs[0], index: 0, offsets: []Offset{}}
items := []*Item{&item1, &item2}
sort.Sort(ByRelevance(items))
@ -62,10 +62,10 @@ func TestItemRank(t *testing.T) {
}
// Sort by relevance
item3 := Item{text: &strs[1], rank: Rank{0, 0, 2}, offsets: []Offset{Offset{1, 3}, Offset{5, 7}}}
item4 := Item{text: &strs[1], rank: Rank{0, 0, 2}, offsets: []Offset{Offset{1, 2}, Offset{6, 7}}}
item5 := Item{text: &strs[2], rank: Rank{0, 0, 2}, offsets: []Offset{Offset{1, 3}, Offset{5, 7}}}
item6 := Item{text: &strs[2], rank: Rank{0, 0, 2}, offsets: []Offset{Offset{1, 2}, Offset{6, 7}}}
item3 := Item{text: strs[1], rank: Rank{0, 0, 2}, offsets: []Offset{Offset{1, 3}, Offset{5, 7}}}
item4 := Item{text: strs[1], rank: Rank{0, 0, 2}, offsets: []Offset{Offset{1, 2}, Offset{6, 7}}}
item5 := Item{text: strs[2], rank: Rank{0, 0, 2}, offsets: []Offset{Offset{1, 3}, Offset{5, 7}}}
item6 := Item{text: strs[2], rank: Rank{0, 0, 2}, offsets: []Offset{Offset{1, 2}, Offset{6, 7}}}
items = []*Item{&item1, &item2, &item3, &item4, &item5, &item6}
sort.Sort(ByRelevance(items))
if items[0] != &item2 || items[1] != &item1 ||

View File

@ -22,7 +22,7 @@ func randItem() *Item {
offsets[idx] = Offset{sidx, eidx}
}
return &Item{
text: &str,
text: []rune(str),
index: rand.Uint32(),
offsets: offsets}
}

View File

@ -44,7 +44,7 @@ type Pattern struct {
hasInvTerm bool
delimiter *regexp.Regexp
nth []Range
procFun map[termType]func(bool, *[]rune, []rune) (int, int)
procFun map[termType]func(bool, []rune, []rune) (int, int)
}
var (
@ -114,7 +114,7 @@ func BuildPattern(mode Mode, caseMode Case,
hasInvTerm: hasInvTerm,
nth: nth,
delimiter: delimiter,
procFun: make(map[termType]func(bool, *[]rune, []rune) (int, int))}
procFun: make(map[termType]func(bool, []rune, []rune) (int, int))}
ptr.procFun[termFuzzy] = algo.FuzzyMatch
ptr.procFun[termEqual] = algo.EqualMatch
@ -305,27 +305,25 @@ func (p *Pattern) extendedMatch(item *Item) []Offset {
return offsets
}
func (p *Pattern) prepareInput(item *Item) *[]Token {
func (p *Pattern) prepareInput(item *Item) []Token {
if item.transformed != nil {
return item.transformed
}
var ret *[]Token
var ret []Token
if len(p.nth) > 0 {
tokens := Tokenize(item.text, p.delimiter)
ret = Transform(tokens, p.nth)
} else {
runes := []rune(*item.text)
trans := []Token{Token{text: &runes, prefixLength: 0}}
ret = &trans
ret = []Token{Token{text: item.text, prefixLength: 0}}
}
item.transformed = ret
return ret
}
func (p *Pattern) iter(pfun func(bool, *[]rune, []rune) (int, int),
tokens *[]Token, caseSensitive bool, pattern []rune) (int, int) {
for _, part := range *tokens {
func (p *Pattern) iter(pfun func(bool, []rune, []rune) (int, int),
tokens []Token, caseSensitive bool, pattern []rune) (int, int) {
for _, part := range tokens {
prefixLength := part.prefixLength
if sidx, eidx := pfun(caseSensitive, part.text, pattern); sidx >= 0 {
return sidx + prefixLength, eidx + prefixLength

View File

@ -1,6 +1,7 @@
package fzf
import (
"reflect"
"testing"
"github.com/junegunn/fzf/src/algo"
@ -59,8 +60,8 @@ func TestExact(t *testing.T) {
clearPatternCache()
pattern := BuildPattern(ModeExtended, CaseSmart,
[]Range{}, nil, []rune("'abc"))
runes := []rune("aabbcc abc")
sidx, eidx := algo.ExactMatchNaive(pattern.caseSensitive, &runes, pattern.terms[0].text)
sidx, eidx := algo.ExactMatchNaive(
pattern.caseSensitive, []rune("aabbcc abc"), pattern.terms[0].text)
if sidx != 7 || eidx != 10 {
t.Errorf("%s / %d / %d", pattern.terms, sidx, eidx)
}
@ -72,8 +73,8 @@ func TestEqual(t *testing.T) {
pattern := BuildPattern(ModeExtended, CaseSmart, []Range{}, nil, []rune("^AbC$"))
match := func(str string, sidxExpected int, eidxExpected int) {
runes := []rune(str)
sidx, eidx := algo.EqualMatch(pattern.caseSensitive, &runes, pattern.terms[0].text)
sidx, eidx := algo.EqualMatch(
pattern.caseSensitive, []rune(str), pattern.terms[0].text)
if sidx != sidxExpected || eidx != eidxExpected {
t.Errorf("%s / %d / %d", pattern.terms, sidx, eidx)
}
@ -108,25 +109,23 @@ func TestCaseSensitivity(t *testing.T) {
}
func TestOrigTextAndTransformed(t *testing.T) {
strptr := func(str string) *string {
return &str
}
pattern := BuildPattern(ModeExtended, CaseSmart, []Range{}, nil, []rune("jg"))
tokens := Tokenize(strptr("junegunn"), nil)
tokens := Tokenize([]rune("junegunn"), nil)
trans := Transform(tokens, []Range{Range{1, 1}})
origRunes := []rune("junegunn.choi")
for _, mode := range []Mode{ModeFuzzy, ModeExtended} {
chunk := Chunk{
&Item{
text: strptr("junegunn"),
origText: strptr("junegunn.choi"),
text: []rune("junegunn"),
origText: &origRunes,
transformed: trans},
}
pattern.mode = mode
matches := pattern.matchChunk(&chunk)
if *matches[0].text != "junegunn" || *matches[0].origText != "junegunn.choi" ||
if string(matches[0].text) != "junegunn" || string(*matches[0].origText) != "junegunn.choi" ||
matches[0].offsets[0][0] != 0 || matches[0].offsets[0][1] != 5 ||
matches[0].transformed != trans {
!reflect.DeepEqual(matches[0].transformed, trans) {
t.Error("Invalid match result", matches)
}
}

View File

@ -5,13 +5,14 @@ import (
"io"
"os"
"os/exec"
"unicode/utf8"
"github.com/junegunn/fzf/src/util"
)
// Reader reads from command or standard input
type Reader struct {
pusher func(string) bool
pusher func([]rune) bool
eventBox *util.EventBox
delimNil bool
}
@ -37,13 +38,25 @@ func (r *Reader) feed(src io.Reader) {
}
reader := bufio.NewReader(src)
for {
line, err := reader.ReadString(delim)
if line != "" {
// "ReadString returns err != nil if and only if the returned data does not end in delim."
if err == nil {
line = line[:len(line)-1]
// ReadBytes returns err != nil if and only if the returned data does not
// end in delim.
bytea, err := reader.ReadBytes(delim)
if len(bytea) > 0 {
runes := make([]rune, 0, len(bytea))
for i := 0; i < len(bytea); {
if bytea[i] < utf8.RuneSelf {
runes = append(runes, rune(bytea[i]))
i++
} else {
r, sz := utf8.DecodeRune(bytea[i:])
i += sz
runes = append(runes, r)
}
if r.pusher(line) {
}
if err == nil {
runes = runes[:len(runes)-1]
}
if r.pusher(runes) {
r.eventBox.Set(EvtReadNew, nil)
}
}

View File

@ -10,7 +10,7 @@ func TestReadFromCommand(t *testing.T) {
strs := []string{}
eb := util.NewEventBox()
reader := Reader{
pusher: func(s string) bool { strs = append(strs, s); return true },
pusher: func(s []rune) bool { strs = append(strs, string(s)); return true },
eventBox: eb}
// Check EventBox

View File

@ -441,10 +441,10 @@ func (t *Terminal) printHeader() {
if line >= max {
continue
}
trimmed, colors, newState := extractColor(&lineStr, state)
trimmed, colors, newState := extractColor(lineStr, state)
state = newState
item := &Item{
text: trimmed,
text: []rune(trimmed),
index: 0,
colors: colors,
rank: Rank{0, 0, 0}}
@ -537,7 +537,7 @@ func (t *Terminal) printHighlighted(item *Item, bold bool, col1 int, col2 int, c
}
// Overflow
text := []rune(*item.text)
text := item.text
offsets := item.colorOffsets(col2, bold, current)
maxWidth := C.MaxX() - 3 - t.marginInt[1] - t.marginInt[3]
fullWidth := displayWidth(text)

View File

@ -18,7 +18,7 @@ type Range struct {
// Token contains the tokenized part of the strings and its prefix length
type Token struct {
text *[]rune
text []rune
prefixLength int
}
@ -75,8 +75,7 @@ func withPrefixLengths(tokens []string, begin int) []Token {
for idx, token := range tokens {
// Need to define a new local variable instead of the reused token to take
// the pointer to it
runes := []rune(token)
ret[idx] = Token{text: &runes, prefixLength: prefixLength}
ret[idx] = Token{text: []rune(token), prefixLength: prefixLength}
prefixLength += len([]rune(token))
}
return ret
@ -88,13 +87,13 @@ const (
awkWhite
)
func awkTokenizer(input *string) ([]string, int) {
func awkTokenizer(input []rune) ([]string, int) {
// 9, 32
ret := []string{}
str := []rune{}
prefixLength := 0
state := awkNil
for _, r := range []rune(*input) {
for _, r := range input {
white := r == 9 || r == 32
switch state {
case awkNil:
@ -126,34 +125,34 @@ func awkTokenizer(input *string) ([]string, int) {
}
// Tokenize tokenizes the given string with the delimiter
func Tokenize(str *string, delimiter *regexp.Regexp) []Token {
func Tokenize(runes []rune, delimiter *regexp.Regexp) []Token {
if delimiter == nil {
// AWK-style (\S+\s*)
tokens, prefixLength := awkTokenizer(str)
tokens, prefixLength := awkTokenizer(runes)
return withPrefixLengths(tokens, prefixLength)
}
tokens := delimiter.FindAllString(*str, -1)
tokens := delimiter.FindAllString(string(runes), -1)
return withPrefixLengths(tokens, 0)
}
func joinTokens(tokens *[]Token) *string {
ret := ""
for _, token := range *tokens {
ret += string(*token.text)
func joinTokens(tokens []Token) []rune {
ret := []rune{}
for _, token := range tokens {
ret = append(ret, token.text...)
}
return &ret
return ret
}
func joinTokensAsRunes(tokens *[]Token) *[]rune {
func joinTokensAsRunes(tokens []Token) []rune {
ret := []rune{}
for _, token := range *tokens {
ret = append(ret, *token.text...)
for _, token := range tokens {
ret = append(ret, token.text...)
}
return &ret
return ret
}
// Transform is used to transform the input when --with-nth option is given
func Transform(tokens []Token, withNth []Range) *[]Token {
func Transform(tokens []Token, withNth []Range) []Token {
transTokens := make([]Token, len(withNth))
numTokens := len(tokens)
for idx, r := range withNth {
@ -162,14 +161,14 @@ func Transform(tokens []Token, withNth []Range) *[]Token {
if r.begin == r.end {
idx := r.begin
if idx == rangeEllipsis {
part = append(part, *joinTokensAsRunes(&tokens)...)
part = append(part, joinTokensAsRunes(tokens)...)
} else {
if idx < 0 {
idx += numTokens + 1
}
if idx >= 1 && idx <= numTokens {
minIdx = idx - 1
part = append(part, *tokens[idx-1].text...)
part = append(part, tokens[idx-1].text...)
}
}
} else {
@ -196,7 +195,7 @@ func Transform(tokens []Token, withNth []Range) *[]Token {
minIdx = util.Max(0, begin-1)
for idx := begin; idx <= end; idx++ {
if idx >= 1 && idx <= numTokens {
part = append(part, *tokens[idx-1].text...)
part = append(part, tokens[idx-1].text...)
}
}
}
@ -206,7 +205,7 @@ func Transform(tokens []Token, withNth []Range) *[]Token {
} else {
prefixLength = 0
}
transTokens[idx] = Token{&part, prefixLength}
transTokens[idx] = Token{part, prefixLength}
}
return &transTokens
return transTokens
}

View File

@ -43,14 +43,14 @@ func TestParseRange(t *testing.T) {
func TestTokenize(t *testing.T) {
// AWK-style
input := " abc: def: ghi "
tokens := Tokenize(&input, nil)
if string(*tokens[0].text) != "abc: " || tokens[0].prefixLength != 2 {
tokens := Tokenize([]rune(input), nil)
if string(tokens[0].text) != "abc: " || tokens[0].prefixLength != 2 {
t.Errorf("%s", tokens)
}
// With delimiter
tokens = Tokenize(&input, delimiterRegexp(":"))
if string(*tokens[0].text) != " abc:" || tokens[0].prefixLength != 0 {
tokens = Tokenize([]rune(input), delimiterRegexp(":"))
if string(tokens[0].text) != " abc:" || tokens[0].prefixLength != 0 {
t.Errorf("%s", tokens)
}
}
@ -58,39 +58,39 @@ func TestTokenize(t *testing.T) {
func TestTransform(t *testing.T) {
input := " abc: def: ghi: jkl"
{
tokens := Tokenize(&input, nil)
tokens := Tokenize([]rune(input), nil)
{
ranges := splitNth("1,2,3")
tx := Transform(tokens, ranges)
if *joinTokens(tx) != "abc: def: ghi: " {
t.Errorf("%s", *tx)
if string(joinTokens(tx)) != "abc: def: ghi: " {
t.Errorf("%s", tx)
}
}
{
ranges := splitNth("1..2,3,2..,1")
tx := Transform(tokens, ranges)
if *joinTokens(tx) != "abc: def: ghi: def: ghi: jklabc: " ||
len(*tx) != 4 ||
string(*(*tx)[0].text) != "abc: def: " || (*tx)[0].prefixLength != 2 ||
string(*(*tx)[1].text) != "ghi: " || (*tx)[1].prefixLength != 14 ||
string(*(*tx)[2].text) != "def: ghi: jkl" || (*tx)[2].prefixLength != 8 ||
string(*(*tx)[3].text) != "abc: " || (*tx)[3].prefixLength != 2 {
t.Errorf("%s", *tx)
if string(joinTokens(tx)) != "abc: def: ghi: def: ghi: jklabc: " ||
len(tx) != 4 ||
string(tx[0].text) != "abc: def: " || tx[0].prefixLength != 2 ||
string(tx[1].text) != "ghi: " || tx[1].prefixLength != 14 ||
string(tx[2].text) != "def: ghi: jkl" || tx[2].prefixLength != 8 ||
string(tx[3].text) != "abc: " || tx[3].prefixLength != 2 {
t.Errorf("%s", tx)
}
}
}
{
tokens := Tokenize(&input, delimiterRegexp(":"))
tokens := Tokenize([]rune(input), delimiterRegexp(":"))
{
ranges := splitNth("1..2,3,2..,1")
tx := Transform(tokens, ranges)
if *joinTokens(tx) != " abc: def: ghi: def: ghi: jkl abc:" ||
len(*tx) != 4 ||
string(*(*tx)[0].text) != " abc: def:" || (*tx)[0].prefixLength != 0 ||
string(*(*tx)[1].text) != " ghi:" || (*tx)[1].prefixLength != 12 ||
string(*(*tx)[2].text) != " def: ghi: jkl" || (*tx)[2].prefixLength != 6 ||
string(*(*tx)[3].text) != " abc:" || (*tx)[3].prefixLength != 0 {
t.Errorf("%s", *tx)
if string(joinTokens(tx)) != " abc: def: ghi: def: ghi: jkl abc:" ||
len(tx) != 4 ||
string(tx[0].text) != " abc: def:" || tx[0].prefixLength != 0 ||
string(tx[1].text) != " ghi:" || tx[1].prefixLength != 12 ||
string(tx[2].text) != " def: ghi: jkl" || tx[2].prefixLength != 6 ||
string(tx[3].text) != " abc:" || tx[3].prefixLength != 0 {
t.Errorf("%s", tx)
}
}
}

View File

@ -78,13 +78,13 @@ func IsTty() bool {
return int(C.isatty(C.int(os.Stdin.Fd()))) != 0
}
func TrimRight(runes *[]rune) []rune {
func TrimRight(runes []rune) []rune {
var i int
for i = len(*runes) - 1; i >= 0; i-- {
char := (*runes)[i]
for i = len(runes) - 1; i >= 0; i-- {
char := runes[i]
if char != ' ' && char != '\t' {
break
}
}
return (*runes)[0 : i+1]
return runes[0 : i+1]
}