Improvements in performance and memory usage
I profiled fzf and it turned out that it was spending significant amount of time repeatedly converting character arrays into Unicode codepoints. This commit greatly improves search performance after the initial scan by memoizing the converted results. This commit also addresses the problem of unbounded memory usage of fzf. fzf is a short-lived process that usually processes small input, so it was implemented to cache the intermediate results very aggressively with no notion of cache expiration/eviction. I still think a proper implementation of caching scheme is definitely an overkill. Instead this commit introduces limits to the maximum size (or minimum selectivity) of the intermediate results that can be cached.
This commit is contained in:
parent
288131ac5a
commit
2fe1e28220
@ -1,6 +1,14 @@
|
||||
CHANGELOG
|
||||
=========
|
||||
|
||||
0.9.10
|
||||
------
|
||||
|
||||
### Improvements
|
||||
|
||||
- Performance optimization
|
||||
- Less aggressive memoization to limit memory usage
|
||||
|
||||
0.9.9
|
||||
-----
|
||||
|
||||
|
@ -1,8 +1,9 @@
|
||||
package algo
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"unicode"
|
||||
|
||||
"github.com/junegunn/fzf/src/util"
|
||||
)
|
||||
|
||||
/*
|
||||
@ -14,13 +15,11 @@ import (
|
||||
*/
|
||||
|
||||
// FuzzyMatch performs fuzzy-match
|
||||
func FuzzyMatch(caseSensitive bool, input *string, pattern []rune) (int, int) {
|
||||
func FuzzyMatch(caseSensitive bool, runes *[]rune, pattern []rune) (int, int) {
|
||||
if len(pattern) == 0 {
|
||||
return 0, 0
|
||||
}
|
||||
|
||||
runes := []rune(*input)
|
||||
|
||||
// 0. (FIXME) How to find the shortest match?
|
||||
// a_____b__c__abc
|
||||
// ^^^^^^^^^^ ^^^
|
||||
@ -34,7 +33,7 @@ func FuzzyMatch(caseSensitive bool, input *string, pattern []rune) (int, int) {
|
||||
sidx := -1
|
||||
eidx := -1
|
||||
|
||||
for index, char := range runes {
|
||||
for index, char := range *runes {
|
||||
// This is considerably faster than blindly applying strings.ToLower to the
|
||||
// whole string
|
||||
if !caseSensitive {
|
||||
@ -43,10 +42,10 @@ func FuzzyMatch(caseSensitive bool, input *string, pattern []rune) (int, int) {
|
||||
// compiler as of now does not inline non-leaf functions.)
|
||||
if char >= 'A' && char <= 'Z' {
|
||||
char += 32
|
||||
runes[index] = char
|
||||
(*runes)[index] = char
|
||||
} else if char > unicode.MaxASCII {
|
||||
char = unicode.To(unicode.LowerCase, char)
|
||||
runes[index] = char
|
||||
(*runes)[index] = char
|
||||
}
|
||||
}
|
||||
if char == pattern[pidx] {
|
||||
@ -63,7 +62,7 @@ func FuzzyMatch(caseSensitive bool, input *string, pattern []rune) (int, int) {
|
||||
if sidx >= 0 && eidx >= 0 {
|
||||
pidx--
|
||||
for index := eidx - 1; index >= sidx; index-- {
|
||||
char := runes[index]
|
||||
char := (*runes)[index]
|
||||
if char == pattern[pidx] {
|
||||
if pidx--; pidx < 0 {
|
||||
sidx = index
|
||||
@ -76,27 +75,6 @@ func FuzzyMatch(caseSensitive bool, input *string, pattern []rune) (int, int) {
|
||||
return -1, -1
|
||||
}
|
||||
|
||||
// ExactMatchStrings performs exact-match using strings package.
|
||||
// Currently not used.
|
||||
func ExactMatchStrings(caseSensitive bool, input *string, pattern []rune) (int, int) {
|
||||
if len(pattern) == 0 {
|
||||
return 0, 0
|
||||
}
|
||||
|
||||
var str string
|
||||
if caseSensitive {
|
||||
str = *input
|
||||
} else {
|
||||
str = strings.ToLower(*input)
|
||||
}
|
||||
|
||||
if idx := strings.Index(str, string(pattern)); idx >= 0 {
|
||||
prefixRuneLen := len([]rune((*input)[:idx]))
|
||||
return prefixRuneLen, prefixRuneLen + len(pattern)
|
||||
}
|
||||
return -1, -1
|
||||
}
|
||||
|
||||
// ExactMatchNaive is a basic string searching algorithm that handles case
|
||||
// sensitivity. Although naive, it still performs better than the combination
|
||||
// of strings.ToLower + strings.Index for typical fzf use cases where input
|
||||
@ -104,13 +82,12 @@ func ExactMatchStrings(caseSensitive bool, input *string, pattern []rune) (int,
|
||||
//
|
||||
// We might try to implement better algorithms in the future:
|
||||
// http://en.wikipedia.org/wiki/String_searching_algorithm
|
||||
func ExactMatchNaive(caseSensitive bool, input *string, pattern []rune) (int, int) {
|
||||
func ExactMatchNaive(caseSensitive bool, runes *[]rune, pattern []rune) (int, int) {
|
||||
if len(pattern) == 0 {
|
||||
return 0, 0
|
||||
}
|
||||
|
||||
runes := []rune(*input)
|
||||
numRunes := len(runes)
|
||||
numRunes := len(*runes)
|
||||
plen := len(pattern)
|
||||
if numRunes < plen {
|
||||
return -1, -1
|
||||
@ -118,7 +95,7 @@ func ExactMatchNaive(caseSensitive bool, input *string, pattern []rune) (int, in
|
||||
|
||||
pidx := 0
|
||||
for index := 0; index < numRunes; index++ {
|
||||
char := runes[index]
|
||||
char := (*runes)[index]
|
||||
if !caseSensitive {
|
||||
if char >= 'A' && char <= 'Z' {
|
||||
char += 32
|
||||
@ -140,14 +117,13 @@ func ExactMatchNaive(caseSensitive bool, input *string, pattern []rune) (int, in
|
||||
}
|
||||
|
||||
// PrefixMatch performs prefix-match
|
||||
func PrefixMatch(caseSensitive bool, input *string, pattern []rune) (int, int) {
|
||||
runes := []rune(*input)
|
||||
if len(runes) < len(pattern) {
|
||||
func PrefixMatch(caseSensitive bool, runes *[]rune, pattern []rune) (int, int) {
|
||||
if len(*runes) < len(pattern) {
|
||||
return -1, -1
|
||||
}
|
||||
|
||||
for index, r := range pattern {
|
||||
char := runes[index]
|
||||
char := (*runes)[index]
|
||||
if !caseSensitive {
|
||||
char = unicode.ToLower(char)
|
||||
}
|
||||
@ -159,8 +135,8 @@ func PrefixMatch(caseSensitive bool, input *string, pattern []rune) (int, int) {
|
||||
}
|
||||
|
||||
// SuffixMatch performs suffix-match
|
||||
func SuffixMatch(caseSensitive bool, input *string, pattern []rune) (int, int) {
|
||||
runes := []rune(strings.TrimRight(*input, " "))
|
||||
func SuffixMatch(caseSensitive bool, input *[]rune, pattern []rune) (int, int) {
|
||||
runes := util.TrimRight(input)
|
||||
trimmedLen := len(runes)
|
||||
diff := trimmedLen - len(pattern)
|
||||
if diff < 0 {
|
||||
|
@ -5,11 +5,12 @@ import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func assertMatch(t *testing.T, fun func(bool, *string, []rune) (int, int), caseSensitive bool, input string, pattern string, sidx int, eidx int) {
|
||||
func assertMatch(t *testing.T, fun func(bool, *[]rune, []rune) (int, int), caseSensitive bool, input string, pattern string, sidx int, eidx int) {
|
||||
if !caseSensitive {
|
||||
pattern = strings.ToLower(pattern)
|
||||
}
|
||||
s, e := fun(caseSensitive, &input, []rune(pattern))
|
||||
runes := []rune(input)
|
||||
s, e := fun(caseSensitive, &runes, []rune(pattern))
|
||||
if s != sidx {
|
||||
t.Errorf("Invalid start index: %d (expected: %d, %s / %s)", s, sidx, input, pattern)
|
||||
}
|
||||
@ -45,7 +46,6 @@ func TestSuffixMatch(t *testing.T) {
|
||||
|
||||
func TestEmptyPattern(t *testing.T) {
|
||||
assertMatch(t, FuzzyMatch, true, "foobar", "", 0, 0)
|
||||
assertMatch(t, ExactMatchStrings, true, "foobar", "", 0, 0)
|
||||
assertMatch(t, ExactMatchNaive, true, "foobar", "", 0, 0)
|
||||
assertMatch(t, PrefixMatch, true, "foobar", "", 0, 0)
|
||||
assertMatch(t, SuffixMatch, true, "foobar", "", 6, 6)
|
||||
|
12
src/cache.go
12
src/cache.go
@ -2,23 +2,23 @@ package fzf
|
||||
|
||||
import "sync"
|
||||
|
||||
// QueryCache associates strings to lists of items
|
||||
type QueryCache map[string][]*Item
|
||||
// queryCache associates strings to lists of items
|
||||
type queryCache map[string][]*Item
|
||||
|
||||
// ChunkCache associates Chunk and query string to lists of items
|
||||
type ChunkCache struct {
|
||||
mutex sync.Mutex
|
||||
cache map[*Chunk]*QueryCache
|
||||
cache map[*Chunk]*queryCache
|
||||
}
|
||||
|
||||
// NewChunkCache returns a new ChunkCache
|
||||
func NewChunkCache() ChunkCache {
|
||||
return ChunkCache{sync.Mutex{}, make(map[*Chunk]*QueryCache)}
|
||||
return ChunkCache{sync.Mutex{}, make(map[*Chunk]*queryCache)}
|
||||
}
|
||||
|
||||
// Add adds the list to the cache
|
||||
func (cc *ChunkCache) Add(chunk *Chunk, key string, list []*Item) {
|
||||
if len(key) == 0 || !chunk.IsFull() {
|
||||
if len(key) == 0 || !chunk.IsFull() || len(list) > queryCacheMax {
|
||||
return
|
||||
}
|
||||
|
||||
@ -27,7 +27,7 @@ func (cc *ChunkCache) Add(chunk *Chunk, key string, list []*Item) {
|
||||
|
||||
qc, ok := cc.cache[chunk]
|
||||
if !ok {
|
||||
cc.cache[chunk] = &QueryCache{}
|
||||
cc.cache[chunk] = &queryCache{}
|
||||
qc = cc.cache[chunk]
|
||||
}
|
||||
(*qc)[key] = list
|
||||
|
@ -4,7 +4,7 @@ import "testing"
|
||||
|
||||
func TestChunkCache(t *testing.T) {
|
||||
cache := NewChunkCache()
|
||||
chunk2 := make(Chunk, ChunkSize)
|
||||
chunk2 := make(Chunk, chunkSize)
|
||||
chunk1p := &Chunk{}
|
||||
chunk2p := &chunk2
|
||||
items1 := []*Item{&Item{}}
|
||||
|
@ -2,10 +2,7 @@ package fzf
|
||||
|
||||
import "sync"
|
||||
|
||||
// Capacity of each chunk
|
||||
const ChunkSize int = 100
|
||||
|
||||
// Chunk is a list of Item pointers whose size has the upper limit of ChunkSize
|
||||
// Chunk is a list of Item pointers whose size has the upper limit of chunkSize
|
||||
type Chunk []*Item // >>> []Item
|
||||
|
||||
// ItemBuilder is a closure type that builds Item object from a pointer to a
|
||||
@ -35,7 +32,7 @@ func (c *Chunk) push(trans ItemBuilder, data *string, index int) {
|
||||
|
||||
// IsFull returns true if the Chunk is full
|
||||
func (c *Chunk) IsFull() bool {
|
||||
return len(*c) == ChunkSize
|
||||
return len(*c) == chunkSize
|
||||
}
|
||||
|
||||
func (cl *ChunkList) lastChunk() *Chunk {
|
||||
@ -47,7 +44,7 @@ func CountItems(cs []*Chunk) int {
|
||||
if len(cs) == 0 {
|
||||
return 0
|
||||
}
|
||||
return ChunkSize*(len(cs)-1) + len(*(cs[len(cs)-1]))
|
||||
return chunkSize*(len(cs)-1) + len(*(cs[len(cs)-1]))
|
||||
}
|
||||
|
||||
// Push adds the item to the list
|
||||
@ -56,7 +53,7 @@ func (cl *ChunkList) Push(data string) {
|
||||
defer cl.mutex.Unlock()
|
||||
|
||||
if len(cl.chunks) == 0 || cl.lastChunk().IsFull() {
|
||||
newChunk := Chunk(make([]*Item, 0, ChunkSize))
|
||||
newChunk := Chunk(make([]*Item, 0, chunkSize))
|
||||
cl.chunks = append(cl.chunks, &newChunk)
|
||||
}
|
||||
|
||||
|
@ -45,7 +45,7 @@ func TestChunkList(t *testing.T) {
|
||||
}
|
||||
|
||||
// Add more data
|
||||
for i := 0; i < ChunkSize*2; i++ {
|
||||
for i := 0; i < chunkSize*2; i++ {
|
||||
cl.Push(fmt.Sprintf("item %d", i))
|
||||
}
|
||||
|
||||
@ -57,7 +57,7 @@ func TestChunkList(t *testing.T) {
|
||||
// New snapshot
|
||||
snapshot, count = cl.Snapshot()
|
||||
if len(snapshot) != 3 || !snapshot[0].IsFull() ||
|
||||
!snapshot[1].IsFull() || snapshot[2].IsFull() || count != ChunkSize*2+2 {
|
||||
!snapshot[1].IsFull() || snapshot[2].IsFull() || count != chunkSize*2+2 {
|
||||
t.Error("Expected two full chunks and one more chunk")
|
||||
}
|
||||
if len(*snapshot[2]) != 2 {
|
||||
|
@ -1,11 +1,38 @@
|
||||
package fzf
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/junegunn/fzf/src/util"
|
||||
)
|
||||
|
||||
const (
|
||||
// Current version
|
||||
const Version = "0.9.9"
|
||||
Version = "0.9.9"
|
||||
|
||||
// Core
|
||||
coordinatorDelayMax time.Duration = 100 * time.Millisecond
|
||||
coordinatorDelayStep time.Duration = 10 * time.Millisecond
|
||||
|
||||
// Reader
|
||||
defaultCommand = `find * -path '*/\.*' -prune -o -type f -print -o -type l -print 2> /dev/null`
|
||||
|
||||
// Terminal
|
||||
initialDelay = 100 * time.Millisecond
|
||||
spinnerDuration = 200 * time.Millisecond
|
||||
|
||||
// Matcher
|
||||
progressMinDuration = 200 * time.Millisecond
|
||||
|
||||
// Capacity of each chunk
|
||||
chunkSize int = 100
|
||||
|
||||
// Do not cache results of low selectivity queries
|
||||
queryCacheMax int = chunkSize / 5
|
||||
|
||||
// Not to cache mergers with large lists
|
||||
mergerCacheMax int = 100000
|
||||
)
|
||||
|
||||
// fzf events
|
||||
const (
|
||||
|
@ -34,9 +34,6 @@ import (
|
||||
"github.com/junegunn/fzf/src/util"
|
||||
)
|
||||
|
||||
const coordinatorDelayMax time.Duration = 100 * time.Millisecond
|
||||
const coordinatorDelayStep time.Duration = 10 * time.Millisecond
|
||||
|
||||
func initProcs() {
|
||||
runtime.GOMAXPROCS(runtime.NumCPU())
|
||||
}
|
||||
@ -99,8 +96,9 @@ func Run(options *Options) {
|
||||
} else {
|
||||
chunkList = NewChunkList(func(data *string, index int) *Item {
|
||||
tokens := Tokenize(data, opts.Delimiter)
|
||||
trans := Transform(tokens, opts.WithNth)
|
||||
item := Item{
|
||||
text: Transform(tokens, opts.WithNth).whole,
|
||||
text: joinTokens(trans),
|
||||
origText: data,
|
||||
index: uint32(index),
|
||||
colors: nil,
|
||||
|
@ -19,7 +19,7 @@ type colorOffset struct {
|
||||
type Item struct {
|
||||
text *string
|
||||
origText *string
|
||||
transformed *Transformed
|
||||
transformed *[]Token
|
||||
index uint32
|
||||
offsets []Offset
|
||||
colors []ansiOffset
|
||||
|
@ -34,10 +34,6 @@ const (
|
||||
reqReset
|
||||
)
|
||||
|
||||
const (
|
||||
progressMinDuration = 200 * time.Millisecond
|
||||
)
|
||||
|
||||
// NewMatcher returns a new Matcher
|
||||
func NewMatcher(patternBuilder func([]rune) *Pattern,
|
||||
sort bool, tac bool, eventBox *util.EventBox) *Matcher {
|
||||
@ -100,7 +96,9 @@ func (m *Matcher) Loop() {
|
||||
}
|
||||
|
||||
if !cancelled {
|
||||
if merger.Cacheable() {
|
||||
m.mergerCache[patternString] = merger
|
||||
}
|
||||
merger.final = request.final
|
||||
m.eventBox.Set(EvtSearchFin, merger)
|
||||
}
|
||||
|
@ -61,8 +61,8 @@ func (mg *Merger) Get(idx int) *Item {
|
||||
if mg.tac {
|
||||
idx = mg.count - idx - 1
|
||||
}
|
||||
chunk := (*mg.chunks)[idx/ChunkSize]
|
||||
return (*chunk)[idx%ChunkSize]
|
||||
chunk := (*mg.chunks)[idx/chunkSize]
|
||||
return (*chunk)[idx%chunkSize]
|
||||
}
|
||||
|
||||
if mg.sorted {
|
||||
@ -82,6 +82,10 @@ func (mg *Merger) Get(idx int) *Item {
|
||||
panic(fmt.Sprintf("Index out of bounds (unsorted, %d/%d)", idx, mg.count))
|
||||
}
|
||||
|
||||
func (mg *Merger) Cacheable() bool {
|
||||
return mg.count < mergerCacheMax
|
||||
}
|
||||
|
||||
func (mg *Merger) mergedGet(idx int) *Item {
|
||||
for i := len(mg.merged); i <= idx; i++ {
|
||||
minRank := Rank{0, 0, 0}
|
||||
|
@ -43,7 +43,7 @@ type Pattern struct {
|
||||
hasInvTerm bool
|
||||
delimiter *regexp.Regexp
|
||||
nth []Range
|
||||
procFun map[termType]func(bool, *string, []rune) (int, int)
|
||||
procFun map[termType]func(bool, *[]rune, []rune) (int, int)
|
||||
}
|
||||
|
||||
var (
|
||||
@ -122,7 +122,7 @@ func BuildPattern(mode Mode, caseMode Case,
|
||||
hasInvTerm: hasInvTerm,
|
||||
nth: nth,
|
||||
delimiter: delimiter,
|
||||
procFun: make(map[termType]func(bool, *string, []rune) (int, int))}
|
||||
procFun: make(map[termType]func(bool, *[]rune, []rune) (int, int))}
|
||||
|
||||
ptr.procFun[termFuzzy] = algo.FuzzyMatch
|
||||
ptr.procFun[termExact] = algo.ExactMatchNaive
|
||||
@ -300,28 +300,27 @@ func (p *Pattern) extendedMatch(item *Item) []Offset {
|
||||
return offsets
|
||||
}
|
||||
|
||||
func (p *Pattern) prepareInput(item *Item) *Transformed {
|
||||
func (p *Pattern) prepareInput(item *Item) *[]Token {
|
||||
if item.transformed != nil {
|
||||
return item.transformed
|
||||
}
|
||||
|
||||
var ret *Transformed
|
||||
var ret *[]Token
|
||||
if len(p.nth) > 0 {
|
||||
tokens := Tokenize(item.text, p.delimiter)
|
||||
ret = Transform(tokens, p.nth)
|
||||
} else {
|
||||
trans := Transformed{
|
||||
whole: item.text,
|
||||
parts: []Token{Token{text: item.text, prefixLength: 0}}}
|
||||
runes := []rune(*item.text)
|
||||
trans := []Token{Token{text: &runes, prefixLength: 0}}
|
||||
ret = &trans
|
||||
}
|
||||
item.transformed = ret
|
||||
return ret
|
||||
}
|
||||
|
||||
func (p *Pattern) iter(pfun func(bool, *string, []rune) (int, int),
|
||||
inputs *Transformed, pattern []rune) (int, int) {
|
||||
for _, part := range inputs.parts {
|
||||
func (p *Pattern) iter(pfun func(bool, *[]rune, []rune) (int, int),
|
||||
tokens *[]Token, pattern []rune) (int, int) {
|
||||
for _, part := range *tokens {
|
||||
prefixLength := part.prefixLength
|
||||
if sidx, eidx := pfun(p.caseSensitive, part.text, pattern); sidx >= 0 {
|
||||
return sidx + prefixLength, eidx + prefixLength
|
||||
|
@ -58,8 +58,8 @@ func TestExact(t *testing.T) {
|
||||
clearPatternCache()
|
||||
pattern := BuildPattern(ModeExtended, CaseSmart,
|
||||
[]Range{}, nil, []rune("'abc"))
|
||||
str := "aabbcc abc"
|
||||
sidx, eidx := algo.ExactMatchNaive(pattern.caseSensitive, &str, pattern.terms[0].text)
|
||||
runes := []rune("aabbcc abc")
|
||||
sidx, eidx := algo.ExactMatchNaive(pattern.caseSensitive, &runes, pattern.terms[0].text)
|
||||
if sidx != 7 || eidx != 10 {
|
||||
t.Errorf("%s / %d / %d", pattern.terms, sidx, eidx)
|
||||
}
|
||||
|
@ -9,8 +9,6 @@ import (
|
||||
"github.com/junegunn/fzf/src/util"
|
||||
)
|
||||
|
||||
const defaultCommand = `find * -path '*/\.*' -prune -o -type f -print -o -type l -print 2> /dev/null`
|
||||
|
||||
// Reader reads from command or standard input
|
||||
type Reader struct {
|
||||
pusher func(string)
|
||||
|
@ -38,7 +38,7 @@ type Terminal struct {
|
||||
progress int
|
||||
reading bool
|
||||
merger *Merger
|
||||
selected map[*string]selectedItem
|
||||
selected map[uint32]selectedItem
|
||||
reqBox *util.EventBox
|
||||
eventBox *util.EventBox
|
||||
mutex sync.Mutex
|
||||
@ -79,11 +79,6 @@ const (
|
||||
reqQuit
|
||||
)
|
||||
|
||||
const (
|
||||
initialDelay = 100 * time.Millisecond
|
||||
spinnerDuration = 200 * time.Millisecond
|
||||
)
|
||||
|
||||
// NewTerminal returns new Terminal object
|
||||
func NewTerminal(opts *Options, eventBox *util.EventBox) *Terminal {
|
||||
input := []rune(opts.Query)
|
||||
@ -103,7 +98,7 @@ func NewTerminal(opts *Options, eventBox *util.EventBox) *Terminal {
|
||||
pressed: 0,
|
||||
printQuery: opts.PrintQuery,
|
||||
merger: EmptyMerger,
|
||||
selected: make(map[*string]selectedItem),
|
||||
selected: make(map[uint32]selectedItem),
|
||||
reqBox: util.NewEventBox(),
|
||||
eventBox: eventBox,
|
||||
mutex: sync.Mutex{},
|
||||
@ -273,7 +268,7 @@ func (t *Terminal) printList() {
|
||||
}
|
||||
|
||||
func (t *Terminal) printItem(item *Item, current bool) {
|
||||
_, selected := t.selected[item.text]
|
||||
_, selected := t.selected[item.index]
|
||||
if current {
|
||||
C.CPrint(C.ColCursor, true, ">")
|
||||
if selected {
|
||||
@ -565,16 +560,16 @@ func (t *Terminal) Loop() {
|
||||
toggle := func() {
|
||||
if t.cy < t.merger.Length() {
|
||||
item := t.merger.Get(t.cy)
|
||||
if _, found := t.selected[item.text]; !found {
|
||||
if _, found := t.selected[item.index]; !found {
|
||||
var strptr *string
|
||||
if item.origText != nil {
|
||||
strptr = item.origText
|
||||
} else {
|
||||
strptr = item.text
|
||||
}
|
||||
t.selected[item.text] = selectedItem{time.Now(), strptr}
|
||||
t.selected[item.index] = selectedItem{time.Now(), strptr}
|
||||
} else {
|
||||
delete(t.selected, item.text)
|
||||
delete(t.selected, item.index)
|
||||
}
|
||||
req(reqInfo)
|
||||
}
|
||||
|
@ -16,15 +16,9 @@ type Range struct {
|
||||
end int
|
||||
}
|
||||
|
||||
// Transformed holds the result of tokenization and transformation
|
||||
type Transformed struct {
|
||||
whole *string
|
||||
parts []Token
|
||||
}
|
||||
|
||||
// Token contains the tokenized part of the strings and its prefix length
|
||||
type Token struct {
|
||||
text *string
|
||||
text *[]rune
|
||||
prefixLength int
|
||||
}
|
||||
|
||||
@ -81,8 +75,8 @@ func withPrefixLengths(tokens []string, begin int) []Token {
|
||||
for idx, token := range tokens {
|
||||
// Need to define a new local variable instead of the reused token to take
|
||||
// the pointer to it
|
||||
str := token
|
||||
ret[idx] = Token{text: &str, prefixLength: prefixLength}
|
||||
runes := []rune(token)
|
||||
ret[idx] = Token{text: &runes, prefixLength: prefixLength}
|
||||
prefixLength += len([]rune(token))
|
||||
}
|
||||
return ret
|
||||
@ -142,33 +136,40 @@ func Tokenize(str *string, delimiter *regexp.Regexp) []Token {
|
||||
return withPrefixLengths(tokens, 0)
|
||||
}
|
||||
|
||||
func joinTokens(tokens []Token) string {
|
||||
func joinTokens(tokens *[]Token) *string {
|
||||
ret := ""
|
||||
for _, token := range tokens {
|
||||
ret += *token.text
|
||||
for _, token := range *tokens {
|
||||
ret += string(*token.text)
|
||||
}
|
||||
return ret
|
||||
return &ret
|
||||
}
|
||||
|
||||
func joinTokensAsRunes(tokens *[]Token) *[]rune {
|
||||
ret := []rune{}
|
||||
for _, token := range *tokens {
|
||||
ret = append(ret, *token.text...)
|
||||
}
|
||||
return &ret
|
||||
}
|
||||
|
||||
// Transform is used to transform the input when --with-nth option is given
|
||||
func Transform(tokens []Token, withNth []Range) *Transformed {
|
||||
func Transform(tokens []Token, withNth []Range) *[]Token {
|
||||
transTokens := make([]Token, len(withNth))
|
||||
numTokens := len(tokens)
|
||||
whole := ""
|
||||
for idx, r := range withNth {
|
||||
part := ""
|
||||
part := []rune{}
|
||||
minIdx := 0
|
||||
if r.begin == r.end {
|
||||
idx := r.begin
|
||||
if idx == rangeEllipsis {
|
||||
part += joinTokens(tokens)
|
||||
part = append(part, *joinTokensAsRunes(&tokens)...)
|
||||
} else {
|
||||
if idx < 0 {
|
||||
idx += numTokens + 1
|
||||
}
|
||||
if idx >= 1 && idx <= numTokens {
|
||||
minIdx = idx - 1
|
||||
part += *tokens[idx-1].text
|
||||
part = append(part, *tokens[idx-1].text...)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@ -195,11 +196,10 @@ func Transform(tokens []Token, withNth []Range) *Transformed {
|
||||
minIdx = util.Max(0, begin-1)
|
||||
for idx := begin; idx <= end; idx++ {
|
||||
if idx >= 1 && idx <= numTokens {
|
||||
part += *tokens[idx-1].text
|
||||
part = append(part, *tokens[idx-1].text...)
|
||||
}
|
||||
}
|
||||
}
|
||||
whole += part
|
||||
var prefixLength int
|
||||
if minIdx < numTokens {
|
||||
prefixLength = tokens[minIdx].prefixLength
|
||||
@ -208,7 +208,5 @@ func Transform(tokens []Token, withNth []Range) *Transformed {
|
||||
}
|
||||
transTokens[idx] = Token{&part, prefixLength}
|
||||
}
|
||||
return &Transformed{
|
||||
whole: &whole,
|
||||
parts: transTokens}
|
||||
return &transTokens
|
||||
}
|
||||
|
@ -44,13 +44,13 @@ func TestTokenize(t *testing.T) {
|
||||
// AWK-style
|
||||
input := " abc: def: ghi "
|
||||
tokens := Tokenize(&input, nil)
|
||||
if *tokens[0].text != "abc: " || tokens[0].prefixLength != 2 {
|
||||
if string(*tokens[0].text) != "abc: " || tokens[0].prefixLength != 2 {
|
||||
t.Errorf("%s", tokens)
|
||||
}
|
||||
|
||||
// With delimiter
|
||||
tokens = Tokenize(&input, delimiterRegexp(":"))
|
||||
if *tokens[0].text != " abc:" || tokens[0].prefixLength != 0 {
|
||||
if string(*tokens[0].text) != " abc:" || tokens[0].prefixLength != 0 {
|
||||
t.Errorf("%s", tokens)
|
||||
}
|
||||
}
|
||||
@ -62,19 +62,19 @@ func TestTransform(t *testing.T) {
|
||||
{
|
||||
ranges := splitNth("1,2,3")
|
||||
tx := Transform(tokens, ranges)
|
||||
if *tx.whole != "abc: def: ghi: " {
|
||||
if *joinTokens(tx) != "abc: def: ghi: " {
|
||||
t.Errorf("%s", *tx)
|
||||
}
|
||||
}
|
||||
{
|
||||
ranges := splitNth("1..2,3,2..,1")
|
||||
tx := Transform(tokens, ranges)
|
||||
if *tx.whole != "abc: def: ghi: def: ghi: jklabc: " ||
|
||||
len(tx.parts) != 4 ||
|
||||
*tx.parts[0].text != "abc: def: " || tx.parts[0].prefixLength != 2 ||
|
||||
*tx.parts[1].text != "ghi: " || tx.parts[1].prefixLength != 14 ||
|
||||
*tx.parts[2].text != "def: ghi: jkl" || tx.parts[2].prefixLength != 8 ||
|
||||
*tx.parts[3].text != "abc: " || tx.parts[3].prefixLength != 2 {
|
||||
if *joinTokens(tx) != "abc: def: ghi: def: ghi: jklabc: " ||
|
||||
len(*tx) != 4 ||
|
||||
string(*(*tx)[0].text) != "abc: def: " || (*tx)[0].prefixLength != 2 ||
|
||||
string(*(*tx)[1].text) != "ghi: " || (*tx)[1].prefixLength != 14 ||
|
||||
string(*(*tx)[2].text) != "def: ghi: jkl" || (*tx)[2].prefixLength != 8 ||
|
||||
string(*(*tx)[3].text) != "abc: " || (*tx)[3].prefixLength != 2 {
|
||||
t.Errorf("%s", *tx)
|
||||
}
|
||||
}
|
||||
@ -84,12 +84,12 @@ func TestTransform(t *testing.T) {
|
||||
{
|
||||
ranges := splitNth("1..2,3,2..,1")
|
||||
tx := Transform(tokens, ranges)
|
||||
if *tx.whole != " abc: def: ghi: def: ghi: jkl abc:" ||
|
||||
len(tx.parts) != 4 ||
|
||||
*tx.parts[0].text != " abc: def:" || tx.parts[0].prefixLength != 0 ||
|
||||
*tx.parts[1].text != " ghi:" || tx.parts[1].prefixLength != 12 ||
|
||||
*tx.parts[2].text != " def: ghi: jkl" || tx.parts[2].prefixLength != 6 ||
|
||||
*tx.parts[3].text != " abc:" || tx.parts[3].prefixLength != 0 {
|
||||
if *joinTokens(tx) != " abc: def: ghi: def: ghi: jkl abc:" ||
|
||||
len(*tx) != 4 ||
|
||||
string(*(*tx)[0].text) != " abc: def:" || (*tx)[0].prefixLength != 0 ||
|
||||
string(*(*tx)[1].text) != " ghi:" || (*tx)[1].prefixLength != 12 ||
|
||||
string(*(*tx)[2].text) != " def: ghi: jkl" || (*tx)[2].prefixLength != 6 ||
|
||||
string(*(*tx)[3].text) != " abc:" || (*tx)[3].prefixLength != 0 {
|
||||
t.Errorf("%s", *tx)
|
||||
}
|
||||
}
|
||||
|
@ -77,3 +77,14 @@ func Between(val int, min int, max int) bool {
|
||||
func IsTty() bool {
|
||||
return int(C.isatty(C.int(os.Stdin.Fd()))) != 0
|
||||
}
|
||||
|
||||
func TrimRight(runes *[]rune) []rune {
|
||||
var i int
|
||||
for i = len(*runes) - 1; i >= 0; i-- {
|
||||
char := (*runes)[i]
|
||||
if char != ' ' && char != '\t' {
|
||||
break
|
||||
}
|
||||
}
|
||||
return (*runes)[0 : i+1]
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user