From 92a75c9563600a174e9ee8334853f99ed560492a Mon Sep 17 00:00:00 2001 From: Junegunn Choi Date: Fri, 2 Oct 2015 18:40:20 +0900 Subject: [PATCH] Use trimmed length when --nth is used with --tiebreak=length This change improves sort ordering for aligned tabular input. Given the following input: apple juice 100 apple pie 200 fzf --nth=2 will now prefer the one with pie. Before this change fzf compared "juice " and "pie ", both of which have the same length. --- src/item.go | 19 ++++++++--------- src/pattern.go | 23 +++++++++++---------- src/tokenizer.go | 5 +++-- src/tokenizer_test.go | 12 +++++------ src/util/util.go | 26 ++++++++++++++++++++++++ src/util/util_test.go | 20 ++++++++++++++++++ test/test_go.rb | 47 +++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 124 insertions(+), 28 deletions(-) diff --git a/src/item.go b/src/item.go index 12ca3df..f2f105a 100644 --- a/src/item.go +++ b/src/item.go @@ -6,8 +6,8 @@ import ( "github.com/junegunn/fzf/src/curses" ) -// Offset holds two 32-bit integers denoting the offsets of a matched substring -type Offset [2]int32 +// Offset holds three 32-bit integers: the begin and end offsets of a matched substring, followed by the trimmed length of the token the match was found in +type Offset [3]int32 type colorOffset struct { offset [2]int32 @@ -43,10 +43,13 @@ func (item *Item) Rank(cache bool) Rank { } matchlen := 0 prevEnd := 0 + lenSum := 0 minBegin := math.MaxUint16 for _, offset := range item.offsets { begin := int(offset[0]) end := int(offset[1]) + trimLen := int(offset[2]) + lenSum += trimLen if prevEnd > begin { begin = prevEnd } @@ -65,10 +68,7 @@ func (item *Item) Rank(cache bool) Rank { case byLength: // It is guaranteed that .transformed in not null in normal execution if item.transformed != nil { - lenSum := 0 - for _, token := range item.transformed { - lenSum += len(token.text) - } + // If offsets is empty, lenSum will be 0, but we don't care tiebreak = uint16(lenSum) } else { tiebreak = uint16(len(item.text)) @@ -116,7 +116,8 @@ func (item *Item) colorOffsets(color int, 
bold bool, current bool) []colorOffset if len(item.colors) == 0 { var offsets []colorOffset for _, off := range item.offsets { - offsets = append(offsets, colorOffset{offset: off, color: color, bold: bold}) + + offsets = append(offsets, colorOffset{offset: [2]int32{off[0], off[1]}, color: color, bold: bold}) } return offsets } @@ -160,7 +161,7 @@ func (item *Item) colorOffsets(color int, bold bool, current bool) []colorOffset if curr != 0 && idx > start { if curr == -1 { offsets = append(offsets, colorOffset{ - offset: Offset{int32(start), int32(idx)}, color: color, bold: bold}) + offset: [2]int32{int32(start), int32(idx)}, color: color, bold: bold}) } else { ansi := item.colors[curr-1] fg := ansi.color.fg @@ -180,7 +181,7 @@ func (item *Item) colorOffsets(color int, bold bool, current bool) []colorOffset } } offsets = append(offsets, colorOffset{ - offset: Offset{int32(start), int32(idx)}, + offset: [2]int32{int32(start), int32(idx)}, color: curses.PairFor(fg, bg), bold: ansi.color.bold || bold}) } diff --git a/src/pattern.go b/src/pattern.go index 5466b86..f5dd8a7 100644 --- a/src/pattern.go +++ b/src/pattern.go @@ -6,6 +6,7 @@ import ( "strings" "github.com/junegunn/fzf/src/algo" + "github.com/junegunn/fzf/src/util" ) // fuzzy @@ -251,9 +252,9 @@ func (p *Pattern) matchChunk(chunk *Chunk) []*Item { matches := []*Item{} if p.mode == ModeFuzzy { for _, item := range *chunk { - if sidx, eidx := p.fuzzyMatch(item); sidx >= 0 { + if sidx, eidx, tlen := p.fuzzyMatch(item); sidx >= 0 { matches = append(matches, - dupItem(item, []Offset{Offset{int32(sidx), int32(eidx)}})) + dupItem(item, []Offset{Offset{int32(sidx), int32(eidx), int32(tlen)}})) } } } else { @@ -269,7 +270,7 @@ func (p *Pattern) matchChunk(chunk *Chunk) []*Item { // MatchItem returns true if the Item is a match func (p *Pattern) MatchItem(item *Item) bool { if p.mode == ModeFuzzy { - sidx, _ := p.fuzzyMatch(item) + sidx, _, _ := p.fuzzyMatch(item) return sidx >= 0 } offsets := p.extendedMatch(item) @@ 
-288,7 +289,7 @@ func dupItem(item *Item, offsets []Offset) *Item { rank: Rank{0, 0, item.index}} } -func (p *Pattern) fuzzyMatch(item *Item) (int, int) { +func (p *Pattern) fuzzyMatch(item *Item) (int, int, int) { input := p.prepareInput(item) return p.iter(algo.FuzzyMatch, input, p.caseSensitive, p.forward, p.text) } @@ -298,13 +299,13 @@ func (p *Pattern) extendedMatch(item *Item) []Offset { offsets := []Offset{} for _, term := range p.terms { pfun := p.procFun[term.typ] - if sidx, eidx := p.iter(pfun, input, term.caseSensitive, p.forward, term.text); sidx >= 0 { + if sidx, eidx, tlen := p.iter(pfun, input, term.caseSensitive, p.forward, term.text); sidx >= 0 { if term.inv { break } - offsets = append(offsets, Offset{int32(sidx), int32(eidx)}) + offsets = append(offsets, Offset{int32(sidx), int32(eidx), int32(tlen)}) } else if term.inv { - offsets = append(offsets, Offset{0, 0}) + offsets = append(offsets, Offset{0, 0, 0}) } } return offsets @@ -320,19 +321,19 @@ func (p *Pattern) prepareInput(item *Item) []Token { tokens := Tokenize(item.text, p.delimiter) ret = Transform(tokens, p.nth) } else { - ret = []Token{Token{text: item.text, prefixLength: 0}} + ret = []Token{Token{text: item.text, prefixLength: 0, trimLength: util.TrimLen(item.text)}} } item.transformed = ret return ret } func (p *Pattern) iter(pfun func(bool, bool, []rune, []rune) (int, int), - tokens []Token, caseSensitive bool, forward bool, pattern []rune) (int, int) { + tokens []Token, caseSensitive bool, forward bool, pattern []rune) (int, int, int) { for _, part := range tokens { prefixLength := part.prefixLength if sidx, eidx := pfun(caseSensitive, forward, part.text, pattern); sidx >= 0 { - return sidx + prefixLength, eidx + prefixLength + return sidx + prefixLength, eidx + prefixLength, part.trimLength } } - return -1, -1 + return -1, -1, -1 // math.MaxUint16 } diff --git a/src/tokenizer.go b/src/tokenizer.go index a8d0400..4b89b38 100644 --- a/src/tokenizer.go +++ b/src/tokenizer.go @@ -20,6 
+20,7 @@ type Range struct { type Token struct { text []rune prefixLength int + trimLength int } // Delimiter for tokenizing the input @@ -81,7 +82,7 @@ func withPrefixLengths(tokens [][]rune, begin int) []Token { for idx, token := range tokens { // Need to define a new local variable instead of the reused token to take // the pointer to it - ret[idx] = Token{text: token, prefixLength: prefixLength} + ret[idx] = Token{token, prefixLength, util.TrimLen(token)} prefixLength += len(token) } return ret @@ -233,7 +234,7 @@ func Transform(tokens []Token, withNth []Range) []Token { } else { prefixLength = 0 } - transTokens[idx] = Token{part, prefixLength} + transTokens[idx] = Token{part, prefixLength, util.TrimLen(part)} } return transTokens } diff --git a/src/tokenizer_test.go b/src/tokenizer_test.go index 0f95aa1..b092440 100644 --- a/src/tokenizer_test.go +++ b/src/tokenizer_test.go @@ -44,22 +44,22 @@ func TestTokenize(t *testing.T) { // AWK-style input := " abc: def: ghi " tokens := Tokenize([]rune(input), Delimiter{}) - if string(tokens[0].text) != "abc: " || tokens[0].prefixLength != 2 { + if string(tokens[0].text) != "abc: " || tokens[0].prefixLength != 2 || tokens[0].trimLength != 4 { t.Errorf("%s", tokens) } // With delimiter tokens = Tokenize([]rune(input), delimiterRegexp(":")) - if string(tokens[0].text) != " abc:" || tokens[0].prefixLength != 0 { + if string(tokens[0].text) != " abc:" || tokens[0].prefixLength != 0 || tokens[0].trimLength != 4 { t.Errorf("%s", tokens) } // With delimiter regex tokens = Tokenize([]rune(input), delimiterRegexp("\\s+")) - if string(tokens[0].text) != " " || tokens[0].prefixLength != 0 || - string(tokens[1].text) != "abc: " || tokens[1].prefixLength != 2 || - string(tokens[2].text) != "def: " || tokens[2].prefixLength != 8 || - string(tokens[3].text) != "ghi " || tokens[3].prefixLength != 14 { + if string(tokens[0].text) != " " || tokens[0].prefixLength != 0 || tokens[0].trimLength != 0 || + string(tokens[1].text) != "abc: " || 
tokens[1].prefixLength != 2 || tokens[1].trimLength != 4 || + string(tokens[2].text) != "def: " || tokens[2].prefixLength != 8 || tokens[2].trimLength != 4 || + string(tokens[3].text) != "ghi " || tokens[3].prefixLength != 14 || tokens[3].trimLength != 3 { t.Errorf("%s", tokens) } } diff --git a/src/util/util.go b/src/util/util.go index aa5f227..e7e4f31 100644 --- a/src/util/util.go +++ b/src/util/util.go @@ -75,6 +75,7 @@ func IsTty() bool { return int(C.isatty(C.int(os.Stdin.Fd()))) != 0 } +// TrimRight returns rune array with trailing white spaces cut off func TrimRight(runes []rune) []rune { var i int for i = len(runes) - 1; i >= 0; i-- { @@ -86,6 +87,7 @@ func TrimRight(runes []rune) []rune { return runes[0 : i+1] } +// BytesToRunes converts byte array into rune array func BytesToRunes(bytea []byte) []rune { runes := make([]rune, 0, len(bytea)) for i := 0; i < len(bytea); { @@ -100,3 +102,27 @@ func BytesToRunes(bytea []byte) []rune { } return runes } + +// TrimLen returns the length of trimmed rune array +func TrimLen(runes []rune) int { + var i int + for i = len(runes) - 1; i >= 0; i-- { + char := runes[i] + if char != ' ' && char != '\t' { + break + } + } + // Completely empty + if i < 0 { + return 0 + } + + var j int + for j = 0; j < len(runes); j++ { + char := runes[j] + if char != ' ' && char != '\t' { + break + } + } + return i - j + 1 +} diff --git a/src/util/util_test.go b/src/util/util_test.go index 06cfd4f..8aeaeac 100644 --- a/src/util/util_test.go +++ b/src/util/util_test.go @@ -20,3 +20,23 @@ func TestContrain(t *testing.T) { t.Error("Expected", 3) } } + +func TestTrimLen(t *testing.T) { + check := func(str string, exp int) { + trimmed := TrimLen([]rune(str)) + if trimmed != exp { + t.Errorf("Invalid TrimLen result for '%s': %d (expected %d)", + str, trimmed, exp) + } + } + check("hello", 5) + check("hello ", 5) + check("hello ", 5) + check(" hello", 5) + check(" hello", 5) + check(" hello ", 5) + check(" hello ", 5) + check("h o", 5) + check(" h 
o ", 5) + check(" ", 0) +} diff --git a/test/test_go.rb b/test/test_go.rb index e76b520..77414ec 100644 --- a/test/test_go.rb +++ b/test/test_go.rb @@ -527,6 +527,53 @@ class TestGoFZF < TestBase assert_equal output, `cat #{tempname} | #{FZF} -fh -n2 -d:`.split($/) end + def test_tiebreak_length_with_nth_trim_length + input = [ + "apple juice bottle 1", + "apple ui bottle 2", + "app ice bottle 3", + "app ic bottle 4", + ] + writelines tempname, input + + # len(1) + output = [ + "app ice bottle 3", + "app ic bottle 4", + "apple juice bottle 1", + "apple ui bottle 2", + ] + assert_equal output, `cat #{tempname} | #{FZF} -fa -n1`.split($/) + + # len(1 ~ 2) + output = [ + "apple ui bottle 2", + "app ic bottle 4", + "apple juice bottle 1", + "app ice bottle 3", + ] + assert_equal output, `cat #{tempname} | #{FZF} -fai -n1..2`.split($/) + + # len(1) + len(2) + output = [ + "app ic bottle 4", + "app ice bottle 3", + "apple ui bottle 2", + "apple juice bottle 1", + ] + assert_equal output, `cat #{tempname} | #{FZF} -x -f"a i" -n1,2`.split($/) + + # len(2) + output = [ + "apple ui bottle 2", + "app ic bottle 4", + "app ice bottle 3", + "apple juice bottle 1", + ] + assert_equal output, `cat #{tempname} | #{FZF} -fi -n2`.split($/) + assert_equal output, `cat #{tempname} | #{FZF} -fi -n2,1..2`.split($/) + end + def test_tiebreak_end_backward_scan input = %w[ foobar-fb