From 2e3dc75425d23b4d9e8e05a901395914cf8d3120 Mon Sep 17 00:00:00 2001 From: Junegunn Choi Date: Fri, 2 Jun 2017 13:25:35 +0900 Subject: [PATCH] Fix inconsistent tiebreak scores when --nth is used Make sure to consistently calculate tiebreak scores based on the original line. This change may not be preferable if you filter aligned tabular input on a subset of columns using --nth. However, if we calculate length tiebreak only on the matched components instead of the entire line, the result can be very confusing when multiple --nth components are specified, so let's keep it simple and consistent. Close #926 --- src/core.go | 14 ++++++----- src/item.go | 9 ++++++++ src/pattern.go | 31 +++++++++++-------------- src/result.go | 7 +++--- src/result_test.go | 12 +++++----- src/tokenizer.go | 5 ++-- src/tokenizer_test.go | 12 +++++----- test/test_go.rb | 54 +------------------------------------------ 8 files changed, 49 insertions(+), 95 deletions(-) diff --git a/src/core.go b/src/core.go index 9b0109b..2b91ab6 100644 --- a/src/core.go +++ b/src/core.go @@ -95,9 +95,10 @@ func Run(opts *Options) { } chars, colors := ansiProcessor(data) return &Item{ - index: int32(index), - text: chars, - colors: colors} + index: int32(index), + trimLength: -1, + text: chars, + colors: colors} }) } else { chunkList = NewChunkList(func(data []byte, index int) *Item { @@ -110,9 +111,10 @@ func Run(opts *Options) { } textRunes := joinTokens(trans) item := Item{ - index: int32(index), - origText: &data, - colors: nil} + index: int32(index), + trimLength: -1, + origText: &data, + colors: nil} trimmed, colors := ansiProcessorRunes(textRunes) item.text = trimmed diff --git a/src/item.go b/src/item.go index 4e60faf..c67ac7a 100644 --- a/src/item.go +++ b/src/item.go @@ -7,6 +7,7 @@ import ( // Item represents each input line type Item struct { index int32 + trimLength int32 text util.Chars origText *[]byte colors *[]ansiOffset @@ -18,6 +19,14 @@ func (item *Item) Index() int32 { return item.index } +func (item *Item) TrimLength() int32 { + if item.trimLength >= 0 { + return item.trimLength + } + item.trimLength = int32(item.text.TrimLength()) + return item.trimLength +} + // Colors returns ansiOffsets of the Item func (item *Item) Colors() []ansiOffset { if item.colors == nil { diff --git a/src/pattern.go b/src/pattern.go index 4f64660..4614e07 100644 --- a/src/pattern.go +++ b/src/pattern.go @@ -299,20 +299,20 @@ func (p *Pattern) matchChunk(chunk *Chunk, space []*Result, slab *util.Slab) []* // MatchItem returns true if the Item is a match func (p *Pattern) MatchItem(item *Item, withPos bool, slab *util.Slab) (*Result, []Offset, *[]int) { if p.extended { - if offsets, bonus, trimLen, pos := p.extendedMatch(item, withPos, slab); len(offsets) == len(p.termSets) { - return buildResult(item, offsets, bonus, trimLen), offsets, pos + if offsets, bonus, pos := p.extendedMatch(item, withPos, slab); len(offsets) == len(p.termSets) { + return buildResult(item, offsets, bonus), offsets, pos } return nil, nil, nil } - offset, bonus, trimLen, pos := p.basicMatch(item, withPos, slab) + offset, bonus, pos := p.basicMatch(item, withPos, slab) if sidx := offset[0]; sidx >= 0 { offsets := []Offset{offset} - return buildResult(item, offsets, bonus, trimLen), offsets, pos + return buildResult(item, offsets, bonus), offsets, pos } return nil, nil, nil } -func (p *Pattern) basicMatch(item *Item, withPos bool, slab *util.Slab) (Offset, int, int, *[]int) { +func (p *Pattern) basicMatch(item *Item, withPos bool, slab *util.Slab) (Offset, int, *[]int) { input := p.prepareInput(item) if p.fuzzy { return p.iter(p.fuzzyAlgo, input, p.caseSensitive, p.normalize, p.forward, p.text, withPos, slab) @@ -320,11 +320,10 @@ func (p *Pattern) basicMatch(item *Item, withPos bool, slab *util.Slab) (Offset, return p.iter(algo.ExactMatchNaive, input, p.caseSensitive, p.normalize, p.forward, p.text, withPos, slab) } -func (p *Pattern) extendedMatch(item *Item, withPos bool, slab *util.Slab) ([]Offset, int, int, *[]int) { +func (p *Pattern) extendedMatch(item *Item, withPos bool, slab *util.Slab) ([]Offset, int, *[]int) { input := p.prepareInput(item) offsets := []Offset{} var totalScore int - var totalTrimLen int var allPos *[]int if withPos { allPos = &[]int{} @@ -332,16 +331,15 @@ func (p *Pattern) extendedMatch(item *Item, withPos bool, slab *util.Slab) ([]Of for _, termSet := range p.termSets { var offset Offset var currentScore int - var trimLen int matched := false for _, term := range termSet { pfun := p.procFun[term.typ] - off, score, tLen, pos := p.iter(pfun, input, term.caseSensitive, p.normalize, p.forward, term.text, withPos, slab) + off, score, pos := p.iter(pfun, input, term.caseSensitive, p.normalize, p.forward, term.text, withPos, slab) if sidx := off[0]; sidx >= 0 { if term.inv { continue } - offset, currentScore, trimLen = off, score, tLen + offset, currentScore = off, score matched = true if withPos { if pos != nil { @@ -354,7 +352,7 @@ func (p *Pattern) extendedMatch(item *Item, withPos bool, slab *util.Slab) ([]Of } break } else if term.inv { - offset, currentScore, trimLen = Offset{0, 0}, 0, 0 + offset, currentScore = Offset{0, 0}, 0 matched = true continue } @@ -362,10 +360,9 @@ func (p *Pattern) extendedMatch(item *Item, withPos bool, slab *util.Slab) ([]Of if matched { offsets = append(offsets, offset) totalScore += currentScore - totalTrimLen += trimLen } } - return offsets, totalScore, totalTrimLen, allPos + return offsets, totalScore, allPos } func (p *Pattern) prepareInput(item *Item) []Token { @@ -375,7 +372,7 @@ func (p *Pattern) prepareInput(item *Item) []Token { var ret []Token if len(p.nth) == 0 { - ret = []Token{Token{text: &item.text, prefixLength: 0, trimLength: int32(item.text.TrimLength())}} + ret = []Token{Token{text: &item.text, prefixLength: 0}} } else { tokens := Tokenize(item.text, p.delimiter) ret = Transform(tokens, p.nth) @@ -384,7 +381,7 @@ func (p *Pattern) prepareInput(item *Item) []Token { return ret } -func (p *Pattern) iter(pfun algo.Algo, tokens []Token, caseSensitive bool, normalize bool, forward bool, pattern []rune, withPos bool, slab *util.Slab) (Offset, int, int, *[]int) { +func (p *Pattern) iter(pfun algo.Algo, tokens []Token, caseSensitive bool, normalize bool, forward bool, pattern []rune, withPos bool, slab *util.Slab) (Offset, int, *[]int) { for _, part := range tokens { if res, pos := pfun(caseSensitive, normalize, forward, *part.text, pattern, withPos, slab); res.Start >= 0 { sidx := int32(res.Start) + part.prefixLength @@ -394,8 +391,8 @@ func (p *Pattern) iter(pfun algo.Algo, tokens []Token, caseSensitive bool, norma (*pos)[idx] += int(part.prefixLength) } } - return Offset{sidx, eidx}, res.Score, int(part.trimLength), pos + return Offset{sidx, eidx}, res.Score, pos } } - return Offset{-1, -1}, 0, -1, nil + return Offset{-1, -1}, 0, nil } diff --git a/src/result.go b/src/result.go index e071a9e..0b1fbf0 100644 --- a/src/result.go +++ b/src/result.go @@ -29,7 +29,7 @@ type Result struct { rank rank } -func buildResult(item *Item, offsets []Offset, score int, trimLen int) *Result { +func buildResult(item *Item, offsets []Offset, score int) *Result { if len(offsets) > 1 { sort.Sort(ByOrder(offsets)) } @@ -57,8 +57,7 @@ func buildResult(item *Item, offsets []Offset, score int, trimLen int) *Result { // Higher is better val = math.MaxUint16 - util.AsUint16(score) case byLength: - // If offsets is empty, trimLen will be 0, but we don't care - val = util.AsUint16(trimLen) + val = util.AsUint16(int(item.TrimLength())) case byBegin, byEnd: if validOffsetFound { whitePrefixLen := 0 @@ -72,7 +71,7 @@ func buildResult(item *Item, offsets []Offset, score int, trimLen int) *Result { if criterion == byBegin { val = util.AsUint16(minEnd - whitePrefixLen) } else { - val = util.AsUint16(math.MaxUint16 - math.MaxUint16*(maxEnd-whitePrefixLen)/trimLen) + val = util.AsUint16(math.MaxUint16 - math.MaxUint16*(maxEnd-whitePrefixLen)/int(item.TrimLength())) } } } diff --git a/src/result_test.go b/src/result_test.go index 0e91fc8..ad510c2 100644 --- a/src/result_test.go +++ b/src/result_test.go @@ -52,7 +52,7 @@ func TestResultRank(t *testing.T) { sortCriteria = []criterion{byScore, byLength} strs := [][]rune{[]rune("foo"), []rune("foobar"), []rune("bar"), []rune("baz")} - item1 := buildResult(&Item{text: util.RunesToChars(strs[0]), index: 1}, []Offset{}, 2, 3) + item1 := buildResult(&Item{text: util.RunesToChars(strs[0]), index: 1, trimLength: -1}, []Offset{}, 2) if item1.rank.points[0] != math.MaxUint16-2 || // Bonus item1.rank.points[1] != 3 || // Length item1.rank.points[2] != 0 || // Unused @@ -61,7 +61,7 @@ func TestResultRank(t *testing.T) { t.Error(item1.rank) } // Only differ in index - item2 := buildResult(&Item{text: util.RunesToChars(strs[0])}, []Offset{}, 2, 3) + item2 := buildResult(&Item{text: util.RunesToChars(strs[0])}, []Offset{}, 2) items := []*Result{item1, item2} sort.Sort(ByRelevance(items)) @@ -77,10 +77,10 @@ func TestResultRank(t *testing.T) { } // Sort by relevance - item3 := buildResult(&Item{index: 2}, []Offset{Offset{1, 3}, Offset{5, 7}}, 3, 0) - item4 := buildResult(&Item{index: 2}, []Offset{Offset{1, 2}, Offset{6, 7}}, 4, 0) - item5 := buildResult(&Item{index: 2}, []Offset{Offset{1, 3}, Offset{5, 7}}, 5, 0) - item6 := buildResult(&Item{index: 2}, []Offset{Offset{1, 2}, Offset{6, 7}}, 6, 0) + item3 := buildResult(&Item{index: 2}, []Offset{Offset{1, 3}, Offset{5, 7}}, 3) + item4 := buildResult(&Item{index: 2}, []Offset{Offset{1, 2}, Offset{6, 7}}, 4) + item5 := buildResult(&Item{index: 2}, []Offset{Offset{1, 3}, Offset{5, 7}}, 5) + item6 := buildResult(&Item{index: 2}, []Offset{Offset{1, 2}, Offset{6, 7}}, 6) items = []*Result{item1, item2, item3, item4, item5, item6} sort.Sort(ByRelevance(items)) if !(items[0] == item6 && items[1] == item5 && diff --git a/src/tokenizer.go b/src/tokenizer.go index ed87362..0e216ac 100644 --- a/src/tokenizer.go +++ b/src/tokenizer.go @@ -20,7 +20,6 @@ type Range struct { type Token struct { text *util.Chars prefixLength int32 - trimLength int32 } // Delimiter for tokenizing the input @@ -81,7 +80,7 @@ func withPrefixLengths(tokens []util.Chars, begin int) []Token { prefixLength := begin for idx, token := range tokens { // NOTE: &tokens[idx] instead of &tokens - ret[idx] = Token{&tokens[idx], int32(prefixLength), int32(token.TrimLength())} + ret[idx] = Token{&tokens[idx], int32(prefixLength)} prefixLength += token.Length() } return ret @@ -242,7 +241,7 @@ func Transform(tokens []Token, withNth []Range) []Token { } else { prefixLength = 0 } - transTokens[idx] = Token{&merged, prefixLength, int32(merged.TrimLength())} + transTokens[idx] = Token{&merged, prefixLength} } return transTokens } diff --git a/src/tokenizer_test.go b/src/tokenizer_test.go index 1dd4414..5925090 100644 --- a/src/tokenizer_test.go +++ b/src/tokenizer_test.go @@ -48,22 +48,22 @@ func TestTokenize(t *testing.T) { // AWK-style input := " abc: def: ghi " tokens := Tokenize(util.RunesToChars([]rune(input)), Delimiter{}) - if tokens[0].text.ToString() != "abc: " || tokens[0].prefixLength != 2 || tokens[0].trimLength != 4 { + if tokens[0].text.ToString() != "abc: " || tokens[0].prefixLength != 2 { t.Errorf("%s", tokens) } // With delimiter tokens = Tokenize(util.RunesToChars([]rune(input)), delimiterRegexp(":")) - if tokens[0].text.ToString() != " abc:" || tokens[0].prefixLength != 0 || tokens[0].trimLength != 4 { + if tokens[0].text.ToString() != " abc:" || tokens[0].prefixLength != 0 { t.Errorf("%s", tokens) } // With delimiter regex tokens = Tokenize(util.RunesToChars([]rune(input)), delimiterRegexp("\\s+")) - if tokens[0].text.ToString() != " " || tokens[0].prefixLength != 0 || tokens[0].trimLength != 0 || - tokens[1].text.ToString() != "abc: " || tokens[1].prefixLength != 2 || tokens[1].trimLength != 4 || - tokens[2].text.ToString() != "def: " || tokens[2].prefixLength != 8 || tokens[2].trimLength != 4 || - tokens[3].text.ToString() != "ghi " || tokens[3].prefixLength != 14 || tokens[3].trimLength != 3 { + if tokens[0].text.ToString() != " " || tokens[0].prefixLength != 0 || + tokens[1].text.ToString() != "abc: " || tokens[1].prefixLength != 2 || + tokens[2].text.ToString() != "def: " || tokens[2].prefixLength != 8 || + tokens[3].text.ToString() != "ghi " || tokens[3].prefixLength != 14 { t.Errorf("%s", tokens) } } diff --git a/test/test_go.rb b/test/test_go.rb index 85b8f37..3c7000b 100644 --- a/test/test_go.rb +++ b/test/test_go.rb @@ -683,62 +683,10 @@ class TestGoFZF < TestBase ] assert_equal output, `#{FZF} -fh < #{tempname}`.split($/) - output = %w[ - 1234567:h - 12345:he - 1:hell - 123:hello - ] + # Since 0.16.8, --nth doesn't affect --tiebreak assert_equal output, `#{FZF} -fh -n2 -d: < #{tempname}`.split($/) end - def test_tiebreak_length_with_nth_trim_length - input = [ - "apple juice bottle 1", - "apple ui bottle 2", - "app ice bottle 3", - "app ic bottle 4", - ] - writelines tempname, input - - # len(1) - output = [ - "app ice bottle 3", - "app ic bottle 4", - "apple juice bottle 1", - "apple ui bottle 2", - ] - assert_equal output, `#{FZF} -fa -n1 < #{tempname}`.split($/) - - # len(1 ~ 2) - output = [ - "app ic bottle 4", - "app ice bottle 3", - "apple ui bottle 2", - "apple juice bottle 1", - ] - assert_equal output, `#{FZF} -fai -n1..2 < #{tempname}`.split($/) - - # len(1) + len(2) - output = [ - "app ic bottle 4", - "app ice bottle 3", - "apple ui bottle 2", - "apple juice bottle 1", - ] - assert_equal output, `#{FZF} -x -f"a i" -n1,2 < #{tempname}`.split($/) - - # len(2) - output = [ - "app ic bottle 4", - "app ice bottle 3", - "apple ui bottle 2", - "apple juice bottle 1", - ] - assert_equal output, `#{FZF} -fi -n2 < #{tempname}`.split($/) - assert_equal output, `#{FZF} -fi -n2,1..2 < #{tempname}`.split($/) - end - def test_invalid_cache tmux.send_keys "(echo d; echo D; echo x) | #{fzf '-q d'}", :Enter tmux.until { |lines| lines[-2].include? '2/3' }