From 9e85cba0d06025983a1a747bfc06c9955388d9c0 Mon Sep 17 00:00:00 2001 From: Junegunn Choi Date: Sun, 16 Jul 2017 23:31:19 +0900 Subject: [PATCH] Reduce memory footprint of Item struct --- src/algo/algo.go | 5 +- src/cache.go | 4 +- src/cache_test.go | 24 +++++----- src/chunklist_test.go | 8 ++-- src/core.go | 18 ++------ src/item.go | 26 ++++------- src/pattern.go | 19 ++++---- src/pattern_test.go | 6 +-- src/result.go | 6 +-- src/result_test.go | 24 +++++++--- src/util/chars.go | 101 +++++++++++++++++++++++++++-------------- src/util/chars_test.go | 20 ++------ 12 files changed, 139 insertions(+), 122 deletions(-) diff --git a/src/algo/algo.go b/src/algo/algo.go index ac6c66d..c4930c1 100644 --- a/src/algo/algo.go +++ b/src/algo/algo.go @@ -283,8 +283,9 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.C // Phase 1. Check if there's a match and calculate bonus for each point pidx, lastIdx, prevClass := 0, 0, charNonWord + input.CopyRunes(T) for idx := 0; idx < N; idx++ { - char := input.Get(idx) + char := T[idx] var class charClass if char <= unicode.MaxASCII { class = charClassOfAscii(char) @@ -389,7 +390,7 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.C if i == 0 { fmt.Print(" ") for j := int(F[i]); j <= lastIdx; j++ { - fmt.Printf(" " + string(input.Get(j)) + " ") + fmt.Printf(" " + string(T[j]) + " ") } fmt.Println() } diff --git a/src/cache.go b/src/cache.go index 272a78b..0540bdc 100644 --- a/src/cache.go +++ b/src/cache.go @@ -33,8 +33,8 @@ func (cc *ChunkCache) Add(chunk *Chunk, key string, list []*Result) { (*qc)[key] = list } -// Find is called to lookup ChunkCache -func (cc *ChunkCache) Find(chunk *Chunk, key string) []*Result { +// Lookup is called to lookup ChunkCache +func (cc *ChunkCache) Lookup(chunk *Chunk, key string) []*Result { if len(key) == 0 || !chunk.IsFull() { return nil } diff --git a/src/cache_test.go b/src/cache_test.go index 8703fc4..54f3fb0 100644 --- a/src/cache_test.go +++ b/src/cache_test.go @@ -14,27 +14,27 @@ func TestChunkCache(t *testing.T) { cache.Add(chunk2p, "bar", items2) { // chunk1 is not full - cached, found := cache.Find(chunk1p, "foo") - if found { - t.Error("Cached disabled for non-empty chunks", found, cached) + cached := cache.Lookup(chunk1p, "foo") + if cached != nil { + t.Error("Cached disabled for non-empty chunks", cached) } } { - cached, found := cache.Find(chunk2p, "foo") - if !found || len(cached) != 1 { - t.Error("Expected 1 item cached", found, cached) + cached := cache.Lookup(chunk2p, "foo") + if cached == nil || len(cached) != 1 { + t.Error("Expected 1 item cached", cached) } } { - cached, found := cache.Find(chunk2p, "bar") - if !found || len(cached) != 2 { - t.Error("Expected 2 items cached", found, cached) + cached := cache.Lookup(chunk2p, "bar") + if cached == nil || len(cached) != 2 { + t.Error("Expected 2 items cached", cached) } } { - cached, found := cache.Find(chunk1p, "foobar") - if found { - t.Error("Expected 0 item cached", found, cached) + cached := cache.Lookup(chunk1p, "foobar") + if cached != nil { + t.Error("Expected 0 item cached", cached) } } } diff --git a/src/chunklist_test.go b/src/chunklist_test.go index 983a7ed..78468e3 100644 --- a/src/chunklist_test.go +++ b/src/chunklist_test.go @@ -12,7 +12,9 @@ func TestChunkList(t *testing.T) { sortCriteria = []criterion{byScore, byLength} cl := NewChunkList(func(s []byte, i int) Item { - return Item{text: util.ToChars(s), index: int32(i * 2)} + chars := util.ToChars(s) + chars.Index = int32(i * 2) + return Item{text: chars} }) // Snapshot @@ -41,8 +43,8 @@ func TestChunkList(t *testing.T) { if len(*chunk1) != 2 { t.Error("Snapshot should contain only two items") } - if (*chunk1)[0].text.ToString() != "hello" || (*chunk1)[0].index != 0 || - (*chunk1)[1].text.ToString() != "world" || (*chunk1)[1].index != 2 { + if (*chunk1)[0].text.ToString() != "hello" || (*chunk1)[0].Index() != 0 || + (*chunk1)[1].text.ToString() != "world" || (*chunk1)[1].Index() != 2 { t.Error("Invalid data") } if chunk1.IsFull() { diff --git a/src/core.go b/src/core.go index 7e16dc3..aa42510 100644 --- a/src/core.go +++ b/src/core.go @@ -98,11 +98,8 @@ func Run(opts *Options, revision string) { return nilItem } chars, colors := ansiProcessor(data) - return Item{ - index: int32(index), - trimLength: -1, - text: chars, - colors: colors} + chars.Index = int32(index) + return Item{text: chars, colors: colors} }) } else { chunkList = NewChunkList(func(data []byte, index int) Item { @@ -114,16 +111,9 @@ func Run(opts *Options, revision string) { return nilItem } textRunes := joinTokens(trans) - item := Item{ - index: int32(index), - trimLength: -1, - origText: &data, - colors: nil} - trimmed, colors := ansiProcessorRunes(textRunes) - item.text = trimmed - item.colors = colors - return item + trimmed.Index = int32(index) + return Item{text: trimmed, colors: colors, origText: &data} }) } diff --git a/src/item.go b/src/item.go index 955c31d..b3879cb 100644 --- a/src/item.go +++ b/src/item.go @@ -4,33 +4,27 @@ import ( "github.com/junegunn/fzf/src/util" ) -// Item represents each input line +// Item represents each input line. 56 bytes. type Item struct { - index int32 - trimLength int32 - text util.Chars - origText *[]byte - colors *[]ansiOffset - transformed []Token + text util.Chars // 32 = 24 + 1 + 1 + 2 + 4 + transformed *[]Token // 8 + origText *[]byte // 8 + colors *[]ansiOffset // 8 } // Index returns ordinal index of the Item func (item *Item) Index() int32 { - return item.index + return item.text.Index } -var nilItem = Item{index: -1} +var nilItem = Item{text: util.Chars{Index: -1}} func (item *Item) Nil() bool { - return item.index < 0 + return item.Index() < 0 } -func (item *Item) TrimLength() int32 { - if item.trimLength >= 0 { - return item.trimLength - } - item.trimLength = int32(item.text.TrimLength()) - return item.trimLength +func (item *Item) TrimLength() uint16 { + return item.text.TrimLength() } // Colors returns ansiOffsets of the Item diff --git a/src/pattern.go b/src/pattern.go index f1caeba..07ed9cd 100644 --- a/src/pattern.go +++ b/src/pattern.go @@ -247,7 +247,7 @@ func (p *Pattern) Match(chunk *Chunk, slab *util.Slab) []*Result { // ChunkCache: Exact match cacheKey := p.CacheKey() if p.cacheable { - if cached := _cache.Find(chunk, cacheKey); cached != nil { + if cached := _cache.Lookup(chunk, cacheKey); cached != nil { return cached } } @@ -352,18 +352,17 @@ func (p *Pattern) extendedMatch(item *Item, withPos bool, slab *util.Slab) ([]Of } func (p *Pattern) prepareInput(item *Item) []Token { - if item.transformed != nil { - return item.transformed + if len(p.nth) == 0 { + return []Token{Token{text: &item.text, prefixLength: 0}} } - var ret []Token - if len(p.nth) == 0 { - ret = []Token{Token{text: &item.text, prefixLength: 0}} - } else { - tokens := Tokenize(item.text, p.delimiter) - ret = Transform(tokens, p.nth) + if item.transformed != nil { + return *item.transformed } - item.transformed = ret + + tokens := Tokenize(item.text, p.delimiter) + ret := Transform(tokens, p.nth) + item.transformed = &ret return ret } diff --git a/src/pattern_test.go b/src/pattern_test.go index 5722be4..31a127e 100644 --- a/src/pattern_test.go +++ b/src/pattern_test.go @@ -142,13 +142,13 @@ func TestOrigTextAndTransformed(t *testing.T) { Item{ text: util.RunesToChars([]rune("junegunn")), origText: &origBytes, - transformed: trans}, + transformed: &trans}, } pattern.extended = extended matches := pattern.matchChunk(&chunk, nil, slab) // No cache if !(matches[0].item.text.ToString() == "junegunn" && string(*matches[0].item.origText) == "junegunn.choi" && - reflect.DeepEqual(matches[0].item.transformed, trans)) { + reflect.DeepEqual(*matches[0].item.transformed, trans)) { t.Error("Invalid match result", matches) } @@ -156,7 +156,7 @@ func TestOrigTextAndTransformed(t *testing.T) { if !(match.item.text.ToString() == "junegunn" && string(*match.item.origText) == "junegunn.choi" && offsets[0][0] == 0 && offsets[0][1] == 5 && - reflect.DeepEqual(match.item.transformed, trans)) { + reflect.DeepEqual(*match.item.transformed, trans)) { t.Error("Invalid match result", match, offsets, extended) } if !((*pos)[0] == 4 && (*pos)[1] == 0) { diff --git a/src/result.go b/src/result.go index 0b1fbf0..fd4d1a9 100644 --- a/src/result.go +++ b/src/result.go @@ -34,7 +34,7 @@ func buildResult(item *Item, offsets []Offset, score int) *Result { sort.Sort(ByOrder(offsets)) } - result := Result{item: item, rank: rank{index: item.index}} + result := Result{item: item, rank: rank{index: item.Index()}} numChars := item.text.Length() minBegin := math.MaxUint16 minEnd := math.MaxUint16 @@ -57,7 +57,7 @@ func buildResult(item *Item, offsets []Offset, score int) *Result { // Higher is better val = math.MaxUint16 - util.AsUint16(score) case byLength: - val = util.AsUint16(int(item.TrimLength())) + val = item.TrimLength() case byBegin, byEnd: if validOffsetFound { whitePrefixLen := 0 @@ -86,7 +86,7 @@ var sortCriteria []criterion // Index returns ordinal index of the Item func (result *Result) Index() int32 { - return result.item.index + return result.item.Index() } func minRank() rank { diff --git a/src/result_test.go b/src/result_test.go index ad510c2..8c74691 100644 --- a/src/result_test.go +++ b/src/result_test.go @@ -11,6 +11,11 @@ import ( "github.com/junegunn/fzf/src/util" ) +func withIndex(i *Item, index int) *Item { + (*i).text.Index = int32(index) + return i +} + func TestOffsetSort(t *testing.T) { offsets := []Offset{ Offset{3, 5}, Offset{2, 7}, @@ -52,12 +57,13 @@ func TestResultRank(t *testing.T) { sortCriteria = []criterion{byScore, byLength} strs := [][]rune{[]rune("foo"), []rune("foobar"), []rune("bar"), []rune("baz")} - item1 := buildResult(&Item{text: util.RunesToChars(strs[0]), index: 1, trimLength: -1}, []Offset{}, 2) + item1 := buildResult( + withIndex(&Item{text: util.RunesToChars(strs[0])}, 1), []Offset{}, 2) if item1.rank.points[0] != math.MaxUint16-2 || // Bonus item1.rank.points[1] != 3 || // Length item1.rank.points[2] != 0 || // Unused item1.rank.points[3] != 0 || // Unused - item1.item.index != 1 { + item1.item.Index() != 1 { t.Error(item1.rank) } // Only differ in index @@ -73,14 +79,18 @@ func TestResultRank(t *testing.T) { sort.Sort(ByRelevance(items)) if items[0] != item2 || items[1] != item2 || items[2] != item1 || items[3] != item1 { - t.Error(items, item1, item1.item.index, item2, item2.item.index) + t.Error(items, item1, item1.item.Index(), item2, item2.item.Index()) } // Sort by relevance - item3 := buildResult(&Item{index: 2}, []Offset{Offset{1, 3}, Offset{5, 7}}, 3) - item4 := buildResult(&Item{index: 2}, []Offset{Offset{1, 2}, Offset{6, 7}}, 4) - item5 := buildResult(&Item{index: 2}, []Offset{Offset{1, 3}, Offset{5, 7}}, 5) - item6 := buildResult(&Item{index: 2}, []Offset{Offset{1, 2}, Offset{6, 7}}, 6) + item3 := buildResult( + withIndex(&Item{}, 2), []Offset{Offset{1, 3}, Offset{5, 7}}, 3) + item4 := buildResult( + withIndex(&Item{}, 2), []Offset{Offset{1, 2}, Offset{6, 7}}, 4) + item5 := buildResult( + withIndex(&Item{}, 2), []Offset{Offset{1, 3}, Offset{5, 7}}, 5) + item6 := buildResult( + withIndex(&Item{}, 2), []Offset{Offset{1, 2}, Offset{6, 7}}, 6) items = []*Result{item1, item2, item3, item4, item5, item6} sort.Sort(ByRelevance(items)) if !(items[0] == item6 && items[1] == item5 && diff --git a/src/util/chars.go b/src/util/chars.go index 061120e..8325cf4 100644 --- a/src/util/chars.go +++ b/src/util/chars.go @@ -3,63 +3,81 @@ package util import ( "unicode" "unicode/utf8" + "unsafe" ) type Chars struct { - runes []rune - bytes []byte + slice []byte // or []rune + inBytes bool + trimLengthKnown bool + trimLength uint16 + + // XXX Piggybacking item index here is a horrible idea. But I'm trying to + // minimize the memory footprint by not wasting padded spaces. + Index int32 } // ToChars converts byte array into rune array -func ToChars(bytea []byte) Chars { +func ToChars(bytes []byte) Chars { var runes []rune - ascii := true - numBytes := len(bytea) + inBytes := true + numBytes := len(bytes) for i := 0; i < numBytes; { - if bytea[i] < utf8.RuneSelf { - if !ascii { - runes = append(runes, rune(bytea[i])) + if bytes[i] < utf8.RuneSelf { + if !inBytes { + runes = append(runes, rune(bytes[i])) } i++ } else { - if ascii { - ascii = false + if inBytes { + inBytes = false runes = make([]rune, i, numBytes) for j := 0; j < i; j++ { - runes[j] = rune(bytea[j]) + runes[j] = rune(bytes[j]) } } - r, sz := utf8.DecodeRune(bytea[i:]) + r, sz := utf8.DecodeRune(bytes[i:]) i += sz runes = append(runes, r) } } - if ascii { - return Chars{bytes: bytea} + if inBytes { + return Chars{slice: bytes, inBytes: inBytes} } - return Chars{runes: runes} + return RunesToChars(runes) } func RunesToChars(runes []rune) Chars { - return Chars{runes: runes} + return Chars{slice: *(*[]byte)(unsafe.Pointer(&runes)), inBytes: false} +} + +func (chars *Chars) optionalRunes() []rune { + if chars.inBytes { + return nil + } + return *(*[]rune)(unsafe.Pointer(&chars.slice)) } func (chars *Chars) Get(i int) rune { - if chars.runes != nil { - return chars.runes[i] + if runes := chars.optionalRunes(); runes != nil { + return runes[i] } - return rune(chars.bytes[i]) + return rune(chars.slice[i]) } func (chars *Chars) Length() int { - if chars.runes != nil { - return len(chars.runes) + if runes := chars.optionalRunes(); runes != nil { + return len(runes) } - return len(chars.bytes) + return len(chars.slice) } // TrimLength returns the length after trimming leading and trailing whitespaces -func (chars *Chars) TrimLength() int { +func (chars *Chars) TrimLength() uint16 { + if chars.trimLengthKnown { + return chars.trimLength + } + chars.trimLengthKnown = true var i int len := chars.Length() for i = len - 1; i >= 0; i-- { @@ -80,7 +98,8 @@ func (chars *Chars) TrimLength() int { break } } - return i - j + 1 + chars.trimLength = AsUint16(i - j + 1) + return chars.trimLength } func (chars *Chars) TrailingWhitespaces() int { @@ -96,28 +115,40 @@ func (chars *Chars) TrailingWhitespaces() int { } func (chars *Chars) ToString() string { - if chars.runes != nil { - return string(chars.runes) + if runes := chars.optionalRunes(); runes != nil { + return string(runes) } - return string(chars.bytes) + return string(chars.slice) } func (chars *Chars) ToRunes() []rune { - if chars.runes != nil { - return chars.runes + if runes := chars.optionalRunes(); runes != nil { + return runes } - runes := make([]rune, len(chars.bytes)) - for idx, b := range chars.bytes { + bytes := chars.slice + runes := make([]rune, len(bytes)) + for idx, b := range bytes { runes[idx] = rune(b) } return runes } -func (chars *Chars) Slice(b int, e int) Chars { - if chars.runes != nil { - return Chars{runes: chars.runes[b:e]} +func (chars *Chars) CopyRunes(dest []rune) { + if runes := chars.optionalRunes(); runes != nil { + copy(dest, runes) + return } - return Chars{bytes: chars.bytes[b:e]} + for idx, b := range chars.slice { + dest[idx] = rune(b) + } + return +} + +func (chars *Chars) Slice(b int, e int) Chars { + if runes := chars.optionalRunes(); runes != nil { + return RunesToChars(runes[b:e]) + } + return Chars{slice: chars.slice[b:e], inBytes: true} } func (chars *Chars) Split(delimiter string) []Chars { diff --git a/src/util/chars_test.go b/src/util/chars_test.go index 12c629d..07b8dea 100644 --- a/src/util/chars_test.go +++ b/src/util/chars_test.go @@ -2,27 +2,16 @@ package util import "testing" -func TestToCharsNil(t *testing.T) { - bs := Chars{bytes: []byte{}} - if bs.bytes == nil || bs.runes != nil { - t.Error() - } - rs := RunesToChars([]rune{}) - if rs.bytes != nil || rs.runes == nil { - t.Error() - } -} - func TestToCharsAscii(t *testing.T) { chars := ToChars([]byte("foobar")) - if chars.ToString() != "foobar" || chars.runes != nil { + if !chars.inBytes || chars.ToString() != "foobar" || !chars.inBytes { t.Error() } } func TestCharsLength(t *testing.T) { chars := ToChars([]byte("\tabc한글 ")) - if chars.Length() != 8 || chars.TrimLength() != 5 { + if chars.inBytes || chars.Length() != 8 || chars.TrimLength() != 5 { t.Error() } } @@ -36,7 +25,7 @@ func TestCharsToString(t *testing.T) { } func TestTrimLength(t *testing.T) { - check := func(str string, exp int) { + check := func(str string, exp uint16) { chars := ToChars([]byte(str)) trimmed := chars.TrimLength() if trimmed != exp { @@ -61,7 +50,8 @@ func TestSplit(t *testing.T) { input := ToChars([]byte(str)) result := input.Split(delim) if len(result) != len(tokens) { - t.Errorf("Invalid Split result for '%s': %d tokens found (expected %d): %s", + t.Errorf( + "Invalid Split result for '%s': %d tokens found (expected %d): %s", str, len(result), len(tokens), result) } for idx, token := range tokens {