fzf/src/pattern.go

374 lines
8.4 KiB
Go
Raw Normal View History

2015-01-02 04:49:30 +09:00
package fzf
import (
"regexp"
"sort"
2015-01-02 04:49:30 +09:00
"strings"
2015-01-12 12:56:17 +09:00
"github.com/junegunn/fzf/src/algo"
"github.com/junegunn/fzf/src/util"
2015-01-02 04:49:30 +09:00
)
// fuzzy
// 'exact
// ^exact-prefix
// exact-suffix$
// !not-fuzzy
// !'not-exact
// !^not-exact-prefix
// !not-exact-suffix$
2015-01-12 03:01:24 +09:00
// termType identifies how a single search term is matched against an item.
type termType int

// Matching modes assigned to terms by parseTerms. The zero value is
// termFuzzy.
const (
	// termFuzzy is the default mode when fuzzy matching is enabled, and the
	// mode selected by a leading ' when it is disabled.
	termFuzzy termType = iota
	// termExact is the default mode when fuzzy matching is disabled, and the
	// mode selected by a leading ' when it is enabled.
	termExact
	// termPrefix is selected by a leading ^ (without a trailing $).
	termPrefix
	// termSuffix is selected by a trailing $ (without a leading ^).
	termSuffix
	// termEqual is selected by a leading ^ combined with a trailing $.
	termEqual
)
2015-01-12 03:01:24 +09:00
type term struct {
typ termType
inv bool
text []rune
caseSensitive bool
origText []rune
2015-01-02 04:49:30 +09:00
}
2015-11-09 00:58:20 +09:00
type termSet []term
2015-01-12 03:01:24 +09:00
// Pattern represents search pattern
2015-01-02 04:49:30 +09:00
type Pattern struct {
2015-11-03 22:49:32 +09:00
fuzzy bool
extended bool
2015-01-02 04:49:30 +09:00
caseSensitive bool
forward bool
2015-01-02 04:49:30 +09:00
text []rune
2015-11-09 00:58:20 +09:00
termSets []termSet
cacheable bool
delimiter Delimiter
2015-01-02 04:49:30 +09:00
nth []Range
procFun map[termType]func(bool, bool, []rune, []rune) (int, int)
2015-01-02 04:49:30 +09:00
}
// Package-level state shared by every Pattern.
var (
	// _patternCache maps a query string to the Pattern built for it.
	_patternCache map[string]*Pattern
	// _splitRegex splits an extended-mode query into tokens.
	_splitRegex *regexp.Regexp
	// _cache stores match results per chunk, keyed by Pattern.CacheKey.
	_cache ChunkCache
)
func init() {
_splitRegex = regexp.MustCompile("\\s+")
2015-03-31 22:05:02 +09:00
clearPatternCache()
clearChunkCache()
2015-01-02 04:49:30 +09:00
}
func clearPatternCache() {
2015-03-31 22:05:02 +09:00
// We can uniquely identify the pattern for a given string since
2015-11-03 22:49:32 +09:00
// search mode and caseMode do not change while the program is running
2015-01-02 04:49:30 +09:00
_patternCache = make(map[string]*Pattern)
}
2015-03-31 22:05:02 +09:00
// clearChunkCache replaces the chunk cache with a newly constructed one,
// discarding all previously cached match results.
func clearChunkCache() {
	_cache = NewChunkCache()
}
2015-01-12 03:01:24 +09:00
// BuildPattern builds Pattern object from the given arguments
2015-11-03 22:49:32 +09:00
func BuildPattern(fuzzy bool, extended bool, caseMode Case, forward bool,
nth []Range, delimiter Delimiter, runes []rune) *Pattern {
2015-01-02 04:49:30 +09:00
var asString string
2015-11-03 22:49:32 +09:00
if extended {
2015-01-02 04:49:30 +09:00
asString = strings.Trim(string(runes), " ")
2015-11-03 22:49:32 +09:00
} else {
2015-01-02 04:49:30 +09:00
asString = string(runes)
}
cached, found := _patternCache[asString]
if found {
return cached
}
2015-11-09 00:58:20 +09:00
caseSensitive, cacheable := true, true
termSets := []termSet{}
2015-01-02 04:49:30 +09:00
2015-11-03 22:49:32 +09:00
if extended {
2015-11-09 00:58:20 +09:00
termSets = parseTerms(fuzzy, caseMode, asString)
Loop:
for _, termSet := range termSets {
for idx, term := range termSet {
// If the query contains inverse search terms or OR operators,
// we cannot cache the search scope
if idx > 0 || term.inv {
cacheable = false
break Loop
}
2015-01-02 04:49:30 +09:00
}
}
2015-11-03 22:49:32 +09:00
} else {
lowerString := strings.ToLower(asString)
caseSensitive = caseMode == CaseRespect ||
caseMode == CaseSmart && lowerString != asString
if !caseSensitive {
asString = lowerString
}
2015-01-02 04:49:30 +09:00
}
ptr := &Pattern{
2015-11-03 22:49:32 +09:00
fuzzy: fuzzy,
extended: extended,
2015-01-02 04:49:30 +09:00
caseSensitive: caseSensitive,
forward: forward,
text: []rune(asString),
2015-11-09 00:58:20 +09:00
termSets: termSets,
cacheable: cacheable,
2015-01-02 04:49:30 +09:00
nth: nth,
delimiter: delimiter,
procFun: make(map[termType]func(bool, bool, []rune, []rune) (int, int))}
2015-01-02 04:49:30 +09:00
2015-01-12 12:56:17 +09:00
ptr.procFun[termFuzzy] = algo.FuzzyMatch
2015-06-08 23:16:31 +09:00
ptr.procFun[termEqual] = algo.EqualMatch
2015-01-12 12:56:17 +09:00
ptr.procFun[termExact] = algo.ExactMatchNaive
ptr.procFun[termPrefix] = algo.PrefixMatch
ptr.procFun[termSuffix] = algo.SuffixMatch
2015-01-02 04:49:30 +09:00
_patternCache[asString] = ptr
return ptr
}
2015-11-09 00:58:20 +09:00
func parseTerms(fuzzy bool, caseMode Case, str string) []termSet {
2015-01-02 04:49:30 +09:00
tokens := _splitRegex.Split(str, -1)
2015-11-09 00:58:20 +09:00
sets := []termSet{}
set := termSet{}
switchSet := false
2015-01-02 04:49:30 +09:00
for _, token := range tokens {
2015-01-12 03:01:24 +09:00
typ, inv, text := termFuzzy, false, token
lowerText := strings.ToLower(text)
caseSensitive := caseMode == CaseRespect ||
caseMode == CaseSmart && text != lowerText
if !caseSensitive {
text = lowerText
}
2015-01-02 04:49:30 +09:00
origText := []rune(text)
2015-11-03 22:49:32 +09:00
if !fuzzy {
2015-01-12 03:01:24 +09:00
typ = termExact
2015-01-02 04:49:30 +09:00
}
2015-11-09 00:58:20 +09:00
if text == "|" {
switchSet = false
continue
}
2015-01-02 04:49:30 +09:00
if strings.HasPrefix(text, "!") {
inv = true
text = text[1:]
}
if strings.HasPrefix(text, "'") {
2015-11-03 22:49:32 +09:00
// Flip exactness
if fuzzy {
2015-01-12 03:01:24 +09:00
typ = termExact
2015-01-02 04:49:30 +09:00
text = text[1:]
2015-11-03 22:49:32 +09:00
} else {
typ = termFuzzy
text = text[1:]
2015-01-02 04:49:30 +09:00
}
} else if strings.HasPrefix(text, "^") {
2015-06-08 23:16:31 +09:00
if strings.HasSuffix(text, "$") {
typ = termEqual
text = text[1 : len(text)-1]
} else {
typ = termPrefix
text = text[1:]
}
2015-01-02 04:49:30 +09:00
} else if strings.HasSuffix(text, "$") {
2015-01-12 03:01:24 +09:00
typ = termSuffix
2015-01-02 04:49:30 +09:00
text = text[:len(text)-1]
}
if len(text) > 0 {
2015-11-09 00:58:20 +09:00
if switchSet {
sets = append(sets, set)
set = termSet{}
}
set = append(set, term{
typ: typ,
inv: inv,
text: []rune(text),
caseSensitive: caseSensitive,
origText: origText})
2015-11-09 00:58:20 +09:00
switchSet = true
2015-01-02 04:49:30 +09:00
}
}
2015-11-09 00:58:20 +09:00
if len(set) > 0 {
sets = append(sets, set)
}
return sets
2015-01-02 04:49:30 +09:00
}
2015-01-12 03:01:24 +09:00
// IsEmpty returns true if the pattern is effectively empty
2015-01-02 04:49:30 +09:00
func (p *Pattern) IsEmpty() bool {
2015-11-03 22:49:32 +09:00
if !p.extended {
2015-01-02 04:49:30 +09:00
return len(p.text) == 0
}
2015-11-09 00:58:20 +09:00
return len(p.termSets) == 0
2015-01-02 04:49:30 +09:00
}
2015-01-12 03:01:24 +09:00
// AsString returns the search query in string type
2015-01-02 04:49:30 +09:00
func (p *Pattern) AsString() string {
return string(p.text)
}
2015-01-12 03:01:24 +09:00
// CacheKey is used to build string to be used as the key of result cache
2015-01-02 04:49:30 +09:00
func (p *Pattern) CacheKey() string {
2015-11-03 22:49:32 +09:00
if !p.extended {
2015-01-02 04:49:30 +09:00
return p.AsString()
}
cacheableTerms := []string{}
2015-11-09 00:58:20 +09:00
for _, termSet := range p.termSets {
if len(termSet) == 1 && !termSet[0].inv {
cacheableTerms = append(cacheableTerms, string(termSet[0].origText))
2015-01-02 04:49:30 +09:00
}
}
return strings.Join(cacheableTerms, " ")
}
2015-01-12 03:01:24 +09:00
// Match returns the list of matches Items in the given Chunk
2015-01-02 04:49:30 +09:00
func (p *Pattern) Match(chunk *Chunk) []*Item {
space := chunk
// ChunkCache: Exact match
cacheKey := p.CacheKey()
2015-11-09 00:58:20 +09:00
if p.cacheable {
2015-01-02 04:49:30 +09:00
if cached, found := _cache.Find(chunk, cacheKey); found {
return cached
}
}
2015-01-11 03:53:07 +09:00
// ChunkCache: Prefix/suffix match
Loop:
for idx := 1; idx < len(cacheKey); idx++ {
// [---------| ] | [ |---------]
// [--------| ] | [ |--------]
// [-------| ] | [ |-------]
prefix := cacheKey[:len(cacheKey)-idx]
suffix := cacheKey[idx:]
for _, substr := range [2]*string{&prefix, &suffix} {
if cached, found := _cache.Find(chunk, *substr); found {
2015-01-02 04:49:30 +09:00
cachedChunk := Chunk(cached)
space = &cachedChunk
2015-01-11 03:53:07 +09:00
break Loop
2015-01-02 04:49:30 +09:00
}
}
}
matches := p.matchChunk(space)
2015-01-02 04:49:30 +09:00
2015-11-09 00:58:20 +09:00
if p.cacheable {
2015-01-02 04:49:30 +09:00
_cache.Add(chunk, cacheKey, matches)
}
return matches
}
func (p *Pattern) matchChunk(chunk *Chunk) []*Item {
matches := []*Item{}
2015-11-03 22:49:32 +09:00
if !p.extended {
for _, item := range *chunk {
2015-11-03 22:49:32 +09:00
if sidx, eidx, tlen := p.basicMatch(item); sidx >= 0 {
matches = append(matches,
dupItem(item, []Offset{Offset{int32(sidx), int32(eidx), int32(tlen)}}))
}
}
} else {
for _, item := range *chunk {
2015-11-09 00:58:20 +09:00
if offsets := p.extendedMatch(item); len(offsets) == len(p.termSets) {
matches = append(matches, dupItem(item, offsets))
}
}
}
return matches
}
// MatchItem returns true if the Item is a match
func (p *Pattern) MatchItem(item *Item) bool {
2015-11-03 22:49:32 +09:00
if !p.extended {
sidx, _, _ := p.basicMatch(item)
return sidx >= 0
}
offsets := p.extendedMatch(item)
2015-11-09 00:58:20 +09:00
return len(offsets) == len(p.termSets)
}
func dupItem(item *Item, offsets []Offset) *Item {
sort.Sort(ByOrder(offsets))
return &Item{
text: item.text,
origText: item.origText,
transformed: item.transformed,
index: item.index,
offsets: offsets,
2015-03-19 01:59:14 +09:00
colors: item.colors,
rank: Rank{0, 0, item.index}}
}
2015-11-03 22:49:32 +09:00
func (p *Pattern) basicMatch(item *Item) (int, int, int) {
input := p.prepareInput(item)
2015-11-03 22:49:32 +09:00
if p.fuzzy {
return p.iter(algo.FuzzyMatch, input, p.caseSensitive, p.forward, p.text)
}
return p.iter(algo.ExactMatchNaive, input, p.caseSensitive, p.forward, p.text)
2015-01-02 04:49:30 +09:00
}
func (p *Pattern) extendedMatch(item *Item) []Offset {
input := p.prepareInput(item)
offsets := []Offset{}
2015-11-09 00:58:20 +09:00
for _, termSet := range p.termSets {
var offset *Offset
2015-11-09 00:58:20 +09:00
for _, term := range termSet {
pfun := p.procFun[term.typ]
if sidx, eidx, tlen := p.iter(pfun, input, term.caseSensitive, p.forward, term.text); sidx >= 0 {
if term.inv {
continue
2015-11-09 00:58:20 +09:00
}
offset = &Offset{int32(sidx), int32(eidx), int32(tlen)}
2015-11-09 00:58:20 +09:00
break
} else if term.inv {
offset = &Offset{0, 0, 0}
continue
2015-01-02 04:49:30 +09:00
}
}
if offset != nil {
offsets = append(offsets, *offset)
}
2015-01-02 04:49:30 +09:00
}
return offsets
2015-01-02 04:49:30 +09:00
}
func (p *Pattern) prepareInput(item *Item) []Token {
2015-01-02 04:49:30 +09:00
if item.transformed != nil {
return item.transformed
}
var ret []Token
2015-01-02 04:49:30 +09:00
if len(p.nth) > 0 {
tokens := Tokenize(item.text, p.delimiter)
ret = Transform(tokens, p.nth)
} else {
ret = []Token{Token{text: item.text, prefixLength: 0, trimLength: util.TrimLen(item.text)}}
2015-01-02 04:49:30 +09:00
}
item.transformed = ret
return ret
}
func (p *Pattern) iter(pfun func(bool, bool, []rune, []rune) (int, int),
tokens []Token, caseSensitive bool, forward bool, pattern []rune) (int, int, int) {
for _, part := range tokens {
2015-01-02 04:49:30 +09:00
prefixLength := part.prefixLength
if sidx, eidx := pfun(caseSensitive, forward, part.text, pattern); sidx >= 0 {
return sidx + prefixLength, eidx + prefixLength, part.trimLength
2015-01-02 04:49:30 +09:00
}
}
return -1, -1, -1 // math.MaxUint16
2015-01-02 04:49:30 +09:00
}