fzf/src/tokenizer.go

241 lines
5.1 KiB
Go
Raw Normal View History

2015-01-01 14:49:30 -05:00
package fzf
import (
"regexp"
"strconv"
"strings"
2015-01-11 22:56:17 -05:00
"github.com/junegunn/fzf/src/util"
2015-01-01 14:49:30 -05:00
)
2015-01-11 13:01:24 -05:00
// rangeEllipsis is the sentinel stored in Range.begin or Range.end to mark a
// side of an nth-expression that was left open (".."). Zero can serve as the
// sentinel because ParseRange rejects 0 as an explicit index.
const rangeEllipsis = 0
2015-01-01 14:49:30 -05:00
2015-01-11 13:01:24 -05:00
// Range represents nth-expression.
//
// Indices are 1-based and negative indices count back from the last token.
// A side holding rangeEllipsis is open-ended.
type Range struct {
	begin int // first index of the range, or rangeEllipsis
	end   int // last index of the range (inclusive), or rangeEllipsis
}
2015-01-11 13:01:24 -05:00
// Token contains the tokenized part of the strings and its prefix length
type Token struct {
	// text holds the runes of the token, including any trailing delimiter
	// or whitespace the tokenizer kept attached to it.
	text []rune
	// prefixLength is the number of runes that precede the token in the
	// tokenized input.
	prefixLength int
	// trimLength is the value of util.TrimLen(text); presumably the length
	// of text without surrounding whitespace (confirm against util).
	trimLength int
}
// Delimiter describes how Tokenize splits its input. When both fields are
// nil, Tokenize falls back to AWK-style whitespace splitting; when str is
// set it takes precedence over regex.
type Delimiter struct {
	regex *regexp.Regexp // pattern whose matches terminate a token
	str   *string        // literal separator that terminates a token
}
// newRange builds a Range from the given endpoints. Endpoints that are
// equivalent to an open side are normalised to rangeEllipsis: a begin of 1
// (the first token) and an end of -1 (the last token).
func newRange(begin int, end int) Range {
	result := Range{begin: begin, end: end}
	if result.begin == 1 {
		result.begin = rangeEllipsis
	}
	if result.end == -1 {
		result.end = rangeEllipsis
	}
	return result
}
2015-01-11 13:01:24 -05:00
// ParseRange parses nth-expression and returns the corresponding Range object
2015-01-01 14:49:30 -05:00
func ParseRange(str *string) (Range, bool) {
if (*str) == ".." {
return newRange(rangeEllipsis, rangeEllipsis), true
2015-01-01 14:49:30 -05:00
} else if strings.HasPrefix(*str, "..") {
end, err := strconv.Atoi((*str)[2:])
if err != nil || end == 0 {
return Range{}, false
}
return newRange(rangeEllipsis, end), true
2015-01-01 14:49:30 -05:00
} else if strings.HasSuffix(*str, "..") {
begin, err := strconv.Atoi((*str)[:len(*str)-2])
if err != nil || begin == 0 {
return Range{}, false
}
return newRange(begin, rangeEllipsis), true
2015-01-01 14:49:30 -05:00
} else if strings.Contains(*str, "..") {
ns := strings.Split(*str, "..")
if len(ns) != 2 {
return Range{}, false
}
begin, err1 := strconv.Atoi(ns[0])
end, err2 := strconv.Atoi(ns[1])
if err1 != nil || err2 != nil || begin == 0 || end == 0 {
2015-01-01 14:49:30 -05:00
return Range{}, false
}
return newRange(begin, end), true
2015-01-01 14:49:30 -05:00
}
n, err := strconv.Atoi(*str)
if err != nil || n == 0 {
return Range{}, false
}
return newRange(n, n), true
2015-01-01 14:49:30 -05:00
}
func withPrefixLengths(tokens [][]rune, begin int) []Token {
2015-01-01 14:49:30 -05:00
ret := make([]Token, len(tokens))
prefixLength := begin
for idx, token := range tokens {
// Need to define a new local variable instead of the reused token to take
// the pointer to it
ret[idx] = Token{token, prefixLength, util.TrimLen(token)}
prefixLength += len(token)
2015-01-01 14:49:30 -05:00
}
return ret
}
// Scanner states used by awkTokenizer.
const (
	awkNil   = iota // nothing but leading whitespace has been seen so far
	awkBlack        // inside the non-whitespace part of a token
	awkWhite        // inside the whitespace that trails a token
)
func awkTokenizer(input []rune) ([][]rune, int) {
2015-01-01 14:49:30 -05:00
// 9, 32
ret := [][]rune{}
2015-01-01 14:49:30 -05:00
str := []rune{}
prefixLength := 0
2015-01-11 13:01:24 -05:00
state := awkNil
for _, r := range input {
2015-01-01 14:49:30 -05:00
white := r == 9 || r == 32
switch state {
2015-01-11 13:01:24 -05:00
case awkNil:
2015-01-01 14:49:30 -05:00
if white {
prefixLength++
} else {
2015-01-11 13:01:24 -05:00
state = awkBlack
2015-01-01 14:49:30 -05:00
str = append(str, r)
}
2015-01-11 13:01:24 -05:00
case awkBlack:
2015-01-01 14:49:30 -05:00
str = append(str, r)
if white {
2015-01-11 13:01:24 -05:00
state = awkWhite
2015-01-01 14:49:30 -05:00
}
2015-01-11 13:01:24 -05:00
case awkWhite:
2015-01-01 14:49:30 -05:00
if white {
str = append(str, r)
} else {
ret = append(ret, str)
2015-01-11 13:01:24 -05:00
state = awkBlack
2015-01-01 14:49:30 -05:00
str = []rune{r}
}
}
}
if len(str) > 0 {
ret = append(ret, str)
2015-01-01 14:49:30 -05:00
}
return ret, prefixLength
}
2015-01-11 13:01:24 -05:00
// Tokenize tokenizes the given string with the delimiter
func Tokenize(runes []rune, delimiter Delimiter) []Token {
if delimiter.str == nil && delimiter.regex == nil {
2015-01-01 14:49:30 -05:00
// AWK-style (\S+\s*)
tokens, prefixLength := awkTokenizer(runes)
2015-01-01 14:49:30 -05:00
return withPrefixLengths(tokens, prefixLength)
}
var tokens []string
if delimiter.str != nil {
tokens = strings.Split(string(runes), *delimiter.str)
for i := 0; i < len(tokens)-1; i++ {
tokens[i] = tokens[i] + *delimiter.str
}
} else if delimiter.regex != nil {
str := string(runes)
for len(str) > 0 {
loc := delimiter.regex.FindStringIndex(str)
if loc == nil {
loc = []int{0, len(str)}
}
last := util.Max(loc[1], 1)
tokens = append(tokens, str[:last])
str = str[last:]
}
}
asRunes := make([][]rune, len(tokens))
for i, token := range tokens {
asRunes[i] = []rune(token)
}
return withPrefixLengths(asRunes, 0)
2015-01-01 14:49:30 -05:00
}
func joinTokens(tokens []Token) []rune {
ret := []rune{}
for _, token := range tokens {
ret = append(ret, token.text...)
2015-01-01 14:49:30 -05:00
}
return ret
}
func joinTokensAsRunes(tokens []Token) []rune {
ret := []rune{}
for _, token := range tokens {
ret = append(ret, token.text...)
}
return ret
2015-01-01 14:49:30 -05:00
}
2015-01-11 13:01:24 -05:00
// Transform is used to transform the input when --with-nth option is given
func Transform(tokens []Token, withNth []Range) []Token {
2015-01-01 14:49:30 -05:00
transTokens := make([]Token, len(withNth))
numTokens := len(tokens)
for idx, r := range withNth {
part := []rune{}
2015-01-01 14:49:30 -05:00
minIdx := 0
if r.begin == r.end {
idx := r.begin
2015-01-11 13:01:24 -05:00
if idx == rangeEllipsis {
part = append(part, joinTokensAsRunes(tokens)...)
2015-01-01 14:49:30 -05:00
} else {
if idx < 0 {
idx += numTokens + 1
}
if idx >= 1 && idx <= numTokens {
minIdx = idx - 1
part = append(part, tokens[idx-1].text...)
2015-01-01 14:49:30 -05:00
}
}
} else {
var begin, end int
2015-01-11 13:01:24 -05:00
if r.begin == rangeEllipsis { // ..N
2015-01-01 14:49:30 -05:00
begin, end = 1, r.end
if end < 0 {
end += numTokens + 1
}
2015-01-11 13:01:24 -05:00
} else if r.end == rangeEllipsis { // N..
2015-01-01 14:49:30 -05:00
begin, end = r.begin, numTokens
if begin < 0 {
begin += numTokens + 1
}
} else {
begin, end = r.begin, r.end
if begin < 0 {
begin += numTokens + 1
}
if end < 0 {
end += numTokens + 1
}
}
2015-01-11 22:56:17 -05:00
minIdx = util.Max(0, begin-1)
2015-01-01 14:49:30 -05:00
for idx := begin; idx <= end; idx++ {
if idx >= 1 && idx <= numTokens {
part = append(part, tokens[idx-1].text...)
2015-01-01 14:49:30 -05:00
}
}
}
var prefixLength int
if minIdx < numTokens {
prefixLength = tokens[minIdx].prefixLength
} else {
prefixLength = 0
}
transTokens[idx] = Token{part, prefixLength, util.TrimLen(part)}
2015-01-01 14:49:30 -05:00
}
return transTokens
2015-01-01 14:49:30 -05:00
}