Fix --with-nth performance; avoid regex if possible

Close #317
This commit is contained in:
Junegunn Choi 2015-08-10 18:34:20 +09:00
parent a7b75c99a5
commit 766427de0c
7 changed files with 117 additions and 35 deletions

View File

@ -1,6 +1,15 @@
CHANGELOG
=========
0.10.3
------
- Fixed slow performance of `--with-nth` when used with `--delimiter`
- Regular expression engine of Golang as of now is very slow, so the fixed
version will treat the given delimiter pattern as a plain string instead
of a regular expression unless it contains special characters and is
a valid regular expression.
0.10.2
------

View File

@ -104,7 +104,7 @@ type Options struct {
Case Case
Nth []Range
WithNth []Range
Delimiter *regexp.Regexp
Delimiter Delimiter
Sort int
Tac bool
Tiebreak tiebreak
@ -149,7 +149,7 @@ func defaultOptions() *Options {
Case: CaseSmart,
Nth: make([]Range, 0),
WithNth: make([]Range, 0),
Delimiter: nil,
Delimiter: Delimiter{},
Sort: 1000,
Tac: false,
Tiebreak: byLength,
@ -268,17 +268,27 @@ func splitNth(str string) []Range {
return ranges
}
func delimiterRegexp(str string) *regexp.Regexp {
rx, e := regexp.Compile(str)
if e != nil {
str = regexp.QuoteMeta(str)
func delimiterRegexp(str string) Delimiter {
// Special handling of \t
str = strings.Replace(str, "\\t", "\t", -1)
// 1. Pattern does not contain any special character
if regexp.QuoteMeta(str) == str {
return Delimiter{str: &str}
}
rx, e := regexp.Compile(str)
// 2. Pattern is not a valid regular expression
if e != nil {
return Delimiter{str: &str}
}
// 3. Pattern as regular expression. Slow.
rx, e = regexp.Compile(fmt.Sprintf("(?:.*?%s)|(?:.+?$)", str))
if e != nil {
errorExit("invalid regular expression: " + e.Error())
}
return rx
return Delimiter{regex: rx}
}
func isAlphabet(char uint8) bool {

View File

@ -2,16 +2,60 @@ package fzf
import (
"fmt"
"strings"
"testing"
"github.com/junegunn/fzf/src/curses"
)
func TestDelimiterRegex(t *testing.T) {
rx := delimiterRegexp("*")
// Valid regex
delim := delimiterRegexp(".")
if delim.regex == nil || delim.str != nil {
t.Error(delim)
}
// Broken regex -> string
delim = delimiterRegexp("[0-9")
if delim.regex != nil || *delim.str != "[0-9" {
t.Error(delim)
}
// Valid regex
delim = delimiterRegexp("[0-9]")
if strings.Index(delim.regex.String(), "[0-9]") < 0 || delim.str != nil {
t.Error(delim)
}
// Tab character
delim = delimiterRegexp("\t")
if delim.regex != nil || *delim.str != "\t" {
t.Error(delim)
}
// Tab expression
delim = delimiterRegexp("\\t")
if delim.regex != nil || *delim.str != "\t" {
t.Error(delim)
}
// Tabs -> regex
delim = delimiterRegexp("\t+")
if delim.regex == nil || delim.str != nil {
t.Error(delim)
}
}
func TestDelimiterRegexString(t *testing.T) {
delim := delimiterRegexp("*")
tokens := strings.Split("-*--*---**---", *delim.str)
if delim.regex != nil || tokens[0] != "-" || tokens[1] != "--" ||
tokens[2] != "---" || tokens[3] != "" || tokens[4] != "---" {
t.Errorf("%s %s %d", delim, tokens, len(tokens))
}
}
func TestDelimiterRegexRegex(t *testing.T) {
delim := delimiterRegexp("--\\*")
rx := delim.regex
tokens := rx.FindAllString("-*--*---**---", -1)
if tokens[0] != "-*" || tokens[1] != "--*" || tokens[2] != "---*" ||
tokens[3] != "*" || tokens[4] != "---" {
if delim.str != nil ||
tokens[0] != "-*--*" || tokens[1] != "---*" || tokens[2] != "*---" {
t.Errorf("%s %s %d", rx, tokens, len(tokens))
}
}

View File

@ -42,7 +42,7 @@ type Pattern struct {
text []rune
terms []term
hasInvTerm bool
delimiter *regexp.Regexp
delimiter Delimiter
nth []Range
procFun map[termType]func(bool, []rune, []rune) (int, int)
}
@ -71,7 +71,7 @@ func clearChunkCache() {
// BuildPattern builds Pattern object from the given arguments
func BuildPattern(mode Mode, caseMode Case,
nth []Range, delimiter *regexp.Regexp, runes []rune) *Pattern {
nth []Range, delimiter Delimiter, runes []rune) *Pattern {
var asString string
switch mode {

View File

@ -59,7 +59,7 @@ func TestExact(t *testing.T) {
defer clearPatternCache()
clearPatternCache()
pattern := BuildPattern(ModeExtended, CaseSmart,
[]Range{}, nil, []rune("'abc"))
[]Range{}, Delimiter{}, []rune("'abc"))
sidx, eidx := algo.ExactMatchNaive(
pattern.caseSensitive, []rune("aabbcc abc"), pattern.terms[0].text)
if sidx != 7 || eidx != 10 {
@ -70,7 +70,7 @@ func TestExact(t *testing.T) {
func TestEqual(t *testing.T) {
defer clearPatternCache()
clearPatternCache()
pattern := BuildPattern(ModeExtended, CaseSmart, []Range{}, nil, []rune("^AbC$"))
pattern := BuildPattern(ModeExtended, CaseSmart, []Range{}, Delimiter{}, []rune("^AbC$"))
match := func(str string, sidxExpected int, eidxExpected int) {
sidx, eidx := algo.EqualMatch(
@ -86,17 +86,17 @@ func TestEqual(t *testing.T) {
func TestCaseSensitivity(t *testing.T) {
defer clearPatternCache()
clearPatternCache()
pat1 := BuildPattern(ModeFuzzy, CaseSmart, []Range{}, nil, []rune("abc"))
pat1 := BuildPattern(ModeFuzzy, CaseSmart, []Range{}, Delimiter{}, []rune("abc"))
clearPatternCache()
pat2 := BuildPattern(ModeFuzzy, CaseSmart, []Range{}, nil, []rune("Abc"))
pat2 := BuildPattern(ModeFuzzy, CaseSmart, []Range{}, Delimiter{}, []rune("Abc"))
clearPatternCache()
pat3 := BuildPattern(ModeFuzzy, CaseIgnore, []Range{}, nil, []rune("abc"))
pat3 := BuildPattern(ModeFuzzy, CaseIgnore, []Range{}, Delimiter{}, []rune("abc"))
clearPatternCache()
pat4 := BuildPattern(ModeFuzzy, CaseIgnore, []Range{}, nil, []rune("Abc"))
pat4 := BuildPattern(ModeFuzzy, CaseIgnore, []Range{}, Delimiter{}, []rune("Abc"))
clearPatternCache()
pat5 := BuildPattern(ModeFuzzy, CaseRespect, []Range{}, nil, []rune("abc"))
pat5 := BuildPattern(ModeFuzzy, CaseRespect, []Range{}, Delimiter{}, []rune("abc"))
clearPatternCache()
pat6 := BuildPattern(ModeFuzzy, CaseRespect, []Range{}, nil, []rune("Abc"))
pat6 := BuildPattern(ModeFuzzy, CaseRespect, []Range{}, Delimiter{}, []rune("Abc"))
if string(pat1.text) != "abc" || pat1.caseSensitive != false ||
string(pat2.text) != "Abc" || pat2.caseSensitive != true ||
@ -109,8 +109,8 @@ func TestCaseSensitivity(t *testing.T) {
}
func TestOrigTextAndTransformed(t *testing.T) {
pattern := BuildPattern(ModeExtended, CaseSmart, []Range{}, nil, []rune("jg"))
tokens := Tokenize([]rune("junegunn"), nil)
pattern := BuildPattern(ModeExtended, CaseSmart, []Range{}, Delimiter{}, []rune("jg"))
tokens := Tokenize([]rune("junegunn"), Delimiter{})
trans := Transform(tokens, []Range{Range{1, 1}})
origRunes := []rune("junegunn.choi")

View File

@ -22,6 +22,12 @@ type Token struct {
prefixLength int
}
// Delimiter for tokenizing the input
type Delimiter struct {
regex *regexp.Regexp
str *string
}
func newRange(begin int, end int) Range {
if begin == 1 {
begin = rangeEllipsis
@ -68,15 +74,15 @@ func ParseRange(str *string) (Range, bool) {
return newRange(n, n), true
}
func withPrefixLengths(tokens []string, begin int) []Token {
func withPrefixLengths(tokens [][]rune, begin int) []Token {
ret := make([]Token, len(tokens))
prefixLength := begin
for idx, token := range tokens {
// Need to define a new local variable instead of the reused token to take
// the pointer to it
ret[idx] = Token{text: []rune(token), prefixLength: prefixLength}
prefixLength += len([]rune(token))
ret[idx] = Token{text: token, prefixLength: prefixLength}
prefixLength += len(token)
}
return ret
}
@ -87,9 +93,9 @@ const (
awkWhite
)
func awkTokenizer(input []rune) ([]string, int) {
func awkTokenizer(input []rune) ([][]rune, int) {
// 9, 32
ret := []string{}
ret := [][]rune{}
str := []rune{}
prefixLength := 0
state := awkNil
@ -112,27 +118,40 @@ func awkTokenizer(input []rune) ([]string, int) {
if white {
str = append(str, r)
} else {
ret = append(ret, string(str))
ret = append(ret, str)
state = awkBlack
str = []rune{r}
}
}
}
if len(str) > 0 {
ret = append(ret, string(str))
ret = append(ret, str)
}
return ret, prefixLength
}
// Tokenize tokenizes the given string with the delimiter
func Tokenize(runes []rune, delimiter *regexp.Regexp) []Token {
if delimiter == nil {
func Tokenize(runes []rune, delimiter Delimiter) []Token {
if delimiter.str == nil && delimiter.regex == nil {
// AWK-style (\S+\s*)
tokens, prefixLength := awkTokenizer(runes)
return withPrefixLengths(tokens, prefixLength)
}
tokens := delimiter.FindAllString(string(runes), -1)
return withPrefixLengths(tokens, 0)
var tokens []string
if delimiter.str != nil {
tokens = strings.Split(string(runes), *delimiter.str)
for i := 0; i < len(tokens)-1; i++ {
tokens[i] = tokens[i] + *delimiter.str
}
} else if delimiter.regex != nil {
tokens = delimiter.regex.FindAllString(string(runes), -1)
}
asRunes := make([][]rune, len(tokens))
for i, token := range tokens {
asRunes[i] = []rune(token)
}
return withPrefixLengths(asRunes, 0)
}
func joinTokens(tokens []Token) []rune {

View File

@ -43,7 +43,7 @@ func TestParseRange(t *testing.T) {
func TestTokenize(t *testing.T) {
// AWK-style
input := " abc: def: ghi "
tokens := Tokenize([]rune(input), nil)
tokens := Tokenize([]rune(input), Delimiter{})
if string(tokens[0].text) != "abc: " || tokens[0].prefixLength != 2 {
t.Errorf("%s", tokens)
}
@ -58,7 +58,7 @@ func TestTokenize(t *testing.T) {
func TestTransform(t *testing.T) {
input := " abc: def: ghi: jkl"
{
tokens := Tokenize([]rune(input), nil)
tokens := Tokenize([]rune(input), Delimiter{})
{
ranges := splitNth("1,2,3")
tx := Transform(tokens, ranges)