[perf] Remove memory copy when using string delimiter

This commit is contained in:
Junegunn Choi 2016-08-14 04:23:37 +09:00
parent ddc7bb9064
commit d9c8a9a880
No known key found for this signature in database
GPG Key ID: 254BC280FEF9C627
3 changed files with 67 additions and 6 deletions

View File

@ -140,13 +140,13 @@ func Tokenize(text util.Chars, delimiter Delimiter) []Token {
return withPrefixLengths(tokens, prefixLength)
}
var tokens []string
if delimiter.str != nil {
tokens = strings.Split(text.ToString(), *delimiter.str)
for i := 0; i < len(tokens)-1; i++ {
tokens[i] = tokens[i] + *delimiter.str
return withPrefixLengths(text.Split(*delimiter.str), 0)
}
} else if delimiter.regex != nil {
// FIXME performance
var tokens []string
if delimiter.regex != nil {
str := text.ToString()
for len(str) > 0 {
loc := delimiter.regex.FindStringIndex(str)

View File

@ -118,3 +118,39 @@ func (chars *Chars) Slice(b int, e int) Chars {
}
return Chars{bytes: chars.bytes[b:e]}
}
func (chars *Chars) Split(delimiter string) []Chars {
delim := []rune(delimiter)
numChars := chars.Length()
numDelim := len(delim)
begin := 0
ret := make([]Chars, 0, 1)
for index := 0; index < numChars; {
if index+numDelim <= numChars {
match := true
for off, d := range delim {
if chars.Get(index+off) != d {
match = false
break
}
}
// Found the delimiter
if match {
incr := Max(numDelim, 1)
ret = append(ret, chars.Slice(begin, index+incr))
index += incr
begin = index
continue
}
} else {
// Impossible to find the delimiter in the remaining substring
break
}
index++
}
if begin < numChars || len(ret) == 0 {
ret = append(ret, chars.Slice(begin, numChars))
}
return ret
}

View File

@ -55,3 +55,28 @@ func TestTrimLength(t *testing.T) {
check(" h o ", 5)
check(" ", 0)
}
func TestSplit(t *testing.T) {
check := func(str string, delim string, tokens ...string) {
input := ToChars([]byte(str))
result := input.Split(delim)
if len(result) != len(tokens) {
t.Errorf("Invalid Split result for '%s': %d tokens found (expected %d): %s",
str, len(result), len(tokens), result)
}
for idx, token := range tokens {
if result[idx].ToString() != token {
t.Errorf("Invalid Split result for '%s': %s (expected %s)",
str, result[idx].ToString(), token)
}
}
}
check("abc:def::", ":", "abc:", "def:", ":")
check("abc:def::", "-", "abc:def::")
check("abc", "", "a", "b", "c")
check("abc", "a", "a", "bc")
check("abc", "ab", "ab", "c")
check("abc", "abc", "abc")
check("abc", "abcd", "abc")
check("", "abcd", "")
}