[perf] Remove memory copy when using string delimiter
This commit is contained in:
parent
ddc7bb9064
commit
d9c8a9a880
@ -140,13 +140,13 @@ func Tokenize(text util.Chars, delimiter Delimiter) []Token {
|
||||
return withPrefixLengths(tokens, prefixLength)
|
||||
}
|
||||
|
||||
var tokens []string
|
||||
if delimiter.str != nil {
|
||||
tokens = strings.Split(text.ToString(), *delimiter.str)
|
||||
for i := 0; i < len(tokens)-1; i++ {
|
||||
tokens[i] = tokens[i] + *delimiter.str
|
||||
}
|
||||
} else if delimiter.regex != nil {
|
||||
return withPrefixLengths(text.Split(*delimiter.str), 0)
|
||||
}
|
||||
|
||||
// FIXME performance
|
||||
var tokens []string
|
||||
if delimiter.regex != nil {
|
||||
str := text.ToString()
|
||||
for len(str) > 0 {
|
||||
loc := delimiter.regex.FindStringIndex(str)
|
||||
|
@ -118,3 +118,39 @@ func (chars *Chars) Slice(b int, e int) Chars {
|
||||
}
|
||||
return Chars{bytes: chars.bytes[b:e]}
|
||||
}
|
||||
|
||||
func (chars *Chars) Split(delimiter string) []Chars {
|
||||
delim := []rune(delimiter)
|
||||
numChars := chars.Length()
|
||||
numDelim := len(delim)
|
||||
begin := 0
|
||||
ret := make([]Chars, 0, 1)
|
||||
|
||||
for index := 0; index < numChars; {
|
||||
if index+numDelim <= numChars {
|
||||
match := true
|
||||
for off, d := range delim {
|
||||
if chars.Get(index+off) != d {
|
||||
match = false
|
||||
break
|
||||
}
|
||||
}
|
||||
// Found the delimiter
|
||||
if match {
|
||||
incr := Max(numDelim, 1)
|
||||
ret = append(ret, chars.Slice(begin, index+incr))
|
||||
index += incr
|
||||
begin = index
|
||||
continue
|
||||
}
|
||||
} else {
|
||||
// Impossible to find the delimiter in the remaining substring
|
||||
break
|
||||
}
|
||||
index++
|
||||
}
|
||||
if begin < numChars || len(ret) == 0 {
|
||||
ret = append(ret, chars.Slice(begin, numChars))
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
@ -55,3 +55,28 @@ func TestTrimLength(t *testing.T) {
|
||||
check(" h o ", 5)
|
||||
check(" ", 0)
|
||||
}
|
||||
|
||||
func TestSplit(t *testing.T) {
|
||||
check := func(str string, delim string, tokens ...string) {
|
||||
input := ToChars([]byte(str))
|
||||
result := input.Split(delim)
|
||||
if len(result) != len(tokens) {
|
||||
t.Errorf("Invalid Split result for '%s': %d tokens found (expected %d): %s",
|
||||
str, len(result), len(tokens), result)
|
||||
}
|
||||
for idx, token := range tokens {
|
||||
if result[idx].ToString() != token {
|
||||
t.Errorf("Invalid Split result for '%s': %s (expected %s)",
|
||||
str, result[idx].ToString(), token)
|
||||
}
|
||||
}
|
||||
}
|
||||
check("abc:def::", ":", "abc:", "def:", ":")
|
||||
check("abc:def::", "-", "abc:def::")
|
||||
check("abc", "", "a", "b", "c")
|
||||
check("abc", "a", "a", "bc")
|
||||
check("abc", "ab", "ab", "c")
|
||||
check("abc", "abc", "abc")
|
||||
check("abc", "abcd", "abc")
|
||||
check("", "abcd", "")
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user