Use codepoint offsets in identifier functions

CurrentIdentifierFinished and LastEnteredCharIsIdentifierChar incorrectly use
byte offsets with unicode lines. Convert those offsets to codepoint offsets.
This commit is contained in:
micbou 2017-06-17 23:29:13 +02:00
parent b20809332c
commit eb3c0cd8c1
No known key found for this signature in database
GPG Key ID: C7E8FD1F3BDA1E05
3 changed files with 51 additions and 7 deletions

View File

@ -65,11 +65,10 @@ def CompletionStartColumn():
def CurrentIdentifierFinished(): def CurrentIdentifierFinished():
current_column = vimsupport.CurrentColumn() line, current_column = vimsupport.CurrentLineContentsAndCodepointColumn()
previous_char_index = current_column - 1 previous_char_index = current_column - 1
if previous_char_index < 0: if previous_char_index < 0:
return True return True
line = vimsupport.CurrentLineContents()
filetype = vimsupport.CurrentFiletypes()[ 0 ] filetype = vimsupport.CurrentFiletypes()[ 0 ]
regex = identifier_utils.IdentifierRegexForFiletype( filetype ) regex = identifier_utils.IdentifierRegexForFiletype( filetype )
@ -82,10 +81,9 @@ def CurrentIdentifierFinished():
def LastEnteredCharIsIdentifierChar(): def LastEnteredCharIsIdentifierChar():
current_column = vimsupport.CurrentColumn() line, current_column = vimsupport.CurrentLineContentsAndCodepointColumn()
if current_column - 1 < 0: if current_column - 1 < 0:
return False return False
line = vimsupport.CurrentLineContents()
filetype = vimsupport.CurrentFiletypes()[ 0 ] filetype = vimsupport.CurrentFiletypes()[ 0 ]
return ( return (
identifier_utils.StartOfLongestIdentifierEndingAtIndex( identifier_utils.StartOfLongestIdentifierEndingAtIndex(

View File

@ -211,6 +211,22 @@ def LastEnteredCharIsIdentifierChar_NotIdentChar_test():
ok_( not base.LastEnteredCharIsIdentifierChar() ) ok_( not base.LastEnteredCharIsIdentifierChar() )
def LastEnteredCharIsIdentifierChar_Unicode_test():
  # CurrentColumn returns a byte offset and the character ø is 2 bytes long, so
  # in 'føo(' the byte columns 1, 3, 4 and 5 fall right after 'f', 'ø', 'o' and
  # '(' respectively.
  cases = [ ( 5, False ), ( 4, True ), ( 3, True ), ( 1, True ) ]
  with MockCurrentFiletypes():
    for byte_column, is_identifier_char in cases:
      with MockCurrentColumnAndLineContents( byte_column, 'føo(' ):
        ok_( bool( base.LastEnteredCharIsIdentifierChar() ) is
             is_identifier_char )
def CurrentIdentifierFinished_Basic_test(): def CurrentIdentifierFinished_Basic_test():
with MockCurrentFiletypes(): with MockCurrentFiletypes():
with MockCurrentColumnAndLineContents( 3, 'ab;' ): with MockCurrentColumnAndLineContents( 3, 'ab;' ):
@ -234,11 +250,14 @@ def CurrentIdentifierFinished_NothingBeforeColumn_test():
def CurrentIdentifierFinished_InvalidColumn_test(): def CurrentIdentifierFinished_InvalidColumn_test():
with MockCurrentFiletypes(): with MockCurrentFiletypes():
with MockCurrentColumnAndLineContents( 5, '' ): with MockCurrentColumnAndLineContents( 5, '' ):
ok_( not base.CurrentIdentifierFinished() ) ok_( base.CurrentIdentifierFinished() )
with MockCurrentColumnAndLineContents( 5, 'abc' ): with MockCurrentColumnAndLineContents( 5, 'abc' ):
ok_( not base.CurrentIdentifierFinished() ) ok_( not base.CurrentIdentifierFinished() )
with MockCurrentColumnAndLineContents( 4, 'ab;' ):
ok_( base.CurrentIdentifierFinished() )
def CurrentIdentifierFinished_InMiddleOfLine_test(): def CurrentIdentifierFinished_InMiddleOfLine_test():
with MockCurrentFiletypes(): with MockCurrentFiletypes():
@ -268,3 +287,19 @@ def CurrentIdentifierFinished_WhitespaceOnly_test():
with MockCurrentColumnAndLineContents( 3, '\t\t\t\t' ): with MockCurrentColumnAndLineContents( 3, '\t\t\t\t' ):
ok_( base.CurrentIdentifierFinished() ) ok_( base.CurrentIdentifierFinished() )
def CurrentIdentifierFinished_Unicode_test():
  # CurrentColumn returns a byte offset and the character ø is 2 bytes long, so
  # in 'føo ' the byte columns 3, 4 and 5 fall right after 'ø', 'o' and ' '
  # respectively, while byte column 6 lies past the end of the line.
  cases = [ ( 6, True ), ( 5, True ), ( 4, False ), ( 3, False ) ]
  with MockCurrentFiletypes():
    for byte_column, is_finished in cases:
      with MockCurrentColumnAndLineContents( byte_column, 'føo ' ):
        ok_( bool( base.CurrentIdentifierFinished() ) is is_finished )

View File

@ -29,8 +29,8 @@ import os
import json import json
import re import re
from collections import defaultdict from collections import defaultdict
from ycmd.utils import ( GetCurrentDirectory, JoinLinesAsUnicode, ToBytes, from ycmd.utils import ( ByteOffsetToCodepointOffset, GetCurrentDirectory,
ToUnicode ) JoinLinesAsUnicode, ToBytes, ToUnicode )
from ycmd import user_options_store from ycmd import user_options_store
BUFFER_COMMAND_MAP = { 'same-buffer' : 'edit', BUFFER_COMMAND_MAP = { 'same-buffer' : 'edit',
@ -73,6 +73,17 @@ def CurrentLineContents():
return ToUnicode( vim.current.line ) return ToUnicode( vim.current.line )
def CurrentLineContentsAndCodepointColumn():
  """Returns a ( line, column ) pair for the cursor position: the current line
  contents as a unicode string and the 0-based current column expressed as a
  codepoint offset instead of a byte offset. If the current column is outside
  the line, the column returned is the position at the end of the line."""
  contents = CurrentLineContents()
  # CurrentColumn is 0-based whereas ByteOffsetToCodepointOffset expects a
  # 1-based offset, so shift by one going in and shift the result back to
  # 0-based coming out.
  one_based_byte_offset = CurrentColumn() + 1
  codepoint_column = ByteOffsetToCodepointOffset( contents,
                                                  one_based_byte_offset ) - 1
  return contents, codepoint_column
def TextAfterCursor(): def TextAfterCursor():
"""Returns the text after CurrentColumn.""" """Returns the text after CurrentColumn."""
return ToUnicode( vim.current.line[ CurrentColumn(): ] ) return ToUnicode( vim.current.line[ CurrentColumn(): ] )