From eb3c0cd8c1aeca6a2587e0ee38fa60d0cce68175 Mon Sep 17 00:00:00 2001 From: micbou Date: Sat, 17 Jun 2017 23:29:13 +0200 Subject: [PATCH] Use codepoint offsets in identifier functions CurrentIdentifierFinished and LastEnteredCharIsIdentifierChar incorrectly use byte offsets with unicode lines. Convert those offsets to codepoint offsets. --- python/ycm/base.py | 6 ++---- python/ycm/tests/base_test.py | 37 ++++++++++++++++++++++++++++++++++- python/ycm/vimsupport.py | 15 ++++++++++++-- 3 files changed, 51 insertions(+), 7 deletions(-) diff --git a/python/ycm/base.py b/python/ycm/base.py index 4bfb3663..257e09bb 100644 --- a/python/ycm/base.py +++ b/python/ycm/base.py @@ -65,11 +65,10 @@ def CompletionStartColumn(): def CurrentIdentifierFinished(): - current_column = vimsupport.CurrentColumn() + line, current_column = vimsupport.CurrentLineContentsAndCodepointColumn() previous_char_index = current_column - 1 if previous_char_index < 0: return True - line = vimsupport.CurrentLineContents() filetype = vimsupport.CurrentFiletypes()[ 0 ] regex = identifier_utils.IdentifierRegexForFiletype( filetype ) @@ -82,10 +81,9 @@ def CurrentIdentifierFinished(): def LastEnteredCharIsIdentifierChar(): - current_column = vimsupport.CurrentColumn() + line, current_column = vimsupport.CurrentLineContentsAndCodepointColumn() if current_column - 1 < 0: return False - line = vimsupport.CurrentLineContents() filetype = vimsupport.CurrentFiletypes()[ 0 ] return ( identifier_utils.StartOfLongestIdentifierEndingAtIndex( diff --git a/python/ycm/tests/base_test.py b/python/ycm/tests/base_test.py index 3b4fd4f0..b4a775e9 100644 --- a/python/ycm/tests/base_test.py +++ b/python/ycm/tests/base_test.py @@ -211,6 +211,22 @@ def LastEnteredCharIsIdentifierChar_NotIdentChar_test(): ok_( not base.LastEnteredCharIsIdentifierChar() ) +def LastEnteredCharIsIdentifierChar_Unicode_test(): + with MockCurrentFiletypes(): + # CurrentColumn returns a byte offset and character ø is 2 bytes length. + with MockCurrentColumnAndLineContents( 5, 'føo(' ): + ok_( not base.LastEnteredCharIsIdentifierChar() ) + + with MockCurrentColumnAndLineContents( 4, 'føo(' ): + ok_( base.LastEnteredCharIsIdentifierChar() ) + + with MockCurrentColumnAndLineContents( 3, 'føo(' ): + ok_( base.LastEnteredCharIsIdentifierChar() ) + + with MockCurrentColumnAndLineContents( 1, 'føo(' ): + ok_( base.LastEnteredCharIsIdentifierChar() ) + + def CurrentIdentifierFinished_Basic_test(): with MockCurrentFiletypes(): with MockCurrentColumnAndLineContents( 3, 'ab;' ): @@ -234,11 +250,14 @@ def CurrentIdentifierFinished_NothingBeforeColumn_test(): def CurrentIdentifierFinished_InvalidColumn_test(): with MockCurrentFiletypes(): with MockCurrentColumnAndLineContents( 5, '' ): - ok_( not base.CurrentIdentifierFinished() ) + ok_( base.CurrentIdentifierFinished() ) with MockCurrentColumnAndLineContents( 5, 'abc' ): ok_( not base.CurrentIdentifierFinished() ) + with MockCurrentColumnAndLineContents( 4, 'ab;' ): + ok_( base.CurrentIdentifierFinished() ) + def CurrentIdentifierFinished_InMiddleOfLine_test(): with MockCurrentFiletypes(): @@ -268,3 +287,19 @@ def CurrentIdentifierFinished_WhitespaceOnly_test(): with MockCurrentColumnAndLineContents( 3, '\t\t\t\t' ): ok_( base.CurrentIdentifierFinished() ) + + +def CurrentIdentifierFinished_Unicode_test(): + with MockCurrentFiletypes(): + # CurrentColumn returns a byte offset and character ø is 2 bytes length. + with MockCurrentColumnAndLineContents( 6, 'føo ' ): + ok_( base.CurrentIdentifierFinished() ) + + with MockCurrentColumnAndLineContents( 5, 'føo ' ): + ok_( base.CurrentIdentifierFinished() ) + + with MockCurrentColumnAndLineContents( 4, 'føo ' ): + ok_( not base.CurrentIdentifierFinished() ) + + with MockCurrentColumnAndLineContents( 3, 'føo ' ): + ok_( not base.CurrentIdentifierFinished() ) diff --git a/python/ycm/vimsupport.py b/python/ycm/vimsupport.py index f2d0f0a7..1712ee7a 100644 --- a/python/ycm/vimsupport.py +++ b/python/ycm/vimsupport.py @@ -29,8 +29,8 @@ import os import json import re from collections import defaultdict -from ycmd.utils import ( GetCurrentDirectory, JoinLinesAsUnicode, ToBytes, - ToUnicode ) +from ycmd.utils import ( ByteOffsetToCodepointOffset, GetCurrentDirectory, + JoinLinesAsUnicode, ToBytes, ToUnicode ) from ycmd import user_options_store BUFFER_COMMAND_MAP = { 'same-buffer' : 'edit', @@ -73,6 +73,17 @@ def CurrentLineContents(): return ToUnicode( vim.current.line ) +def CurrentLineContentsAndCodepointColumn(): + """Returns the line contents as a unicode string and the 0-based current + column as a codepoint offset. If the current column is outside the line, + returns the column position at the end of the line.""" + line = CurrentLineContents() + byte_column = CurrentColumn() + # ByteOffsetToCodepointOffset expects 1-based offset. + column = ByteOffsetToCodepointOffset( line, byte_column + 1 ) - 1 + return line, column + + def TextAfterCursor(): """Returns the text after CurrentColumn.""" return ToUnicode( vim.current.line[ CurrentColumn(): ] )