Auto merge of #2690 - micbou:identifier-functions-codepoint, r=Valloric

[READY] Use codepoint offsets in identifier functions

`CurrentIdentifierFinished` and `LastEnteredCharIsIdentifierChar` incorrectly use byte offsets with unicode lines (`CurrentColumn` returns a byte offset and `CurrentLineContents` a unicode string). This leads to weird bugs when there is a non-ASCII character on the current line:

![unicode-identifier-bug](https://user-images.githubusercontent.com/10026824/27256590-34b27c8c-53ba-11e7-8032-b98f0c7e0b14.gif)

This is fixed by converting byte offsets to codepoint ones through the `ByteOffsetToCodepointOffset` function.

This changes the behavior of these two functions when the current column position is invalid (i.e. past the end of the line). Previously, both functions returned false in that case; they now behave as if the current column were at the end of the line. In practice, this doesn't really matter since the position of the current column should always be valid.

<!-- Reviewable:start -->
---
This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/valloric/youcompleteme/2690)
<!-- Reviewable:end -->
This commit is contained in:
zzbot 2017-06-18 12:29:13 -07:00 committed by GitHub
commit 44d4f1d299
3 changed files with 51 additions and 7 deletions

View File

@ -65,11 +65,10 @@ def CompletionStartColumn():
def CurrentIdentifierFinished(): def CurrentIdentifierFinished():
current_column = vimsupport.CurrentColumn() line, current_column = vimsupport.CurrentLineContentsAndCodepointColumn()
previous_char_index = current_column - 1 previous_char_index = current_column - 1
if previous_char_index < 0: if previous_char_index < 0:
return True return True
line = vimsupport.CurrentLineContents()
filetype = vimsupport.CurrentFiletypes()[ 0 ] filetype = vimsupport.CurrentFiletypes()[ 0 ]
regex = identifier_utils.IdentifierRegexForFiletype( filetype ) regex = identifier_utils.IdentifierRegexForFiletype( filetype )
@ -82,10 +81,9 @@ def CurrentIdentifierFinished():
def LastEnteredCharIsIdentifierChar(): def LastEnteredCharIsIdentifierChar():
current_column = vimsupport.CurrentColumn() line, current_column = vimsupport.CurrentLineContentsAndCodepointColumn()
if current_column - 1 < 0: if current_column - 1 < 0:
return False return False
line = vimsupport.CurrentLineContents()
filetype = vimsupport.CurrentFiletypes()[ 0 ] filetype = vimsupport.CurrentFiletypes()[ 0 ]
return ( return (
identifier_utils.StartOfLongestIdentifierEndingAtIndex( identifier_utils.StartOfLongestIdentifierEndingAtIndex(

View File

@ -211,6 +211,22 @@ def LastEnteredCharIsIdentifierChar_NotIdentChar_test():
ok_( not base.LastEnteredCharIsIdentifierChar() ) ok_( not base.LastEnteredCharIsIdentifierChar() )
def LastEnteredCharIsIdentifierChar_Unicode_test():
  # The mocked column is a byte offset, as returned by CurrentColumn, while
  # the mocked line is a unicode string. The character ø is 2 bytes long, so
  # the 4-character line 'føo(' spans 5 bytes.
  unicode_line = 'føo('
  with MockCurrentFiletypes():
    # Byte column 5 sits right after '(' which is not an identifier character.
    with MockCurrentColumnAndLineContents( 5, unicode_line ):
      ok_( not base.LastEnteredCharIsIdentifierChar() )

    # Byte columns 4, 3 and 1 sit right after 'o', 'ø' and 'f' respectively.
    for byte_column in ( 4, 3, 1 ):
      with MockCurrentColumnAndLineContents( byte_column, unicode_line ):
        ok_( base.LastEnteredCharIsIdentifierChar() )
def CurrentIdentifierFinished_Basic_test(): def CurrentIdentifierFinished_Basic_test():
with MockCurrentFiletypes(): with MockCurrentFiletypes():
with MockCurrentColumnAndLineContents( 3, 'ab;' ): with MockCurrentColumnAndLineContents( 3, 'ab;' ):
@ -234,11 +250,14 @@ def CurrentIdentifierFinished_NothingBeforeColumn_test():
def CurrentIdentifierFinished_InvalidColumn_test(): def CurrentIdentifierFinished_InvalidColumn_test():
with MockCurrentFiletypes(): with MockCurrentFiletypes():
with MockCurrentColumnAndLineContents( 5, '' ): with MockCurrentColumnAndLineContents( 5, '' ):
ok_( not base.CurrentIdentifierFinished() ) ok_( base.CurrentIdentifierFinished() )
with MockCurrentColumnAndLineContents( 5, 'abc' ): with MockCurrentColumnAndLineContents( 5, 'abc' ):
ok_( not base.CurrentIdentifierFinished() ) ok_( not base.CurrentIdentifierFinished() )
with MockCurrentColumnAndLineContents( 4, 'ab;' ):
ok_( base.CurrentIdentifierFinished() )
def CurrentIdentifierFinished_InMiddleOfLine_test(): def CurrentIdentifierFinished_InMiddleOfLine_test():
with MockCurrentFiletypes(): with MockCurrentFiletypes():
@ -268,3 +287,19 @@ def CurrentIdentifierFinished_WhitespaceOnly_test():
with MockCurrentColumnAndLineContents( 3, '\t\t\t\t' ): with MockCurrentColumnAndLineContents( 3, '\t\t\t\t' ):
ok_( base.CurrentIdentifierFinished() ) ok_( base.CurrentIdentifierFinished() )
def CurrentIdentifierFinished_Unicode_test():
  # The mocked column is a byte offset, as returned by CurrentColumn, while
  # the mocked line is a unicode string. The character ø is 2 bytes long, so
  # the 4-character line 'føo ' spans 5 bytes.
  unicode_line = 'føo '
  with MockCurrentFiletypes():
    # Byte column 6 is past the end of the line and byte column 5 is right
    # after the trailing space: the identifier is finished in both cases.
    for byte_column in ( 6, 5 ):
      with MockCurrentColumnAndLineContents( byte_column, unicode_line ):
        ok_( base.CurrentIdentifierFinished() )

    # Byte columns 4 and 3 are right after 'o' and 'ø': still inside the
    # identifier.
    for byte_column in ( 4, 3 ):
      with MockCurrentColumnAndLineContents( byte_column, unicode_line ):
        ok_( not base.CurrentIdentifierFinished() )

View File

@ -29,8 +29,8 @@ import os
import json import json
import re import re
from collections import defaultdict from collections import defaultdict
from ycmd.utils import ( GetCurrentDirectory, JoinLinesAsUnicode, ToBytes, from ycmd.utils import ( ByteOffsetToCodepointOffset, GetCurrentDirectory,
ToUnicode ) JoinLinesAsUnicode, ToBytes, ToUnicode )
from ycmd import user_options_store from ycmd import user_options_store
BUFFER_COMMAND_MAP = { 'same-buffer' : 'edit', BUFFER_COMMAND_MAP = { 'same-buffer' : 'edit',
@ -73,6 +73,17 @@ def CurrentLineContents():
return ToUnicode( vim.current.line ) return ToUnicode( vim.current.line )
def CurrentLineContentsAndCodepointColumn():
  """Returns a ( line, column ) pair for the current cursor position, where
  |line| is the current line as a unicode string and |column| is the 0-based
  column expressed as a codepoint offset into that string (rather than the
  byte offset given by CurrentColumn). If the current column is outside the
  line, the returned column is the position at the end of the line."""
  line_contents = CurrentLineContents()
  # CurrentColumn is 0-based but ByteOffsetToCodepointOffset expects (and
  # returns) 1-based offsets, so shift by one on the way in and on the way out.
  one_based_byte_column = CurrentColumn() + 1
  one_based_codepoint_column = ByteOffsetToCodepointOffset(
      line_contents, one_based_byte_column )
  return line_contents, one_based_codepoint_column - 1
def TextAfterCursor(): def TextAfterCursor():
"""Returns the text after CurrentColumn.""" """Returns the text after CurrentColumn."""
return ToUnicode( vim.current.line[ CurrentColumn(): ] ) return ToUnicode( vim.current.line[ CurrentColumn(): ] )