Auto merge of #2690 - micbou:identifier-functions-codepoint, r=Valloric
[READY] Use codepoint offsets in identifier functions. `CurrentIdentifierFinished` and `LastEnteredCharIsIdentifierChar` incorrectly use byte offsets to index into unicode lines (`CurrentColumn` returns a byte offset while `CurrentLineContents` returns a unicode string). This leads to weird bugs when there is a non-ASCII character on the current line: ![unicode-identifier-bug](https://user-images.githubusercontent.com/10026824/27256590-34b27c8c-53ba-11e7-8032-b98f0c7e0b14.gif) This is fixed by converting byte offsets to codepoint offsets through the `ByteOffsetToCodepointOffset` function. This changes the behavior of these two functions when the current column position is invalid (i.e. outside the line). Both functions returned false in that case. They now return the same result as if the current column were at the end of the line. In practice, this doesn't really matter since the position of the current column should always be valid. <!-- Reviewable:start --> --- This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/valloric/youcompleteme/2690) <!-- Reviewable:end -->
This commit is contained in:
commit
44d4f1d299
@ -65,11 +65,10 @@ def CompletionStartColumn():
|
|||||||
|
|
||||||
|
|
||||||
def CurrentIdentifierFinished():
|
def CurrentIdentifierFinished():
|
||||||
current_column = vimsupport.CurrentColumn()
|
line, current_column = vimsupport.CurrentLineContentsAndCodepointColumn()
|
||||||
previous_char_index = current_column - 1
|
previous_char_index = current_column - 1
|
||||||
if previous_char_index < 0:
|
if previous_char_index < 0:
|
||||||
return True
|
return True
|
||||||
line = vimsupport.CurrentLineContents()
|
|
||||||
filetype = vimsupport.CurrentFiletypes()[ 0 ]
|
filetype = vimsupport.CurrentFiletypes()[ 0 ]
|
||||||
regex = identifier_utils.IdentifierRegexForFiletype( filetype )
|
regex = identifier_utils.IdentifierRegexForFiletype( filetype )
|
||||||
|
|
||||||
@ -82,10 +81,9 @@ def CurrentIdentifierFinished():
|
|||||||
|
|
||||||
|
|
||||||
def LastEnteredCharIsIdentifierChar():
|
def LastEnteredCharIsIdentifierChar():
|
||||||
current_column = vimsupport.CurrentColumn()
|
line, current_column = vimsupport.CurrentLineContentsAndCodepointColumn()
|
||||||
if current_column - 1 < 0:
|
if current_column - 1 < 0:
|
||||||
return False
|
return False
|
||||||
line = vimsupport.CurrentLineContents()
|
|
||||||
filetype = vimsupport.CurrentFiletypes()[ 0 ]
|
filetype = vimsupport.CurrentFiletypes()[ 0 ]
|
||||||
return (
|
return (
|
||||||
identifier_utils.StartOfLongestIdentifierEndingAtIndex(
|
identifier_utils.StartOfLongestIdentifierEndingAtIndex(
|
||||||
|
@ -211,6 +211,22 @@ def LastEnteredCharIsIdentifierChar_NotIdentChar_test():
|
|||||||
ok_( not base.LastEnteredCharIsIdentifierChar() )
|
ok_( not base.LastEnteredCharIsIdentifierChar() )
|
||||||
|
|
||||||
|
|
||||||
|
def LastEnteredCharIsIdentifierChar_Unicode_test():
|
||||||
|
with MockCurrentFiletypes():
|
||||||
|
# CurrentColumn returns a byte offset and the character ø is 2 bytes long.
|
||||||
|
with MockCurrentColumnAndLineContents( 5, 'føo(' ):
|
||||||
|
ok_( not base.LastEnteredCharIsIdentifierChar() )
|
||||||
|
|
||||||
|
with MockCurrentColumnAndLineContents( 4, 'føo(' ):
|
||||||
|
ok_( base.LastEnteredCharIsIdentifierChar() )
|
||||||
|
|
||||||
|
with MockCurrentColumnAndLineContents( 3, 'føo(' ):
|
||||||
|
ok_( base.LastEnteredCharIsIdentifierChar() )
|
||||||
|
|
||||||
|
with MockCurrentColumnAndLineContents( 1, 'føo(' ):
|
||||||
|
ok_( base.LastEnteredCharIsIdentifierChar() )
|
||||||
|
|
||||||
|
|
||||||
def CurrentIdentifierFinished_Basic_test():
|
def CurrentIdentifierFinished_Basic_test():
|
||||||
with MockCurrentFiletypes():
|
with MockCurrentFiletypes():
|
||||||
with MockCurrentColumnAndLineContents( 3, 'ab;' ):
|
with MockCurrentColumnAndLineContents( 3, 'ab;' ):
|
||||||
@ -234,11 +250,14 @@ def CurrentIdentifierFinished_NothingBeforeColumn_test():
|
|||||||
def CurrentIdentifierFinished_InvalidColumn_test():
|
def CurrentIdentifierFinished_InvalidColumn_test():
|
||||||
with MockCurrentFiletypes():
|
with MockCurrentFiletypes():
|
||||||
with MockCurrentColumnAndLineContents( 5, '' ):
|
with MockCurrentColumnAndLineContents( 5, '' ):
|
||||||
ok_( not base.CurrentIdentifierFinished() )
|
ok_( base.CurrentIdentifierFinished() )
|
||||||
|
|
||||||
with MockCurrentColumnAndLineContents( 5, 'abc' ):
|
with MockCurrentColumnAndLineContents( 5, 'abc' ):
|
||||||
ok_( not base.CurrentIdentifierFinished() )
|
ok_( not base.CurrentIdentifierFinished() )
|
||||||
|
|
||||||
|
with MockCurrentColumnAndLineContents( 4, 'ab;' ):
|
||||||
|
ok_( base.CurrentIdentifierFinished() )
|
||||||
|
|
||||||
|
|
||||||
def CurrentIdentifierFinished_InMiddleOfLine_test():
|
def CurrentIdentifierFinished_InMiddleOfLine_test():
|
||||||
with MockCurrentFiletypes():
|
with MockCurrentFiletypes():
|
||||||
@ -268,3 +287,19 @@ def CurrentIdentifierFinished_WhitespaceOnly_test():
|
|||||||
|
|
||||||
with MockCurrentColumnAndLineContents( 3, '\t\t\t\t' ):
|
with MockCurrentColumnAndLineContents( 3, '\t\t\t\t' ):
|
||||||
ok_( base.CurrentIdentifierFinished() )
|
ok_( base.CurrentIdentifierFinished() )
|
||||||
|
|
||||||
|
|
||||||
|
def CurrentIdentifierFinished_Unicode_test():
|
||||||
|
with MockCurrentFiletypes():
|
||||||
|
# CurrentColumn returns a byte offset and the character ø is 2 bytes long.
|
||||||
|
with MockCurrentColumnAndLineContents( 6, 'føo ' ):
|
||||||
|
ok_( base.CurrentIdentifierFinished() )
|
||||||
|
|
||||||
|
with MockCurrentColumnAndLineContents( 5, 'føo ' ):
|
||||||
|
ok_( base.CurrentIdentifierFinished() )
|
||||||
|
|
||||||
|
with MockCurrentColumnAndLineContents( 4, 'føo ' ):
|
||||||
|
ok_( not base.CurrentIdentifierFinished() )
|
||||||
|
|
||||||
|
with MockCurrentColumnAndLineContents( 3, 'føo ' ):
|
||||||
|
ok_( not base.CurrentIdentifierFinished() )
|
||||||
|
@ -29,8 +29,8 @@ import os
|
|||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from ycmd.utils import ( GetCurrentDirectory, JoinLinesAsUnicode, ToBytes,
|
from ycmd.utils import ( ByteOffsetToCodepointOffset, GetCurrentDirectory,
|
||||||
ToUnicode )
|
JoinLinesAsUnicode, ToBytes, ToUnicode )
|
||||||
from ycmd import user_options_store
|
from ycmd import user_options_store
|
||||||
|
|
||||||
BUFFER_COMMAND_MAP = { 'same-buffer' : 'edit',
|
BUFFER_COMMAND_MAP = { 'same-buffer' : 'edit',
|
||||||
@ -73,6 +73,17 @@ def CurrentLineContents():
|
|||||||
return ToUnicode( vim.current.line )
|
return ToUnicode( vim.current.line )
|
||||||
|
|
||||||
|
|
||||||
|
def CurrentLineContentsAndCodepointColumn():
|
||||||
|
"""Returns the line contents as a unicode string and the 0-based current
|
||||||
|
column as a codepoint offset. If the current column is outside the line,
|
||||||
|
returns the column position at the end of the line."""
|
||||||
|
line = CurrentLineContents()
|
||||||
|
byte_column = CurrentColumn()
|
||||||
|
# ByteOffsetToCodepointOffset expects a 1-based offset.
|
||||||
|
column = ByteOffsetToCodepointOffset( line, byte_column + 1 ) - 1
|
||||||
|
return line, column
|
||||||
|
|
||||||
|
|
||||||
def TextAfterCursor():
|
def TextAfterCursor():
|
||||||
"""Returns the text after CurrentColumn."""
|
"""Returns the text after CurrentColumn."""
|
||||||
return ToUnicode( vim.current.line[ CurrentColumn(): ] )
|
return ToUnicode( vim.current.line[ CurrentColumn(): ] )
|
||||||
|
Loading…
Reference in New Issue
Block a user