Auto merge of #2690 - micbou:identifier-functions-codepoint, r=Valloric

[READY] Use codepoint offsets in identifier functions

`CurrentIdentifierFinished` and `LastEnteredCharIsIdentifierChar` incorrectly use byte offsets with unicode lines (`CurrentColumn` returns a byte offset while `CurrentLineContents` returns a unicode string). This leads to weird bugs when there is a non-ASCII character on the current line:

![unicode-identifier-bug](https://user-images.githubusercontent.com/10026824/27256590-34b27c8c-53ba-11e7-8032-b98f0c7e0b14.gif)

This is fixed by converting byte offsets to codepoint offsets through the `ByteOffsetToCodepointOffset` function.

This changes the behavior of these two functions when the current column position is invalid. Previously, both functions returned false in that case. They now behave as if the current column were at the end of the line. In practice, this doesn't really matter since the position of the current column should always be valid.

---

This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/valloric/youcompleteme/2690)
2017-06-18 12:29:13 -07:00 · 2017-06-18 12:29:13 -07:00 · 44d4f1d299
commit 44d4f1d299
parent 95ebf8666b eb3c0cd8c1
3 changed files with 51 additions and 7 deletions
--- a/python/ycm/base.py
+++ b/python/ycm/base.py
@ -65,11 +65,10 @@ def CompletionStartColumn():
 def CurrentIdentifierFinished():
-  current_column = vimsupport.CurrentColumn()
+  line, current_column = vimsupport.CurrentLineContentsAndCodepointColumn()
  previous_char_index = current_column - 1
  if previous_char_index < 0:
    return True
  line = vimsupport.CurrentLineContents()
  filetype = vimsupport.CurrentFiletypes()[ 0 ]
  regex = identifier_utils.IdentifierRegexForFiletype( filetype )
@ -82,10 +81,9 @@ def CurrentIdentifierFinished():
 def LastEnteredCharIsIdentifierChar():
-  current_column = vimsupport.CurrentColumn()
+  line, current_column = vimsupport.CurrentLineContentsAndCodepointColumn()
  if current_column - 1 < 0:
    return False
  line = vimsupport.CurrentLineContents()
  filetype = vimsupport.CurrentFiletypes()[ 0 ]
  return (
    identifier_utils.StartOfLongestIdentifierEndingAtIndex(
--- a/python/ycm/tests/base_test.py
+++ b/python/ycm/tests/base_test.py
@ -211,6 +211,22 @@ def LastEnteredCharIsIdentifierChar_NotIdentChar_test():
      ok_( not base.LastEnteredCharIsIdentifierChar() )
 def LastEnteredCharIsIdentifierChar_Unicode_test():
  with MockCurrentFiletypes():
    # CurrentColumn returns a byte offset and character ø is 2 bytes long, so
    # 'føo(' is 4 codepoints but 5 bytes.
    # Byte column 5: the character before the cursor is '('.
    with MockCurrentColumnAndLineContents( 5, 'føo(' ):
      ok_( not base.LastEnteredCharIsIdentifierChar() )
    # Byte column 4: the character before the cursor is 'o'.
    with MockCurrentColumnAndLineContents( 4, 'føo(' ):
      ok_( base.LastEnteredCharIsIdentifierChar() )
    # Byte column 3: the character before the cursor is 'ø'.
    with MockCurrentColumnAndLineContents( 3, 'føo(' ):
      ok_( base.LastEnteredCharIsIdentifierChar() )
    # Byte column 1: the character before the cursor is 'f'.
    with MockCurrentColumnAndLineContents( 1, 'føo(' ):
      ok_( base.LastEnteredCharIsIdentifierChar() )
 def CurrentIdentifierFinished_Basic_test():
  with MockCurrentFiletypes():
    with MockCurrentColumnAndLineContents( 3, 'ab;' ):
@ -234,11 +250,14 @@ def CurrentIdentifierFinished_NothingBeforeColumn_test():
 def CurrentIdentifierFinished_InvalidColumn_test():
  with MockCurrentFiletypes():
    with MockCurrentColumnAndLineContents( 5, '' ):
-      ok_( not base.CurrentIdentifierFinished() )
+      ok_( base.CurrentIdentifierFinished() )
    with MockCurrentColumnAndLineContents( 5, 'abc' ):
      ok_( not base.CurrentIdentifierFinished() )
    with MockCurrentColumnAndLineContents( 4, 'ab;' ):
      ok_( base.CurrentIdentifierFinished() )
 def CurrentIdentifierFinished_InMiddleOfLine_test():
  with MockCurrentFiletypes():
@ -268,3 +287,19 @@ def CurrentIdentifierFinished_WhitespaceOnly_test():
    with MockCurrentColumnAndLineContents( 3, '\t\t\t\t' ):
      ok_( base.CurrentIdentifierFinished() )
 def CurrentIdentifierFinished_Unicode_test():
  with MockCurrentFiletypes():
    # CurrentColumn returns a byte offset and character ø is 2 bytes long, so
    # 'føo ' is 4 codepoints but 5 bytes.
    # Byte column 6 is past the end of the 5-byte line.
    with MockCurrentColumnAndLineContents( 6, 'føo ' ):
      ok_( base.CurrentIdentifierFinished() )
    # Byte column 5: the character before the cursor is ' '.
    with MockCurrentColumnAndLineContents( 5, 'føo ' ):
      ok_( base.CurrentIdentifierFinished() )
    # Byte column 4: the character before the cursor is 'o'.
    with MockCurrentColumnAndLineContents( 4, 'føo ' ):
      ok_( not base.CurrentIdentifierFinished() )
    # Byte column 3: the character before the cursor is 'ø'.
    with MockCurrentColumnAndLineContents( 3, 'føo ' ):
      ok_( not base.CurrentIdentifierFinished() )
--- a/python/ycm/vimsupport.py
+++ b/python/ycm/vimsupport.py
@ -29,8 +29,8 @@ import os
 import json
 import re
 from collections import defaultdict
-from ycmd.utils import ( GetCurrentDirectory, JoinLinesAsUnicode, ToBytes,
+from ycmd.utils import ( ByteOffsetToCodepointOffset, GetCurrentDirectory,
-                         ToUnicode )
+                         JoinLinesAsUnicode, ToBytes, ToUnicode )
 from ycmd import user_options_store
 BUFFER_COMMAND_MAP = { 'same-buffer'      : 'edit',
@ -73,6 +73,17 @@ def CurrentLineContents():
  return ToUnicode( vim.current.line )
 def CurrentLineContentsAndCodepointColumn():
  """Returns a ( line, column ) tuple: the current line contents as a unicode
  string and the 0-based current column as a codepoint offset into that
  string. If the current column is outside the line, returns the column
  position at the end of the line."""
  line = CurrentLineContents()
  # CurrentColumn is 0-based and counts bytes, not codepoints.
  byte_column = CurrentColumn()
  # ByteOffsetToCodepointOffset expects a 1-based offset, hence the + 1 on the
  # way in; the - 1 converts its result back to a 0-based column.
  column = ByteOffsetToCodepointOffset( line, byte_column + 1 ) - 1
  return line, column
 def TextAfterCursor():
  """Returns the text after CurrentColumn, converted to a unicode string."""
  # NOTE(review): CurrentColumn is a byte offset. If vim.current.line is
  # already a unicode string rather than bytes, this slice would be misplaced
  # on lines containing multi-byte characters - confirm.
  return ToUnicode( vim.current.line[ CurrentColumn(): ] )