Use codepoint offsets in identifier functions

CurrentIdentifierFinished and LastEnteredCharIsIdentifierChar incorrectly use byte offsets to index into unicode lines, which picks the wrong character when the line contains multibyte characters. Convert those offsets to codepoint offsets.
2017-06-17 23:29:13 +02:00 · 2017-06-17 23:29:13 +02:00 · eb3c0cd8c1
commit eb3c0cd8c1
parent b20809332c
3 changed files with 51 additions and 7 deletions
--- a/python/ycm/base.py
+++ b/python/ycm/base.py
@ -65,11 +65,10 @@ def CompletionStartColumn():
 def CurrentIdentifierFinished():
-  current_column = vimsupport.CurrentColumn()
+  line, current_column = vimsupport.CurrentLineContentsAndCodepointColumn()
  previous_char_index = current_column - 1
  if previous_char_index < 0:
    return True
  line = vimsupport.CurrentLineContents()
  filetype = vimsupport.CurrentFiletypes()[ 0 ]
  regex = identifier_utils.IdentifierRegexForFiletype( filetype )
@ -82,10 +81,9 @@ def CurrentIdentifierFinished():
 def LastEnteredCharIsIdentifierChar():
-  current_column = vimsupport.CurrentColumn()
+  line, current_column = vimsupport.CurrentLineContentsAndCodepointColumn()
  if current_column - 1 < 0:
    return False
  line = vimsupport.CurrentLineContents()
  filetype = vimsupport.CurrentFiletypes()[ 0 ]
  return (
    identifier_utils.StartOfLongestIdentifierEndingAtIndex(
--- a/python/ycm/tests/base_test.py
+++ b/python/ycm/tests/base_test.py
@ -211,6 +211,22 @@ def LastEnteredCharIsIdentifierChar_NotIdentChar_test():
      ok_( not base.LastEnteredCharIsIdentifierChar() )
 def LastEnteredCharIsIdentifierChar_Unicode_test():
  # The mocked column is a byte offset, as returned by CurrentColumn, and the
  # character ø is 2 bytes long. Each entry pairs a byte column with whether
  # the character entered just before it is expected to be an identifier
  # character.
  expectations = (
    ( 5, False ),
    ( 4, True ),
    ( 3, True ),
    ( 1, True ),
  )
  with MockCurrentFiletypes():
    for byte_column, expect_identifier_char in expectations:
      with MockCurrentColumnAndLineContents( byte_column, 'føo(' ):
        result = base.LastEnteredCharIsIdentifierChar()
        ok_( result if expect_identifier_char else not result )
 def CurrentIdentifierFinished_Basic_test():
  with MockCurrentFiletypes():
    with MockCurrentColumnAndLineContents( 3, 'ab;' ):
@ -234,11 +250,14 @@ def CurrentIdentifierFinished_NothingBeforeColumn_test():
 def CurrentIdentifierFinished_InvalidColumn_test():
  # Every mocked column below lies past the end of its mocked line (5 for '',
  # 5 for 'abc', 4 for 'ab;').
  # NOTE(review): the lines starting with '-' and '+' are the old and new
  # expectation for the empty line: it changes from "not finished" to
  # "finished".
  with MockCurrentFiletypes():
    with MockCurrentColumnAndLineContents( 5, '' ):
-      ok_( not base.CurrentIdentifierFinished() )
+      ok_( base.CurrentIdentifierFinished() )
    with MockCurrentColumnAndLineContents( 5, 'abc' ):
      ok_( not base.CurrentIdentifierFinished() )
    with MockCurrentColumnAndLineContents( 4, 'ab;' ):
      ok_( base.CurrentIdentifierFinished() )
 def CurrentIdentifierFinished_InMiddleOfLine_test():
  with MockCurrentFiletypes():
@ -268,3 +287,19 @@ def CurrentIdentifierFinished_WhitespaceOnly_test():
    with MockCurrentColumnAndLineContents( 3, '\t\t\t\t' ):
      ok_( base.CurrentIdentifierFinished() )
 def CurrentIdentifierFinished_Unicode_test():
  # The mocked column is a byte offset, as returned by CurrentColumn, and the
  # character ø is 2 bytes long. Each entry pairs a byte column with whether
  # the identifier is expected to be reported as finished at that column.
  expectations = (
    ( 6, True ),
    ( 5, True ),
    ( 4, False ),
    ( 3, False ),
  )
  with MockCurrentFiletypes():
    for byte_column, expect_finished in expectations:
      with MockCurrentColumnAndLineContents( byte_column, 'føo ' ):
        result = base.CurrentIdentifierFinished()
        ok_( result if expect_finished else not result )
--- a/python/ycm/vimsupport.py
+++ b/python/ycm/vimsupport.py
@ -29,8 +29,8 @@ import os
 import json
 import re
 from collections import defaultdict
-from ycmd.utils import ( GetCurrentDirectory, JoinLinesAsUnicode, ToBytes,
+from ycmd.utils import ( ByteOffsetToCodepointOffset, GetCurrentDirectory,
-                         ToUnicode )
+                         JoinLinesAsUnicode, ToBytes, ToUnicode )
 from ycmd import user_options_store
 BUFFER_COMMAND_MAP = { 'same-buffer'      : 'edit',
@ -73,6 +73,17 @@ def CurrentLineContents():
  return ToUnicode( vim.current.line )
 def CurrentLineContentsAndCodepointColumn():
  """Returns a ( line, column ) pair for the cursor position: the current
  line as a unicode string and the 0-based current column expressed as a
  codepoint offset. A column lying outside the line is reported as the column
  position at the end of the line."""
  contents = CurrentLineContents()
  # CurrentColumn is 0-based whereas ByteOffsetToCodepointOffset expects a
  # 1-based offset, so shift by one on the way in and back on the way out.
  one_based_byte_offset = CurrentColumn() + 1
  one_based_codepoint_offset = ByteOffsetToCodepointOffset(
      contents, one_based_byte_offset )
  return contents, one_based_codepoint_offset - 1
 def TextAfterCursor():
  """Returns the text after CurrentColumn as a unicode string."""
  # CurrentColumn is a byte offset, so slice the line's bytes (via ToBytes)
  # rather than the line itself: when vim.current.line is a unicode string,
  # slicing it with a byte offset drops too many characters on lines that
  # contain multibyte characters before the cursor.
  return ToUnicode( ToBytes( vim.current.line )[ CurrentColumn(): ] )