New sorting rule: char match index sum

The point is that we want to prefer candidates in which the query characters occur "earlier" in the text, e.g. for the query "abc" we prefer "xxabcxxx" over "xxxxxabc".
2012-06-24 15:35:00 -07:00 · 2012-06-24 15:35:00 -07:00 · 9b875ca7f3
commit 9b875ca7f3
parent adb31abef2
6 changed files with 80 additions and 14 deletions
--- a/cpp/ycm/Candidate.cpp
+++ b/cpp/ycm/Candidate.cpp
@ -76,6 +76,7 @@ Candidate::Candidate( const std::string &text )
 Result Candidate::QueryMatchResult( const std::string &query ) const
 {
  LetterNode *node = root_node_.get();
+  int index_sum = 0;

  foreach ( char letter, query )
  {
@ -85,10 +86,11 @@ Result Candidate::QueryMatchResult( const std::string &query ) const
      return Result( false );

    node = list->front();
+    index_sum += node->Index();
  }

-  return Result( true, &text_, text_is_lowercase_, word_boundary_chars_,
-                 query );
+  return Result( true, &text_, text_is_lowercase_, index_sum,
+                 word_boundary_chars_, query );
 }

 } // namespace YouCompleteMe
--- a/cpp/ycm/LetterNode.cpp
+++ b/cpp/ycm/LetterNode.cpp
@ -22,24 +22,26 @@
 namespace YouCompleteMe
 {

-LetterNode::LetterNode( char letter )
+LetterNode::LetterNode( char letter, int index )
+  : is_uppercase_( IsUppercase( letter ) ),
+    is_root_node_( false ),
+    index_( index )
 {
-  is_uppercase = IsUppercase( letter );
-  is_root_node = false;
 }


+// TODO: this class needs tests
 LetterNode::LetterNode( const std::string &text )
+  : is_uppercase_( false ),
+    is_root_node_( true ),
+    index_( -1 )
 {
-  is_uppercase = false;
-  is_root_node = true;
-
  letternode_per_text_index_.resize( text.size() );

  for ( uint i = 0; i < text.size(); ++i)
  {
    char letter = text[ i ];
-    LetterNode *node = new LetterNode( letter );
+    LetterNode *node = new LetterNode( letter, i );
    letters_[ letter ].push_back( node );
    letternode_per_text_index_[ i ] = boost::shared_ptr< LetterNode >( node );
  }
--- a/cpp/ycm/LetterNode.h
+++ b/cpp/ycm/LetterNode.h
@ -34,14 +34,14 @@ namespace YouCompleteMe
 class LetterNode : boost::noncopyable
 {
 public:
-	explicit LetterNode( char letter );
+	explicit LetterNode( char letter, int index );

  // this is for root nodes
 	explicit LetterNode( const std::string &text );

  inline bool LetterIsUppercase() const
  {
-    return is_uppercase;
+    return is_uppercase_;
  }


@ -56,13 +56,19 @@ public:
    letters_[ letter ].push_front( node );
  }

+  inline int Index()
+  {
+    return index_;
+  }
+
 private:

  // TODO: rename LetterHash to LetterNodeListHash or LetterNodeListDict/Map?
  LetterHash letters_;
  std::vector< boost::shared_ptr< LetterNode > > letternode_per_text_index_;
-	bool is_uppercase;
-	bool is_root_node;
+	bool is_uppercase_;
+	bool is_root_node_;
+	int index_;
 };

 } // namespace YouCompleteMe
--- a/cpp/ycm/Result.cpp
+++ b/cpp/ycm/Result.cpp
@ -54,6 +54,7 @@ Result::Result( bool is_subsequence )
  word_boundary_char_utilization_( 0 ),
  query_is_candidate_prefix_( false ),
  text_is_lowercase_( false ),
+  char_match_index_sum_( 0 ),
  text_( NULL )
 {
 }
@ -61,6 +62,7 @@ Result::Result( bool is_subsequence )
 Result::Result( bool is_subsequence,
                const std::string *text,
                bool text_is_lowercase,
+                int char_match_index_sum,
                const std::string &word_boundary_chars,
                const std::string &query )
  :
@ -70,6 +72,7 @@ Result::Result( bool is_subsequence,
  word_boundary_char_utilization_( 0 ),
  query_is_candidate_prefix_( false ),
  text_is_lowercase_( text_is_lowercase ),
+  char_match_index_sum_( char_match_index_sum ),
  text_( text )
 {
  if ( is_subsequence )
@ -77,6 +80,7 @@ Result::Result( bool is_subsequence,
 }


+// TODO: do we need a custom copy ctor?
 Result::Result( const Result& other )
  :
  is_subsequence_( other.is_subsequence_ ),
@ -87,6 +91,7 @@ Result::Result( const Result& other )
  word_boundary_char_utilization_( other.word_boundary_char_utilization_ ),
  query_is_candidate_prefix_( other.query_is_candidate_prefix_ ),
  text_is_lowercase_( other.text_is_lowercase_ ),
+  char_match_index_sum_( other.char_match_index_sum_ ),
  text_( other.text_ )
 {
 }
@ -144,6 +149,9 @@ bool Result::operator< ( const Result &other ) const {
        other.word_boundary_char_utilization_;
  }

+  if ( char_match_index_sum_ != other.char_match_index_sum_ )
+    return char_match_index_sum_ < other.char_match_index_sum_;
+
  if ( text_->length() != other.text_->length() )
    return text_->length() < other.text_->length();

--- a/cpp/ycm/Result.h
+++ b/cpp/ycm/Result.h
@ -31,6 +31,7 @@ public:
  Result( bool is_subsequence,
          const std::string *text,
          bool text_is_lowercase,
+          int char_match_index_sum,
          const std::string &word_boundary_chars,
          const std::string &query );

@ -54,7 +55,13 @@ private:
      const std::string &word_boundary_chars );


+  // true when the characters of the query are a subsequence of the characters
+  // in the candidate text, e.g. the characters "abc" are a subsequence of
+  // "xxaygbefc" but not of "axxcb", since they occur in the correct order ('a'
+  // then 'b' then 'c') in the first string but not in the second.
  bool is_subsequence_;
+
+  // true when the first character of the query and the candidate match
  bool first_char_same_in_query_and_text_;

  // number of word boundary matches / number of chars in query
@ -62,8 +69,21 @@ private:

  // number of word boundary matches / number of all word boundary chars
  double word_boundary_char_utilization_;
+
+  // true when the query is a prefix of the candidate string, e.g. "foo" query
+  // for "foobar" candidate.
  bool query_is_candidate_prefix_;
+
+  // true when the candidate text is all lowercase, e.g. "foo" candidate.
  bool text_is_lowercase_;
+
+  // The sum of the indexes of all the letters the query "hit" in the candidate
+  // text. For instance, the result for the query "abc" in the candidate
+  // "012a45bc8" has char_match_index_sum of 3 + 6 + 7 = 16 because those are
+  // the char indexes of those letters in the candidate string.
+  int char_match_index_sum_;
+
+  // points to the full candidate text
  const std::string *text_;

 };
--- a/cpp/ycm/tests/Completer_test.cpp
+++ b/cpp/ycm/tests/Completer_test.cpp
@ -150,19 +150,47 @@ TEST( CompleterTest, QueryPrefixOfCandidateWins )
 	                          "fbaroo" ) );
 }

-TEST( CompleterTest, ShorterCandidateWins )
+TEST( CompleterTest, LowerMatchCharIndexSumWins )
 {
+	EXPECT_THAT( Completer( Candidates(
+              "ratio_of_word_boundary_chars_in_query_",
+              "first_char_same_in_query_and_text_") )
+                 .CandidatesForQuery( "charinq" ),
+              ElementsAre( "first_char_same_in_query_and_text_",
+                           "ratio_of_word_boundary_chars_in_query_") );
+
+	EXPECT_THAT( Completer( Candidates(
+               "barfooq",
+               "barquxfoo" ) ).CandidatesForQuery( "foo" ),
+	             ElementsAre( "barfooq",
+                            "barquxfoo") );
+
+	EXPECT_THAT( Completer( Candidates(
+               "xxxxxxabc",
+               "xxabcxxxx" ) ).CandidatesForQuery( "abc" ),
+	             ElementsAre( "xxabcxxxx",
+                            "xxxxxxabc") );
+
 	EXPECT_THAT( Completer( Candidates(
               "FooBarQux",
               "FaBarQux" ) ).CandidatesForQuery( "fbq" ),
 	             ElementsAre( "FaBarQux",
 	                          "FooBarQux" ) );
+}

+TEST( CompleterTest, ShorterCandidateWins )
+{
 	EXPECT_THAT( Completer( Candidates(
               "CompleterT",
               "CompleterTest" ) ).CandidatesForQuery( "co" ),
 	             ElementsAre( "CompleterT",
 	                          "CompleterTest" ) );
+
+	EXPECT_THAT( Completer( Candidates(
+               "CompleterT",
+               "CompleterTest" ) ).CandidatesForQuery( "plet" ),
+	             ElementsAre( "CompleterT",
+	                          "CompleterTest" ) );
 }

 TEST( CompleterTest, SameLowercaseCandidateWins )