From 9b875ca7f36372f7f2ac5a54f01ae1a898a5ea11 Mon Sep 17 00:00:00 2001
From: Strahinja Val Markovic <val@markovic.io>
Date: Sun, 24 Jun 2012 15:35:00 -0700
Subject: [PATCH] New sorting rule: char match index sum

The point is that we want to prefer candidates that have the query characters
"earlier" in their text, e.g. "xxabcxxx" over "xxxxxabc" for "abc" query.
---
 cpp/ycm/Candidate.cpp            |  6 ++++--
 cpp/ycm/LetterNode.cpp           | 16 +++++++++-------
 cpp/ycm/LetterNode.h             | 14 ++++++++++----
 cpp/ycm/Result.cpp               |  8 ++++++++
 cpp/ycm/Result.h                 | 20 ++++++++++++++++++++
 cpp/ycm/tests/Completer_test.cpp | 30 +++++++++++++++++++++++++++++-
 6 files changed, 80 insertions(+), 14 deletions(-)

diff --git a/cpp/ycm/Candidate.cpp b/cpp/ycm/Candidate.cpp
index 28d778a4..e4479eaa 100644
--- a/cpp/ycm/Candidate.cpp
+++ b/cpp/ycm/Candidate.cpp
@@ -76,6 +76,7 @@ Candidate::Candidate( const std::string &text )
 Result Candidate::QueryMatchResult( const std::string &query ) const
 {
   LetterNode *node = root_node_.get();
+  int index_sum = 0;
 
   foreach ( char letter, query )
   {
@@ -85,10 +86,11 @@ Result Candidate::QueryMatchResult( const std::string &query ) const
       return Result( false );
 
     node = list->front();
+    index_sum += node->Index();
   }
 
-  return Result( true, &text_, text_is_lowercase_, word_boundary_chars_,
-                 query );
+  return Result( true, &text_, text_is_lowercase_, index_sum,
+                 word_boundary_chars_, query );
 }
 
 } // namespace YouCompleteMe
diff --git a/cpp/ycm/LetterNode.cpp b/cpp/ycm/LetterNode.cpp
index 3a2ba6ed..6fc7ed75 100644
--- a/cpp/ycm/LetterNode.cpp
+++ b/cpp/ycm/LetterNode.cpp
@@ -22,24 +22,26 @@
 namespace YouCompleteMe
 {
 
-LetterNode::LetterNode( char letter )
+LetterNode::LetterNode( char letter, int index )
+  : is_uppercase_( IsUppercase( letter ) ),
+    is_root_node_( false ),
+    index_( index )
 {
-  is_uppercase = IsUppercase( letter );
-  is_root_node = false;
 }
 
 
+// TODO: this class needs tests
 LetterNode::LetterNode( const std::string &text )
+  : is_uppercase_( false ),
+    is_root_node_( true ),
+    index_( -1 )
 {
-  is_uppercase = false;
-  is_root_node = true;
-
   letternode_per_text_index_.resize( text.size() );
 
   for ( uint i = 0; i < text.size(); ++i)
   {
     char letter = text[ i ];
-    LetterNode *node = new LetterNode( letter );
+    LetterNode *node = new LetterNode( letter, i );
     letters_[ letter ].push_back( node );
     letternode_per_text_index_[ i ] = boost::shared_ptr< LetterNode >( node );
   }
diff --git a/cpp/ycm/LetterNode.h b/cpp/ycm/LetterNode.h
index 068dc52a..3091e489 100644
--- a/cpp/ycm/LetterNode.h
+++ b/cpp/ycm/LetterNode.h
@@ -34,14 +34,14 @@ namespace YouCompleteMe
 class LetterNode : boost::noncopyable
 {
 public:
-	explicit LetterNode( char letter );
+	explicit LetterNode( char letter, int index );
 
   // this is for root nodes
 	explicit LetterNode( const std::string &text );
 
   inline bool LetterIsUppercase() const
   {
-    return is_uppercase;
+    return is_uppercase_;
   }
 
 
@@ -56,13 +56,19 @@ public:
     letters_[ letter ].push_front( node );
   }
 
+  inline int Index()
+  {
+    return index_;
+  }
+
 private:
 
   // TODO: rename LetterHash to LetterNodeListHash or LetterNodeListDict/Map?
   LetterHash letters_;
   std::vector< boost::shared_ptr< LetterNode > > letternode_per_text_index_;
-	bool is_uppercase;
-	bool is_root_node;
+	bool is_uppercase_;
+	bool is_root_node_;
+	int index_;
 };
 
 } // namespace YouCompleteMe
diff --git a/cpp/ycm/Result.cpp b/cpp/ycm/Result.cpp
index 340c7f55..891f2084 100644
--- a/cpp/ycm/Result.cpp
+++ b/cpp/ycm/Result.cpp
@@ -54,6 +54,7 @@ Result::Result( bool is_subsequence )
   word_boundary_char_utilization_( 0 ),
   query_is_candidate_prefix_( false ),
   text_is_lowercase_( false ),
+  char_match_index_sum_( 0 ),
   text_( NULL )
 {
 }
@@ -61,6 +62,7 @@ Result::Result( bool is_subsequence )
 Result::Result( bool is_subsequence,
                 const std::string *text,
                 bool text_is_lowercase,
+                int char_match_index_sum,
                 const std::string &word_boundary_chars,
                 const std::string &query )
   :
@@ -70,6 +72,7 @@ Result::Result( bool is_subsequence,
   word_boundary_char_utilization_( 0 ),
   query_is_candidate_prefix_( false ),
   text_is_lowercase_( text_is_lowercase ),
+  char_match_index_sum_( char_match_index_sum ),
   text_( text )
 {
   if ( is_subsequence )
@@ -77,6 +80,7 @@ Result::Result( bool is_subsequence,
 }
 
 
+// TODO: do we need a custom copy ctor?
 Result::Result( const Result& other )
   :
   is_subsequence_( other.is_subsequence_ ),
@@ -87,6 +91,7 @@ Result::Result( const Result& other )
   word_boundary_char_utilization_( other.word_boundary_char_utilization_ ),
   query_is_candidate_prefix_( other.query_is_candidate_prefix_ ),
   text_is_lowercase_( other.text_is_lowercase_ ),
+  char_match_index_sum_( other.char_match_index_sum_ ),
   text_( other.text_ )
 {
 }
@@ -144,6 +149,9 @@ bool Result::operator< ( const Result &other ) const {
         other.word_boundary_char_utilization_;
   }
 
+  if ( char_match_index_sum_ != other.char_match_index_sum_ )
+    return char_match_index_sum_ < other.char_match_index_sum_;
+
   if ( text_->length() != other.text_->length() )
     return text_->length() < other.text_->length();
 
diff --git a/cpp/ycm/Result.h b/cpp/ycm/Result.h
index c991f39f..b05efb16 100644
--- a/cpp/ycm/Result.h
+++ b/cpp/ycm/Result.h
@@ -31,6 +31,7 @@ public:
   Result( bool is_subsequence,
           const std::string *text,
           bool text_is_lowercase,
+          int char_match_index_sum,
           const std::string &word_boundary_chars,
           const std::string &query );
 
@@ -54,7 +55,13 @@ private:
       const std::string &word_boundary_chars );
 
 
+  // true when the characters of the query are a subsequence of the characters
+  // in the candidate text, e.g. the characters "abc" are a subsequence for
+  // "xxaygbefc" but not for "axxcb" since they occur in the correct order ('a'
+  // then 'b' then 'c') in the first string but not in the second.
   bool is_subsequence_;
+
+  // true when the first character of the query and the candidate match
   bool first_char_same_in_query_and_text_;
 
   // number of word boundary matches / number of chars in query
@@ -62,8 +69,21 @@ private:
 
   // number of word boundary matches / number of all word boundary chars
   double word_boundary_char_utilization_;
+
+  // true when the query is a prefix of the candidate string, e.g. "foo" query
+  // for "foobar" candidate.
   bool query_is_candidate_prefix_;
+
+  // true when the candidate text is all lowercase, e.g. "foo" candidate.
   bool text_is_lowercase_;
+
+  // The sum of the indexes of all the letters the query "hit" in the candidate
+  // text. For instance, the result for the query "abc" in the candidate
+  // "012a45bc8" has char_match_index_sum of 3 + 6 + 7 = 16 because those are
+  // the char indexes of those letters in the candidate string.
+  int char_match_index_sum_;
+
+  // points to the full candidate text
   const std::string *text_;
 
 };
diff --git a/cpp/ycm/tests/Completer_test.cpp b/cpp/ycm/tests/Completer_test.cpp
index ae5e5f03..ac9f1c7f 100644
--- a/cpp/ycm/tests/Completer_test.cpp
+++ b/cpp/ycm/tests/Completer_test.cpp
@@ -150,19 +150,47 @@ TEST( CompleterTest, QueryPrefixOfCandidateWins )
 	                          "fbaroo" ) );
 }
 
-TEST( CompleterTest, ShorterCandidateWins )
+TEST( CompleterTest, LowerMatchCharIndexSumWins )
 {
+	EXPECT_THAT( Completer( Candidates(
+              "ratio_of_word_boundary_chars_in_query_",
+              "first_char_same_in_query_and_text_") )
+                 .CandidatesForQuery( "charinq" ),
+              ElementsAre( "first_char_same_in_query_and_text_",
+                           "ratio_of_word_boundary_chars_in_query_") );
+
+	EXPECT_THAT( Completer( Candidates(
+               "barfooq",
+               "barquxfoo" ) ).CandidatesForQuery( "foo" ),
+	             ElementsAre( "barfooq",
+                            "barquxfoo") );
+
+	EXPECT_THAT( Completer( Candidates(
+               "xxxxxxabc",
+               "xxabcxxxx" ) ).CandidatesForQuery( "abc" ),
+	             ElementsAre( "xxabcxxxx",
+                            "xxxxxxabc") );
+
 	EXPECT_THAT( Completer( Candidates(
                "FooBarQux",
                "FaBarQux" ) ).CandidatesForQuery( "fbq" ),
 	             ElementsAre( "FaBarQux",
 	                          "FooBarQux" ) );
+}
 
+TEST( CompleterTest, ShorterCandidateWins )
+{
 	EXPECT_THAT( Completer( Candidates(
                "CompleterT",
                "CompleterTest" ) ).CandidatesForQuery( "co" ),
 	             ElementsAre( "CompleterT",
 	                          "CompleterTest" ) );
+
+	EXPECT_THAT( Completer( Candidates(
+               "CompleterT",
+               "CompleterTest" ) ).CandidatesForQuery( "plet" ),
+	             ElementsAre( "CompleterT",
+	                          "CompleterTest" ) );
 }
 
 TEST( CompleterTest, SameLowercaseCandidateWins )