From 9b875ca7f36372f7f2ac5a54f01ae1a898a5ea11 Mon Sep 17 00:00:00 2001 From: Strahinja Val Markovic Date: Sun, 24 Jun 2012 15:35:00 -0700 Subject: [PATCH] New sorting rule: char match index sum The point is that we want to prefer candidates that have the query characters "earlier" in their text, e.g. "xxabcxxx" over "xxxxxabc" for "abc" query. --- cpp/ycm/Candidate.cpp | 6 ++++-- cpp/ycm/LetterNode.cpp | 16 +++++++++------- cpp/ycm/LetterNode.h | 14 ++++++++++---- cpp/ycm/Result.cpp | 8 ++++++++ cpp/ycm/Result.h | 20 ++++++++++++++++++++ cpp/ycm/tests/Completer_test.cpp | 30 +++++++++++++++++++++++++++++- 6 files changed, 80 insertions(+), 14 deletions(-) diff --git a/cpp/ycm/Candidate.cpp b/cpp/ycm/Candidate.cpp index 28d778a4..e4479eaa 100644 --- a/cpp/ycm/Candidate.cpp +++ b/cpp/ycm/Candidate.cpp @@ -76,6 +76,7 @@ Candidate::Candidate( const std::string &text ) Result Candidate::QueryMatchResult( const std::string &query ) const { LetterNode *node = root_node_.get(); + int index_sum = 0; foreach ( char letter, query ) { @@ -85,10 +86,11 @@ Result Candidate::QueryMatchResult( const std::string &query ) const return Result( false ); node = list->front(); + index_sum += node->Index(); } - return Result( true, &text_, text_is_lowercase_, word_boundary_chars_, - query ); + return Result( true, &text_, text_is_lowercase_, index_sum, + word_boundary_chars_, query ); } } // namespace YouCompleteMe diff --git a/cpp/ycm/LetterNode.cpp b/cpp/ycm/LetterNode.cpp index 3a2ba6ed..6fc7ed75 100644 --- a/cpp/ycm/LetterNode.cpp +++ b/cpp/ycm/LetterNode.cpp @@ -22,24 +22,26 @@ namespace YouCompleteMe { -LetterNode::LetterNode( char letter ) +LetterNode::LetterNode( char letter, int index ) + : is_uppercase_( IsUppercase( letter ) ), + is_root_node_( false ), + index_( index ) { - is_uppercase = IsUppercase( letter ); - is_root_node = false; } +// TODO: this class needs tests LetterNode::LetterNode( const std::string &text ) + : is_uppercase_( false ), + is_root_node_( true ), + index_( -1 ) { - is_uppercase = false; - is_root_node = true; - letternode_per_text_index_.resize( text.size() ); for ( uint i = 0; i < text.size(); ++i) { char letter = text[ i ]; - LetterNode *node = new LetterNode( letter ); + LetterNode *node = new LetterNode( letter, i ); letters_[ letter ].push_back( node ); letternode_per_text_index_[ i ] = boost::shared_ptr< LetterNode >( node ); } diff --git a/cpp/ycm/LetterNode.h b/cpp/ycm/LetterNode.h index 068dc52a..3091e489 100644 --- a/cpp/ycm/LetterNode.h +++ b/cpp/ycm/LetterNode.h @@ -34,14 +34,14 @@ namespace YouCompleteMe class LetterNode : boost::noncopyable { public: - explicit LetterNode( char letter ); + explicit LetterNode( char letter, int index ); // this is for root nodes explicit LetterNode( const std::string &text ); inline bool LetterIsUppercase() const { - return is_uppercase; + return is_uppercase_; } @@ -56,13 +56,19 @@ public: letters_[ letter ].push_front( node ); } + inline int Index() + { + return index_; + } + private: // TODO: rename LetterHash to LetterNodeListHash or LetterNodeListDict/Map? LetterHash letters_; std::vector< boost::shared_ptr< LetterNode > > letternode_per_text_index_; - bool is_uppercase; - bool is_root_node; + bool is_uppercase_; + bool is_root_node_; + int index_; }; } // namespace YouCompleteMe diff --git a/cpp/ycm/Result.cpp b/cpp/ycm/Result.cpp index 340c7f55..891f2084 100644 --- a/cpp/ycm/Result.cpp +++ b/cpp/ycm/Result.cpp @@ -54,6 +54,7 @@ Result::Result( bool is_subsequence ) word_boundary_char_utilization_( 0 ), query_is_candidate_prefix_( false ), text_is_lowercase_( false ), + char_match_index_sum_( 0 ), text_( NULL ) { } @@ -61,6 +62,7 @@ Result::Result( bool is_subsequence ) Result::Result( bool is_subsequence, const std::string *text, bool text_is_lowercase, + int char_match_index_sum, const std::string &word_boundary_chars, const std::string &query ) : @@ -70,6 +72,7 @@ Result::Result( bool is_subsequence, word_boundary_char_utilization_( 0 ), query_is_candidate_prefix_( false ), text_is_lowercase_( text_is_lowercase ), + char_match_index_sum_( char_match_index_sum ), text_( text ) { if ( is_subsequence ) @@ -77,6 +80,7 @@ Result::Result( bool is_subsequence, } +// TODO: do we need a custom copy ctor? Result::Result( const Result& other ) : is_subsequence_( other.is_subsequence_ ), @@ -87,6 +91,7 @@ Result::Result( const Result& other ) word_boundary_char_utilization_( other.word_boundary_char_utilization_ ), query_is_candidate_prefix_( other.query_is_candidate_prefix_ ), text_is_lowercase_( other.text_is_lowercase_ ), + char_match_index_sum_( other.char_match_index_sum_ ), text_( other.text_ ) { } @@ -144,6 +149,9 @@ bool Result::operator< ( const Result &other ) const { other.word_boundary_char_utilization_; } + if ( char_match_index_sum_ != other.char_match_index_sum_ ) + return char_match_index_sum_ < other.char_match_index_sum_; + if ( text_->length() != other.text_->length() ) return text_->length() < other.text_->length(); diff --git a/cpp/ycm/Result.h b/cpp/ycm/Result.h index c991f39f..b05efb16 100644 --- a/cpp/ycm/Result.h +++ b/cpp/ycm/Result.h @@ -31,6 +31,7 @@ public: Result( bool is_subsequence, const std::string *text, bool text_is_lowercase, + int char_match_index_sum, const std::string &word_boundary_chars, const std::string &query ); @@ -54,7 +55,13 @@ private: const std::string &word_boundary_chars ); + // true when the characters of the query are a subsequence of the characters + // in the candidate text, e.g. the characters "abc" are a subsequence for + // "xxaygbefc" but not for "axxcb" since they occur in the correct order ('a' + // then 'b' then 'c') in the first string but not in the second. bool is_subsequence_; + + // true when the first character of the query and the candidate match bool first_char_same_in_query_and_text_; // number of word boundary matches / number of chars in query @@ -62,8 +69,21 @@ private: // number of word boundary matches / number of all word boundary chars double word_boundary_char_utilization_; + + // true when the query is a prefix of the candidate string, e.g. "foo" query + // for "foobar" candidate. bool query_is_candidate_prefix_; + + // true when the candidate text is all lowercase, e.g. "foo" candidate. bool text_is_lowercase_; + + // The sum of the indexes of all the letters the query "hit" in the candidate + // text. For instance, the result for the query "abc" in the candidate + // "012a45bc8" has char_match_index_sum of 3 + 6 + 7 = 16 because those are + // the char indexes of those letters in the candidate string. + int char_match_index_sum_; + + // points to the full candidate text const std::string *text_; }; diff --git a/cpp/ycm/tests/Completer_test.cpp b/cpp/ycm/tests/Completer_test.cpp index ae5e5f03..ac9f1c7f 100644 --- a/cpp/ycm/tests/Completer_test.cpp +++ b/cpp/ycm/tests/Completer_test.cpp @@ -150,19 +150,47 @@ TEST( CompleterTest, QueryPrefixOfCandidateWins ) "fbaroo" ) ); } -TEST( CompleterTest, ShorterCandidateWins ) +TEST( CompleterTest, LowerMatchCharIndexSumWins ) { + EXPECT_THAT( Completer( Candidates( + "ratio_of_word_boundary_chars_in_query_", + "first_char_same_in_query_and_text_") ) + .CandidatesForQuery( "charinq" ), + ElementsAre( "first_char_same_in_query_and_text_", + "ratio_of_word_boundary_chars_in_query_") ); + + EXPECT_THAT( Completer( Candidates( + "barfooq", + "barquxfoo" ) ).CandidatesForQuery( "foo" ), + ElementsAre( "barfooq", + "barquxfoo") ); + + EXPECT_THAT( Completer( Candidates( + "xxxxxxabc", + "xxabcxxxx" ) ).CandidatesForQuery( "abc" ), + ElementsAre( "xxabcxxxx", + "xxxxxxabc") ); + EXPECT_THAT( Completer( Candidates( "FooBarQux", "FaBarQux" ) ).CandidatesForQuery( "fbq" ), ElementsAre( "FaBarQux", "FooBarQux" ) ); +} +TEST( CompleterTest, ShorterCandidateWins ) +{ EXPECT_THAT( Completer( Candidates( "CompleterT", "CompleterTest" ) ).CandidatesForQuery( "co" ), ElementsAre( "CompleterT", "CompleterTest" ) ); + + EXPECT_THAT( Completer( Candidates( + "CompleterT", + "CompleterTest" ) ).CandidatesForQuery( "plet" ), + ElementsAre( "CompleterT", + "CompleterTest" ) ); } TEST( CompleterTest, SameLowercaseCandidateWins )