New sorting rule: char match index sum

The goal is to prefer candidates in which the query characters appear
"earlier" in the text, e.g. "xxabcxxx" ranks above "xxxxxabc" for the "abc" query.
This commit is contained in:
Strahinja Val Markovic 2012-06-24 15:35:00 -07:00
parent adb31abef2
commit 9b875ca7f3
6 changed files with 80 additions and 14 deletions

View File

@ -76,6 +76,7 @@ Candidate::Candidate( const std::string &text )
Result Candidate::QueryMatchResult( const std::string &query ) const
{
LetterNode *node = root_node_.get();
int index_sum = 0;
foreach ( char letter, query )
{
@ -85,10 +86,11 @@ Result Candidate::QueryMatchResult( const std::string &query ) const
return Result( false );
node = list->front();
index_sum += node->Index();
}
return Result( true, &text_, text_is_lowercase_, word_boundary_chars_,
query );
return Result( true, &text_, text_is_lowercase_, index_sum,
word_boundary_chars_, query );
}
} // namespace YouCompleteMe

View File

@ -22,24 +22,26 @@
namespace YouCompleteMe
{
// Builds a non-root node for a single letter. Records whether the letter is
// uppercase and stores the caller-supplied index; the root-node constructor
// passes the letter's position in the candidate text as that index.
LetterNode::LetterNode( char letter )
LetterNode::LetterNode( char letter, int index )
: is_uppercase_( IsUppercase( letter ) ),
is_root_node_( false ),
index_( index )
{
is_uppercase = IsUppercase( letter );
is_root_node = false;
}
// TODO: this class needs tests
LetterNode::LetterNode( const std::string &text )
: is_uppercase_( false ),
is_root_node_( true ),
index_( -1 )
{
is_uppercase = false;
is_root_node = true;
letternode_per_text_index_.resize( text.size() );
for ( uint i = 0; i < text.size(); ++i)
{
char letter = text[ i ];
LetterNode *node = new LetterNode( letter );
LetterNode *node = new LetterNode( letter, i );
letters_[ letter ].push_back( node );
letternode_per_text_index_[ i ] = boost::shared_ptr< LetterNode >( node );
}

View File

@ -34,14 +34,14 @@ namespace YouCompleteMe
class LetterNode : boost::noncopyable
{
public:
explicit LetterNode( char letter );
explicit LetterNode( char letter, int index );
// this is for root nodes
explicit LetterNode( const std::string &text );
// Returns the uppercase flag captured at construction: the result of
// IsUppercase( letter ) for letter nodes, and false for root nodes.
inline bool LetterIsUppercase() const
{
return is_uppercase;
return is_uppercase_;
}
@ -56,13 +56,19 @@ public:
letters_[ letter ].push_front( node );
}
inline int Index()
{
return index_;
}
private:
// TODO: rename LetterHash to LetterNodeListHash or LetterNodeListDict/Map?
LetterHash letters_;
std::vector< boost::shared_ptr< LetterNode > > letternode_per_text_index_;
bool is_uppercase;
bool is_root_node;
bool is_uppercase_;
bool is_root_node_;
int index_;
};
} // namespace YouCompleteMe

View File

@ -54,6 +54,7 @@ Result::Result( bool is_subsequence )
word_boundary_char_utilization_( 0 ),
query_is_candidate_prefix_( false ),
text_is_lowercase_( false ),
char_match_index_sum_( 0 ),
text_( NULL )
{
}
@ -61,6 +62,7 @@ Result::Result( bool is_subsequence )
Result::Result( bool is_subsequence,
const std::string *text,
bool text_is_lowercase,
int char_match_index_sum,
const std::string &word_boundary_chars,
const std::string &query )
:
@ -70,6 +72,7 @@ Result::Result( bool is_subsequence,
word_boundary_char_utilization_( 0 ),
query_is_candidate_prefix_( false ),
text_is_lowercase_( text_is_lowercase ),
char_match_index_sum_( char_match_index_sum ),
text_( text )
{
if ( is_subsequence )
@ -77,6 +80,7 @@ Result::Result( bool is_subsequence,
}
// TODO: do we need a custom copy ctor?
Result::Result( const Result& other )
:
is_subsequence_( other.is_subsequence_ ),
@ -87,6 +91,7 @@ Result::Result( const Result& other )
word_boundary_char_utilization_( other.word_boundary_char_utilization_ ),
query_is_candidate_prefix_( other.query_is_candidate_prefix_ ),
text_is_lowercase_( other.text_is_lowercase_ ),
char_match_index_sum_( other.char_match_index_sum_ ),
text_( other.text_ )
{
}
@ -144,6 +149,9 @@ bool Result::operator< ( const Result &other ) const {
other.word_boundary_char_utilization_;
}
if ( char_match_index_sum_ != other.char_match_index_sum_ )
return char_match_index_sum_ < other.char_match_index_sum_;
if ( text_->length() != other.text_->length() )
return text_->length() < other.text_->length();

View File

@ -31,6 +31,7 @@ public:
Result( bool is_subsequence,
const std::string *text,
bool text_is_lowercase,
int char_match_index_sum,
const std::string &word_boundary_chars,
const std::string &query );
@ -54,7 +55,13 @@ private:
const std::string &word_boundary_chars );
// true when the characters of the query are a subsequence of the characters
// in the candidate text, e.g. the characters "abc" are a subsequence for
// "xxaygbefc" but not for "axxcb" since they occur in the correct order ('a'
// then 'b' then 'c') in the first string but not in the second.
bool is_subsequence_;
// true when the first character of the query and the candidate match
bool first_char_same_in_query_and_text_;
// number of word boundary matches / number of chars in query
@ -62,8 +69,21 @@ private:
// number of word boundary matches / number of all word boundary chars
double word_boundary_char_utilization_;
// true when the query is a prefix of the candidate string, e.g. "foo" query
// for "foobar" candidate.
bool query_is_candidate_prefix_;
// true when the candidate text is all lowercase, e.g. "foo" candidate.
bool text_is_lowercase_;
// The sum of the indexes of all the letters the query "hit" in the candidate
// text. For instance, the result for the query "abc" in the candidate
// "012a45bc8" has char_match_index_sum of 3 + 6 + 7 = 16 because those are
// the char indexes of those letters in the candidate string.
int char_match_index_sum_;
// points to the full candidate text
const std::string *text_;
};

View File

@ -150,19 +150,47 @@ TEST( CompleterTest, QueryPrefixOfCandidateWins )
"fbaroo" ) );
}
TEST( CompleterTest, ShorterCandidateWins )
// Checks that, between otherwise comparable candidates, the one whose matched
// characters sit at lower indexes in its text is returned first. In every
// case the candidates are passed in the opposite order from the expected
// result or in the expected order, so the ranking (not input order) is what
// is being asserted.
TEST( CompleterTest, LowerMatchCharIndexSumWins )
{
// "char" begins at index 6 in "first_char_same_in_query_and_text_" but only
// at index 23 in "ratio_of_word_boundary_chars_in_query_".
EXPECT_THAT( Completer( Candidates(
"ratio_of_word_boundary_chars_in_query_",
"first_char_same_in_query_and_text_") )
.CandidatesForQuery( "charinq" ),
ElementsAre( "first_char_same_in_query_and_text_",
"ratio_of_word_boundary_chars_in_query_") );
// "foo" occupies indexes 3-5 in "barfooq" and indexes 6-8 in "barquxfoo".
EXPECT_THAT( Completer( Candidates(
"barfooq",
"barquxfoo" ) ).CandidatesForQuery( "foo" ),
ElementsAre( "barfooq",
"barquxfoo") );
// Same length candidates: "abc" starts at index 2 in "xxabcxxxx" and at
// index 6 in "xxxxxxabc", so length cannot be the deciding factor here.
EXPECT_THAT( Completer( Candidates(
"xxxxxxabc",
"xxabcxxxx" ) ).CandidatesForQuery( "abc" ),
ElementsAre( "xxabcxxxx",
"xxxxxxabc") );
// The capitals F, B, Q are at indexes 0, 2, 5 in "FaBarQux" and at
// indexes 0, 3, 6 in "FooBarQux".
EXPECT_THAT( Completer( Candidates(
"FooBarQux",
"FaBarQux" ) ).CandidatesForQuery( "fbq" ),
ElementsAre( "FaBarQux",
"FooBarQux" ) );
}
// Checks that the shorter candidate is returned first. Both candidates share
// the prefix "CompleterT", so the characters of each query occur at the same
// positions in both texts; presumably the earlier ranking criteria tie and
// text length is left to decide.
TEST( CompleterTest, ShorterCandidateWins )
{
// Query is a prefix of both candidates.
EXPECT_THAT( Completer( Candidates(
"CompleterT",
"CompleterTest" ) ).CandidatesForQuery( "co" ),
ElementsAre( "CompleterT",
"CompleterTest" ) );
// Query matches in the middle of both candidates ("plet" at indexes 3-6).
EXPECT_THAT( Completer( Candidates(
"CompleterT",
"CompleterTest" ) ).CandidatesForQuery( "plet" ),
ElementsAre( "CompleterT",
"CompleterTest" ) );
}
TEST( CompleterTest, SameLowercaseCandidateWins )