New sorting rule: char match index sum
The goal is to prefer candidates in which the query characters occur "earlier" in the text, e.g. "xxabcxxx" over "xxxxxabc" for the query "abc".
This commit is contained in:
parent
adb31abef2
commit
9b875ca7f3
@ -76,6 +76,7 @@ Candidate::Candidate( const std::string &text )
|
|||||||
Result Candidate::QueryMatchResult( const std::string &query ) const
|
Result Candidate::QueryMatchResult( const std::string &query ) const
|
||||||
{
|
{
|
||||||
LetterNode *node = root_node_.get();
|
LetterNode *node = root_node_.get();
|
||||||
|
int index_sum = 0;
|
||||||
|
|
||||||
foreach ( char letter, query )
|
foreach ( char letter, query )
|
||||||
{
|
{
|
||||||
@ -85,10 +86,11 @@ Result Candidate::QueryMatchResult( const std::string &query ) const
|
|||||||
return Result( false );
|
return Result( false );
|
||||||
|
|
||||||
node = list->front();
|
node = list->front();
|
||||||
|
index_sum += node->Index();
|
||||||
}
|
}
|
||||||
|
|
||||||
return Result( true, &text_, text_is_lowercase_, word_boundary_chars_,
|
return Result( true, &text_, text_is_lowercase_, index_sum,
|
||||||
query );
|
word_boundary_chars_, query );
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace YouCompleteMe
|
} // namespace YouCompleteMe
|
||||||
|
@ -22,24 +22,26 @@
|
|||||||
namespace YouCompleteMe
|
namespace YouCompleteMe
|
||||||
{
|
{
|
||||||
|
|
||||||
LetterNode::LetterNode( char letter )
|
LetterNode::LetterNode( char letter, int index )
|
||||||
|
: is_uppercase_( IsUppercase( letter ) ),
|
||||||
|
is_root_node_( false ),
|
||||||
|
index_( index )
|
||||||
{
|
{
|
||||||
is_uppercase = IsUppercase( letter );
|
|
||||||
is_root_node = false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// TODO: this class needs tests
|
||||||
LetterNode::LetterNode( const std::string &text )
|
LetterNode::LetterNode( const std::string &text )
|
||||||
|
: is_uppercase_( false ),
|
||||||
|
is_root_node_( true ),
|
||||||
|
index_( -1 )
|
||||||
{
|
{
|
||||||
is_uppercase = false;
|
|
||||||
is_root_node = true;
|
|
||||||
|
|
||||||
letternode_per_text_index_.resize( text.size() );
|
letternode_per_text_index_.resize( text.size() );
|
||||||
|
|
||||||
for ( uint i = 0; i < text.size(); ++i)
|
for ( uint i = 0; i < text.size(); ++i)
|
||||||
{
|
{
|
||||||
char letter = text[ i ];
|
char letter = text[ i ];
|
||||||
LetterNode *node = new LetterNode( letter );
|
LetterNode *node = new LetterNode( letter, i );
|
||||||
letters_[ letter ].push_back( node );
|
letters_[ letter ].push_back( node );
|
||||||
letternode_per_text_index_[ i ] = boost::shared_ptr< LetterNode >( node );
|
letternode_per_text_index_[ i ] = boost::shared_ptr< LetterNode >( node );
|
||||||
}
|
}
|
||||||
|
@ -34,14 +34,14 @@ namespace YouCompleteMe
|
|||||||
class LetterNode : boost::noncopyable
|
class LetterNode : boost::noncopyable
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
explicit LetterNode( char letter );
|
explicit LetterNode( char letter, int index );
|
||||||
|
|
||||||
// this is for root nodes
|
// this is for root nodes
|
||||||
explicit LetterNode( const std::string &text );
|
explicit LetterNode( const std::string &text );
|
||||||
|
|
||||||
inline bool LetterIsUppercase() const
|
inline bool LetterIsUppercase() const
|
||||||
{
|
{
|
||||||
return is_uppercase;
|
return is_uppercase_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -56,13 +56,19 @@ public:
|
|||||||
letters_[ letter ].push_front( node );
|
letters_[ letter ].push_front( node );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline int Index()
|
||||||
|
{
|
||||||
|
return index_;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
// TODO: rename LetterHash to LetterNodeListHash or LetterNodeListDict/Map?
|
// TODO: rename LetterHash to LetterNodeListHash or LetterNodeListDict/Map?
|
||||||
LetterHash letters_;
|
LetterHash letters_;
|
||||||
std::vector< boost::shared_ptr< LetterNode > > letternode_per_text_index_;
|
std::vector< boost::shared_ptr< LetterNode > > letternode_per_text_index_;
|
||||||
bool is_uppercase;
|
bool is_uppercase_;
|
||||||
bool is_root_node;
|
bool is_root_node_;
|
||||||
|
int index_;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace YouCompleteMe
|
} // namespace YouCompleteMe
|
||||||
|
@ -54,6 +54,7 @@ Result::Result( bool is_subsequence )
|
|||||||
word_boundary_char_utilization_( 0 ),
|
word_boundary_char_utilization_( 0 ),
|
||||||
query_is_candidate_prefix_( false ),
|
query_is_candidate_prefix_( false ),
|
||||||
text_is_lowercase_( false ),
|
text_is_lowercase_( false ),
|
||||||
|
char_match_index_sum_( 0 ),
|
||||||
text_( NULL )
|
text_( NULL )
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
@ -61,6 +62,7 @@ Result::Result( bool is_subsequence )
|
|||||||
Result::Result( bool is_subsequence,
|
Result::Result( bool is_subsequence,
|
||||||
const std::string *text,
|
const std::string *text,
|
||||||
bool text_is_lowercase,
|
bool text_is_lowercase,
|
||||||
|
int char_match_index_sum,
|
||||||
const std::string &word_boundary_chars,
|
const std::string &word_boundary_chars,
|
||||||
const std::string &query )
|
const std::string &query )
|
||||||
:
|
:
|
||||||
@ -70,6 +72,7 @@ Result::Result( bool is_subsequence,
|
|||||||
word_boundary_char_utilization_( 0 ),
|
word_boundary_char_utilization_( 0 ),
|
||||||
query_is_candidate_prefix_( false ),
|
query_is_candidate_prefix_( false ),
|
||||||
text_is_lowercase_( text_is_lowercase ),
|
text_is_lowercase_( text_is_lowercase ),
|
||||||
|
char_match_index_sum_( char_match_index_sum ),
|
||||||
text_( text )
|
text_( text )
|
||||||
{
|
{
|
||||||
if ( is_subsequence )
|
if ( is_subsequence )
|
||||||
@ -77,6 +80,7 @@ Result::Result( bool is_subsequence,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// TODO: do we need a custom copy ctor?
|
||||||
Result::Result( const Result& other )
|
Result::Result( const Result& other )
|
||||||
:
|
:
|
||||||
is_subsequence_( other.is_subsequence_ ),
|
is_subsequence_( other.is_subsequence_ ),
|
||||||
@ -87,6 +91,7 @@ Result::Result( const Result& other )
|
|||||||
word_boundary_char_utilization_( other.word_boundary_char_utilization_ ),
|
word_boundary_char_utilization_( other.word_boundary_char_utilization_ ),
|
||||||
query_is_candidate_prefix_( other.query_is_candidate_prefix_ ),
|
query_is_candidate_prefix_( other.query_is_candidate_prefix_ ),
|
||||||
text_is_lowercase_( other.text_is_lowercase_ ),
|
text_is_lowercase_( other.text_is_lowercase_ ),
|
||||||
|
char_match_index_sum_( other.char_match_index_sum_ ),
|
||||||
text_( other.text_ )
|
text_( other.text_ )
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
@ -144,6 +149,9 @@ bool Result::operator< ( const Result &other ) const {
|
|||||||
other.word_boundary_char_utilization_;
|
other.word_boundary_char_utilization_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( char_match_index_sum_ != other.char_match_index_sum_ )
|
||||||
|
return char_match_index_sum_ < other.char_match_index_sum_;
|
||||||
|
|
||||||
if ( text_->length() != other.text_->length() )
|
if ( text_->length() != other.text_->length() )
|
||||||
return text_->length() < other.text_->length();
|
return text_->length() < other.text_->length();
|
||||||
|
|
||||||
|
@ -31,6 +31,7 @@ public:
|
|||||||
Result( bool is_subsequence,
|
Result( bool is_subsequence,
|
||||||
const std::string *text,
|
const std::string *text,
|
||||||
bool text_is_lowercase,
|
bool text_is_lowercase,
|
||||||
|
int char_match_index_sum,
|
||||||
const std::string &word_boundary_chars,
|
const std::string &word_boundary_chars,
|
||||||
const std::string &query );
|
const std::string &query );
|
||||||
|
|
||||||
@ -54,7 +55,13 @@ private:
|
|||||||
const std::string &word_boundary_chars );
|
const std::string &word_boundary_chars );
|
||||||
|
|
||||||
|
|
||||||
|
// true when the characters of the query are a subsequence of the characters
|
||||||
|
// in the candidate text, e.g. the characters "abc" are a subsequence for
|
||||||
|
// "xxaygbefc" but not for "axxcb" since they occur in the correct order ('a'
|
||||||
|
// then 'b' then 'c') in the first string but not in the second.
|
||||||
bool is_subsequence_;
|
bool is_subsequence_;
|
||||||
|
|
||||||
|
// true when the first character of the query and the candidate match
|
||||||
bool first_char_same_in_query_and_text_;
|
bool first_char_same_in_query_and_text_;
|
||||||
|
|
||||||
// number of word boundary matches / number of chars in query
|
// number of word boundary matches / number of chars in query
|
||||||
@ -62,8 +69,21 @@ private:
|
|||||||
|
|
||||||
// number of word boundary matches / number of all word boundary chars
|
// number of word boundary matches / number of all word boundary chars
|
||||||
double word_boundary_char_utilization_;
|
double word_boundary_char_utilization_;
|
||||||
|
|
||||||
|
// true when the query is a prefix of the candidate string, e.g. "foo" query
|
||||||
|
// for "foobar" candidate.
|
||||||
bool query_is_candidate_prefix_;
|
bool query_is_candidate_prefix_;
|
||||||
|
|
||||||
|
// true when the candidate text is all lowercase, e.g. "foo" candidate.
|
||||||
bool text_is_lowercase_;
|
bool text_is_lowercase_;
|
||||||
|
|
||||||
|
// The sum of the indexes of all the letters the query "hit" in the candidate
|
||||||
|
// text. For instance, the result for the query "abc" in the candidate
|
||||||
|
// "012a45bc8" has char_match_index_sum of 3 + 6 + 7 = 16 because those are
|
||||||
|
// the char indexes of those letters in the candidate string.
|
||||||
|
int char_match_index_sum_;
|
||||||
|
|
||||||
|
// points to the full candidate text
|
||||||
const std::string *text_;
|
const std::string *text_;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
@ -150,19 +150,47 @@ TEST( CompleterTest, QueryPrefixOfCandidateWins )
|
|||||||
"fbaroo" ) );
|
"fbaroo" ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST( CompleterTest, ShorterCandidateWins )
|
TEST( CompleterTest, LowerMatchCharIndexSumWins )
|
||||||
{
|
{
|
||||||
|
EXPECT_THAT( Completer( Candidates(
|
||||||
|
"ratio_of_word_boundary_chars_in_query_",
|
||||||
|
"first_char_same_in_query_and_text_") )
|
||||||
|
.CandidatesForQuery( "charinq" ),
|
||||||
|
ElementsAre( "first_char_same_in_query_and_text_",
|
||||||
|
"ratio_of_word_boundary_chars_in_query_") );
|
||||||
|
|
||||||
|
EXPECT_THAT( Completer( Candidates(
|
||||||
|
"barfooq",
|
||||||
|
"barquxfoo" ) ).CandidatesForQuery( "foo" ),
|
||||||
|
ElementsAre( "barfooq",
|
||||||
|
"barquxfoo") );
|
||||||
|
|
||||||
|
EXPECT_THAT( Completer( Candidates(
|
||||||
|
"xxxxxxabc",
|
||||||
|
"xxabcxxxx" ) ).CandidatesForQuery( "abc" ),
|
||||||
|
ElementsAre( "xxabcxxxx",
|
||||||
|
"xxxxxxabc") );
|
||||||
|
|
||||||
EXPECT_THAT( Completer( Candidates(
|
EXPECT_THAT( Completer( Candidates(
|
||||||
"FooBarQux",
|
"FooBarQux",
|
||||||
"FaBarQux" ) ).CandidatesForQuery( "fbq" ),
|
"FaBarQux" ) ).CandidatesForQuery( "fbq" ),
|
||||||
ElementsAre( "FaBarQux",
|
ElementsAre( "FaBarQux",
|
||||||
"FooBarQux" ) );
|
"FooBarQux" ) );
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST( CompleterTest, ShorterCandidateWins )
|
||||||
|
{
|
||||||
EXPECT_THAT( Completer( Candidates(
|
EXPECT_THAT( Completer( Candidates(
|
||||||
"CompleterT",
|
"CompleterT",
|
||||||
"CompleterTest" ) ).CandidatesForQuery( "co" ),
|
"CompleterTest" ) ).CandidatesForQuery( "co" ),
|
||||||
ElementsAre( "CompleterT",
|
ElementsAre( "CompleterT",
|
||||||
"CompleterTest" ) );
|
"CompleterTest" ) );
|
||||||
|
|
||||||
|
EXPECT_THAT( Completer( Candidates(
|
||||||
|
"CompleterT",
|
||||||
|
"CompleterTest" ) ).CandidatesForQuery( "plet" ),
|
||||||
|
ElementsAre( "CompleterT",
|
||||||
|
"CompleterTest" ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST( CompleterTest, SameLowercaseCandidateWins )
|
TEST( CompleterTest, SameLowercaseCandidateWins )
|
||||||
|
Loading…
x
Reference in New Issue
Block a user