Now extracting identifiers in C++ code

This commit is contained in:
Strahinja Val Markovic 2012-07-23 20:17:59 -07:00
parent 48d95bcd20
commit b120d6a5ba
8 changed files with 243 additions and 41 deletions

View File

@ -20,6 +20,7 @@
#include "CandidateRepository.h" #include "CandidateRepository.h"
#include "Candidate.h" #include "Candidate.h"
#include "Utils.h" #include "Utils.h"
#include "IdentifierUtils.h"
#include <boost/unordered_set.hpp> #include <boost/unordered_set.hpp>
#include <boost/bind.hpp> #include <boost/bind.hpp>
@ -51,6 +52,7 @@ void ThreadMain( LatestTask &latest_task )
} }
} }
} // unnamed namespace } // unnamed namespace
@ -66,7 +68,7 @@ IdentifierCompleter::IdentifierCompleter(
: candidate_repository_( CandidateRepository::Instance() ), : candidate_repository_( CandidateRepository::Instance() ),
threading_enabled_( false ) threading_enabled_( false )
{ {
AddCandidatesToDatabase( candidates, "", "", true ); AddCandidatesToDatabase( candidates, "", "" );
} }
@ -77,7 +79,7 @@ IdentifierCompleter::IdentifierCompleter(
: candidate_repository_( CandidateRepository::Instance() ), : candidate_repository_( CandidateRepository::Instance() ),
threading_enabled_( false ) threading_enabled_( false )
{ {
AddCandidatesToDatabase( candidates, filetype, filepath, true ); AddCandidatesToDatabase( candidates, filetype, filepath );
} }
@ -93,15 +95,11 @@ void IdentifierCompleter::EnableThreading()
void IdentifierCompleter::AddCandidatesToDatabase( void IdentifierCompleter::AddCandidatesToDatabase(
const std::vector< std::string > &new_candidates, const std::vector< std::string > &new_candidates,
const std::string &filetype, const std::string &filetype,
const std::string &filepath, const std::string &filepath )
bool clear_database )
{ {
std::list< const Candidate *> &candidates = std::list< const Candidate *> &candidates =
GetCandidateList( filetype, filepath ); GetCandidateList( filetype, filepath );
if ( clear_database )
candidates.clear();
std::vector< const Candidate* > repository_candidates = std::vector< const Candidate* > repository_candidates =
candidate_repository_.GetCandidatesForStrings( new_candidates ); candidate_repository_.GetCandidatesForStrings( new_candidates );
@ -111,6 +109,28 @@ void IdentifierCompleter::AddCandidatesToDatabase(
} }
// Replaces the candidates stored for (|filetype|, |filepath|) with the
// identifiers found in |buffer_contents|.
//
// The stored list for the file is cleared first; the buffer text is then
// passed through RemoveIdentifierFreeText and ExtractIdentifiersFromText, and
// the resulting identifiers are handed to AddCandidatesToDatabase.
void IdentifierCompleter::AddCandidatesToDatabaseFromBuffer(
    const std::string &buffer_contents,
    const std::string &filetype,
    const std::string &filepath )
{
  // Clearing happens before any extraction work, exactly as callers expect:
  // the previous contents of this file's list never survive a re-scan.
  ClearCandidatesStoredForFile( filetype, filepath );

  const std::string identifier_bearing_text =
    RemoveIdentifierFreeText( buffer_contents );
  const std::vector< std::string > buffer_identifiers =
    ExtractIdentifiersFromText( identifier_bearing_text );

  AddCandidatesToDatabase( buffer_identifiers, filetype, filepath );
}
void IdentifierCompleter::ClearCandidatesStoredForFile(
const std::string &filetype,
const std::string &filepath )
{
GetCandidateList( filetype, filepath ).clear();
}
std::vector< std::string > IdentifierCompleter::CandidatesForQuery( std::vector< std::string > IdentifierCompleter::CandidatesForQuery(
const std::string &query ) const const std::string &query ) const
{ {

View File

@ -70,8 +70,14 @@ public:
void AddCandidatesToDatabase( void AddCandidatesToDatabase(
const std::vector< std::string > &new_candidates, const std::vector< std::string > &new_candidates,
const std::string &filetype, const std::string &filetype,
const std::string &filepath, const std::string &filepath );
bool clear_database );
void AddCandidatesToDatabaseFromBuffer( const std::string &buffer_contents,
const std::string &filetype,
const std::string &filepath );
void ClearCandidatesStoredForFile( const std::string &filetype,
const std::string &filepath );
// Only provided for tests! // Only provided for tests!
std::vector< std::string > CandidatesForQuery( std::vector< std::string > CandidatesForQuery(

View File

@ -0,0 +1,68 @@
// Copyright (C) 2011, 2012 Strahinja Val Markovic <val@markovic.io>
//
// This file is part of YouCompleteMe.
//
// YouCompleteMe is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// YouCompleteMe is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with YouCompleteMe. If not, see <http://www.gnu.org/licenses/>.
#include "IdentifierUtils.h"
#include "standard.h"
#include <boost/regex.hpp>
#include <boost/algorithm/string/regex.hpp>
namespace YouCompleteMe
{
// Pattern for the spans of a buffer that must not contribute identifiers:
// '//' and '#' line comments, C-style block comments, and single- or
// double-quoted literals.
//
// The quoted alternatives honor backslash escapes: a backslash consumes the
// character that follows it, so an escaped quote (\' or \") does not end the
// literal and an escaped backslash (\\) directly before the closing quote does
// not hide that quote. Without this, the tail of a literal such as
// "say \"hi\"" would be left behind and scanned for identifiers.
//
// NOTE(review): the comment alternatives rely on the regex engine matching
// '$' at the end of each line and '.' across newlines, which is what the
// RemoveIdentifierFreeText unit tests exercise -- confirm if the engine or its
// flags ever change.
const char* COMMENT_AND_STRING_REGEX =
  "//.*?$"                      // Anything following '//'
  "|"
  "#.*?$"                       // Anything following '#'
  "|"
  "/\\*.*?\\*/"                 // C-style comments, '/* ... */'
  "|"
  "'(?:\\\\.|[^\\\\'])*'"       // Single-quoted text, escapes honored
  "|"
  "\"(?:\\\\.|[^\\\\\"])*\"";   // Double-quoted text, escapes honored

// Pattern for a single identifier: a letter or underscore followed by any
// number of word characters.
const char* IDENTIFIER_REGEX = "[_a-zA-Z]\\w*";
std::string RemoveIdentifierFreeText( const std::string &text )
{
std::string new_text = text;
boost::erase_all_regex( new_text, boost::regex( COMMENT_AND_STRING_REGEX ) );
return new_text;
}
std::vector< std::string > ExtractIdentifiersFromText(
const std::string &text )
{
std::string::const_iterator start = text.begin();
std::string::const_iterator end = text.end();
boost::match_results< std::string::const_iterator > matches;
boost::regex expression( IDENTIFIER_REGEX );
std::vector< std::string > identifiers;
while ( boost::regex_search( start, end, matches, expression ) )
{
identifiers.push_back( matches[ 0 ] );
start = matches[ 0 ].second;
}
return identifiers;
}
} // namespace YouCompleteMe

34
cpp/ycm/IdentifierUtils.h Normal file
View File

@ -0,0 +1,34 @@
// Copyright (C) 2011, 2012 Strahinja Val Markovic <val@markovic.io>
//
// This file is part of YouCompleteMe.
//
// YouCompleteMe is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// YouCompleteMe is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with YouCompleteMe. If not, see <http://www.gnu.org/licenses/>.
#ifndef IDENTIFIERUTILS_CPP_WFFUZNET
#define IDENTIFIERUTILS_CPP_WFFUZNET
#include <vector>
#include <string>
namespace YouCompleteMe
{
// Returns a copy of |text| with the spans that should not contribute
// identifiers erased: text following '//' or '#' up to the end of the line,
// C-style '/* ... */' comments, and text inside single or double quotes.
// Text outside those spans, including line breaks, is kept as is.
std::string RemoveIdentifierFreeText( const std::string &text );
// Returns every identifier found in |text|, in order of appearance. An
// identifier is a letter or underscore followed by any number of word
// characters; duplicates are not removed.
std::vector< std::string > ExtractIdentifiersFromText(
const std::string &text );
} // namespace YouCompleteMe
#endif /* end of include guard: IDENTIFIERUTILS_CPP_WFFUZNET */

View File

@ -59,6 +59,8 @@ BOOST_PYTHON_MODULE(indexer)
.def( "EnableThreading", &IdentifierCompleter::EnableThreading ) .def( "EnableThreading", &IdentifierCompleter::EnableThreading )
.def( "AddCandidatesToDatabase", .def( "AddCandidatesToDatabase",
&IdentifierCompleter::AddCandidatesToDatabase ) &IdentifierCompleter::AddCandidatesToDatabase )
.def( "AddCandidatesToDatabaseFromBuffer",
&IdentifierCompleter::AddCandidatesToDatabaseFromBuffer )
.def( "CandidatesForQueryAndTypeAsync", .def( "CandidatesForQueryAndTypeAsync",
&IdentifierCompleter::CandidatesForQueryAndTypeAsync ); &IdentifierCompleter::CandidatesForQueryAndTypeAsync );

View File

@ -22,7 +22,6 @@
#include <gmock/gmock.h> #include <gmock/gmock.h>
using ::testing::ElementsAre; using ::testing::ElementsAre;
using ::testing::WhenSorted;
namespace YouCompleteMe namespace YouCompleteMe
{ {

View File

@ -0,0 +1,99 @@
// Copyright (C) 2011, 2012 Strahinja Val Markovic <val@markovic.io>
//
// This file is part of YouCompleteMe.
//
// YouCompleteMe is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// YouCompleteMe is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with YouCompleteMe. If not, see <http://www.gnu.org/licenses/>.
#include "IdentifierUtils.h"
// #include "Utils.h"
// #include "TestUtils.h"
#include <gtest/gtest.h>
#include <gmock/gmock.h>
using ::testing::ElementsAre;
using ::testing::WhenSorted;
namespace YouCompleteMe
{
// Checks that each kind of identifier-free span (line comments, block
// comments, quoted text) is erased from the middle line of a three-line input
// while the surrounding text and line breaks are preserved.
TEST( IdentifierUtilsTest, RemoveIdentifierFreeTextWorks )
{
  // Every case below differs only in what follows "bar " on the second line,
  // so they all reduce to the same text.
  const std::string expected =
    "foo \n"
    "bar \n"
    "qux";

  // '//' line comment
  const std::string without_slash_comment = RemoveIdentifierFreeText(
    "foo \n"
    "bar //foo \n"
    "qux" );
  EXPECT_STREQ( without_slash_comment.c_str(), expected.c_str() );

  // '#' line comment
  const std::string without_hash_comment = RemoveIdentifierFreeText(
    "foo \n"
    "bar #foo \n"
    "qux" );
  EXPECT_STREQ( without_hash_comment.c_str(), expected.c_str() );

  // C-style comment spanning two lines
  const std::string without_block_comment = RemoveIdentifierFreeText(
    "foo \n"
    "bar /* foo \n"
    " foo2 */\n"
    "qux" );
  EXPECT_STREQ( without_block_comment.c_str(), expected.c_str() );

  // Single-quoted text
  const std::string without_single_quoted = RemoveIdentifierFreeText(
    "foo \n"
    "bar 'foo'\n"
    "qux" );
  EXPECT_STREQ( without_single_quoted.c_str(), expected.c_str() );

  // Double-quoted text
  const std::string without_double_quoted = RemoveIdentifierFreeText(
    "foo \n"
    "bar \"foo\"\n"
    "qux" );
  EXPECT_STREQ( without_double_quoted.c_str(), expected.c_str() );
}
// Checks that identifiers are pulled out of text littered with punctuation and
// operators, in order of appearance, and that a lone underscore counts as one.
TEST( IdentifierUtilsTest, ExtractIdentifiersFromTextWorks )
{
  const std::vector< std::string > identifiers = ExtractIdentifiersFromText(
    "foo $_bar &BazGoo FOO= !!! '-' - _ (x) one-two !moo [qqq]" );

  EXPECT_THAT( identifiers,
               ElementsAre( "foo",
                            "_bar",
                            "BazGoo",
                            "FOO",
                            "_",
                            "x",
                            "one",
                            "two",
                            "moo",
                            "qqq" ) );
}
} // namespace YouCompleteMe

View File

@ -50,7 +50,6 @@ class IdentifierCompleter( Completer ):
def __init__( self ): def __init__( self ):
self.completer = indexer.IdentifierCompleter() self.completer = indexer.IdentifierCompleter()
self.completer.EnableThreading() self.completer.EnableThreading()
self.pattern = re.compile( r"[_a-zA-Z]\w*" )
def CandidatesForQueryAsync( self, query ): def CandidatesForQueryAsync( self, query ):
@ -71,8 +70,7 @@ class IdentifierCompleter( Completer ):
vector.append( identifier ) vector.append( identifier )
self.completer.AddCandidatesToDatabase( vector, self.completer.AddCandidatesToDatabase( vector,
filetype, filetype,
filepath, filepath )
False )
def AddPreviousIdentifier( self ): def AddPreviousIdentifier( self ):
@ -80,22 +78,16 @@ class IdentifierCompleter( Completer ):
def AddBufferIdentifiers( self ): def AddBufferIdentifiers( self ):
text = "\n".join( vim.current.buffer )
text = RemoveIdentFreeText( text )
idents = re.findall( self.pattern, text )
filetype = vim.eval( "&filetype" ) filetype = vim.eval( "&filetype" )
filepath = vim.eval( "expand('%:p')" ) filepath = vim.eval( "expand('%:p')" )
if not filetype or not filepath: if not filetype or not filepath:
return return
vector = indexer.StringVec() text = "\n".join( vim.current.buffer )
vector.extend( idents ) self.completer.AddCandidatesToDatabaseFromBuffer( text,
self.completer.AddCandidatesToDatabase( vector, filetype,
filetype, filepath )
filepath,
True )
def OnFileEnter( self ): def OnFileEnter( self ):
@ -328,21 +320,3 @@ def ShouldAddIdentifier():
def SanitizeQuery( query ): def SanitizeQuery( query ):
return query.strip() return query.strip()
def RemoveIdentFreeText( text ):
"""Removes commented-out code and code in quotes."""
# TODO: do we still need this sub-func?
def replacer( match ):
s = match.group( 0 )
if s.startswith( '/' ):
return ""
else:
return s
pattern = re.compile(
r'//.*?$|#.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
re.DOTALL | re.MULTILINE )
return re.sub( pattern, replacer, text )