Now extracting identifiers in C++ code

This commit is contained in:
Strahinja Val Markovic 2012-07-23 20:17:59 -07:00
parent 48d95bcd20
commit b120d6a5ba
8 changed files with 243 additions and 41 deletions

View File

@ -20,6 +20,7 @@
#include "CandidateRepository.h"
#include "Candidate.h"
#include "Utils.h"
#include "IdentifierUtils.h"
#include <boost/unordered_set.hpp>
#include <boost/bind.hpp>
@ -51,6 +52,7 @@ void ThreadMain( LatestTask &latest_task )
}
}
} // unnamed namespace
@ -66,7 +68,7 @@ IdentifierCompleter::IdentifierCompleter(
: candidate_repository_( CandidateRepository::Instance() ),
threading_enabled_( false )
{
AddCandidatesToDatabase( candidates, "", "", true );
AddCandidatesToDatabase( candidates, "", "" );
}
@ -77,7 +79,7 @@ IdentifierCompleter::IdentifierCompleter(
: candidate_repository_( CandidateRepository::Instance() ),
threading_enabled_( false )
{
AddCandidatesToDatabase( candidates, filetype, filepath, true );
AddCandidatesToDatabase( candidates, filetype, filepath );
}
@ -93,15 +95,11 @@ void IdentifierCompleter::EnableThreading()
void IdentifierCompleter::AddCandidatesToDatabase(
const std::vector< std::string > &new_candidates,
const std::string &filetype,
const std::string &filepath,
bool clear_database )
const std::string &filepath )
{
std::list< const Candidate *> &candidates =
GetCandidateList( filetype, filepath );
if ( clear_database )
candidates.clear();
std::vector< const Candidate* > repository_candidates =
candidate_repository_.GetCandidatesForStrings( new_candidates );
@ -111,6 +109,28 @@ void IdentifierCompleter::AddCandidatesToDatabase(
}
// Replaces the candidates stored for the given filetype/filepath pair with the
// identifiers found in buffer_contents. The stored list is cleared first, then
// the buffer is stripped of comments and quoted literals and the identifiers
// extracted from what remains are added.
void IdentifierCompleter::AddCandidatesToDatabaseFromBuffer(
  const std::string &buffer_contents,
  const std::string &filetype,
  const std::string &filepath )
{
  // Clearing comes first so the stored list ends up mirroring the buffer
  ClearCandidatesStoredForFile( filetype, filepath );

  const std::string identifier_text =
    RemoveIdentifierFreeText( buffer_contents );
  const std::vector< std::string > identifiers =
    ExtractIdentifiersFromText( identifier_text );

  AddCandidatesToDatabase( identifiers, filetype, filepath );
}
void IdentifierCompleter::ClearCandidatesStoredForFile(
const std::string &filetype,
const std::string &filepath )
{
GetCandidateList( filetype, filepath ).clear();
}
std::vector< std::string > IdentifierCompleter::CandidatesForQuery(
const std::string &query ) const
{

View File

@ -70,8 +70,14 @@ public:
void AddCandidatesToDatabase(
const std::vector< std::string > &new_candidates,
const std::string &filetype,
const std::string &filepath,
bool clear_database );
const std::string &filepath );
void AddCandidatesToDatabaseFromBuffer( const std::string &buffer_contents,
const std::string &filetype,
const std::string &filepath );
void ClearCandidatesStoredForFile( const std::string &filetype,
const std::string &filepath );
// Only provided for tests!
std::vector< std::string > CandidatesForQuery(

View File

@ -0,0 +1,68 @@
// Copyright (C) 2011, 2012 Strahinja Val Markovic <val@markovic.io>
//
// This file is part of YouCompleteMe.
//
// YouCompleteMe is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// YouCompleteMe is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with YouCompleteMe. If not, see <http://www.gnu.org/licenses/>.
#include "IdentifierUtils.h"
#include "standard.h"
#include <boost/regex.hpp>
#include <boost/algorithm/string/regex.hpp>
namespace YouCompleteMe
{
// Matches every span of text that must not contribute identifiers: line
// comments, C-style block comments and quoted literals. The comment
// alternatives rely on '$' matching before a newline and the block comment
// alternative relies on '.' matching a newline, as the regex engine used by
// RemoveIdentifierFreeText is expected to do by default.
const char* COMMENT_AND_STRING_REGEX =
  "//.*?$" // Anything following '//'
  "|"
  "#.*?$" // Anything following '#'
  "|"
  "/\\*.*?\\*/" // C-style comments, '/* ... */'
  "|"
  // Anything inside single quotes, '...'. A backslash escapes the character
  // that follows it, so \' does not end the literal and \\ cannot hide the
  // closing quote.
  "'(?:\\\\.|[^\\\\'])*'"
  "|"
  // Anything inside double quotes, "...", with the same escape handling
  "\"(?:\\\\.|[^\\\\\"])*\"";

// An identifier: a letter or underscore followed by any number of word
// characters.
const char* IDENTIFIER_REGEX = "[_a-zA-Z]\\w*";
std::string RemoveIdentifierFreeText( const std::string &text )
{
std::string new_text = text;
boost::erase_all_regex( new_text, boost::regex( COMMENT_AND_STRING_REGEX ) );
return new_text;
}
std::vector< std::string > ExtractIdentifiersFromText(
const std::string &text )
{
std::string::const_iterator start = text.begin();
std::string::const_iterator end = text.end();
boost::match_results< std::string::const_iterator > matches;
boost::regex expression( IDENTIFIER_REGEX );
std::vector< std::string > identifiers;
while ( boost::regex_search( start, end, matches, expression ) )
{
identifiers.push_back( matches[ 0 ] );
start = matches[ 0 ].second;
}
return identifiers;
}
} // namespace YouCompleteMe

34
cpp/ycm/IdentifierUtils.h Normal file
View File

@ -0,0 +1,34 @@
// Copyright (C) 2011, 2012 Strahinja Val Markovic <val@markovic.io>
//
// This file is part of YouCompleteMe.
//
// YouCompleteMe is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// YouCompleteMe is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with YouCompleteMe. If not, see <http://www.gnu.org/licenses/>.
#ifndef IDENTIFIERUTILS_CPP_WFFUZNET
#define IDENTIFIERUTILS_CPP_WFFUZNET
#include <vector>
#include <string>
namespace YouCompleteMe
{
// Returns a copy of |text| with the spans that cannot contain identifiers
// erased: '//' and '#' line comments, '/* ... */' comments and text inside
// single or double quotes. |text| itself is not modified.
std::string RemoveIdentifierFreeText( const std::string &text );
// Returns every identifier found in |text| (a letter or underscore followed by
// word characters), in order of appearance. Duplicates are not removed.
std::vector< std::string > ExtractIdentifiersFromText(
const std::string &text );
} // namespace YouCompleteMe
#endif /* end of include guard: IDENTIFIERUTILS_CPP_WFFUZNET */

View File

@ -59,6 +59,8 @@ BOOST_PYTHON_MODULE(indexer)
.def( "EnableThreading", &IdentifierCompleter::EnableThreading )
.def( "AddCandidatesToDatabase",
&IdentifierCompleter::AddCandidatesToDatabase )
.def( "AddCandidatesToDatabaseFromBuffer",
&IdentifierCompleter::AddCandidatesToDatabaseFromBuffer )
.def( "CandidatesForQueryAndTypeAsync",
&IdentifierCompleter::CandidatesForQueryAndTypeAsync );

View File

@ -22,7 +22,6 @@
#include <gmock/gmock.h>
using ::testing::ElementsAre;
using ::testing::WhenSorted;
namespace YouCompleteMe
{

View File

@ -0,0 +1,99 @@
// Copyright (C) 2011, 2012 Strahinja Val Markovic <val@markovic.io>
//
// This file is part of YouCompleteMe.
//
// YouCompleteMe is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// YouCompleteMe is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with YouCompleteMe. If not, see <http://www.gnu.org/licenses/>.
#include "IdentifierUtils.h"
// #include "Utils.h"
// #include "TestUtils.h"
#include <gtest/gtest.h>
#include <gmock/gmock.h>
using ::testing::ElementsAre;
using ::testing::WhenSorted;
namespace YouCompleteMe
{
// Checks that RemoveIdentifierFreeText erases each supported kind of
// identifier-free span and leaves the surrounding text, including newlines,
// in place. Every case wraps the span between the same "foo" and "qux" lines.
TEST( IdentifierUtilsTest, RemoveIdentifierFreeTextWorks )
{
// '//' line comment: erased up to the end of its line
EXPECT_STREQ( RemoveIdentifierFreeText(
"foo \n"
"bar //foo \n"
"qux"
).c_str(),
"foo \n"
"bar \n"
"qux" );
// '#' line comment: erased up to the end of its line
EXPECT_STREQ( RemoveIdentifierFreeText(
"foo \n"
"bar #foo \n"
"qux"
).c_str(),
"foo \n"
"bar \n"
"qux" );
// '/* ... */' comment spanning two lines: erased as a single span
EXPECT_STREQ( RemoveIdentifierFreeText(
"foo \n"
"bar /* foo \n"
" foo2 */\n"
"qux"
).c_str(),
"foo \n"
"bar \n"
"qux" );
// Single-quoted text: erased together with its quotes
EXPECT_STREQ( RemoveIdentifierFreeText(
"foo \n"
"bar 'foo'\n"
"qux"
).c_str(),
"foo \n"
"bar \n"
"qux" );
// Double-quoted text: erased together with its quotes
EXPECT_STREQ( RemoveIdentifierFreeText(
"foo \n"
"bar \"foo\"\n"
"qux"
).c_str(),
"foo \n"
"bar \n"
"qux" );
}
// Checks that ExtractIdentifiersFromText returns identifiers in order of
// appearance. Punctuation and operators contribute nothing, a lone '_' counts
// as an identifier, and "one-two" is split at the hyphen into two identifiers.
TEST( IdentifierUtilsTest, ExtractIdentifiersFromTextWorks )
{
EXPECT_THAT( ExtractIdentifiersFromText(
"foo $_bar &BazGoo FOO= !!! '-' - _ (x) one-two !moo [qqq]" ),
ElementsAre( "foo",
"_bar",
"BazGoo",
"FOO",
"_",
"x",
"one",
"two",
"moo",
"qqq" ) );
}
} // namespace YouCompleteMe

View File

@ -50,7 +50,6 @@ class IdentifierCompleter( Completer ):
def __init__( self ):
self.completer = indexer.IdentifierCompleter()
self.completer.EnableThreading()
self.pattern = re.compile( r"[_a-zA-Z]\w*" )
def CandidatesForQueryAsync( self, query ):
@ -71,8 +70,7 @@ class IdentifierCompleter( Completer ):
vector.append( identifier )
self.completer.AddCandidatesToDatabase( vector,
filetype,
filepath,
False )
filepath )
def AddPreviousIdentifier( self ):
@ -80,22 +78,16 @@ class IdentifierCompleter( Completer ):
def AddBufferIdentifiers( self ):
text = "\n".join( vim.current.buffer )
text = RemoveIdentFreeText( text )
idents = re.findall( self.pattern, text )
filetype = vim.eval( "&filetype" )
filepath = vim.eval( "expand('%:p')" )
if not filetype or not filepath:
return
vector = indexer.StringVec()
vector.extend( idents )
self.completer.AddCandidatesToDatabase( vector,
text = "\n".join( vim.current.buffer )
self.completer.AddCandidatesToDatabaseFromBuffer( text,
filetype,
filepath,
True )
filepath )
def OnFileEnter( self ):
@ -328,21 +320,3 @@ def ShouldAddIdentifier():
def SanitizeQuery( query ):
return query.strip()
def RemoveIdentFreeText( text ):
"""Removes commented-out code and code in quotes."""
# NOTE(review): as written, only matches that start with '/' (the '//' and
# '/* ... */' alternatives) are replaced with the empty string; '#' comments
# and quoted strings are matched but substituted with themselves.
# TODO: do we still need this sub-func?
def replacer( match ):
s = match.group( 0 )
if s.startswith( '/' ):
return ""
else:
return s
# Alternatives, in order: '//' comment, '#' comment, '/* ... */' comment,
# single-quoted string and double-quoted string (both with backslash escapes).
# DOTALL lets '.' cross newlines; MULTILINE lets '$' match at each line end.
pattern = re.compile(
r'//.*?$|#.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
re.DOTALL | re.MULTILINE )
return re.sub( pattern, replacer, text )