From b120d6a5ba3e900dbaec7279c65d173e079e42dc Mon Sep 17 00:00:00 2001 From: Strahinja Val Markovic Date: Mon, 23 Jul 2012 20:17:59 -0700 Subject: [PATCH] Now extracting identifiers in C++ code --- cpp/ycm/IdentifierCompleter.cpp | 34 +++++++-- cpp/ycm/IdentifierCompleter.h | 10 ++- cpp/ycm/IdentifierUtils.cpp | 68 ++++++++++++++++++ cpp/ycm/IdentifierUtils.h | 34 +++++++++ cpp/ycm/indexer.cpp | 2 + cpp/ycm/tests/ClangUtils_test.cpp | 1 - cpp/ycm/tests/IdentifierUtils_test.cpp | 99 ++++++++++++++++++++++++++ python/ycm.py | 36 ++-------- 8 files changed, 243 insertions(+), 41 deletions(-) create mode 100644 cpp/ycm/IdentifierUtils.cpp create mode 100644 cpp/ycm/IdentifierUtils.h create mode 100644 cpp/ycm/tests/IdentifierUtils_test.cpp diff --git a/cpp/ycm/IdentifierCompleter.cpp b/cpp/ycm/IdentifierCompleter.cpp index f7790158..515f9b22 100644 --- a/cpp/ycm/IdentifierCompleter.cpp +++ b/cpp/ycm/IdentifierCompleter.cpp @@ -20,6 +20,7 @@ #include "CandidateRepository.h" #include "Candidate.h" #include "Utils.h" +#include "IdentifierUtils.h" #include #include @@ -51,6 +52,7 @@ void ThreadMain( LatestTask &latest_task ) } } + } // unnamed namespace @@ -66,7 +68,7 @@ IdentifierCompleter::IdentifierCompleter( : candidate_repository_( CandidateRepository::Instance() ), threading_enabled_( false ) { - AddCandidatesToDatabase( candidates, "", "", true ); + AddCandidatesToDatabase( candidates, "", "" ); } @@ -77,7 +79,7 @@ IdentifierCompleter::IdentifierCompleter( : candidate_repository_( CandidateRepository::Instance() ), threading_enabled_( false ) { - AddCandidatesToDatabase( candidates, filetype, filepath, true ); + AddCandidatesToDatabase( candidates, filetype, filepath ); } @@ -93,15 +95,11 @@ void IdentifierCompleter::EnableThreading() void IdentifierCompleter::AddCandidatesToDatabase( const std::vector< std::string > &new_candidates, const std::string &filetype, - const std::string &filepath, - bool clear_database ) + const std::string &filepath ) { std::list< const 
Candidate *> &candidates = GetCandidateList( filetype, filepath ); - if ( clear_database ) - candidates.clear(); - std::vector< const Candidate* > repository_candidates = candidate_repository_.GetCandidatesForStrings( new_candidates ); @@ -111,6 +109,28 @@ void IdentifierCompleter::AddCandidatesToDatabase( } +void IdentifierCompleter::AddCandidatesToDatabaseFromBuffer( + const std::string &buffer_contents, + const std::string &filetype, + const std::string &filepath ) +{ + ClearCandidatesStoredForFile( filetype, filepath ); + + AddCandidatesToDatabase( + ExtractIdentifiersFromText( RemoveIdentifierFreeText( buffer_contents ) ), + filetype, + filepath ); +} + + +void IdentifierCompleter::ClearCandidatesStoredForFile( + const std::string &filetype, + const std::string &filepath ) +{ + GetCandidateList( filetype, filepath ).clear(); +} + + std::vector< std::string > IdentifierCompleter::CandidatesForQuery( const std::string &query ) const { diff --git a/cpp/ycm/IdentifierCompleter.h b/cpp/ycm/IdentifierCompleter.h index 5139cfe8..2c8e04c0 100644 --- a/cpp/ycm/IdentifierCompleter.h +++ b/cpp/ycm/IdentifierCompleter.h @@ -70,8 +70,14 @@ public: void AddCandidatesToDatabase( const std::vector< std::string > &new_candidates, const std::string &filetype, - const std::string &filepath, - bool clear_database ); + const std::string &filepath ); + + void AddCandidatesToDatabaseFromBuffer( const std::string &buffer_contents, + const std::string &filetype, + const std::string &filepath ); + + void ClearCandidatesStoredForFile( const std::string &filetype, + const std::string &filepath ); // Only provided for tests! std::vector< std::string > CandidatesForQuery( diff --git a/cpp/ycm/IdentifierUtils.cpp b/cpp/ycm/IdentifierUtils.cpp new file mode 100644 index 00000000..47b4d65a --- /dev/null +++ b/cpp/ycm/IdentifierUtils.cpp @@ -0,0 +1,68 @@ +// Copyright (C) 2011, 2012 Strahinja Val Markovic +// +// This file is part of YouCompleteMe. 
namespace YouCompleteMe
{

// Pattern matching the parts of a buffer that must not contribute
// identifiers: line comments, C-style block comments and quoted literals.
// RemoveIdentifierFreeText erases every match of this pattern.
//
// Inside a quoted literal a backslash escapes the character that follows it,
// so an escaped quote does not terminate the literal early. Without this, a
// literal containing an escaped quote is cut short and every later quote in
// the buffer gets paired with the wrong partner.
//
// NOTE(review): the '$' anchors and the multi-line block-comment alternative
// presumably rely on the regex engine matching '$' before embedded newlines
// and '.' against newline characters (the Boost.Regex Perl-syntax defaults);
// confirm if explicit syntax or match flags are ever introduced.
const char* COMMENT_AND_STRING_REGEX =
  "//.*?$"                      // Anything following '//'
  "|"
  "#.*?$"                       // Anything following '#'
  "|"
  "/\\*.*?\\*/"                 // C-style comments, '/* ... */'
  "|"
  "'(?:\\\\.|[^\\\\'])*'"       // Single-quoted text, escapes honoured
  "|"
  "\"(?:\\\\.|[^\\\\\"])*\"";   // Double-quoted text, escapes honoured

} // namespace YouCompleteMe
+ +const char* IDENTIFIER_REGEX = "[_a-zA-Z]\\w*"; + + +std::string RemoveIdentifierFreeText( const std::string &text ) +{ + std::string new_text = text; + boost::erase_all_regex( new_text, boost::regex( COMMENT_AND_STRING_REGEX ) ); + return new_text; +} + + +std::vector< std::string > ExtractIdentifiersFromText( + const std::string &text ) +{ + std::string::const_iterator start = text.begin(); + std::string::const_iterator end = text.end(); + + boost::match_results< std::string::const_iterator > matches; + boost::regex expression( IDENTIFIER_REGEX ); + + std::vector< std::string > identifiers; + while ( boost::regex_search( start, end, matches, expression ) ) + { + identifiers.push_back( matches[ 0 ] ); + start = matches[ 0 ].second; + } + + return identifiers; +} + +} // namespace YouCompleteMe diff --git a/cpp/ycm/IdentifierUtils.h b/cpp/ycm/IdentifierUtils.h new file mode 100644 index 00000000..66c1a370 --- /dev/null +++ b/cpp/ycm/IdentifierUtils.h @@ -0,0 +1,34 @@ +// Copyright (C) 2011, 2012 Strahinja Val Markovic +// +// This file is part of YouCompleteMe. +// +// YouCompleteMe is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// YouCompleteMe is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with YouCompleteMe. If not, see . 
#ifndef IDENTIFIERUTILS_CPP_WFFUZNET
#define IDENTIFIERUTILS_CPP_WFFUZNET

// The declarations below use std::string and std::vector, so the header
// includes both itself rather than relying on the including file.
#include <string>
#include <vector>

namespace YouCompleteMe
{

// Returns a copy of |text| with the spans that cannot contribute identifiers
// erased: '//' and '#' line comments, '/* ... */' comments and text inside
// single or double quotes.
std::string RemoveIdentifierFreeText( const std::string &text );

// Returns the identifiers (a letter or underscore followed by word
// characters) found in |text|, in order of appearance; duplicates are kept.
std::vector< std::string > ExtractIdentifiersFromText(
  const std::string &text );

} // namespace YouCompleteMe

#endif /* end of include guard: IDENTIFIERUTILS_CPP_WFFUZNET */
See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with YouCompleteMe. If not, see . + +#include "IdentifierUtils.h" +// #include "Utils.h" +// #include "TestUtils.h" +#include +#include + +using ::testing::ElementsAre; +using ::testing::WhenSorted; + +namespace YouCompleteMe +{ + + +TEST( IdentifierUtilsTest, RemoveIdentifierFreeTextWorks ) +{ + EXPECT_STREQ( RemoveIdentifierFreeText( + "foo \n" + "bar //foo \n" + "qux" + ).c_str(), + "foo \n" + "bar \n" + "qux" ); + + EXPECT_STREQ( RemoveIdentifierFreeText( + "foo \n" + "bar #foo \n" + "qux" + ).c_str(), + "foo \n" + "bar \n" + "qux" ); + + EXPECT_STREQ( RemoveIdentifierFreeText( + "foo \n" + "bar /* foo \n" + " foo2 */\n" + "qux" + ).c_str(), + "foo \n" + "bar \n" + "qux" ); + + EXPECT_STREQ( RemoveIdentifierFreeText( + "foo \n" + "bar 'foo'\n" + "qux" + ).c_str(), + "foo \n" + "bar \n" + "qux" ); + + EXPECT_STREQ( RemoveIdentifierFreeText( + "foo \n" + "bar \"foo\"\n" + "qux" + ).c_str(), + "foo \n" + "bar \n" + "qux" ); +} + + +TEST( IdentifierUtilsTest, ExtractIdentifiersFromTextWorks ) +{ + EXPECT_THAT( ExtractIdentifiersFromText( + "foo $_bar &BazGoo FOO= !!! 
'-' - _ (x) one-two !moo [qqq]" ), + ElementsAre( "foo", + "_bar", + "BazGoo", + "FOO", + "_", + "x", + "one", + "two", + "moo", + "qqq" ) ); + +} + +} // namespace YouCompleteMe + diff --git a/python/ycm.py b/python/ycm.py index 62dd66e6..607aac35 100644 --- a/python/ycm.py +++ b/python/ycm.py @@ -50,7 +50,6 @@ class IdentifierCompleter( Completer ): def __init__( self ): self.completer = indexer.IdentifierCompleter() self.completer.EnableThreading() - self.pattern = re.compile( r"[_a-zA-Z]\w*" ) def CandidatesForQueryAsync( self, query ): @@ -71,8 +70,7 @@ class IdentifierCompleter( Completer ): vector.append( identifier ) self.completer.AddCandidatesToDatabase( vector, filetype, - filepath, - False ) + filepath ) def AddPreviousIdentifier( self ): @@ -80,22 +78,16 @@ class IdentifierCompleter( Completer ): def AddBufferIdentifiers( self ): - text = "\n".join( vim.current.buffer ) - text = RemoveIdentFreeText( text ) - - idents = re.findall( self.pattern, text ) filetype = vim.eval( "&filetype" ) filepath = vim.eval( "expand('%:p')" ) if not filetype or not filepath: return - vector = indexer.StringVec() - vector.extend( idents ) - self.completer.AddCandidatesToDatabase( vector, - filetype, - filepath, - True ) + text = "\n".join( vim.current.buffer ) + self.completer.AddCandidatesToDatabaseFromBuffer( text, + filetype, + filepath ) def OnFileEnter( self ): @@ -328,21 +320,3 @@ def ShouldAddIdentifier(): def SanitizeQuery( query ): return query.strip() - - -def RemoveIdentFreeText( text ): - """Removes commented-out code and code in quotes.""" - - # TODO: do we still need this sub-func? - def replacer( match ): - s = match.group( 0 ) - if s.startswith( '/' ): - return "" - else: - return s - - pattern = re.compile( - r'//.*?$|#.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"', - re.DOTALL | re.MULTILINE ) - - return re.sub( pattern, replacer, text )