// Copyright (C) 2011, 2012 Strahinja Val Markovic // // This file is part of YouCompleteMe. // // YouCompleteMe is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // YouCompleteMe is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with YouCompleteMe. If not, see . #include "IdentifierUtils.h" #include "Utils.h" #include "standard.h" #include #include #include #include namespace YouCompleteMe { namespace fs = boost::filesystem; namespace { const char *COMMENT_AND_STRING_REGEX = "//.*?$" // Anything following '//' "|" "#.*?$" // Anything following '#' "|" "/\\*.*?\\*/" // C-style comments, '/* ... */' "|" // Anything inside single quotes, '...', but mind: // 1. that the starting single quote is not escaped // 2. the escaped slash (\\) // 3. the escaped single quote inside the string // "(? { bool operator()( const std::string &a, const std::string &b ) const { return a == b; } }; // List of languages Exuberant Ctags supports: // ctags --list-languages // To map a language name to a filetype, see this file: // :e $VIMRUNTIME/filetype.vim // This is a map of const char* and not std::string to prevent issues with // static initialization. const boost::unordered_map< const char*, const char*, boost::hash< std::string >, StringEqualityComparer > LANG_TO_FILETYPE = boost::assign::map_list_of ( "Ant" , "ant" ) ( "Asm" , "asm" ) ( "Awk" , "awk" ) ( "Basic" , "basic" ) ( "C++" , "cpp" ) ( "C#" , "cs" ) ( "C" , "c" ) ( "COBOL" , "cobol" ) ( "DosBatch" , "dosbatch" ) ( "Eiffel" , "eiffel" ) ( "Erlang" , "erlang" ) ( "Fortran" , "fortran" ) ( "HTML" , "html" ) ( "Java" , "java" ) ( "JavaScript" , "javascript" ) ( "Lisp" , "lisp" ) ( "Lua" , "lua" ) ( "Make" , "make" ) ( "MatLab" , "matlab" ) ( "OCaml" , "ocaml" ) ( "Pascal" , "pascal" ) ( "Perl" , "perl" ) ( "PHP" , "php" ) ( "Python" , "python" ) ( "REXX" , "rexx" ) ( "Ruby" , "ruby" ) ( "Scheme" , "scheme" ) ( "Sh" , "sh" ) ( "SLang" , "slang" ) ( "SML" , "sml" ) ( "SQL" , "sql" ) ( "Tcl" , "tcl" ) ( "Tex" , "tex" ) ( "Vera" , "vera" ) ( "Verilog" , "verilog" ) ( "VHDL" , "vhdl" ) ( "Vim" , "vim" ) ( "YACC" , "yacc" ); const char* NOT_FOUND = "YCMFOOBAR_NOT_FOUND"; } // unnamed namespace std::string RemoveIdentifierFreeText( std::string text ) { boost::erase_all_regex( text, boost::regex( COMMENT_AND_STRING_REGEX ) ); return text; } std::vector< std::string > ExtractIdentifiersFromText( const std::string &text ) { std::string::const_iterator start = text.begin(); std::string::const_iterator end = text.end(); boost::smatch matches; const boost::regex expression( IDENTIFIER_REGEX ); std::vector< std::string > identifiers; while ( boost::regex_search( start, end, matches, expression ) ) { identifiers.push_back( matches[ 0 ] ); start = matches[ 0 ].second; } return identifiers; } FiletypeIdentifierMap ExtractIdentifiersFromTagsFile( const fs::path &path_to_tag_file ) { FiletypeIdentifierMap filetype_identifier_map; std::string tags_file_contents; try { tags_file_contents = ReadUtf8File( path_to_tag_file ); } catch ( ... ) { return filetype_identifier_map; } std::string::const_iterator start = tags_file_contents.begin(); std::string::const_iterator end = tags_file_contents.end(); boost::smatch matches; const boost::regex expression( TAG_REGEX ); const boost::match_flag_type options = boost::match_not_dot_newline; while ( boost::regex_search( start, end, matches, expression, options ) ) { start = matches[ 0 ].second; std::string language( matches[ 3 ] ); std::string filetype = FindWithDefault( LANG_TO_FILETYPE, language.c_str(), NOT_FOUND ); if ( filetype == NOT_FOUND ) continue; std::string identifier( matches[ 1 ] ); fs::path path( matches[ 2 ].str() ); if ( path.is_relative() ) path = path_to_tag_file.parent_path() / path; filetype_identifier_map[ filetype ][ path.string() ].push_back( identifier ); } return filetype_identifier_map; } } // namespace YouCompleteMe