// Copyright (C) 2011, 2012 Strahinja Val Markovic // // This file is part of YouCompleteMe. // // YouCompleteMe is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // YouCompleteMe is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with YouCompleteMe. If not, see . #include "IdentifierUtils.h" #include "Utils.h" #include "standard.h" #include #include #include #include namespace YouCompleteMe { namespace fs = boost::filesystem; namespace { const char *COMMENT_AND_STRING_REGEX = "//.*?$" // Anything following '//' "|" "#.*?$" // Anything following '#' "|" "/\\*.*?\\*/" // C-style comments, '/* ... */' "|" // Anything inside single quotes, '...', but mind: // 1. that the starting single quote is not escaped // 2. the escaped slash (\\) // 3. the escaped single quote inside the string // "(? LANG_TO_FILETYPE = boost::assign::map_list_of ( std::string( "Ant" ), std::string( "ant" ) ) ( std::string( "Asm" ), std::string( "asm" ) ) ( std::string( "Awk" ), std::string( "awk" ) ) ( std::string( "Basic" ), std::string( "basic" ) ) ( std::string( "C++" ), std::string( "cpp" ) ) ( std::string( "C#" ), std::string( "cs" ) ) ( std::string( "C" ), std::string( "c" ) ) ( std::string( "COBOL" ), std::string( "cobol" ) ) ( std::string( "DosBatch" ), std::string( "dosbatch" ) ) ( std::string( "Eiffel" ), std::string( "eiffel" ) ) ( std::string( "Erlang" ), std::string( "erlang" ) ) ( std::string( "Fortran" ), std::string( "fortran" ) ) ( std::string( "HTML" ), std::string( "html" ) ) ( std::string( "Java" ), std::string( "java" ) ) ( std::string( "JavaScript" ), std::string( "javascript" ) ) ( std::string( "Lisp" ), std::string( "lisp" ) ) ( std::string( "Lua" ), std::string( "lua" ) ) ( std::string( "Make" ), std::string( "make" ) ) ( std::string( "MatLab" ), std::string( "matlab" ) ) ( std::string( "OCaml" ), std::string( "ocaml" ) ) ( std::string( "Pascal" ), std::string( "pascal" ) ) ( std::string( "Perl" ), std::string( "perl" ) ) ( std::string( "PHP" ), std::string( "php" ) ) ( std::string( "Python" ), std::string( "python" ) ) ( std::string( "REXX" ), std::string( "rexx" ) ) ( std::string( "Ruby" ), std::string( "ruby" ) ) ( std::string( "Scheme" ), std::string( "scheme" ) ) ( std::string( "Sh" ), std::string( "sh" ) ) ( std::string( "SLang" ), std::string( "slang" ) ) ( std::string( "SML" ), std::string( "sml" ) ) ( std::string( "SQL" ), std::string( "sql" ) ) ( std::string( "Tcl" ), std::string( "tcl" ) ) ( std::string( "Tex" ), std::string( "tex" ) ) ( std::string( "Vera" ), std::string( "vera" ) ) ( std::string( "Verilog" ), std::string( "verilog" ) ) ( std::string( "VHDL" ), std::string( "vhdl" ) ) ( std::string( "Vim" ), std::string( "vim" ) ) ( std::string( "YACC" ), std::string( "yacc" ) ); const std::string NOT_FOUND = "YCMFOOBAR_NOT_FOUND"; } // unnamed namespace std::string RemoveIdentifierFreeText( std::string text ) { boost::erase_all_regex( text, boost::regex( COMMENT_AND_STRING_REGEX ) ); return text; } std::vector< std::string > ExtractIdentifiersFromText( const std::string &text ) { std::string::const_iterator start = text.begin(); std::string::const_iterator end = text.end(); boost::smatch matches; const boost::regex expression( IDENTIFIER_REGEX ); std::vector< std::string > identifiers; while ( boost::regex_search( start, end, matches, expression ) ) { identifiers.push_back( matches[ 0 ] ); start = matches[ 0 ].second; } return identifiers; } FiletypeIdentifierMap ExtractIdentifiersFromTagsFile( const fs::path &path_to_tag_file ) { FiletypeIdentifierMap filetype_identifier_map; std::string tags_file_contents; try { tags_file_contents = ReadUtf8File( path_to_tag_file ); } catch ( ... ) { return filetype_identifier_map; } std::string::const_iterator start = tags_file_contents.begin(); std::string::const_iterator end = tags_file_contents.end(); boost::smatch matches; const boost::regex expression( TAG_REGEX ); const boost::match_flag_type options = boost::match_not_dot_newline; while ( boost::regex_search( start, end, matches, expression, options ) ) { start = matches[ 0 ].second; std::string language( matches[ 3 ] ); std::string filetype = FindWithDefault( LANG_TO_FILETYPE, language, NOT_FOUND ); if ( filetype == NOT_FOUND ) continue; std::string identifier( matches[ 1 ] ); fs::path path( matches[ 2 ].str() ); if ( path.is_relative() ) path = path_to_tag_file.parent_path() / path; filetype_identifier_map[ filetype ][ path.string() ].push_back( identifier ); } return filetype_identifier_map; } } // namespace YouCompleteMe