2012-07-23 23:17:59 -04:00
|
|
|
// Copyright (C) 2011, 2012 Strahinja Val Markovic <val@markovic.io>
|
|
|
|
//
|
|
|
|
// This file is part of YouCompleteMe.
|
|
|
|
//
|
|
|
|
// YouCompleteMe is free software: you can redistribute it and/or modify
|
|
|
|
// it under the terms of the GNU General Public License as published by
|
|
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
|
|
// (at your option) any later version.
|
|
|
|
//
|
|
|
|
// YouCompleteMe is distributed in the hope that it will be useful,
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
// GNU General Public License for more details.
|
|
|
|
//
|
|
|
|
// You should have received a copy of the GNU General Public License
|
|
|
|
// along with YouCompleteMe. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
|
|
#include "IdentifierUtils.h"
|
2013-05-26 16:28:00 -04:00
|
|
|
#include "Utils.h"
|
2012-07-23 23:17:59 -04:00
|
|
|
#include "standard.h"
|
|
|
|
|
2013-05-26 16:28:00 -04:00
|
|
|
#include <boost/unordered_map.hpp>
|
|
|
|
#include <boost/assign/list_of.hpp>
|
2012-07-23 23:17:59 -04:00
|
|
|
#include <boost/regex.hpp>
|
|
|
|
#include <boost/algorithm/string/regex.hpp>
|
|
|
|
|
2013-01-19 23:10:52 -05:00
|
|
|
namespace YouCompleteMe {
|
2012-07-23 23:17:59 -04:00
|
|
|
|
2013-05-26 16:28:00 -04:00
|
|
|
// Short alias for the Boost.Filesystem namespace; used for the fs::path
// values handled by ExtractIdentifiersFromTagsFile.
namespace fs = boost::filesystem;
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
2013-01-19 23:10:52 -05:00
|
|
|
// Pattern for the parts of a buffer that should not contribute identifiers:
// comments and quoted literals. RemoveIdentifierFreeText erases every match.
// Each alternative after the first carries its own leading '|'.
const char *COMMENT_AND_STRING_REGEX =
  // Anything following '//'
  "//.*?$"
  // Anything following '#'
  "|#.*?$"
  // C-style comments, '/* ... */'
  "|/\\*.*?\\*/"
  // Anything inside single quotes, '...'; an escaped backslash (\\) or an
  // escaped quote inside the literal does not terminate it
  "|'(?:\\\\\\\\|\\\\'|.)*?'"
  // Anything inside double quotes, "..."; an escaped backslash (\\) or an
  // escaped double quote inside the literal does not terminate it
  "|\"(?:\\\\\\\\|\\\\\"|.)*?\"";
|
2012-07-23 23:17:59 -04:00
|
|
|
|
2013-01-19 23:10:52 -05:00
|
|
|
// Pattern for a single identifier; used by ExtractIdentifiersFromText.
const char *IDENTIFIER_REGEX =
  "[_a-zA-Z]"  // First character: an underscore or an ASCII letter
  "\\w*";      // Followed by zero or more word characters
|
2012-07-23 23:17:59 -04:00
|
|
|
|
2013-05-26 16:28:00 -04:00
|
|
|
// Pattern for one line of a tags file. The supported tag format is described
// here:
//   http://ctags.sourceforge.net/FORMAT
// TL;DR: The only supported format is the one Exuberant Ctags emits.
//
// Capture groups: 1 = identifier, 2 = file path, 3 = language.
const char *TAG_REGEX =
  // Group 1: the first field on the line is the identifier
  "^([^\\t\\n\\r]+)"
  // A TAB char separates the fields. Group 2: the second field is the path to
  // the file that has the identifier; either absolute or relative to the tags
  // file.
  "\\t([^\\t\\n\\r]+)"
  // Another TAB, then non-greedy everything up to the language marker
  "\\t.*?language:"
  // Group 3: the language of the file
  "([^\\t\\n\\r]+)"
  // Non-greedy remainder up to the end anchor
  ".*?$";
|
|
|
|
|
|
|
|
|
|
|
|
// List of languages Exuberant Ctags supports:
|
|
|
|
// ctags --list-languages
|
|
|
|
// To map a language name to a filetype, see this file:
|
|
|
|
// :e $VIMRUNTIME/filetype.vim
|
|
|
|
const boost::unordered_map< std::string, std::string > LANG_TO_FILETYPE =
|
|
|
|
boost::assign::map_list_of
|
2013-05-30 01:23:19 -04:00
|
|
|
( std::string( "Ant" ), std::string( "ant" ) )
|
|
|
|
( std::string( "Asm" ), std::string( "asm" ) )
|
|
|
|
( std::string( "Awk" ), std::string( "awk" ) )
|
|
|
|
( std::string( "Basic" ), std::string( "basic" ) )
|
|
|
|
( std::string( "C++" ), std::string( "cpp" ) )
|
|
|
|
( std::string( "C#" ), std::string( "cs" ) )
|
|
|
|
( std::string( "C" ), std::string( "c" ) )
|
|
|
|
( std::string( "COBOL" ), std::string( "cobol" ) )
|
|
|
|
( std::string( "DosBatch" ), std::string( "dosbatch" ) )
|
|
|
|
( std::string( "Eiffel" ), std::string( "eiffel" ) )
|
|
|
|
( std::string( "Erlang" ), std::string( "erlang" ) )
|
|
|
|
( std::string( "Fortran" ), std::string( "fortran" ) )
|
|
|
|
( std::string( "HTML" ), std::string( "html" ) )
|
|
|
|
( std::string( "Java" ), std::string( "java" ) )
|
|
|
|
( std::string( "JavaScript" ), std::string( "javascript" ) )
|
|
|
|
( std::string( "Lisp" ), std::string( "lisp" ) )
|
|
|
|
( std::string( "Lua" ), std::string( "lua" ) )
|
|
|
|
( std::string( "Make" ), std::string( "make" ) )
|
|
|
|
( std::string( "MatLab" ), std::string( "matlab" ) )
|
|
|
|
( std::string( "OCaml" ), std::string( "ocaml" ) )
|
|
|
|
( std::string( "Pascal" ), std::string( "pascal" ) )
|
|
|
|
( std::string( "Perl" ), std::string( "perl" ) )
|
|
|
|
( std::string( "PHP" ), std::string( "php" ) )
|
|
|
|
( std::string( "Python" ), std::string( "python" ) )
|
|
|
|
( std::string( "REXX" ), std::string( "rexx" ) )
|
|
|
|
( std::string( "Ruby" ), std::string( "ruby" ) )
|
|
|
|
( std::string( "Scheme" ), std::string( "scheme" ) )
|
|
|
|
( std::string( "Sh" ), std::string( "sh" ) )
|
|
|
|
( std::string( "SLang" ), std::string( "slang" ) )
|
|
|
|
( std::string( "SML" ), std::string( "sml" ) )
|
|
|
|
( std::string( "SQL" ), std::string( "sql" ) )
|
|
|
|
( std::string( "Tcl" ), std::string( "tcl" ) )
|
|
|
|
( std::string( "Tex" ), std::string( "tex" ) )
|
|
|
|
( std::string( "Vera" ), std::string( "vera" ) )
|
|
|
|
( std::string( "Verilog" ), std::string( "verilog" ) )
|
|
|
|
( std::string( "VHDL" ), std::string( "vhdl" ) )
|
|
|
|
( std::string( "Vim" ), std::string( "vim" ) )
|
|
|
|
( std::string( "YACC" ), std::string( "yacc" ) );
|
2013-05-26 16:28:00 -04:00
|
|
|
|
|
|
|
// Sentinel handed to FindWithDefault as the fallback value; getting it back
// means the language had no entry in LANG_TO_FILETYPE.
const std::string NOT_FOUND( "YCMFOOBAR_NOT_FOUND" );
|
|
|
|
|
|
|
|
} // unnamed namespace
|
|
|
|
|
2012-07-23 23:17:59 -04:00
|
|
|
|
2013-01-19 23:10:52 -05:00
|
|
|
std::string RemoveIdentifierFreeText( std::string text ) {
|
2012-07-24 23:09:09 -04:00
|
|
|
boost::erase_all_regex( text, boost::regex( COMMENT_AND_STRING_REGEX ) );
|
|
|
|
return text;
|
2012-07-23 23:17:59 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
std::vector< std::string > ExtractIdentifiersFromText(
|
2013-01-19 23:10:52 -05:00
|
|
|
const std::string &text ) {
|
2012-07-23 23:17:59 -04:00
|
|
|
std::string::const_iterator start = text.begin();
|
|
|
|
std::string::const_iterator end = text.end();
|
|
|
|
|
2013-05-26 16:28:00 -04:00
|
|
|
boost::smatch matches;
|
|
|
|
const boost::regex expression( IDENTIFIER_REGEX );
|
2012-07-23 23:17:59 -04:00
|
|
|
|
|
|
|
std::vector< std::string > identifiers;
|
2013-01-19 23:10:52 -05:00
|
|
|
|
|
|
|
while ( boost::regex_search( start, end, matches, expression ) ) {
|
2012-07-23 23:17:59 -04:00
|
|
|
identifiers.push_back( matches[ 0 ] );
|
|
|
|
start = matches[ 0 ].second;
|
|
|
|
}
|
|
|
|
|
|
|
|
return identifiers;
|
|
|
|
}
|
|
|
|
|
2013-05-26 16:28:00 -04:00
|
|
|
|
|
|
|
// Parses the tags file at |path_to_tag_file| and groups the identifiers it
// lists, first by filetype and then by the path of the file that contains them.
//
// Only lines matching TAG_REGEX whose language has an entry in LANG_TO_FILETYPE
// contribute to the result. Relative file paths are resolved against the
// directory that holds the tags file. If reading the tags file throws, the
// exception is swallowed and an empty map is returned.
FiletypeIdentifierMap ExtractIdentifiersFromTagsFile(
  const fs::path &path_to_tag_file ) {
  FiletypeIdentifierMap identifiers_by_filetype;
  std::string contents;

  try {
    contents = ReadUtf8File( path_to_tag_file );
  } catch ( ... ) {
    // Best effort: a tags file that cannot be read contributes nothing.
    return identifiers_by_filetype;
  }

  const boost::regex tag_pattern( TAG_REGEX );
  // Keep '.' from matching newlines so a match stays within a single line.
  const boost::match_flag_type flags = boost::match_not_dot_newline;
  const std::string::const_iterator contents_end = contents.end();
  boost::smatch tag;

  // Every iteration, whether or not the tag is kept, resumes the search right
  // after the current match.
  for ( std::string::const_iterator cursor = contents.begin();
        boost::regex_search( cursor, contents_end, tag, tag_pattern, flags );
        cursor = tag[ 0 ].second ) {
    std::string language( tag[ 3 ] );
    std::string filetype = FindWithDefault( LANG_TO_FILETYPE,
                                            language,
                                            NOT_FOUND );

    // Tags for languages without a known filetype are ignored.
    if ( filetype != NOT_FOUND ) {
      std::string identifier( tag[ 1 ] );
      fs::path source_path( tag[ 2 ] );

      if ( source_path.is_relative() )
        source_path = path_to_tag_file.parent_path() / source_path;

      identifiers_by_filetype[ filetype ][ source_path.string() ]
        .push_back( identifier );
    }
  }

  return identifiers_by_filetype;
}
|
|
|
|
|
2012-07-23 23:17:59 -04:00
|
|
|
} // namespace YouCompleteMe
|