YouCompleteMe/cpp/ycm/IdentifierUtils.cpp

202 lines
6.7 KiB
C++
Raw Normal View History

// Copyright (C) 2011, 2012 Google Inc.
2012-07-23 20:17:59 -07:00
//
// This file is part of YouCompleteMe.
//
// YouCompleteMe is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// YouCompleteMe is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with YouCompleteMe. If not, see <http://www.gnu.org/licenses/>.
#include "IdentifierUtils.h"
#include "Utils.h"
2012-07-23 20:17:59 -07:00
#include "standard.h"
#include <boost/unordered_map.hpp>
#include <boost/assign/list_of.hpp>
2012-07-23 20:17:59 -07:00
#include <boost/regex.hpp>
#include <boost/algorithm/string/regex.hpp>
2013-01-19 20:10:52 -08:00
namespace YouCompleteMe {
2012-07-23 20:17:59 -07:00
namespace fs = boost::filesystem;
namespace {
2013-10-29 19:13:52 -07:00
const char *const COMMENT_AND_STRING_REGEX =
2012-07-23 20:17:59 -07:00
"//.*?$" // Anything following '//'
"|"
"#.*?$" // Anything following '#'
"|"
"/\\*.*?\\*/" // C-style comments, '/* ... */'
"|"
// Anything inside single quotes, '...', but mind:
// 1. that the starting single quote is not escaped
// 2. the escaped slash (\\)
// 3. the escaped single quote inside the string
// "(?<!\\\\)'(?:\\\\\\\\|\\\\'|.)*?'"
"(?<!\\\\)'(?:\\\\\\\\|\\\\'|.)*?'"
2012-07-23 20:17:59 -07:00
"|"
// Anything inside double quotes, "...", but mind:
// 1. that the starting double quote is not escaped
// 2. the escaped slash (\\)
// 3. the escaped double quote inside the string
"(?<!\\\\)\"(?:\\\\\\\\|\\\\\"|.)*?\"";
2012-07-23 20:17:59 -07:00
2013-10-29 19:13:52 -07:00
// An identifier starts with an underscore or an ASCII letter and continues
// with any number of word characters. ExtractIdentifiersFromText() searches
// for this pattern.
const char *const IDENTIFIER_REGEX = "[_a-zA-Z]\\w*";
2012-07-23 20:17:59 -07:00
// For details on the supported tag format, see:
// http://ctags.sourceforge.net/FORMAT
// TL;DR: The only supported format is the one Exuberant Ctags emits.
2013-10-29 19:13:52 -07:00
// Matches a single line of a tags file. Capture groups, as read by
// ExtractIdentifiersFromTagsFile():
//   1 - the identifier
//   2 - the path to the file that has the identifier
//   3 - the value of the "language:" field
// A line without a "language:" field does not match.
const char *const TAG_REGEX =
"^([^\\t\\n\\r]+)" // The first field is the identifier
"\\t" // A TAB char is the field separator
// The second field is the path to the file that has the identifier; either
// absolute or relative to the tags file.
"([^\\t\\n\\r]+)"
"\\t.*?" // Non-greedy everything
"language:([^\\t\\n\\r]+)" // We want to capture the language of the file
".*?$"; // Lazily consume whatever follows, up to the end-of-line anchor
// Only used as the equality comparer for the LANG_TO_FILETYPE unordered_map,
// which stores const char* pointers and not std::string but needs to compare
// keys based on string values and not pointer values.
// When passed a const char* this will create a temporary std::string for
// comparison, but it's fast enough for our use case.
//
// The nested typedefs are the ones std::binary_function used to supply; they
// are spelled out here because std::binary_function was deprecated in C++11
// and removed in C++17.
struct StringEqualityComparer {
  typedef std::string first_argument_type;
  typedef std::string second_argument_type;
  typedef bool result_type;

  // Returns true when |a| and |b| hold the same characters.
  bool operator()( const std::string &a, const std::string &b ) const {
    return a == b;
  }
};
// List of languages Exuberant Ctags supports:
// ctags --list-languages
// To map a language name to a filetype, see this file:
// :e $VIMRUNTIME/filetype.vim
// This is a map of const char* and not std::string to prevent issues with
// static initialization.
2013-10-29 19:13:52 -07:00
const boost::unordered_map < const char *,
const char *,
boost::hash< std::string >,
StringEqualityComparer > LANG_TO_FILETYPE =
boost::assign::map_list_of
( "Ant" , "ant" )
( "Asm" , "asm" )
( "Awk" , "awk" )
( "Basic" , "basic" )
( "C++" , "cpp" )
( "C#" , "cs" )
( "C" , "c" )
( "COBOL" , "cobol" )
( "DosBatch" , "dosbatch" )
( "Eiffel" , "eiffel" )
( "Erlang" , "erlang" )
( "Fortran" , "fortran" )
( "HTML" , "html" )
( "Java" , "java" )
( "JavaScript" , "javascript" )
( "Lisp" , "lisp" )
( "Lua" , "lua" )
( "Make" , "make" )
( "MatLab" , "matlab" )
( "OCaml" , "ocaml" )
( "Pascal" , "pascal" )
( "Perl" , "perl" )
( "PHP" , "php" )
( "Python" , "python" )
( "REXX" , "rexx" )
( "Ruby" , "ruby" )
( "Scheme" , "scheme" )
( "Sh" , "sh" )
( "SLang" , "slang" )
( "SML" , "sml" )
( "SQL" , "sql" )
( "Tcl" , "tcl" )
( "Tex" , "tex" )
( "Vera" , "vera" )
( "Verilog" , "verilog" )
( "VHDL" , "vhdl" )
( "Vim" , "vim" )
( "YACC" , "yacc" );
// Sentinel passed to FindWithDefault() as the default value when looking up
// LANG_TO_FILETYPE; ExtractIdentifiersFromTagsFile() skips a tag whose lookup
// result equals it.
const char *const NOT_FOUND = "YCMFOOBAR_NOT_FOUND";
} // unnamed namespace
2012-07-23 20:17:59 -07:00
2013-01-19 20:10:52 -08:00
std::string RemoveIdentifierFreeText( std::string text ) {
boost::erase_all_regex( text, boost::regex( COMMENT_AND_STRING_REGEX ) );
return text;
2012-07-23 20:17:59 -07:00
}
std::vector< std::string > ExtractIdentifiersFromText(
2013-01-19 20:10:52 -08:00
const std::string &text ) {
2012-07-23 20:17:59 -07:00
std::string::const_iterator start = text.begin();
std::string::const_iterator end = text.end();
boost::smatch matches;
const boost::regex expression( IDENTIFIER_REGEX );
2012-07-23 20:17:59 -07:00
std::vector< std::string > identifiers;
2013-01-19 20:10:52 -08:00
while ( boost::regex_search( start, end, matches, expression ) ) {
2012-07-23 20:17:59 -07:00
identifiers.push_back( matches[ 0 ] );
start = matches[ 0 ].second;
}
return identifiers;
}
// Reads the tags file at |path_to_tag_file| and returns the identifiers found
// in it, grouped first by filetype and then by the path of the file that
// holds the identifier.
//
//  - If reading the tags file throws, an empty map is returned.
//  - Only lines matching TAG_REGEX are considered.
//  - Tags whose language has no entry in LANG_TO_FILETYPE are skipped.
//  - Relative file paths are resolved against the directory that contains the
//    tags file.
FiletypeIdentifierMap ExtractIdentifiersFromTagsFile(
  const fs::path &path_to_tag_file ) {
  FiletypeIdentifierMap identifiers_by_filetype;
  std::string contents;

  try {
    contents = ReadUtf8File( path_to_tag_file );
  } catch ( ... ) {
    // Best effort: an unreadable tags file simply yields no identifiers.
    return identifiers_by_filetype;
  }

  const boost::regex tag_pattern( TAG_REGEX );
  // Keep '.' from matching across line ends while searching.
  const boost::match_flag_type flags = boost::match_not_dot_newline;
  const std::string::const_iterator contents_end = contents.end();
  boost::smatch match;

  // Each search resumes right after the end of the previous match.
  for ( std::string::const_iterator cursor = contents.begin();
        boost::regex_search( cursor, contents_end, match, tag_pattern, flags );
        cursor = match[ 0 ].second ) {
    const std::string language( match[ 3 ] );
    const std::string filetype = FindWithDefault( LANG_TO_FILETYPE,
                                                  language.c_str(),
                                                  NOT_FOUND );

    if ( filetype != NOT_FOUND ) {
      const std::string identifier( match[ 1 ] );
      fs::path file_path( match[ 2 ].str() );

      if ( file_path.is_relative() )
        file_path = path_to_tag_file.parent_path() / file_path;

      identifiers_by_filetype[ filetype ][ file_path.string() ]
        .push_back( identifier );
    }
  }

  return identifiers_by_filetype;
}
2012-07-23 20:17:59 -07:00
} // namespace YouCompleteMe