/* * * Copyright (c) 1998-2009 John Maddock * Copyright 2008 Eric Niebler. * * Use, modification and distribution are subject to the * Boost Software License, Version 1.0. (See accompanying file * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) * */ /* * LOCATION: see http://www.boost.org for most recent version. * FILE regex_format.hpp * VERSION see * DESCRIPTION: Provides formatting output routines for search and replace * operations. Note this is an internal header file included * by regex.hpp, do not include on its own. */ #ifndef BOOST_REGEX_FORMAT_HPP #define BOOST_REGEX_FORMAT_HPP #include #include #include #include #include #include #include #include #include #include #ifndef BOOST_NO_SFINAE #include #endif #include namespace boost{ #ifdef BOOST_MSVC #pragma warning(push) #pragma warning(disable: 4103) #endif #ifdef BOOST_HAS_ABI_HEADERS # include BOOST_ABI_PREFIX #endif #ifdef BOOST_MSVC #pragma warning(pop) #endif // // Forward declaration: // template >::allocator_type > class match_results; namespace re_detail{ // // struct trivial_format_traits: // defines minimum localisation support for formatting // in the case that the actual regex traits is unavailable. // template struct trivial_format_traits { typedef charT char_type; static std::ptrdiff_t length(const charT* p) { return global_length(p); } static charT tolower(charT c) { return ::boost::re_detail::global_lower(c); } static charT toupper(charT c) { return ::boost::re_detail::global_upper(c); } static int value(const charT c, int radix) { int result = global_value(c); return result >= radix ? -1 : result; } int toi(const charT*& p1, const charT* p2, int radix)const { return global_toi(p1, p2, radix, *this); } }; template class basic_regex_formatter { public: typedef typename traits::char_type char_type; basic_regex_formatter(OutputIterator o, const Results& r, const traits& t) : m_traits(t), m_results(r), m_out(o), m_state(output_copy), m_restore_state(output_copy), m_have_conditional(false) {} OutputIterator format(ForwardIter p1, ForwardIter p2, match_flag_type f); OutputIterator format(ForwardIter p1, match_flag_type f) { return format(p1, p1 + m_traits.length(p1), f); } private: typedef typename Results::value_type sub_match_type; enum output_state { output_copy, output_next_lower, output_next_upper, output_lower, output_upper, output_none }; void put(char_type c); void put(const sub_match_type& sub); void format_all(); void format_perl(); void format_escape(); void format_conditional(); void format_until_scope_end(); bool handle_perl_verb(bool have_brace); inline typename Results::value_type const& get_named_sub(ForwardIter i, ForwardIter j, const mpl::false_&) { std::vector v(i, j); return (i != j) ? this->m_results.named_subexpression(&v[0], &v[0] + v.size()) : this->m_results.named_subexpression(static_cast(0), static_cast(0)); } inline typename Results::value_type const& get_named_sub(ForwardIter i, ForwardIter j, const mpl::true_&) { return this->m_results.named_subexpression(i, j); } inline typename Results::value_type const& get_named_sub(ForwardIter i, ForwardIter j) { typedef typename boost::is_convertible::type tag_type; return get_named_sub(i, j, tag_type()); } inline int get_named_sub_index(ForwardIter i, ForwardIter j, const mpl::false_&) { std::vector v(i, j); return (i != j) ? this->m_results.named_subexpression_index(&v[0], &v[0] + v.size()) : this->m_results.named_subexpression_index(static_cast(0), static_cast(0)); } inline int get_named_sub_index(ForwardIter i, ForwardIter j, const mpl::true_&) { return this->m_results.named_subexpression_index(i, j); } inline int get_named_sub_index(ForwardIter i, ForwardIter j) { typedef typename boost::is_convertible::type tag_type; return get_named_sub_index(i, j, tag_type()); } #ifdef BOOST_MSVC // msvc-8.0 issues a spurious warning on the call to std::advance here: #pragma warning(push) #pragma warning(disable:4244) #endif inline int toi(ForwardIter& i, ForwardIter j, int base, const boost::mpl::false_&) { if(i != j) { std::vector v(i, j); const char_type* start = &v[0]; const char_type* pos = start; int r = m_traits.toi(pos, &v[0] + v.size(), base); std::advance(i, pos - start); return r; } return -1; } #ifdef BOOST_MSVC #pragma warning(pop) #endif inline int toi(ForwardIter& i, ForwardIter j, int base, const boost::mpl::true_&) { return m_traits.toi(i, j, base); } inline int toi(ForwardIter& i, ForwardIter j, int base) { typedef typename boost::is_convertible::type tag_type; return toi(i, j, base, tag_type()); } const traits& m_traits; // the traits class for localised formatting operations const Results& m_results; // the match_results being used. OutputIterator m_out; // where to send output. ForwardIter m_position; // format string, current position ForwardIter m_end; // format string end match_flag_type m_flags; // format flags to use output_state m_state; // what to do with the next character output_state m_restore_state; // what state to restore to. bool m_have_conditional; // we are parsing a conditional private: basic_regex_formatter(const basic_regex_formatter&); basic_regex_formatter& operator=(const basic_regex_formatter&); }; template OutputIterator basic_regex_formatter::format(ForwardIter p1, ForwardIter p2, match_flag_type f) { m_position = p1; m_end = p2; m_flags = f; format_all(); return m_out; } template void basic_regex_formatter::format_all() { // over and over: while(m_position != m_end) { switch(*m_position) { case '&': if(m_flags & ::boost::regex_constants::format_sed) { ++m_position; put(m_results[0]); break; } put(*m_position++); break; case '\\': format_escape(); break; case '(': if(m_flags & boost::regex_constants::format_all) { ++m_position; bool have_conditional = m_have_conditional; m_have_conditional = false; format_until_scope_end(); m_have_conditional = have_conditional; if(m_position == m_end) return; BOOST_ASSERT(*m_position == static_cast(')')); ++m_position; // skip the closing ')' break; } put(*m_position); ++m_position; break; case ')': if(m_flags & boost::regex_constants::format_all) { return; } put(*m_position); ++m_position; break; case ':': if((m_flags & boost::regex_constants::format_all) && m_have_conditional) { return; } put(*m_position); ++m_position; break; case '?': if(m_flags & boost::regex_constants::format_all) { ++m_position; format_conditional(); break; } put(*m_position); ++m_position; break; case '$': if((m_flags & format_sed) == 0) { format_perl(); break; } // fall through, not a special character: default: put(*m_position); ++m_position; break; } } } template void basic_regex_formatter::format_perl() { // // On entry *m_position points to a '$' character // output the information that goes with it: // BOOST_ASSERT(*m_position == '$'); // // see if this is a trailing '$': // if(++m_position == m_end) { --m_position; put(*m_position); ++m_position; return; } // // OK find out what kind it is: // bool have_brace = false; ForwardIter save_position = m_position; switch(*m_position) { case '&': ++m_position; put(this->m_results[0]); break; case '`': ++m_position; put(this->m_results.prefix()); break; case '\'': ++m_position; put(this->m_results.suffix()); break; case '$': put(*m_position++); break; case '+': if((++m_position != m_end) && (*m_position == '{')) { ForwardIter base = ++m_position; while((m_position != m_end) && (*m_position != '}')) ++m_position; if(m_position != m_end) { // Named sub-expression: put(get_named_sub(base, m_position)); ++m_position; break; } else { m_position = --base; } } put((this->m_results)[this->m_results.size() > 1 ? static_cast(this->m_results.size() - 1) : 1]); break; case '{': have_brace = true; ++m_position; // fall through.... default: // see if we have a number: { std::ptrdiff_t len = ::boost::re_detail::distance(m_position, m_end); //len = (std::min)(static_cast(2), len); int v = this->toi(m_position, m_position + len, 10); if((v < 0) || (have_brace && ((m_position == m_end) || (*m_position != '}')))) { // Look for a Perl-5.10 verb: if(!handle_perl_verb(have_brace)) { // leave the $ as is, and carry on: m_position = --save_position; put(*m_position); ++m_position; } break; } // otherwise output sub v: put(this->m_results[v]); if(have_brace) ++m_position; } } } template bool basic_regex_formatter::handle_perl_verb(bool have_brace) { // // We may have a capitalised string containing a Perl action: // static const char_type MATCH[] = { 'M', 'A', 'T', 'C', 'H' }; static const char_type PREMATCH[] = { 'P', 'R', 'E', 'M', 'A', 'T', 'C', 'H' }; static const char_type POSTMATCH[] = { 'P', 'O', 'S', 'T', 'M', 'A', 'T', 'C', 'H' }; static const char_type LAST_PAREN_MATCH[] = { 'L', 'A', 'S', 'T', '_', 'P', 'A', 'R', 'E', 'N', '_', 'M', 'A', 'T', 'C', 'H' }; static const char_type LAST_SUBMATCH_RESULT[] = { 'L', 'A', 'S', 'T', '_', 'S', 'U', 'B', 'M', 'A', 'T', 'C', 'H', '_', 'R', 'E', 'S', 'U', 'L', 'T' }; static const char_type LAST_SUBMATCH_RESULT_ALT[] = { '^', 'N' }; if(m_position == m_end) return false; if(have_brace && (*m_position == '^')) ++m_position; std::ptrdiff_t max_len = m_end - m_position; if((max_len >= 5) && std::equal(m_position, m_position + 5, MATCH)) { m_position += 5; if(have_brace) { if((m_position != m_end) && (*m_position == '}')) ++m_position; else { m_position -= 5; return false; } } put(this->m_results[0]); return true; } if((max_len >= 8) && std::equal(m_position, m_position + 8, PREMATCH)) { m_position += 8; if(have_brace) { if((m_position != m_end) && (*m_position == '}')) ++m_position; else { m_position -= 8; return false; } } put(this->m_results.prefix()); return true; } if((max_len >= 9) && std::equal(m_position, m_position + 9, POSTMATCH)) { m_position += 9; if(have_brace) { if((m_position != m_end) && (*m_position == '}')) ++m_position; else { m_position -= 9; return false; } } put(this->m_results.suffix()); return true; } if((max_len >= 16) && std::equal(m_position, m_position + 16, LAST_PAREN_MATCH)) { m_position += 16; if(have_brace) { if((m_position != m_end) && (*m_position == '}')) ++m_position; else { m_position -= 16; return false; } } put((this->m_results)[this->m_results.size() > 1 ? static_cast(this->m_results.size() - 1) : 1]); return true; } if((max_len >= 20) && std::equal(m_position, m_position + 20, LAST_SUBMATCH_RESULT)) { m_position += 20; if(have_brace) { if((m_position != m_end) && (*m_position == '}')) ++m_position; else { m_position -= 20; return false; } } put(this->m_results.get_last_closed_paren()); return true; } if((max_len >= 2) && std::equal(m_position, m_position + 2, LAST_SUBMATCH_RESULT_ALT)) { m_position += 2; if(have_brace) { if((m_position != m_end) && (*m_position == '}')) ++m_position; else { m_position -= 2; return false; } } put(this->m_results.get_last_closed_paren()); return true; } return false; } template void basic_regex_formatter::format_escape() { // skip the escape and check for trailing escape: if(++m_position == m_end) { put(static_cast('\\')); return; } // now switch on the escape type: switch(*m_position) { case 'a': put(static_cast('\a')); ++m_position; break; case 'f': put(static_cast('\f')); ++m_position; break; case 'n': put(static_cast('\n')); ++m_position; break; case 'r': put(static_cast('\r')); ++m_position; break; case 't': put(static_cast('\t')); ++m_position; break; case 'v': put(static_cast('\v')); ++m_position; break; case 'x': if(++m_position == m_end) { put(static_cast('x')); return; } // maybe have \x{ddd} if(*m_position == static_cast('{')) { ++m_position; int val = this->toi(m_position, m_end, 16); if(val < 0) { // invalid value treat everything as literals: put(static_cast('x')); put(static_cast('{')); return; } if((m_position == m_end) || (*m_position != static_cast('}'))) { --m_position; while(*m_position != static_cast('\\')) --m_position; ++m_position; put(*m_position++); return; } ++m_position; put(static_cast(val)); return; } else { std::ptrdiff_t len = ::boost::re_detail::distance(m_position, m_end); len = (std::min)(static_cast(2), len); int val = this->toi(m_position, m_position + len, 16); if(val < 0) { --m_position; put(*m_position++); return; } put(static_cast(val)); } break; case 'c': if(++m_position == m_end) { --m_position; put(*m_position++); return; } put(static_cast(*m_position++ % 32)); break; case 'e': put(static_cast(27)); ++m_position; break; default: // see if we have a perl specific escape: if((m_flags & boost::regex_constants::format_sed) == 0) { bool breakout = false; switch(*m_position) { case 'l': ++m_position; m_restore_state = m_state; m_state = output_next_lower; breakout = true; break; case 'L': ++m_position; m_state = output_lower; breakout = true; break; case 'u': ++m_position; m_restore_state = m_state; m_state = output_next_upper; breakout = true; break; case 'U': ++m_position; m_state = output_upper; breakout = true; break; case 'E': ++m_position; m_state = output_copy; breakout = true; break; } if(breakout) break; } // see if we have a \n sed style backreference: std::ptrdiff_t len = ::boost::re_detail::distance(m_position, m_end); len = (std::min)(static_cast(1), len); int v = this->toi(m_position, m_position+len, 10); if((v > 0) || ((v == 0) && (m_flags & ::boost::regex_constants::format_sed))) { put(m_results[v]); break; } else if(v == 0) { // octal ecape sequence: --m_position; len = ::boost::re_detail::distance(m_position, m_end); len = (std::min)(static_cast(4), len); v = this->toi(m_position, m_position + len, 8); BOOST_ASSERT(v >= 0); put(static_cast(v)); break; } // Otherwise output the character "as is": put(*m_position++); break; } } template void basic_regex_formatter::format_conditional() { if(m_position == m_end) { // oops trailing '?': put(static_cast('?')); return; } int v; if(*m_position == '{') { ForwardIter base = m_position; ++m_position; v = this->toi(m_position, m_end, 10); if(v < 0) { // Try a named subexpression: while((m_position != m_end) && (*m_position != '}')) ++m_position; v = this->get_named_sub_index(base + 1, m_position); } if((v < 0) || (*m_position != '}')) { m_position = base; // oops trailing '?': put(static_cast('?')); return; } // Skip trailing '}': ++m_position; } else { std::ptrdiff_t len = ::boost::re_detail::distance(m_position, m_end); len = (std::min)(static_cast(2), len); v = this->toi(m_position, m_position + len, 10); } if(v < 0) { // oops not a number: put(static_cast('?')); return; } // output varies depending upon whether sub-expression v matched or not: if(m_results[v].matched) { m_have_conditional = true; format_all(); m_have_conditional = false; if((m_position != m_end) && (*m_position == static_cast(':'))) { // skip the ':': ++m_position; // save output state, then turn it off: output_state saved_state = m_state; m_state = output_none; // format the rest of this scope: format_until_scope_end(); // restore output state: m_state = saved_state; } } else { // save output state, then turn it off: output_state saved_state = m_state; m_state = output_none; // format until ':' or ')': m_have_conditional = true; format_all(); m_have_conditional = false; // restore state: m_state = saved_state; if((m_position != m_end) && (*m_position == static_cast(':'))) { // skip the ':': ++m_position; // format the rest of this scope: format_until_scope_end(); } } } template void basic_regex_formatter::format_until_scope_end() { do { format_all(); if((m_position == m_end) || (*m_position == static_cast(')'))) return; put(*m_position++); }while(m_position != m_end); } template void basic_regex_formatter::put(char_type c) { // write a single character to output // according to which case translation mode we are in: switch(this->m_state) { case output_none: return; case output_next_lower: c = m_traits.tolower(c); this->m_state = m_restore_state; break; case output_next_upper: c = m_traits.toupper(c); this->m_state = m_restore_state; break; case output_lower: c = m_traits.tolower(c); break; case output_upper: c = m_traits.toupper(c); break; default: break; } *m_out = c; ++m_out; } template void basic_regex_formatter::put(const sub_match_type& sub) { typedef typename sub_match_type::iterator iterator_type; iterator_type i = sub.first; while(i != sub.second) { put(*i); ++i; } } template class string_out_iterator #ifndef BOOST_NO_STD_ITERATOR : public std::iterator #endif { S* out; public: string_out_iterator(S& s) : out(&s) {} string_out_iterator& operator++() { return *this; } string_out_iterator& operator++(int) { return *this; } string_out_iterator& operator*() { return *this; } string_out_iterator& operator=(typename S::value_type v) { out->append(1, v); return *this; } #ifdef BOOST_NO_STD_ITERATOR typedef std::ptrdiff_t difference_type; typedef typename S::value_type value_type; typedef value_type* pointer; typedef value_type& reference; typedef std::output_iterator_tag iterator_category; #endif }; template OutputIterator regex_format_imp(OutputIterator out, const match_results& m, ForwardIter p1, ForwardIter p2, match_flag_type flags, const traits& t ) { if(flags & regex_constants::format_literal) { return re_detail::copy(p1, p2, out); } re_detail::basic_regex_formatter< OutputIterator, match_results, traits, ForwardIter> f(out, m, t); return f.format(p1, p2, flags); } #ifndef BOOST_NO_SFINAE BOOST_MPL_HAS_XXX_TRAIT_DEF(const_iterator) struct any_type { template any_type(const T&); template any_type(const T&, const U&); template any_type(const T&, const U&, const V&); }; typedef char no_type; typedef char (&unary_type)[2]; typedef char (&binary_type)[3]; typedef char (&ternary_type)[4]; no_type check_is_formatter(unary_type, binary_type, ternary_type); template unary_type check_is_formatter(T const &, binary_type, ternary_type); template binary_type check_is_formatter(unary_type, T const &, ternary_type); template binary_type check_is_formatter(T const &, U const &, ternary_type); template ternary_type check_is_formatter(unary_type, binary_type, T const &); template ternary_type check_is_formatter(T const &, binary_type, U const &); template ternary_type check_is_formatter(unary_type, T const &, U const &); template ternary_type check_is_formatter(T const &, U const &, V const &); struct unary_binary_ternary { typedef unary_type (*unary_fun)(any_type); typedef binary_type (*binary_fun)(any_type, any_type); typedef ternary_type (*ternary_fun)(any_type, any_type, any_type); operator unary_fun(); operator binary_fun(); operator ternary_fun(); }; template::value> struct formatter_wrapper : Formatter , unary_binary_ternary { formatter_wrapper(){} }; template struct formatter_wrapper : unary_binary_ternary { operator Formatter *(); }; template struct formatter_wrapper : unary_binary_ternary { operator Formatter *(); }; template struct format_traits_imp { private: // // F must be a pointer, a function, or a class with a function call operator: // BOOST_STATIC_ASSERT((::boost::is_pointer::value || ::boost::is_function::value || ::boost::is_class::value)); static formatter_wrapper::type> f; static M m; static O out; static boost::regex_constants::match_flag_type flags; public: BOOST_STATIC_CONSTANT(int, value = sizeof(check_is_formatter(f(m), f(m, out), f(m, out, flags)))); }; template struct format_traits { public: // // Type is mpl::int_ where N is one of: // // 0 : F is a pointer to a presumably null-terminated string. // 1 : F is a character-container such as a std::string. // 2 : F is a Unary Functor. // 3 : F is a Binary Functor. // 4 : F is a Ternary Functor. // typedef typename boost::mpl::if_< boost::mpl::and_, boost::mpl::not_::type> > >, boost::mpl::int_<0>, typename boost::mpl::if_< has_const_iterator, boost::mpl::int_<1>, boost::mpl::int_::value> >::type >::type type; // // This static assertion will fail if the functor passed does not accept // the same type of arguments passed. // BOOST_STATIC_ASSERT( boost::is_class::value && !has_const_iterator::value ? (type::value > 1) : true); }; #else // BOOST_NO_SFINAE template struct format_traits { public: // // Type is mpl::int_ where N is one of: // // 0 : F is a pointer to a presumably null-terminated string. // 1 : F is a character-container such as a std::string. // // Other options such as F being a Functor are not supported without // SFINAE support. // typedef typename boost::mpl::if_< boost::is_pointer, boost::mpl::int_<0>, boost::mpl::int_<1> >::type type; }; #endif // BOOST_NO_SFINAE template struct format_functor3 { format_functor3(Base b) : func(b) {} template OutputIter operator()(const Match& m, OutputIter i, boost::regex_constants::match_flag_type f) { return boost::unwrap_ref(func)(m, i, f); } template OutputIter operator()(const Match& m, OutputIter i, boost::regex_constants::match_flag_type f, const Traits&) { return (*this)(m, i, f); } private: Base func; format_functor3(const format_functor3&); format_functor3& operator=(const format_functor3&); }; template struct format_functor2 { format_functor2(Base b) : func(b) {} template OutputIter operator()(const Match& m, OutputIter i, boost::regex_constants::match_flag_type /*f*/) { return boost::unwrap_ref(func)(m, i); } template OutputIter operator()(const Match& m, OutputIter i, boost::regex_constants::match_flag_type f, const Traits&) { return (*this)(m, i, f); } private: Base func; format_functor2(const format_functor2&); format_functor2& operator=(const format_functor2&); }; template struct format_functor1 { format_functor1(Base b) : func(b) {} template OutputIter do_format_string(const S& s, OutputIter i) { return re_detail::copy(s.begin(), s.end(), i); } template inline OutputIter do_format_string(const S* s, OutputIter i) { while(s && *s) { *i = *s; ++i; ++s; } return i; } template OutputIter operator()(const Match& m, OutputIter i, boost::regex_constants::match_flag_type /*f*/) { return do_format_string(boost::unwrap_ref(func)(m), i); } template OutputIter operator()(const Match& m, OutputIter i, boost::regex_constants::match_flag_type f, const Traits&) { return (*this)(m, i, f); } private: Base func; format_functor1(const format_functor1&); format_functor1& operator=(const format_functor1&); }; template struct format_functor_c_string { format_functor_c_string(const charT* ps) : func(ps) {} template OutputIter operator()(const Match& m, OutputIter i, boost::regex_constants::match_flag_type f, const Traits& t = Traits()) { typedef typename Match::char_type char_type; const charT* end = func; while(*end) ++end; return regex_format_imp(i, m, func, end, f, t); } private: const charT* func; format_functor_c_string(const format_functor_c_string&); format_functor_c_string& operator=(const format_functor_c_string&); }; template struct format_functor_container { format_functor_container(const Container& c) : func(c) {} template OutputIter operator()(const Match& m, OutputIter i, boost::regex_constants::match_flag_type f, const Traits& t = Traits()) { typedef typename Match::char_type char_type; return re_detail::regex_format_imp(i, m, func.begin(), func.end(), f, t); } private: const Container& func; format_functor_container(const format_functor_container&); format_functor_container& operator=(const format_functor_container&); }; template > struct compute_functor_type { typedef typename format_traits::type tag; typedef typename boost::remove_cv< typename boost::remove_pointer::type>::type maybe_char_type; typedef typename mpl::if_< ::boost::is_same >, format_functor_c_string, typename mpl::if_< ::boost::is_same >, format_functor_container, typename mpl::if_< ::boost::is_same >, format_functor1, typename mpl::if_< ::boost::is_same >, format_functor2, format_functor3 >::type >::type >::type >::type type; }; } // namespace re_detail template inline OutputIterator regex_format(OutputIterator out, const match_results& m, Functor fmt, match_flag_type flags = format_all ) { return m.format(out, fmt, flags); } template inline std::basic_string::char_type> regex_format(const match_results& m, Functor fmt, match_flag_type flags = format_all) { return m.format(fmt, flags); } #ifdef BOOST_MSVC #pragma warning(push) #pragma warning(disable: 4103) #endif #ifdef BOOST_HAS_ABI_HEADERS # include BOOST_ABI_SUFFIX #endif #ifdef BOOST_MSVC #pragma warning(pop) #endif } // namespace boost #endif // BOOST_REGEX_FORMAT_HPP