commit d9fb4e3ec5eb9fcaf08f757c2a9ddcf57289684f Author: Tim Shen Date: Wed Jul 29 21:08:43 2015 -0700 * include/bits/regex_automaton.h (_State_base, _State<>): Remove _TraitsT dependency from _State<>; Make matcher member into the union to reduce struct size. * include/bits/regex_automaton.tcc (_State_base<>::_M_print, _State_base<>::_M_dot, _StateSeq<>::_M_clone): Adjust to fit the interface. Factor out common parts in _M_clone as _State<>::_M_has_alt. * include/bits/regex_executor.h (_Executor<>::_M_lookahead): Only pass state id instead of the whole state. * include/bits/regex_executor.tcc (_Executor<>::_M_dfs, _Executor<>::_M_lookahead): Adjust to fit the interface. * include/std/regex: Include <ext/aligned_buffer.h>. diff --git a/libstdc++-v3/include/bits/regex_automaton.h b/libstdc++-v3/include/bits/regex_automaton.h index fc0eb41..e153d42 100644 --- a/libstdc++-v3/include/bits/regex_automaton.h +++ b/libstdc++-v3/include/bits/regex_automaton.h @@ -72,7 +72,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION struct _State_base { + protected: _Opcode _M_opcode; // type of outgoing transition + + public: _StateIdT _M_next; // outgoing transition union // Since they are mutually exclusive. 
{ @@ -87,16 +90,24 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // quantifiers (ungreedy if set true) bool _M_neg; }; + // For _S_opcode_match + __gnu_cxx::__aligned_membuf<_Matcher> _M_matcher_storage; }; + protected: explicit _State_base(_Opcode __opcode) : _M_opcode(__opcode), _M_next(_S_invalid_state_id) { } - protected: - ~_State_base() = default; - public: + bool + _M_has_alt() + { + return _M_opcode == _S_opcode_alternative + || _M_opcode == _S_opcode_repeat + || _M_opcode == _S_opcode_subexpr_lookahead; + } + #ifdef _GLIBCXX_DEBUG std::ostream& _M_print(std::ostream& ostr) const; @@ -107,14 +118,64 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION #endif }; - template + template struct _State : _State_base { - typedef _Matcher _MatcherT; + typedef _Matcher<_Char_type> _MatcherT; + static_assert(sizeof(_MatcherT) == sizeof(_Matcher), + "The aussmption std::function has " + "the same size as std::function is violated"); + static_assert(alignof(_MatcherT) == alignof(_Matcher), + "The aussmption std::function has " + "the same alignment as std::function is violated"); + + explicit + _State(_Opcode __opcode) : _State_base(__opcode) + { + if (_M_opcode() == _S_opcode_match) + new (this->_M_matcher_storage._M_addr()) _MatcherT(); + } + + _State(const _State& __rhs) : _State_base(__rhs) + { + if (__rhs._M_opcode() == _S_opcode_match) + new (this->_M_matcher_storage._M_addr()) + _MatcherT(__rhs._M_get_matcher()); + } + + _State(_State&& __rhs) : _State_base(__rhs) + { + if (__rhs._M_opcode() == _S_opcode_match) + new (this->_M_matcher_storage._M_addr()) + _MatcherT(std::move(__rhs._M_get_matcher())); + } + + _State& + operator=(const _State&) = delete; + + ~_State() + { + if (_M_opcode() == _S_opcode_match) + _M_get_matcher().~_MatcherT(); + } + + // Since correct ctor and dtor rely on _M_opcode, it's better not to + // change it over time. 
+ _Opcode + _M_opcode() const + { return _State_base::_M_opcode; } + + bool + _M_matches(_Char_type __char) const + { return _M_get_matcher()(__char); } - _MatcherT _M_matches; // for _S_opcode_match + _MatcherT& + _M_get_matcher() + { return *reinterpret_cast<_MatcherT*>(this->_M_matcher_storage._M_addr()); } - explicit _State(_Opcode __opcode) : _State_base(__opcode) { } + const _MatcherT& + _M_get_matcher() const + { return *reinterpret_cast(this->_M_matcher_storage._M_addr()); } }; struct _NFA_base @@ -155,10 +216,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION template struct _NFA - : _NFA_base, std::vector<_State<_TraitsT>> + : _NFA_base, std::vector<_State> { - typedef _State<_TraitsT> _StateT; - typedef _Matcher _MatcherT; + typedef typename _TraitsT::char_type _Char_type; + typedef _State<_Char_type> _StateT; + typedef _Matcher<_Char_type> _MatcherT; _NFA(const typename _TraitsT::locale_type& __loc, _FlagT __flags) : _NFA_base(__flags) @@ -202,7 +264,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_insert_matcher(_MatcherT __m) { _StateT __tmp(_S_opcode_match); - __tmp._M_matches = std::move(__m); + __tmp._M_get_matcher() = std::move(__m); return _M_insert_state(std::move(__tmp)); } diff --git a/libstdc++-v3/include/bits/regex_automaton.tcc b/libstdc++-v3/include/bits/regex_automaton.tcc index 72fe978..cecc407 100644 --- a/libstdc++-v3/include/bits/regex_automaton.tcc +++ b/libstdc++-v3/include/bits/regex_automaton.tcc @@ -174,13 +174,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { for (auto& __it : *this) { - while (__it._M_next >= 0 && (*this)[__it._M_next]._M_opcode + while (__it._M_next >= 0 && (*this)[__it._M_next]._M_opcode() == _S_opcode_dummy) __it._M_next = (*this)[__it._M_next]._M_next; - if (__it._M_opcode == _S_opcode_alternative - || __it._M_opcode == _S_opcode_repeat - || __it._M_opcode == _S_opcode_subexpr_lookahead) - while (__it._M_alt >= 0 && (*this)[__it._M_alt]._M_opcode + if (__it._M_has_alt()) + while (__it._M_alt >= 0 && (*this)[__it._M_alt]._M_opcode() == 
_S_opcode_dummy) __it._M_alt = (*this)[__it._M_alt]._M_next; } @@ -200,11 +198,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION __stack.pop(); auto __dup = _M_nfa[__u]; // _M_insert_state() never return -1 - auto __id = _M_nfa._M_insert_state(__dup); + auto __id = _M_nfa._M_insert_state(std::move(__dup)); __m[__u] = __id; - if (__dup._M_opcode == _S_opcode_alternative - || __dup._M_opcode == _S_opcode_repeat - || __dup._M_opcode == _S_opcode_subexpr_lookahead) + if (__dup._M_has_alt()) if (__dup._M_alt != _S_invalid_state_id && __m.count(__dup._M_alt) == 0) __stack.push(__dup._M_alt); @@ -223,9 +219,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _GLIBCXX_DEBUG_ASSERT(__m.count(__ref._M_next) > 0); __ref._M_next = __m[__ref._M_next]; } - if (__ref._M_opcode == _S_opcode_alternative - || __ref._M_opcode == _S_opcode_repeat - || __ref._M_opcode == _S_opcode_subexpr_lookahead) + if (__ref._M_has_alt()) if (__ref._M_alt != _S_invalid_state_id) { _GLIBCXX_DEBUG_ASSERT(__m.count(__ref._M_alt) > 0); diff --git a/libstdc++-v3/include/bits/regex_executor.h b/libstdc++-v3/include/bits/regex_executor.h index 404f30b..f3f8876 100644 --- a/libstdc++-v3/include/bits/regex_executor.h +++ b/libstdc++-v3/include/bits/regex_executor.h @@ -148,7 +148,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_word_boundary() const; bool - _M_lookahead(_State<_TraitsT> __state); + _M_lookahead(_StateIdT __next); // Holds additional information used in BFS-mode. 
template diff --git a/libstdc++-v3/include/bits/regex_executor.tcc b/libstdc++-v3/include/bits/regex_executor.tcc index 9b5c1c6..3fd17f6 100644 --- a/libstdc++-v3/include/bits/regex_executor.tcc +++ b/libstdc++-v3/include/bits/regex_executor.tcc @@ -145,11 +145,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION template bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: - _M_lookahead(_State<_TraitsT> __state) + _M_lookahead(_StateIdT __next) { _ResultsVec __what(_M_cur_results.size()); _Executor __sub(_M_current, _M_end, __what, _M_re, _M_flags); - __sub._M_states._M_start = __state._M_alt; + __sub._M_states._M_start = __next; if (__sub._M_search_from_first()) { for (size_t __i = 0; __i < __what.size(); __i++) @@ -203,7 +203,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION const auto& __state = _M_nfa[__i]; // Every change on _M_cur_results and _M_current will be rolled back after // finishing the recursion step. - switch (__state._M_opcode) + switch (__state._M_opcode()) { // _M_alt branch is "match once more", while _M_next is "get me out // of this quantifier". Executing _M_next first or _M_alt first don't @@ -280,7 +280,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // Here __state._M_alt offers a single start node for a sub-NFA. // We recursively invoke our algorithm to match the sub-NFA. case _S_opcode_subexpr_lookahead: - if (_M_lookahead(__state) == !__state._M_neg) + if (_M_lookahead(__state._M_alt) == !__state._M_neg) _M_dfs(__match_mode, __state._M_next); break; case _S_opcode_match: diff --git a/libstdc++-v3/include/std/regex b/libstdc++-v3/include/std/regex index 3dff372..b6fe4c7 100644 --- a/libstdc++-v3/include/std/regex +++ b/libstdc++-v3/include/std/regex @@ -53,6 +53,7 @@ #include #include +#include #include #include #include