From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2181) id 2BB0D384D173; Thu, 7 Jul 2022 23:32:55 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 2BB0D384D173 MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Jonathan Wakely To: gcc-cvs@gcc.gnu.org, libstdc++-cvs@gcc.gnu.org Subject: [gcc r11-10126] libstdc++: Simplify std::basic_regex construction and assignment X-Act-Checkin: gcc X-Git-Author: Jonathan Wakely X-Git-Refname: refs/heads/releases/gcc-11 X-Git-Oldrev: 8bee3c458ec14ca3e3a429a08694740a894e0c96 X-Git-Newrev: 0a45fd96b7b123af3c1964cef294463792e7fd8b Message-Id: <20220707233255.2BB0D384D173@sourceware.org> Date: Thu, 7 Jul 2022 23:32:55 +0000 (GMT) X-BeenThere: libstdc++-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Libstdc++-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 07 Jul 2022 23:32:55 -0000 https://gcc.gnu.org/g:0a45fd96b7b123af3c1964cef294463792e7fd8b commit r11-10126-g0a45fd96b7b123af3c1964cef294463792e7fd8b Author: Jonathan Wakely Date: Wed Sep 29 13:48:02 2021 +0100 libstdc++: Simplify std::basic_regex construction and assignment Introduce a new _M_compile function which does the common work needed by all constructors and assignment. Call that directly to avoid multiple levels of constructor delegation or calls to basic_regex::assign overloads. For assignment, there is no need to construct a std::basic_string if we already have a contiguous sequence of the correct character type, and no need to construct a temporary basic_regex when assigning from an existing basic_regex. Also define the copy and move assignment operators as defaulted, which does the right thing without constructing a temporary and swapping it. Copying or moving the shared_ptr member cannot fail, so they can be noexcept. The assign(const basic_regex&) and assign(basic_regex&&) member can then be defined in terms of copy or move assignment. The new _M_compile function takes pointer arguments, so the caller has to convert arbitrary iterator ranges into a contiguous sequence of characters. With that simplification, the __compile_nfa helpers are not needed and can be removed. This also fixes a bug where construction from a contiguous sequence with the wrong character type would fail to compile, rather than converting the elements to the regex character type. Signed-off-by: Jonathan Wakely libstdc++-v3/ChangeLog: * include/bits/regex.h (__detail::__is_contiguous_iter): Move here from . (basic_regex::_M_compile): New function to compile an NFA from a regular expression string. (basic_regex::basic_regex): Use _M_compile instead of delegating to other constructors. (basic_regex::operator=(const basic_regex&)): Define as defaulted. (basic_regex::operator=(initializer_list)): Use _M_compile. (basic_regex::assign(const basic_regex&)): Use copy assignment. (basic_regex::assign(basic_regex&&)): Use move assignment. (basic_regex::assign(const C*, flag_type)): Use _M_compile instead of constructing a temporary string. (basic_regex::assign(const C*, size_t, flag_type)): Likewise. (basic_regex::assign(const basic_string&, flag_type)): Use _M_compile instead of constructing a temporary basic_regex. (basic_regex::assign(InputIter, InputIter, flag_type)): Avoid constructing a temporary string for contiguous iterators of the right value type. * include/bits/regex_compiler.h (__is_contiguous_iter): Move to . (__enable_if_contiguous_iter, __disable_if_contiguous_iter) (__compile_nfa): Remove. * testsuite/28_regex/basic_regex/assign/exception_safety.cc: New test. * testsuite/28_regex/basic_regex/ctors/char/other.cc: New test. (cherry picked from commit b59be1adbaea022f19dc7c30d9bf5089e80795d9) Diff: --- libstdc++-v3/include/bits/regex.h | 101 ++++++++++++--------- libstdc++-v3/include/bits/regex_compiler.h | 41 --------- .../basic_regex/assign/exception_safety.cc | 20 ++++ .../28_regex/basic_regex/ctors/char/other.cc | 37 ++++++++ 4 files changed, 117 insertions(+), 82 deletions(-) diff --git a/libstdc++-v3/include/bits/regex.h b/libstdc++-v3/include/bits/regex.h index e35b9b06286..77e9eb1b819 100644 --- a/libstdc++-v3/include/bits/regex.h +++ b/libstdc++-v3/include/bits/regex.h @@ -57,6 +57,16 @@ namespace __detail template class _Executor; + + template + struct __is_contiguous_iter : false_type { }; + + template + struct __is_contiguous_iter<_Tp*> : true_type { }; + + template + struct __is_contiguous_iter<__gnu_cxx::__normal_iterator<_Tp*, _Cont>> + : true_type { }; } _GLIBCXX_BEGIN_NAMESPACE_CXX11 @@ -441,8 +451,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 */ explicit basic_regex(const _Ch_type* __p, flag_type __f = ECMAScript) - : basic_regex(__p, __p + char_traits<_Ch_type>::length(__p), __f) - { } + { _M_compile(__p, __p + _Rx_traits::length(__p), __f); } /** * @brief Constructs a basic regular expression from the sequence @@ -458,8 +467,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 */ basic_regex(const _Ch_type* __p, std::size_t __len, flag_type __f = ECMAScript) - : basic_regex(__p, __p + __len, __f) - { } + { _M_compile(__p, __p + __len, __f); } /** * @brief Copy-constructs a basic regular expression. @@ -489,8 +497,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 basic_regex(const std::basic_string<_Ch_type, _Ch_traits, _Ch_alloc>& __s, flag_type __f = ECMAScript) - : basic_regex(__s.data(), __s.data() + __s.size(), __f) - { } + { _M_compile(__s.data(), __s.data() + __s.size(), __f); } /** * @brief Constructs a basic regular expression from the range @@ -508,8 +515,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 template basic_regex(_FwdIter __first, _FwdIter __last, flag_type __f = ECMAScript) - : basic_regex(std::move(__first), std::move(__last), locale_type(), __f) - { } + { this->assign(__first, __last, __f); } /** * @brief Constructs a basic regular expression from an initializer list. @@ -520,8 +526,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 * @throws regex_error if @p __l is not a valid regular expression. */ basic_regex(initializer_list<_Ch_type> __l, flag_type __f = ECMAScript) - : basic_regex(__l.begin(), __l.end(), __f) - { } + { _M_compile(__l.begin(), __l.end(), __f); } /** * @brief Destroys a basic regular expression. @@ -533,15 +538,13 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 * @brief Assigns one regular expression to another. */ basic_regex& - operator=(const basic_regex& __rhs) - { return this->assign(__rhs); } + operator=(const basic_regex&) = default; /** * @brief Move-assigns one regular expression to another. */ basic_regex& - operator=(basic_regex&& __rhs) noexcept - { return this->assign(std::move(__rhs)); } + operator=(basic_regex&&) = default; /** * @brief Replaces a regular expression with a new one constructed from @@ -564,7 +567,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 */ basic_regex& operator=(initializer_list<_Ch_type> __l) - { return this->assign(__l.begin(), __l.end()); } + { return this->assign(__l); } /** * @brief Replaces a regular expression with a new one constructed from @@ -579,30 +582,22 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 // [7.8.3] assign /** - * @brief the real assignment operator. + * @brief Assigns one regular expression to another. * * @param __rhs Another regular expression object. */ basic_regex& - assign(const basic_regex& __rhs) - { - basic_regex __tmp(__rhs); - this->swap(__tmp); - return *this; - } + assign(const basic_regex& __rhs) noexcept + { return *this = __rhs; } /** - * @brief The move-assignment operator. + * @brief Move-assigns one regular expression to another. * * @param __rhs Another regular expression object. */ basic_regex& assign(basic_regex&& __rhs) noexcept - { - basic_regex __tmp(std::move(__rhs)); - this->swap(__tmp); - return *this; - } + { return *this = std::move(__rhs); } /** * @brief Assigns a new regular expression to a regex object from a @@ -619,7 +614,10 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 */ basic_regex& assign(const _Ch_type* __p, flag_type __flags = ECMAScript) - { return this->assign(string_type(__p), __flags); } + { + _M_compile(__p, __p + _Rx_traits::length(__p), __flags); + return *this; + } /** * @brief Assigns a new regular expression to a regex object from a @@ -638,7 +636,10 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 // 3296. Inconsistent default argument for basic_regex<>::assign basic_regex& assign(const _Ch_type* __p, size_t __len, flag_type __flags = ECMAScript) - { return this->assign(string_type(__p, __len), __flags); } + { + _M_compile(__p, __p + __len, __flags); + return *this; + } /** * @brief Assigns a new regular expression to a regex object from a @@ -656,8 +657,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 assign(const basic_string<_Ch_type, _Ch_traits, _Alloc>& __s, flag_type __flags = ECMAScript) { - return this->assign(basic_regex(__s.data(), __s.data() + __s.size(), - _M_loc, __flags)); + _M_compile(__s.data(), __s.data() + __s.size(), __flags); + return *this; } /** @@ -677,7 +678,21 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 basic_regex& assign(_InputIterator __first, _InputIterator __last, flag_type __flags = ECMAScript) - { return this->assign(string_type(__first, __last), __flags); } + { +#if __cplusplus >= 201703L + using _ValT = typename iterator_traits<_InputIterator>::value_type; + if constexpr (__detail::__is_contiguous_iter<_InputIterator>::value + && is_same_v<_ValT, value_type>) + { + const auto __len = __last - __first; + const _Ch_type* __p = std::__to_address(__first); + _M_compile(__p, __p + __len, __flags); + } + else +#endif + this->assign(string_type(__first, __last), __flags); + return *this; + } /** * @brief Assigns a new regular expression to a regex object. @@ -692,7 +707,10 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 */ basic_regex& assign(initializer_list<_Ch_type> __l, flag_type __flags = ECMAScript) - { return this->assign(__l.begin(), __l.end(), __flags); } + { + _M_compile(__l.begin(), __l.end(), __flags); + return *this; + } // [7.8.4] const operations /** @@ -760,13 +778,14 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 private: typedef std::shared_ptr> _AutomatonPtr; - template - basic_regex(_FwdIter __first, _FwdIter __last, locale_type __loc, - flag_type __f) - : _M_flags(__f), _M_loc(std::move(__loc)), - _M_automaton(__detail::__compile_nfa<_Rx_traits>( - std::move(__first), std::move(__last), _M_loc, _M_flags)) - { } + void + _M_compile(const _Ch_type* __first, const _Ch_type* __last, + flag_type __f) + { + __detail::_Compiler<_Rx_traits> __c(__first, __last, _M_loc, __f); + _M_automaton = __c._M_get_nfa(); + _M_flags = __f; + } template diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h index bf7dcc54dba..de4d9d01a48 100644 --- a/libstdc++-v3/include/bits/regex_compiler.h +++ b/libstdc++-v3/include/bits/regex_compiler.h @@ -153,47 +153,6 @@ namespace __detail const _CtypeT& _M_ctype; }; - template - struct __is_contiguous_iter : is_pointer<_Tp>::type { }; - - template - struct - __is_contiguous_iter<__gnu_cxx::__normal_iterator<_Tp*, _Cont>> - : true_type { }; - - template - using __enable_if_contiguous_iter - = __enable_if_t< __is_contiguous_iter<_Iter>::value, - std::shared_ptr> >; - - template - using __disable_if_contiguous_iter - = __enable_if_t< !__is_contiguous_iter<_Iter>::value, - std::shared_ptr> >; - - template - inline __enable_if_contiguous_iter<_FwdIter, _TraitsT> - __compile_nfa(_FwdIter __first, _FwdIter __last, - const typename _TraitsT::locale_type& __loc, - regex_constants::syntax_option_type __flags) - { - size_t __len = __last - __first; - const auto* __cfirst = __len ? std::__addressof(*__first) : nullptr; - using _Cmplr = _Compiler<_TraitsT>; - return _Cmplr(__cfirst, __cfirst + __len, __loc, __flags)._M_get_nfa(); - } - - template - inline __disable_if_contiguous_iter<_FwdIter, _TraitsT> - __compile_nfa(_FwdIter __first, _FwdIter __last, - const typename _TraitsT::locale_type& __loc, - regex_constants::syntax_option_type __flags) - { - const basic_string __str(__first, __last); - return __compile_nfa<_TraitsT>(__str.data(), __str.data() + __str.size(), - __loc, __flags); - } - // [28.13.14] template class _RegexTranslatorBase diff --git a/libstdc++-v3/testsuite/28_regex/basic_regex/assign/exception_safety.cc b/libstdc++-v3/testsuite/28_regex/basic_regex/assign/exception_safety.cc new file mode 100644 index 00000000000..462eebcf2cd --- /dev/null +++ b/libstdc++-v3/testsuite/28_regex/basic_regex/assign/exception_safety.cc @@ -0,0 +1,20 @@ +// { dg-do run { target c++11 } } +#include +#include + +int main() +{ + const auto f = std::regex::ECMAScript|std::regex::icase; + std::regex re("abc", f); + try + { + re.assign("[", std::regex::extended); + VERIFY( false ); + } + catch (const std::regex_error&) + { + // [re.regex.assign] "If an exception is thrown, *this is unchanged." + VERIFY( re.flags() == f ); + VERIFY( std::regex_match("abc", re) ); + } +} diff --git a/libstdc++-v3/testsuite/28_regex/basic_regex/ctors/char/other.cc b/libstdc++-v3/testsuite/28_regex/basic_regex/ctors/char/other.cc new file mode 100644 index 00000000000..f9b68a72f0a --- /dev/null +++ b/libstdc++-v3/testsuite/28_regex/basic_regex/ctors/char/other.cc @@ -0,0 +1,37 @@ +// { dg-do run { target c++11 } } +#include +#include +#include + +void +test01() +{ + signed char s[] = { 'a', '.' }; + std::regex re(s, s+2); // This used to fail up to GCC 11.2 + // VERIFY( regex_match("an", re) ); + + std::wstring str = L"xx"; + str[0] = '1'; + str[1] = '2'; + re.assign(str.begin(), str.end()); + VERIFY( regex_match("12", re) ); +} + +void +test02() +{ + int i[] = { 'a', '.', '[', 'x', 'y', 'z', ']' }; + __gnu_test::forward_container fwd(i); + std::regex re(fwd.begin(), fwd.end()); + VERIFY( regex_match("any", re) ); + + __gnu_test::input_container input(i); + re.assign(input.begin(), input.end(), std::regex::icase); + VERIFY( regex_match("ANY", re) ); +} + +int main() +{ + test01(); + test02(); +}