From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.129.124]) by sourceware.org (Postfix) with ESMTPS id A9D4E385843D for ; Wed, 5 Jan 2022 13:47:47 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org A9D4E385843D Received: from mimecast-mx01.redhat.com (mimecast-mx01.redhat.com [209.132.183.4]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-583-ZgaOIKZsNgurHA00WHkb-g-1; Wed, 05 Jan 2022 08:47:44 -0500 X-MC-Unique: ZgaOIKZsNgurHA00WHkb-g-1 Received: from smtp.corp.redhat.com (int-mx05.intmail.prod.int.phx2.redhat.com [10.5.11.15]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx01.redhat.com (Postfix) with ESMTPS id D8F001006AA3; Wed, 5 Jan 2022 13:47:42 +0000 (UTC) Received: from localhost (unknown [10.33.36.252]) by smtp.corp.redhat.com (Postfix) with ESMTP id 5ED3B7512F; Wed, 5 Jan 2022 13:47:42 +0000 (UTC) From: Jonathan Wakely To: libstdc++@gcc.gnu.org, gcc-patches@gcc.gnu.org Subject: [committed] libstdc++: Improve std::regex_error::what() strings Date: Wed, 5 Jan 2022 13:47:41 +0000 Message-Id: <20220105134741.1639698-1-jwakely@redhat.com> MIME-Version: 1.0 X-Scanned-By: MIMEDefang 2.79 on 10.5.11.15 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Transfer-Encoding: 8bit Content-Type: text/plain; charset="US-ASCII" X-Spam-Status: No, score=-13.8 required=5.0 tests=BAYES_00, DKIMWL_WL_HIGH, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, GIT_PATCH_0, RCVD_IN_DNSWL_LOW, RCVD_IN_MSPIKE_H3, RCVD_IN_MSPIKE_WL, SPF_HELO_NONE, SPF_NONE, TXREP autolearn=unavailable autolearn_force=no version=3.4.4 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on server2.sourceware.org X-BeenThere: libstdc++@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Libstdc++ mailing list List-Unsubscribe: , 
List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 05 Jan 2022 13:47:49 -0000 Tested on powerpc64le-linux, pushed to trunk. This replaces the vague "regex_error" string returned by std::regex_error::what() with a string that corresponds to the error_type enum passed to the constructor. This allows us to remove many of the strings passed to __throw_regex_error, because the default string is at least as good. When a string argument to __throw_regex_error is kept it should add some context-specific detail absent from the default string. Also remove full stops (periods) from the end of those strings, to make it easier to include them in logs and other output. I've left them starting with an upper-case letter, which is consistent with strerror output for (at least) Glibc, Solaris and BSD. I'm ambivalent about whether that's the right choice. This also adds the missing noreturn attribute to __throw_regex_error. libstdc++-v3/ChangeLog: * include/bits/regex_compiler.tcc: Adjust all calls to __throw_regex_error. * include/bits/regex_error.h (__throw_regex_error): Add noreturn attribute. * include/bits/regex_scanner.tcc: Adjust all calls to __throw_regex_error. * src/c++11/regex.cc (desc): New helper function. (regex_error::regex_error(error_type)): Use desc to get a string corresponding to the error code. 
--- libstdc++-v3/include/bits/regex_compiler.tcc | 37 +++++----- libstdc++-v3/include/bits/regex_error.h | 27 ++++--- libstdc++-v3/include/bits/regex_scanner.tcc | 76 ++++++++------------ libstdc++-v3/src/c++11/regex.cc | 47 +++++++++++- 4 files changed, 111 insertions(+), 76 deletions(-) diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc index ce834b12255..c12f7502538 100644 --- a/libstdc++-v3/include/bits/regex_compiler.tcc +++ b/libstdc++-v3/include/bits/regex_compiler.tcc @@ -157,8 +157,7 @@ namespace __detail auto __neg = _M_value[0] == 'n'; this->_M_disjunction(); if (!_M_match_token(_ScannerT::_S_token_subexpr_end)) - __throw_regex_error(regex_constants::error_paren, - "Parenthesis is not closed."); + __throw_regex_error(regex_constants::error_paren); auto __tmp = _M_pop(); __tmp._M_append(_M_nfa->_M_insert_accept()); _M_stack.push( @@ -180,8 +179,7 @@ namespace __detail auto __init = [this, &__neg]() { if (_M_stack.empty()) - __throw_regex_error(regex_constants::error_badrepeat, - "Nothing to repeat before a quantifier."); + __throw_regex_error(regex_constants::error_badrepeat); __neg = __neg && _M_match_token(_ScannerT::_S_token_opt); }; if (_M_match_token(_ScannerT::_S_token_closure0)) @@ -217,11 +215,9 @@ namespace __detail else if (_M_match_token(_ScannerT::_S_token_interval_begin)) { if (_M_stack.empty()) - __throw_regex_error(regex_constants::error_badrepeat, - "Nothing to repeat before a quantifier."); + __throw_regex_error(regex_constants::error_badrepeat); if (!_M_match_token(_ScannerT::_S_token_dup_count)) - __throw_regex_error(regex_constants::error_badbrace, - "Unexpected token in brace expression."); + __throw_regex_error(regex_constants::error_badbrace); _StateSeqT __r(_M_pop()); _StateSeqT __e(*_M_nfa, _M_nfa->_M_insert_dummy()); long __min_rep = _M_cur_int_value(10); @@ -237,8 +233,7 @@ namespace __detail __infi = true; } if (!_M_match_token(_ScannerT::_S_token_interval_end)) - 
__throw_regex_error(regex_constants::error_brace, - "Unexpected end of brace expression."); + __throw_regex_error(regex_constants::error_brace); __neg = __neg && _M_match_token(_ScannerT::_S_token_opt); @@ -257,8 +252,7 @@ namespace __detail else { if (__n < 0) - __throw_regex_error(regex_constants::error_badbrace, - "Invalid range in brace expression."); + __throw_regex_error(regex_constants::error_badbrace); auto __end = _M_nfa->_M_insert_dummy(); // _M_alt is the "match more" branch, and _M_next is the // "match less" one. Switch _M_alt and _M_next of all created @@ -325,8 +319,7 @@ namespace __detail _StateSeqT __r(*_M_nfa, _M_nfa->_M_insert_dummy()); this->_M_disjunction(); if (!_M_match_token(_ScannerT::_S_token_subexpr_end)) - __throw_regex_error(regex_constants::error_paren, - "Parenthesis is not closed."); + __throw_regex_error(regex_constants::error_paren); __r._M_append(_M_pop()); _M_stack.push(__r); } @@ -335,8 +328,7 @@ namespace __detail _StateSeqT __r(*_M_nfa, _M_nfa->_M_insert_subexpr_begin()); this->_M_disjunction(); if (!_M_match_token(_ScannerT::_S_token_subexpr_end)) - __throw_regex_error(regex_constants::error_paren, - "Parenthesis is not closed."); + __throw_regex_error(regex_constants::error_paren); __r._M_append(_M_pop()); __r._M_append(_M_nfa->_M_insert_subexpr_end()); _M_stack.push(__r); @@ -503,7 +495,8 @@ namespace __detail { // "\\w-" is invalid, start of range must be a single char. 
__throw_regex_error(regex_constants::error_range, - "Invalid start of range in bracket expression."); + "Invalid start of '[x-x]' range in " + "regular expression"); } else if (__last_char._M_is_char()) { @@ -521,7 +514,8 @@ namespace __detail } else __throw_regex_error(regex_constants::error_range, - "Invalid end of range in bracket expression."); + "Invalid end of '[x-x]' range in " + "regular expression"); } else if (_M_flags & regex_constants::ECMAScript) { @@ -532,7 +526,8 @@ namespace __detail } else __throw_regex_error(regex_constants::error_range, - "Invalid dash in bracket expression."); + "Invalid location of '-' within '[...]' in " + "POSIX regular expression"); } else if (_M_match_token(_ScannerT::_S_token_quoted_class)) { @@ -543,8 +538,8 @@ namespace __detail } else __throw_regex_error(regex_constants::error_brack, - "Unexpected character in bracket expression."); - + "Unexpected character within '[...]' in " + "regular expression"); return true; } diff --git a/libstdc++-v3/include/bits/regex_error.h b/libstdc++-v3/include/bits/regex_error.h index 767600ccdab..77d4925921b 100644 --- a/libstdc++-v3/include/bits/regex_error.h +++ b/libstdc++-v3/include/bits/regex_error.h @@ -133,7 +133,9 @@ namespace regex_constants */ class regex_error : public std::runtime_error { - regex_constants::error_type _M_code; + using error_type = regex_constants::error_type; + + error_type _M_code; public: /** @@ -142,7 +144,7 @@ namespace regex_constants * @param __ecode the regex error code. 
*/ explicit - regex_error(regex_constants::error_type __ecode); + regex_error(error_type __ecode); virtual ~regex_error() throw(); @@ -156,23 +158,30 @@ namespace regex_constants { return _M_code; } private: - regex_error(regex_constants::error_type __ecode, const char* __what) + regex_error(error_type __ecode, const char* __what) : std::runtime_error(__what), _M_code(__ecode) { } - friend void __throw_regex_error(regex_constants::error_type, const char*); + [[__noreturn__]] + friend void + __throw_regex_error(error_type __ecode __attribute__((__unused__)), + const char* __what __attribute__((__unused__))) + { _GLIBCXX_THROW_OR_ABORT(regex_error(__ecode, __what)); } }; - ///@} // group regex + /// @cond undocumented + [[__noreturn__]] void __throw_regex_error(regex_constants::error_type __ecode); + [[__noreturn__]] inline void - __throw_regex_error(regex_constants::error_type __ecode - __attribute__((__unused__)), - const char* __what __attribute__((__unused__))) - { _GLIBCXX_THROW_OR_ABORT(regex_error(__ecode, __what)); } + __throw_regex_error(regex_constants::error_type __ecode, const char* __what); + + /// @endcond + + ///@} // group regex _GLIBCXX_END_NAMESPACE_VERSION } // namespace std diff --git a/libstdc++-v3/include/bits/regex_scanner.tcc b/libstdc++-v3/include/bits/regex_scanner.tcc index 1431cfc37e6..2a1745b42f6 100644 --- a/libstdc++-v3/include/bits/regex_scanner.tcc +++ b/libstdc++-v3/include/bits/regex_scanner.tcc @@ -108,7 +108,7 @@ namespace __detail if (_M_current == _M_end) __throw_regex_error( regex_constants::error_escape, - "Unexpected end of regex when escaping."); + "Invalid escape at end of regular expression"); if (!_M_is_basic() || (*_M_current != '(' @@ -125,9 +125,7 @@ namespace __detail if (_M_is_ecma() && *_M_current == '?') { if (++_M_current == _M_end) - __throw_regex_error( - regex_constants::error_paren, - "Unexpected end of regex when in an open parenthesis."); + __throw_regex_error(regex_constants::error_paren); if (*_M_current 
== ':') { @@ -147,9 +145,9 @@ namespace __detail _M_value.assign(1, 'n'); } else - __throw_regex_error( - regex_constants::error_paren, - "Invalid special open parenthesis."); + __throw_regex_error(regex_constants::error_paren, + "Invalid '(?...)' zero-width assertion " + "in regular expression"); } else if (_M_flags & regex_constants::nosubs) _M_token = _S_token_subexpr_no_group_begin; @@ -178,10 +176,7 @@ namespace __detail else if (__builtin_expect(__c == _CharT(0), false)) { if (!_M_is_ecma()) - { - __throw_regex_error(regex_constants::_S_null, - "Unexpected null character in regular expression"); - } + __throw_regex_error(regex_constants::_S_null); _M_token = _S_token_ord_char; _M_value.assign(1, __c); } @@ -213,9 +208,7 @@ namespace __detail _M_scan_in_bracket() { if (_M_current == _M_end) - __throw_regex_error( - regex_constants::error_brack, - "Unexpected end of regex when in bracket expression."); + __throw_regex_error(regex_constants::error_brack); auto __c = *_M_current++; @@ -225,7 +218,8 @@ namespace __detail { if (_M_current == _M_end) __throw_regex_error(regex_constants::error_brack, - "Unexpected character class open bracket."); + "Incomplete '[[' character class in " + "regular expression"); if (*_M_current == '.') { @@ -250,7 +244,7 @@ namespace __detail } // In POSIX, when encountering "[]" or "[^]", the ']' is interpreted // literally. So "[]]" and "[^]]" are valid regexes. See the testcases - // `*/empty_range.cc`. + // `.../empty_range.cc`. 
else if (__c == ']' && (_M_is_ecma() || !_M_at_bracket_start)) { _M_token = _S_token_bracket_end; @@ -275,9 +269,7 @@ namespace __detail _M_scan_in_brace() { if (_M_current == _M_end) - __throw_regex_error( - regex_constants::error_brace, - "Unexpected end of regex when in brace expression."); + __throw_regex_error(regex_constants::error_brace); auto __c = *_M_current++; @@ -301,8 +293,7 @@ namespace __detail ++_M_current; } else - __throw_regex_error(regex_constants::error_badbrace, - "Unexpected character in brace expression."); + __throw_regex_error(regex_constants::error_badbrace); } else if (__c == '}') { @@ -310,8 +301,7 @@ namespace __detail _M_token = _S_token_interval_end; } else - __throw_regex_error(regex_constants::error_badbrace, - "Unexpected character in brace expression."); + __throw_regex_error(regex_constants::error_badbrace); } template @@ -320,8 +310,7 @@ namespace __detail _M_eat_escape_ecma() { if (_M_current == _M_end) - __throw_regex_error(regex_constants::error_escape, - "Unexpected end of regex when escaping."); + __throw_regex_error(regex_constants::error_escape); auto __c = *_M_current++; auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0')); @@ -355,22 +344,26 @@ namespace __detail else if (__c == 'c') { if (_M_current == _M_end) - __throw_regex_error( - regex_constants::error_escape, - "Unexpected end of regex when reading control code."); + __throw_regex_error(regex_constants::error_escape, + "invalid '\\cX' control character in " + "regular expression"); _M_token = _S_token_ord_char; _M_value.assign(1, *_M_current++); } else if (__c == 'x' || __c == 'u') { - _M_value.erase(); - for (int __i = 0; __i < (__c == 'x' ? 2 : 4); __i++) + _M_value.clear(); + const int __n = __c == 'x' ? 
2 : 4; + for (int __i = 0; __i < __n; __i++) { if (_M_current == _M_end || !_M_ctype.is(_CtypeT::xdigit, *_M_current)) - __throw_regex_error( - regex_constants::error_escape, - "Unexpected end of regex when ascii character."); + __throw_regex_error(regex_constants::error_escape, + __n == 2 + ? "Invalid '\\xNN' control character in " + "regular expression" + : "Invalid '\\uNNNN' control character in " + "regular expression"); _M_value += *_M_current++; } _M_token = _S_token_hex_num; @@ -399,8 +392,7 @@ namespace __detail _M_eat_escape_posix() { if (_M_current == _M_end) - __throw_regex_error(regex_constants::error_escape, - "Unexpected end of regex when escaping."); + __throw_regex_error(regex_constants::error_escape); auto __c = *_M_current; auto __pos = __builtin_strchr(_M_spec_char, _M_ctype.narrow(__c, '\0')); @@ -425,8 +417,7 @@ namespace __detail { #ifdef __STRICT_ANSI__ // POSIX says it is undefined to escape ordinary characters - __throw_regex_error(regex_constants::error_escape, - "Unexpected escape character."); + __throw_regex_error(regex_constants::error_escape); #else _M_token = _S_token_ord_char; _M_value.assign(1, __c); @@ -466,8 +457,7 @@ namespace __detail return; } else - __throw_regex_error(regex_constants::error_escape, - "Unexpected escape character."); + __throw_regex_error(regex_constants::error_escape); } // Eats a character class or throws an exception. @@ -485,12 +475,8 @@ namespace __detail || _M_current == _M_end // skip __ch || *_M_current++ != ']') // skip ']' { - if (__ch == ':') - __throw_regex_error(regex_constants::error_ctype, - "Unexpected end of character class."); - else - __throw_regex_error(regex_constants::error_collate, - "Unexpected end of character class."); + __throw_regex_error(__ch == ':' ? 
regex_constants::error_ctype + : regex_constants::error_collate); } } diff --git a/libstdc++-v3/src/c++11/regex.cc b/libstdc++-v3/src/c++11/regex.cc index 0a4a5524b22..d5e1cc7612a 100644 --- a/libstdc++-v3/src/c++11/regex.cc +++ b/libstdc++-v3/src/c++11/regex.cc @@ -35,8 +35,53 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION __attribute__((unused))) { _GLIBCXX_THROW_OR_ABORT(regex_error(__ecode)); } +namespace +{ + const char* + desc(regex_constants::error_type e) + { + using namespace regex_constants; + switch (e) + { + case error_collate: + return "Invalid collating element in regular expression"; + case error_ctype: + return "Invalid character class in regular expression"; + case error_escape: + return "Invalid escape in regular expression"; + case error_backref: + return "Invalid back reference in regular expression"; + case error_brack: + return "Mismatched '[' and ']' in regular expression"; + case error_paren: + return "Mismatched '(' and ')' in regular expression"; + case error_brace: + return "Mismatched '{' and '}' in regular expression"; + case error_badbrace: + return "Invalid range in '{}' in regular expression"; + case error_range: + return "Invalid character range in regular expression"; + case error_space: + return "Insufficient memory to compile regular expression"; + case error_badrepeat: + return "Invalid '?', '*', or '+' in regular expression"; + case error_complexity: + return "Complexity of regex match exceeded implementation limits"; + case error_stack: + return "Insufficient memory to determine regex match"; + case _S_null: + return "Unexpected null character in regular expression"; + case _S_grammar: + return "Conflicting regex grammar options"; + default: + return "regex error"; + }; + + } +} + regex_error::regex_error(regex_constants::error_type __ecode) - : std::runtime_error("regex_error"), _M_code(__ecode) + : std::runtime_error(desc(__ecode)), _M_code(__ecode) { } regex_error::~regex_error() throw() { } -- 2.31.1