* [committed] libstdc++: Improve std::regex_error::what() strings
@ 2022-01-05 13:47 Jonathan Wakely
0 siblings, 0 replies; only message in thread
From: Jonathan Wakely @ 2022-01-05 13:47 UTC (permalink / raw)
To: libstdc++, gcc-patches
Tested powerpc64le-linux, pushed to trunk.
This replaces the vague "regex_error" for std::regex_error::what() with
a string that corresponds to the error_type enum passed to the
constructor. This allows us to remove many of the strings passed to
__throw_regex_error, because the default string is at least as good.
When a string argument to __throw_regex_error is kept it should add some
context-specific detail absent from the default string.
Also remove full stops (periods) from the end of those strings, to make
it easier to include them in logs and other output. I've left them
starting with an upper-case letter, which is consistent with strerror
output for (at least) Glibc, Solaris and BSD. I'm ambivalent about whether
that's the right choice.
This also adds the missing noreturn attribute to __throw_regex_error.
libstdc++-v3/ChangeLog:
* include/bits/regex_compiler.tcc: Adjust all calls to
__throw_regex_error.
* include/bits/regex_error.h (__throw_regex_error): Add noreturn
attribute.
* include/bits/regex_scanner.tcc: Adjust all calls to
__throw_regex_error.
* src/c++11/regex.cc (desc): New helper function.
(regex_error::regex_error(error_type)): Use desc to get a string
corresponding to the error code.
---
libstdc++-v3/include/bits/regex_compiler.tcc | 37 +++++-----
libstdc++-v3/include/bits/regex_error.h | 27 ++++---
libstdc++-v3/include/bits/regex_scanner.tcc | 76 ++++++++------------
libstdc++-v3/src/c++11/regex.cc | 47 +++++++++++-
4 files changed, 111 insertions(+), 76 deletions(-)
diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc
index ce834b12255..c12f7502538 100644
--- a/libstdc++-v3/include/bits/regex_compiler.tcc
+++ b/libstdc++-v3/include/bits/regex_compiler.tcc
@@ -157,8 +157,7 @@ namespace __detail
auto __neg = _M_value[0] == 'n';
this->_M_disjunction();
if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
- __throw_regex_error(regex_constants::error_paren,
- "Parenthesis is not closed.");
+ __throw_regex_error(regex_constants::error_paren);
auto __tmp = _M_pop();
__tmp._M_append(_M_nfa->_M_insert_accept());
_M_stack.push(
@@ -180,8 +179,7 @@ namespace __detail
auto __init = [this, &__neg]()
{
if (_M_stack.empty())
- __throw_regex_error(regex_constants::error_badrepeat,
- "Nothing to repeat before a quantifier.");
+ __throw_regex_error(regex_constants::error_badrepeat);
__neg = __neg && _M_match_token(_ScannerT::_S_token_opt);
};
if (_M_match_token(_ScannerT::_S_token_closure0))
@@ -217,11 +215,9 @@ namespace __detail
else if (_M_match_token(_ScannerT::_S_token_interval_begin))
{
if (_M_stack.empty())
- __throw_regex_error(regex_constants::error_badrepeat,
- "Nothing to repeat before a quantifier.");
+ __throw_regex_error(regex_constants::error_badrepeat);
if (!_M_match_token(_ScannerT::_S_token_dup_count))
- __throw_regex_error(regex_constants::error_badbrace,
- "Unexpected token in brace expression.");
+ __throw_regex_error(regex_constants::error_badbrace);
_StateSeqT __r(_M_pop());
_StateSeqT __e(*_M_nfa, _M_nfa->_M_insert_dummy());
long __min_rep = _M_cur_int_value(10);
@@ -237,8 +233,7 @@ namespace __detail
__infi = true;
}
if (!_M_match_token(_ScannerT::_S_token_interval_end))
- __throw_regex_error(regex_constants::error_brace,
- "Unexpected end of brace expression.");
+ __throw_regex_error(regex_constants::error_brace);
__neg = __neg && _M_match_token(_ScannerT::_S_token_opt);
@@ -257,8 +252,7 @@ namespace __detail
else
{
if (__n < 0)
- __throw_regex_error(regex_constants::error_badbrace,
- "Invalid range in brace expression.");
+ __throw_regex_error(regex_constants::error_badbrace);
auto __end = _M_nfa->_M_insert_dummy();
// _M_alt is the "match more" branch, and _M_next is the
// "match less" one. Switch _M_alt and _M_next of all created
@@ -325,8 +319,7 @@ namespace __detail
_StateSeqT __r(*_M_nfa, _M_nfa->_M_insert_dummy());
this->_M_disjunction();
if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
- __throw_regex_error(regex_constants::error_paren,
- "Parenthesis is not closed.");
+ __throw_regex_error(regex_constants::error_paren);
__r._M_append(_M_pop());
_M_stack.push(__r);
}
@@ -335,8 +328,7 @@ namespace __detail
_StateSeqT __r(*_M_nfa, _M_nfa->_M_insert_subexpr_begin());
this->_M_disjunction();
if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
- __throw_regex_error(regex_constants::error_paren,
- "Parenthesis is not closed.");
+ __throw_regex_error(regex_constants::error_paren);
__r._M_append(_M_pop());
__r._M_append(_M_nfa->_M_insert_subexpr_end());
_M_stack.push(__r);
@@ -503,7 +495,8 @@ namespace __detail
{
// "\\w-" is invalid, start of range must be a single char.
__throw_regex_error(regex_constants::error_range,
- "Invalid start of range in bracket expression.");
+ "Invalid start of '[x-x]' range in "
+ "regular expression");
}
else if (__last_char._M_is_char())
{
@@ -521,7 +514,8 @@ namespace __detail
}
else
__throw_regex_error(regex_constants::error_range,
- "Invalid end of range in bracket expression.");
+ "Invalid end of '[x-x]' range in "
+ "regular expression");
}
else if (_M_flags & regex_constants::ECMAScript)
{
@@ -532,7 +526,8 @@ namespace __detail
}
else
__throw_regex_error(regex_constants::error_range,
- "Invalid dash in bracket expression.");
+ "Invalid location of '-' within '[...]' in "
+ "POSIX regular expression");
}
else if (_M_match_token(_ScannerT::_S_token_quoted_class))
{
@@ -543,8 +538,8 @@ namespace __detail
}
else
__throw_regex_error(regex_constants::error_brack,
- "Unexpected character in bracket expression.");
-
+ "Unexpected character within '[...]' in "
+ "regular expression");
return true;
}
diff --git a/libstdc++-v3/include/bits/regex_error.h b/libstdc++-v3/include/bits/regex_error.h
index 767600ccdab..77d4925921b 100644
--- a/libstdc++-v3/include/bits/regex_error.h
+++ b/libstdc++-v3/include/bits/regex_error.h
@@ -133,7 +133,9 @@ namespace regex_constants
*/
class regex_error : public std::runtime_error
{
- regex_constants::error_type _M_code;
+ using error_type = regex_constants::error_type;
+
+ error_type _M_code;
public:
/**
@@ -142,7 +144,7 @@ namespace regex_constants
* @param __ecode the regex error code.
*/
explicit
- regex_error(regex_constants::error_type __ecode);
+ regex_error(error_type __ecode);
virtual ~regex_error() throw();
@@ -156,23 +158,30 @@ namespace regex_constants
{ return _M_code; }
private:
- regex_error(regex_constants::error_type __ecode, const char* __what)
+ regex_error(error_type __ecode, const char* __what)
: std::runtime_error(__what), _M_code(__ecode)
{ }
- friend void __throw_regex_error(regex_constants::error_type, const char*);
+ [[__noreturn__]]
+ friend void
+ __throw_regex_error(error_type __ecode __attribute__((__unused__)),
+ const char* __what __attribute__((__unused__)))
+ { _GLIBCXX_THROW_OR_ABORT(regex_error(__ecode, __what)); }
};
- ///@} // group regex
+ /// @cond undocumented
+ [[__noreturn__]]
void
__throw_regex_error(regex_constants::error_type __ecode);
+ [[__noreturn__]]
inline void
- __throw_regex_error(regex_constants::error_type __ecode
- __attribute__((__unused__)),
- const char* __what __attribute__((__unused__)))
- { _GLIBCXX_THROW_OR_ABORT(regex_error(__ecode, __what)); }
+ __throw_regex_error(regex_constants::error_type __ecode, const char* __what);
+
+ /// @endcond
+
+ ///@} // group regex
_GLIBCXX_END_NAMESPACE_VERSION
} // namespace std
diff --git a/libstdc++-v3/include/bits/regex_scanner.tcc b/libstdc++-v3/include/bits/regex_scanner.tcc
index 1431cfc37e6..2a1745b42f6 100644
--- a/libstdc++-v3/include/bits/regex_scanner.tcc
+++ b/libstdc++-v3/include/bits/regex_scanner.tcc
@@ -108,7 +108,7 @@ namespace __detail
if (_M_current == _M_end)
__throw_regex_error(
regex_constants::error_escape,
- "Unexpected end of regex when escaping.");
+ "Invalid escape at end of regular expression");
if (!_M_is_basic()
|| (*_M_current != '('
@@ -125,9 +125,7 @@ namespace __detail
if (_M_is_ecma() && *_M_current == '?')
{
if (++_M_current == _M_end)
- __throw_regex_error(
- regex_constants::error_paren,
- "Unexpected end of regex when in an open parenthesis.");
+ __throw_regex_error(regex_constants::error_paren);
if (*_M_current == ':')
{
@@ -147,9 +145,9 @@ namespace __detail
_M_value.assign(1, 'n');
}
else
- __throw_regex_error(
- regex_constants::error_paren,
- "Invalid special open parenthesis.");
+ __throw_regex_error(regex_constants::error_paren,
+ "Invalid '(?...)' zero-width assertion "
+ "in regular expression");
}
else if (_M_flags & regex_constants::nosubs)
_M_token = _S_token_subexpr_no_group_begin;
@@ -178,10 +176,7 @@ namespace __detail
else if (__builtin_expect(__c == _CharT(0), false))
{
if (!_M_is_ecma())
- {
- __throw_regex_error(regex_constants::_S_null,
- "Unexpected null character in regular expression");
- }
+ __throw_regex_error(regex_constants::_S_null);
_M_token = _S_token_ord_char;
_M_value.assign(1, __c);
}
@@ -213,9 +208,7 @@ namespace __detail
_M_scan_in_bracket()
{
if (_M_current == _M_end)
- __throw_regex_error(
- regex_constants::error_brack,
- "Unexpected end of regex when in bracket expression.");
+ __throw_regex_error(regex_constants::error_brack);
auto __c = *_M_current++;
@@ -225,7 +218,8 @@ namespace __detail
{
if (_M_current == _M_end)
__throw_regex_error(regex_constants::error_brack,
- "Unexpected character class open bracket.");
+ "Incomplete '[[' character class in "
+ "regular expression");
if (*_M_current == '.')
{
@@ -250,7 +244,7 @@ namespace __detail
}
// In POSIX, when encountering "[]" or "[^]", the ']' is interpreted
// literally. So "[]]" and "[^]]" are valid regexes. See the testcases
- // `*/empty_range.cc`.
+ // `.../empty_range.cc`.
else if (__c == ']' && (_M_is_ecma() || !_M_at_bracket_start))
{
_M_token = _S_token_bracket_end;
@@ -275,9 +269,7 @@ namespace __detail
_M_scan_in_brace()
{
if (_M_current == _M_end)
- __throw_regex_error(
- regex_constants::error_brace,
- "Unexpected end of regex when in brace expression.");
+ __throw_regex_error(regex_constants::error_brace);
auto __c = *_M_current++;
@@ -301,8 +293,7 @@ namespace __detail
++_M_current;
}
else
- __throw_regex_error(regex_constants::error_badbrace,
- "Unexpected character in brace expression.");
+ __throw_regex_error(regex_constants::error_badbrace);
}
else if (__c == '}')
{
@@ -310,8 +301,7 @@ namespace __detail
_M_token = _S_token_interval_end;
}
else
- __throw_regex_error(regex_constants::error_badbrace,
- "Unexpected character in brace expression.");
+ __throw_regex_error(regex_constants::error_badbrace);
}
template<typename _CharT>
@@ -320,8 +310,7 @@ namespace __detail
_M_eat_escape_ecma()
{
if (_M_current == _M_end)
- __throw_regex_error(regex_constants::error_escape,
- "Unexpected end of regex when escaping.");
+ __throw_regex_error(regex_constants::error_escape);
auto __c = *_M_current++;
auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0'));
@@ -355,22 +344,26 @@ namespace __detail
else if (__c == 'c')
{
if (_M_current == _M_end)
- __throw_regex_error(
- regex_constants::error_escape,
- "Unexpected end of regex when reading control code.");
+ __throw_regex_error(regex_constants::error_escape,
+ "invalid '\\cX' control character in "
+ "regular expression");
_M_token = _S_token_ord_char;
_M_value.assign(1, *_M_current++);
}
else if (__c == 'x' || __c == 'u')
{
- _M_value.erase();
- for (int __i = 0; __i < (__c == 'x' ? 2 : 4); __i++)
+ _M_value.clear();
+ const int __n = __c == 'x' ? 2 : 4;
+ for (int __i = 0; __i < __n; __i++)
{
if (_M_current == _M_end
|| !_M_ctype.is(_CtypeT::xdigit, *_M_current))
- __throw_regex_error(
- regex_constants::error_escape,
- "Unexpected end of regex when ascii character.");
+ __throw_regex_error(regex_constants::error_escape,
+ __n == 2
+ ? "Invalid '\\xNN' control character in "
+ "regular expression"
+ : "Invalid '\\uNNNN' control character in "
+ "regular expression");
_M_value += *_M_current++;
}
_M_token = _S_token_hex_num;
@@ -399,8 +392,7 @@ namespace __detail
_M_eat_escape_posix()
{
if (_M_current == _M_end)
- __throw_regex_error(regex_constants::error_escape,
- "Unexpected end of regex when escaping.");
+ __throw_regex_error(regex_constants::error_escape);
auto __c = *_M_current;
auto __pos = __builtin_strchr(_M_spec_char, _M_ctype.narrow(__c, '\0'));
@@ -425,8 +417,7 @@ namespace __detail
{
#ifdef __STRICT_ANSI__
// POSIX says it is undefined to escape ordinary characters
- __throw_regex_error(regex_constants::error_escape,
- "Unexpected escape character.");
+ __throw_regex_error(regex_constants::error_escape);
#else
_M_token = _S_token_ord_char;
_M_value.assign(1, __c);
@@ -466,8 +457,7 @@ namespace __detail
return;
}
else
- __throw_regex_error(regex_constants::error_escape,
- "Unexpected escape character.");
+ __throw_regex_error(regex_constants::error_escape);
}
// Eats a character class or throws an exception.
@@ -485,12 +475,8 @@ namespace __detail
|| _M_current == _M_end // skip __ch
|| *_M_current++ != ']') // skip ']'
{
- if (__ch == ':')
- __throw_regex_error(regex_constants::error_ctype,
- "Unexpected end of character class.");
- else
- __throw_regex_error(regex_constants::error_collate,
- "Unexpected end of character class.");
+ __throw_regex_error(__ch == ':' ? regex_constants::error_ctype
+ : regex_constants::error_collate);
}
}
diff --git a/libstdc++-v3/src/c++11/regex.cc b/libstdc++-v3/src/c++11/regex.cc
index 0a4a5524b22..d5e1cc7612a 100644
--- a/libstdc++-v3/src/c++11/regex.cc
+++ b/libstdc++-v3/src/c++11/regex.cc
@@ -35,8 +35,53 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__attribute__((unused)))
{ _GLIBCXX_THROW_OR_ABORT(regex_error(__ecode)); }
+namespace // unnamed, so desc is visible only inside this source file
+{
+ const char* // every path returns a string literal, never a null pointer
+ desc(regex_constants::error_type e) // human-readable text for error code e
+ {
+ using namespace regex_constants; // lets the case labels below be unqualified
+ switch (e)
+ {
+ case error_collate:
+ return "Invalid collating element in regular expression";
+ case error_ctype:
+ return "Invalid character class in regular expression";
+ case error_escape:
+ return "Invalid escape in regular expression";
+ case error_backref:
+ return "Invalid back reference in regular expression";
+ case error_brack:
+ return "Mismatched '[' and ']' in regular expression";
+ case error_paren:
+ return "Mismatched '(' and ')' in regular expression";
+ case error_brace:
+ return "Mismatched '{' and '}' in regular expression";
+ case error_badbrace:
+ return "Invalid range in '{}' in regular expression";
+ case error_range:
+ return "Invalid character range in regular expression";
+ case error_space:
+ return "Insufficient memory to compile regular expression";
+ case error_badrepeat:
+ return "Invalid '?', '*', or '+' in regular expression";
+ case error_complexity:
+ return "Complexity of regex match exceeded implementation limits";
+ case error_stack:
+ return "Insufficient memory to determine regex match";
+ case _S_null:
+ return "Unexpected null character in regular expression";
+ case _S_grammar:
+ return "Conflicting regex grammar options";
+ default: // any error code without a case above gets a generic message
+ return "regex error";
+ };
+
+ }
+}
+
regex_error::regex_error(regex_constants::error_type __ecode)
- : std::runtime_error("regex_error"), _M_code(__ecode)
+ : std::runtime_error(desc(__ecode)), _M_code(__ecode)
{ }
regex_error::~regex_error() throw() { }
--
2.31.1
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2022-01-05 13:47 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-05 13:47 [committed] libstdc++: Improve std::regex_error::what() strings Jonathan Wakely
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).