* [Patch, libstdc++] Add specific error message into exceptions
@ 2015-08-28 5:33 Tim Shen
2015-08-28 16:24 ` Jonathan Wakely
0 siblings, 1 reply; 9+ messages in thread
From: Tim Shen @ 2015-08-28 5:33 UTC (permalink / raw)
To: libstdc++, gcc-patches
[-- Attachment #1: Type: text/plain, Size: 58 bytes --]
Bootstrapped and tested.
Thanks!
--
Regards,
Tim Shen
[-- Attachment #2: a.diff --]
[-- Type: text/plain, Size: 18586 bytes --]
commit 53c1caff442e97a18652ec8b1d984341168fd98d
Author: Tim Shen <timshen@google.com>
Date: Thu Aug 27 21:42:40 2015 -0700
PR libstdc++/67361
* include/bits/regex_error.h: Add __throw_regex_error that
supports string.
* include/bits/regex_automaton.h: Add more specific exception
messages.
* include/bits/regex_automaton.tcc: Likewise.
* include/bits/regex_compiler.h: Likewise.
* include/bits/regex_compiler.tcc: Likewise.
* include/bits/regex_scanner.h: Likewise.
* include/bits/regex_scanner.tcc: Likewise.
diff --git a/libstdc++-v3/include/bits/regex_automaton.h b/libstdc++-v3/include/bits/regex_automaton.h
index b6ab307..1f672ee 100644
--- a/libstdc++-v3/include/bits/regex_automaton.h
+++ b/libstdc++-v3/include/bits/regex_automaton.h
@@ -327,7 +327,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
this->push_back(std::move(__s));
if (this->size() > _GLIBCXX_REGEX_STATE_LIMIT)
- __throw_regex_error(regex_constants::error_space);
+ __throw_regex_error(
+ regex_constants::error_space,
+ "Number of NFA states exceeds limit. Please use shorter regex "
+ "string, or use smaller brace expression, or make "
+ "_GLIBCXX_REGEX_STATE_LIMIT larger.");
return this->size()-1;
}
diff --git a/libstdc++-v3/include/bits/regex_automaton.tcc b/libstdc++-v3/include/bits/regex_automaton.tcc
index cecc407..4c541bc 100644
--- a/libstdc++-v3/include/bits/regex_automaton.tcc
+++ b/libstdc++-v3/include/bits/regex_automaton.tcc
@@ -149,7 +149,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_NFA<_TraitsT>::_M_insert_backref(size_t __index)
{
if (this->_M_flags & regex_constants::__polynomial)
- __throw_regex_error(regex_constants::error_complexity);
+ __throw_regex_error(regex_constants::error_complexity,
+ "Unexpected back-reference in polynomial mode.");
// To figure out whether a backref is valid, a stack is used to store
// unfinished sub-expressions. For example, when parsing
// "(a(b)(c\\1(d)))" at '\\1', _M_subexpr_count is 3, indicating that 3
@@ -158,10 +159,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// _M_paren_stack is {1, 3}, for incomplete "(a.." and "(c..". At this
// time, "\\2" is valid, but "\\1" and "\\3" are not.
if (__index >= _M_subexpr_count)
- __throw_regex_error(regex_constants::error_backref);
+ __throw_regex_error(
+ regex_constants::error_backref,
+ "Back-reference index exceeds current sub-expression count.");
for (auto __it : this->_M_paren_stack)
if (__index == __it)
- __throw_regex_error(regex_constants::error_backref);
+ __throw_regex_error(
+ regex_constants::error_backref,
+ "Back-reference refered to an opened sub-expression.");
this->_M_has_backref = true;
_StateT __tmp(_S_opcode_backref);
__tmp._M_backref_index = __index;
diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h
index 0cb0c04..12ffabe 100644
--- a/libstdc++-v3/include/bits/regex_compiler.h
+++ b/libstdc++-v3/include/bits/regex_compiler.h
@@ -397,7 +397,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
auto __st = _M_traits.lookup_collatename(__s.data(),
__s.data() + __s.size());
if (__st.empty())
- __throw_regex_error(regex_constants::error_collate);
+ __throw_regex_error(regex_constants::error_collate,
+ string("Invalid collate element: "));
_M_char_set.push_back(_M_translator._M_translate(__st[0]));
#ifdef _GLIBCXX_DEBUG
_M_is_ready = false;
@@ -411,7 +412,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
auto __st = _M_traits.lookup_collatename(__s.data(),
__s.data() + __s.size());
if (__st.empty())
- __throw_regex_error(regex_constants::error_collate);
+ __throw_regex_error(regex_constants::error_collate,
+ string("Invalid equivalence class."));
__st = _M_traits.transform_primary(__st.data(),
__st.data() + __st.size());
_M_equiv_set.push_back(__st);
@@ -428,7 +430,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__s.data() + __s.size(),
__icase);
if (__mask == 0)
- __throw_regex_error(regex_constants::error_ctype);
+ __throw_regex_error(regex_constants::error_collate,
+ string("Invalid character class."));
if (!__neg)
_M_class_set |= __mask;
else
@@ -442,7 +445,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_make_range(_CharT __l, _CharT __r)
{
if (__l > __r)
- __throw_regex_error(regex_constants::error_range);
+ __throw_regex_error(regex_constants::error_range,
+ string("Invalid range in bracket expression."));
_M_range_set.push_back(make_pair(_M_translator._M_transform(__l),
_M_translator._M_transform(__r)));
#ifdef _GLIBCXX_DEBUG
diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc
index 9a62311..019ca42 100644
--- a/libstdc++-v3/include/bits/regex_compiler.tcc
+++ b/libstdc++-v3/include/bits/regex_compiler.tcc
@@ -77,16 +77,26 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_traits(_M_nfa->_M_traits),
_M_ctype(std::use_facet<_CtypeT>(__loc))
{
- _StateSeqT __r(*_M_nfa, _M_nfa->_M_start());
- __r._M_append(_M_nfa->_M_insert_subexpr_begin());
- this->_M_disjunction();
- if (!_M_match_token(_ScannerT::_S_token_eof))
- __throw_regex_error(regex_constants::error_paren);
- __r._M_append(_M_pop());
- _GLIBCXX_DEBUG_ASSERT(_M_stack.empty());
- __r._M_append(_M_nfa->_M_insert_subexpr_end());
- __r._M_append(_M_nfa->_M_insert_accept());
- _M_nfa->_M_eliminate_dummy();
+ __try
+ {
+ _StateSeqT __r(*_M_nfa, _M_nfa->_M_start());
+ __r._M_append(_M_nfa->_M_insert_subexpr_begin());
+ this->_M_disjunction();
+ if (!_M_match_token(_ScannerT::_S_token_eof))
+ __throw_regex_error(regex_constants::error_paren,
+ "Unexpected end of regex.");
+ __r._M_append(_M_pop());
+ _GLIBCXX_DEBUG_ASSERT(_M_stack.empty());
+ __r._M_append(_M_nfa->_M_insert_subexpr_end());
+ __r._M_append(_M_nfa->_M_insert_accept());
+ _M_nfa->_M_eliminate_dummy();
+ }
+ __catch(std::regex_error __e)
+ {
+ __throw_regex_error(__e.code(),
+ string(__e.what()) + " Location: \""
+ + _M_scanner._M_get_location_string() + "\"");
+ }
}
template<typename _TraitsT>
@@ -162,7 +172,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
auto __neg = _M_value[0] == 'n';
this->_M_disjunction();
if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
- __throw_regex_error(regex_constants::error_paren);
+ __throw_regex_error(regex_constants::error_paren,
+ "Parenthesis is not closed.");
auto __tmp = _M_pop();
__tmp._M_append(_M_nfa->_M_insert_accept());
_M_stack.push(
@@ -184,7 +195,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
auto __init = [this, &__neg]()
{
if (_M_stack.empty())
- __throw_regex_error(regex_constants::error_badrepeat);
+ __throw_regex_error(regex_constants::error_badrepeat,
+ "Nothing to repeat befoer a quantifier.");
__neg = __neg && _M_match_token(_ScannerT::_S_token_opt);
};
if (_M_match_token(_ScannerT::_S_token_closure0))
@@ -220,9 +232,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
else if (_M_match_token(_ScannerT::_S_token_interval_begin))
{
if (_M_stack.empty())
- __throw_regex_error(regex_constants::error_badrepeat);
+ __throw_regex_error(regex_constants::error_badrepeat,
+ "Nothing to repeat befoer a quantifier.");
if (!_M_match_token(_ScannerT::_S_token_dup_count))
- __throw_regex_error(regex_constants::error_badbrace);
+ __throw_regex_error(regex_constants::error_badbrace,
+ "Unexpected token in brace expression.");
_StateSeqT __r(_M_pop());
_StateSeqT __e(*_M_nfa, _M_nfa->_M_insert_dummy());
long __min_rep = _M_cur_int_value(10);
@@ -238,7 +252,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
else
__n = 0;
if (!_M_match_token(_ScannerT::_S_token_interval_end))
- __throw_regex_error(regex_constants::error_brace);
+ __throw_regex_error(regex_constants::error_brace,
+ "Unexpected end of brace expression.");
__neg = __neg && _M_match_token(_ScannerT::_S_token_opt);
@@ -257,7 +272,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
else
{
if (__n < 0)
- __throw_regex_error(regex_constants::error_badbrace);
+ __throw_regex_error(regex_constants::error_badbrace,
+ "Invalid range in brace expression.");
auto __end = _M_nfa->_M_insert_dummy();
// _M_alt is the "match more" branch, and _M_next is the
// "match less" one. Switch _M_alt and _M_next of all created
@@ -324,7 +340,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_StateSeqT __r(*_M_nfa, _M_nfa->_M_insert_dummy());
this->_M_disjunction();
if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
- __throw_regex_error(regex_constants::error_paren);
+ __throw_regex_error(regex_constants::error_paren,
+ "Parenthesis is not closed.");
__r._M_append(_M_pop());
_M_stack.push(__r);
}
@@ -333,7 +350,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_StateSeqT __r(*_M_nfa, _M_nfa->_M_insert_subexpr_begin());
this->_M_disjunction();
if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
- __throw_regex_error(regex_constants::error_paren);
+ __throw_regex_error(regex_constants::error_paren,
+ "Parenthesis is not closed.");
__r._M_append(_M_pop());
__r._M_append(_M_nfa->_M_insert_subexpr_end());
_M_stack.push(__r);
@@ -474,7 +492,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
if (_M_match_token(_ScannerT::_S_token_bracket_end))
return false;
- __throw_regex_error(regex_constants::error_range);
+ __throw_regex_error(
+ regex_constants::error_range,
+ "Unexpected dash in bracket expression. For POSIX syntax, "
+ "a dash is not treated literally only when it is at "
+ "beginning or end.");
}
__last_char.first = true;
__last_char.second = _M_value[0];
@@ -492,7 +514,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
if (_M_scanner._M_get_token()
!= _ScannerT::_S_token_bracket_end)
- __throw_regex_error(regex_constants::error_range);
+ __throw_regex_error(
+ regex_constants::error_range,
+ "Unexpected end of bracket expression.");
__matcher._M_add_char(_M_value[0]);
}
}
@@ -508,7 +532,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_ctype.is(_CtypeT::upper,
_M_value[0]));
else
- __throw_regex_error(regex_constants::error_brack);
+ __throw_regex_error(regex_constants::error_brack,
+ "Unexpected character in bracket expression.");
return true;
}
diff --git a/libstdc++-v3/include/bits/regex_error.h b/libstdc++-v3/include/bits/regex_error.h
index 778edd5..0dd1fdf 100644
--- a/libstdc++-v3/include/bits/regex_error.h
+++ b/libstdc++-v3/include/bits/regex_error.h
@@ -155,6 +155,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
regex_constants::error_type
code() const
{ return _M_code; }
+
+ private:
+ regex_error(regex_constants::error_type __ecode, const string& __what)
+ : std::runtime_error(__what), _M_code(__ecode) { }
+
+ friend void __throw_regex_error(regex_constants::error_type, const string&);
};
//@} // group regex
@@ -162,5 +168,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
void
__throw_regex_error(regex_constants::error_type __ecode);
+ inline void
+ __throw_regex_error(regex_constants::error_type __ecode, const string& __what)
+ { _GLIBCXX_THROW_OR_ABORT(regex_error(__ecode, __what)); }
+
_GLIBCXX_END_NAMESPACE_VERSION
} // namespace std
diff --git a/libstdc++-v3/include/bits/regex_scanner.h b/libstdc++-v3/include/bits/regex_scanner.h
index b47103e..7795dd2 100644
--- a/libstdc++-v3/include/bits/regex_scanner.h
+++ b/libstdc++-v3/include/bits/regex_scanner.h
@@ -220,6 +220,22 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_get_value() const
{ return _M_value; }
+ string
+ _M_get_location_string() const
+ {
+ auto __left = std::max(_M_begin, _M_current - 2);
+ auto __right = std::min(_M_end, _M_current + 3);
+ static constexpr char __here[] = ">>><<<";
+ string __s;
+ __s.reserve(__right - __left + ::strlen(__here));
+ while (__left < _M_current)
+ __s += _M_ctype.narrow(*__left++, '?');
+ __s += __here;
+ while (__left < __right)
+ __s += _M_ctype.narrow(*__left++, '?');
+ return __s;
+ }
+
#ifdef _GLIBCXX_DEBUG
std::ostream&
_M_print(std::ostream&);
@@ -247,6 +263,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
void
_M_eat_class(char);
+ _IterT _M_begin;
_IterT _M_current;
_IterT _M_end;
_CtypeT& _M_ctype;
diff --git a/libstdc++-v3/include/bits/regex_scanner.tcc b/libstdc++-v3/include/bits/regex_scanner.tcc
index 1555669..d6d667a 100644
--- a/libstdc++-v3/include/bits/regex_scanner.tcc
+++ b/libstdc++-v3/include/bits/regex_scanner.tcc
@@ -58,7 +58,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
typename _Scanner::_IterT __end,
_FlagT __flags, std::locale __loc)
: _ScannerBase(__flags),
- _M_current(__begin), _M_end(__end),
+ _M_begin(__begin), _M_current(__begin), _M_end(__end),
_M_ctype(std::use_facet<_CtypeT>(__loc)),
_M_eat_escape(_M_is_ecma()
? &_Scanner::_M_eat_escape_ecma
@@ -108,7 +108,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
if (__c == '\\')
{
if (_M_current == _M_end)
- __throw_regex_error(regex_constants::error_escape);
+ __throw_regex_error(
+ regex_constants::error_escape,
+ "Unexpected end of regex when escaping.");
if (!_M_is_basic()
|| (*_M_current != '('
@@ -125,7 +127,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
if (_M_is_ecma() && *_M_current == '?')
{
if (++_M_current == _M_end)
- __throw_regex_error(regex_constants::error_paren);
+ __throw_regex_error(
+ regex_constants::error_paren,
+ "Unexpected end of regex when in an open parenthesis.");
if (*_M_current == ':')
{
@@ -145,7 +149,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_value.assign(1, 'n');
}
else
- __throw_regex_error(regex_constants::error_paren);
+ __throw_regex_error(
+ regex_constants::error_paren,
+ "Invalid special open parenthesis.");
}
else if (_M_flags & regex_constants::nosubs)
_M_token = _S_token_subexpr_no_group_begin;
@@ -204,14 +210,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_scan_in_bracket()
{
if (_M_current == _M_end)
- __throw_regex_error(regex_constants::error_brack);
+ __throw_regex_error(
+ regex_constants::error_brack,
+ "Unexpected end of regex when in bracket expression.");
auto __c = *_M_current++;
if (__c == '[')
{
if (_M_current == _M_end)
- __throw_regex_error(regex_constants::error_brack);
+ __throw_regex_error(regex_constants::error_brack,
+ "Unexpected character class open bracket.");
if (*_M_current == '.')
{
@@ -261,7 +270,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_scan_in_brace()
{
if (_M_current == _M_end)
- __throw_regex_error(regex_constants::error_brace);
+ __throw_regex_error(
+ regex_constants::error_brace,
+ "Unexpected end of regex when in brace expression.");
auto __c = *_M_current++;
@@ -285,7 +296,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
++_M_current;
}
else
- __throw_regex_error(regex_constants::error_badbrace);
+ __throw_regex_error(regex_constants::error_badbrace,
+ "Unexpected character in brace expression.");
}
else if (__c == '}')
{
@@ -293,7 +305,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_token = _S_token_interval_end;
}
else
- __throw_regex_error(regex_constants::error_badbrace);
+ __throw_regex_error(regex_constants::error_badbrace,
+ "Unexpected character in brace expression.");
}
template<typename _CharT>
@@ -302,7 +315,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_eat_escape_ecma()
{
if (_M_current == _M_end)
- __throw_regex_error(regex_constants::error_escape);
+ __throw_regex_error(regex_constants::error_escape,
+ "Unexpected end of regex when escaping.");
auto __c = *_M_current++;
auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0'));
@@ -336,7 +350,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
else if (__c == 'c')
{
if (_M_current == _M_end)
- __throw_regex_error(regex_constants::error_escape);
+ __throw_regex_error(
+ regex_constants::error_escape,
+ "Unexpected end of regex when reading control code.");
_M_token = _S_token_ord_char;
_M_value.assign(1, *_M_current++);
}
@@ -347,7 +363,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
if (_M_current == _M_end
|| !_M_ctype.is(_CtypeT::xdigit, *_M_current))
- __throw_regex_error(regex_constants::error_escape);
+ __throw_regex_error(
+ regex_constants::error_escape,
+ "Unexpected end of regex when ascii character.");
_M_value += *_M_current++;
}
_M_token = _S_token_hex_num;
@@ -376,7 +394,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_eat_escape_posix()
{
if (_M_current == _M_end)
- __throw_regex_error(regex_constants::error_escape);
+ __throw_regex_error(regex_constants::error_escape,
+ "Unexpected end of regex when escaping.");
auto __c = *_M_current;
auto __pos = std::strchr(_M_spec_char, _M_ctype.narrow(__c, '\0'));
@@ -401,7 +420,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
#ifdef __STRICT_ANSI__
// POSIX says it is undefined to escape ordinary characters
- __throw_regex_error(regex_constants::error_escape);
+ __throw_regex_error(regex_constants::error_escape,
+ "Unexpected escape character.");
#else
_M_token = _S_token_ord_char;
_M_value.assign(1, __c);
@@ -441,7 +461,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return;
}
else
- __throw_regex_error(regex_constants::error_escape);
+ __throw_regex_error(regex_constants::error_escape,
+ "Unexpected escape character.");
}
// Eats a character class or throws an exception.
@@ -460,9 +481,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|| *_M_current++ != ']') // skip ']'
{
if (__ch == ':')
- __throw_regex_error(regex_constants::error_ctype);
+ __throw_regex_error(regex_constants::error_ctype,
+ "Unexpected end of character class.");
else
- __throw_regex_error(regex_constants::error_collate);
+ __throw_regex_error(regex_constants::error_collate,
+ "Unexpected end of character class.");
}
}
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Patch, libstdc++] Add specific error message into exceptions
2015-08-28 5:33 [Patch, libstdc++] Add specific error message into exceptions Tim Shen
@ 2015-08-28 16:24 ` Jonathan Wakely
2015-08-28 19:01 ` Tim Shen
0 siblings, 1 reply; 9+ messages in thread
From: Jonathan Wakely @ 2015-08-28 16:24 UTC (permalink / raw)
To: Tim Shen; +Cc: libstdc++, gcc-patches
On 27/08/15 22:18 -0700, Tim Shen wrote:
>Bootstrapped and tested.
>
>Thanks!
>
>
>--
>Regards,
>Tim Shen
>commit 53c1caff442e97a18652ec8b1d984341168fd98d
>Author: Tim Shen <timshen@google.com>
>Date: Thu Aug 27 21:42:40 2015 -0700
>
> PR libstdc++/67361
> * include/bits/regex_error.h: Add __throw_regex_error that
> supports string.
> * include/bits/regex_automaton.h: Add more specific exception
> messages.
> * include/bits/regex_automaton.tcc: Likewise.
> * include/bits/regex_compiler.h: Likewise.
> * include/bits/regex_compiler.tcc: Likewise.
> * include/bits/regex_scanner.h: Likewise.
> * include/bits/regex_scanner.tcc: Likewise.
Nice, thanks for doing this!
>@@ -158,10 +159,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> // _M_paren_stack is {1, 3}, for incomplete "(a.." and "(c..". At this
> // time, "\\2" is valid, but "\\1" and "\\3" are not.
> if (__index >= _M_subexpr_count)
>- __throw_regex_error(regex_constants::error_backref);
>+ __throw_regex_error(
>+ regex_constants::error_backref,
>+ "Back-reference index exceeds current sub-expression count.");
> for (auto __it : this->_M_paren_stack)
> if (__index == __it)
>- __throw_regex_error(regex_constants::error_backref);
>+ __throw_regex_error(
>+ regex_constants::error_backref,
>+ "Back-reference refered to an opened sub-expression.");
Should be "referred".
And one of the other strings in another throw says "befoer".
>diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h
>index 0cb0c04..12ffabe 100644
>--- a/libstdc++-v3/include/bits/regex_compiler.h
>+++ b/libstdc++-v3/include/bits/regex_compiler.h
>@@ -397,7 +397,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> auto __st = _M_traits.lookup_collatename(__s.data(),
> __s.data() + __s.size());
> if (__st.empty())
>- __throw_regex_error(regex_constants::error_collate);
>+ __throw_regex_error(regex_constants::error_collate,
>+ string("Invalid collate element: "));
> _M_char_set.push_back(_M_translator._M_translate(__st[0]));
> #ifdef _GLIBCXX_DEBUG
> _M_is_ready = false;
There seems to be no need to construct a std::string here, just pass a
const char* (see below).
Also, this string ends in a colon, whereas most end in a period. Any
reason for the difference?
>@@ -411,7 +412,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> auto __st = _M_traits.lookup_collatename(__s.data(),
> __s.data() + __s.size());
> if (__st.empty())
>- __throw_regex_error(regex_constants::error_collate);
>+ __throw_regex_error(regex_constants::error_collate,
>+ string("Invalid equivalence class."));
> __st = _M_traits.transform_primary(__st.data(),
> __st.data() + __st.size());
> _M_equiv_set.push_back(__st);
Just pass const char*.
>@@ -428,7 +430,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> __s.data() + __s.size(),
> __icase);
> if (__mask == 0)
>- __throw_regex_error(regex_constants::error_ctype);
>+ __throw_regex_error(regex_constants::error_collate,
>+ string("Invalid character class."));
> if (!__neg)
> _M_class_set |= __mask;
> else
Ditto.
>@@ -442,7 +445,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> _M_make_range(_CharT __l, _CharT __r)
> {
> if (__l > __r)
>- __throw_regex_error(regex_constants::error_range);
>+ __throw_regex_error(regex_constants::error_range,
>+ string("Invalid range in bracket expression."));
> _M_range_set.push_back(make_pair(_M_translator._M_transform(__l),
> _M_translator._M_transform(__r)));
> #ifdef _GLIBCXX_DEBUG
Ditto.
>diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc
>index 9a62311..019ca42 100644
>--- a/libstdc++-v3/include/bits/regex_compiler.tcc
>+++ b/libstdc++-v3/include/bits/regex_compiler.tcc
>@@ -77,16 +77,26 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> _M_traits(_M_nfa->_M_traits),
> _M_ctype(std::use_facet<_CtypeT>(__loc))
> {
>- _StateSeqT __r(*_M_nfa, _M_nfa->_M_start());
>- __r._M_append(_M_nfa->_M_insert_subexpr_begin());
>- this->_M_disjunction();
>- if (!_M_match_token(_ScannerT::_S_token_eof))
>- __throw_regex_error(regex_constants::error_paren);
>- __r._M_append(_M_pop());
>- _GLIBCXX_DEBUG_ASSERT(_M_stack.empty());
>- __r._M_append(_M_nfa->_M_insert_subexpr_end());
>- __r._M_append(_M_nfa->_M_insert_accept());
>- _M_nfa->_M_eliminate_dummy();
>+ __try
>+ {
>+ _StateSeqT __r(*_M_nfa, _M_nfa->_M_start());
>+ __r._M_append(_M_nfa->_M_insert_subexpr_begin());
>+ this->_M_disjunction();
>+ if (!_M_match_token(_ScannerT::_S_token_eof))
>+ __throw_regex_error(regex_constants::error_paren,
>+ "Unexpected end of regex.");
>+ __r._M_append(_M_pop());
>+ _GLIBCXX_DEBUG_ASSERT(_M_stack.empty());
>+ __r._M_append(_M_nfa->_M_insert_subexpr_end());
>+ __r._M_append(_M_nfa->_M_insert_accept());
>+ _M_nfa->_M_eliminate_dummy();
>+ }
>+ __catch(std::regex_error __e)
>+ {
>+ __throw_regex_error(__e.code(),
>+ string(__e.what()) + " Location: \""
>+ + _M_scanner._M_get_location_string() + "\"");
>+ }
> }
>
> template<typename _TraitsT>
I wonder if we want to make this more efficient by adding a private
member to regex_error that would allow information to be appended to
the string, rather than creating a new regex_error with a new string.
>diff --git a/libstdc++-v3/include/bits/regex_error.h b/libstdc++-v3/include/bits/regex_error.h
>index 778edd5..0dd1fdf 100644
>--- a/libstdc++-v3/include/bits/regex_error.h
>+++ b/libstdc++-v3/include/bits/regex_error.h
>@@ -155,6 +155,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> regex_constants::error_type
> code() const
> { return _M_code; }
>+
>+ private:
>+ regex_error(regex_constants::error_type __ecode, const string& __what)
>+ : std::runtime_error(__what), _M_code(__ecode) { }
>+
>+ friend void __throw_regex_error(regex_constants::error_type, const string&);
> };
>
> //@} // group regex
>@@ -162,5 +168,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> void
> __throw_regex_error(regex_constants::error_type __ecode);
>
>+ inline void
>+ __throw_regex_error(regex_constants::error_type __ecode, const string& __what)
>+ { _GLIBCXX_THROW_OR_ABORT(regex_error(__ecode, __what)); }
>+
> _GLIBCXX_END_NAMESPACE_VERSION
> } // namespace std
I suggest adding another overload that takes a const char* rather than
std::string. The reason is that when using the new ABI this function
will take a std::__cxx11::string, so calling it will allocate memory
for the string data, then that string is passed to the regex_error
constructor which has to convert it internally to an old std::string,
which has to allocate a second time.
If there is an overload taking a const char* then that can be passed
to the regex_error constructor and only one allocation will be done.
(I have considered making it possible for exceptions to move the
memory from a new string into their old string member, but that
isn't done currently.)
>diff --git a/libstdc++-v3/include/bits/regex_scanner.h b/libstdc++-v3/include/bits/regex_scanner.h
>index b47103e..7795dd2 100644
>--- a/libstdc++-v3/include/bits/regex_scanner.h
>+++ b/libstdc++-v3/include/bits/regex_scanner.h
>@@ -220,6 +220,22 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> _M_get_value() const
> { return _M_value; }
>
>+ string
>+ _M_get_location_string() const
>+ {
>+ auto __left = std::max(_M_begin, _M_current - 2);
>+ auto __right = std::min(_M_end, _M_current + 3);
>+ static constexpr char __here[] = ">>><<<";
I don't think there's any advantage to using a static here, it doesn't
need to be a global symbol, and with optimisation enabled we get the
same code from just const char __here[] = ">>><<<";
>@@ -247,6 +263,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> void
> _M_eat_class(char);
>
>+ _IterT _M_begin;
> _IterT _M_current;
> _IterT _M_end;
> _CtypeT& _M_ctype;
This looks like an ABI change, as the size of the type changes.
If I understand correctly this is only needed for the location info,
we could still have nice human readable text in the exceptions without
this, right?
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Patch, libstdc++] Add specific error message into exceptions
2015-08-28 16:24 ` Jonathan Wakely
@ 2015-08-28 19:01 ` Tim Shen
2015-08-29 4:46 ` Tim Shen
2015-09-07 11:23 ` Jonathan Wakely
0 siblings, 2 replies; 9+ messages in thread
From: Tim Shen @ 2015-08-28 19:01 UTC (permalink / raw)
To: Jonathan Wakely; +Cc: libstdc++, gcc-patches
On Fri, Aug 28, 2015 at 8:59 AM, Jonathan Wakely <jwakely@redhat.com> wrote:
> There seems to be no need to construct a std::string here, just pass a
> const char* (see below).
To be honest, I wasn't considering performance for a bit, since
exceptions are already considered slow by me :P. But yes, we can do
fewer allocations.
> I wonder if we want to make this more efficient by adding a private
> member to regex_error that would allow information to be appended to
> the string, rather than creating a new regex_error with a new string.
I can add a helper function to _Scanner to construct the exception
object only once. For functions that can't access this helper, use
a return value for error handling.
> I suggest adding another overload that takes a const char* rather than
> std::string. The reason is that when using the new ABI this function
> will take a std::__cxx11::string, so calling it will allocate memory
> for the string data, then that string is passed to the regex_error
> constructor which has to convert it internally to an old std::string,
> which has to allocate a second time.
First, to make it clear: due to _M_get_location_string(), we need
dynamic allocation.
So is it good to have an owned raw pointer stored in runtime_error,
pointing to a heap allocated char chunk, which will be deallocated in
regex_error's dtor?
--
Regards,
Tim Shen
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Patch, libstdc++] Add specific error message into exceptions
2015-08-28 19:01 ` Tim Shen
@ 2015-08-29 4:46 ` Tim Shen
2015-09-07 11:06 ` Jonathan Wakely
2015-09-07 11:23 ` Jonathan Wakely
1 sibling, 1 reply; 9+ messages in thread
From: Tim Shen @ 2015-08-29 4:46 UTC (permalink / raw)
To: Jonathan Wakely; +Cc: libstdc++, gcc-patches
[-- Attachment #1: Type: text/plain, Size: 401 bytes --]
On Fri, Aug 28, 2015 at 11:23 AM, Tim Shen <timshen@google.com> wrote:
> So is it good to have an owned raw pointer stored in runtime_error,
> pointing to a heap allocated char chunk, which will be deallocated in
> regex_error's dtor?
I just put a string member into regex_error, completely ignoring the
storage in std::runtime_error.
Also used rethrow to keep stack frames.
--
Regards,
Tim Shen
[-- Attachment #2: b.diff --]
[-- Type: text/plain, Size: 18803 bytes --]
commit 36e7845b251eb1b2eeea76e22264acad1cab6355
Author: Tim Shen <timshen@google.com>
Date: Thu Aug 27 21:42:40 2015 -0700
PR libstdc++/67361
* include/bits/regex_error.h: Add __throw_regex_error that
supports string.
* include/bits/regex_automaton.h: Add more specific exception
messages.
* include/bits/regex_automaton.tcc: Likewise.
* include/bits/regex_compiler.h: Likewise.
* include/bits/regex_compiler.tcc: Likewise.
* include/bits/regex_scanner.h: Likewise.
* include/bits/regex_scanner.tcc: Likewise.
diff --git a/libstdc++-v3/include/bits/regex_automaton.h b/libstdc++-v3/include/bits/regex_automaton.h
index b6ab307..1f672ee 100644
--- a/libstdc++-v3/include/bits/regex_automaton.h
+++ b/libstdc++-v3/include/bits/regex_automaton.h
@@ -327,7 +327,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
this->push_back(std::move(__s));
if (this->size() > _GLIBCXX_REGEX_STATE_LIMIT)
- __throw_regex_error(regex_constants::error_space);
+ __throw_regex_error(
+ regex_constants::error_space,
+ "Number of NFA states exceeds limit. Please use shorter regex "
+ "string, or use smaller brace expression, or make "
+ "_GLIBCXX_REGEX_STATE_LIMIT larger.");
return this->size()-1;
}
diff --git a/libstdc++-v3/include/bits/regex_automaton.tcc b/libstdc++-v3/include/bits/regex_automaton.tcc
index cecc407..4eeeac5 100644
--- a/libstdc++-v3/include/bits/regex_automaton.tcc
+++ b/libstdc++-v3/include/bits/regex_automaton.tcc
@@ -149,7 +149,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_NFA<_TraitsT>::_M_insert_backref(size_t __index)
{
if (this->_M_flags & regex_constants::__polynomial)
- __throw_regex_error(regex_constants::error_complexity);
+ __throw_regex_error(regex_constants::error_complexity,
+ "Unexpected back-reference in polynomial mode.");
// To figure out whether a backref is valid, a stack is used to store
// unfinished sub-expressions. For example, when parsing
// "(a(b)(c\\1(d)))" at '\\1', _M_subexpr_count is 3, indicating that 3
@@ -158,10 +159,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// _M_paren_stack is {1, 3}, for incomplete "(a.." and "(c..". At this
// time, "\\2" is valid, but "\\1" and "\\3" are not.
if (__index >= _M_subexpr_count)
- __throw_regex_error(regex_constants::error_backref);
+ __throw_regex_error(
+ regex_constants::error_backref,
+ "Back-reference index exceeds current sub-expression count.");
for (auto __it : this->_M_paren_stack)
if (__index == __it)
- __throw_regex_error(regex_constants::error_backref);
+ __throw_regex_error(
+ regex_constants::error_backref,
+ "Back-reference referred to an opened sub-expression.");
this->_M_has_backref = true;
_StateT __tmp(_S_opcode_backref);
__tmp._M_backref_index = __index;
diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h
index 0cb0c04..da44d42 100644
--- a/libstdc++-v3/include/bits/regex_compiler.h
+++ b/libstdc++-v3/include/bits/regex_compiler.h
@@ -397,7 +397,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
auto __st = _M_traits.lookup_collatename(__s.data(),
__s.data() + __s.size());
if (__st.empty())
- __throw_regex_error(regex_constants::error_collate);
+ __throw_regex_error(regex_constants::error_collate,
+ "Invalid collate element.");
_M_char_set.push_back(_M_translator._M_translate(__st[0]));
#ifdef _GLIBCXX_DEBUG
_M_is_ready = false;
@@ -411,7 +412,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
auto __st = _M_traits.lookup_collatename(__s.data(),
__s.data() + __s.size());
if (__st.empty())
- __throw_regex_error(regex_constants::error_collate);
+ __throw_regex_error(regex_constants::error_collate,
+ "Invalid equivalence class.");
__st = _M_traits.transform_primary(__st.data(),
__st.data() + __st.size());
_M_equiv_set.push_back(__st);
@@ -428,7 +430,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__s.data() + __s.size(),
__icase);
if (__mask == 0)
- __throw_regex_error(regex_constants::error_ctype);
+ __throw_regex_error(regex_constants::error_collate,
+ "Invalid character class.");
if (!__neg)
_M_class_set |= __mask;
else
@@ -442,7 +445,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_make_range(_CharT __l, _CharT __r)
{
if (__l > __r)
- __throw_regex_error(regex_constants::error_range);
+ __throw_regex_error(regex_constants::error_range,
+ "Invalid range in bracket expression.");
_M_range_set.push_back(make_pair(_M_translator._M_transform(__l),
_M_translator._M_transform(__r)));
#ifdef _GLIBCXX_DEBUG
diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc
index 9a62311..e633681 100644
--- a/libstdc++-v3/include/bits/regex_compiler.tcc
+++ b/libstdc++-v3/include/bits/regex_compiler.tcc
@@ -77,16 +77,27 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_traits(_M_nfa->_M_traits),
_M_ctype(std::use_facet<_CtypeT>(__loc))
{
- _StateSeqT __r(*_M_nfa, _M_nfa->_M_start());
- __r._M_append(_M_nfa->_M_insert_subexpr_begin());
- this->_M_disjunction();
- if (!_M_match_token(_ScannerT::_S_token_eof))
- __throw_regex_error(regex_constants::error_paren);
- __r._M_append(_M_pop());
- _GLIBCXX_DEBUG_ASSERT(_M_stack.empty());
- __r._M_append(_M_nfa->_M_insert_subexpr_end());
- __r._M_append(_M_nfa->_M_insert_accept());
- _M_nfa->_M_eliminate_dummy();
+ __try
+ {
+ _StateSeqT __r(*_M_nfa, _M_nfa->_M_start());
+ __r._M_append(_M_nfa->_M_insert_subexpr_begin());
+ this->_M_disjunction();
+ if (!_M_match_token(_ScannerT::_S_token_eof))
+ __throw_regex_error(regex_constants::error_paren,
+ "Unexpected end of regex.");
+ __r._M_append(_M_pop());
+ _GLIBCXX_DEBUG_ASSERT(_M_stack.empty());
+ __r._M_append(_M_nfa->_M_insert_subexpr_end());
+ __r._M_append(_M_nfa->_M_insert_accept());
+ _M_nfa->_M_eliminate_dummy();
+ }
+ __catch(std::regex_error& __e)
+ {
+ __regex_error_get_string(__e).append(" Location: ");
+ _M_scanner._M_append_location_string(
+ __b, __regex_error_get_string(__e));
+ __throw_exception_again;
+ }
}
template<typename _TraitsT>
@@ -162,7 +173,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
auto __neg = _M_value[0] == 'n';
this->_M_disjunction();
if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
- __throw_regex_error(regex_constants::error_paren);
+ __throw_regex_error(regex_constants::error_paren,
+ "Parenthesis is not closed.");
auto __tmp = _M_pop();
__tmp._M_append(_M_nfa->_M_insert_accept());
_M_stack.push(
@@ -184,7 +196,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
auto __init = [this, &__neg]()
{
if (_M_stack.empty())
- __throw_regex_error(regex_constants::error_badrepeat);
+ __throw_regex_error(regex_constants::error_badrepeat,
+ "Nothing to repeat before a quantifier.");
__neg = __neg && _M_match_token(_ScannerT::_S_token_opt);
};
if (_M_match_token(_ScannerT::_S_token_closure0))
@@ -220,9 +233,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
else if (_M_match_token(_ScannerT::_S_token_interval_begin))
{
if (_M_stack.empty())
- __throw_regex_error(regex_constants::error_badrepeat);
+ __throw_regex_error(regex_constants::error_badrepeat,
+ "Nothing to repeat before a quantifier.");
if (!_M_match_token(_ScannerT::_S_token_dup_count))
- __throw_regex_error(regex_constants::error_badbrace);
+ __throw_regex_error(regex_constants::error_badbrace,
+ "Unexpected token in brace expression.");
_StateSeqT __r(_M_pop());
_StateSeqT __e(*_M_nfa, _M_nfa->_M_insert_dummy());
long __min_rep = _M_cur_int_value(10);
@@ -238,7 +253,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
else
__n = 0;
if (!_M_match_token(_ScannerT::_S_token_interval_end))
- __throw_regex_error(regex_constants::error_brace);
+ __throw_regex_error(regex_constants::error_brace,
+ "Unexpected end of brace expression.");
__neg = __neg && _M_match_token(_ScannerT::_S_token_opt);
@@ -257,7 +273,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
else
{
if (__n < 0)
- __throw_regex_error(regex_constants::error_badbrace);
+ __throw_regex_error(regex_constants::error_badbrace,
+ "Invalid range in brace expression.");
auto __end = _M_nfa->_M_insert_dummy();
// _M_alt is the "match more" branch, and _M_next is the
// "match less" one. Switch _M_alt and _M_next of all created
@@ -324,7 +341,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_StateSeqT __r(*_M_nfa, _M_nfa->_M_insert_dummy());
this->_M_disjunction();
if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
- __throw_regex_error(regex_constants::error_paren);
+ __throw_regex_error(regex_constants::error_paren,
+ "Parenthesis is not closed.");
__r._M_append(_M_pop());
_M_stack.push(__r);
}
@@ -333,7 +351,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_StateSeqT __r(*_M_nfa, _M_nfa->_M_insert_subexpr_begin());
this->_M_disjunction();
if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
- __throw_regex_error(regex_constants::error_paren);
+ __throw_regex_error(regex_constants::error_paren,
+ "Parenthesis is not closed.");
__r._M_append(_M_pop());
__r._M_append(_M_nfa->_M_insert_subexpr_end());
_M_stack.push(__r);
@@ -474,7 +493,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
if (_M_match_token(_ScannerT::_S_token_bracket_end))
return false;
- __throw_regex_error(regex_constants::error_range);
+ __throw_regex_error(
+ regex_constants::error_range,
+ "Unexpected dash in bracket expression. For POSIX syntax, "
+ "a dash is not treated literally only when it is at "
+ "beginning or end.");
}
__last_char.first = true;
__last_char.second = _M_value[0];
@@ -492,7 +515,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
if (_M_scanner._M_get_token()
!= _ScannerT::_S_token_bracket_end)
- __throw_regex_error(regex_constants::error_range);
+ __throw_regex_error(
+ regex_constants::error_range,
+ "Unexpected end of bracket expression.");
__matcher._M_add_char(_M_value[0]);
}
}
@@ -508,7 +533,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_ctype.is(_CtypeT::upper,
_M_value[0]));
else
- __throw_regex_error(regex_constants::error_brack);
+ __throw_regex_error(regex_constants::error_brack,
+ "Unexpected character in bracket expression.");
return true;
}
diff --git a/libstdc++-v3/include/bits/regex_error.h b/libstdc++-v3/include/bits/regex_error.h
index 778edd5..1c7b4e6 100644
--- a/libstdc++-v3/include/bits/regex_error.h
+++ b/libstdc++-v3/include/bits/regex_error.h
@@ -135,6 +135,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
class regex_error : public std::runtime_error
{
regex_constants::error_type _M_code;
+ string _M_what;
public:
/**
@@ -145,6 +146,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
explicit
regex_error(regex_constants::error_type __ecode);
+ virtual const char* what() noexcept
+ { return _M_what.data(); }
+
virtual ~regex_error() throw();
/**
@@ -155,6 +159,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
regex_constants::error_type
code() const
{ return _M_code; }
+
+ private:
+ regex_error(regex_constants::error_type __ecode, const char* __what)
+ : std::runtime_error(__what), _M_code(__ecode)
+ {
+ auto __len = ::strlen(__what);
+ // Extra space for location information that will be concatenated later.
+ _M_what.reserve(__len + 32);
+ _M_what.assign(__what, __len);
+ }
+
+ friend void __throw_regex_error(regex_constants::error_type, const char*);
+ friend string& __regex_error_get_string(regex_error&);
};
//@} // group regex
@@ -162,5 +179,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
void
__throw_regex_error(regex_constants::error_type __ecode);
+ inline void
+ __throw_regex_error(regex_constants::error_type __ecode, const char* __what)
+ { _GLIBCXX_THROW_OR_ABORT(regex_error(__ecode, __what)); }
+
+ inline string&
+ __regex_error_get_string(regex_error& __e)
+ { return __e._M_what; }
+
_GLIBCXX_END_NAMESPACE_VERSION
} // namespace std
diff --git a/libstdc++-v3/include/bits/regex_scanner.h b/libstdc++-v3/include/bits/regex_scanner.h
index b47103e..1b380ef 100644
--- a/libstdc++-v3/include/bits/regex_scanner.h
+++ b/libstdc++-v3/include/bits/regex_scanner.h
@@ -220,6 +220,22 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_get_value() const
{ return _M_value; }
+ void
+ _M_append_location_string(_IterT __begin, string& __s) const
+ {
+ auto __left = std::max(__begin, _M_current - 2);
+ auto __right = std::min(_M_end, _M_current + 3);
+ constexpr char __here[] = ">>><<<";
+ auto __here_len = ::strlen(__here);
+ auto __i = __s.size();
+ __s.resize(__s.size() + __right - __left + __here_len);
+ _M_ctype.narrow(__left, _M_current, '?', &__s[__i]);
+ __i += _M_current - __left;
+ __s.replace(__i, __here_len, __here);
+ __i += __here_len;
+ _M_ctype.narrow(_M_current, __right, '?', &__s[__i]);
+ }
+
#ifdef _GLIBCXX_DEBUG
std::ostream&
_M_print(std::ostream&);
diff --git a/libstdc++-v3/include/bits/regex_scanner.tcc b/libstdc++-v3/include/bits/regex_scanner.tcc
index 1555669..e2c18f3 100644
--- a/libstdc++-v3/include/bits/regex_scanner.tcc
+++ b/libstdc++-v3/include/bits/regex_scanner.tcc
@@ -108,7 +108,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
if (__c == '\\')
{
if (_M_current == _M_end)
- __throw_regex_error(regex_constants::error_escape);
+ __throw_regex_error(
+ regex_constants::error_escape,
+ "Unexpected end of regex when escaping.");
if (!_M_is_basic()
|| (*_M_current != '('
@@ -125,7 +127,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
if (_M_is_ecma() && *_M_current == '?')
{
if (++_M_current == _M_end)
- __throw_regex_error(regex_constants::error_paren);
+ __throw_regex_error(
+ regex_constants::error_paren,
+ "Unexpected end of regex when in an open parenthesis.");
if (*_M_current == ':')
{
@@ -145,7 +149,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_value.assign(1, 'n');
}
else
- __throw_regex_error(regex_constants::error_paren);
+ __throw_regex_error(
+ regex_constants::error_paren,
+ "Invalid special open parenthesis.");
}
else if (_M_flags & regex_constants::nosubs)
_M_token = _S_token_subexpr_no_group_begin;
@@ -204,14 +210,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_scan_in_bracket()
{
if (_M_current == _M_end)
- __throw_regex_error(regex_constants::error_brack);
+ __throw_regex_error(
+ regex_constants::error_brack,
+ "Unexpected end of regex when in bracket expression.");
auto __c = *_M_current++;
if (__c == '[')
{
if (_M_current == _M_end)
- __throw_regex_error(regex_constants::error_brack);
+ __throw_regex_error(regex_constants::error_brack,
+ "Unexpected character class open bracket.");
if (*_M_current == '.')
{
@@ -261,7 +270,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_scan_in_brace()
{
if (_M_current == _M_end)
- __throw_regex_error(regex_constants::error_brace);
+ __throw_regex_error(
+ regex_constants::error_brace,
+ "Unexpected end of regex when in brace expression.");
auto __c = *_M_current++;
@@ -285,7 +296,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
++_M_current;
}
else
- __throw_regex_error(regex_constants::error_badbrace);
+ __throw_regex_error(regex_constants::error_badbrace,
+ "Unexpected character in brace expression.");
}
else if (__c == '}')
{
@@ -293,7 +305,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_token = _S_token_interval_end;
}
else
- __throw_regex_error(regex_constants::error_badbrace);
+ __throw_regex_error(regex_constants::error_badbrace,
+ "Unexpected character in brace expression.");
}
template<typename _CharT>
@@ -302,7 +315,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_eat_escape_ecma()
{
if (_M_current == _M_end)
- __throw_regex_error(regex_constants::error_escape);
+ __throw_regex_error(regex_constants::error_escape,
+ "Unexpected end of regex when escaping.");
auto __c = *_M_current++;
auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0'));
@@ -336,7 +350,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
else if (__c == 'c')
{
if (_M_current == _M_end)
- __throw_regex_error(regex_constants::error_escape);
+ __throw_regex_error(
+ regex_constants::error_escape,
+ "Unexpected end of regex when reading control code.");
_M_token = _S_token_ord_char;
_M_value.assign(1, *_M_current++);
}
@@ -347,7 +363,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
if (_M_current == _M_end
|| !_M_ctype.is(_CtypeT::xdigit, *_M_current))
- __throw_regex_error(regex_constants::error_escape);
+ __throw_regex_error(
+ regex_constants::error_escape,
+ "Unexpected end of regex when ascii character.");
_M_value += *_M_current++;
}
_M_token = _S_token_hex_num;
@@ -376,7 +394,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_eat_escape_posix()
{
if (_M_current == _M_end)
- __throw_regex_error(regex_constants::error_escape);
+ __throw_regex_error(regex_constants::error_escape,
+ "Unexpected end of regex when escaping.");
auto __c = *_M_current;
auto __pos = std::strchr(_M_spec_char, _M_ctype.narrow(__c, '\0'));
@@ -401,7 +420,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
#ifdef __STRICT_ANSI__
// POSIX says it is undefined to escape ordinary characters
- __throw_regex_error(regex_constants::error_escape);
+ __throw_regex_error(regex_constants::error_escape,
+ "Unexpected escape character.");
#else
_M_token = _S_token_ord_char;
_M_value.assign(1, __c);
@@ -441,7 +461,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return;
}
else
- __throw_regex_error(regex_constants::error_escape);
+ __throw_regex_error(regex_constants::error_escape,
+ "Unexpected escape character.");
}
// Eats a character class or throws an exception.
@@ -460,9 +481,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|| *_M_current++ != ']') // skip ']'
{
if (__ch == ':')
- __throw_regex_error(regex_constants::error_ctype);
+ __throw_regex_error(regex_constants::error_ctype,
+ "Unexpected end of character class.");
else
- __throw_regex_error(regex_constants::error_collate);
+ __throw_regex_error(regex_constants::error_collate,
+ "Unexpected end of character class.");
}
}
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Patch, libstdc++] Add specific error message into exceptions
2015-08-29 4:46 ` Tim Shen
@ 2015-09-07 11:06 ` Jonathan Wakely
0 siblings, 0 replies; 9+ messages in thread
From: Jonathan Wakely @ 2015-09-07 11:06 UTC (permalink / raw)
To: Tim Shen; +Cc: libstdc++, gcc-patches
On 28/08/15 20:44 -0700, Tim Shen wrote:
>On Fri, Aug 28, 2015 at 11:23 AM, Tim Shen <timshen@google.com> wrote:
>> So is it good to have an owned raw pointer stored in runtime_error,
>> pointing to a heap allocated char chunk, which will be deallocated in
>> regex_error's dtor?
>
>I just put a string member into regex_error, completely ignoring the
>storage in std::runtime_error.
That's still an ABI change, so not OK.
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Patch, libstdc++] Add specific error message into exceptions
2015-08-28 19:01 ` Tim Shen
2015-08-29 4:46 ` Tim Shen
@ 2015-09-07 11:23 ` Jonathan Wakely
2015-09-12 8:25 ` Tim Shen
[not found] ` <CAG4ZjN=O56GR0+E-SxQOCb6_W_HTAa+ocsjBYHH0XeQm1Fb-TQ@mail.gmail.com>
1 sibling, 2 replies; 9+ messages in thread
From: Jonathan Wakely @ 2015-09-07 11:23 UTC (permalink / raw)
To: Tim Shen; +Cc: libstdc++, gcc-patches
On 28/08/15 11:23 -0700, Tim Shen wrote:
>On Fri, Aug 28, 2015 at 8:59 AM, Jonathan Wakely <jwakely@redhat.com> wrote:
>> There seems to be no need to construct a std::string here, just pass a
>> const char* (see below).
>
>To be honest, I wasn't considering performance for a bit, since
>exceptions are already considered slow by me :P. But yes, we can do
>fewer allocations.
>
>> I wonder if we want to make this more efficient by adding a private
>> member to regex_error that would allow information to be appended to
>> the string, rather than creating a new regex_error with a new string.
In case it wasn't clear, I was suggesting to add a private member
*function* not data member.
>I can add a helper function to _Scanner to construct the exception
>object only once. For functions that can't access this helper, use
>a return value for error handling.
>
>> I suggest adding another overload that takes a const char* rather than
>> std::string. The reason is that when using the new ABI this function
>> will take a std::__cxx11::string, so calling it will allocate memory
>> for the string data, then that string is passed to the regex_error
>> constructor which has to convert it internally to an old std::string,
>> which has to allocate a second time.
>
>First, to make it clear: due to _M_get_location_string(), we need
>dynamic allocation.
>
>So is it good to have an owned raw pointer stored in runtime_error,
>pointing to a heap allocated char chunk, which will be deallocated in
>regex_error's dtor?
No, adding that pointer is an ABI change.
If you can't do it without an ABI change then you will have to lose
the _M_get_location_string() functionality. It seems non-essential
anyway.
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Patch, libstdc++] Add specific error message into exceptions
2015-09-07 11:23 ` Jonathan Wakely
@ 2015-09-12 8:25 ` Tim Shen
[not found] ` <CAG4ZjN=O56GR0+E-SxQOCb6_W_HTAa+ocsjBYHH0XeQm1Fb-TQ@mail.gmail.com>
1 sibling, 0 replies; 9+ messages in thread
From: Tim Shen @ 2015-09-12 8:25 UTC (permalink / raw)
To: Jonathan Wakely; +Cc: libstdc++, gcc-patches
[-- Attachment #1: Type: text/plain, Size: 1981 bytes --]
On Mon, Sep 7, 2015 at 4:06 AM, Jonathan Wakely <jwakely@redhat.com> wrote:
> On 28/08/15 11:23 -0700, Tim Shen wrote:
>>
>> On Fri, Aug 28, 2015 at 8:59 AM, Jonathan Wakely <jwakely@redhat.com>
>> wrote:
>>>
>>> There seems to be no need to construct a std::string here, just pass a
>>> const char* (see below).
>>
>>
>> To be honest, I wasn't considering performance for a bit, since
>> exceptions are already considered slow by me :P. But yes, we can do
>> fewer allocations.
>>
>>> I wonder if we want to make this more efficient by adding a private
>>> member to regex_error that would allow information to be appended to
>>> the string, rather than creating a new regex_error with a new string.
>
>
> In case it wasn't clear, I was suggesting to add a private member
> *function* not data member.
>
>> I can add a helper function to _Scanner to construct the exception
>> object only once. For functions that can't access this helper, use
>> a return value for error handling.
>>
>>> I suggest adding another overload that takes a const char* rather than
>>> std::string. The reason is that when using the new ABI this function
>>> will take a std::__cxx11::string, so calling it will allocate memory
>>> for the string data, then that string is passed to the regex_error
>>> constructor which has to convert it internally to an old std::string,
>>> which has to allocate a second time.
>>
>>
>> First, to make it clear: due to _M_get_location_string(), we need
>> dynamic allocation.
>>
>> So is it good to have an owned raw pointer stored in runtime_error,
>> pointing to a heap allocated char chunk, which will be deallocated in
>> regex_error's dtor?
>
>
> No, adding that pointer is an ABI change.
>
> If you can't do it without an ABI change then you will have to lose
> the _M_get_location_string() functionality. It seems non-essential
> anyway.
Ok then, let's not append dynamic location information, but use a
string literal pointer only.
--
Regards,
Tim Shen
[-- Attachment #2: c.diff --]
[-- Type: text/plain, Size: 15702 bytes --]
commit fc3343a2c719049620447f6dc20191e2af4895f6
Author: Tim Shen <timshen@google.com>
Date: Thu Aug 27 21:42:40 2015 -0700
PR libstdc++/67361
* include/bits/regex_error.h: Add __throw_regex_error that
supports string.
* include/bits/regex_automaton.h: Add more specific exception
messages.
* include/bits/regex_automaton.tcc: Likewise.
* include/bits/regex_compiler.h: Likewise.
* include/bits/regex_compiler.tcc: Likewise.
* include/bits/regex_scanner.h: Likewise.
* include/bits/regex_scanner.tcc: Likewise.
diff --git a/libstdc++-v3/include/bits/regex_automaton.h b/libstdc++-v3/include/bits/regex_automaton.h
index b6ab307..1f672ee 100644
--- a/libstdc++-v3/include/bits/regex_automaton.h
+++ b/libstdc++-v3/include/bits/regex_automaton.h
@@ -327,7 +327,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
this->push_back(std::move(__s));
if (this->size() > _GLIBCXX_REGEX_STATE_LIMIT)
- __throw_regex_error(regex_constants::error_space);
+ __throw_regex_error(
+ regex_constants::error_space,
+ "Number of NFA states exceeds limit. Please use shorter regex "
+ "string, or use smaller brace expression, or make "
+ "_GLIBCXX_REGEX_STATE_LIMIT larger.");
return this->size()-1;
}
diff --git a/libstdc++-v3/include/bits/regex_automaton.tcc b/libstdc++-v3/include/bits/regex_automaton.tcc
index f6f63a1..9bb1164 100644
--- a/libstdc++-v3/include/bits/regex_automaton.tcc
+++ b/libstdc++-v3/include/bits/regex_automaton.tcc
@@ -149,7 +149,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_NFA<_TraitsT>::_M_insert_backref(size_t __index)
{
if (this->_M_flags & regex_constants::__polynomial)
- __throw_regex_error(regex_constants::error_complexity);
+ __throw_regex_error(regex_constants::error_complexity,
+ "Unexpected back-reference in polynomial mode.");
// To figure out whether a backref is valid, a stack is used to store
// unfinished sub-expressions. For example, when parsing
// "(a(b)(c\\1(d)))" at '\\1', _M_subexpr_count is 3, indicating that 3
@@ -158,10 +159,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// _M_paren_stack is {1, 3}, for incomplete "(a.." and "(c..". At this
// time, "\\2" is valid, but "\\1" and "\\3" are not.
if (__index >= _M_subexpr_count)
- __throw_regex_error(regex_constants::error_backref);
+ __throw_regex_error(
+ regex_constants::error_backref,
+ "Back-reference index exceeds current sub-expression count.");
for (auto __it : this->_M_paren_stack)
if (__index == __it)
- __throw_regex_error(regex_constants::error_backref);
+ __throw_regex_error(
+ regex_constants::error_backref,
+ "Back-reference referred to an opened sub-expression.");
this->_M_has_backref = true;
_StateT __tmp(_S_opcode_backref);
__tmp._M_backref_index = __index;
diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h
index 07a9ed3..1f6348a 100644
--- a/libstdc++-v3/include/bits/regex_compiler.h
+++ b/libstdc++-v3/include/bits/regex_compiler.h
@@ -392,7 +392,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
auto __st = _M_traits.lookup_collatename(__s.data(),
__s.data() + __s.size());
if (__st.empty())
- __throw_regex_error(regex_constants::error_collate);
+ __throw_regex_error(regex_constants::error_collate,
+ "Invalid collate element.");
_M_char_set.push_back(_M_translator._M_translate(__st[0]));
_GLIBCXX_DEBUG_ONLY(_M_is_ready = false);
return __st;
@@ -404,7 +405,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
auto __st = _M_traits.lookup_collatename(__s.data(),
__s.data() + __s.size());
if (__st.empty())
- __throw_regex_error(regex_constants::error_collate);
+ __throw_regex_error(regex_constants::error_collate,
+ "Invalid equivalence class.");
__st = _M_traits.transform_primary(__st.data(),
__st.data() + __st.size());
_M_equiv_set.push_back(__st);
@@ -419,7 +421,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__s.data() + __s.size(),
__icase);
if (__mask == 0)
- __throw_regex_error(regex_constants::error_ctype);
+ __throw_regex_error(regex_constants::error_collate,
+ "Invalid character class.");
if (!__neg)
_M_class_set |= __mask;
else
@@ -431,7 +434,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_make_range(_CharT __l, _CharT __r)
{
if (__l > __r)
- __throw_regex_error(regex_constants::error_range);
+ __throw_regex_error(regex_constants::error_range,
+ "Invalid range in bracket expression.");
_M_range_set.push_back(make_pair(_M_translator._M_transform(__l),
_M_translator._M_transform(__r)));
_GLIBCXX_DEBUG_ONLY(_M_is_ready = false);
diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc
index 336a2e8..f7d52fc 100644
--- a/libstdc++-v3/include/bits/regex_compiler.tcc
+++ b/libstdc++-v3/include/bits/regex_compiler.tcc
@@ -162,7 +162,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
auto __neg = _M_value[0] == 'n';
this->_M_disjunction();
if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
- __throw_regex_error(regex_constants::error_paren);
+ __throw_regex_error(regex_constants::error_paren,
+ "Parenthesis is not closed.");
auto __tmp = _M_pop();
__tmp._M_append(_M_nfa->_M_insert_accept());
_M_stack.push(
@@ -184,7 +185,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
auto __init = [this, &__neg]()
{
if (_M_stack.empty())
- __throw_regex_error(regex_constants::error_badrepeat);
+ __throw_regex_error(regex_constants::error_badrepeat,
+ "Nothing to repeat before a quantifier.");
__neg = __neg && _M_match_token(_ScannerT::_S_token_opt);
};
if (_M_match_token(_ScannerT::_S_token_closure0))
@@ -220,9 +222,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
else if (_M_match_token(_ScannerT::_S_token_interval_begin))
{
if (_M_stack.empty())
- __throw_regex_error(regex_constants::error_badrepeat);
+ __throw_regex_error(regex_constants::error_badrepeat,
+ "Nothing to repeat before a quantifier.");
if (!_M_match_token(_ScannerT::_S_token_dup_count))
- __throw_regex_error(regex_constants::error_badbrace);
+ __throw_regex_error(regex_constants::error_badbrace,
+ "Unexpected token in brace expression.");
_StateSeqT __r(_M_pop());
_StateSeqT __e(*_M_nfa, _M_nfa->_M_insert_dummy());
long __min_rep = _M_cur_int_value(10);
@@ -238,7 +242,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
else
__n = 0;
if (!_M_match_token(_ScannerT::_S_token_interval_end))
- __throw_regex_error(regex_constants::error_brace);
+ __throw_regex_error(regex_constants::error_brace,
+ "Unexpected end of brace expression.");
__neg = __neg && _M_match_token(_ScannerT::_S_token_opt);
@@ -257,7 +262,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
else
{
if (__n < 0)
- __throw_regex_error(regex_constants::error_badbrace);
+ __throw_regex_error(regex_constants::error_badbrace,
+ "Invalid range in brace expression.");
auto __end = _M_nfa->_M_insert_dummy();
// _M_alt is the "match more" branch, and _M_next is the
// "match less" one. Switch _M_alt and _M_next of all created
@@ -324,7 +330,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_StateSeqT __r(*_M_nfa, _M_nfa->_M_insert_dummy());
this->_M_disjunction();
if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
- __throw_regex_error(regex_constants::error_paren);
+ __throw_regex_error(regex_constants::error_paren,
+ "Parenthesis is not closed.");
__r._M_append(_M_pop());
_M_stack.push(__r);
}
@@ -333,7 +340,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_StateSeqT __r(*_M_nfa, _M_nfa->_M_insert_subexpr_begin());
this->_M_disjunction();
if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
- __throw_regex_error(regex_constants::error_paren);
+ __throw_regex_error(regex_constants::error_paren,
+ "Parenthesis is not closed.");
__r._M_append(_M_pop());
__r._M_append(_M_nfa->_M_insert_subexpr_end());
_M_stack.push(__r);
@@ -474,7 +482,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
if (_M_match_token(_ScannerT::_S_token_bracket_end))
return false;
- __throw_regex_error(regex_constants::error_range);
+ __throw_regex_error(
+ regex_constants::error_range,
+ "Unexpected dash in bracket expression. For POSIX syntax, "
+ "a dash is not treated literally only when it is at "
+ "beginning or end.");
}
__last_char.first = true;
__last_char.second = _M_value[0];
@@ -492,7 +504,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
if (_M_scanner._M_get_token()
!= _ScannerT::_S_token_bracket_end)
- __throw_regex_error(regex_constants::error_range);
+ __throw_regex_error(
+ regex_constants::error_range,
+ "Unexpected end of bracket expression.");
__matcher._M_add_char(_M_value[0]);
}
}
@@ -508,7 +522,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_ctype.is(_CtypeT::upper,
_M_value[0]));
else
- __throw_regex_error(regex_constants::error_brack);
+ __throw_regex_error(regex_constants::error_brack,
+ "Unexpected character in bracket expression.");
return true;
}
diff --git a/libstdc++-v3/include/bits/regex_error.h b/libstdc++-v3/include/bits/regex_error.h
index 778edd5..be19fc1 100644
--- a/libstdc++-v3/include/bits/regex_error.h
+++ b/libstdc++-v3/include/bits/regex_error.h
@@ -155,6 +155,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
regex_constants::error_type
code() const
{ return _M_code; }
+
+ private:
+ regex_error(regex_constants::error_type __ecode, const char* __what)
+ : std::runtime_error(__what), _M_code(__ecode)
+ { }
+
+ friend void __throw_regex_error(regex_constants::error_type, const char*);
};
//@} // group regex
@@ -162,5 +169,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
void
__throw_regex_error(regex_constants::error_type __ecode);
+ inline void
+ __throw_regex_error(regex_constants::error_type __ecode, const char* __what)
+ { _GLIBCXX_THROW_OR_ABORT(regex_error(__ecode, __what)); }
+
_GLIBCXX_END_NAMESPACE_VERSION
} // namespace std
diff --git a/libstdc++-v3/include/bits/regex_scanner.tcc b/libstdc++-v3/include/bits/regex_scanner.tcc
index c158c65..7d24e06 100644
--- a/libstdc++-v3/include/bits/regex_scanner.tcc
+++ b/libstdc++-v3/include/bits/regex_scanner.tcc
@@ -108,7 +108,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
if (__c == '\\')
{
if (_M_current == _M_end)
- __throw_regex_error(regex_constants::error_escape);
+ __throw_regex_error(
+ regex_constants::error_escape,
+ "Unexpected end of regex when escaping.");
if (!_M_is_basic()
|| (*_M_current != '('
@@ -125,7 +127,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
if (_M_is_ecma() && *_M_current == '?')
{
if (++_M_current == _M_end)
- __throw_regex_error(regex_constants::error_paren);
+ __throw_regex_error(
+ regex_constants::error_paren,
+ "Unexpected end of regex when in an open parenthesis.");
if (*_M_current == ':')
{
@@ -145,7 +149,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_value.assign(1, 'n');
}
else
- __throw_regex_error(regex_constants::error_paren);
+ __throw_regex_error(
+ regex_constants::error_paren,
+ "Invalid special open parenthesis.");
}
else if (_M_flags & regex_constants::nosubs)
_M_token = _S_token_subexpr_no_group_begin;
@@ -204,14 +210,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_scan_in_bracket()
{
if (_M_current == _M_end)
- __throw_regex_error(regex_constants::error_brack);
+ __throw_regex_error(
+ regex_constants::error_brack,
+ "Unexpected end of regex when in bracket expression.");
auto __c = *_M_current++;
if (__c == '[')
{
if (_M_current == _M_end)
- __throw_regex_error(regex_constants::error_brack);
+ __throw_regex_error(regex_constants::error_brack,
+ "Unexpected character class open bracket.");
if (*_M_current == '.')
{
@@ -261,7 +270,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_scan_in_brace()
{
if (_M_current == _M_end)
- __throw_regex_error(regex_constants::error_brace);
+ __throw_regex_error(
+ regex_constants::error_brace,
+ "Unexpected end of regex when in brace expression.");
auto __c = *_M_current++;
@@ -285,7 +296,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
++_M_current;
}
else
- __throw_regex_error(regex_constants::error_badbrace);
+ __throw_regex_error(regex_constants::error_badbrace,
+ "Unexpected character in brace expression.");
}
else if (__c == '}')
{
@@ -293,7 +305,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_token = _S_token_interval_end;
}
else
- __throw_regex_error(regex_constants::error_badbrace);
+ __throw_regex_error(regex_constants::error_badbrace,
+ "Unexpected character in brace expression.");
}
template<typename _CharT>
@@ -302,7 +315,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_eat_escape_ecma()
{
if (_M_current == _M_end)
- __throw_regex_error(regex_constants::error_escape);
+ __throw_regex_error(regex_constants::error_escape,
+ "Unexpected end of regex when escaping.");
auto __c = *_M_current++;
auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0'));
@@ -336,7 +350,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
else if (__c == 'c')
{
if (_M_current == _M_end)
- __throw_regex_error(regex_constants::error_escape);
+ __throw_regex_error(
+ regex_constants::error_escape,
+ "Unexpected end of regex when reading control code.");
_M_token = _S_token_ord_char;
_M_value.assign(1, *_M_current++);
}
@@ -347,7 +363,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
if (_M_current == _M_end
|| !_M_ctype.is(_CtypeT::xdigit, *_M_current))
- __throw_regex_error(regex_constants::error_escape);
+ __throw_regex_error(
+ regex_constants::error_escape,
+ "Unexpected end of regex when ascii character.");
_M_value += *_M_current++;
}
_M_token = _S_token_hex_num;
@@ -376,7 +394,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_eat_escape_posix()
{
if (_M_current == _M_end)
- __throw_regex_error(regex_constants::error_escape);
+ __throw_regex_error(regex_constants::error_escape,
+ "Unexpected end of regex when escaping.");
auto __c = *_M_current;
auto __pos = std::strchr(_M_spec_char, _M_ctype.narrow(__c, '\0'));
@@ -401,7 +420,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
#ifdef __STRICT_ANSI__
// POSIX says it is undefined to escape ordinary characters
- __throw_regex_error(regex_constants::error_escape);
+ __throw_regex_error(regex_constants::error_escape,
+ "Unexpected escape character.");
#else
_M_token = _S_token_ord_char;
_M_value.assign(1, __c);
@@ -441,7 +461,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return;
}
else
- __throw_regex_error(regex_constants::error_escape);
+ __throw_regex_error(regex_constants::error_escape,
+ "Unexpected escape character.");
}
// Eats a character class or throws an exception.
@@ -460,9 +481,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|| *_M_current++ != ']') // skip ']'
{
if (__ch == ':')
- __throw_regex_error(regex_constants::error_ctype);
+ __throw_regex_error(regex_constants::error_ctype,
+ "Unexpected end of character class.");
else
- __throw_regex_error(regex_constants::error_collate);
+ __throw_regex_error(regex_constants::error_collate,
+ "Unexpected end of character class.");
}
}
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Patch, libstdc++] Add specific error message into exceptions
[not found] ` <CAG4ZjN=O56GR0+E-SxQOCb6_W_HTAa+ocsjBYHH0XeQm1Fb-TQ@mail.gmail.com>
@ 2015-09-16 17:39 ` Jonathan Wakely
2015-09-19 23:32 ` Tim Shen
0 siblings, 1 reply; 9+ messages in thread
From: Jonathan Wakely @ 2015-09-16 17:39 UTC (permalink / raw)
To: Tim Shen; +Cc: libstdc++, gcc-patches
On 12/09/15 01:57 +0000, Tim Shen wrote:
>Ok then, let's not append a dynamic location string, but only throw a
>string literal pointer.
This looks great, and a *huge* improvement on the current errors even
without more precise location info.
OK for trunk, thanks very much for doing this.
>commit fc3343a2c719049620447f6dc20191e2af4895f6
>Author: Tim Shen <timshen@google.com>
>Date: Thu Aug 27 21:42:40 2015 -0700
>
> PR libstdc++/67361
> * include/bits/regex_error.h: Add __throw_regex_error that
> supports string.
> * include/bits/regex_automaton.h: Add more specific exception
> messages.
> * include/bits/regex_automaton.tcc: Likewise.
> * include/bits/regex_compiler.h: Likewise.
> * include/bits/regex_compiler.tcc: Likewise.
> * include/bits/regex_scanner.h: Likewise.
> * include/bits/regex_scanner.tcc: Likewise.
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Patch, libstdc++] Add specific error message into exceptions
2015-09-16 17:39 ` Jonathan Wakely
@ 2015-09-19 23:32 ` Tim Shen
0 siblings, 0 replies; 9+ messages in thread
From: Tim Shen @ 2015-09-19 23:32 UTC (permalink / raw)
To: Jonathan Wakely; +Cc: libstdc++, gcc-patches
On Wed, Sep 16, 2015 at 10:38 AM, Jonathan Wakely <jwakely@redhat.com> wrote:
> On 12/09/15 01:57 +0000, Tim Shen wrote:
>>
>> Ok then, let's not append a dynamic location string, but only throw a
>> string literal pointer.
>
>
> This looks great, and a *huge* improvement on the current errors even
> without more precise location info.
I'm glad to hear this :).
> OK for trunk, thanks very much for doing this.
Tested & Committed as r227936.
--
Regards,
Tim Shen
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2015-09-19 21:00 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-08-28 5:33 [Patch, libstdc++] Add specific error message into exceptions Tim Shen
2015-08-28 16:24 ` Jonathan Wakely
2015-08-28 19:01 ` Tim Shen
2015-08-29 4:46 ` Tim Shen
2015-09-07 11:06 ` Jonathan Wakely
2015-09-07 11:23 ` Jonathan Wakely
2015-09-12 8:25 ` Tim Shen
[not found] ` <CAG4ZjN=O56GR0+E-SxQOCb6_W_HTAa+ocsjBYHH0XeQm1Fb-TQ@mail.gmail.com>
2015-09-16 17:39 ` Jonathan Wakely
2015-09-19 23:32 ` Tim Shen
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).