public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [Patch] regex bracket expression implementaion
@ 2013-08-01 14:59 Tim Shen
  2013-08-02 13:33 ` Paolo Carlini
  0 siblings, 1 reply; 4+ messages in thread
From: Tim Shen @ 2013-08-01 14:59 UTC (permalink / raw)
  To: libstdc++, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 218 bytes --]

Fully tested under x86_64. (make bootstrap && make -k check).

Next, I'll try to refactor _Grep_matcher using templates instead of
virtual functions, to make implementing back-reference easier.

Thanks!


-- 
Tim Shen

[-- Attachment #2: bracket.patch --]
[-- Type: application/octet-stream, Size: 27339 bytes --]

Index: include/bits/regex.h
===================================================================
--- include/bits/regex.h	(revision 201397)
+++ include/bits/regex.h	(working copy)
@@ -95,15 +95,15 @@
           operator~() const
           { return _RegexMask(~_M_base, ~_M_extended); }
 
-          constexpr _RegexMask&
+          _RegexMask&
           operator&=(_RegexMask __other)
           { return *this = (*this) & __other; }
 
-          constexpr _RegexMask&
+          _RegexMask&
           operator|=(_RegexMask __other)
           { return *this = (*this) | __other; }
 
-          constexpr _RegexMask&
+          _RegexMask&
           operator^=(_RegexMask __other)
           { return *this = (*this) ^ __other; }
 
@@ -228,7 +228,7 @@
               __fctyp.tolower(&*__v.begin(), &*__v.end());
               return this->transform(&*__v.begin(), &*__v.end());
             }
-          __catch (...)
+          __catch (std::bad_cast)
             {
             }
           return string_type();
@@ -519,7 +519,6 @@
         };
 
       std::string __s(__last - __first, '?');
-      string_type a(__first, __last);
       __fctyp.narrow(__first, __last, '?', &*__s.begin());
 
       for (unsigned int __i = 0; *__collatenames[__i]; __i++)
Index: include/bits/regex_compiler.h
===================================================================
--- include/bits/regex_compiler.h	(revision 201397)
+++ include/bits/regex_compiler.h	(working copy)
@@ -44,9 +44,8 @@
   {
     typedef unsigned int _StateT;
 
-    static constexpr _StateT _S_state_at_start    = 1 << 0;
-    static constexpr _StateT _S_state_in_brace    = 1 << 2;
-    static constexpr _StateT _S_state_in_bracket  = 1 << 3;
+    static constexpr _StateT _S_state_in_brace    = 1 << 0;
+    static constexpr _StateT _S_state_in_bracket  = 1 << 1;
 
     virtual ~_Scanner_base() { };
   };
@@ -77,8 +76,8 @@
 	_S_token_anychar,
 	_S_token_backref,
 	_S_token_bracket_begin,
+	_S_token_bracket_inverse_begin,
 	_S_token_bracket_end,
-	_S_token_inverse_class,
 	_S_token_char_class_name,
 	_S_token_closure0,
 	_S_token_closure1,
@@ -97,7 +96,6 @@
 	_S_token_opt,
 	_S_token_or,
 	_S_token_ord_char,
-	_S_token_quoted_char,
 	_S_token_subexpr_begin,
 	_S_token_subexpr_end,
 	_S_token_word_begin,
@@ -108,7 +106,7 @@
       _Scanner(_IteratorT __begin, _IteratorT __end, _FlagT __flags,
 	       std::locale __loc)
       : _M_current(__begin) , _M_end(__end) , _M_flags(__flags),
-        _M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(_S_state_at_start)
+        _M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(0)
       { _M_advance(); }
 
       void
@@ -219,9 +217,14 @@
 	}
       else if (__c == _M_ctype.widen('['))
 	{
-	  _M_curToken = _S_token_bracket_begin;
-	  _M_state |= (_S_state_in_bracket | _S_state_at_start);
-	  ++_M_current;
+          if (*++_M_current == _M_ctype.widen('^'))
+            {
+              _M_curToken = _S_token_bracket_inverse_begin;
+              ++_M_current;
+            }
+          else
+            _M_curToken = _S_token_bracket_begin;
+	  _M_state |= _S_state_in_bracket;
 	  return;
 	}
       else if (__c == _M_ctype.widen('\\'))
@@ -304,16 +307,9 @@
     _Scanner<_InputIterator>::
     _M_scan_in_bracket()
     {
-      if (_M_state & _S_state_at_start && *_M_current == _M_ctype.widen('^'))
+      if (*_M_current == _M_ctype.widen('['))
 	{
-	  _M_curToken = _S_token_inverse_class;
-	  _M_state &= ~_S_state_at_start;
 	  ++_M_current;
-	  return;
-	}
-      else if (*_M_current == _M_ctype.widen('['))
-	{
-	  ++_M_current;
 	  if (_M_current == _M_end)
 	    {
 	      _M_curToken = _S_token_eof;
@@ -347,21 +343,22 @@
 	}
       else if (*_M_current == _M_ctype.widen(']'))
 	{
-	  if (!(_M_flags & regex_constants::ECMAScript)
-	      || !(_M_state & _S_state_at_start))
-	    {
-	      // special case: only if  _not_ chr first after
-	      // '[' or '[^' and if not ECMAscript
-	      _M_curToken = _S_token_bracket_end;
-	      ++_M_current;
-	      return;
-	    }
+          _M_curToken = _S_token_bracket_end;
+          _M_state &= ~_S_state_in_bracket;
+          ++_M_current;
+          return;
 	}
+      else if (*_M_current == _M_ctype.widen('\\'))
+        {
+	  _M_eat_escape();
+	  return;
+        }
       _M_curToken = _S_token_collelem_single;
       _M_curValue.assign(1, *_M_current);
       ++_M_current;
     }
 
+  // TODO implement it.
   template<typename _InputIterator>
     void
     _Scanner<_InputIterator>::
@@ -463,11 +460,28 @@
 	  _M_curToken = _S_token_backref;
 	  _M_curValue.assign(1, __c);
 	}
+      else if (_M_state & _S_state_in_bracket)
+        {
+          if (__c == _M_ctype.widen('-')
+              || __c == _M_ctype.widen('[')
+              || __c == _M_ctype.widen(']'))
+            {
+              _M_curToken = _S_token_ord_char;
+              _M_curValue.assign(1, __c);
+            }
+          else if ((_M_flags & regex_constants::ECMAScript)
+                   && __c == _M_ctype.widen('b'))
+            {
+              _M_curToken = _S_token_ord_char;
+              _M_curValue.assign(1, _M_ctype.widen(' '));
+            }
+          else
+            __throw_regex_error(regex_constants::error_escape);
+        }
       else
 	__throw_regex_error(regex_constants::error_escape);
     }
 
-
   // Eats a character class or throwns an exception.
   // current point to ':' delimiter on entry, char after ']' on return
   template<typename _InputIterator>
@@ -549,6 +563,9 @@
 	case _S_token_bracket_begin:
 	  ostr << "bracket-begin\n";
 	  break;
+	case _S_token_bracket_inverse_begin:
+          ostr << "bracket-inverse-begin\n";
+          break;
 	case _S_token_bracket_end:
 	  ostr << "bracket-end\n";
 	  break;
@@ -606,9 +623,6 @@
 	case _S_token_ord_char:
 	  ostr << "ordinary character: \"" << _M_value() << "\"\n";
 	  break;
-	case _S_token_quoted_char:
-	  ostr << "quoted char\n";
-	  break;
 	case _S_token_subexpr_begin:
 	  ostr << "subexpr begin\n";
 	  break;
@@ -624,6 +638,8 @@
 	case _S_token_unknown:
 	  ostr << "-- unknown token --\n";
 	  break;
+        default:
+          _GLIBCXX_DEBUG_ASSERT(false);
       }
       return ostr;
     }
@@ -650,7 +666,7 @@
       typedef _Scanner<_InIter>                              _ScannerT;
       typedef typename _ScannerT::_TokenT                    _TokenT;
       typedef std::stack<_StateSeq, std::vector<_StateSeq> > _StackT;
-      typedef _RangeMatcher<_InIter, _TraitsT>               _RMatcherT;
+      typedef _BracketMatcher<_InIter, _TraitsT>             _BMatcherT;
 
       // accepts a specific token or returns false.
       bool
@@ -659,7 +675,7 @@
       void
       _M_disjunction();
 
-      bool
+      void
       _M_alternative();
 
       bool
@@ -668,7 +684,7 @@
       bool
       _M_assertion();
 
-      bool
+      void
       _M_quantifier();
 
       bool
@@ -678,32 +694,29 @@
       _M_bracket_expression();
 
       bool
-      _M_bracket_list(_RMatcherT& __matcher);
+      _M_bracket_list(_BMatcherT& __matcher);
 
       bool
-      _M_follow_list(_RMatcherT& __matcher);
+      _M_follow_list(_BMatcherT& __matcher);
 
-      bool
-      _M_follow_list2(_RMatcherT& __matcher);
+      void
+      _M_expression_term(_BMatcherT& __matcher);
 
       bool
-      _M_expression_term(_RMatcherT& __matcher);
+      _M_range_expression(_BMatcherT& __matcher);
 
       bool
-      _M_range_expression(_RMatcherT& __matcher);
+      _M_start_range(_BMatcherT& __matcher);
 
       bool
-      _M_start_range(_RMatcherT& __matcher);
+      _M_collating_symbol(_BMatcherT& __matcher);
 
       bool
-      _M_collating_symbol(_RMatcherT& __matcher);
+      _M_equivalence_class(_BMatcherT& __matcher);
 
       bool
-      _M_equivalence_class(_RMatcherT& __matcher);
+      _M_character_class(_BMatcherT& __matcher);
 
-      bool
-      _M_character_class(_RMatcherT& __matcher);
-
       int
       _M_cur_int_value(int __radix);
 
@@ -712,6 +725,7 @@
       _StringT       _M_cur_value;
       _Nfa           _M_state_store;
       _StackT        _M_stack;
+      _FlagT         _M_flags;
     };
 
   template<typename _InIter, typename _TraitsT>
@@ -719,7 +733,7 @@
     _Compiler(const _InIter& __b, const _InIter& __e, _TraitsT& __traits,
 	      _Compiler<_InIter, _TraitsT>::_FlagT __flags)
     : _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()),
-      _M_state_store(__flags)
+      _M_state_store(__flags), _M_flags(__flags)
     {
       typedef _StartTagger<_InIter, _TraitsT> _Start;
       typedef _EndTagger<_InIter, _TraitsT> _End;
@@ -743,8 +757,8 @@
     { 
       if (token == _M_scanner._M_token())
 	{
-	  _M_cur_value = _M_scanner._M_value();
-	  _M_scanner._M_advance();
+          _M_cur_value = _M_scanner._M_value();
+          _M_scanner._M_advance();
 	  return true;
 	}
       return false;
@@ -766,7 +780,7 @@
     }
 
   template<typename _InIter, typename _TraitsT>
-    bool
+    void
     _Compiler<_InIter, _TraitsT>::
     _M_alternative()
     {
@@ -780,9 +794,7 @@
 	      _M_stack.pop();
 	    }
 	  _M_stack.push(__re);
-	  return true;
 	}
-      return false;
     }
 
   template<typename _InIter, typename _TraitsT>
@@ -829,7 +841,7 @@
     }
 
   template<typename _InIter, typename _TraitsT>
-    bool
+    void
     _Compiler<_InIter, _TraitsT>::
     _M_quantifier()
     {
@@ -841,7 +853,7 @@
 	  __r._M_append(__r._M_front());
 	  _M_stack.pop();
 	  _M_stack.push(__r);
-	  return true;
+	  return;
 	}
       if (_M_match_token(_ScannerT::_S_token_closure1))
 	{
@@ -852,7 +864,7 @@
 			_M_insert_alt(_S_invalid_state_id,
 				      _M_stack.top()._M_front()));
 	  _M_stack.top()._M_append(__r);
-	  return true;
+	  return;
 	}
       if (_M_match_token(_ScannerT::_S_token_opt))
 	{
@@ -861,7 +873,7 @@
 	  _StateSeq __r(_M_stack.top(), -1);
 	  _M_stack.pop();
 	  _M_stack.push(__r);
-	  return true;
+	  return;
 	}
       if (_M_match_token(_ScannerT::_S_token_interval_begin))
 	{
@@ -897,9 +909,8 @@
 	      }
 	  if (!_M_match_token(_ScannerT::_S_token_interval_end))
 	    __throw_regex_error(regex_constants::error_brace);
-	  return true;
+	  return;
 	}
-      return false;
     }
 
   template<typename _InIter, typename _TraitsT>
@@ -922,17 +933,9 @@
 	{
 	  _M_stack.push(_StateSeq(_M_state_store,
                                   _M_state_store._M_insert_matcher
-                                  (_CMatcher(_M_cur_value[0], _M_traits))));
+                                  (_CMatcher(_M_cur_value[0], _M_flags, _M_traits))));
 	  return true;
 	}
-      if (_M_match_token(_ScannerT::_S_token_quoted_char))
-	{
-	  // note that in the ECMA grammar, this case covers backrefs.
-	  _M_stack.push(_StateSeq(_M_state_store,
-				  _M_state_store._M_insert_matcher
-				  (_CMatcher(_M_cur_value[0], _M_traits))));
-	  return true;
-	}
       if (_M_match_token(_ScannerT::_S_token_backref))
 	{
 	  // __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value);
@@ -966,124 +969,74 @@
     _Compiler<_InIter, _TraitsT>::
     _M_bracket_expression()
     {
-      if (_M_match_token(_ScannerT::_S_token_bracket_begin))
-	{
-	  _RMatcherT __matcher(_M_match_token(_ScannerT::_S_token_line_begin),
-			       _M_traits);
-	  if (!_M_bracket_list(__matcher)
-	      || !_M_match_token(_ScannerT::_S_token_bracket_end))
-	    __throw_regex_error(regex_constants::error_brack);
-	  _M_stack.push(_StateSeq(_M_state_store,
-				  _M_state_store._M_insert_matcher(__matcher)));
-	  return true;
-	}
-      return false;
-    }
-
-  // If the dash is the last character in the bracket expression, it is not
-  // special.
-  template<typename _InIter, typename _TraitsT>
-    bool
-    _Compiler<_InIter, _TraitsT>::
-    _M_bracket_list(_RMatcherT& __matcher)
-    {
-      if (_M_follow_list(__matcher))
-	{
-	  if (_M_match_token(_ScannerT::_S_token_dash))
-	    __matcher._M_add_char(_M_cur_value[0]);
-	  return true;
-	}
-      return false;
-    }
-
-  template<typename _InIter, typename _TraitsT>
-    bool
-    _Compiler<_InIter, _TraitsT>::
-    _M_follow_list(_RMatcherT& __matcher)
-    { return _M_expression_term(__matcher) && _M_follow_list2(__matcher); }
-
-  template<typename _InIter, typename _TraitsT>
-    bool
-    _Compiler<_InIter, _TraitsT>::
-    _M_follow_list2(_RMatcherT& __matcher)
-    {
-      if (_M_expression_term(__matcher))
-	return _M_follow_list2(__matcher);
+      bool __inverse =
+        _M_match_token(_ScannerT::_S_token_bracket_inverse_begin);
+      if (!(__inverse || _M_match_token(_ScannerT::_S_token_bracket_begin)))
+        return false;
+      _BMatcherT __matcher( __inverse, _M_flags, _M_traits);
+      // special case: only if  _not_ chr first after
+      // '[' or '[^' or if ECMAscript
+      if (!_M_bracket_list(__matcher) // list is empty
+          && !(_M_flags & regex_constants::ECMAScript))
+        __throw_regex_error(regex_constants::error_brack);
+      _M_stack.push(_StateSeq(_M_state_store,
+                              _M_state_store._M_insert_matcher(__matcher)));
       return true;
     }
 
   template<typename _InIter, typename _TraitsT>
-    bool
+    bool // list is non-empty
     _Compiler<_InIter, _TraitsT>::
-    _M_expression_term(_RMatcherT& __matcher)
+    _M_bracket_list(_BMatcherT& __matcher)
     {
-      return (_M_collating_symbol(__matcher)
-	      || _M_character_class(__matcher)
-	      || _M_equivalence_class(__matcher)
-	      || (_M_start_range(__matcher)
-		  && _M_range_expression(__matcher)));
-    }
-
-  template<typename _InIter, typename _TraitsT>
-    bool
-    _Compiler<_InIter, _TraitsT>::
-    _M_range_expression(_RMatcherT& __matcher)
-    {
-      if (!_M_collating_symbol(__matcher))
-	if (!_M_match_token(_ScannerT::_S_token_dash))
-	  __throw_regex_error(regex_constants::error_range);
-      __matcher._M_make_range();
+      if (_M_match_token(_ScannerT::_S_token_bracket_end))
+        return false;
+      _M_expression_term(__matcher);
+      _M_bracket_list(__matcher);
       return true;
     }
 
   template<typename _InIter, typename _TraitsT>
-    bool
+    void
     _Compiler<_InIter, _TraitsT>::
-    _M_start_range(_RMatcherT& __matcher)
-    { return _M_match_token(_ScannerT::_S_token_dash); }
-
-  template<typename _InIter, typename _TraitsT>
-    bool
-    _Compiler<_InIter, _TraitsT>::
-    _M_collating_symbol(_RMatcherT& __matcher)
+    _M_expression_term(_BMatcherT& __matcher)
     {
-      if (_M_match_token(_ScannerT::_S_token_collelem_single))
-	{
-	  __matcher._M_add_char(_M_cur_value[0]);
-	  return true;
-	}
       if (_M_match_token(_ScannerT::_S_token_collsymbol))
 	{
 	  __matcher._M_add_collating_element(_M_cur_value);
-	  return true;
+	  return;
 	}
-      return false;
-    }
-
-  template<typename _InIter, typename _TraitsT>
-    bool
-    _Compiler<_InIter, _TraitsT>::
-    _M_equivalence_class(_RMatcherT& __matcher)
-    {
       if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
 	{
 	  __matcher._M_add_equivalence_class(_M_cur_value);
-	  return true;
+	  return;
 	}
-      return false;
-    }
-
-  template<typename _InIter, typename _TraitsT>
-    bool
-    _Compiler<_InIter, _TraitsT>::
-    _M_character_class(_RMatcherT& __matcher)
-    {
       if (_M_match_token(_ScannerT::_S_token_char_class_name))
 	{
 	  __matcher._M_add_character_class(_M_cur_value);
-	  return true;
+	  return;
 	}
-      return false;
+      if (_M_match_token(_ScannerT::_S_token_collelem_single)) // [a
+        {
+          auto __ch = _M_cur_value[0];
+          if (_M_match_token(_ScannerT::_S_token_dash)) // [a-
+            {
+              // If the dash is the last character in the bracket expression,
+              // it is not special.
+              if (_M_scanner._M_token() == _ScannerT::_S_token_bracket_end)
+                __matcher._M_add_char(_M_cur_value[0]); // [a-] <=> [a\-]
+              else // [a-z]
+                {
+                  if (!_M_match_token(_ScannerT::_S_token_collelem_single))
+                    __throw_regex_error(regex_constants::error_range);
+                  __matcher._M_make_range(__ch, _M_cur_value[0]);
+                }
+            }
+          else // [a]
+            __matcher._M_add_char(__ch);
+          return;
+        }
+      __throw_regex_error(regex_constants::error_brack);
     }
 
   template<typename _InIter, typename _TraitsT>
Index: include/bits/regex_nfa.h
===================================================================
--- include/bits/regex_nfa.h	(revision 201397)
+++ include/bits/regex_nfa.h	(working copy)
@@ -129,6 +129,29 @@
       int       _M_index;
     };
 
+  // TODO For now we use an all-in-one comparator. In the future there may be
+  // optimizations based on regex_traits::translate and regex_transform.
+  template<typename _InIterT, typename _TraitsT>
+    struct _Comparator
+    {
+      typedef regex_constants::syntax_option_type _FlagT;
+      typedef typename _TraitsT::char_type        _CharT;
+      typedef std::basic_string<_CharT>           _StringT;
+
+      _Comparator(_FlagT __flags, const _TraitsT& __traits)
+      : _M_flags(__flags), _M_traits(__traits)
+      { }
+
+      bool
+      _M_equ(_CharT __a, _CharT __b) const;
+
+      bool
+      _M_le(_CharT __a, _CharT __b) const;
+
+      _FlagT                              _M_flags;
+      _TraitsT                            _M_traits;
+    };
+
   /// Indicates if current state matches cursor current.
   typedef std::function<bool (const _PatternCursor&)> _Matcher;
 
@@ -140,12 +163,15 @@
   /// Matches a single character
   template<typename _InIterT, typename _TraitsT>
     struct _CharMatcher
+    : public _Comparator<_InIterT, _TraitsT>
     {
-      typedef typename _TraitsT::char_type char_type;
+      typedef _Comparator<_InIterT, _TraitsT>     _BaseT;
+      typedef typename _TraitsT::char_type        _CharT;
+      typedef regex_constants::syntax_option_type _FlagT;
 
       explicit
-      _CharMatcher(char_type __c, const _TraitsT& __t = _TraitsT())
-      : _M_traits(__t), _M_c(_M_traits.translate(__c))
+      _CharMatcher(_CharT __c, _FlagT __flags, const _TraitsT& __t)
+      : _BaseT(__flags, __t), _M_c(__c)
       { }
 
       bool
@@ -153,55 +179,79 @@
       {
 	typedef const _SpecializedCursor<_InIterT>& _CursorT;
 	_CursorT __c = static_cast<_CursorT>(__pc);
-	return _M_traits.translate(__c._M_current()) == _M_c;
+        return this->_M_equ(__c._M_current(), _M_c);
       }
 
-      const _TraitsT& _M_traits;
-      char_type       _M_c;
+      _CharT       _M_c;
     };
 
   /// Matches a character range (bracket expression)
   template<typename _InIterT, typename _TraitsT>
-    struct _RangeMatcher
+    struct _BracketMatcher
+    : public _Comparator<_InIterT, _TraitsT>
     {
-      typedef typename _TraitsT::char_type _CharT;
-      typedef std::basic_string<_CharT>    _StringT;
+      typedef _Comparator<_InIterT, _TraitsT>     _BaseT;
+      typedef typename _TraitsT::char_class_type  _CharClassT;
+      typedef regex_constants::syntax_option_type _FlagT;
+      typedef typename _TraitsT::char_type        _CharT;
+      typedef std::basic_string<_CharT>           _StringT;
 
       explicit
-      _RangeMatcher(bool __is_non_matching, const _TraitsT& __t = _TraitsT())
-      : _M_traits(__t), _M_is_non_matching(__is_non_matching)
+      _BracketMatcher(bool __is_non_matching,
+                    _FlagT __flags,
+                    const _TraitsT& __t)
+      : _BaseT(__flags, __t), _M_flags(__flags), _M_traits(__t),
+      _M_is_non_matching(__is_non_matching), _M_class_set(0)
       { }
 
       bool
-      operator()(const _PatternCursor& __pc) const
-      {
-	typedef const _SpecializedCursor<_InIterT>& _CursorT;
-	_CursorT __c = static_cast<_CursorT>(__pc);
-	return true;
-      }
+      operator()(const _PatternCursor& __pc) const;
 
       void
       _M_add_char(_CharT __c)
-      { }
+      { _M_char_set.push_back(__c); }
 
       void
       _M_add_collating_element(const _StringT& __s)
-      { }
+      {
+        auto __st = _M_traits.lookup_collatename(&*__s.begin(), &*__s.end());
+        if (__st.empty())
+          __throw_regex_error(regex_constants::error_collate);
+        // TODO: digraph
+        _M_char_set.push_back(__st[0]);
+      }
 
       void
       _M_add_equivalence_class(const _StringT& __s)
-      { }
+      {
+        _M_add_character_class(
+          _M_traits.transform_primary(&*__s.begin(), &*__s.end()));
+      }
 
       void
       _M_add_character_class(const _StringT& __s)
-      { }
+      {
+        auto __st = _M_traits.lookup_classname(
+          &*__s.begin(), &*__s.end(), (_M_flags & regex_constants::icase));
+        if (__st == 0)
+          __throw_regex_error(regex_constants::error_ctype);
+        _M_class_set |= __st;
+      }
 
       void
-      _M_make_range()
-      { }
+      _M_make_range(_CharT __l, _CharT __r)
+      {
+        if (!this->_M_le(__l, __r))
+          __throw_regex_error(regex_constants::error_range);
+        _M_range_set.push_back(make_pair(__l, __r));
+      }
 
-      const _TraitsT& _M_traits;
-      bool            _M_is_non_matching;
+      _FlagT                              _M_flags;
+      _TraitsT                            _M_traits;
+      bool                                _M_is_non_matching;
+      std::vector<_CharT>                 _M_char_set;
+      std::vector<pair<_CharT, _CharT>>   _M_range_set;
+      _CharClassT                         _M_class_set;
     };
 
   /// Identifies a state in the NFA.
Index: include/bits/regex_nfa.tcc
===================================================================
--- include/bits/regex_nfa.tcc	(revision 201397)
+++ include/bits/regex_nfa.tcc	(working copy)
@@ -35,6 +35,64 @@
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
+  template<typename _InIterT, typename _TraitsT>
+    bool _BracketMatcher<_InIterT, _TraitsT>::
+    operator()(const _PatternCursor& __pc) const
+    {
+      typedef const _SpecializedCursor<_InIterT>& _CursorT;
+      _CursorT __c = static_cast<_CursorT>(__pc);
+      _CharT __ch = __c._M_current();
+      bool __ret = false;
+      for (auto __c : _M_char_set)
+        if (this->_M_equ(__c, __ch))
+          {
+            __ret = true;
+            break;
+          }
+      if (!__ret && _M_traits.isctype(__ch, _M_class_set))
+        __ret = true;
+      else
+        {
+          for (auto& __it : _M_range_set)
+            if (this->_M_le(__it.first, __ch) && this->_M_le(__ch, __it.second))
+              {
+                __ret = true;
+                break;
+              }
+        }
+      if (_M_is_non_matching)
+        __ret = !__ret;
+      return __ret;
+    }
+
+  template<typename _InIterT, typename _TraitsT>
+    bool _Comparator<_InIterT, _TraitsT>::
+    _M_equ(_CharT __a, _CharT __b) const
+    {
+      if (_M_flags & regex_constants::icase)
+        return _M_traits.translate_nocase(__a)
+          == _M_traits.translate_nocase(__b);
+      if (_M_flags & regex_constants::collate)
+        return _M_traits.translate(__a) == _M_traits.translate(__b);
+      return __a == __b;
+    }
+
+  template<typename _InIterT, typename _TraitsT>
+    bool _Comparator<_InIterT, _TraitsT>::
+    _M_le(_CharT __a, _CharT __b) const
+    {
+      _StringT __str1 = _StringT(1,
+                                 _M_flags & regex_constants::icase
+                                 ? _M_traits.translate_nocase(__a)
+                                 : _M_traits.translate(__a));
+      _StringT __str2 = _StringT(1,
+                                 _M_flags & regex_constants::icase
+                                 ? _M_traits.translate_nocase(__b)
+                                 : _M_traits.translate(__b));
+      return _M_traits.transform(__str1.begin(), __str1.end())
+        <= _M_traits.transform(__str2.begin(), __str2.end());
+    }
+
 #ifdef _GLIBCXX_DEBUG
 inline std::ostream& _State::
 _M_print(std::ostream& ostr) const
Index: testsuite/28_regex/algorithms/regex_match/extended/53622.cc
===================================================================
--- testsuite/28_regex/algorithms/regex_match/extended/53622.cc	(revision 201397)
+++ testsuite/28_regex/algorithms/regex_match/extended/53622.cc	(working copy)
@@ -37,7 +37,7 @@
     std::string target("zxcv/onetwoabc");
     std::smatch m;
 
-    VERIFY( std::regex_search(target, m, re) );
+    VERIFY( std::regex_match(target, m, re) );
     VERIFY( m.size() == 2 );
     VERIFY( m[0].matched == true );
     VERIFY( std::string(m[0].first, m[0].second) == "zxcv/onetwoabc" );
@@ -50,7 +50,7 @@
     std::string target("zxcv/onetwoabc");
     std::smatch m;
 
-    VERIFY( std::regex_search(target, m, re) );
+    VERIFY( std::regex_match(target, m, re) );
     VERIFY( m.size() == 3 );
     VERIFY( m[0].matched == true );
     VERIFY( std::string(m[0].first, m[0].second) == "zxcv/onetwoabc" );
Index: testsuite/28_regex/algorithms/regex_match/extended/cstring_bracket_01.cc
===================================================================
--- testsuite/28_regex/algorithms/regex_match/extended/cstring_bracket_01.cc	(revision 0)
+++ testsuite/28_regex/algorithms/regex_match/extended/cstring_bracket_01.cc	(working copy)
@@ -0,0 +1,66 @@
+// { dg-options "-std=gnu++11" }
+
+//
+// 2013-08-01  Tim Shen <timshen91@gmail.com>
+//
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// 28.11.2 regex_match
+// Tests Extended bracket expression against a C-string.
+
+#include <regex>
+#include <testsuite_hooks.h>
+
+void
+test01()
+{
+  bool test __attribute__((unused)) = true;
+
+  {
+    std::regex  re("pre/[za-x]", std::regex::extended);
+    VERIFY( std::regex_match("pre/z", re) );
+    VERIFY( std::regex_match("pre/a", re) );
+    VERIFY( !std::regex_match("pre/y", re) );
+  }
+  {
+    std::regex  re("pre/[[:uPPer:]]", std::regex::extended);
+    VERIFY( std::regex_match("pre/Z", re) );
+    VERIFY( !std::regex_match("pre/_", re) );
+    VERIFY( !std::regex_match("pre/a", re) );
+    VERIFY( !std::regex_match("pre/0", re) );
+  }
+  {
+    std::regex  re("pre/[[:lOWer:]]", std::regex::extended | std::regex::icase);
+    VERIFY( std::regex_match("pre/Z", re) );
+    VERIFY( std::regex_match("pre/a", re) );
+  }
+  {
+    std::regex  re("pre/[[:w:][.tilde.]]", std::regex::extended);
+    VERIFY( std::regex_match("pre/~", re) );
+    VERIFY( std::regex_match("pre/_", re) );
+    VERIFY( std::regex_match("pre/a", re) );
+    VERIFY( std::regex_match("pre/0", re) );
+  }
+}
+
+int
+main()
+{
+  test01();
+  return 0;
+}

[-- Attachment #3: changelog --]
[-- Type: application/octet-stream, Size: 507 bytes --]

2013-08-01  Tim Shen  <timshen91@gmail.com>

	Implement bracket expression.
	* include/bits/regex.h: Remove constexpr from "|=", etc.
	* include/bits/regex_compiler.h: Parse bracket expression.
	* include/bits/regex_nfa.h: _Comparator and _BracketMatcher(old
	_RangeMatcher).
	* include/bits/regex_nfa.tcc: Implement them.
	* testsuite/28_regex/algorithms/regex_match/extended/53622.cc
	: from regex_search to regex_match.
	* testsuite/28_regex/algorithms/regex_match/extended/
	cstring_bracket_01.cc: New.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [Patch] regex bracket expression implementaion
  2013-08-01 14:59 [Patch] regex bracket expression implementaion Tim Shen
@ 2013-08-02 13:33 ` Paolo Carlini
  2013-08-03 23:22   ` Tim Shen
  0 siblings, 1 reply; 4+ messages in thread
From: Paolo Carlini @ 2013-08-02 13:33 UTC (permalink / raw)
  To: Tim Shen; +Cc: libstdc++, gcc-patches

Hi,

On 08/01/2013 04:59 PM, Tim Shen wrote:
> Fully tested under x86_64. (make bootstrap && make -k check).
>
> Next, I'll try to refactor _Grep_matcher using templates instead of
> virtual functions, to make implementing back-reference easier.
If nobody has further comments over the next day or so, patch is Ok with me.

Thanks,
Paolo.

////////////////////

PS: I think it would be nice if from to time you could provide a rough 
estimate of the amount of work done vs the work still needed; major 
milestones; etc. Just to have an idea.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [Patch] regex bracket expression implementaion
  2013-08-02 13:33 ` Paolo Carlini
@ 2013-08-03 23:22   ` Tim Shen
  2013-08-04  0:24     ` Paolo Carlini
  0 siblings, 1 reply; 4+ messages in thread
From: Tim Shen @ 2013-08-03 23:22 UTC (permalink / raw)
  To: libstdc++, gcc-patches

On Fri, Aug 2, 2013 at 9:33 PM, Paolo Carlini <paolo.carlini@oracle.com> wrote:
> If nobody has further comments over the next day or so, patch is Ok with me.

Committed. // http://gcc.gnu.org/ml/gcc-cvs/2013-07/msg00826.html

> PS: I think it would be nice if from to time you could provide a rough
> estimate of the amount of work done vs the work still needed; major
> milestones; etc. Just to have an idea.

What's the format I should use? An email? Comments in the code? A PDF
document? Or?


-- 
Tim Shen

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [Patch] regex bracket expression implementaion
  2013-08-03 23:22   ` Tim Shen
@ 2013-08-04  0:24     ` Paolo Carlini
  0 siblings, 0 replies; 4+ messages in thread
From: Paolo Carlini @ 2013-08-04  0:24 UTC (permalink / raw)
  To: Tim Shen, libstdc++, gcc-patches



Hi,

>What's the format I should use? An email? Comments in the code? A PDF
>document? Or?

All sorts of comments in the code are always welcome: blurbs at the beginning of each file explaining the main design choices, then smaller comments explaining tricky points, TODO or FIXME when appropriate, etc.

But in my previous message I was thinking just short informal emails, to have an idea of the current status, what already works, what doesn't, the major challenges ahead. If you like together with actual patch submissions.

Thanks,
Paolo

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2013-08-04  0:24 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-08-01 14:59 [Patch] regex bracket expression implementaion Tim Shen
2013-08-02 13:33 ` Paolo Carlini
2013-08-03 23:22   ` Tim Shen
2013-08-04  0:24     ` Paolo Carlini

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).