public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [Patch 2/2] Localization problem in regex
@ 2013-08-23  9:25 Tim Shen
  2013-08-23 10:15 ` Jonathan Wakely
                   ` (2 more replies)
  0 siblings, 3 replies; 13+ messages in thread
From: Tim Shen @ 2013-08-23  9:25 UTC (permalink / raw)
  To: libstdc++, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 101 bytes --]

Inspired by this mail: http://gcc.gnu.org/ml/libstdc++/2013-08/msg00131.html

Thanks!


-- 
Tim Shen

[-- Attachment #2: regex-locale.patch --]
[-- Type: application/octet-stream, Size: 5533 bytes --]

commit 62121ad0480af3d9abe0b0ece9bc636dabd2b6c2
Author: tim <timshen91@gmail.com>
Date:   Thu Aug 22 17:28:51 2013 +0800

    2013-08-23  Tim Shen  <timshen91@gmail.com>
    
    	* include/bits/regex.h: Fix callers.
    	* include/bits/regex_compiler.h: Store _Traits reference.
    	* include/bits/regex_executor.h: Add _Traits member for _DFSExecutor.
    	* include/bits/regex_executor.tcc: Likewise.
    	* testsuite/28_regex/algorithms/regex_match/extended/wstring_locale.cc:
    	  New.

diff --git a/libstdc++-v3/include/bits/regex.h b/libstdc++-v3/include/bits/regex.h
index 4838819..19d9bbd 100644
--- a/libstdc++-v3/include/bits/regex.h
+++ b/libstdc++-v3/include/bits/regex.h
@@ -880,8 +880,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	assign(const basic_string<_Ch_type, _Ch_typeraits, _Alloc>& __s,
 	       flag_type __flags = ECMAScript)
 	{
-	  basic_regex __tmp(__s, __flags);
-	  this->swap(__tmp);
+	  _M_automaton =
+	    __detail::_Compiler<decltype(__s.begin()), _Ch_type, _Rx_traits>
+	    (__s.begin(), __s.end(), _M_traits, _M_flags)._M_get_nfa();
 	  return *this;
 	}
 
diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h
index 1d588b9..a1107bb 100644
--- a/libstdc++-v3/include/bits/regex_compiler.h
+++ b/libstdc++-v3/include/bits/regex_compiler.h
@@ -214,7 +214,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	return _M_traits.transform(__s.begin(), __s.end());
       }
 
-      _TraitsT                              _M_traits;
+      const _TraitsT&                       _M_traits;
       _FlagT                                _M_flags;
       bool                                  _M_is_non_matching;
       std::vector<_CharT>                   _M_char_set;
diff --git a/libstdc++-v3/include/bits/regex_executor.h b/libstdc++-v3/include/bits/regex_executor.h
index 23998ed..6d66d88 100644
--- a/libstdc++-v3/include/bits/regex_executor.h
+++ b/libstdc++-v3/include/bits/regex_executor.h
@@ -120,13 +120,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       typedef typename _BaseT::_ResultsVec                 _ResultsVec;
       typedef regex_constants::match_flag_type             _FlagT;
 
-      _DFSExecutor(_BiIter        __begin,
-		   _BiIter        __end,
-		   _ResultsT&     __results,
-		   const _RegexT& __nfa,
-		   _FlagT         __flags)
+      _DFSExecutor(_BiIter         __begin,
+		   _BiIter         __end,
+		   _ResultsT&      __results,
+		   const _RegexT&  __nfa,
+		   const _TraitsT& __traits,
+		   _FlagT          __flags)
       : _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count()),
-	_M_traits(_TraitsT()), _M_nfa(__nfa), _M_results_ret(this->_M_results)
+	_M_traits(__traits), _M_nfa(__nfa), _M_results_ret(this->_M_results)
       { }
 
       void
@@ -142,9 +143,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	bool
 	_M_dfs(_StateIdT __i);
 
-      _ResultsVec    _M_results_ret;
-      _TraitsT       _M_traits;
-      const _RegexT& _M_nfa;
+      _ResultsVec     _M_results_ret;
+      const _TraitsT& _M_traits;
+      const _RegexT&  _M_nfa;
     };
 
   // Like the DFS approach, it try every possible state transition; Unlike DFS,
diff --git a/libstdc++-v3/include/bits/regex_executor.tcc b/libstdc++-v3/include/bits/regex_executor.tcc
index edfd0b6..788d65e 100644
--- a/libstdc++-v3/include/bits/regex_executor.tcc
+++ b/libstdc++-v3/include/bits/regex_executor.tcc
@@ -320,7 +320,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       auto __p = std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
 	(__re._M_automaton);
       if (__p->_M_has_backref)
-	return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, *__p, __flags));
+	return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, *__p,
+					      __re._M_traits, __flags));
       return _ExecutorPtr(new _BFSExecutorT(__b, __e, __m, *__p, __flags));
     }
 
diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/wstring_locale.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/wstring_locale.cc
new file mode 100644
index 0000000..a7225cb
--- /dev/null
+++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/wstring_locale.cc
@@ -0,0 +1,48 @@
+// { dg-options "-std=gnu++11" }
+// { dg-require-namedlocale "de_DE.UTF-8" }
+
+//
+// 2013-08-23  Tim Shen <timshen91@gmail.com>
+//
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// 28.11.2 regex_match
+// Tests Extended localization against a wide-string.
+
+#include <regex>
+#include <testsuite_hooks.h>
+
+void
+test01()
+{
+  bool test __attribute__((unused)) = true;
+
+  std::wstring str2 = L"ÜBER";
+  std::wregex re2;
+  re2.imbue(std::locale("de_DE.UTF-8"));
+  re2.assign(L"[[:upper:]]*", std::regex::extended);
+  std::wsmatch m2;
+  VERIFY(std::regex_match(str2, m2, re2));
+}
+
+int
+main()
+{
+  test01();
+  return 0;
+}

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Patch 2/2] Localization problem in regex
  2013-08-23  9:25 [Patch 2/2] Localization problem in regex Tim Shen
@ 2013-08-23 10:15 ` Jonathan Wakely
  2013-08-23 11:08 ` Paolo Carlini
  2013-08-23 19:04 ` Stefan Schweter
  2 siblings, 0 replies; 13+ messages in thread
From: Jonathan Wakely @ 2013-08-23 10:15 UTC (permalink / raw)
  To: Tim Shen; +Cc: libstdc++, gcc-patches

On 23 August 2013 10:17, Tim Shen wrote:
> Inspired by this mail: http://gcc.gnu.org/ml/libstdc++/2013-08/msg00131.html

This is OK too, thanks for the quick fix to the locale issue!

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Patch 2/2] Localization problem in regex
  2013-08-23  9:25 [Patch 2/2] Localization problem in regex Tim Shen
  2013-08-23 10:15 ` Jonathan Wakely
@ 2013-08-23 11:08 ` Paolo Carlini
  2013-08-23 14:04   ` Tim Shen
  2013-08-23 19:04 ` Stefan Schweter
  2 siblings, 1 reply; 13+ messages in thread
From: Paolo Carlini @ 2013-08-23 11:08 UTC (permalink / raw)
  To: Tim Shen; +Cc: libstdc++, gcc-patches

Hi,

On 08/23/2013 11:17 AM, Tim Shen wrote:
>      2013-08-23  Tim Shen<timshen91@gmail.com>
>      
>      	* include/bits/regex.h: Fix callers.
>      	* include/bits/regex_compiler.h: Store _Traits reference.
>      	* include/bits/regex_executor.h: Add _Traits member for _DFSExecutor.
>      	* include/bits/regex_executor.tcc: Likewise.
>      	* testsuite/28_regex/algorithms/regex_match/extended/wstring_locale.cc:
>      	  New.
Thanks a lot indeed. But: "Fix callers" of *what*?!? And from *where*? 
To be really honest this kind of ChangeLog entry doesn't make much 
sense. Please get used to list the specific functions you are changing. 
It's important.

Thanks,
Paolo.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Patch 2/2] Localization problem in regex
  2013-08-23 11:08 ` Paolo Carlini
@ 2013-08-23 14:04   ` Tim Shen
  2013-08-23 14:27     ` Paolo Carlini
  0 siblings, 1 reply; 13+ messages in thread
From: Tim Shen @ 2013-08-23 14:04 UTC (permalink / raw)
  To: Paolo Carlini; +Cc: libstdc++, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 391 bytes --]

On Fri, Aug 23, 2013 at 6:29 PM, Paolo Carlini <paolo.carlini@oracle.com> wrote:
> Thanks a lot indeed. But: "Fix callers" of *what*?!? And from *where*? To be
> really honest this kind of ChangeLog entry doesn't make much sense. Please
> get used to list the specific functions you are changing. It's important.

I see.

Actually I find a mistake by this review /w\, thanks!


-- 
Tim Shen

[-- Attachment #2: regex-locale.patch --]
[-- Type: application/octet-stream, Size: 5626 bytes --]

commit 962a57c32c1928c021ec0eb837b9a19f8a7fd3f3
Author: tim <timshen91@gmail.com>
Date:   Thu Aug 22 17:28:51 2013 +0800

    2013-08-23  Tim Shen  <timshen91@gmail.com>
    
    	* include/bits/regex.h: Let basic_regex::assign not lose _M_traits.
    	* include/bits/regex_compiler.h: Store _Compiler::_M_traits by
    	  reference instead of by value.
    	* include/bits/regex_executor.h: Add _M_traits to _DFSExecutor.
    	* include/bits/regex_executor.tcc: Likewise.
    	* testsuite/28_regex/algorithms/regex_match/extended/wstring_locale.cc:
    	  New.

diff --git a/libstdc++-v3/include/bits/regex.h b/libstdc++-v3/include/bits/regex.h
index 4838819..40db517 100644
--- a/libstdc++-v3/include/bits/regex.h
+++ b/libstdc++-v3/include/bits/regex.h
@@ -880,8 +880,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	assign(const basic_string<_Ch_type, _Ch_typeraits, _Alloc>& __s,
 	       flag_type __flags = ECMAScript)
 	{
-	  basic_regex __tmp(__s, __flags);
-	  this->swap(__tmp);
+	  _M_flags = __flags;
+	  _M_automaton =
+	    __detail::_Compiler<decltype(__s.begin()), _Ch_type, _Rx_traits>
+	    (__s.begin(), __s.end(), _M_traits, _M_flags)._M_get_nfa();
 	  return *this;
 	}
 
diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h
index 1d588b9..a1107bb 100644
--- a/libstdc++-v3/include/bits/regex_compiler.h
+++ b/libstdc++-v3/include/bits/regex_compiler.h
@@ -214,7 +214,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	return _M_traits.transform(__s.begin(), __s.end());
       }
 
-      _TraitsT                              _M_traits;
+      const _TraitsT&                       _M_traits;
       _FlagT                                _M_flags;
       bool                                  _M_is_non_matching;
       std::vector<_CharT>                   _M_char_set;
diff --git a/libstdc++-v3/include/bits/regex_executor.h b/libstdc++-v3/include/bits/regex_executor.h
index 23998ed..6d66d88 100644
--- a/libstdc++-v3/include/bits/regex_executor.h
+++ b/libstdc++-v3/include/bits/regex_executor.h
@@ -120,13 +120,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       typedef typename _BaseT::_ResultsVec                 _ResultsVec;
       typedef regex_constants::match_flag_type             _FlagT;
 
-      _DFSExecutor(_BiIter        __begin,
-		   _BiIter        __end,
-		   _ResultsT&     __results,
-		   const _RegexT& __nfa,
-		   _FlagT         __flags)
+      _DFSExecutor(_BiIter         __begin,
+		   _BiIter         __end,
+		   _ResultsT&      __results,
+		   const _RegexT&  __nfa,
+		   const _TraitsT& __traits,
+		   _FlagT          __flags)
       : _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count()),
-	_M_traits(_TraitsT()), _M_nfa(__nfa), _M_results_ret(this->_M_results)
+	_M_traits(__traits), _M_nfa(__nfa), _M_results_ret(this->_M_results)
       { }
 
       void
@@ -142,9 +143,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	bool
 	_M_dfs(_StateIdT __i);
 
-      _ResultsVec    _M_results_ret;
-      _TraitsT       _M_traits;
-      const _RegexT& _M_nfa;
+      _ResultsVec     _M_results_ret;
+      const _TraitsT& _M_traits;
+      const _RegexT&  _M_nfa;
     };
 
   // Like the DFS approach, it try every possible state transition; Unlike DFS,
diff --git a/libstdc++-v3/include/bits/regex_executor.tcc b/libstdc++-v3/include/bits/regex_executor.tcc
index edfd0b6..788d65e 100644
--- a/libstdc++-v3/include/bits/regex_executor.tcc
+++ b/libstdc++-v3/include/bits/regex_executor.tcc
@@ -320,7 +320,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       auto __p = std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
 	(__re._M_automaton);
       if (__p->_M_has_backref)
-	return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, *__p, __flags));
+	return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, *__p,
+					      __re._M_traits, __flags));
       return _ExecutorPtr(new _BFSExecutorT(__b, __e, __m, *__p, __flags));
     }
 
diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/wstring_locale.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/wstring_locale.cc
new file mode 100644
index 0000000..a7225cb
--- /dev/null
+++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/wstring_locale.cc
@@ -0,0 +1,48 @@
+// { dg-options "-std=gnu++11" }
+// { dg-require-namedlocale "de_DE.UTF-8" }
+
+//
+// 2013-08-23  Tim Shen <timshen91@gmail.com>
+//
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// 28.11.2 regex_match
+// Tests Extended localization against a wide-string.
+
+#include <regex>
+#include <testsuite_hooks.h>
+
+void
+test01()
+{
+  bool test __attribute__((unused)) = true;
+
+  std::wstring str2 = L"ÜBER";
+  std::wregex re2;
+  re2.imbue(std::locale("de_DE.UTF-8"));
+  re2.assign(L"[[:upper:]]*", std::regex::extended);
+  std::wsmatch m2;
+  VERIFY(std::regex_match(str2, m2, re2));
+}
+
+int
+main()
+{
+  test01();
+  return 0;
+}

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Patch 2/2] Localization problem in regex
  2013-08-23 14:04   ` Tim Shen
@ 2013-08-23 14:27     ` Paolo Carlini
  2013-08-23 14:57       ` Tim Shen
  0 siblings, 1 reply; 13+ messages in thread
From: Paolo Carlini @ 2013-08-23 14:27 UTC (permalink / raw)
  To: Tim Shen; +Cc: libstdc++, gcc-patches

On 08/23/2013 03:59 PM, Tim Shen wrote:
> On Fri, Aug 23, 2013 at 6:29 PM, Paolo Carlini <paolo.carlini@oracle.com> wrote:
>> Thanks a lot indeed. But: "Fix callers" of *what*?!? And from *where*? To be
>> really honest this kind of ChangeLog entry doesn't make much sense. Please
>> get used to list the specific functions you are changing. It's important.
> I see.
>
> Actually I find a mistake by this review /w\, thanks!
Great. But please but the names of the functions you are changing 
between round brackets. You have tons of examples everywhere.

Paolo.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Patch 2/2] Localization problem in regex
  2013-08-23 14:27     ` Paolo Carlini
@ 2013-08-23 14:57       ` Tim Shen
  2013-08-23 15:02         ` Jonathan Wakely
  2013-08-23 15:05         ` Paolo Carlini
  0 siblings, 2 replies; 13+ messages in thread
From: Tim Shen @ 2013-08-23 14:57 UTC (permalink / raw)
  To: Paolo Carlini; +Cc: libstdc++, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 244 bytes --]

On Fri, Aug 23, 2013 at 10:26 PM, Paolo Carlini
<paolo.carlini@oracle.com> wrote:
> Great. But please but the names of the functions you are changing between
> round brackets. You have tons of examples everywhere.

...like this?


-- 
Tim Shen

[-- Attachment #2: changelog --]
[-- Type: application/octet-stream, Size: 406 bytes --]

2013-08-23  Tim Shen  <timshen91@gmail.com>

	* include/bits/regex.h (basic_regex<>::assign): Don't lose _M_traits.
	* include/bits/regex_compiler.h: Store _Compiler::_M_traits by reference
	  instead of by value.
	* include/bits/regex_executor.h: Add _M_traits to _DFSExecutor.
	* include/bits/regex_executor.tcc: Likewise.
	* testsuite/28_regex/algorithms/regex_match/extended/wstring_locale.cc:
	  New.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Patch 2/2] Localization problem in regex
  2013-08-23 14:57       ` Tim Shen
@ 2013-08-23 15:02         ` Jonathan Wakely
  2013-08-23 15:05         ` Paolo Carlini
  1 sibling, 0 replies; 13+ messages in thread
From: Jonathan Wakely @ 2013-08-23 15:02 UTC (permalink / raw)
  To: Tim Shen; +Cc: Paolo Carlini, libstdc++, gcc-patches

On 23 August 2013 15:53, Tim Shen wrote:
> On Fri, Aug 23, 2013 at 10:26 PM, Paolo Carlini
> <paolo.carlini@oracle.com> wrote:
>> Great. But please but the names of the functions you are changing between
>> round brackets. You have tons of examples everywhere.
>
> ...like this?

Yes, that makes it clear what part of the file is affected. That's
important when searching ChangeLogs to find what change last touched
e.g. basic_regex::assign.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Patch 2/2] Localization problem in regex
  2013-08-23 14:57       ` Tim Shen
  2013-08-23 15:02         ` Jonathan Wakely
@ 2013-08-23 15:05         ` Paolo Carlini
  2013-08-23 15:48           ` Tim Shen
  1 sibling, 1 reply; 13+ messages in thread
From: Paolo Carlini @ 2013-08-23 15:05 UTC (permalink / raw)
  To: Tim Shen; +Cc: libstdc++, gcc-patches

On 08/23/2013 04:53 PM, Tim Shen wrote:
> On Fri, Aug 23, 2013 at 10:26 PM, Paolo Carlini
> <paolo.carlini@oracle.com> wrote:
>> Great. But please but the names of the functions you are changing between
>> round brackets. You have tons of examples everywhere.
> ...like this?
The assign change is fine. But I would also put between round brackets 
(class _Compiler<>) for the _M_traits change and use something like 
(_DFSExecutor<>::_DFSExecutor): Adjust. for the last change (remember 
that constructors are special member *functions* thus should be normally 
listed).

Paolo.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Patch 2/2] Localization problem in regex
  2013-08-23 15:05         ` Paolo Carlini
@ 2013-08-23 15:48           ` Tim Shen
  2013-08-23 16:20             ` Jonathan Wakely
  0 siblings, 1 reply; 13+ messages in thread
From: Tim Shen @ 2013-08-23 15:48 UTC (permalink / raw)
  To: Paolo Carlini, Jonathan Wakely; +Cc: libstdc++, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 879 bytes --]

On Fri, Aug 23, 2013 at 11:03 PM, Paolo Carlini
<paolo.carlini@oracle.com> wrote:
> The assign change is fine. But I would also put between round brackets
> (class _Compiler<>) for the _M_traits change and use something like
> (_DFSExecutor<>::_DFSExecutor): Adjust. for the last change (remember that
> constructors are special member *functions* thus should be normally listed).

I see.

On Fri, Aug 23, 2013 at 10:57 PM, Jonathan Wakely <jwakely.gcc@gmail.com> wrote:
> Yes, that makes it clear what part of the file is affected. That's
> important when searching ChangeLogs to find what change last touched
> e.g. basic_regex::assign.

Why don't we `svn blame`?

By the way, do we seariously need to keep ChangeLog in files? Is it
more like a tradition than a solution? After all, we have SVN or Git
now. I remember a mail metioned this but cannot find it >.<


-- 
Tim Shen

[-- Attachment #2: changelog --]
[-- Type: application/octet-stream, Size: 488 bytes --]

2013-08-23  Tim Shen  <timshen91@gmail.com>

	* include/bits/regex.h (basic_regex<>::assign): Don't lose _M_traits.
	* include/bits/regex_compiler.h: Store _Compiler::_M_traits by reference
	  instead of by value.
	* include/bits/regex_executor.h (_DFSExecutor<>::_DFSExecutor): Add
	  _M_traits to _DFSExecutor.
	* include/bits/regex_executor.tcc (__get_executor<>): Pass _M_traits to
	  _DFSExecutor too.
	* testsuite/28_regex/algorithms/regex_match/extended/wstring_locale.cc:
	  New.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Patch 2/2] Localization problem in regex
  2013-08-23 15:48           ` Tim Shen
@ 2013-08-23 16:20             ` Jonathan Wakely
  0 siblings, 0 replies; 13+ messages in thread
From: Jonathan Wakely @ 2013-08-23 16:20 UTC (permalink / raw)
  To: Tim Shen; +Cc: Paolo Carlini, libstdc++, gcc-patches

On 23 August 2013 16:48, Tim Shen wrote:
> By the way, do we seariously need to keep ChangeLog in files? Is it
> more like a tradition than a solution? After all, we have SVN or Git
> now. I remember a mail metioned this but cannot find it >.<

It's not a tradition, they are required, see
http://www.gnu.org/prep/standards/standards.html#Change-Logs

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Patch 2/2] Localization problem in regex
  2013-08-23  9:25 [Patch 2/2] Localization problem in regex Tim Shen
  2013-08-23 10:15 ` Jonathan Wakely
  2013-08-23 11:08 ` Paolo Carlini
@ 2013-08-23 19:04 ` Stefan Schweter
  2013-08-24  6:15   ` Tim Shen
  2 siblings, 1 reply; 13+ messages in thread
From: Stefan Schweter @ 2013-08-23 19:04 UTC (permalink / raw)
  To: Tim Shen; +Cc: libstdc++, gcc-patches

Am Fri, 23 Aug 2013 17:17:41 +0800
schrieb Tim Shen <timshen91@gmail.com>:

> Inspired by this mail:
> http://gcc.gnu.org/ml/libstdc++/2013-08/msg00131.html
> 
> Thanks!
> 
> 

Hi Tim,

I applied the patch + compiled everything - it's working now! Thanks!

But... I found a problem with .imbue():

int main()
{
	std::setlocale(LC_ALL, "");
	
	std::wstring str2 = L"öäü";
	std::wregex re2;
	re2.imbue(std::locale(""));
	re2.assign(L"([[:lower:]]+)");
	std::wsmatch m2;
	
	std::wsregex_token_iterator end {};
	
	for (std::wsregex_token_iterator p{str2.begin(), str2.end(),
re2, {1}}; p != end; ++p) { std::wcout << *p << std::endl;
	}

	return 0;
}

Works fine! But when there's no imbue() call, a Segmentation
fault occurs.

int main()
{
	std::setlocale(LC_ALL, "");
	
	std::wstring str2 = L"öäü";
	std::wregex re2;
	//re2.imbue(std::locale(""));
	re2.assign(L"([[:lower:]]+)");
	std::wsmatch m2;
	
	std::wsregex_token_iterator end {};
	
	for (std::wsregex_token_iterator p{str2.begin(), str2.end(),
re2, {1}}; p != end; ++p) { std::wcout << *p << std::endl;
	}

	return 0;
}

May be it's better to throw an exception here? Thanks in advance,

Stefan

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Patch 2/2] Localization problem in regex
  2013-08-23 19:04 ` Stefan Schweter
@ 2013-08-24  6:15   ` Tim Shen
  2013-08-29 18:45     ` Tim Shen
  0 siblings, 1 reply; 13+ messages in thread
From: Tim Shen @ 2013-08-24  6:15 UTC (permalink / raw)
  To: Stefan Schweter; +Cc: libstdc++, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1661 bytes --]

On Sat, Aug 24, 2013 at 1:51 AM, Stefan Schweter <stefan@schweter.it> wrote:
> Am Fri, 23 Aug 2013 17:17:41 +0800
> schrieb Tim Shen <timshen91@gmail.com>:
>
>> Inspired by this mail:
>> http://gcc.gnu.org/ml/libstdc++/2013-08/msg00131.html
>>
>> Thanks!
>>
>>
>
> Hi Tim,
>
> I applied the patch + compiled everything - it's working now! Thanks!
>
> But... I found a problem with .imbue():
>
> int main()
> {
>         std::setlocale(LC_ALL, "");
>
>         std::wstring str2 = L"öäü";
>         std::wregex re2;
>         re2.imbue(std::locale(""));
>         re2.assign(L"([[:lower:]]+)");
>         std::wsmatch m2;
>
>         std::wsregex_token_iterator end {};
>
>         for (std::wsregex_token_iterator p{str2.begin(), str2.end(),
> re2, {1}}; p != end; ++p) { std::wcout << *p << std::endl;
>         }
>
>         return 0;
> }
>
> Works fine! But when there's no imbue() call, a Segmentation
> fault occurs.
>
> int main()
> {
>         std::setlocale(LC_ALL, "");
>
>         std::wstring str2 = L"öäü";
>         std::wregex re2;
>         //re2.imbue(std::locale(""));
>         re2.assign(L"([[:lower:]]+)");
>         std::wsmatch m2;
>
>         std::wsregex_token_iterator end {};
>
>         for (std::wsregex_token_iterator p{str2.begin(), str2.end(),
> re2, {1}}; p != end; ++p) { std::wcout << *p << std::endl;
>         }
>
>         return 0;
> }
>
> May be it's better to throw an exception here? Thanks in advance,
>
> Stefan

Fixed.

It's not fully tested. I just run the 28_regex testcases. I'll do a
full test before committing.

Thanks again!

-- 
Tim Shen

[-- Attachment #2: regex-locale.patch --]
[-- Type: application/octet-stream, Size: 8638 bytes --]

commit 7941b4ae05f205ebd65ba1f8420a8eb9120c5bce
Author: tim <timshen91@gmail.com>
Date:   Thu Aug 22 17:28:51 2013 +0800

    2013-08-23  Tim Shen  <timshen91@gmail.com>
    
    	* include/bits/regex.h (basic_regex<>::assign): Don't lose _M_traits.
    	  (regex_iterator<>::regex_iterator): Return nullptr when regex_search
    	     failed.
    	  (regex_token_iterator<>::_M_end_of_seq): Should be defined true when
    	    _M_result is(not isn't) nullptr.
    	* include/bits/regex_compiler.h: Store _Compiler::_M_traits by reference
    	  instead of by value.
    	* include/bits/regex_executor.h (_DFSExecutor<>::_DFSExecutor): Add
    	  _M_traits to _DFSExecutor.
    	* include/bits/regex_executor.tcc (__get_executor<>): Pass _M_traits to
    	  _DFSExecutor too.
    	* testsuite/28_regex/algorithms/regex_match/extended/wstring_locale.cc:
    	  New.
    	* testsuite/28_regex/iterators/regex_token_iterator/wchar_t/
    	  wstring_02.cc: New.

diff --git a/libstdc++-v3/include/bits/regex.h b/libstdc++-v3/include/bits/regex.h
index 4838819..412465a 100644
--- a/libstdc++-v3/include/bits/regex.h
+++ b/libstdc++-v3/include/bits/regex.h
@@ -880,8 +880,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	assign(const basic_string<_Ch_type, _Ch_typeraits, _Alloc>& __s,
 	       flag_type __flags = ECMAScript)
 	{
-	  basic_regex __tmp(__s, __flags);
-	  this->swap(__tmp);
+	  _M_flags = __flags;
+	  _M_automaton =
+	    __detail::_Compiler<decltype(__s.begin()), _Ch_type, _Rx_traits>
+	    (__s.begin(), __s.end(), _M_traits, _M_flags)._M_get_nfa();
 	  return *this;
 	}
 
@@ -2591,7 +2593,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 		     regex_constants::match_flag_type __m
 		     = regex_constants::match_default)
       : _M_begin(__a), _M_end(__b), _M_pregex(&__re), _M_flags(__m), _M_match()
-      { regex_search(_M_begin, _M_end, _M_match, *_M_pregex, _M_flags); }
+      {
+	if (!regex_search(_M_begin, _M_end, _M_match, *_M_pregex, _M_flags))
+	  *this = regex_iterator();
+      }
 
       /**
        * Copy constructs a %regex_iterator.
@@ -2905,9 +2910,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  return (*_M_position)[_M_subs[_M_n]];
       }
 
-      bool
-      _M_end_of_seq() const
-      { return _M_result != nullptr; }
+      constexpr bool
+      _M_end_of_seq()
+      { return _M_result == nullptr; }
 
       _Position _M_position;
       const value_type* _M_result;
diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h
index 1d588b9..a1107bb 100644
--- a/libstdc++-v3/include/bits/regex_compiler.h
+++ b/libstdc++-v3/include/bits/regex_compiler.h
@@ -214,7 +214,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	return _M_traits.transform(__s.begin(), __s.end());
       }
 
-      _TraitsT                              _M_traits;
+      const _TraitsT&                       _M_traits;
       _FlagT                                _M_flags;
       bool                                  _M_is_non_matching;
       std::vector<_CharT>                   _M_char_set;
diff --git a/libstdc++-v3/include/bits/regex_executor.h b/libstdc++-v3/include/bits/regex_executor.h
index 23998ed..6d66d88 100644
--- a/libstdc++-v3/include/bits/regex_executor.h
+++ b/libstdc++-v3/include/bits/regex_executor.h
@@ -120,13 +120,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       typedef typename _BaseT::_ResultsVec                 _ResultsVec;
       typedef regex_constants::match_flag_type             _FlagT;
 
-      _DFSExecutor(_BiIter        __begin,
-		   _BiIter        __end,
-		   _ResultsT&     __results,
-		   const _RegexT& __nfa,
-		   _FlagT         __flags)
+      _DFSExecutor(_BiIter         __begin,
+		   _BiIter         __end,
+		   _ResultsT&      __results,
+		   const _RegexT&  __nfa,
+		   const _TraitsT& __traits,
+		   _FlagT          __flags)
       : _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count()),
-	_M_traits(_TraitsT()), _M_nfa(__nfa), _M_results_ret(this->_M_results)
+	_M_traits(__traits), _M_nfa(__nfa), _M_results_ret(this->_M_results)
       { }
 
       void
@@ -142,9 +143,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	bool
 	_M_dfs(_StateIdT __i);
 
-      _ResultsVec    _M_results_ret;
-      _TraitsT       _M_traits;
-      const _RegexT& _M_nfa;
+      _ResultsVec     _M_results_ret;
+      const _TraitsT& _M_traits;
+      const _RegexT&  _M_nfa;
     };
 
   // Like the DFS approach, it try every possible state transition; Unlike DFS,
diff --git a/libstdc++-v3/include/bits/regex_executor.tcc b/libstdc++-v3/include/bits/regex_executor.tcc
index edfd0b6..788d65e 100644
--- a/libstdc++-v3/include/bits/regex_executor.tcc
+++ b/libstdc++-v3/include/bits/regex_executor.tcc
@@ -320,7 +320,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       auto __p = std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
 	(__re._M_automaton);
       if (__p->_M_has_backref)
-	return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, *__p, __flags));
+	return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, *__p,
+					      __re._M_traits, __flags));
       return _ExecutorPtr(new _BFSExecutorT(__b, __e, __m, *__p, __flags));
     }
 
diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/wstring_locale.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/wstring_locale.cc
new file mode 100644
index 0000000..a7225cb
--- /dev/null
+++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/wstring_locale.cc
@@ -0,0 +1,48 @@
+// { dg-options "-std=gnu++11" }
+// { dg-require-namedlocale "de_DE.UTF-8" }
+
+//
+// 2013-08-23  Tim Shen <timshen91@gmail.com>
+//
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// 28.11.2 regex_match
+// Tests Extended localization against a wide-string.
+
+#include <regex>
+#include <testsuite_hooks.h>
+
+void
+test01()
+{
+  bool test __attribute__((unused)) = true;
+
+  std::wstring str2 = L"ÜBER";
+  std::wregex re2;
+  re2.imbue(std::locale("de_DE.UTF-8"));
+  re2.assign(L"[[:upper:]]*", std::regex::extended);
+  std::wsmatch m2;
+  VERIFY(std::regex_match(str2, m2, re2));
+}
+
+int
+main()
+{
+  test01();
+  return 0;
+}
diff --git a/libstdc++-v3/testsuite/28_regex/iterators/regex_token_iterator/wchar_t/wstring_02.cc b/libstdc++-v3/testsuite/28_regex/iterators/regex_token_iterator/wchar_t/wstring_02.cc
new file mode 100644
index 0000000..4e58a4f
--- /dev/null
+++ b/libstdc++-v3/testsuite/28_regex/iterators/regex_token_iterator/wchar_t/wstring_02.cc
@@ -0,0 +1,53 @@
+// { dg-options "-std=gnu++11" }
+// { dg-require-namedlocale "en_US.UTF-8" }
+
+//
+// 2013-08-24  Tim Shen <timshen91@gmail.com>
+//
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// 28.12.2 regex_token_iterator
+// Tests regex_token_iterator class over 
+
+#include <regex>
+#include <testsuite_hooks.h>
+
+void
+test01()
+{
+  bool test __attribute__((unused)) = true;
+
+  std::setlocale(LC_ALL, "en_US.UTF-8");
+
+  std::wstring str2 = L"öäü";
+  std::wregex re2;
+  re2.assign(L"([[:lower:]]+)");
+  std::wsmatch m2;
+
+  std::wsregex_token_iterator end {};
+  std::wsregex_token_iterator p{str2.begin(), str2.end(), re2, {1}};
+
+  VERIFY(p == end);
+}
+
+int
+main()
+{
+  test01();
+  return 0;
+}

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Patch 2/2] Localization problem in regex
  2013-08-24  6:15   ` Tim Shen
@ 2013-08-29 18:45     ` Tim Shen
  0 siblings, 0 replies; 13+ messages in thread
From: Tim Shen @ 2013-08-29 18:45 UTC (permalink / raw)
  To: Stefan Schweter; +Cc: libstdc++, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 139 bytes --]

> Fixed.
>
> It's not fully tested. I just run the 28_regex testcases. I'll do a
> full test before committing.

Committed.


-- 
Tim Shen

[-- Attachment #2: changelog --]
[-- Type: application/octet-stream, Size: 766 bytes --]

2013-08-29  Tim Shen  <timshen91@gmail.com>

	* include/bits/regex.h (basic_regex<>::assign): Don't lose _M_traits.
	  (regex_iterator<>::regex_iterator): Return nullptr when regex_search
	     failed.
	  (regex_token_iterator<>::_M_end_of_seq): Should be defined true when
	    _M_result is(not isn't) nullptr.
	* include/bits/regex_compiler.h: Store _Compiler::_M_traits by reference
	  instead of by value.
	* include/bits/regex_executor.h (_DFSExecutor<>::_DFSExecutor): Add
	  _M_traits to _DFSExecutor.
	* include/bits/regex_executor.tcc (__get_executor<>): Pass traits to
	  _DFSExecutor too.
	* testsuite/28_regex/algorithms/regex_match/extended/wstring_locale.cc:
	  New.
	* testsuite/28_regex/iterators/regex_token_iterator/wchar_t/
	  wstring_02.cc: New.

[-- Attachment #3: regex-locale.patch --]
[-- Type: application/octet-stream, Size: 8436 bytes --]

Index: include/bits/regex_compiler.h
===================================================================
--- include/bits/regex_compiler.h	(revision 202081)
+++ include/bits/regex_compiler.h	(revision 202082)
@@ -214,7 +214,7 @@
 	return _M_traits.transform(__s.begin(), __s.end());
       }
 
-      _TraitsT                              _M_traits;
+      const _TraitsT&                       _M_traits;
       _FlagT                                _M_flags;
       bool                                  _M_is_non_matching;
       std::vector<_CharT>                   _M_char_set;
Index: include/bits/regex_executor.tcc
===================================================================
--- include/bits/regex_executor.tcc	(revision 202081)
+++ include/bits/regex_executor.tcc	(revision 202082)
@@ -320,7 +320,8 @@
       auto __p = std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
 	(__re._M_automaton);
       if (__p->_M_has_backref)
-	return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, *__p, __flags));
+	return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, *__p,
+					      __re._M_traits, __flags));
       return _ExecutorPtr(new _BFSExecutorT(__b, __e, __m, *__p, __flags));
     }
 
Index: include/bits/regex_executor.h
===================================================================
--- include/bits/regex_executor.h	(revision 202081)
+++ include/bits/regex_executor.h	(revision 202082)
@@ -120,13 +120,14 @@
       typedef typename _BaseT::_ResultsVec                 _ResultsVec;
       typedef regex_constants::match_flag_type             _FlagT;
 
-      _DFSExecutor(_BiIter        __begin,
-		   _BiIter        __end,
-		   _ResultsT&     __results,
-		   const _RegexT& __nfa,
-		   _FlagT         __flags)
+      _DFSExecutor(_BiIter         __begin,
+		   _BiIter         __end,
+		   _ResultsT&      __results,
+		   const _RegexT&  __nfa,
+		   const _TraitsT& __traits,
+		   _FlagT          __flags)
       : _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count()),
-	_M_traits(_TraitsT()), _M_nfa(__nfa), _M_results_ret(this->_M_results)
+	_M_traits(__traits), _M_nfa(__nfa), _M_results_ret(this->_M_results)
       { }
 
       void
@@ -142,9 +143,9 @@
 	bool
 	_M_dfs(_StateIdT __i);
 
-      _ResultsVec    _M_results_ret;
-      _TraitsT       _M_traits;
-      const _RegexT& _M_nfa;
+      _ResultsVec     _M_results_ret;
+      const _TraitsT& _M_traits;
+      const _RegexT&  _M_nfa;
     };
 
   // Like the DFS approach, it try every possible state transition; Unlike DFS,
Index: include/bits/regex.h
===================================================================
--- include/bits/regex.h	(revision 202081)
+++ include/bits/regex.h	(revision 202082)
@@ -880,8 +880,10 @@
 	assign(const basic_string<_Ch_type, _Ch_typeraits, _Alloc>& __s,
 	       flag_type __flags = ECMAScript)
 	{
-	  basic_regex __tmp(__s, __flags);
-	  this->swap(__tmp);
+	  _M_flags = __flags;
+	  _M_automaton =
+	    __detail::_Compiler<decltype(__s.begin()), _Ch_type, _Rx_traits>
+	    (__s.begin(), __s.end(), _M_traits, _M_flags)._M_get_nfa();
 	  return *this;
 	}
 
@@ -2591,7 +2593,10 @@
 		     regex_constants::match_flag_type __m
 		     = regex_constants::match_default)
       : _M_begin(__a), _M_end(__b), _M_pregex(&__re), _M_flags(__m), _M_match()
-      { regex_search(_M_begin, _M_end, _M_match, *_M_pregex, _M_flags); }
+      {
+	if (!regex_search(_M_begin, _M_end, _M_match, *_M_pregex, _M_flags))
+	  *this = regex_iterator();
+      }
 
       /**
        * Copy constructs a %regex_iterator.
@@ -2905,9 +2910,9 @@
 	  return (*_M_position)[_M_subs[_M_n]];
       }
 
-      bool
-      _M_end_of_seq() const
-      { return _M_result != nullptr; }
+      constexpr bool
+      _M_end_of_seq()
+      { return _M_result == nullptr; }
 
       _Position _M_position;
       const value_type* _M_result;
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 202081)
+++ ChangeLog	(revision 202082)
@@ -1,3 +1,21 @@
+2013-08-29  Tim Shen  <timshen91@gmail.com>
+
+	* include/bits/regex.h (basic_regex<>::assign): Don't lose _M_traits.
+	  (regex_iterator<>::regex_iterator): Return nullptr when regex_search
+	     failed.
+	  (regex_token_iterator<>::_M_end_of_seq): Should be defined true when
+	    _M_result is(not isn't) nullptr.
+	* include/bits/regex_compiler.h: Store _Compiler::_M_traits by reference
+	  instead of by value.
+	* include/bits/regex_executor.h (_DFSExecutor<>::_DFSExecutor): Add
+	  _M_traits to _DFSExecutor.
+	* include/bits/regex_executor.tcc (__get_executor<>): Pass traits to
+	  _DFSExecutor too.
+	* testsuite/28_regex/algorithms/regex_match/extended/wstring_locale.cc:
+	  New.
+	* testsuite/28_regex/iterators/regex_token_iterator/wchar_t/
+	  wstring_02.cc: New.
+
 2013-08-26  Tim Shen  <timshen91@gmail.com>
 
 	* include/Makefile.am: Add regex_scanner.{h,tcc}.
Index: testsuite/28_regex/algorithms/regex_match/extended/wstring_locale.cc
===================================================================
--- testsuite/28_regex/algorithms/regex_match/extended/wstring_locale.cc	(revision 0)
+++ testsuite/28_regex/algorithms/regex_match/extended/wstring_locale.cc	(revision 202082)
@@ -0,0 +1,48 @@
+// { dg-options "-std=gnu++11" }
+// { dg-require-namedlocale "de_DE.UTF-8" }
+
+//
+// 2013-08-29  Tim Shen <timshen91@gmail.com>
+//
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// 28.11.2 regex_match
+// Tests Extended localization against a wide-string.
+
+#include <regex>
+#include <testsuite_hooks.h>
+
+void
+test01()
+{
+  bool test __attribute__((unused)) = true;
+
+  std::wstring str2 = L"ÜBER";
+  std::wregex re2;
+  re2.imbue(std::locale("de_DE.UTF-8"));
+  re2.assign(L"[[:upper:]]*", std::regex::extended);
+  std::wsmatch m2;
+  VERIFY(std::regex_match(str2, m2, re2));
+}
+
+int
+main()
+{
+  test01();
+  return 0;
+}
Index: testsuite/28_regex/iterators/regex_token_iterator/wchar_t/wstring_02.cc
===================================================================
--- testsuite/28_regex/iterators/regex_token_iterator/wchar_t/wstring_02.cc	(revision 0)
+++ testsuite/28_regex/iterators/regex_token_iterator/wchar_t/wstring_02.cc	(revision 202082)
@@ -0,0 +1,53 @@
+// { dg-options "-std=gnu++11" }
+// { dg-require-namedlocale "en_US.UTF-8" }
+
+//
+// 2013-08-29  Tim Shen <timshen91@gmail.com>
+//
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// 28.12.2 regex_token_iterator
+// Tests regex_token_iterator class over a localized wstring.
+
+#include <regex>
+#include <testsuite_hooks.h>
+
+void
+test01()
+{
+  bool test __attribute__((unused)) = true;
+
+  std::setlocale(LC_ALL, "en_US.UTF-8");
+
+  std::wstring str2 = L"öäü";
+  std::wregex re2;
+  re2.assign(L"([[:lower:]]+)");
+  std::wsmatch m2;
+
+  std::wsregex_token_iterator end {};
+  std::wsregex_token_iterator p{str2.begin(), str2.end(), re2, {1}};
+
+  VERIFY(p == end);
+}
+
+int
+main()
+{
+  test01();
+  return 0;
+}

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2013-08-29 18:41 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-08-23  9:25 [Patch 2/2] Localization problem in regex Tim Shen
2013-08-23 10:15 ` Jonathan Wakely
2013-08-23 11:08 ` Paolo Carlini
2013-08-23 14:04   ` Tim Shen
2013-08-23 14:27     ` Paolo Carlini
2013-08-23 14:57       ` Tim Shen
2013-08-23 15:02         ` Jonathan Wakely
2013-08-23 15:05         ` Paolo Carlini
2013-08-23 15:48           ` Tim Shen
2013-08-23 16:20             ` Jonathan Wakely
2013-08-23 19:04 ` Stefan Schweter
2013-08-24  6:15   ` Tim Shen
2013-08-29 18:45     ` Tim Shen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).