public inbox for libstdc++@gcc.gnu.org
 help / color / mirror / Atom feed
From: Jakub Jelinek <jakub@redhat.com>
To: libstdc++@gcc.gnu.org, gcc-patches@gcc.gnu.org
Subject: [PATCH] libstdc++: Poor man's case insensitive comparisons in time_get [PR71557]
Date: Tue, 14 Dec 2021 14:49:50 +0100	[thread overview]
Message-ID: <20211214134950.GW2646553@tucnak> (raw)

Hi!

This patch uses the same approximate (not fully correct) case-insensitive
comparison that is already used elsewhere in the same header.  A proper
comparison that also handled multi-byte characters would be harder to write,
and I don't see one implemented in __ctype's methods.

Tested on x86_64-linux, ok for trunk?

2021-12-14  Jakub Jelinek  <jakub@redhat.com>

	PR libstdc++/71557
	* include/bits/locale_facets_nonio.tcc (_M_extract_via_format):
	Compare characters other than format specifiers and whitespace
	case insensitively.
	(_M_extract_name): Compare characters case insensitively.
	* testsuite/22_locale/time_get/get/char/71557.cc: New test.
	* testsuite/22_locale/time_get/get/wchar_t/71557.cc: New test.

--- libstdc++-v3/include/bits/locale_facets_nonio.tcc.jj	2021-12-10 17:04:35.224563127 +0100
+++ libstdc++-v3/include/bits/locale_facets_nonio.tcc	2021-12-14 13:10:40.845984740 +0100
@@ -910,7 +910,9 @@ _GLIBCXX_END_NAMESPACE_LDBL_OR_CXX11
 	  else
 	    {
 	      // Verify format and input match, extract and discard.
-	      if (__format[__i] == *__beg)
+	      // TODO real case-insensitive comparison
+	      if (__ctype.tolower(__format[__i]) == __ctype.tolower(*__beg)
+		  || __ctype.toupper(__format[__i]) == __ctype.toupper(*__beg))
 		++__beg;
 	      else
 		__tmperr |= ios_base::failbit;
@@ -988,15 +990,15 @@ _GLIBCXX_END_NAMESPACE_LDBL_OR_CXX11
       bool __begupdated = false;
 
       // Look for initial matches.
-      // NB: Some of the locale data is in the form of all lowercase
-      // names, and some is in the form of initially-capitalized
-      // names. Look for both.
       if (__beg != __end)
 	{
 	  const char_type __c = *__beg;
+	  // TODO real case-insensitive comparison
+	  const char_type __cl = __ctype.tolower(__c);
+	  const char_type __cu = __ctype.toupper(__c);
 	  for (size_t __i1 = 0; __i1 < __indexlen; ++__i1)
-	    if (__c == __names[__i1][0]
-		|| __c == __ctype.toupper(__names[__i1][0]))
+	    if (__cl == __ctype.tolower(__names[__i1][0])
+		|| __cu == __ctype.toupper(__names[__i1][0]))
 	      {
 		__lengths[__nmatches]
 		  = __traits_type::length(__names[__i1]);
@@ -1023,15 +1025,22 @@ _GLIBCXX_END_NAMESPACE_LDBL_OR_CXX11
 	      bool __match_longer = false;
 
 	      if (__beg != __end)
-		for (size_t __i3 = 0; __i3 < __nmatches; ++__i3)
-		  {
-		    __name = __names[__matches[__i3]];
-		    if (__lengths[__i3] > __pos && (__name[__pos] == *__beg))
-		      {
-			__match_longer = true;
-			break;
-		      }
-		  }
+		{
+		  // TODO real case-insensitive comparison
+		  const char_type __cl = __ctype.tolower(*__beg);
+		  const char_type __cu = __ctype.toupper(*__beg);
+		  for (size_t __i3 = 0; __i3 < __nmatches; ++__i3)
+		    {
+		      __name = __names[__matches[__i3]];
+		      if (__lengths[__i3] > __pos
+			  && (__ctype.tolower(__name[__pos]) == __cl
+			      || __ctype.toupper(__name[__pos]) == __cu))
+			{
+			  __match_longer = true;
+			  break;
+			}
+		    }
+		}
 	      for (size_t __i4 = 0; __i4 < __nmatches;)
 		if (__match_longer == (__lengths[__i4] == __pos))
 		  {
@@ -1069,17 +1078,23 @@ _GLIBCXX_END_NAMESPACE_LDBL_OR_CXX11
 		}
 	    }
 	  if (__pos < __minlen && __beg != __end)
-	    for (size_t __i6 = 0; __i6 < __nmatches;)
-	      {
-		__name = __names[__matches[__i6]];
-		if (!(__name[__pos] == *__beg))
-		  {
-		    __matches[__i6] = __matches[--__nmatches];
-		    __lengths[__i6] = __lengths[__nmatches];
-		  }
-		else
-		  ++__i6;
-	      }
+	    {
+	      // TODO real case-insensitive comparison
+	      const char_type __cl = __ctype.tolower(*__beg);
+	      const char_type __cu = __ctype.toupper(*__beg);
+	      for (size_t __i6 = 0; __i6 < __nmatches;)
+		{
+		  __name = __names[__matches[__i6]];
+		  if (__ctype.tolower(__name[__pos]) != __cl
+		      && __ctype.toupper(__name[__pos]) != __cu)
+		    {
+		      __matches[__i6] = __matches[--__nmatches];
+		      __lengths[__i6] = __lengths[__nmatches];
+		    }
+		  else
+		    ++__i6;
+		}
+	    }
 	  else
 	    break;
 	}
@@ -1094,7 +1109,12 @@ _GLIBCXX_END_NAMESPACE_LDBL_OR_CXX11
 	    }
 	  __name = __names[__matches[0]];
 	  const size_t __len = __lengths[0];
-	  while (__pos < __len && __beg != __end && __name[__pos] == *__beg)
+	  while (__pos < __len
+		 && __beg != __end
+		 // TODO real case-insensitive comparison
+		 && (__ctype.tolower(__name[__pos]) == __ctype.tolower(*__beg)
+		     || (__ctype.toupper(__name[__pos])
+			 == __ctype.toupper(*__beg))))
 	    ++__beg, (void)++__pos;
 
 	  if (__len == __pos)
--- libstdc++-v3/testsuite/22_locale/time_get/get/char/71557.cc.jj	2021-12-14 13:16:25.956027379 +0100
+++ libstdc++-v3/testsuite/22_locale/time_get/get/char/71557.cc	2021-12-14 13:20:30.628512769 +0100
@@ -0,0 +1,96 @@
+// { dg-do run { target c++11 } }
+
+// Copyright (C) 2021 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+#include <locale>
+#include <sstream>
+#include <iterator>
+#include <testsuite_hooks.h>
+
+void
+test01()  // PR libstdc++/71557: %b month names must parse in any letter case
+{
+  using namespace std;
+
+  locale loc_c = locale::classic();  // the classic "C" locale
+
+  istringstream iss;
+  iss.imbue(loc_c);
+  const time_get<char>& tget = use_facet<time_get<char>>(iss.getloc());
+  typedef istreambuf_iterator<char> iter;
+  const iter end;  // default-constructed: passed as the end-of-input iterator
+
+  tm time;
+  ios_base::iostate err = ios_base::badbit;  // checks below expect eofbit instead
+
+  iss.str("20:48:01 MAR 31 2016");  // month in all upper case
+  string format = "%H:%M:%S %b %d %Y";  // %b: abbreviated month name
+  auto ret = tget.get(iter(iss), end, iss, err, &time,
+		      format.data(), format.data()+format.size());
+  VERIFY( err == ios_base::eofbit );  // exactly eofbit, so failbit is not set
+  VERIFY( ret == end );  // the whole input was consumed
+  VERIFY( time.tm_year == 2016 - 1900 );  // tm_year is stored relative to 1900
+  VERIFY( time.tm_mon == 2 );  // "MAR": tm_mon counts from 0
+  VERIFY( time.tm_mday == 31 );
+  VERIFY( time.tm_hour == 20 );
+  VERIFY( time.tm_min == 48 );
+  VERIFY( time.tm_sec == 01 );  // 01 is an octal literal, equal to 1
+
+  iss.str("21:38:11 apr 30 2017");  // month in all lower case
+  ret = tget.get(iter(iss), end, iss, err, &time,
+		 format.data(), format.data()+format.size());
+  VERIFY( err == ios_base::eofbit );
+  VERIFY( ret == end );
+  VERIFY( time.tm_year == 2017 - 1900 );
+  VERIFY( time.tm_mon == 3 );  // "apr"
+  VERIFY( time.tm_mday == 30 );
+  VERIFY( time.tm_hour == 21 );
+  VERIFY( time.tm_min == 38 );
+  VERIFY( time.tm_sec == 11 );
+
+  iss.str("22:28:21 mAy 29 2018");  // mixed case: only the middle letter is upper
+  ret = tget.get(iter(iss), end, iss, err, &time,
+		 format.data(), format.data()+format.size());
+  VERIFY( err == ios_base::eofbit );
+  VERIFY( ret == end );
+  VERIFY( time.tm_year == 2018 - 1900 );
+  VERIFY( time.tm_mon == 4 );  // "mAy"
+  VERIFY( time.tm_mday == 29 );
+  VERIFY( time.tm_hour == 22 );
+  VERIFY( time.tm_min == 28 );
+  VERIFY( time.tm_sec == 21 );
+
+  iss.str("23:18:31 JuN 28 2019");  // mixed case: only the middle letter is lower
+  ret = tget.get(iter(iss), end, iss, err, &time,
+		 format.data(), format.data()+format.size());
+  VERIFY( err == ios_base::eofbit );
+  VERIFY( ret == end );
+  VERIFY( time.tm_year == 2019 - 1900 );
+  VERIFY( time.tm_mon == 5 );  // "JuN"
+  VERIFY( time.tm_mday == 28 );
+  VERIFY( time.tm_hour == 23 );
+  VERIFY( time.tm_min == 18 );
+  VERIFY( time.tm_sec == 31 );
+}
+
+int
+main()  // entry point of the char test: runs test01() and returns 0
+{
+  test01();
+  return 0;
+}
--- libstdc++-v3/testsuite/22_locale/time_get/get/wchar_t/71557.cc.jj	2021-12-14 13:20:51.981206044 +0100
+++ libstdc++-v3/testsuite/22_locale/time_get/get/wchar_t/71557.cc	2021-12-14 13:21:45.263440673 +0100
@@ -0,0 +1,96 @@
+// { dg-do run { target c++11 } }
+
+// Copyright (C) 2021 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+#include <locale>
+#include <sstream>
+#include <iterator>
+#include <testsuite_hooks.h>
+
+void
+test01()  // PR libstdc++/71557, wchar_t: %b month names parse in any letter case
+{
+  using namespace std;
+
+  locale loc_c = locale::classic();  // the classic "C" locale
+
+  wistringstream iss;
+  iss.imbue(loc_c);
+  const time_get<wchar_t>& tget = use_facet<time_get<wchar_t>>(iss.getloc());
+  typedef istreambuf_iterator<wchar_t> iter;
+  const iter end;  // default-constructed: passed as the end-of-input iterator
+
+  tm time;
+  ios_base::iostate err = ios_base::badbit;  // checks below expect eofbit instead
+
+  iss.str(L"20:48:01 MAR 31 2016");  // month in all upper case
+  wstring format = L"%H:%M:%S %b %d %Y";  // %b: abbreviated month name
+  auto ret = tget.get(iter(iss), end, iss, err, &time,
+		      format.data(), format.data()+format.size());
+  VERIFY( err == ios_base::eofbit );  // exactly eofbit, so failbit is not set
+  VERIFY( ret == end );  // the whole input was consumed
+  VERIFY( time.tm_year == 2016 - 1900 );  // tm_year is stored relative to 1900
+  VERIFY( time.tm_mon == 2 );  // "MAR": tm_mon counts from 0
+  VERIFY( time.tm_mday == 31 );
+  VERIFY( time.tm_hour == 20 );
+  VERIFY( time.tm_min == 48 );
+  VERIFY( time.tm_sec == 01 );  // 01 is an octal literal, equal to 1
+
+  iss.str(L"21:38:11 apr 30 2017");  // month in all lower case
+  ret = tget.get(iter(iss), end, iss, err, &time,
+		 format.data(), format.data()+format.size());
+  VERIFY( err == ios_base::eofbit );
+  VERIFY( ret == end );
+  VERIFY( time.tm_year == 2017 - 1900 );
+  VERIFY( time.tm_mon == 3 );  // "apr"
+  VERIFY( time.tm_mday == 30 );
+  VERIFY( time.tm_hour == 21 );
+  VERIFY( time.tm_min == 38 );
+  VERIFY( time.tm_sec == 11 );
+
+  iss.str(L"22:28:21 mAy 29 2018");  // mixed case: only the middle letter is upper
+  ret = tget.get(iter(iss), end, iss, err, &time,
+		 format.data(), format.data()+format.size());
+  VERIFY( err == ios_base::eofbit );
+  VERIFY( ret == end );
+  VERIFY( time.tm_year == 2018 - 1900 );
+  VERIFY( time.tm_mon == 4 );  // "mAy"
+  VERIFY( time.tm_mday == 29 );
+  VERIFY( time.tm_hour == 22 );
+  VERIFY( time.tm_min == 28 );
+  VERIFY( time.tm_sec == 21 );
+
+  iss.str(L"23:18:31 JuN 28 2019");  // mixed case: only the middle letter is lower
+  ret = tget.get(iter(iss), end, iss, err, &time,
+		 format.data(), format.data()+format.size());
+  VERIFY( err == ios_base::eofbit );
+  VERIFY( ret == end );
+  VERIFY( time.tm_year == 2019 - 1900 );
+  VERIFY( time.tm_mon == 5 );  // "JuN"
+  VERIFY( time.tm_mday == 28 );
+  VERIFY( time.tm_hour == 23 );
+  VERIFY( time.tm_min == 18 );
+  VERIFY( time.tm_sec == 31 );
+}
+
+int
+main()  // entry point of the wchar_t test: runs test01() and returns 0
+{
+  test01();
+  return 0;
+}

	Jakub


             reply	other threads:[~2021-12-14 13:49 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-12-14 13:49 Jakub Jelinek [this message]
2021-12-14 14:08 ` Jonathan Wakely

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211214134950.GW2646553@tucnak \
    --to=jakub@redhat.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=libstdc++@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).