From: Jakub Jelinek <jakub@redhat.com>
To: libstdc++@gcc.gnu.org, gcc-patches@gcc.gnu.org
Subject: [PATCH] libstdc++: Poor man's case insensitive comparisons in time_get [PR71557]
Date: Tue, 14 Dec 2021 14:49:50 +0100 [thread overview]
Message-ID: <20211214134950.GW2646553@tucnak> (raw)
Hi!
This patch uses the same, not completely correct, case-insensitive comparisons
as used elsewhere in the same header: two characters are treated as matching
if their tolower() results or their toupper() results are equal. Proper
comparisons that would handle even multi-byte characters would be harder, but
I don't see them implemented in __ctype's methods.
Tested on x86_64-linux, ok for trunk?
2021-12-14 Jakub Jelinek <jakub@redhat.com>
PR libstdc++/71557
* include/bits/locale_facets_nonio.tcc (_M_extract_via_format):
Compare characters other than format specifiers and whitespace
case insensitively.
(_M_extract_name): Compare characters case insensitively.
* testsuite/22_locale/time_get/get/char/71557.cc: New test.
* testsuite/22_locale/time_get/get/wchar_t/71557.cc: New test.
--- libstdc++-v3/include/bits/locale_facets_nonio.tcc.jj 2021-12-10 17:04:35.224563127 +0100
+++ libstdc++-v3/include/bits/locale_facets_nonio.tcc 2021-12-14 13:10:40.845984740 +0100
@@ -910,7 +910,9 @@ _GLIBCXX_END_NAMESPACE_LDBL_OR_CXX11
else
{
// Verify format and input match, extract and discard.
- if (__format[__i] == *__beg)
+ // TODO real case-insensitive comparison
+ if (__ctype.tolower(__format[__i]) == __ctype.tolower(*__beg)
+ || __ctype.toupper(__format[__i]) == __ctype.toupper(*__beg))
++__beg;
else
__tmperr |= ios_base::failbit;
@@ -988,15 +990,15 @@ _GLIBCXX_END_NAMESPACE_LDBL_OR_CXX11
bool __begupdated = false;
// Look for initial matches.
- // NB: Some of the locale data is in the form of all lowercase
- // names, and some is in the form of initially-capitalized
- // names. Look for both.
if (__beg != __end)
{
const char_type __c = *__beg;
+ // TODO real case-insensitive comparison
+ const char_type __cl = __ctype.tolower(__c);
+ const char_type __cu = __ctype.toupper(__c);
for (size_t __i1 = 0; __i1 < __indexlen; ++__i1)
- if (__c == __names[__i1][0]
- || __c == __ctype.toupper(__names[__i1][0]))
+ if (__cl == __ctype.tolower(__names[__i1][0])
+ || __cu == __ctype.toupper(__names[__i1][0]))
{
__lengths[__nmatches]
= __traits_type::length(__names[__i1]);
@@ -1023,15 +1025,22 @@ _GLIBCXX_END_NAMESPACE_LDBL_OR_CXX11
bool __match_longer = false;
if (__beg != __end)
- for (size_t __i3 = 0; __i3 < __nmatches; ++__i3)
- {
- __name = __names[__matches[__i3]];
- if (__lengths[__i3] > __pos && (__name[__pos] == *__beg))
- {
- __match_longer = true;
- break;
- }
- }
+ {
+ // TODO real case-insensitive comparison
+ const char_type __cl = __ctype.tolower(*__beg);
+ const char_type __cu = __ctype.toupper(*__beg);
+ for (size_t __i3 = 0; __i3 < __nmatches; ++__i3)
+ {
+ __name = __names[__matches[__i3]];
+ if (__lengths[__i3] > __pos
+ && (__ctype.tolower(__name[__pos]) == __cl
+ || __ctype.toupper(__name[__pos]) == __cu))
+ {
+ __match_longer = true;
+ break;
+ }
+ }
+ }
for (size_t __i4 = 0; __i4 < __nmatches;)
if (__match_longer == (__lengths[__i4] == __pos))
{
@@ -1069,17 +1078,23 @@ _GLIBCXX_END_NAMESPACE_LDBL_OR_CXX11
}
}
if (__pos < __minlen && __beg != __end)
- for (size_t __i6 = 0; __i6 < __nmatches;)
- {
- __name = __names[__matches[__i6]];
- if (!(__name[__pos] == *__beg))
- {
- __matches[__i6] = __matches[--__nmatches];
- __lengths[__i6] = __lengths[__nmatches];
- }
- else
- ++__i6;
- }
+ {
+ // TODO real case-insensitive comparison
+ const char_type __cl = __ctype.tolower(*__beg);
+ const char_type __cu = __ctype.toupper(*__beg);
+ for (size_t __i6 = 0; __i6 < __nmatches;)
+ {
+ __name = __names[__matches[__i6]];
+ if (__ctype.tolower(__name[__pos]) != __cl
+ && __ctype.toupper(__name[__pos]) != __cu)
+ {
+ __matches[__i6] = __matches[--__nmatches];
+ __lengths[__i6] = __lengths[__nmatches];
+ }
+ else
+ ++__i6;
+ }
+ }
else
break;
}
@@ -1094,7 +1109,12 @@ _GLIBCXX_END_NAMESPACE_LDBL_OR_CXX11
}
__name = __names[__matches[0]];
const size_t __len = __lengths[0];
- while (__pos < __len && __beg != __end && __name[__pos] == *__beg)
+ while (__pos < __len
+ && __beg != __end
+ // TODO real case-insensitive comparison
+ && (__ctype.tolower(__name[__pos]) == __ctype.tolower(*__beg)
+ || (__ctype.toupper(__name[__pos])
+ == __ctype.toupper(*__beg))))
++__beg, (void)++__pos;
if (__len == __pos)
--- libstdc++-v3/testsuite/22_locale/time_get/get/char/71557.cc.jj 2021-12-14 13:16:25.956027379 +0100
+++ libstdc++-v3/testsuite/22_locale/time_get/get/char/71557.cc 2021-12-14 13:20:30.628512769 +0100
@@ -0,0 +1,96 @@
+// { dg-do run { target c++11 } }
+
+// Copyright (C) 2021 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3. If not see
+// <http://www.gnu.org/licenses/>.
+
+#include <locale>
+#include <sstream>
+#include <iterator>
+#include <testsuite_hooks.h>
+
+void
+test01()
+{
+ using namespace std;
+ // PR libstdc++/71557: parse %b month abbreviations written in varying case.
+ locale loc_c = locale::classic(); // the "C" locale
+
+ istringstream iss;
+ iss.imbue(loc_c);
+ const time_get<char>& tget = use_facet<time_get<char>>(iss.getloc());
+ typedef istreambuf_iterator<char> iter;
+ const iter end; // default-constructed: end-of-stream iterator
+
+ tm time; // left uninitialized; only the fields checked below are read
+ ios_base::iostate err = ios_base::badbit; // placeholder; checks below expect eofbit after get()
+ // Case 1: month abbreviation entirely in upper case ("MAR").
+ iss.str("20:48:01 MAR 31 2016");
+ string format = "%H:%M:%S %b %d %Y"; // reused for every case below
+ auto ret = tget.get(iter(iss), end, iss, err, &time,
+ format.data(), format.data()+format.size());
+ VERIFY( err == ios_base::eofbit ); // eofbit only, i.e. no failbit
+ VERIFY( ret == end ); // all input was consumed
+ VERIFY( time.tm_year == 2016 - 1900 ); // tm_year is stored relative to 1900
+ VERIFY( time.tm_mon == 2 ); // March; tm_mon counts from 0
+ VERIFY( time.tm_mday == 31 );
+ VERIFY( time.tm_hour == 20 );
+ VERIFY( time.tm_min == 48 );
+ VERIFY( time.tm_sec == 01 ); // 01 is an octal literal equal to 1
+ // Case 2: month abbreviation entirely in lower case ("apr").
+ iss.str("21:38:11 apr 30 2017");
+ ret = tget.get(iter(iss), end, iss, err, &time,
+ format.data(), format.data()+format.size());
+ VERIFY( err == ios_base::eofbit );
+ VERIFY( ret == end );
+ VERIFY( time.tm_year == 2017 - 1900 );
+ VERIFY( time.tm_mon == 3 ); // April
+ VERIFY( time.tm_mday == 30 );
+ VERIFY( time.tm_hour == 21 );
+ VERIFY( time.tm_min == 38 );
+ VERIFY( time.tm_sec == 11 );
+ // Case 3: mixed case with only the middle letter upper case ("mAy").
+ iss.str("22:28:21 mAy 29 2018");
+ ret = tget.get(iter(iss), end, iss, err, &time,
+ format.data(), format.data()+format.size());
+ VERIFY( err == ios_base::eofbit );
+ VERIFY( ret == end );
+ VERIFY( time.tm_year == 2018 - 1900 );
+ VERIFY( time.tm_mon == 4 ); // May
+ VERIFY( time.tm_mday == 29 );
+ VERIFY( time.tm_hour == 22 );
+ VERIFY( time.tm_min == 28 );
+ VERIFY( time.tm_sec == 21 );
+ // Case 4: mixed case with the first and last letters upper case ("JuN").
+ iss.str("23:18:31 JuN 28 2019");
+ ret = tget.get(iter(iss), end, iss, err, &time,
+ format.data(), format.data()+format.size());
+ VERIFY( err == ios_base::eofbit );
+ VERIFY( ret == end );
+ VERIFY( time.tm_year == 2019 - 1900 );
+ VERIFY( time.tm_mon == 5 ); // June
+ VERIFY( time.tm_mday == 28 );
+ VERIFY( time.tm_hour == 23 );
+ VERIFY( time.tm_min == 18 );
+ VERIFY( time.tm_sec == 31 );
+}
+
+int
+main()
+{
+ test01(); // the only test case in this file (narrow-character variant)
+ return 0;
+}
--- libstdc++-v3/testsuite/22_locale/time_get/get/wchar_t/71557.cc.jj 2021-12-14 13:20:51.981206044 +0100
+++ libstdc++-v3/testsuite/22_locale/time_get/get/wchar_t/71557.cc 2021-12-14 13:21:45.263440673 +0100
@@ -0,0 +1,96 @@
+// { dg-do run { target c++11 } }
+
+// Copyright (C) 2021 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3. If not see
+// <http://www.gnu.org/licenses/>.
+
+#include <locale>
+#include <sstream>
+#include <iterator>
+#include <testsuite_hooks.h>
+
+void
+test01()
+{
+ using namespace std;
+ // PR libstdc++/71557: parse %b month abbreviations written in varying case.
+ locale loc_c = locale::classic(); // the "C" locale
+
+ wistringstream iss;
+ iss.imbue(loc_c);
+ const time_get<wchar_t>& tget = use_facet<time_get<wchar_t>>(iss.getloc());
+ typedef istreambuf_iterator<wchar_t> iter;
+ const iter end; // default-constructed: end-of-stream iterator
+
+ tm time; // left uninitialized; only the fields checked below are read
+ ios_base::iostate err = ios_base::badbit; // placeholder; checks below expect eofbit after get()
+ // Case 1: month abbreviation entirely in upper case (L"MAR").
+ iss.str(L"20:48:01 MAR 31 2016");
+ wstring format = L"%H:%M:%S %b %d %Y"; // reused for every case below
+ auto ret = tget.get(iter(iss), end, iss, err, &time,
+ format.data(), format.data()+format.size());
+ VERIFY( err == ios_base::eofbit ); // eofbit only, i.e. no failbit
+ VERIFY( ret == end ); // all input was consumed
+ VERIFY( time.tm_year == 2016 - 1900 ); // tm_year is stored relative to 1900
+ VERIFY( time.tm_mon == 2 ); // March; tm_mon counts from 0
+ VERIFY( time.tm_mday == 31 );
+ VERIFY( time.tm_hour == 20 );
+ VERIFY( time.tm_min == 48 );
+ VERIFY( time.tm_sec == 01 ); // 01 is an octal literal equal to 1
+ // Case 2: month abbreviation entirely in lower case (L"apr").
+ iss.str(L"21:38:11 apr 30 2017");
+ ret = tget.get(iter(iss), end, iss, err, &time,
+ format.data(), format.data()+format.size());
+ VERIFY( err == ios_base::eofbit );
+ VERIFY( ret == end );
+ VERIFY( time.tm_year == 2017 - 1900 );
+ VERIFY( time.tm_mon == 3 ); // April
+ VERIFY( time.tm_mday == 30 );
+ VERIFY( time.tm_hour == 21 );
+ VERIFY( time.tm_min == 38 );
+ VERIFY( time.tm_sec == 11 );
+ // Case 3: mixed case with only the middle letter upper case (L"mAy").
+ iss.str(L"22:28:21 mAy 29 2018");
+ ret = tget.get(iter(iss), end, iss, err, &time,
+ format.data(), format.data()+format.size());
+ VERIFY( err == ios_base::eofbit );
+ VERIFY( ret == end );
+ VERIFY( time.tm_year == 2018 - 1900 );
+ VERIFY( time.tm_mon == 4 ); // May
+ VERIFY( time.tm_mday == 29 );
+ VERIFY( time.tm_hour == 22 );
+ VERIFY( time.tm_min == 28 );
+ VERIFY( time.tm_sec == 21 );
+ // Case 4: mixed case with the first and last letters upper case (L"JuN").
+ iss.str(L"23:18:31 JuN 28 2019");
+ ret = tget.get(iter(iss), end, iss, err, &time,
+ format.data(), format.data()+format.size());
+ VERIFY( err == ios_base::eofbit );
+ VERIFY( ret == end );
+ VERIFY( time.tm_year == 2019 - 1900 );
+ VERIFY( time.tm_mon == 5 ); // June
+ VERIFY( time.tm_mday == 28 );
+ VERIFY( time.tm_hour == 23 );
+ VERIFY( time.tm_min == 18 );
+ VERIFY( time.tm_sec == 31 );
+}
+
+int
+main()
+{
+ test01(); // the only test case in this file (wide-character variant)
+ return 0;
+}
Jakub
next reply other threads:[~2021-12-14 13:49 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-12-14 13:49 Jakub Jelinek [this message]
2021-12-14 14:08 ` Jonathan Wakely
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20211214134950.GW2646553@tucnak \
--to=jakub@redhat.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=libstdc++@gcc.gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).