public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] stdlib: fix grouping verification with multi-byte thousands separator
@ 2023-10-11 14:49 Andreas Schwab
  2023-10-12  8:25 ` Florian Weimer
  0 siblings, 1 reply; 4+ messages in thread
From: Andreas Schwab @ 2023-10-11 14:49 UTC (permalink / raw)
  To: libc-alpha

The grouping verification only worked for a single-byte thousands
separator.  With a multi-byte separator it returned as if no separators
were present.  The actual parsing in str_to_mpn will then go wrong when
there are multiple adjacent multi-byte separators in the number.
---
 stdlib/grouping.c    | 33 ++++++++++-----------------------
 stdlib/tst-strtod4.c |  3 ++-
 2 files changed, 12 insertions(+), 24 deletions(-)

diff --git a/stdlib/grouping.c b/stdlib/grouping.c
index b6bf1dbab2..16b266d3e0 100644
--- a/stdlib/grouping.c
+++ b/stdlib/grouping.c
@@ -59,7 +59,6 @@ __correctly_grouped_prefixmb (const STRING_TYPE *begin, const STRING_TYPE *end,
   size_t thousands_len = 1;
 #else
   size_t thousands_len = strlen (thousands);
-  int cnt;
 #endif
 
   while (end - begin >= thousands_len)
@@ -74,14 +73,8 @@ __correctly_grouped_prefixmb (const STRING_TYPE *begin, const STRING_TYPE *end,
 	  if (*cp == thousands)
 	    break;
 #else
-	  if (cp[thousands_len - 1] == *thousands)
-	    {
-	      for (cnt = 1; thousands[cnt] != '\0'; ++cnt)
-		if (thousands[cnt] != cp[thousands_len - 1 - cnt])
-		  break;
-	      if (thousands[cnt] == '\0')
-		break;
-	    }
+	  if (memcmp (cp, thousands, thousands_len) == 0)
+	    break;
 #endif
 	  --cp;
 	}
@@ -91,7 +84,7 @@ __correctly_grouped_prefixmb (const STRING_TYPE *begin, const STRING_TYPE *end,
       if (cp < begin)
 	return end;
 
-      if (end - cp == (int) *gp + 1)
+      if (end - cp == (int) *gp + thousands_len)
 	{
 	  /* This group matches the specification.  */
 
@@ -105,7 +98,7 @@ __correctly_grouped_prefixmb (const STRING_TYPE *begin, const STRING_TYPE *end,
 	     remainder of the string from BEGIN to NEW_END is the part we
 	     will consider if there is a grouping error in this trailing
 	     portion from CP to END.  */
-	  new_end = cp - 1;
+	  new_end = cp;
 
 	  /* Loop while the grouping is correct.  */
 	  while (1)
@@ -132,10 +125,7 @@ __correctly_grouped_prefixmb (const STRING_TYPE *begin, const STRING_TYPE *end,
 		      if (*cp == thousands)
 			break;
 #else
-		      for (cnt = 0; thousands[cnt] != '\0'; ++cnt)
-			if (thousands[cnt] != cp[thousands_len - cnt - 1])
-			  break;
-		      if (thousands[cnt] == '\0')
+		      if (memcmp (cp, thousands, thousands_len) == 0)
 			break;
 #endif
 		      --cp;
@@ -156,20 +146,17 @@ __correctly_grouped_prefixmb (const STRING_TYPE *begin, const STRING_TYPE *end,
 		      if (*cp == thousands)
 			break;
 #else
-		      for (cnt = 0; thousands[cnt] != '\0'; ++cnt)
-			if (thousands[cnt] != cp[thousands_len - cnt - 1])
-			  break;
-		      if (thousands[cnt] == '\0')
+		      if (memcmp (cp, thousands, thousands_len) == 0)
 			break;
 #endif
 		      --cp;
 		    }
 
-		  if (cp < begin && group_end - cp <= (int) *gp)
+		  if (cp < begin && group_end - cp <= (int) *gp + thousands_len - 1)
 		    /* Final group is correct.  */
 		    return end;
 
-		  if (cp < begin || group_end - cp != (int) *gp)
+		  if (cp < begin || group_end - cp != (int) *gp + thousands_len - 1)
 		    /* Incorrect group.  Punt.  */
 		    break;
 		}
@@ -183,8 +170,8 @@ __correctly_grouped_prefixmb (const STRING_TYPE *begin, const STRING_TYPE *end,
       else
 	{
 	  /* Even the first group was wrong; determine maximum shift.  */
-	  if (end - cp > (int) *gp + 1)
-	    end = cp + (int) *gp + 1;
+	  if (end - cp > (int) *gp + thousands_len)
+	    end = cp + (int) *gp + thousands_len;
 	  else if (cp < begin)
 	    /* This number does not fill the first group, but is correct.  */
 	    return end;
diff --git a/stdlib/tst-strtod4.c b/stdlib/tst-strtod4.c
index aae9835d82..841b525836 100644
--- a/stdlib/tst-strtod4.c
+++ b/stdlib/tst-strtod4.c
@@ -13,7 +13,8 @@ static const struct
 } tests[] =
   {
     { "000"NNBSP"000"NNBSP"000", "", 0.0 },
-    { "1"NNBSP"000"NNBSP"000,5x", "x", 1000000.5 }
+    { "1"NNBSP"000"NNBSP"000,5x", "x", 1000000.5 },
+    { "10"NNBSP NNBSP"200", NNBSP NNBSP"200", 10.0 }
   };
 #define NTESTS (sizeof (tests) / sizeof (tests[0]))
 
-- 
2.42.0


-- 
Andreas Schwab, SUSE Labs, schwab@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] stdlib: fix grouping verification with multi-byte thousands separator
  2023-10-11 14:49 [PATCH] stdlib: fix grouping verification with multi-byte thousands separator Andreas Schwab
@ 2023-10-12  8:25 ` Florian Weimer
  2023-10-12  9:28   ` Andreas Schwab
  0 siblings, 1 reply; 4+ messages in thread
From: Florian Weimer @ 2023-10-12  8:25 UTC (permalink / raw)
  To: Andreas Schwab; +Cc: libc-alpha

* Andreas Schwab:

> @@ -91,7 +84,7 @@ __correctly_grouped_prefixmb (const STRING_TYPE *begin, const STRING_TYPE *end,
>        if (cp < begin)
>  	return end;
>  
> -      if (end - cp == (int) *gp + 1)
> +      if (end - cp == (int) *gp + thousands_len)
>  	{
>  	  /* This group matches the specification.  */

This still does not work for alternative digits, right?  But that's an
existing limitation of strto[dl]?

Does this need a bug?  Is grouping validation required by a standard?

The patch itself looks okay to me.

Thanks,
Florian


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] stdlib: fix grouping verification with multi-byte thousands separator
  2023-10-12  8:25 ` Florian Weimer
@ 2023-10-12  9:28   ` Andreas Schwab
  2023-10-12  9:33     ` Florian Weimer
  0 siblings, 1 reply; 4+ messages in thread
From: Andreas Schwab @ 2023-10-12  9:28 UTC (permalink / raw)
  To: Florian Weimer; +Cc: libc-alpha

On Okt 12 2023, Florian Weimer wrote:

> * Andreas Schwab:
>
>> @@ -91,7 +84,7 @@ __correctly_grouped_prefixmb (const STRING_TYPE *begin, const STRING_TYPE *end,
>>        if (cp < begin)
>>  	return end;
>>  
>> -      if (end - cp == (int) *gp + 1)
>> +      if (end - cp == (int) *gp + thousands_len)
>>  	{
>>  	  /* This group matches the specification.  */
>
> This still does not work for alternative digits, right?  But that's an
> existing limitation of strto[dl]?

strtod only supports [0-9].

> Does this need a bug?

Bug 30964.

> Is grouping validation required by a standard?

I don't think so, but this makes the behaviour consistent with
locales that have single-byte separators.  Note that str_to_mpn will
malfunction if the number contains multiple adjacent separators.

-- 
Andreas Schwab, SUSE Labs, schwab@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] stdlib: fix grouping verification with multi-byte thousands separator
  2023-10-12  9:28   ` Andreas Schwab
@ 2023-10-12  9:33     ` Florian Weimer
  0 siblings, 0 replies; 4+ messages in thread
From: Florian Weimer @ 2023-10-12  9:33 UTC (permalink / raw)
  To: Andreas Schwab; +Cc: libc-alpha

* Andreas Schwab:

> On Okt 12 2023, Florian Weimer wrote:
>
>> * Andreas Schwab:
>>
>>> @@ -91,7 +84,7 @@ __correctly_grouped_prefixmb (const STRING_TYPE *begin, const STRING_TYPE *end,
>>>        if (cp < begin)
>>>  	return end;
>>>  
>>> -      if (end - cp == (int) *gp + 1)
>>> +      if (end - cp == (int) *gp + thousands_len)
>>>  	{
>>>  	  /* This group matches the specification.  */
>>
>> This still does not work for alternative digits, right?  But that's an
>> existing limitation of strto[dl]?
>
> strtod only supports [0-9].
>
>> Does this need a bug?
>
> Bug 30964.

Thanks, please commit this with the bug reference in the commit message.

Florian


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2023-10-12  9:33 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-10-11 14:49 [PATCH] stdlib: fix grouping verification with multi-byte thousands separator Andreas Schwab
2023-10-12  8:25 ` Florian Weimer
2023-10-12  9:28   ` Andreas Schwab
2023-10-12  9:33     ` Florian Weimer

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).