public inbox for fortran@gcc.gnu.org
 help / color / mirror / Atom feed
* [patch] Fix PR libfortran/95177, ctype.h functions should not be called with char arguments
@ 2021-12-16 20:22 FX
  2021-12-16 21:00 ` Harald Anlauf
  2021-12-16 21:47 ` FX
  0 siblings, 2 replies; 7+ messages in thread
From: FX @ 2021-12-16 20:22 UTC (permalink / raw)
  To: fortran; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 430 bytes --]

Hi,

Functions from <ctype.h> should only be called on values that can be represented by unsigned char. On targets where char is a signed type, some of libgfortran calls have undefined behaviour.

The solution is to cast the argument to unsigned char type. I’ve defined macros in libgfortran.h to do so, to retain legibility of the library code.

Bootstrapped and regtested on x86_64-pc-linux-gnu.
OK to commit?

FX


[-- Attachment #2: pr95177.patch --]
[-- Type: application/octet-stream, Size: 9625 bytes --]

commit a3d0045d510707b82d2bd65722d0ca6cec235caa
Author: François-Xavier Coudert <fxcoudert@gcc.gnu.org>
Date:   2021-12-16 18:38:30 +0100

    Cast arguments of <ctype.h> functions to unsigned char
    
    PR libfortran/95177
    
    libgfortran/ChangeLog
    
            * libgfortran.h: include ctype.h, provide safe macros.
            * io/format.c: use safe macros.
            * io/list_read.c: use safe macros.
            * io/read.c: use safe macros.
            * io/write.c: use safe macros.
            * runtime/environ.c: use safe macros.

diff --git a/libgfortran/io/format.c b/libgfortran/io/format.c
index 56f8dbd858a..927e3785a34 100644
--- a/libgfortran/io/format.c
+++ b/libgfortran/io/format.c
@@ -29,7 +29,6 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 
 #include "io.h"
 #include "format.h"
-#include <ctype.h>
 #include <string.h>
 
 
@@ -193,7 +192,7 @@ next_char (format_data *fmt, int literal)
 	return -1;
 
       fmt->format_string_len--;
-      c = toupper (*fmt->format_string++);
+      c = safe_toupper (*fmt->format_string++);
       fmt->error_element = c;
     }
   while ((c == ' ' || c == '\t') && !literal);
@@ -328,7 +327,7 @@ format_lex (format_data *fmt)
 
     case '+':
       c = next_char (fmt, 0);
-      if (!isdigit (c))
+      if (!safe_isdigit (c))
 	{
 	  token = FMT_UNKNOWN;
 	  break;
@@ -339,7 +338,7 @@ format_lex (format_data *fmt)
       for (;;)
 	{
 	  c = next_char (fmt, 0);
-	  if (!isdigit (c))
+	  if (!safe_isdigit (c))
 	    break;
 
 	  fmt->value = 10 * fmt->value + c - '0';
@@ -367,7 +366,7 @@ format_lex (format_data *fmt)
       for (;;)
 	{
 	  c = next_char (fmt, 0);
-	  if (!isdigit (c))
+	  if (!safe_isdigit (c))
 	    break;
 
 	  fmt->value = 10 * fmt->value + c - '0';
diff --git a/libgfortran/io/list_read.c b/libgfortran/io/list_read.c
index 8cc7ddbe8e2..f902ee4fe1d 100644
--- a/libgfortran/io/list_read.c
+++ b/libgfortran/io/list_read.c
@@ -29,7 +29,6 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #include "fbuf.h"
 #include "unix.h"
 #include <string.h>
-#include <ctype.h>
 
 typedef unsigned char uchar;
 
@@ -811,7 +810,7 @@ read_logical (st_parameter_dt *dtp, int length)
   if (parse_repeat (dtp))
     return;
 
-  c = tolower (next_char (dtp));
+  c = safe_tolower (next_char (dtp));
   l_push_char (dtp, c);
   switch (c)
     {
@@ -837,7 +836,7 @@ read_logical (st_parameter_dt *dtp, int length)
       break;
 
     case '.':
-      c = tolower (next_char (dtp));
+      c = safe_tolower (next_char (dtp));
       switch (c)
 	{
 	  case 't':
@@ -1052,7 +1051,7 @@ read_integer (st_parameter_dt *dtp, int length)
     }
 
  get_integer:
-  if (!isdigit (c))
+  if (!safe_isdigit (c))
     goto bad_integer;
   push_char (dtp, c);
 
@@ -1303,7 +1302,7 @@ parse_real (st_parameter_dt *dtp, void *buffer, int length)
   if (c == ',' && dtp->u.p.current_unit->decimal_status == DECIMAL_COMMA)
     c = '.';
 
-  if (!isdigit (c) && c != '.')
+  if (!safe_isdigit (c) && c != '.')
     {
       if (c == 'i' || c == 'I' || c == 'n' || c == 'N')
 	goto inf_nan;
@@ -1377,7 +1376,7 @@ parse_real (st_parameter_dt *dtp, void *buffer, int length)
     }
 
  exp2:
-  if (!isdigit (c))
+  if (!safe_isdigit (c))
     {
       /* Extension: allow default exponent of 0 when omitted.  */
       if (dtp->common.flags & IOPARM_DT_DEC_EXT)
@@ -1748,7 +1747,7 @@ read_real (st_parameter_dt *dtp, void *dest, int length)
   if (c == ',' && dtp->u.p.current_unit->decimal_status == DECIMAL_COMMA)
     c = '.';
 
-  if (!isdigit (c) && c != '.')
+  if (!safe_isdigit (c) && c != '.')
     {
       if (c == 'i' || c == 'I' || c == 'n' || c == 'N')
 	goto inf_nan;
@@ -1828,7 +1827,7 @@ read_real (st_parameter_dt *dtp, void *dest, int length)
     }
 
  exp2:
-  if (!isdigit (c))
+  if (!safe_isdigit (c))
     {
       /* Extension: allow default exponent of 0 when omitted.  */
       if (dtp->common.flags & IOPARM_DT_DEC_EXT)
@@ -2757,7 +2756,7 @@ nml_match_name (st_parameter_dt *dtp, const char *name, index_type len)
   for (i = 0; i < len; i++)
     {
       c = next_char (dtp);
-      if (c == EOF || (tolower (c) != tolower (name[i])))
+      if (c == EOF || (safe_tolower (c) != safe_tolower (name[i])))
 	{
 	  dtp->u.p.nml_read_error = 1;
 	  break;
@@ -3286,7 +3285,7 @@ get_name:
   do
     {
       if (!is_separator (c))
-	push_char_default (dtp, tolower(c));
+	push_char_default (dtp, safe_tolower(c));
       if ((c = next_char (dtp)) == EOF)
 	goto nml_err_ret;
     }
diff --git a/libgfortran/io/read.c b/libgfortran/io/read.c
index 7515d912c51..7b3f137d687 100644
--- a/libgfortran/io/read.c
+++ b/libgfortran/io/read.c
@@ -28,7 +28,6 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #include "format.h"
 #include "unix.h"
 #include <string.h>
-#include <ctype.h>
 #include <assert.h>
 #include "async.h"
 
@@ -959,7 +958,7 @@ read_f (st_parameter_dt *dtp, const fnode *f, char *dest, int length)
 	 between "NaN" and the optional perenthesis is not permitted.  */
       while (w > 0)
 	{
-	  *out = tolower (*p);
+	  *out = safe_tolower (*p);
 	  switch (*p)
 	    {
 	    case ' ':
@@ -981,7 +980,7 @@ read_f (st_parameter_dt *dtp, const fnode *f, char *dest, int length)
 		goto bad_float;
 	      break;
 	    default:
-	      if (!isalnum (*out))
+	      if (!safe_isalnum (*out))
 		goto bad_float;
 	    }
 	  --w;
@@ -1109,7 +1108,7 @@ exponent:
 
   if (dtp->u.p.blank_status == BLANK_UNSPECIFIED)
     {
-      while (w > 0 && isdigit (*p))
+      while (w > 0 && safe_isdigit (*p))
 	{
 	  exponent *= 10;
 	  exponent += *p - '0';
@@ -1137,7 +1136,7 @@ exponent:
 	      else
 		assert (dtp->u.p.blank_status == BLANK_NULL);
 	    }
-	  else if (!isdigit (*p))
+	  else if (!safe_isdigit (*p))
 	    goto bad_float;
 	  else
 	    {
diff --git a/libgfortran/io/write.c b/libgfortran/io/write.c
index 278cd47cabb..b9e92845bcf 100644
--- a/libgfortran/io/write.c
+++ b/libgfortran/io/write.c
@@ -30,7 +30,6 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #include "unix.h"
 #include <assert.h>
 #include <string.h>
-#include <ctype.h>
 
 #define star_fill(p, n) memset(p, '*', n)
 
@@ -2101,14 +2100,14 @@ nml_write_obj (st_parameter_dt *dtp, namelist_info *obj, index_type offset,
 	  base_name_len = strlen (base_name);
 	  for (dim_i = 0; dim_i < base_name_len; dim_i++)
             {
-	      cup = toupper ((int) base_name[dim_i]);
+	      cup = safe_toupper (base_name[dim_i]);
 	      write_character (dtp, &cup, 1, 1, NODELIM);
             }
 	}
       clen = strlen (obj->var_name);
       for (dim_i = len; dim_i < clen; dim_i++)
 	{
-	  cup = toupper ((int) obj->var_name[dim_i]);
+	  cup = safe_toupper (obj->var_name[dim_i]);
 	  if (cup == '+')
 	    cup = '%';
 	  write_character (dtp, &cup, 1, 1, NODELIM);
@@ -2426,7 +2425,7 @@ namelist_write (st_parameter_dt *dtp)
   /* Write namelist name in upper case - f95 std.  */
   for (gfc_charlen_type i = 0; i < dtp->namelist_name_len; i++ )
     {
-      c = toupper ((int) dtp->namelist_name[i]);
+      c = safe_toupper (dtp->namelist_name[i]);
       write_character (dtp, &c, 1 ,1, NODELIM);
     }
 
diff --git a/libgfortran/libgfortran.h b/libgfortran/libgfortran.h
index 285c36a00b5..93e3591b21f 100644
--- a/libgfortran/libgfortran.h
+++ b/libgfortran/libgfortran.h
@@ -39,6 +39,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 /* config.h MUST be first because it can affect system headers.  */
 #include "config.h"
 
+#include <ctype.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <stddef.h>
@@ -103,6 +104,20 @@ typedef off_t gfc_offset;
 #endif
 
 
+/* These functions from <ctype.h> should only be used on values that can be
+   represented as unsigned char, otherwise the behavior is undefined.
+   Some targets have a char type that is signed, so we cast the argument
+   to unsigned char. See:
+     https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95177
+     https://wiki.sei.cmu.edu/confluence/x/BNcxBQ
+ */
+
+#define safe_isalnum(x) isalnum((unsigned char) (x))
+#define safe_isdigit(x) isdigit((unsigned char) (x))
+#define safe_tolower(x) tolower((unsigned char) (x))
+#define safe_toupper(x) toupper((unsigned char) (x))
+
+
 /* The following macros can be used to annotate conditions which are likely or
    unlikely to be true.  Avoid using them when a condition is only slightly
    more likely/less unlikely than average to avoid the performance penalties of
diff --git a/libgfortran/runtime/environ.c b/libgfortran/runtime/environ.c
index fe16c080797..ce408cf11af 100644
--- a/libgfortran/runtime/environ.c
+++ b/libgfortran/runtime/environ.c
@@ -26,7 +26,6 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 
 #include <string.h>
 #include <strings.h>
-#include <ctype.h>
 
 #ifdef HAVE_UNISTD_H
 #include <unistd.h>
@@ -91,7 +90,7 @@ init_integer (variable * v)
     return;
 
   for (q = p; *q; q++)
-    if (!isdigit (*q) && (p != q || *q != '-'))
+    if (!safe_isdigit (*q) && (p != q || *q != '-'))
       return;
 
   *v->var = atoi (p);
@@ -344,7 +343,7 @@ static int
 match_integer (void)
 {
   unit_num = 0;
-  while (isdigit (*p))
+  while (safe_isdigit (*p))
     unit_num = unit_num * 10 + (*p++ - '0');
   return INTEGER;
 }

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch] Fix PR libfortran/95177, ctype.h functions should not be called with char arguments
  2021-12-16 20:22 [patch] Fix PR libfortran/95177, ctype.h functions should not be called with char arguments FX
@ 2021-12-16 21:00 ` Harald Anlauf
  2021-12-16 21:47 ` FX
  1 sibling, 0 replies; 7+ messages in thread
From: Harald Anlauf @ 2021-12-16 21:00 UTC (permalink / raw)
  To: fortran; +Cc: gcc-patches

Hi FX,

Am 16.12.21 um 21:22 schrieb FX via Fortran:
> Hi,
> 
> Functions from <ctype.h> should only be called on values that can be represented by unsigned char. On targets where char is a signed type, some of libgfortran calls have undefined behaviour.
> 
> The solution is to cast the argument to unsigned char type. I’ve defined macros in libgfortran.h to do so, to retain legibility of the library code.
> 
> Bootstrapped and regtested on x86_64-pc-linux-gnu.
> OK to commit?
> 
> FX
> 

I think that is basically OK.

However, I am wondering if calling the macros safe_* gives a false
impression of what they actually do.  The cases where they are used
with your patch applied seem sane, for now, though.

Thanks for the patch!

Harald


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch] Fix PR libfortran/95177, ctype.h functions should not be called with char arguments
  2021-12-16 20:22 [patch] Fix PR libfortran/95177, ctype.h functions should not be called with char arguments FX
  2021-12-16 21:00 ` Harald Anlauf
@ 2021-12-16 21:47 ` FX
  2021-12-16 23:34   ` FX
  1 sibling, 1 reply; 7+ messages in thread
From: FX @ 2021-12-16 21:47 UTC (permalink / raw)
  To: fortran, gcc-patches; +Cc: Harald Anlauf

Hi Harald,

I’m not on the list for now, please keep me in CC so I don’t miss replies. Thought it feels nice to be working on gfortran again :)


> However, I am wondering if calling the macros safe_* gives a false
> impression of what they actually do.  The cases where they are used
> with your patch applied seem sane, for now, though.

I thought about that, it’s not really “safer” than the underlying C library obviously. I couldn’t find a better name, but I’ll wait before I commit so others can suggest something else.

unrelated PS: I’ve been thinking aloud and benchmarking faster integer I/O for libgfortran at https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98076
Comments are welcome on the proposed design, I think the current proposal is a low-hanging fruit (not risky, much faster).


Best,

FX

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch] Fix PR libfortran/95177, ctype.h functions should not be called with char arguments
  2021-12-16 21:47 ` FX
@ 2021-12-16 23:34   ` FX
  2021-12-17 20:40     ` Harald Anlauf
  2021-12-18 12:54     ` Thomas Koenig
  0 siblings, 2 replies; 7+ messages in thread
From: FX @ 2021-12-16 23:34 UTC (permalink / raw)
  To: fortran; +Cc: Harald Anlauf

> unrelated PS: I’ve been thinking aloud and benchmarking faster integer I/O for libgfortran at https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98076
> Comments are welcome on the proposed design, I think the current proposal is a low-hanging fruit (not risky, much faster).

Quick test integrating the idea into libgfortran, here are the timings to make a formatted write of 10 million integers into a string:

- very small value (1), negligible speedup (2.273s to 2.248s)
- small value (1042), speedup of 28% (3.224s to 2.350s)
- huge(0_8), speed up of 50% (5.914s to 2.560s)
- huge(0_16), speed up of 83% (19.46s to 3.31s)

Conclusion: this looks quite interesting! I’m not sure what use cases people have for writing lots of formatted integers, but this doesn’t sound too bad.

Further thought: fast 64-bit itoa() implementations, under the MIT license (https://github.com/jeaiii/itoa) promise a speed-up of 2 to 10 times compared to naive implementation. That could bring us down further, but we probably cannot incorporate that, right?

Two questions:

1. This is easy, am I missing something? Some reason why it was never tried before?
2. Why is gfc_xtoa() in runtime/error.c? We should probably move it.

Cheers,
FX

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch] Fix PR libfortran/95177, ctype.h functions should not be called with char arguments
  2021-12-16 23:34   ` FX
@ 2021-12-17 20:40     ` Harald Anlauf
  2021-12-17 20:40       ` Harald Anlauf
  2021-12-18 12:54     ` Thomas Koenig
  1 sibling, 1 reply; 7+ messages in thread
From: Harald Anlauf @ 2021-12-17 20:40 UTC (permalink / raw)
  To: FX, fortran

Hi FX,

Am 17.12.21 um 00:34 schrieb FX via Fortran:
>> unrelated PS: I’ve been thinking aloud and benchmarking faster integer I/O for libgfortran at https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98076
>> Comments are welcome on the proposed design, I think the current proposal is a low-hanging fruit (not risky, much faster).
>
> Quick test integrating the idea into libgfortran, here are the timings to make a formatted write of 10 million integers into a string:
>
> - very small value (1), negligible speedup (2.273s to 2.248s)
> - small value (1042), speedup of 28% (3.224s to 2.350s)
> - huge(0_8), speed up of 50% (5.914s to 2.560s)
> - huge(0_16), speed up of 83% (19.46s to 3.31s)
>
> Conclusion: this looks quite interesting! I’m not sure what use cases people have for writing lots of formatted integers, but this doesn’t sound too bad.

yes, it does!

> Further thought: fast 64-bit itoa() implementations, under the MIT license (https://github.com/jeaiii/itoa) promise a speed-up of 2 to 10 times compared to naive implementation. That could bring us down further, but we probably cannot incorporate that, right?
>
> Two questions:
>
> 1. This is easy, am I missing something? Some reason why it was never tried before?

I can't really answer that, but it appears that having __int128 support
requires a 64bit platform.  How common was that in 2004 when itoa was
added to libgfortran?  Do people who care about performance write lots
of formatted integers?  ;-)

> 2. Why is gfc_xtoa() in runtime/error.c? We should probably move it.

Based on the logs itoa was moved around between files, but xtoa
wasn't.  gfc_xtoa is only used in one file: write.c.

> Cheers,
> FX
>

Harald

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch] Fix PR libfortran/95177, ctype.h functions should not be called with char arguments
  2021-12-17 20:40     ` Harald Anlauf
@ 2021-12-17 20:40       ` Harald Anlauf
  0 siblings, 0 replies; 7+ messages in thread
From: Harald Anlauf @ 2021-12-17 20:40 UTC (permalink / raw)
  To: fortran

Hi FX,

Am 17.12.21 um 00:34 schrieb FX via Fortran:
>> unrelated PS: I’ve been thinking aloud and benchmarking faster integer I/O for libgfortran at https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98076
>> Comments are welcome on the proposed design, I think the current proposal is a low-hanging fruit (not risky, much faster).
> 
> Quick test integrating the idea into libgfortran, here are the timings to make a formatted write of 10 million integers into a string:
> 
> - very small value (1), negligible speedup (2.273s to 2.248s)
> - small value (1042), speedup of 28% (3.224s to 2.350s)
> - huge(0_8), speed up of 50% (5.914s to 2.560s)
> - huge(0_16), speed up of 83% (19.46s to 3.31s)
> 
> Conclusion: this looks quite interesting! I’m not sure what use cases people have for writing lots of formatted integers, but this doesn’t sound too bad.

yes, it does!

> Further thought: fast 64-bit itoa() implementations, under the MIT license (https://github.com/jeaiii/itoa) promise a speed-up of 2 to 10 times compared to naive implementation. That could bring us down further, but we probably cannot incorporate that, right?
> 
> Two questions:
> 
> 1. This is easy, am I missing something? Some reason why it was never tried before?

I can't really answer that, but it appears that having __int128 support
requires a 64bit platform.  How common was that in 2004 when itoa was
added to libgfortran?  Do people who care about performance write lots
of formatted integers?  ;-)

> 2. Why is gfc_xtoa() in runtime/error.c? We should probably move it.

Based on the logs itoa was moved around between files, but xtoa
wasn't.  gfc_xtoa is only used in one file: write.c.

> Cheers,
> FX
> 

Harald


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch] Fix PR libfortran/95177, ctype.h functions should not be called with char arguments
  2021-12-16 23:34   ` FX
  2021-12-17 20:40     ` Harald Anlauf
@ 2021-12-18 12:54     ` Thomas Koenig
  1 sibling, 0 replies; 7+ messages in thread
From: Thomas Koenig @ 2021-12-18 12:54 UTC (permalink / raw)
  To: FX, fortran; +Cc: Harald Anlauf

On 17.12.21 00:34, FX via Fortran wrote:
>> unrelated PS: I’ve been thinking aloud and benchmarking faster integer I/O for libgfortran at https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98076
>> Comments are welcome on the proposed design, I think the current proposal is a low-hanging fruit (not risky, much faster).
> 
> Quick test integrating the idea into libgfortran, here are the timings to make a formatted write of 10 million integers into a string:
> 
> - very small value (1), negligible speedup (2.273s to 2.248s)
> - small value (1042), speedup of 28% (3.224s to 2.350s)
> - huge(0_8), speed up of 50% (5.914s to 2.560s)
> - huge(0_16), speed up of 83% (19.46s to 3.31s)
> 
> Conclusion: this looks quite interesting!

Yes indeed.
> I’m not sure what use cases people have for writing lots of formatted integers, but this doesn’t sound too bad.

If there is an obvoius speed-up to be had, we should go for it, for
a) the coolness value and
b) because somebody might need it :-)

> 
> Further thought: fast 64-bit itoa() implementations, under the MIT license (https://github.com/jeaiii/itoa) promise a speed-up of 2 to 10 times compared to naive implementation. That could bring us down further, but we probably cannot incorporate that, right?

Not sure about that.  We could reverse-engineer the algorithm,
if necessary (and if we are convinced that it is mathematically
correct :-)

> 
> Two questions:
> 
> 1. This is easy, am I missing something? Some reason why it was never tried before?

Round tuits seem to have been lacking :-)

> 2. Why is gfc_xtoa() in runtime/error.c? We should probably move it.

No idea, and yes.

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2021-12-18 12:54 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-16 20:22 [patch] Fix PR libfortran/95177, ctype.h functions should not be called with char arguments FX
2021-12-16 21:00 ` Harald Anlauf
2021-12-16 21:47 ` FX
2021-12-16 23:34   ` FX
2021-12-17 20:40     ` Harald Anlauf
2021-12-17 20:40       ` Harald Anlauf
2021-12-18 12:54     ` Thomas Koenig

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).