* [PATCH] ctype functions and signedness
@ 2015-11-16 15:59 Michael McConville
2015-11-17 0:18 ` DJ Delorie
2015-11-24 23:15 ` Jeff Law
0 siblings, 2 replies; 4+ messages in thread
From: Michael McConville @ 2015-11-16 15:59 UTC (permalink / raw)
To: gcc-patches
Hi, everyone.
While it often (usually?) isn't an issue, passing a signed char to ctype
functions is undefined. Here's the CERT entry:
https://www.securecoding.cert.org/confluence/x/fAs
This means that we need to cast chars to unsigned char before passing
them to one of these functions.
The below patch, generated by a Coccinelle script, fixes instances of
this. It may need some minor tweaks to add line breaks and conform with
local style conventions.
Thanks,
Michael
--- boehm-gc/cord/de_win.c
+++ /tmp/cocci-output-25514-78f80b-de_win.c
@@ -86,7 +86,7 @@ int APIENTRY WinMain (HINSTANCE hInstanc
} else {
char *p = command_line;
- while (*p != 0 && !isspace(*p)) p++;
+ while (*p != 0 && !isspace((unsigned char)*p)) p++;
arg_file_name = CORD_to_char_star(
CORD_substr(command_line, 0, p - command_line));
}
@@ -129,7 +129,7 @@ char * plain_chars(char * text, size_t l
register size_t i;
for (i = 0; i < len; i++) {
- if (iscntrl(text[i])) {
+ if (iscntrl((unsigned char)text[i])) {
result[i] = ' ';
} else {
result[i] = text[i];
@@ -147,7 +147,7 @@ char * control_chars(char * text, size_t
register size_t i;
for (i = 0; i < len; i++) {
- if (iscntrl(text[i])) {
+ if (iscntrl((unsigned char)text[i])) {
result[i] = text[i] + 0x40;
} else {
result[i] = ' ';
--- boehm-gc/os_dep.c
+++ /tmp/cocci-output-25514-d4c0bc-os_dep.c
@@ -271,31 +271,31 @@ char *GC_parse_map_entry(char *buf_ptr,
}
p = buf_ptr;
- while (isspace(*p)) ++p;
+ while (isspace((unsigned char)*p)) ++p;
start_start = p;
- GC_ASSERT(isxdigit(*start_start));
+ GC_ASSERT(isxdigit((unsigned char)*start_start));
*start = strtoul(start_start, &endp, 16); p = endp;
GC_ASSERT(*p=='-');
++p;
end_start = p;
- GC_ASSERT(isxdigit(*end_start));
+ GC_ASSERT(isxdigit((unsigned char)*end_start));
*end = strtoul(end_start, &endp, 16); p = endp;
- GC_ASSERT(isspace(*p));
+ GC_ASSERT(isspace((unsigned char)*p));
- while (isspace(*p)) ++p;
+ while (isspace((unsigned char)*p)) ++p;
prot_start = p;
GC_ASSERT(*prot_start == 'r' || *prot_start == '-');
memcpy(prot_buf, prot_start, 4);
prot_buf[4] = '\0';
if (prot_buf[1] == 'w') {/* we can skip the rest if it's not writable. */
/* Skip past protection field to offset field */
- while (!isspace(*p)) ++p; while (isspace(*p)) ++p;
- GC_ASSERT(isxdigit(*p));
+ while (!isspace((unsigned char)*p)) ++p; while (isspace((unsigned char)*p)) ++p;
+ GC_ASSERT(isxdigit((unsigned char)*p));
/* Skip past offset field, which we ignore */
- while (!isspace(*p)) ++p; while (isspace(*p)) ++p;
+ while (!isspace((unsigned char)*p)) ++p; while (isspace((unsigned char)*p)) ++p;
maj_dev_start = p;
- GC_ASSERT(isxdigit(*maj_dev_start));
+ GC_ASSERT(isxdigit((unsigned char)*maj_dev_start));
*maj_dev = strtoul(maj_dev_start, NULL, 16);
}
@@ -969,11 +969,11 @@ ptr_t GC_get_stack_base()
/* Skip the required number of fields. This number is hopefully */
/* constant across all Linux implementations. */
for (i = 0; i < STAT_SKIP; ++i) {
- while (isspace(c)) c = stat_buf[buf_offset++];
- while (!isspace(c)) c = stat_buf[buf_offset++];
+ while (isspace((unsigned char)c)) c = stat_buf[buf_offset++];
+ while (!isspace((unsigned char)c)) c = stat_buf[buf_offset++];
}
- while (isspace(c)) c = stat_buf[buf_offset++];
- while (isdigit(c)) {
+ while (isspace((unsigned char)c)) c = stat_buf[buf_offset++];
+ while (isdigit((unsigned char)c)) {
result *= 10;
result += c - '0';
c = stat_buf[buf_offset++];
--- gcc/testsuite/gcc.dg/charset/builtin1.c
+++ /tmp/cocci-output-20442-6d79bd-builtin1.c
@@ -14,9 +14,9 @@ static int strA(void) { return 'A'; }
int
main(void)
{
- if (!isdigit('1'))
+ if (!isdigit((unsigned char)'1'))
abort();
- if (isdigit('A'))
+ if (isdigit((unsigned char)'A'))
abort();
if (!isdigit(str1()))
abort();
--- gcc/testsuite/gcc.dg/torture/pr67821.c
+++ /tmp/cocci-output-18483-79f7b5-pr67821.c
@@ -7,9 +7,9 @@ foo (const char *s)
{
int success = 1;
const char *p = s + 2;
- if (!isdigit (*p))
+ if (!isdigit ((unsigned char)*p))
success = 0;
- while (isdigit (*p))
+ while (isdigit ((unsigned char)*p))
++p;
return success;
}
--- gcc/ada/adaint.c
+++ /tmp/cocci-output-21151-00798e-adaint.c
@@ -1628,7 +1628,7 @@ __gnat_is_absolute_path (char *name, int
{
if (name[index] == ':' &&
((name[index + 1] == '/') ||
- (isalpha (name[index + 1]) && index + 2 <= length &&
+ (isalpha ((unsigned char)name[index + 1]) && index + 2 <= length &&
name[index + 2] == '/')))
return 1;
--- gcc/config/darwin.c
+++ /tmp/cocci-output-11137-daf870-darwin.c
@@ -3471,7 +3471,7 @@ darwin_build_constant_cfstring (tree str
int l = 0;
for (l = 0; l < length; l++)
- if (!s[l] || !isascii (s[l]))
+ if (!s[l] || !isascii ((unsigned char)s[l]))
{
warning (darwin_warn_nonportable_cfstrings, "%s in CFString literal",
s[l] ? "non-ASCII character" : "embedded NUL");
--- libcpp/makeucnid.c
+++ /tmp/cocci-output-1465-01eaaf-makeucnid.c
@@ -81,7 +81,7 @@ read_ucnid (const char *fname)
fl = C11;
else if (strcmp (line, "[C11NOSTART]\n") == 0)
fl = C11|N11;
- else if (isxdigit (line[0]))
+ else if (isxdigit ((unsigned char)line[0]))
{
char *l = line;
while (*l)
@@ -89,7 +89,7 @@ read_ucnid (const char *fname)
unsigned long start, end;
char *endptr;
start = strtoul (l, &endptr, 16);
- if (endptr == l || (*endptr != '-' && ! isspace (*endptr)))
+ if (endptr == l || (*endptr != '-' && ! isspace ((unsigned char)*endptr)))
fail ("parsing ucnid.tab [1]");
l = endptr;
if (*l != '-')
@@ -100,10 +100,10 @@ read_ucnid (const char *fname)
if (end < start)
fail ("parsing ucnid.tab, end before start");
l = endptr;
- if (! isspace (*l))
+ if (! isspace ((unsigned char)*l))
fail ("parsing ucnid.tab, junk after range");
}
- while (isspace (*l))
+ while (isspace ((unsigned char)*l))
l++;
if (end > MAX_CODE_POINT)
fail ("parsing ucnid.tab, end too large");
@@ -156,7 +156,7 @@ read_table (char *fname)
} while (*l != ';');
/* Canonical combining class; in NFC/NFKC, they must be increasing
(or zero). */
- if (! isdigit (*++l))
+ if (! isdigit ((unsigned char)*++l))
fail ("parsing UnicodeData.txt, combining class not number");
combining_value[codepoint] = strtoul (l, &l, 10);
if (*l++ != ';')
@@ -175,11 +175,11 @@ read_table (char *fname)
{
if (*l == ';')
break;
- if (!isxdigit (*l))
+ if (!isxdigit ((unsigned char)*l))
fail ("parsing UnicodeData.txt, decomposition format");
this_decomp[i] = strtoul (l, &l, 16);
decomp_useful &= flags[this_decomp[i]];
- while (isspace (*l))
+ while (isspace ((unsigned char)*l))
l++;
}
if (i > 2) /* Decomposition too long. */
--- libgfortran/runtime/environ.c
+++ /tmp/cocci-output-8814-ef5a91-environ.c
@@ -80,7 +80,7 @@ init_integer (variable * v)
return;
for (q = p; *q; q++)
- if (!isdigit (*q) && (p != q || *q != '-'))
+ if (!isdigit ((unsigned char)*q) && (p != q || *q != '-'))
return;
*v->var = atoi (p);
@@ -99,7 +99,7 @@ init_unsigned_integer (variable * v)
return;
for (q = p; *q; q++)
- if (!isdigit (*q))
+ if (!isdigit ((unsigned char)*q))
return;
*v->var = atoi (p);
@@ -348,7 +348,7 @@ static int
match_integer (void)
{
unit_num = 0;
- while (isdigit (*p))
+ while (isdigit ((unsigned char)*p))
unit_num = unit_num * 10 + (*p++ - '0');
return INTEGER;
}
--- libgfortran/io/list_read.c
+++ /tmp/cocci-output-7054-3fc663-list_read.c
@@ -2656,7 +2656,7 @@ nml_match_name (st_parameter_dt *dtp, co
for (i = 0; i < len; i++)
{
c = next_char (dtp);
- if (c == EOF || (tolower (c) != tolower (name[i])))
+ if (c == EOF || (tolower (c) != tolower ((unsigned char)name[i])))
{
dtp->u.p.nml_read_error = 1;
break;
--- libgfortran/io/read.c
+++ /tmp/cocci-output-7054-209e45-read.c
@@ -947,7 +947,7 @@ read_f (st_parameter_dt *dtp, const fnod
between "NaN" and the optional perenthesis is not permitted. */
while (w > 0)
{
- *out = tolower (*p);
+ *out = tolower ((unsigned char)*p);
switch (*p)
{
case ' ':
@@ -969,7 +969,7 @@ read_f (st_parameter_dt *dtp, const fnod
goto bad_float;
break;
default:
- if (!isalnum (*out))
+ if (!isalnum ((unsigned char)*out))
goto bad_float;
}
--w;
@@ -1091,7 +1091,7 @@ exponent:
if (dtp->u.p.blank_status == BLANK_UNSPECIFIED)
{
- while (w > 0 && isdigit (*p))
+ while (w > 0 && isdigit ((unsigned char)*p))
{
exponent *= 10;
exponent += *p - '0';
@@ -1119,7 +1119,7 @@ exponent:
else
assert (dtp->u.p.blank_status == BLANK_NULL);
}
- else if (!isdigit (*p))
+ else if (!isdigit ((unsigned char)*p))
goto bad_float;
else
{
--- libgfortran/io/format.c
+++ /tmp/cocci-output-16280-1f8fd7-format.c
@@ -194,7 +194,7 @@ next_char (format_data *fmt, int literal
return -1;
fmt->format_string_len--;
- c = toupper (*fmt->format_string++);
+ c = toupper ((unsigned char)*fmt->format_string++);
fmt->error_element = c;
}
while ((c == ' ' || c == '\t') && !literal);
--- libgo/go/regexp/testdata/testregex.c
+++ /tmp/cocci-output-11909-272cb0-testregex.c
@@ -1267,7 +1267,7 @@ main(int argc, char** argv)
state.NOMATCH.rm_so = state.NOMATCH.rm_eo = -2;
p = unit;
version = (char*)id + 10;
- while (p < &unit[sizeof(unit)-1] && (*p = *version++) && !isspace(*p))
+ while (p < &unit[sizeof(unit)-1] && (*p = *version++) && !isspace((unsigned char)*p))
p++;
*p = 0;
while ((p = *++argv) && *p == '-')
@@ -1452,7 +1452,7 @@ main(int argc, char** argv)
/* parse: */
line = p;
- if (*p == ':' && !isspace(*(p + 1)))
+ if (*p == ':' && !isspace((unsigned char)*(p + 1)))
{
while (*++p && *p != ':');
if (!*p++)
@@ -1462,7 +1462,7 @@ main(int argc, char** argv)
continue;
}
}
- while (isspace(*p))
+ while (isspace((unsigned char)*p))
p++;
if (*p == 0 || *p == '#' || *p == 'T')
{
@@ -1476,8 +1476,8 @@ main(int argc, char** argv)
printf("%s\n", line);
else if (!(test & (TEST_ACTUAL|TEST_FAIL|TEST_PASS|TEST_SUMMARY)))
{
- while (*++p && !isspace(*p));
- while (isspace(*p))
+ while (*++p && !isspace((unsigned char)*p));
+ while (isspace((unsigned char)*p))
p++;
printf("NOTE %s\n", p);
}
@@ -1533,7 +1533,7 @@ main(int argc, char** argv)
nsub = -1;
for (p = spec; *p; p++)
{
- if (isdigit(*p))
+ if (isdigit((unsigned char)*p))
{
nmatch = strtol(p, &p, 10);
if (nmatch >= elementsof(match))
--- libiberty/pex-win32.c
+++ /tmp/cocci-output-25924-3a75ca-pex-win32.c
@@ -547,8 +547,8 @@ env_compare (const void *a_ptr, const vo
do
{
- c1 = (unsigned char) tolower (*a++);
- c2 = (unsigned char) tolower (*b++);
+ c1 = (unsigned char) tolower ((unsigned char)*a++);
+ c2 = (unsigned char) tolower ((unsigned char)*b++);
if (c1 == '=')
c1 = '\0';
--- libobjc/gc.c
+++ /tmp/cocci-output-2585-fc246d-gc.c
@@ -68,7 +68,7 @@ __objc_gc_setup_array (GC_bitmap mask, c
{
int i, len = atoi (type + 1);
- while (isdigit (*++type))
+ while (isdigit ((unsigned char)*++type))
/* do nothing */; /* skip the size of the array */
switch (*type) {
--- libquadmath/printf/quadmath-printf.c
+++ /tmp/cocci-output-27535-6b9ccc-quadmath-printf.c
@@ -189,7 +189,7 @@ quadmath_snprintf (char *str, size_t siz
++format;
info.width = va_arg (ap, int);
}
- else if (isdigit (*format))
+ else if (isdigit ((unsigned char)*format))
/* Constant width specification. */
info.width = read_int (&format);
@@ -206,7 +206,7 @@ quadmath_snprintf (char *str, size_t siz
info.prec = va_arg (ap, int);
}
- else if (isdigit (*format))
+ else if (isdigit ((unsigned char)*format))
info.prec = read_int (&format);
else
/* "%.?" is treated like "%.0?". */
--- libquadmath/printf/printf_fphex.c
+++ /tmp/cocci-output-25463-ffa004-printf_fphex.c
@@ -169,7 +169,7 @@ __quadmath_printf_fphex (struct __quadma
if (isnanq (fpnum.value))
{
negative = fpnum.ieee.negative != 0;
- if (isupper (info->spec))
+ if (isupper ((unsigned char)info->spec))
{
special = "NAN";
wspecial = L_("NAN");
@@ -184,7 +184,7 @@ __quadmath_printf_fphex (struct __quadma
{
if (isinfq (fpnum.value))
{
- if (isupper (info->spec))
+ if (isupper ((unsigned char)info->spec))
{
special = "INF";
wspecial = L_("INF");
@@ -363,7 +363,7 @@ __quadmath_printf_fphex (struct __quadma
think about it! */
break;
}
- else if (tolower (ch) < 'f')
+ else if (tolower ((unsigned char)ch) < 'f')
{
++numstr[cnt];
++wnumstr[cnt];
@@ -382,7 +382,7 @@ __quadmath_printf_fphex (struct __quadma
get an overflow. */
if (leading == '9')
leading = info->spec;
- else if (tolower (leading) < 'f')
+ else if (tolower ((unsigned char)leading) < 'f')
++leading;
else
{
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] ctype functions and signedness
2015-11-16 15:59 [PATCH] ctype functions and signedness Michael McConville
@ 2015-11-17 0:18 ` DJ Delorie
2015-11-17 0:24 ` Michael McConville
2015-11-24 23:15 ` Jeff Law
1 sibling, 1 reply; 4+ messages in thread
From: DJ Delorie @ 2015-11-17 0:18 UTC (permalink / raw)
To: Michael McConville; +Cc: gcc-patches
> --- libiberty/pex-win32.c
> +++ /tmp/cocci-output-25924-3a75ca-pex-win32.c
> @@ -547,8 +547,8 @@ env_compare (const void *a_ptr, const vo
>
> do
> {
> - c1 = (unsigned char) tolower (*a++);
> - c2 = (unsigned char) tolower (*b++);
> + c1 = (unsigned char) tolower ((unsigned char)*a++);
> + c2 = (unsigned char) tolower ((unsigned char)*b++);
>
> if (c1 == '=')
> c1 = '\0';
Since the only use of a and b in this function are to pass to tolower,
changing the type of a and b to unsigned char (and updating the casts
where they're initialized) would make more sense.
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] ctype functions and signedness
2015-11-17 0:18 ` DJ Delorie
@ 2015-11-17 0:24 ` Michael McConville
0 siblings, 0 replies; 4+ messages in thread
From: Michael McConville @ 2015-11-17 0:24 UTC (permalink / raw)
To: DJ Delorie; +Cc: gcc-patches
DJ Delorie wrote:
>
> > --- libiberty/pex-win32.c
> > +++ /tmp/cocci-output-25924-3a75ca-pex-win32.c
> > @@ -547,8 +547,8 @@ env_compare (const void *a_ptr, const vo
> >
> > do
> > {
> > - c1 = (unsigned char) tolower (*a++);
> > - c2 = (unsigned char) tolower (*b++);
> > + c1 = (unsigned char) tolower ((unsigned char)*a++);
> > + c2 = (unsigned char) tolower ((unsigned char)*b++);
> >
> > if (c1 == '=')
> > c1 = '\0';
>
> Since the only use of a and b in this function are to pass to tolower,
> changing the type of a and b to unsigned char (and updating the casts
> where they're initialized) would make more sense.
True. This patch was generated with an automated tool (Coccinelle). I
figured that it'd be easiest to send an initial patch and let someone
with a commit bit tweak it as needed. I've never worked with GCC before,
so I have no idea what the style conventions are.
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] ctype functions and signedness
2015-11-16 15:59 [PATCH] ctype functions and signedness Michael McConville
2015-11-17 0:18 ` DJ Delorie
@ 2015-11-24 23:15 ` Jeff Law
1 sibling, 0 replies; 4+ messages in thread
From: Jeff Law @ 2015-11-24 23:15 UTC (permalink / raw)
To: Michael McConville, gcc-patches
On 11/16/2015 08:59 AM, Michael McConville wrote:
> Hi, everyone.
>
> While it often (usually?) isn't an issue, passing a signed char to ctype
> functions is undefined. Here's the CERT entry:
>
> https://www.securecoding.cert.org/confluence/x/fAs
>
> This means that we need to cast chars to unsigned char before passing
> them to one of these functions.
>
> The below patch, generated by a Coccinelle script, fixes instances of
> this. It may need some minor tweaks to add line breaks and conform with
> local style conventions.
The changes to the boehm-gc directory really should go to the upstream
project. I wouldn't be terribly surprised if boehm-gc for GCC gets
deprecated given the trajectory GCJ is on.
> --- gcc/testsuite/gcc.dg/charset/builtin1.c
> +++ /tmp/cocci-output-20442-6d79bd-builtin1.c
> @@ -14,9 +14,9 @@ static int strA(void) { return 'A'; }
> int
> main(void)
> {
> - if (!isdigit('1'))
> + if (!isdigit((unsigned char)'1'))
> abort();
> - if (isdigit('A'))
> + if (isdigit((unsigned char)'A'))
> abort();
> if (!isdigit(str1()))
> abort();
This change actually fails regression testing. We're using
-fexec-charset=IBM1047 (EBCDIC). So '1' is -15, turned into an unsigned
char, 241 and boom, it fails isdigit.
While I'm not terribly concerned about EBCDIC within GCC itself, it does
bring into question the changes for the testsuites & runtimes in particular.
Jeff
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2015-11-24 23:01 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-11-16 15:59 [PATCH] ctype functions and signedness Michael McConville
2015-11-17 0:18 ` DJ Delorie
2015-11-17 0:24 ` Michael McConville
2015-11-24 23:15 ` Jeff Law
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).