PR tree-optimization/86853 - sprintf optimization for wide strings doesn't account for conversion failure gcc/ChangeLog: PR tree-optimization/86853 * gimple-ssa-sprintf.c (struct format_result): Rename member. (struct fmtresult): Add member and initialize it in ctors. (format_character): Handle %C. Extend range to NUL. Set MAYFAIL. (format_string): Handle %S the same as %ls. Set MAYFAIL. (format_directive): Set POSUNDER4K when MAYFAIL is set. (parse_directive): Handle %C same as %c. (sprintf_dom_walker::compute_format_length): Adjust. (is_call_safe): Adjust. gcc/testsuite/ChangeLog: PR tree-optimization/86853 * gcc.dg/tree-ssa/builtin-sprintf-10.c: New test. * gcc.dg/tree-ssa/builtin-sprintf-11.c: New test. * gcc.dg/tree-ssa/builtin-sprintf-warn-18.c: Adjust. Index: gcc/gimple-ssa-sprintf.c =================================================================== --- gcc/gimple-ssa-sprintf.c (revision 263295) +++ gcc/gimple-ssa-sprintf.c (working copy) @@ -211,12 +211,14 @@ struct format_result the return value optimization. */ bool knownrange; - /* True if no individual directive resulted in more than 4095 bytes - of output (the total NUMBER_CHARS_{MIN,MAX} might be greater). - Implementations are not required to handle directives that produce - more than 4K bytes (leading to undefined behavior) and so when one - is found it disables the return value optimization. */ - bool under4k; + /* True if no individual directive could fail or result in more than + 4095 bytes of output (the total NUMBER_CHARS_{MIN,MAX} might be + greater). Implementations are not required to handle directives + that produce more than 4K bytes (leading to undefined behavior) + and so when one is found it disables the return value optimization. + Similarly, directives that can fail (such as wide character + directives) disable the optimization. */ + bool posunder4k; /* True when a floating point directive has been seen in the format string. */ @@ -650,7 +652,7 @@ struct fmtresult fmtresult (unsigned HOST_WIDE_INT min = HOST_WIDE_INT_MAX) : argmin (), argmax (), knownrange (min < HOST_WIDE_INT_MAX), - nullp () + mayfail (), nullp () { range.min = min; range.max = min; @@ -664,7 +666,7 @@ struct fmtresult unsigned HOST_WIDE_INT likely = HOST_WIDE_INT_MAX) : argmin (), argmax (), knownrange (min < HOST_WIDE_INT_MAX && max < HOST_WIDE_INT_MAX), - nullp () + mayfail (), nullp () { range.min = min; range.max = max; @@ -694,6 +696,10 @@ struct fmtresult heuristics that depend on warning levels. */ bool knownrange; + /* True for a directive that may fail (such as wide character + directives). */ + bool mayfail; + /* True when the argument is a null pointer. */ bool nullp; }; @@ -2202,7 +2208,8 @@ format_character (const directive &dir, tree arg, res.knownrange = true; - if (dir.modifier == FMT_LEN_l) + if (dir.specifier == 'C' + || dir.modifier == FMT_LEN_l) { /* A wide character can result in as few as zero bytes. */ res.range.min = 0; @@ -2217,13 +2224,20 @@ format_character (const directive &dir, tree arg, res.range.likely = 0; res.range.unlikely = 0; } - else if (min > 0 && min < 128) + else if (min >= 0 && min < 128) { + /* Be conservative if the target execution character set + is not a 1-to-1 mapping to the source character set or + if the source set is not ASCII. */ + bool one_2_one_ascii + = (target_to_host_charmap[0] == 1 && target_to_host ('a') == 97); + /* A wide character in the ASCII range most likely results in a single byte, and only unlikely in up to MB_LEN_MAX. */ - res.range.max = 1; + res.range.max = one_2_one_ascii ? 1 : target_mb_len_max ();; res.range.likely = 1; res.range.unlikely = target_mb_len_max (); + res.mayfail = !one_2_one_ascii; } else { @@ -2232,6 +2246,8 @@ format_character (const directive &dir, tree arg, res.range.max = target_mb_len_max (); res.range.likely = 2; res.range.unlikely = res.range.max; + /* Converting such a character may fail. */ + res.mayfail = true; } } else @@ -2241,6 +2257,7 @@ format_character (const directive &dir, tree arg, res.range.max = target_mb_len_max (); res.range.likely = 2; res.range.unlikely = res.range.max; + res.mayfail = true; } } else @@ -2276,7 +2293,8 @@ format_string (const directive &dir, tree arg, vr_ /* A '%s' directive with a string argument with constant length. */ res.range = slen.range; - if (dir.modifier == FMT_LEN_l) + if (dir.specifier == 'S' + || dir.modifier == FMT_LEN_l) { /* In the worst case the length of output of a wide string S is bounded by MB_LEN_MAX * wcslen (S). */ @@ -2302,6 +2320,10 @@ format_string (const directive &dir, tree arg, vr_ /* Even a non-empty wide character string need not convert into any bytes. */ res.range.min = 0; + + /* A non-emtpy wide character conversion may fail. */ + if (slen.range.max > 0) + res.mayfail = true; } else { @@ -2340,7 +2362,8 @@ format_string (const directive &dir, tree arg, vr_ at level 2. This result is adjust upward for width (if it's specified). */ - if (dir.modifier == FMT_LEN_l) + if (dir.specifier == 'S' + || dir.modifier == FMT_LEN_l) { /* A wide character converts to as few as zero bytes. */ slen.range.min = 0; @@ -2352,6 +2375,10 @@ format_string (const directive &dir, tree arg, vr_ if (slen.range.likely < target_int_max ()) slen.range.unlikely *= target_mb_len_max (); + + /* A non-emtpy wide character conversion may fail. */ + if (slen.range.max > 0) + res.mayfail = true; } res.range = slen.range; @@ -2904,11 +2931,14 @@ format_directive (const sprintf_dom_walker::call_i of 4095 bytes required to be supported? */ bool minunder4k = fmtres.range.min < 4096; bool maxunder4k = fmtres.range.max < 4096; - /* Clear UNDER4K in the overall result if the maximum has exceeded - the 4k (this is necessary to avoid the return valuye optimization + /* Clear POSUNDER4K in the overall result if the maximum has exceeded + the 4k (this is necessary to avoid the return value optimization that may not be safe in the maximum case). */ if (!maxunder4k) - res->under4k = false; + res->posunder4k = false; + /* Also clear POSUNDER4K if the directive may fail. */ + if (fmtres.mayfail) + res->posunder4k = false; if (!warned /* Only warn at level 2. */ @@ -3354,12 +3384,15 @@ parse_directive (sprintf_dom_walker::call_info &in dir.fmtfunc = format_none; break; + case 'C': case 'c': + /* POSIX wide character and C/POSIX narrow character. */ dir.fmtfunc = format_character; break; case 'S': case 's': + /* POSIX wide string and C/POSIX narrow character string. */ dir.fmtfunc = format_string; break; @@ -3517,10 +3550,10 @@ sprintf_dom_walker::compute_format_length (call_in res->range.min = res->range.max = 0; /* No directive has been seen yet so the length of output is bounded - by the known range [0, 0] (with no conversion producing more than - 4K bytes) until determined otherwise. */ + by the known range [0, 0] (with no conversion resulting in a failure + or producing more than 4K bytes) until determined otherwise. */ res->knownrange = true; - res->under4k = true; + res->posunder4k = true; res->floating = false; res->warned = false; @@ -3588,7 +3621,7 @@ is_call_safe (const sprintf_dom_walker::call_info const format_result &res, bool under4k, unsigned HOST_WIDE_INT retval[2]) { - if (under4k && !res.under4k) + if (under4k && !res.posunder4k) return false; /* The minimum return value. */ Index: gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-10.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-10.c (nonexistent) +++ gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-10.c (working copy) @@ -0,0 +1,119 @@ +/* PR tree-optimization/86853 - sprintf optimization for wide strings + doesn't account for conversion failure​ + { dg-do compile } + { dg-options "-O2 -Wall -fdump-tree-optimized" } */ + +typedef __SIZE_TYPE__ size_t; +typedef __WCHAR_TYPE__ wchar_t; + +extern int snprintf (char*, size_t, const char*, ...); + +#define CONCAT(x, y) x ## y +#define CAT(x, y) CONCAT (x, y) +#define FAILNAME(name) CAT (call_ ## name ##_on_line_, __LINE__) + +#define FAIL(name) do { \ + extern void FAILNAME (name) (void); \ + FAILNAME (name)(); \ + } while (0) + +/* Macro to emit a call to funcation named + call_in_true_branch_not_eliminated_on_line_NNN() + for each call that's expected to be eliminated. The dg-final + scan-tree-dump-time directive at the bottom of the test verifies + that no such call appears in output. */ +#define ELIM(expr) \ + if (!(expr)) FAIL (in_true_branch_not_eliminated); else (void)0 + +/* Macro to emit a call to a function named + call_made_in_{true,false}_branch_on_line_NNN() + for each call that's expected to be retained. The dg-final + scan-tree-dump-time directive at the bottom of the test verifies + that the expected number of both kinds of calls appears in output + (a pair for each line with the invocation of the KEEP() macro. */ +#define KEEP(expr) \ + if (expr) \ + FAIL (made_in_true_branch); \ + else \ + FAIL (made_in_false_branch) + + +extern wchar_t wc; +extern wchar_t ws[]; + +const wchar_t ws3[] = L"12\xff"; + +/* Verify that the following calls are eliminated. */ + +void elim_wide_char_call (void) +{ + ELIM (snprintf (0, 0, "%lc", L'\0')); + ELIM (snprintf (0, 0, "%lc", L'1')); + ELIM (snprintf (0, 0, "%lc", L'a')); + ELIM (snprintf (0, 0, "%lc", ws3[0])); + ELIM (snprintf (0, 0, "%lc", ws3[1])); + ELIM (snprintf (0, 0, "%lc", ws3[3])); + + ELIM (snprintf (0, 0, "%C", L'\0')); + ELIM (snprintf (0, 0, "%C", L'9')); + ELIM (snprintf (0, 0, "%C", L'z')); + ELIM (snprintf (0, 0, "%C", ws3[0])); + ELIM (snprintf (0, 0, "%C", ws3[1])); + ELIM (snprintf (0, 0, "%C", ws3[3])); + + /* Verify an unknown character value within the ASCII range. */ + if (wc < 1 || 127 < wc) + wc = 0; + + ELIM (snprintf (0, 0, "%C", wc)); + ELIM (snprintf (0, 0, "%C", wc)); +} + +void elim_wide_string_call (void) +{ + ELIM (snprintf (0, 0, "%ls", L"")); +} + + +#line 1000 + + /* Verify that the following calls are not eliminated. */ + +void keep_wide_char_call (void) +{ + KEEP (snprintf (0, 0, "%lc", L'\xff')); + KEEP (snprintf (0, 0, "%lc", L'\xffff')); + KEEP (snprintf (0, 0, "%lc", wc)); + KEEP (snprintf (0, 0, "%lc", ws3[2])); + + KEEP (snprintf (0, 0, "%C", L'\xff')); + KEEP (snprintf (0, 0, "%C", L'\xffff')); + KEEP (snprintf (0, 0, "%C", wc)); + KEEP (snprintf (0, 0, "%C", ws3[2])); + + /* Verify an unknown character value outside the ASCII range + (with 128 being the only one). */ + if (wc < 32 || 128 < wc) + wc = 32; + + KEEP (snprintf (0, 0, "%lc", wc)); + KEEP (snprintf (0, 0, "%C", wc)); +} + +void keep_wide_string_call (void) +{ + KEEP (snprintf (0, 0, "%ls", L"\xff")); + KEEP (snprintf (0, 0, "%ls", L"\xffff")); + KEEP (snprintf (0, 0, "%ls", ws)); + KEEP (snprintf (0, 0, "%ls", ws3)); + + KEEP (snprintf (0, 0, "%S", L"\xff")); + KEEP (snprintf (0, 0, "%S", L"\xffff")); + KEEP (snprintf (0, 0, "%S", ws)); + KEEP (snprintf (0, 0, "%S", ws3)); +} + +/* { dg-final { scan-tree-dump-times "call_made_in_true_branch_not_eliminated" 0 "optimized" } } + + { dg-final { scan-tree-dump-times "call_made_in_true_branch_on_line_1\[0-9\]\[0-9\]\[0-9\]" 18 "optimized" } } + { dg-final { scan-tree-dump-times "call_made_in_false_branch_on_line_1\[0-9\]\[0-9\]\[0-9\]" 18 "optimized" } } */ Index: gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-11.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-11.c (nonexistent) +++ gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-11.c (working copy) @@ -0,0 +1,65 @@ +/* PR tree-optimization/86853 - sprintf optimization for wide strings + doesn't account for conversion failure​ + Exercise wide character handling in an EBCDIC execution charset. + { dg-do compile } + { dg-require-iconv "IBM1047" } + { dg-options "-O2 -Wall -Wno-format -Wformat-overflow -fexec-charset=IBM1047 -fdump-tree-optimized" } */ + +typedef __WCHAR_TYPE__ wchar_t; + +/* Exercise wide character constants. */ + +void test_lc_cst (void) +{ + /* IBM1047 0x30 maps to ASCII 0x94 which neeed not be representable + in the current locale (and the snprintf() call may fail). Verify + that snprintf() doesn't assume it is. */ + wchar_t wc = 0x30; + + int n = __builtin_snprintf (0, 0, "%lc", wc); + if (n < 0) + __builtin_abort (); +} + +void test_C_cst (void) +{ + /* Same as above but for %C and 0x31 which maps to 0x95. */ + wchar_t wc = 0x31; + + int n = __builtin_snprintf (0, 0, "%C", wc); + if (n < 0) + __builtin_abort (); +} + +/* Exercise wide character values in known ranges. */ + +void test_lc_range (wchar_t wc) +{ + if (wc < 0x40 || 0x49 < wc) + wc = 0x40; + + int n = __builtin_snprintf (0, 0, "%lc", wc); + if (n < 0) + __builtin_abort (); +} + +void test_C_range (wchar_t wc) +{ + if (wc < 0x41 || 0x48 < wc) + wc = 0x41; + + int n = __builtin_snprintf (0, 0, "%C", wc); + if (n < 0) + __builtin_abort (); +} + +/* Exercise unknown wide character values. */ + +void test_var (wchar_t wc) +{ + int n = __builtin_snprintf (0, 0, "%lc", wc); + if (n < 0) + __builtin_abort (); +} + +/* { dg-final { scan-tree-dump-times "abort" 5 "optimized" } } */ Index: gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-18.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-18.c (revision 263295) +++ gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-18.c (working copy) @@ -18,7 +18,7 @@ void test_characters () T ("%A", 0.0); /* { dg-warning ".%A. directive writing between 6 and 20 " } */ T ("%a", 0.0); /* { dg-warning ".%a. directive writing between 6 and 20 " } */ - T ("%C", 'a'); /* { dg-warning ".%C. directive writing 1 byte" "bug 80537" { xfail *-*-* } } */ + T ("%C", L'a'); /* { dg-warning ".%C. directive writing up to 6 bytes" } */ T ("%c", 'a'); /* { dg-warning ".%c. directive writing 1 byte" } */ T ("%d", 12); /* { dg-warning ".%d. directive writing 2 bytes" } */ @@ -93,7 +93,8 @@ void test_characters () T ("%x", 1234); /* { dg-warning ".%x. directive writing 3 bytes" } */ T ("%#X", 1235); /* { dg-warning ".%#X. directive writing 5 bytes" } */ - T ("%S", L"1"); /* { dg-warning ".%S. directive writing 1 byte" } */ + T ("%S", L"1"); /* { dg-warning ".%S. directive writing up to 6 bytes" } */ + T ("%ls", L"12"); /* { dg-warning ".%ls. directive writing up to 12 bytes" } */ T ("%-s", "1"); /* { dg-warning ".%-s. directive writing 1 byte" } */ /* Verify that characters in the source character set appear in