public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] avoid non-printable characters in diagnostics (c/77620, c/77521)
@ 2016-09-09  4:10 Martin Sebor
  2016-09-09 14:00 ` Joseph Myers
  2016-09-16 17:00 ` Jeff Law
  0 siblings, 2 replies; 6+ messages in thread
From: Martin Sebor @ 2016-09-09  4:10 UTC (permalink / raw)
  To: Gcc Patch List

[-- Attachment #1: Type: text/plain, Size: 1378 bytes --]

While working on the -Wformat-length pass I noticed that in some
diagnostics that make use of the %qc and %qs directives GCC prints
non-printable characters raw.  For example, it might print a newline,
corrupting the diagnostic stream (bug 77521).

Some other diagnostics that try to avoid this problem by using
a directive such as %x when the character is not printable might
use the sign-extended value of the character, printing a very
large hexadecimal value (bug 77520).

The attached patch changes the pretty printer to detect non-printable
characters in %qc and %qs directives (but not %c or %s) and print
those in hexadecimal (via "\\x%02x").

Martin

PS I used hexadecimal based on what c-format.c does but now that
I checked more carefully how %qE formats string literals I see it
uses octal.  I think hexadecimal is preferable because it avoids
ambiguity but I'm open to changing it to octal if there's a strong
preference for it.  Incidentally, %qE too suffers from bug 77520
(see below).  The patch doesn't try to fix that.

$ cat z.C && gcc z.C
constexpr int i = "ABC\x7f_\x80XYZ";
z.C:1:19: error: invalid conversion from ‘const char*’ to ‘int’ 
[-fpermissive]
  constexpr int i = "ABC\x7f_\x80XYZ";
                    ^~~~~~~~~~~~~~~~~
z.C:1:19: error: ‘(int)((const char*)"ABC\177_\37777777600XYZ")’ is not 
a constant expression

[-- Attachment #2: gcc-77520.diff --]
[-- Type: text/x-patch, Size: 5430 bytes --]

PR c/77520 - wrong value for extended ASCII characters in -Wformat message
PR c/77521 - %qc format directive should quote non-printable characters

gcc/c-family/ChangeLog:
2016-09-08  Martin Sebor  <msebor@redhat.com>

	PR c/77520
	PR c/77521
	* c-format.c (argument_parser::find_format_char_info): Use %qc
	format directive unconditionally.

gcc/ChangeLog:
2016-09-08  Martin Sebor  <msebor@redhat.com>

	PR c/77520
	PR c/77521
	* pretty-print.c (pp_quoted_string): New function.
	(pp_format): Call it for %c and %s directives.

gcc/testsuite/ChangeLog:
2016-09-08  Martin Sebor  <msebor@redhat.com>

	PR c/77520
	PR c/77521
	* gcc.dg/pr77520.c: New test.
	* gcc.dg/pr77521.c: New test.

diff --git a/gcc/c-family/c-format.c b/gcc/c-family/c-format.c
index 09d514e..0c17340 100644
--- a/gcc/c-family/c-format.c
+++ b/gcc/c-family/c-format.c
@@ -2355,20 +2355,12 @@ argument_parser::find_format_char_info (char format_char)
     ++fci;
   if (fci->format_chars == 0)
     {
-      if (ISGRAPH (format_char))
-	format_warning_at_char (format_string_loc, format_string_cst,
-				format_chars - orig_format_chars,
-				OPT_Wformat_,
-				"unknown conversion type character"
-				" %qc in format",
-				format_char);
-      else
-	format_warning_at_char (format_string_loc, format_string_cst,
-				format_chars - orig_format_chars,
-				OPT_Wformat_,
-				"unknown conversion type character"
-				" 0x%x in format",
-				format_char);
+      format_warning_at_char (format_string_loc, format_string_cst,
+			      format_chars - orig_format_chars,
+			      OPT_Wformat_,
+			      "unknown conversion type character"
+			      " %qc in format",
+			      format_char);
       return NULL;
     }
 
diff --git a/gcc/pretty-print.c b/gcc/pretty-print.c
index 325263e..a39815e 100644
--- a/gcc/pretty-print.c
+++ b/gcc/pretty-print.c
@@ -30,6 +30,8 @@ along with GCC; see the file COPYING3.  If not see
 #include <iconv.h>
 #endif
 
+static void pp_quoted_string (pretty_printer *, const char *, size_t = -1);
+
 /* Overwrite the given location/range within this text_info's rich_location.
    For use e.g. when implementing "+" in client format decoders.  */
 
@@ -555,8 +557,20 @@ pp_format (pretty_printer *pp, text_info *text)
 	  break;
 
 	case 'c':
-	  pp_character (pp, va_arg (*text->args_ptr, int));
-	  break;
+	  {
+	    /* When quoting, print alphanumeric, punctuation, and the space
+	       character unchanged, and all others in hexadecimal with the
+	       "\x" prefix.  Otherwise print them all unchanged.  */
+	    int chr = va_arg (*text->args_ptr, int);
+	    if (ISPRINT (chr) || !quote)
+	      pp_character (pp, chr);
+	    else
+	      {
+		const char str [2] = { chr, '\0' };
+		pp_quoted_string (pp, str, 1);
+	      }
+	    break;
+	  }
 
 	case 'd':
 	case 'i':
@@ -577,7 +591,10 @@ pp_format (pretty_printer *pp, text_info *text)
 	  break;
 
 	case 's':
-	  pp_string (pp, va_arg (*text->args_ptr, const char *));
+	  if (quote)
+	    pp_quoted_string (pp, va_arg (*text->args_ptr, const char *));
+	  else
+	    pp_string (pp, va_arg (*text->args_ptr, const char *));
 	  break;
 
 	case 'p':
@@ -939,6 +956,41 @@ pp_string (pretty_printer *pp, const char *str)
   pp_maybe_wrap_text (pp, str, str + strlen (str));
 }
 
+/* Append the leading N characters of STRING to the output area of
+   PRETTY-PRINTER, quoting in hexadecimal non-printable characters.
+   Setting N = -1 is as if N were set to strlen (STRING).  The STRING
+   may be line-wrapped if in appropriate mode.  */
+static void
+pp_quoted_string (pretty_printer *pp, const char *str, size_t n /* = -1 */)
+{
+  gcc_checking_assert (str);
+
+  const char *last = str;
+  const char *ps;
+
+  /* Compute the length if not specified.  */
+  if (n == (size_t) -1)
+    n = strlen (str);
+
+  for (ps = str; n; ++ps, --n)
+    {
+      if (ISPRINT (*ps))
+	  continue;
+
+      if (last < ps)
+	pp_maybe_wrap_text (pp, last, ps - 1);
+
+      /* Append the hexadecimal value of the character.  Allocate a buffer
+	 that's large enough for a 32-bit char plus the hex prefix.  */
+      char buf [11];
+      int n = sprintf (buf, "\\x%02x", (unsigned char)*ps);
+      pp_maybe_wrap_text (pp, buf, buf + n);
+      last = ps + 1;
+    }
+
+  pp_maybe_wrap_text (pp, last, ps);
+}
+
 /* Maybe print out a whitespace if needed.  */
 
 void
diff --git a/gcc/testsuite/gcc.dg/pr77520.c b/gcc/testsuite/gcc.dg/pr77520.c
new file mode 100644
index 0000000..b237639
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr77520.c
@@ -0,0 +1,10 @@
+/* PR c/77520 - wrong value for extended ASCII characters in -Wformat message
+   Verify that characters in the extended ASCII range are quoted and not
+   allowed to be printed raw.  */
+/* { dg-do compile } */
+/* { dg-options "-Wformat" } */
+
+void f (void)
+{
+  __builtin_printf ("%\x80");   /* { dg-warning "unknown conversion type character .\\\\x80. in format" } */
+}
diff --git a/gcc/testsuite/gcc.dg/pr77521.c b/gcc/testsuite/gcc.dg/pr77521.c
new file mode 100644
index 0000000..a41d3e0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr77521.c
@@ -0,0 +1,8 @@
+/* PR c/77521 - %qc format directive should quote non-printable characters.
+   Verify that non-printable characters in assembly constraints are quoted
+   and not allowed to be printed raw.  */
+
+void f (int a, int b)
+{
+  asm ("combine %2, %0" : "=r" (a) : "0" (a), "\n" (b));   /* { dg-error "invalid punctuation .\\x0a. in constraint" } */
+}

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2017-01-01 21:33 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-09-09  4:10 [PATCH] avoid non-printable characters in diagnostics (c/77620, c/77521) Martin Sebor
2016-09-09 14:00 ` Joseph Myers
2016-09-10  0:07   ` Martin Sebor
2016-09-16 16:57     ` Jeff Law
     [not found]     ` <alpine.LSU.2.20.1612311507300.2994@anthias.pfeifer.com>
2017-01-01 21:33       ` Martin Sebor
2016-09-16 17:00 ` Jeff Law

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).