public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Martin Uecker <uecker@tugraz.at>
To: gcc-patches@gcc.gnu.org
Cc: "Joseph Myers" <joseph@codesourcery.com>,
	"Martin Liška" <mliska@suse.cz>
Subject: Re: [C PATCH 3/4]  introduce ubsan checking for assigment of VM types 3/4
Date: Mon, 29 May 2023 12:22:09 +0200	[thread overview]
Message-ID: <6a868211b3892c5c9161eb5fae908195eec728ce.camel@tugraz.at> (raw)
In-Reply-To: <93a1692e7f0e895379cb6847bfcb6e6d3dafadc3.camel@tugraz.at>



    c: introduce ubsan checking for assigment of VM types 3/4
    
    Support instrumentation of function arguments for functions
    called via a declaration.  We can support only simple size
    expressions without side effects, because the UBSan
    instrumentation is done before the call, but the expressions
    are evaluated in the callee.
    
    gcc/c-family:
            * c-ubsan.cc (ubsan_instrument_vm_assign): Add arguments
            for size expressions.
            * c-ubsan.h (ubsan_instrument_vm_assign): Dito.
    
    gcc/c:
            * c-typeck.cc (process_vm_constraints): Add support
            for instrumenting function arguments.
    
    gcc/testsuide/gcc.dg:
            * ubsan/vm-bounds-2.c: Update.
            * ubsan/vm-bounds-3.c: New test.
            * ubsan/vm-bounds-4.c: New test.

diff --git a/gcc/c-family/c-ubsan.cc b/gcc/c-family/c-ubsan.cc
index 59ef9708188..a8f95aa39e8 100644
--- a/gcc/c-family/c-ubsan.cc
+++ b/gcc/c-family/c-ubsan.cc
@@ -337,19 +337,13 @@ ubsan_instrument_vla (location_t loc, tree size)
 /* Instrument assignment of variably modified types.  */
 
 tree
-ubsan_instrument_vm_assign (location_t loc, tree a, tree b)
+ubsan_instrument_vm_assign (location_t loc, tree a, tree as, tree b, tree bs)
 {
   tree t, tt;
 
   gcc_assert (TREE_CODE (a) == ARRAY_TYPE);
   gcc_assert (TREE_CODE (b) == ARRAY_TYPE);
 
-  tree as = TYPE_MAX_VALUE (TYPE_DOMAIN (a));
-  tree bs = TYPE_MAX_VALUE (TYPE_DOMAIN (b));
-
-  as = fold_build2 (PLUS_EXPR, sizetype, as, size_one_node);
-  bs = fold_build2 (PLUS_EXPR, sizetype, bs, size_one_node);
-
   t = build2 (NE_EXPR, boolean_type_node, as, bs);
   if (flag_sanitize_trap & SANITIZE_VLA)
     tt = build_call_expr_loc (loc, builtin_decl_explicit (BUILT_IN_TRAP), 0);
diff --git a/gcc/c-family/c-ubsan.h b/gcc/c-family/c-ubsan.h
index 1b07b0645f2..42be1d691a8 100644
--- a/gcc/c-family/c-ubsan.h
+++ b/gcc/c-family/c-ubsan.h
@@ -26,7 +26,7 @@ extern tree ubsan_instrument_shift (location_t, enum tree_code, tree, tree);
 extern tree ubsan_instrument_vla (location_t, tree);
 extern tree ubsan_instrument_return (location_t);
 extern tree ubsan_instrument_bounds (location_t, tree, tree *, bool);
-extern tree ubsan_instrument_vm_assign (location_t, tree, tree);
+extern tree ubsan_instrument_vm_assign (location_t, tree, tree, tree, tree);
 extern bool ubsan_array_ref_instrumented_p (const_tree);
 extern void ubsan_maybe_instrument_array_ref (tree *, bool);
 extern void ubsan_maybe_instrument_reference (tree *);
diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc
index a8fccc6f6ed..aeddac315fc 100644
--- a/gcc/c/c-typeck.cc
+++ b/gcc/c/c-typeck.cc
@@ -3408,7 +3408,8 @@ static tree
 convert_argument (location_t ploc, tree function, tree fundecl,
 		  tree type, tree origtype, tree val, tree valtype,
 		  bool npc, tree rname, int parmnum, int argnum,
-		  bool excess_precision, int warnopt)
+		  bool excess_precision, int warnopt,
+		  vec<struct instrument_data, va_gc> *instr_vec)
 {
   /* Formal parm type is specified by a function prototype.  */
 
@@ -3567,7 +3568,7 @@ convert_argument (location_t ploc, tree function, tree fundecl,
 						    val, origtype, ic_argpass,
 						    npc, fundecl, function,
 						    parmnum + 1, warnopt,
-						    NULL);
+						    instr_vec);
 
   if (targetm.calls.promote_prototypes (fundecl ? TREE_TYPE (fundecl) : 0)
       && INTEGRAL_TYPE_P (type)
@@ -3582,15 +3583,111 @@ convert_argument (location_t ploc, tree function, tree fundecl,
 
 static tree
 process_vm_constraints (location_t location,
-			vec<struct instrument_data, va_gc> *instr_vec)
+			vec<struct instrument_data, va_gc> *instr_vec,
+			tree function, tree fundecl, vec<tree, va_gc> *values)
 {
   unsigned int i;
   struct instrument_data* d;
   tree instr_expr = void_node;
+  tree args = NULL;
+
+  /* Find the arguments for the function declaration / type.  */
+  if (function)
+    {
+      if (FUNCTION_DECL == TREE_CODE (function))
+	{
+	  fundecl = function;
+	  args = DECL_ARGUMENTS (fundecl);
+	}
+      else
+	{
+	  /* Functions called via pointers are not yet supported.  */
+	  return void_node;
+	}
+    }
 
   FOR_EACH_VEC_SAFE_ELT (instr_vec, i, d)
     {
-      tree in = ubsan_instrument_vm_assign (location, d->t1, d->t2);
+      tree t1 = d->t1;
+      tree t2 = d->t2;
+
+      gcc_assert (ARRAY_TYPE == TREE_CODE (t1));
+      gcc_assert (ARRAY_TYPE == TREE_CODE (t2));
+
+      tree as = TYPE_MAX_VALUE (TYPE_DOMAIN (t1));
+      tree bs = TYPE_MAX_VALUE (TYPE_DOMAIN (t2));
+
+      gcc_assert (as);
+      gcc_assert (bs);
+
+      as = fold_build2 (PLUS_EXPR, sizetype, as, size_one_node);
+      bs = fold_build2 (PLUS_EXPR, sizetype, bs, size_one_node);
+
+      if (function)
+	{
+
+	  if (INTEGER_CST == TREE_CODE (bs))
+	    goto cont;
+
+	  if (NOP_EXPR == TREE_CODE (bs)
+	      && SAVE_EXPR == TREE_CODE (TREE_OPERAND (bs, 0)))
+	    {
+	      tree bs1 = TREE_OPERAND (bs, 0);
+	      tree bs2 = TREE_OPERAND (bs1, 0);
+
+	      /* Another parameter of the current functions.  */
+	      if (PARM_DECL == TREE_CODE (bs2)
+		  && (DECL_CONTEXT (bs2) == fundecl
+		      || DECL_CONTEXT (bs2) == NULL))
+		{
+		  tree arg = args;
+		  int pos = 0;
+		  while (arg)
+		   {
+		     if (arg == bs2)
+		       {
+			 bs = (*values)[pos];
+			 bs = save_expr (bs);
+			 bs = build1 (NOP_EXPR, sizetype, bs);
+			 break;
+		       }
+		     pos++;
+		     arg = DECL_CHAIN (arg);
+		   }
+		  if (!arg)
+		    goto giveup;
+		  goto cont;
+		}
+
+	      /*  A parameter of an enclosing function.  */
+	      if (PARM_DECL == TREE_CODE (bs2)
+		  && DECL_CONTEXT (bs2) != fundecl)
+		{
+		  bs2 = unshare_expr (bs2);
+		  bs1 = save_expr (bs2);
+		  bs = build1 (NOP_EXPR, sizetype, bs1);
+		  goto cont;
+		}
+
+	      /*  A variable with enclosing scope.  */
+	      if (VAR_DECL == TREE_CODE (bs2))
+		{
+		  bs2 = unshare_expr (bs2);
+		  bs1 = save_expr (bs2);
+		  bs = build1 (NOP_EXPR, sizetype, bs1);
+		  goto cont;
+		}
+	    }
+	giveup:
+	  /*  Give up.  If we do not understand a size expression, we can
+	      also not instrument any of the others because it may have
+	      side effects affecting them.  (We could restart and instrument
+	      the only the ones with integer constants.)   */
+	    warning_at (location, 0, "Function call not instrumented.");
+	    return void_node;
+	}
+cont:
+      tree in = ubsan_instrument_vm_assign (location, t1, as, t2, bs);
       instr_expr = fold_build2 (COMPOUND_EXPR, void_type_node, in, instr_expr);
     }
   return instr_expr;
@@ -3689,6 +3786,11 @@ convert_arguments (location_t loc, vec<location_t> arg_loc, tree typelist,
 	  }
     }
 
+  vec<struct instrument_data, va_gc> *instr_vec = NULL;
+
+  if (sanitize_flags_p (SANITIZE_VLA))
+    vec_alloc (instr_vec, 20);
+
   /* Scan the given expressions (VALUES) and types (TYPELIST), producing
      individual converted arguments.  */
 
@@ -3801,7 +3903,7 @@ convert_arguments (location_t loc, vec<location_t> arg_loc, tree typelist,
 	  tree origtype = (!origtypes) ? NULL_TREE : (*origtypes)[parmnum];
 	  parmval = convert_argument (ploc, function, fundecl, type, origtype,
 				      val, valtype, npc, rname, parmnum, argnum,
-				      excess_precision, 0);
+				      excess_precision, 0, instr_vec);
 	}
       else if (promote_float_arg)
         {
@@ -3854,7 +3956,7 @@ convert_arguments (location_t loc, vec<location_t> arg_loc, tree typelist,
 	  convert_argument (ploc, function, fundecl, builtin_type, origtype,
 			    val, valtype, npc, rname, parmnum, argnum,
 			    excess_precision,
-			    OPT_Wbuiltin_declaration_mismatch);
+			    OPT_Wbuiltin_declaration_mismatch, NULL);
 	}
 
       if (typetail)
@@ -3866,6 +3968,22 @@ convert_arguments (location_t loc, vec<location_t> arg_loc, tree typelist,
 
   gcc_assert (parmnum == vec_safe_length (values));
 
+  if (0 < parmnum && instr_vec && 0 < vec_safe_length (instr_vec))
+    {
+      tree instr_expr = process_vm_constraints (loc, instr_vec, function, fundecl, values);
+      /* We have to make sure that all parameters are evaluated first,
+	 because we may use size expressions in it to check bounds.  */
+      if (void_node != instr_expr)
+	{
+	  tree parmval = (*values)[0];
+	  parmval = save_expr (parmval);
+	  instr_expr = fold_build2 (COMPOUND_EXPR, void_type_node, parmval, instr_expr);
+	  parmval = fold_build2 (COMPOUND_EXPR, TREE_TYPE (parmval), instr_expr, parmval);
+	  (*values)[0] = parmval;
+	}
+      vec_free (instr_vec);
+    }
+
   if (typetail != NULL_TREE && TREE_VALUE (typetail) != void_type_node)
     {
       error_at (loc, "too few arguments to function %qE", function);
@@ -6944,7 +7062,7 @@ convert_for_assignment (location_t location, location_t expr_loc, tree type,
 	{
 	  /* We have to make sure that the rhs is evaluated first,
 	     because we may use size expressions in it to check bounds.  */
-	  tree instr_expr = process_vm_constraints (location, instr_vec);
+	  tree instr_expr = process_vm_constraints (location, instr_vec, NULL, NULL, NULL);
 	  if (void_node != instr_expr)
 	    {
 	      ret = save_expr (ret);
diff --git a/gcc/testsuite/gcc.dg/ubsan/vm-bounds-2.c b/gcc/testsuite/gcc.dg/ubsan/vm-bounds-2.c
index ebc63d32144..22f06231eaa 100644
--- a/gcc/testsuite/gcc.dg/ubsan/vm-bounds-2.c
+++ b/gcc/testsuite/gcc.dg/ubsan/vm-bounds-2.c
@@ -51,6 +51,6 @@ int c(int u, char (*a)[u]) { }
 int d(void)
 {
 	char a[3];
-	c(3, &a);
+	c(3, &a);		/* { dg-warning "Function call not instrumented." } */
 }
 
diff --git a/gcc/testsuite/gcc.dg/ubsan/vm-bounds-3.c b/gcc/testsuite/gcc.dg/ubsan/vm-bounds-3.c
new file mode 100644
index 00000000000..9ec95921fb9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ubsan/vm-bounds-3.c
@@ -0,0 +1,96 @@
+/* { dg-do run } */
+/* { dg-options "-fsanitize=vla-bound" } */
+
+int m, n;
+
+static void a0(char (*a)[4]) { }
+static void b0(char (*a)[n]) { }
+static void c0(char (*a)[n][m]) { }
+static void d0(char (*a)[4][m]) { }
+static void e0(char (*a)[n][3]) { }
+static void f0(char a[n][m]) { }
+
+static void b1(int u, char (*a)[u]) { }
+static void c1(int u, int v, char (*a)[u][v]) { }
+static void d1(int v, char (*a)[4][v]) { }
+static void e1(int u, char (*a)[u][3]) { }
+static void f1(int u, int v, char a[u][v]) { }
+
+
+
+int main()
+{
+	m = 3, n = 4;
+
+	int u = 4;
+	int v = 3;
+
+	/* function arguments */
+
+	char A0[4];
+	char A1[u];
+	char B0[3];
+	char B1[v];
+
+	a0(&A0);
+	a0(&A1);
+	a0(&B0);	/* { dg-warning "incompatible pointer type" } */
+	a0(&B1);
+	/* { dg-output "bound 4 of type 'char \\\[4\\\]' does not match bound 3 of type 'char \\\[\\\*\\\]'\[^\n\r]*(\n|\r\n|\r)" } */
+	b0(&A0);
+	b0(&A1);
+	b0(&B0);
+	/* { dg-output "\[^\n\r]*bound 4 of type 'char \\\[\\\*\\\]' does not match bound 3 of type 'char \\\[3\\\]'\[^\n\r]*(\n|\r\n|\r)" } */
+	b0(&B1);
+	/* { dg-output "\[^\n\r]*bound 4 of type 'char \\\[\\\*\\\]' does not match bound 3 of type 'char \\\[\\\*\\\]'\[^\n\r]*(\n|\r\n|\r)" } */
+	b1(4, &A0);
+	b1(4, &B0);
+	/* { dg-output "\[^\n\r]*bound 4 of type 'char \\\[\\\*\\\]' does not match bound 3 of type 'char \\\[3\\\]'\[^\n\r]*(\n|\r\n|\r)" } */
+
+	char C0[4][3];
+	char C1[u][3];
+	char C2[4][v];
+	char C3[u][v];
+	char D0[3][4];
+	char D1[v][4];
+	char D2[3][u];
+	char D3[v][u];
+
+	c0(&C0);
+	c0(&C1);
+	c0(&C2);
+	c0(&C3);
+	c0(&D0);
+	/* { dg-output "\[^\n\r]*bound 4 of type 'char \\\[\\\*\\\]\\\[\\\*\\\]' does not match bound 3 of type 'char \\\[3\\\]\\\[4\\\]'\[^\n\r]*(\n|\r\n|\r)" } */
+	/* { dg-output "\[^\n\r]*bound 3 of type 'char \\\[\\\*\\\]' does not match bound 4 of type 'char \\\[4\\\]'\[^\n\r]*(\n|\r\n|\r)" } */
+	c0(&D1);
+	/* { dg-output "\[^\n\r]*bound 4 of type 'char \\\[\\\*\\\]\\\[\\\*\\\]' does not match bound 3 of type 'char \\\[\\\*\\\]\\\[4\\\]'\[^\n\r]*(\n|\r\n|\r)" } */
+	/* { dg-output "\[^\n\r]*bound 3 of type 'char \\\[\\\*\\\]' does not match bound 4 of type 'char \\\[4\\\]'\[^\n\r]*(\n|\r\n|\r)" } */
+	
+	c0(&D2);
+	/* { dg-output "\[^\n\r]*bound 4 of type 'char \\\[\\\*\\\]\\\[\\\*\\\]' does not match bound 3 of type 'char \\\[3\\\]\\\[\\\*\\\]'\[^\n\r]*(\n|\r\n|\r)" } */
+	/* { dg-output "\[^\n\r]*bound 3 of type 'char \\\[\\\*\\\]' does not match bound 4 of type 'char \\\[\\\*\\\]'\[^\n\r]*(\n|\r\n|\r)" } */
+	
+	c0(&D3);
+	/* { dg-output "\[^\n\r]*bound 4 of type 'char \\\[\\\*\\\]\\\[\\\*\\\]' does not match bound 3 of type 'char \\\[\\\*\\\]\\\[\\\*\\\]'\[^\n\r]*(\n|\r\n|\r)" } */
+	/* { dg-output "\[^\n\r]*bound 3 of type 'char \\\[\\\*\\\]' does not match bound 4 of type 'char \\\[\\\*\\\]'\[^\n\r]*(\n|\r\n|\r)" } */
+	
+	d0(&C0);
+	d0(&D0);	/* { dg-warning "incompatible pointer type" } */
+	/* { dg-output "\[^\n\r]*bound 3 of type 'char \\\[\\\*\\\]' does not match bound 4 of type 'char \\\[4\\\]'\[^\n\r]*(\n|\r\n|\r)" } */
+	d1(3, &C0);
+	d1(3, &D0);	/* { dg-warning "incompatible pointer type" } */
+	/* { dg-output "\[^\n\r]*bound 3 of type 'char \\\[\\\*\\\]' does not match bound 4 of type 'char \\\[4\\\]'\[^\n\r]*(\n|\r\n|\r)" } */
+	e0(&C0);
+	e0(&D0);	/* { dg-warning "incompatible pointer type" } */
+	e1(4, &C0);
+	e1(4, &D0);	/* { dg-warning "incompatible pointer type" } */
+
+	f0(C0);
+	f0(D0);
+	/* { dg-output "\[^\n\r]*bound 3 of type 'char \\\[\\\*\\\]' does not match bound 4 of type 'char \\\[4\\\]'\[^\n\r]*(\n|\r\n|\r)" } */
+	f1(4, 3, C0);
+	f1(4, 3, D0);
+	/* { dg-output "\[^\n\r]*bound 3 of type 'char \\\[\\\*\\\]' does not match bound 4 of type 'char \\\[4\\\]'" } */
+}
+
diff --git a/gcc/testsuite/gcc.dg/ubsan/vm-bounds-4.c b/gcc/testsuite/gcc.dg/ubsan/vm-bounds-4.c
new file mode 100644
index 00000000000..e745b41d159
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ubsan/vm-bounds-4.c
@@ -0,0 +1,37 @@
+/* { dg-do run } */
+/* { dg-options "-fsanitize=vla-bound" } */
+
+
+
+void b0(int n, char (*a)[n]) { }
+void b1(int m, char (*a)[m]);
+void b2(int m; char (*a)[m], int m) { }
+void b3(int m; char (*a)[m], int m);
+int n;
+void b4(char (*a)[n]) { }
+void b5(char (*a)[n]);
+
+int main()
+{
+	char A0[3];
+	b1(4, &A0);
+	/* { dg-output "bound 4 of type 'char \\\[\\\*\\\]' does not match bound 3 of type 'char \\\[3\\\]'\[^\n\r]*(\n|\r\n|\r)" } */
+	b0(4, &A0);
+	/* { dg-output "\[^\n\r]*bound 4 of type 'char \\\[\\\*\\\]' does not match bound 3 of type 'char \\\[3\\\]'\[^\n\r]*(\n|\r\n|\r)" } */
+	b2(&A0, 4);
+	/* { dg-output "\[^\n\r]*bound 4 of type 'char \\\[\\\*\\\]' does not match bound 3 of type 'char \\\[3\\\]'\[^\n\r]*(\n|\r\n|\r)" } */
+	b3(&A0, 4);
+	/* { dg-output "\[^\n\r]*bound 4 of type 'char \\\[\\\*\\\]' does not match bound 3 of type 'char \\\[3\\\]'\[^\n\r]*(\n|\r\n|\r)" } */
+	n = 4;
+	b4(&A0);
+	/* { dg-output "\[^\n\r]*bound 4 of type 'char \\\[\\\*\\\]' does not match bound 3 of type 'char \\\[3\\\]'\[^\n\r]*(\n|\r\n|\r)" } */
+	b5(&A0);
+	/* { dg-output "\[^\n\r]*bound 4 of type 'char \\\[\\\*\\\]' does not match bound 3 of type 'char \\\[3\\\]'" } */
+}
+
+
+void b1(int n, char (*a)[n]) { }
+void b3(int m; char (*a)[m], int m) { }
+void b5(char (*a)[n]) { }
+
+



  parent reply	other threads:[~2023-05-29 10:22 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-29 10:19 [C PATCH 1/4] introduce ubsan checking for assigment of VM types 1/4 Martin Uecker
2023-05-29 10:20 ` [C PATCH 2/4] introduce ubsan checking for assigment of VM types 2/4 Martin Uecker
2023-05-29 10:22 ` Martin Uecker [this message]
2023-05-30 22:59   ` [C PATCH 3/4] introduce ubsan checking for assigment of VM types 3/4 Joseph Myers
2023-05-31  8:12     ` Martin Uecker
2023-05-29 10:22 ` [C PATCH 4/4] introduce ubsan checking for assigment of VM types 4/4 Martin Uecker

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=6a868211b3892c5c9161eb5fae908195eec728ce.camel@tugraz.at \
    --to=uecker@tugraz.at \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=joseph@codesourcery.com \
    --cc=mliska@suse.cz \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).