public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] add -fprolog-pad=N option to c-family
@ 2016-04-27 15:22 Torsten Duwe
  2016-04-27 16:26 ` Szabolcs Nagy
  2016-04-28  8:40 ` [PATCH] " Maxim Kuvyrkov
  0 siblings, 2 replies; 15+ messages in thread
From: Torsten Duwe @ 2016-04-27 15:22 UTC (permalink / raw)
  To: Maxim Kuvyrkov; +Cc: Li Bin, Jiri Kosina, gcc-patches

Hi Maxim,

thanks for starting the work on this; I have added the missing
command line option. It builds now and the resulting compiler generates
a linux kernel with the desired properties, so work can continue there.

	Torsten

diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index 9bc02fc..57265c5 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -393,6 +393,7 @@ static tree handle_designated_init_attribute (tree *, tree, tree, int, bool *);
 static tree handle_bnd_variable_size_attribute (tree *, tree, tree, int, bool *);
 static tree handle_bnd_legacy (tree *, tree, tree, int, bool *);
 static tree handle_bnd_instrument (tree *, tree, tree, int, bool *);
+static tree handle_prolog_pad_attribute (tree *, tree, tree, int, bool *);
 
 static void check_function_nonnull (tree, int, tree *);
 static void check_nonnull_arg (void *, tree, unsigned HOST_WIDE_INT);
@@ -833,6 +834,8 @@ const struct attribute_spec c_common_attribute_table[] =
 			      handle_bnd_legacy, false },
   { "bnd_instrument",         0, 0, true, false, false,
 			      handle_bnd_instrument, false },
+  { "prolog_pad",	      1, 1, false, true, true,
+			      handle_prolog_pad_attribute, false },
   { NULL,                     0, 0, false, false, false, NULL, false }
 };
 
@@ -9663,6 +9666,16 @@ handle_designated_init_attribute (tree *node, tree name, tree, int,
   return NULL_TREE;
 }
 
+static tree
+handle_prolog_pad_attribute (tree *, tree name, tree, int,
+			     bool *)
+{
+  warning (OPT_Wattributes,
+	   "%qE attribute is used", name);
+
+  return NULL_TREE;
+}
+
 \f
 /* Check for valid arguments being passed to a function with FNTYPE.
    There are NARGS arguments in the array ARGARRAY.  */
diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c
index 9ae181f..31a8026 100644
--- a/gcc/c-family/c-opts.c
+++ b/gcc/c-family/c-opts.c
@@ -532,6 +532,10 @@ c_common_handle_option (size_t scode, const char *arg, int value,
       cpp_opts->ext_numeric_literals = value;
       break;
 
+    case OPT_fprolog_pad_:
+      prolog_nop_pad_size = value;
+      break;
+
     case OPT_idirafter:
       add_path (xstrdup (arg), AFTER, 0, true);
       break;
diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index aafd802..929ebb6 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -1407,6 +1407,10 @@ fpreprocessed
 C ObjC C++ ObjC++
 Treat the input file as already preprocessed.
 
+fprolog-pad=
+C ObjC C++ ObjC++ RejectNegative Joined UInteger Var(prolog_nop_pad_size) Init(0)
+Pad NOPs before each function prolog
+
 ftrack-macro-expansion
 C ObjC C++ ObjC++ JoinedOrMissing RejectNegative UInteger
 ; converted into ftrack-macro-expansion=
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 1ce7181..9d10b10 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -4553,6 +4553,10 @@ will select the smallest suitable mode.
 This section describes the macros that output function entry
 (@dfn{prologue}) and exit (@dfn{epilogue}) code.
 
+@deftypefn {Target Hook} void TARGET_ASM_PRINT_PROLOG_PAD (FILE *@var{file}, unsigned HOST_WIDE_INT @var{pad_size}, bool @var{record_p})
+Generate prologue pad
+@end deftypefn
+
 @deftypefn {Target Hook} void TARGET_ASM_FUNCTION_PROLOGUE (FILE *@var{file}, HOST_WIDE_INT @var{size})
 If defined, a function that outputs the assembler code for entry to a
 function.  The prologue is responsible for setting up the stack frame,
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index a0a0a81..bda6d5c 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -3662,6 +3662,8 @@ will select the smallest suitable mode.
 This section describes the macros that output function entry
 (@dfn{prologue}) and exit (@dfn{epilogue}) code.
 
+@hook TARGET_ASM_PRINT_PROLOG_PAD
+
 @hook TARGET_ASM_FUNCTION_PROLOGUE
 
 @hook TARGET_ASM_FUNCTION_END_PROLOGUE
diff --git a/gcc/final.c b/gcc/final.c
index 1edc446..e0cff80 100644
--- a/gcc/final.c
+++ b/gcc/final.c
@@ -1753,6 +1753,7 @@ void
 final_start_function (rtx_insn *first, FILE *file,
 		      int optimize_p ATTRIBUTE_UNUSED)
 {
+  unsigned HOST_WIDE_INT pad_size = prolog_nop_pad_size;
   block_depth = 0;
 
   this_is_asm_operands = 0;
@@ -1765,6 +1766,21 @@ final_start_function (rtx_insn *first, FILE *file,
 
   high_block_linenum = high_function_linenum = last_linenum;
 
+  tree prolog_pad_attr
+    = lookup_attribute ("prolog_pad", TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)));
+  if (prolog_pad_attr)
+    {
+      tree prolog_pad_value = TREE_VALUE (TREE_VALUE (prolog_pad_attr));
+
+      if (tree_fits_uhwi_p (prolog_pad_value))
+	pad_size = tree_to_uhwi (prolog_pad_value);
+      else
+	gcc_unreachable ();
+
+    }
+  if (pad_size > 0)
+    targetm.asm_out.print_prolog_pad (file, pad_size, true);
+
   if (flag_sanitize & SANITIZE_ADDRESS)
     asan_function_start ();
 
diff --git a/gcc/target.def b/gcc/target.def
index d754337..63d3285 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -288,6 +288,12 @@ hidden, protected or internal visibility as specified by @var{visibility}.",
  void, (tree decl, int visibility),
  default_assemble_visibility)
 
+DEFHOOK
+(print_prolog_pad,
+ "Generate prologue pad",
+ void, (FILE *file, unsigned HOST_WIDE_INT pad_size, bool record_p),
+ default_print_prolog_pad)
+
 /* Output the assembler code for entry to a function.  */
 DEFHOOK
 (function_prologue,
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index dcf0863..05ab26d 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -1499,6 +1499,23 @@ default_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
   return move_by_pieces_ninsns (size, alignment, max_size + 1) < ratio;
 }
 
+void
+default_print_prolog_pad (FILE *file, unsigned HOST_WIDE_INT pad_size,
+			  bool record_p)
+{
+  if (record_p)
+    fprintf (file, "1:");
+  for (unsigned i = 0; i < pad_size; ++i)
+    fprintf (file, "\tnop\n");
+  if (record_p)
+    {
+      fprintf (file, "\t.section __prolog_pads_loc, \"a\",@progbits\n");
+      fprintf (file, "\t.quad 1b\n");
+      fprintf (file, "\t.long %lu\n", pad_size);
+      fprintf (file, "\t.previous\n");
+    }
+}
+
 bool
 default_profile_before_prologue (void)
 {
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index 47b5cfc..d03f993 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -199,6 +199,7 @@ extern bool default_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
 						    enum by_pieces_operation,
 						    bool);
 
+extern void default_print_prolog_pad (FILE *, unsigned HOST_WIDE_INT , bool);
 extern bool default_profile_before_prologue (void);
 extern reg_class_t default_preferred_reload_class (rtx, reg_class_t);
 extern reg_class_t default_preferred_output_reload_class (rtx, reg_class_t);
diff --git a/gcc/testsuite/c-c++-common/attribute-prolog_pad-1.c b/gcc/testsuite/c-c++-common/attribute-prolog_pad-1.c
new file mode 100644
index 0000000..7404dc5
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/attribute-prolog_pad-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+
+void f1(void) __attribute__((prolog_pad(1)));
+void f2(void) __attribute__((prolog_pad(2)));
+
+void
+f1 (void)
+{
+  f2 ();
+}
+
+void f2 (void)
+{
+  f1 ();
+}

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] add -fprolog-pad=N option to c-family
  2016-04-27 15:22 [PATCH] add -fprolog-pad=N option to c-family Torsten Duwe
@ 2016-04-27 16:26 ` Szabolcs Nagy
  2016-04-28  8:48   ` Maxim Kuvyrkov
  2016-04-28  8:40 ` [PATCH] " Maxim Kuvyrkov
  1 sibling, 1 reply; 15+ messages in thread
From: Szabolcs Nagy @ 2016-04-27 16:26 UTC (permalink / raw)
  To: Torsten Duwe, Maxim Kuvyrkov
  Cc: nd, Li Bin, Jiri Kosina, gcc-patches, Marcus Shawcroft, Takahiro Akashi

On 27/04/16 16:22, Torsten Duwe wrote:
> Hi Maxim,
> 
> thanks for starting the work on this; I have added the missing
> command line option. It builds now and the resulting compiler generates
> a linux kernel with the desired properties, so work can continue there.
> 
> 	Torsten

i guess the flag should be documented in invoke.texi

it's not clear what N means in -fprolog-pad=N, how
location recording is enabled and how it interacts
with -fipa-ra. (-pg disables -fipa-ra, but -fprolog-pad
works without -pg.)

with -mfentry, by default the user only has to
implement the fentry call (linux wants nops there, but
e.g. glibc could use -pg -mfentry for profiling on
aarch64 and the target specific details are easier to
document for an -m option than for something general).

the nop-padding is more general, but the size and
layout of nops and the call abi will be target
specific and the user will most likely need to modify
the binary (to get the right sequence) which needs
additional tooling.  i don't know who might use it
other than linux (which already has tools to deal with
-mfentry).

i'm not against nop-padding, but i think more evidence
is needed that the generalization is a good idea and
users can deal with the resulting issues.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] add -fprolog-pad=N option to c-family
  2016-04-27 15:22 [PATCH] add -fprolog-pad=N option to c-family Torsten Duwe
  2016-04-27 16:26 ` Szabolcs Nagy
@ 2016-04-28  8:40 ` Maxim Kuvyrkov
  2016-04-28 11:18   ` Torsten Duwe
  1 sibling, 1 reply; 15+ messages in thread
From: Maxim Kuvyrkov @ 2016-04-28  8:40 UTC (permalink / raw)
  To: Torsten Duwe; +Cc: Li Bin, Jiri Kosina, gcc-patches

> On Apr 27, 2016, at 6:22 PM, Torsten Duwe <duwe@suse.de> wrote:
> 
> Hi Maxim,
> 
> thanks for starting the work on this; I have added the missing
> command line option. It builds now and the resulting compiler generates
> a linux kernel with the desired properties, so work can continue there.

Thanks for working on this!

> 
> 	Torsten
> 
> diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
> index 9bc02fc..57265c5 100644
> --- a/gcc/c-family/c-common.c
> +++ b/gcc/c-family/c-common.c
> @@ -393,6 +393,7 @@ static tree handle_designated_init_attribute (tree *, tree, tree, int, bool *);
> static tree handle_bnd_variable_size_attribute (tree *, tree, tree, int, bool *);
> static tree handle_bnd_legacy (tree *, tree, tree, int, bool *);
> static tree handle_bnd_instrument (tree *, tree, tree, int, bool *);
> +static tree handle_prolog_pad_attribute (tree *, tree, tree, int, bool *);
> 
> static void check_function_nonnull (tree, int, tree *);
> static void check_nonnull_arg (void *, tree, unsigned HOST_WIDE_INT);
> @@ -833,6 +834,8 @@ const struct attribute_spec c_common_attribute_table[] =
> 			      handle_bnd_legacy, false },
>   { "bnd_instrument",         0, 0, true, false, false,
> 			      handle_bnd_instrument, false },
> +  { "prolog_pad",	      1, 1, false, true, true,
> +			      handle_prolog_pad_attribute, false },
>   { NULL,                     0, 0, false, false, false, NULL, false }
> };
> 
> @@ -9663,6 +9666,16 @@ handle_designated_init_attribute (tree *node, tree name, tree, int,
>   return NULL_TREE;
> }
> 
> +static tree
> +handle_prolog_pad_attribute (tree *, tree name, tree, int,
> +			     bool *)
> +{
> +  warning (OPT_Wattributes,
> +	   "%qE attribute is used", name);
> +
> +  return NULL_TREE;
> +}
> +
> 
> /* Check for valid arguments being passed to a function with FNTYPE.
>    There are NARGS arguments in the array ARGARRAY.  */
> diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c
> index 9ae181f..31a8026 100644
> --- a/gcc/c-family/c-opts.c
> +++ b/gcc/c-family/c-opts.c
> @@ -532,6 +532,10 @@ c_common_handle_option (size_t scode, const char *arg, int value,
>       cpp_opts->ext_numeric_literals = value;
>       break;
> 
> +    case OPT_fprolog_pad_:
> +      prolog_nop_pad_size = value;
> +      break;

As Szabolcs noted in this thread, we need to consider how -fprolog-pad= will play with IPA and LTO.  The decision to use __attribute__ to generate prolog pad for a function is specifically to handle LTO builds.  The option -fprolog-pad=N should set __attribute__((prolog_pad(N))) on every function in current translation unit, and the rest should be handled by the attribute logic.  This is not trivial to implement, and has been what stopped me from finishing the patch.

Your current patch is great for experiments for the kernel engineers to check if suggested approaches to code patching will work.  Still, I prefer to implement LTO-friendly way of handling -fprolog-pad=N via function attributes.

--
Maxim Kuvyrkov
www.linaro.org




> +
>     case OPT_idirafter:
>       add_path (xstrdup (arg), AFTER, 0, true);
>       break;
> diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
> index aafd802..929ebb6 100644
> --- a/gcc/c-family/c.opt
> +++ b/gcc/c-family/c.opt
> @@ -1407,6 +1407,10 @@ fpreprocessed
> C ObjC C++ ObjC++
> Treat the input file as already preprocessed.
> 
> +fprolog-pad=
> +C ObjC C++ ObjC++ RejectNegative Joined UInteger Var(prolog_nop_pad_size) Init(0)
> +Pad NOPs before each function prolog
> +
> ftrack-macro-expansion
> C ObjC C++ ObjC++ JoinedOrMissing RejectNegative UInteger
> ; converted into ftrack-macro-expansion=
> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
> index 1ce7181..9d10b10 100644
> --- a/gcc/doc/tm.texi
> +++ b/gcc/doc/tm.texi
> @@ -4553,6 +4553,10 @@ will select the smallest suitable mode.
> This section describes the macros that output function entry
> (@dfn{prologue}) and exit (@dfn{epilogue}) code.
> 
> +@deftypefn {Target Hook} void TARGET_ASM_PRINT_PROLOG_PAD (FILE *@var{file}, unsigned HOST_WIDE_INT @var{pad_size}, bool @var{record_p})
> +Generate prologue pad
> +@end deftypefn
> +
> @deftypefn {Target Hook} void TARGET_ASM_FUNCTION_PROLOGUE (FILE *@var{file}, HOST_WIDE_INT @var{size})
> If defined, a function that outputs the assembler code for entry to a
> function.  The prologue is responsible for setting up the stack frame,
> diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
> index a0a0a81..bda6d5c 100644
> --- a/gcc/doc/tm.texi.in
> +++ b/gcc/doc/tm.texi.in
> @@ -3662,6 +3662,8 @@ will select the smallest suitable mode.
> This section describes the macros that output function entry
> (@dfn{prologue}) and exit (@dfn{epilogue}) code.
> 
> +@hook TARGET_ASM_PRINT_PROLOG_PAD
> +
> @hook TARGET_ASM_FUNCTION_PROLOGUE
> 
> @hook TARGET_ASM_FUNCTION_END_PROLOGUE
> diff --git a/gcc/final.c b/gcc/final.c
> index 1edc446..e0cff80 100644
> --- a/gcc/final.c
> +++ b/gcc/final.c
> @@ -1753,6 +1753,7 @@ void
> final_start_function (rtx_insn *first, FILE *file,
> 		      int optimize_p ATTRIBUTE_UNUSED)
> {
> +  unsigned HOST_WIDE_INT pad_size = prolog_nop_pad_size;
>   block_depth = 0;
> 
>   this_is_asm_operands = 0;
> @@ -1765,6 +1766,21 @@ final_start_function (rtx_insn *first, FILE *file,
> 
>   high_block_linenum = high_function_linenum = last_linenum;
> 
> +  tree prolog_pad_attr
> +    = lookup_attribute ("prolog_pad", TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)));
> +  if (prolog_pad_attr)
> +    {
> +      tree prolog_pad_value = TREE_VALUE (TREE_VALUE (prolog_pad_attr));
> +
> +      if (tree_fits_uhwi_p (prolog_pad_value))
> +	pad_size = tree_to_uhwi (prolog_pad_value);
> +      else
> +	gcc_unreachable ();
> +
> +    }
> +  if (pad_size > 0)
> +    targetm.asm_out.print_prolog_pad (file, pad_size, true);
> +
>   if (flag_sanitize & SANITIZE_ADDRESS)
>     asan_function_start ();
> 
> diff --git a/gcc/target.def b/gcc/target.def
> index d754337..63d3285 100644
> --- a/gcc/target.def
> +++ b/gcc/target.def
> @@ -288,6 +288,12 @@ hidden, protected or internal visibility as specified by @var{visibility}.",
>  void, (tree decl, int visibility),
>  default_assemble_visibility)
> 
> +DEFHOOK
> +(print_prolog_pad,
> + "Generate prologue pad",
> + void, (FILE *file, unsigned HOST_WIDE_INT pad_size, bool record_p),
> + default_print_prolog_pad)
> +
> /* Output the assembler code for entry to a function.  */
> DEFHOOK
> (function_prologue,
> diff --git a/gcc/targhooks.c b/gcc/targhooks.c
> index dcf0863..05ab26d 100644
> --- a/gcc/targhooks.c
> +++ b/gcc/targhooks.c
> @@ -1499,6 +1499,23 @@ default_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
>   return move_by_pieces_ninsns (size, alignment, max_size + 1) < ratio;
> }
> 
> +void
> +default_print_prolog_pad (FILE *file, unsigned HOST_WIDE_INT pad_size,
> +			  bool record_p)
> +{
> +  if (record_p)
> +    fprintf (file, "1:");
> +  for (unsigned i = 0; i < pad_size; ++i)
> +    fprintf (file, "\tnop\n");
> +  if (record_p)
> +    {
> +      fprintf (file, "\t.section __prolog_pads_loc, \"a\",@progbits\n");
> +      fprintf (file, "\t.quad 1b\n");
> +      fprintf (file, "\t.long %lu\n", pad_size);
> +      fprintf (file, "\t.previous\n");
> +    }
> +}
> +
> bool
> default_profile_before_prologue (void)
> {
> diff --git a/gcc/targhooks.h b/gcc/targhooks.h
> index 47b5cfc..d03f993 100644
> --- a/gcc/targhooks.h
> +++ b/gcc/targhooks.h
> @@ -199,6 +199,7 @@ extern bool default_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
> 						    enum by_pieces_operation,
> 						    bool);
> 
> +extern void default_print_prolog_pad (FILE *, unsigned HOST_WIDE_INT , bool);
> extern bool default_profile_before_prologue (void);
> extern reg_class_t default_preferred_reload_class (rtx, reg_class_t);
> extern reg_class_t default_preferred_output_reload_class (rtx, reg_class_t);
> diff --git a/gcc/testsuite/c-c++-common/attribute-prolog_pad-1.c b/gcc/testsuite/c-c++-common/attribute-prolog_pad-1.c
> new file mode 100644
> index 0000000..7404dc5
> --- /dev/null
> +++ b/gcc/testsuite/c-c++-common/attribute-prolog_pad-1.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +
> +void f1(void) __attribute__((prolog_pad(1)));
> +void f2(void) __attribute__((prolog_pad(2)));
> +
> +void
> +f1 (void)
> +{
> +  f2 ();
> +}
> +
> +void f2 (void)
> +{
> +  f1 ();
> +}

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] add -fprolog-pad=N option to c-family
  2016-04-27 16:26 ` Szabolcs Nagy
@ 2016-04-28  8:48   ` Maxim Kuvyrkov
  2016-04-28 10:58     ` Szabolcs Nagy
  2016-09-29 15:18     ` [PATCH v2] " Torsten Duwe
  0 siblings, 2 replies; 15+ messages in thread
From: Maxim Kuvyrkov @ 2016-04-28  8:48 UTC (permalink / raw)
  To: Szabolcs Nagy
  Cc: Torsten Duwe, nd, Li Bin, Jiri Kosina, GCC Patches,
	Marcus Shawcroft, Takahiro Akashi

> On Apr 27, 2016, at 7:26 PM, Szabolcs Nagy <szabolcs.nagy@arm.com> wrote:
> 
> On 27/04/16 16:22, Torsten Duwe wrote:
>> Hi Maxim,
>> 
>> thanks for starting the work on this; I have added the missing
>> command line option. It builds now and the resulting compiler generates
>> a linux kernel with the desired properties, so work can continue there.
>> 
>> 	Torsten
> 
> i guess the flag should be documented in invoke.texi
> 
> it's not clear what N means in -fprolog-pad=N, how
> location recording is enabled and how it interacts
> with -fipa-ra. (-pg disables -fipa-ra, but -fprolog-pad
> works without -pg.)

I think, it should be responsibility of the user to disable -fipa-ra if code intended to be patched-in will be incompatible with IPA-RA.  I agree, though, that documentation of -fprolog-pad= should make a special note of this fact and recommend inclusion of -fno-ipa-ra to the cflags whenever -fprolog-pad= is used..

> 
> with -mfentry, by default the user only has to
> implement the fentry call (linux wants nops there, but
> e.g. glibc could use -pg -mfentry for profiling on
> aarch64 and the target specific details are easier to
> document for an -m option than for something general).

I don't understand your point here, could you elaborate, please?

> 
> the nop-padding is more general, but the size and
> layout of nops and the call abi will be target
> specific and the user will most likely need to modify
> the binary (to get the right sequence) which needs
> additional tooling.  i don't know who might use it
> other than linux (which already has tools to deal with
> -mfentry).

Right, but this tooling will require minimal (if any) changes to be adapted to nop-pad approach.  If I remember correctly, recent versions of GCC and kernel for x86_64 generate NOPs, not the call sequence in the prologs when -mfentry is used.

> 
> i'm not against nop-padding, but i think more evidence
> is needed that the generalization is a good idea and
> users can deal with the resulting issues.

--
Maxim Kuvyrkov
www.linaro.org





^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] add -fprolog-pad=N option to c-family
  2016-04-28  8:48   ` Maxim Kuvyrkov
@ 2016-04-28 10:58     ` Szabolcs Nagy
  2016-04-28 11:32       ` Torsten Duwe
  2016-05-09  5:53       ` AKASHI Takahiro
  2016-09-29 15:18     ` [PATCH v2] " Torsten Duwe
  1 sibling, 2 replies; 15+ messages in thread
From: Szabolcs Nagy @ 2016-04-28 10:58 UTC (permalink / raw)
  To: Maxim Kuvyrkov
  Cc: nd, Torsten Duwe, Li Bin, Jiri Kosina, GCC Patches,
	Marcus Shawcroft, Takahiro Akashi

On 28/04/16 09:47, Maxim Kuvyrkov wrote:
>> On Apr 27, 2016, at 7:26 PM, Szabolcs Nagy <szabolcs.nagy@arm.com> wrote:
>>
>> with -mfentry, by default the user only has to
>> implement the fentry call (linux wants nops there, but
>> e.g. glibc could use -pg -mfentry for profiling on
>> aarch64 and the target specific details are easier to
>> document for an -m option than for something general).
> 
> I don't understand your point here, could you elaborate, please?
> 

if we only provide -mfentry then

- the kernel can use it (they have tools to nop patch the binary),

- others who don't want to fiddle with nops, just have the call,
can also use it (e.g. user-space profiling cannot really use
something that needs binary patching in case the user prefers
-pg -mfentry over the current -pg behaviour).

- it's target specific, so the magic abi of the fentry call can
be documented by the target according to the specific instruction
sequence that is used. (with nop-padding there are psabi and
compiler optimization interactions that may be hard to document
in a generic way and letting the user figure it out may cause
problems later in compiler development.. but i'm just speculating
based on the powerpc toc handling and ipa-ra findings.)

>> the nop-padding is more general, but the size and
>> layout of nops and the call abi will be target
>> specific and the user will most likely need to modify
>> the binary (to get the right sequence) which needs
>> additional tooling.  i don't know who might use it
>> other than linux (which already has tools to deal with
>> -mfentry).
> 
> Right, but this tooling will require minimal (if any) changes
> to be adapted to nop-pad approach.  If I remember correctly,
> recent versions of GCC and kernel for x86_64 generate NOPs,
> not the call sequence in the prologs when -mfentry is used.

i'm trying to find where this happens in the kernel, but
i only see scripts/recordmcount.{c,pl} which are based on
nop patching the fentry/mcount call sites.

without such call sites the tools have to be implemented
differently and the way the kernel records the call site
positions might not match the prolog-pad recording.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] add -fprolog-pad=N option to c-family
  2016-04-28  8:40 ` [PATCH] " Maxim Kuvyrkov
@ 2016-04-28 11:18   ` Torsten Duwe
  2016-04-28 15:07     ` Jeff Law
  0 siblings, 1 reply; 15+ messages in thread
From: Torsten Duwe @ 2016-04-28 11:18 UTC (permalink / raw)
  To: Maxim Kuvyrkov; +Cc: Li Bin, Jiri Kosina, gcc-patches

On Thu, Apr 28, 2016 at 11:39:48AM +0300, Maxim Kuvyrkov wrote:
> > On Apr 27, 2016, at 6:22 PM, Torsten Duwe <duwe@suse.de> wrote:
> 
> Your current patch is great for experiments for the kernel engineers to check if suggested approaches to code patching will work.  Still, I prefer to implement LTO-friendly way of handling -fprolog-pad=N via function attributes.

That was exactly my intention. I only wanted *some* working compiler.
I'm sure you compiler people will have a better way to finally implement this.

All I can say so far about the ipa-ra issue is that it'd be great if
x9(?) could be left as volatile / scratch; the rest can be preserved.

	Torsten

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] add -fprolog-pad=N option to c-family
  2016-04-28 10:58     ` Szabolcs Nagy
@ 2016-04-28 11:32       ` Torsten Duwe
  2016-05-09  5:53       ` AKASHI Takahiro
  1 sibling, 0 replies; 15+ messages in thread
From: Torsten Duwe @ 2016-04-28 11:32 UTC (permalink / raw)
  To: Szabolcs Nagy
  Cc: Maxim Kuvyrkov, nd, Li Bin, Jiri Kosina, GCC Patches,
	Marcus Shawcroft, Takahiro Akashi

On Thu, Apr 28, 2016 at 11:58:25AM +0100, Szabolcs Nagy wrote:
> On 28/04/16 09:47, Maxim Kuvyrkov wrote:
> >> On Apr 27, 2016, at 7:26 PM, Szabolcs Nagy <szabolcs.nagy@arm.com> wrote:
> >>
> >> with -mfentry, by default the user only has to
> >> implement the fentry call (linux wants nops there, but
> >> e.g. glibc could use -pg -mfentry for profiling on
> >> aarch64 and the target specific details are easier to
> >> document for an -m option than for something general).
> > 
> > I don't understand your point here, could you elaborate, please?
> > 
> 
> if we only provide -mfentry then
> 
> - the kernel can use it (they have tools to nop patch the binary),
> 
> - others who don't want to fiddle with nops, just have the call,
> can also use it (e.g. user-space profiling cannot really use
> something that needs binary patching in case the user prefers
> -pg -mfentry over the current -pg behaviour).

Any examples of users not satisfied with the current -pg ;-) ?

> - it's target specific, so the magic abi of the fentry call can
> be documented by the target according to the specific instruction

There's a downside to this: you will have to reimplement it in gcc

  * for every architecture
  * for every ABI variant

while the generic approach is -- well -- somewhat generic :-]

> sequence that is used. (with nop-padding there are psabi and
> compiler optimization interactions that may be hard to document
> in a generic way and letting the user figure it out may cause
> problems later in compiler development.. but i'm just speculating
> based on the powerpc toc handling and ipa-ra findings.)

ipa-ra is from hell ;) At least from a function-patcher's standpoint.
You may argue that OTOH function binary patching is from hell :)

> >> the nop-padding is more general, but the size and
> >> layout of nops and the call abi will be target
> >> specific and the user will most likely need to modify
> >> the binary (to get the right sequence) which needs
> >> additional tooling.  i don't know who might use it
> >> other than linux (which already has tools to deal with
> >> -mfentry).

On exactly 1 (one!) architecture. s390x uses NOP padding, hint, hint...

> i'm trying to find where this happens in the kernel, but
> i only see scripts/recordmcount.{c,pl} which are based on
> nop patching the fentry/mcount call sites.
> 
> without such call sites the tools have to be implemented
> differently and the way the kernel records the call site
> positions might not match the prolog-pad recording.

AFAICS Maxim has provided a nice mechanism to find the NOP pads.

Let's see how far we can get, then discuss this further,
I suggest.

	Torsten

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] add -fprolog-pad=N option to c-family
  2016-04-28 11:18   ` Torsten Duwe
@ 2016-04-28 15:07     ` Jeff Law
  0 siblings, 0 replies; 15+ messages in thread
From: Jeff Law @ 2016-04-28 15:07 UTC (permalink / raw)
  To: Torsten Duwe, Maxim Kuvyrkov; +Cc: Li Bin, Jiri Kosina, gcc-patches

On 04/28/2016 05:18 AM, Torsten Duwe wrote:
> On Thu, Apr 28, 2016 at 11:39:48AM +0300, Maxim Kuvyrkov wrote:
>>> On Apr 27, 2016, at 6:22 PM, Torsten Duwe <duwe@suse.de> wrote:
>>
>> Your current patch is great for experiments for the kernel engineers to check if suggested approaches to code patching will work.  Still, I prefer to implement LTO-friendly way of handling -fprolog-pad=N via function attributes.
>
> That was exactly my intention. I only wanted *some* working compiler.
> I'm sure you compiler people will have a better way to finally implement this.
Conceptually we have the concept of nops insn patterns, so generically 
I'd implement this by emitting a suitable set of nops followed by a 
scheduling barrier, then thread the mess at the start of the prologue. 
This would be 99.9% target independent changes.

We'd just punt targets that don't represent prologues as RTL.


>
> All I can say so far about the ipa-ra issue is that it'd be great if
> x9(?) could be left as volatile / scratch; the rest can be preserved.
ipa-ra doesn't really work that way.  It just notes what's used in the 
callee and the caller is allowed to look at that information and use it 
to optimize stuff on the caller side.

For example, call-clobbered registers that are not used in the callee 
can be used in the caller to hold values across the call.

This is going to wreck havoc for anything that assumes a call-clobbered 
register can always be safely used in the callee, particularly in the 
patched codepath.  One could argue that the patched codepath is the 
uncommon case and should be responsible for saving/restoring any 
register it uses to ensure it doesn't mess up any visible state.

Jeff


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] add -fprolog-pad=N option to c-family
  2016-04-28 10:58     ` Szabolcs Nagy
  2016-04-28 11:32       ` Torsten Duwe
@ 2016-05-09  5:53       ` AKASHI Takahiro
  1 sibling, 0 replies; 15+ messages in thread
From: AKASHI Takahiro @ 2016-05-09  5:53 UTC (permalink / raw)
  To: Szabolcs Nagy
  Cc: Maxim Kuvyrkov, nd, Torsten Duwe, Li Bin, Jiri Kosina,
	GCC Patches, Marcus Shawcroft

Hi,

Let me make some comments from the kernel side.

On Thu, Apr 28, 2016 at 11:58:25AM +0100, Szabolcs Nagy wrote:
> On 28/04/16 09:47, Maxim Kuvyrkov wrote:
> >> On Apr 27, 2016, at 7:26 PM, Szabolcs Nagy <szabolcs.nagy@arm.com> wrote:
> >>
> >> with -mfentry, by default the user only has to
> >> implement the fentry call (linux wants nops there, but
> >> e.g. glibc could use -pg -mfentry for profiling on
> >> aarch64 and the target specific details are easier to
> >> document for an -m option than for something general).
> > 
> > I don't understand your point here, could you elaborate, please?
> > 
> 
> if we only provide -mfentry then
> 
> - the kernel can use it (they have tools to nop patch the binary),

Do you mean scripts/recordmcount.c,.pl?
This tool is intended to generate __mcount_loc section, which contains
a list of locations of callsites of mcount/fentry, and won't make any
changes to the kernel binary.

> - others who don't want to fiddle with nops, just have the call,
> can also use it (e.g. user-space profiling cannot really use
> something that needs binary patching in case the user prefers
> -pg -mfentry over the current -pg behaviour).

Well, -mfentry is simple and perfect on x86, but seems to be not best-fit
to arm, thinking that -mfentry means that it inserts a callsite at the very
beginning of a function. See a thread of discussions about -mfentry on arm64.

> - it's target specific, so the magic abi of the fentry call can
> be documented by the target according to the specific instruction
> sequence that is used. (with nop-padding there are psabi and
> compiler optimization interactions that may be hard to document
> in a generic way and letting the user figure it out may cause
> problems later in compiler development.. but i'm just speculating
> based on the powerpc toc handling and ipa-ra findings.)
> 
> >> the nop-padding is more general, but the size and
> >> layout of nops and the call abi will be target
> >> specific and the user will most likely need to modify
> >> the binary (to get the right sequence) which needs
> >> additional tooling.  i don't know who might use it
> >> other than linux (which already has tools to deal with
> >> -mfentry).

Please note that code-patching(/nop-padding) is totally up to the kernel
and arch-specific code. The kernel will do that either
- at the initialization of kernel ftrace, or
- at runtime dynamically by user's instructions (through sysfs)

The tool (recordmcount) will never interact with the kernel at runtime.

> > 
> > Right, but this tooling will require minimal (if any) changes
> > to be adapted to nop-pad approach.  If I remember correctly,
> > recent versions of GCC and kernel for x86_64 generate NOPs,
> > not the call sequence in the prologs when -mfentry is used.

I think that Maxim mentioned the following x86-specific gcc options:
- -mrecord-mcount
- -mnop-mcount
but as far as I checked, the current kernel does not utilizes these
options.

> i'm trying to find where this happens in the kernel, but
> i only see scripts/recordmcount.{c,pl} which are based on
> nop patching the fentry/mcount call sites.
> 
> without such call sites the tools have to be implemented
> differently and the way the kernel records the call site
> positions might not match the prolog-pad recording.

Where the callsite resides in a given nop sequence will depend on arch,
but again, this issue can be handled by arch-specific code.

Thanks,
-Takahiro AKASHI

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v2] add -fprolog-pad=N option to c-family
  2016-04-28  8:48   ` Maxim Kuvyrkov
  2016-04-28 10:58     ` Szabolcs Nagy
@ 2016-09-29 15:18     ` Torsten Duwe
  2016-09-30  9:59       ` Jose E. Marchesi
  2016-10-04 21:46       ` Maxim Kuvyrkov
  1 sibling, 2 replies; 15+ messages in thread
From: Torsten Duwe @ 2016-09-29 15:18 UTC (permalink / raw)
  To: GCC Patches
  Cc: Szabolcs Nagy, nd, Maxim Kuvyrkov, Li Bin, Jiri Kosina,
	Marcus Shawcroft, Takahiro Akashi, Andrew Wafaa

In case anybody missed it, the Linux kernel side to make use
of this has also been finished meanwhile. Of course it can not
be accepted without compiler support; and this feature patch
is much more versatile than just Linux kernel live patching
on a single architecture.

Changes since the previous version
(which in turn was based on Maxim's suggestion):

* Document the feature in *.texi

* Automatically disable IPA-RA, like normal profiling does.
  You never know in advance what the code patched in at run time will do.
  Any optimisation here is potentially wrong.

* record a prolog_nop_pad_size value specified on the command line
  in each function's attributes, so that it survives an LTO pipe.

Signed-off-by: Torsten Duwe <duwe@suse.de>

diff --git a/gcc/attribs.c b/gcc/attribs.c
index 16996e9..a5cf2aa 100644
--- a/gcc/attribs.c
+++ b/gcc/attribs.c
@@ -365,6 +365,21 @@ decl_attributes (tree *node, tree attributes, int flags)
   if (!attributes_initialized)
     init_attributes ();
 
+  /* If we're building NOP pads because of a command line arg, note the size
+     for LTO builds, unless the attribute has already been overridden. */
+  if (TREE_CODE (*node) == FUNCTION_DECL && prolog_nop_pad_size > 0)
+    {
+      tree pp_attr = lookup_attribute ("prolog_pad", attributes);
+      if (! pp_attr )
+	{
+	  tree pp_size = build_int_cstu (integer_type_node, prolog_nop_pad_size);
+
+	  attributes = tree_cons (get_identifier ("prolog_pad"),
+				  tree_cons ( NULL_TREE, pp_size, NULL_TREE),
+				  attributes);
+	}
+    }
+
   /* If this is a function and the user used #pragma GCC optimize, add the
      options to the attribute((optimize(...))) list.  */
   if (TREE_CODE (*node) == FUNCTION_DECL && current_optimize_pragma)
diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index f2846bb..278a99e 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -393,6 +393,7 @@ static tree handle_designated_init_attribute (tree *, tree, tree, int, bool *);
 static tree handle_bnd_variable_size_attribute (tree *, tree, tree, int, bool *);
 static tree handle_bnd_legacy (tree *, tree, tree, int, bool *);
 static tree handle_bnd_instrument (tree *, tree, tree, int, bool *);
+static tree handle_prolog_pad_attribute (tree *, tree, tree, int, bool *);
 
 static void check_nonnull_arg (void *, tree, unsigned HOST_WIDE_INT);
 static bool nonnull_check_p (tree, unsigned HOST_WIDE_INT);
@@ -834,6 +835,8 @@ const struct attribute_spec c_common_attribute_table[] =
 			      handle_bnd_legacy, false },
   { "bnd_instrument",         0, 0, true, false, false,
 			      handle_bnd_instrument, false },
+  { "prolog_pad",	      1, 1, false, true, true,
+			      handle_prolog_pad_attribute, false },
   { NULL,                     0, 0, false, false, false, NULL, false }
 };
 
@@ -9687,6 +9690,14 @@ handle_designated_init_attribute (tree *node, tree name, tree, int,
   return NULL_TREE;
 }
 
+static tree
+handle_prolog_pad_attribute (tree *, tree, tree, int,
+			     bool *)
+{
+  /* Nothing to be done here. */
+  return NULL_TREE;
+}
+
 \f
 /* Check for valid arguments being passed to a function with FNTYPE.
    There are NARGS arguments in the array ARGARRAY.  LOC should be used for
diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c
index fec58bc..0220faa 100644
--- a/gcc/c-family/c-opts.c
+++ b/gcc/c-family/c-opts.c
@@ -536,6 +536,10 @@ c_common_handle_option (size_t scode, const char *arg, int value,
       cpp_opts->ext_numeric_literals = value;
       break;
 
+    case OPT_fprolog_pad_:
+      prolog_nop_pad_size = value;
+      break;
+
     case OPT_idirafter:
       add_path (xstrdup (arg), AFTER, 0, true);
       break;
diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index 88038a0..804c654 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -1427,6 +1427,10 @@ fpreprocessed
 C ObjC C++ ObjC++
 Treat the input file as already preprocessed.
 
+fprolog-pad=
+C ObjC C++ ObjC++ RejectNegative Joined UInteger Var(prolog_nop_pad_size) Init(0)
+Pad NOPs before each function prolog
+
 ftrack-macro-expansion
 C ObjC C++ ObjC++ JoinedOrMissing RejectNegative UInteger
 ; converted into ftrack-macro-expansion=
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 2ed9285..463a5a5 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -10387,6 +10387,16 @@ of the function name, it is considered to be a match.  For C99 and C++
 extended identifiers, the function name must be given in UTF-8, not
 using universal character names.
 
+@item -fprolog-pad=N
+@opindex fprolog-pad
+Generate a pad of N nops right at the beginning
+of each function, which can be used to patch in any desired
+instrumentation at run time, provided that the code segment
+is writeable. For run time identification, the starting addresses
+of these pads, which correspond to their respective functions,
+are additionally collected in the @code{__prolog_pads_loc} section
+of the resulting binary.
+
 @end table
 
 
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 745910f..f6f9d59 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -4553,6 +4553,10 @@ will select the smallest suitable mode.
 This section describes the macros that output function entry
 (@dfn{prologue}) and exit (@dfn{epilogue}) code.
 
+@deftypefn {Target Hook} void TARGET_ASM_PRINT_PROLOG_PAD (FILE *@var{file}, unsigned HOST_WIDE_INT @var{pad_size}, bool @var{record_p})
+Generate prologue pad
+@end deftypefn
+
 @deftypefn {Target Hook} void TARGET_ASM_FUNCTION_PROLOGUE (FILE *@var{file}, HOST_WIDE_INT @var{size})
 If defined, a function that outputs the assembler code for entry to a
 function.  The prologue is responsible for setting up the stack frame,
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index f31c763..b9585da 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -3662,6 +3662,8 @@ will select the smallest suitable mode.
 This section describes the macros that output function entry
 (@dfn{prologue}) and exit (@dfn{epilogue}) code.
 
+@hook TARGET_ASM_PRINT_PROLOG_PAD
+
 @hook TARGET_ASM_FUNCTION_PROLOGUE
 
 @hook TARGET_ASM_FUNCTION_END_PROLOGUE
diff --git a/gcc/final.c b/gcc/final.c
index 55cf509..94bbf8f 100644
--- a/gcc/final.c
+++ b/gcc/final.c
@@ -1754,6 +1754,7 @@ void
 final_start_function (rtx_insn *first, FILE *file,
 		      int optimize_p ATTRIBUTE_UNUSED)
 {
+  unsigned HOST_WIDE_INT pad_size = prolog_nop_pad_size;
   block_depth = 0;
 
   this_is_asm_operands = 0;
@@ -1766,6 +1767,21 @@ final_start_function (rtx_insn *first, FILE *file,
 
   high_block_linenum = high_function_linenum = last_linenum;
 
+  tree prolog_pad_attr
+    = lookup_attribute ("prolog_pad", TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)));
+  if (prolog_pad_attr)
+    {
+      tree prolog_pad_value = TREE_VALUE (TREE_VALUE (prolog_pad_attr));
+
+      if (tree_fits_uhwi_p (prolog_pad_value))
+	pad_size = tree_to_uhwi (prolog_pad_value);
+      else
+	gcc_unreachable ();
+
+    }
+  if (pad_size > 0)
+    targetm.asm_out.print_prolog_pad (file, pad_size, true);
+
   if (flag_sanitize & SANITIZE_ADDRESS)
     asan_function_start ();
 
diff --git a/gcc/target.def b/gcc/target.def
index 20f2b32..e0a4cc4 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -288,6 +288,12 @@ hidden, protected or internal visibility as specified by @var{visibility}.",
  void, (tree decl, int visibility),
  default_assemble_visibility)
 
+DEFHOOK
+(print_prolog_pad,
+ "Generate prologue pad",
+ void, (FILE *file, unsigned HOST_WIDE_INT pad_size, bool record_p),
+ default_print_prolog_pad)
+
 /* Output the assembler code for entry to a function.  */
 DEFHOOK
 (function_prologue,
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index a342277..41e9850 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -1503,6 +1503,22 @@ default_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
   return move_by_pieces_ninsns (size, alignment, max_size + 1) < ratio;
 }
 
+void
+default_print_prolog_pad (FILE *file, unsigned HOST_WIDE_INT pad_size,
+			  bool record_p)
+{
+  if (record_p)
+    fprintf (file, "1:");
+  for (unsigned i = 0; i < pad_size; ++i)
+    fprintf (file, "\tnop\n");
+  if (record_p)
+    {
+      fprintf (file, "\t.section __prolog_pads_loc, \"a\",@progbits\n");
+      fprintf (file, "\t.quad 1b\n");
+      fprintf (file, "\t.previous\n");
+    }
+}
+
 bool
 default_profile_before_prologue (void)
 {
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index 7687c39..6bb41c4 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -200,6 +200,7 @@ extern bool default_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
 						    enum by_pieces_operation,
 						    bool);
 
+extern void default_print_prolog_pad (FILE *, unsigned HOST_WIDE_INT , bool);
 extern bool default_profile_before_prologue (void);
 extern reg_class_t default_preferred_reload_class (rtx, reg_class_t);
 extern reg_class_t default_preferred_output_reload_class (rtx, reg_class_t);
diff --git a/gcc/testsuite/c-c++-common/attribute-prolog_pad-1.c b/gcc/testsuite/c-c++-common/attribute-prolog_pad-1.c
new file mode 100644
index 0000000..7404dc5
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/attribute-prolog_pad-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+
+void f1(void) __attribute__((prolog_pad(1)));
+void f2(void) __attribute__((prolog_pad(2)));
+
+void
+f1 (void)
+{
+  f2 ();
+}
+
+void f2 (void)
+{
+  f1 ();
+}
diff --git a/gcc/toplev.c b/gcc/toplev.c
index 8979d26..2339ce8 100644
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@@ -1580,8 +1580,10 @@ process_options (void)
     }
 
  /* Do not use IPA optimizations for register allocation if profiler is active
+    or prolog-pads are inserted for run-time instrumentation
     or port does not emit prologue and epilogue as RTL.  */
-  if (profile_flag || !targetm.have_prologue () || !targetm.have_epilogue ())
+  if (profile_flag || prolog_nop_pad_size ||
+      !targetm.have_prologue () || !targetm.have_epilogue ())
     flag_ipa_ra = 0;
 
   /* Enable -Werror=coverage-mismatch when -Werror and -Wno-error

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] add -fprolog-pad=N option to c-family
  2016-09-29 15:18     ` [PATCH v2] " Torsten Duwe
@ 2016-09-30  9:59       ` Jose E. Marchesi
  2016-09-30 10:22         ` Torsten Duwe
  2016-10-03  9:45         ` AKASHI Takahiro
  2016-10-04 21:46       ` Maxim Kuvyrkov
  1 sibling, 2 replies; 15+ messages in thread
From: Jose E. Marchesi @ 2016-09-30  9:59 UTC (permalink / raw)
  To: Torsten Duwe
  Cc: GCC Patches, Szabolcs Nagy, nd, Maxim Kuvyrkov, Li Bin,
	Jiri Kosina, Marcus Shawcroft, Takahiro Akashi, Andrew Wafaa


    In case anybody missed it, the Linux kernel side to make use
    of this has also been finished meanwhile. Of course it can not
    be accepted without compiler support; and this feature patch
    is much more versatile than just Linux kernel live patching
    on a single architecture.

How is this supposed to be exploited atomically in RISC arches such as
sparc?  In such architectures you usually need to patch several
instructions to load an absolute address into a register.

If a general mechanism is what is intended I would suggest to offer the
possibility of extending the nops _before_ the function entry point,
like in:

(a) nop   ! Load address
    nop   ! Load address
    nop   ! Load address
    nop   ! Load address
    nop   ! Jump to loaded address.
entry:
(b) nop   ! PC-relative jump to (a)
    save %sp, bleh, %sp
    ...

So after the live-patcher patches the loading of the destination address
and the jump, it can atomically patch (b) to effectively replace the
implementation of `entry'.

Wdyt?

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] add -fprolog-pad=N option to c-family
  2016-09-30  9:59       ` Jose E. Marchesi
@ 2016-09-30 10:22         ` Torsten Duwe
  2016-10-03  9:45         ` AKASHI Takahiro
  1 sibling, 0 replies; 15+ messages in thread
From: Torsten Duwe @ 2016-09-30 10:22 UTC (permalink / raw)
  To: Jose E. Marchesi
  Cc: GCC Patches, Szabolcs Nagy, nd, Maxim Kuvyrkov, Li Bin,
	Jiri Kosina, Marcus Shawcroft, Takahiro Akashi, Andrew Wafaa

On Fri, Sep 30, 2016 at 12:01:47PM +0200, Jose E. Marchesi wrote:
> 
> How is this supposed to be exploited atomically in RISC arches such as
> sparc?  In such architectures you usually need to patch several
> instructions to load an absolute address into a register.

http://mpe.github.io/posts/2016/05/23/kernel-live-patching-for-ppc64le/

Haven't looked at sparc yet.

HTH,
	Torsten

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] add -fprolog-pad=N option to c-family
  2016-09-30  9:59       ` Jose E. Marchesi
  2016-09-30 10:22         ` Torsten Duwe
@ 2016-10-03  9:45         ` AKASHI Takahiro
  1 sibling, 0 replies; 15+ messages in thread
From: AKASHI Takahiro @ 2016-10-03  9:45 UTC (permalink / raw)
  To: Jose E. Marchesi
  Cc: Torsten Duwe, GCC Patches, Szabolcs Nagy, nd, Maxim Kuvyrkov,
	Li Bin, Jiri Kosina, Marcus Shawcroft, Andrew Wafaa

On Fri, Sep 30, 2016 at 12:01:47PM +0200, Jose E. Marchesi wrote:
> 
>     In case anybody missed it, the Linux kernel side to make use
>     of this has also been finished meanwhile. Of course it can not
>     be accepted without compiler support; and this feature patch
>     is much more versatile than just Linux kernel live patching
>     on a single architecture.
> 
> How is this supposed to be exploited atomically in RISC arches such as
> sparc?  In such architectures you usually need to patch several
> instructions to load an absolute address into a register.

We had some disucssions in the context of arm64:
https://gcc.gnu.org/ml/gcc-patches/2016-04/msg01093.html

But I don't think that we reached a final consensus at that time.

Thanks,
-Takahiro AKASHI

> If a general mechanism is what is intended I would suggest to offer the
> possibility of extending the nops _before_ the function entry point,
> like in:
> 
> (a) nop   ! Load address
>     nop   ! Load address
>     nop   ! Load address
>     nop   ! Load address
>     nop   ! Jump to loaded address.
> entry:
> (b) nop   ! PC-relative jump to (a)
>     save %sp, bleh, %sp
>     ...
> 
> So after the live-patcher patches the loading of the destination address
> and the jump, it can atomically patch (b) to effectively replace the
> implementation of `entry'.
> 
> Wdyt?
> 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] add -fprolog-pad=N option to c-family
  2016-09-29 15:18     ` [PATCH v2] " Torsten Duwe
  2016-09-30  9:59       ` Jose E. Marchesi
@ 2016-10-04 21:46       ` Maxim Kuvyrkov
  2016-12-06 14:02         ` Maxim Kuvyrkov
  1 sibling, 1 reply; 15+ messages in thread
From: Maxim Kuvyrkov @ 2016-10-04 21:46 UTC (permalink / raw)
  To: Torsten Duwe
  Cc: GCC Patches, Szabolcs Nagy, nd, Li Bin, Jiri Kosina,
	Marcus Shawcroft, Takahiro Akashi, Andrew Wafaa,
	Richard Earnshaw

> On Sep 29, 2016, at 11:14 AM, Torsten Duwe <duwe@suse.de> wrote:
> 
> In case anybody missed it, the Linux kernel side to make use
> of this has also been finished meanwhile. Of course it can not
> be accepted without compiler support; and this feature patch
> is much more versatile than just Linux kernel live patching
> on a single architecture.

Hi Torsten,

Good job moving -fprolog-pad= forward!  I've reviewed your patch, and it looks OK with the minor comments inline.  I've CC'ed Richard E. since you will need a global maintainer approval for this change.

Ideally, I want to improve support for -fprolog-pad=N and __attribute__((prolog_pad(N))) to provide functionality to also output pad before the function label to address use-cases for s390, sparc, etc (what Jose E. Marchesi was referring to).  I.e., -fprolog-pad= option would accept both -fprolog-pad=N and -fprolog-pad=M,N forms -- issue M nops before function label and N nops after function label.  Similarly for __attribute__((prolog_pad(N,M))).  I (or you :-) ) can attempt to implement this functionality before stage1 closes, but it should not block this initial patch.

Comments on the patch below.

> 
> Changes since the previous version
> (which in turn was based on Maxim's suggestion):
> 
> * Document the feature in *.texi
> 
> * Automatically disable IPA-RA, like normal profiling does.
>  You never know in advance what the code patched in at run time will do.
>  Any optimisation here is potentially wrong.
> 
> * record a prolog_nop_pad_size value specified on the command line
>  in each function's attributes, so that it survives an LTO pipe.
> 
> Signed-off-by: Torsten Duwe <duwe@suse.de>
> 
> diff --git a/gcc/attribs.c b/gcc/attribs.c
> index 16996e9..a5cf2aa 100644
> --- a/gcc/attribs.c
> +++ b/gcc/attribs.c
> @@ -365,6 +365,21 @@ decl_attributes (tree *node, tree attributes, int flags)
>   if (!attributes_initialized)
>     init_attributes ();
> 
> +  /* If we're building NOP pads because of a command line arg, note the size
> +     for LTO builds, unless the attribute has already been overridden. */
> +  if (TREE_CODE (*node) == FUNCTION_DECL && prolog_nop_pad_size > 0)
> +    {
> +      tree pp_attr = lookup_attribute ("prolog_pad", attributes);
> +      if (! pp_attr )

Coding style: should be "if (!pp_attr)"

Also, this clause implies that attribute takes precedence over command-line option, and the two are not attempted to be merged.  I agree that this is a reasonable approach, but consider adding a warning if prolog_nop_pad_size is bigger than the value of the existing attribute.  Something like ...

> +	{
> +	  tree pp_size = build_int_cstu (integer_type_node, prolog_nop_pad_size);
> +
> +	  attributes = tree_cons (get_identifier ("prolog_pad"),
> +				  tree_cons ( NULL_TREE, pp_size, NULL_TREE),
> +				  attributes);
> +	}

... here:

else if (ATTRIBUTE_VALUE (pp_attr) > prolog_nop_pad_size)
  warning (OPT_Wattributes, "Prologue pad might be truncated: <FUNCTION_NAME>", node);

Also, I suggest to issue a warning here if ipa-ra is not disabled, and adding a comment to the documentation about issues with interoperability of -fprolog-pad/__attribute__((prolog_pad)) and IPA RA..

> +    }
> +
>   /* If this is a function and the user used #pragma GCC optimize, add the
>      options to the attribute((optimize(...))) list.  */
>   if (TREE_CODE (*node) == FUNCTION_DECL && current_optimize_pragma)
> diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
> index f2846bb..278a99e 100644
> --- a/gcc/c-family/c-common.c
> +++ b/gcc/c-family/c-common.c
> @@ -393,6 +393,7 @@ static tree handle_designated_init_attribute (tree *, tree, tree, int, bool *);
> static tree handle_bnd_variable_size_attribute (tree *, tree, tree, int, bool *);
> static tree handle_bnd_legacy (tree *, tree, tree, int, bool *);
> static tree handle_bnd_instrument (tree *, tree, tree, int, bool *);
> +static tree handle_prolog_pad_attribute (tree *, tree, tree, int, bool *);
> 
> static void check_nonnull_arg (void *, tree, unsigned HOST_WIDE_INT);
> static bool nonnull_check_p (tree, unsigned HOST_WIDE_INT);
> @@ -834,6 +835,8 @@ const struct attribute_spec c_common_attribute_table[] =
> 			      handle_bnd_legacy, false },
>   { "bnd_instrument",         0, 0, true, false, false,
> 			      handle_bnd_instrument, false },
> +  { "prolog_pad",	      1, 1, false, true, true,
> +			      handle_prolog_pad_attribute, false },

The patch also needs to document new attribute in doc/extend.texi

>   { NULL,                     0, 0, false, false, false, NULL, false }
> };
> 
> @@ -9687,6 +9690,14 @@ handle_designated_init_attribute (tree *node, tree name, tree, int,
>   return NULL_TREE;
> }
> 
> +static tree
> +handle_prolog_pad_attribute (tree *, tree, tree, int,
> +			     bool *)
> +{
> +  /* Nothing to be done here. */
> +  return NULL_TREE;
> +}
> +
> 
> /* Check for valid arguments being passed to a function with FNTYPE.
>    There are NARGS arguments in the array ARGARRAY.  LOC should be used for
> diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c
> index fec58bc..0220faa 100644
> --- a/gcc/c-family/c-opts.c
> +++ b/gcc/c-family/c-opts.c
> @@ -536,6 +536,10 @@ c_common_handle_option (size_t scode, const char *arg, int value,
>       cpp_opts->ext_numeric_literals = value;
>       break;
> 
> +    case OPT_fprolog_pad_:
> +      prolog_nop_pad_size = value;
> +      break;
> +
>     case OPT_idirafter:
>       add_path (xstrdup (arg), AFTER, 0, true);
>       break;
> diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
> index 88038a0..804c654 100644
> --- a/gcc/c-family/c.opt
> +++ b/gcc/c-family/c.opt
> @@ -1427,6 +1427,10 @@ fpreprocessed
> C ObjC C++ ObjC++
> Treat the input file as already preprocessed.
> 
> +fprolog-pad=
> +C ObjC C++ ObjC++ RejectNegative Joined UInteger Var(prolog_nop_pad_size) Init(0)
> +Pad NOPs before each function prolog
> +
> ftrack-macro-expansion
> C ObjC C++ ObjC++ JoinedOrMissing RejectNegative UInteger
> ; converted into ftrack-macro-expansion=
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index 2ed9285..463a5a5 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -10387,6 +10387,16 @@ of the function name, it is considered to be a match.  For C99 and C++
> extended identifiers, the function name must be given in UTF-8, not
> using universal character names.
> 
> +@item -fprolog-pad=N
> +@opindex fprolog-pad
> +Generate a pad of N nops right at the beginning
> +of each function, which can be used to patch in any desired
> +instrumentation at run time, provided that the code segment
> +is writeable. For run time identification, the starting addresses
> +of these pads, which correspond to their respective functions,
> +are additionally collected in the @code{__prolog_pads_loc} section
> +of the resulting binary.

... Note that value of @code{__attribute__((prolog_pad(N)))} takes precedence over command-line option -fprolog_pad=N.

> +
> @end table
> 
> 
> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
> index 745910f..f6f9d59 100644
> --- a/gcc/doc/tm.texi
> +++ b/gcc/doc/tm.texi
> @@ -4553,6 +4553,10 @@ will select the smallest suitable mode.
> This section describes the macros that output function entry
> (@dfn{prologue}) and exit (@dfn{epilogue}) code.
> 
> +@deftypefn {Target Hook} void TARGET_ASM_PRINT_PROLOG_PAD (FILE *@var{file}, unsigned HOST_WIDE_INT @var{pad_size}, bool @var{record_p})
> +Generate prologue pad
> +@end deftypefn
> +
> @deftypefn {Target Hook} void TARGET_ASM_FUNCTION_PROLOGUE (FILE *@var{file}, HOST_WIDE_INT @var{size})
> If defined, a function that outputs the assembler code for entry to a
> function.  The prologue is responsible for setting up the stack frame,
> diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
> index f31c763..b9585da 100644
> --- a/gcc/doc/tm.texi.in
> +++ b/gcc/doc/tm.texi.in
> @@ -3662,6 +3662,8 @@ will select the smallest suitable mode.
> This section describes the macros that output function entry
> (@dfn{prologue}) and exit (@dfn{epilogue}) code.
> 
> +@hook TARGET_ASM_PRINT_PROLOG_PAD
> +
> @hook TARGET_ASM_FUNCTION_PROLOGUE
> 
> @hook TARGET_ASM_FUNCTION_END_PROLOGUE
> diff --git a/gcc/final.c b/gcc/final.c
> index 55cf509..94bbf8f 100644
> --- a/gcc/final.c
> +++ b/gcc/final.c
> @@ -1754,6 +1754,7 @@ void
> final_start_function (rtx_insn *first, FILE *file,
> 		      int optimize_p ATTRIBUTE_UNUSED)
> {
> +  unsigned HOST_WIDE_INT pad_size = prolog_nop_pad_size;

Declaration of pad_size can be moved under "if (prolog_pad_attr)" clause, since we don't need value of prolog_nop_pad_size here anymore (it's already encoded in the __attribute__).

>   block_depth = 0;
> 
>   this_is_asm_operands = 0;
> @@ -1766,6 +1767,21 @@ final_start_function (rtx_insn *first, FILE *file,
> 
>   high_block_linenum = high_function_linenum = last_linenum;
> 
> +  tree prolog_pad_attr
> +    = lookup_attribute ("prolog_pad", TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)));
> +  if (prolog_pad_attr)
> +    {
> +      tree prolog_pad_value = TREE_VALUE (TREE_VALUE (prolog_pad_attr));
> +
> +      if (tree_fits_uhwi_p (prolog_pad_value))
> +	pad_size = tree_to_uhwi (prolog_pad_value);
> +      else
> +	gcc_unreachable ();
> +
> +    }
> +  if (pad_size > 0)
> +    targetm.asm_out.print_prolog_pad (file, pad_size, true);

Similarly, this clause can be moved inside "if (prolog_pad_attr)" clause.

> +
>   if (flag_sanitize & SANITIZE_ADDRESS)
>     asan_function_start ();
> 
> diff --git a/gcc/target.def b/gcc/target.def
> index 20f2b32..e0a4cc4 100644
> --- a/gcc/target.def
> +++ b/gcc/target.def
> @@ -288,6 +288,12 @@ hidden, protected or internal visibility as specified by @var{visibility}.",
>  void, (tree decl, int visibility),
>  default_assemble_visibility)
> 
> +DEFHOOK
> +(print_prolog_pad,
> + "Generate prologue pad",
> + void, (FILE *file, unsigned HOST_WIDE_INT pad_size, bool record_p),
> + default_print_prolog_pad)
> +
> /* Output the assembler code for entry to a function.  */
> DEFHOOK
> (function_prologue,
> diff --git a/gcc/targhooks.c b/gcc/targhooks.c
> index a342277..41e9850 100644
> --- a/gcc/targhooks.c
> +++ b/gcc/targhooks.c
> @@ -1503,6 +1503,22 @@ default_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
>   return move_by_pieces_ninsns (size, alignment, max_size + 1) < ratio;
> }
> 
> +void
> +default_print_prolog_pad (FILE *file, unsigned HOST_WIDE_INT pad_size,
> +			  bool record_p)
> +{
> +  if (record_p)
> +    fprintf (file, "1:");
> +  for (unsigned i = 0; i < pad_size; ++i)
> +    fprintf (file, "\tnop\n");
> +  if (record_p)
> +    {
> +      fprintf (file, "\t.section __prolog_pads_loc, \"a\",@progbits\n");
> +      fprintf (file, "\t.quad 1b\n");
> +      fprintf (file, "\t.previous\n");
> +    }
> +}
> +
> bool
> default_profile_before_prologue (void)
> {
> diff --git a/gcc/targhooks.h b/gcc/targhooks.h
> index 7687c39..6bb41c4 100644
> --- a/gcc/targhooks.h
> +++ b/gcc/targhooks.h
> @@ -200,6 +200,7 @@ extern bool default_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
> 						    enum by_pieces_operation,
> 						    bool);
> 
> +extern void default_print_prolog_pad (FILE *, unsigned HOST_WIDE_INT , bool);
> extern bool default_profile_before_prologue (void);
> extern reg_class_t default_preferred_reload_class (rtx, reg_class_t);
> extern reg_class_t default_preferred_output_reload_class (rtx, reg_class_t);
> diff --git a/gcc/testsuite/c-c++-common/attribute-prolog_pad-1.c b/gcc/testsuite/c-c++-common/attribute-prolog_pad-1.c
> new file mode 100644
> index 0000000..7404dc5
> --- /dev/null
> +++ b/gcc/testsuite/c-c++-common/attribute-prolog_pad-1.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +
> +void f1(void) __attribute__((prolog_pad(1)));
> +void f2(void) __attribute__((prolog_pad(2)));
> +
> +void
> +f1 (void)
> +{
> +  f2 ();
> +}
> +
> +void f2 (void)
> +{
> +  f1 ();
> +}
> diff --git a/gcc/toplev.c b/gcc/toplev.c
> index 8979d26..2339ce8 100644
> --- a/gcc/toplev.c
> +++ b/gcc/toplev.c
> @@ -1580,8 +1580,10 @@ process_options (void)
>     }
> 
>  /* Do not use IPA optimizations for register allocation if profiler is active
> +    or prolog-pads are inserted for run-time instrumentation
>     or port does not emit prologue and epilogue as RTL.  */
> -  if (profile_flag || !targetm.have_prologue () || !targetm.have_epilogue ())
> +  if (profile_flag || prolog_nop_pad_size ||
> +      !targetm.have_prologue () || !targetm.have_epilogue ())
>     flag_ipa_ra = 0;
> 
>   /* Enable -Werror=coverage-mismatch when -Werror and -Wno-error

Thanks!

--
Maxim Kuvyrkov
www.linaro.org





^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] add -fprolog-pad=N option to c-family
  2016-10-04 21:46       ` Maxim Kuvyrkov
@ 2016-12-06 14:02         ` Maxim Kuvyrkov
  0 siblings, 0 replies; 15+ messages in thread
From: Maxim Kuvyrkov @ 2016-12-06 14:02 UTC (permalink / raw)
  To: Maxim Kuvyrkov
  Cc: Torsten Duwe, GCC Patches, Szabolcs Nagy, nd, Li Bin,
	Jiri Kosina, Marcus Shawcroft, Takahiro Akashi, Andrew Wafaa,
	Richard Earnshaw

> On Oct 5, 2016, at 12:45 AM, Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org> wrote:
> 
>> On Sep 29, 2016, at 11:14 AM, Torsten Duwe <duwe@suse.de> wrote:
>> 
>> In case anybody missed it, the Linux kernel side to make use
>> of this has also been finished meanwhile. Of course it can not
>> be accepted without compiler support; and this feature patch
>> is much more versatile than just Linux kernel live patching
>> on a single architecture.
> 
> Hi Torsten,
> 
> Good job moving -fprolog-pad= forward!  I've reviewed your patch, and it looks OK with the minor comments inline.  I've CC'ed Richard E. since you will need a global maintainer approval for this change.

Ping?

--
Maxim Kuvyrkov
www.linaro.org


> 
> Ideally, I want to improve support for -fprolog-pad=N and __attribute__((prolog_pad(N))) to provide functionality to also output pad before the function label to address use-cases for s390, sparc, etc (what Jose E. Marchesi was referring to).  I.e., -fprolog-pad= option would accept both -fprolog-pad=N and -fprolog-pad=M,N forms -- issue M nops before function label and N nops after function label.  Similarly for __attribute__((prolog_pad(N,M))).  I (or you :-) ) can attempt to implement this functionality before stage1 closes, but it should not block this initial patch.
> 
> Comments on the patch below.
> 
>> 
>> Changes since the previous version
>> (which in turn was based on Maxim's suggestion):
>> 
>> * Document the feature in *.texi
>> 
>> * Automatically disable IPA-RA, like normal profiling does.
>> You never know in advance what the code patched in at run time will do.
>> Any optimisation here is potentially wrong.
>> 
>> * record a prolog_nop_pad_size value specified on the command line
>> in each function's attributes, so that it survives an LTO pipe.
>> 
>> Signed-off-by: Torsten Duwe <duwe@suse.de>
>> 
>> diff --git a/gcc/attribs.c b/gcc/attribs.c
>> index 16996e9..a5cf2aa 100644
>> --- a/gcc/attribs.c
>> +++ b/gcc/attribs.c
>> @@ -365,6 +365,21 @@ decl_attributes (tree *node, tree attributes, int flags)
>>  if (!attributes_initialized)
>>    init_attributes ();
>> 
>> +  /* If we're building NOP pads because of a command line arg, note the size
>> +     for LTO builds, unless the attribute has already been overridden. */
>> +  if (TREE_CODE (*node) == FUNCTION_DECL && prolog_nop_pad_size > 0)
>> +    {
>> +      tree pp_attr = lookup_attribute ("prolog_pad", attributes);
>> +      if (! pp_attr )
> 
> Coding style: should be "if (!pp_attr)"
> 
> Also, this clause implies that attribute takes precedence over command-line option, and the two are not attempted to be merged.  I agree that this is a reasonable approach, but consider adding a warning if prolog_nop_pad_size is bigger than the value of the existing attribute.  Something like ...
> 
>> +	{
>> +	  tree pp_size = build_int_cstu (integer_type_node, prolog_nop_pad_size);
>> +
>> +	  attributes = tree_cons (get_identifier ("prolog_pad"),
>> +				  tree_cons ( NULL_TREE, pp_size, NULL_TREE),
>> +				  attributes);
>> +	}
> 
> ... here:
> 
> else if (ATTRIBUTE_VALUE (pp_attr) > prolog_nop_pad_size)
>  warning (OPT_Wattributes, "Prologue pad might be truncated: <FUNCTION_NAME>", node);
> 
> Also, I suggest to issue a warning here if ipa-ra is not disabled, and adding a comment to the documentation about issues with interoperability of -fprolog-pad/__attribute__((prolog_pad)) and IPA RA..
> 
>> +    }
>> +
>>  /* If this is a function and the user used #pragma GCC optimize, add the
>>     options to the attribute((optimize(...))) list.  */
>>  if (TREE_CODE (*node) == FUNCTION_DECL && current_optimize_pragma)
>> diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
>> index f2846bb..278a99e 100644
>> --- a/gcc/c-family/c-common.c
>> +++ b/gcc/c-family/c-common.c
>> @@ -393,6 +393,7 @@ static tree handle_designated_init_attribute (tree *, tree, tree, int, bool *);
>> static tree handle_bnd_variable_size_attribute (tree *, tree, tree, int, bool *);
>> static tree handle_bnd_legacy (tree *, tree, tree, int, bool *);
>> static tree handle_bnd_instrument (tree *, tree, tree, int, bool *);
>> +static tree handle_prolog_pad_attribute (tree *, tree, tree, int, bool *);
>> 
>> static void check_nonnull_arg (void *, tree, unsigned HOST_WIDE_INT);
>> static bool nonnull_check_p (tree, unsigned HOST_WIDE_INT);
>> @@ -834,6 +835,8 @@ const struct attribute_spec c_common_attribute_table[] =
>> 			      handle_bnd_legacy, false },
>>  { "bnd_instrument",         0, 0, true, false, false,
>> 			      handle_bnd_instrument, false },
>> +  { "prolog_pad",	      1, 1, false, true, true,
>> +			      handle_prolog_pad_attribute, false },
> 
> The patch also needs to document new attribute in doc/extend.texi
> 
>>  { NULL,                     0, 0, false, false, false, NULL, false }
>> };
>> 
>> @@ -9687,6 +9690,14 @@ handle_designated_init_attribute (tree *node, tree name, tree, int,
>>  return NULL_TREE;
>> }
>> 
>> +static tree
>> +handle_prolog_pad_attribute (tree *, tree, tree, int,
>> +			     bool *)
>> +{
>> +  /* Nothing to be done here. */
>> +  return NULL_TREE;
>> +}
>> +
>> 
>> /* Check for valid arguments being passed to a function with FNTYPE.
>>   There are NARGS arguments in the array ARGARRAY.  LOC should be used for
>> diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c
>> index fec58bc..0220faa 100644
>> --- a/gcc/c-family/c-opts.c
>> +++ b/gcc/c-family/c-opts.c
>> @@ -536,6 +536,10 @@ c_common_handle_option (size_t scode, const char *arg, int value,
>>      cpp_opts->ext_numeric_literals = value;
>>      break;
>> 
>> +    case OPT_fprolog_pad_:
>> +      prolog_nop_pad_size = value;
>> +      break;
>> +
>>    case OPT_idirafter:
>>      add_path (xstrdup (arg), AFTER, 0, true);
>>      break;
>> diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
>> index 88038a0..804c654 100644
>> --- a/gcc/c-family/c.opt
>> +++ b/gcc/c-family/c.opt
>> @@ -1427,6 +1427,10 @@ fpreprocessed
>> C ObjC C++ ObjC++
>> Treat the input file as already preprocessed.
>> 
>> +fprolog-pad=
>> +C ObjC C++ ObjC++ RejectNegative Joined UInteger Var(prolog_nop_pad_size) Init(0)
>> +Pad NOPs before each function prolog
>> +
>> ftrack-macro-expansion
>> C ObjC C++ ObjC++ JoinedOrMissing RejectNegative UInteger
>> ; converted into ftrack-macro-expansion=
>> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
>> index 2ed9285..463a5a5 100644
>> --- a/gcc/doc/invoke.texi
>> +++ b/gcc/doc/invoke.texi
>> @@ -10387,6 +10387,16 @@ of the function name, it is considered to be a match.  For C99 and C++
>> extended identifiers, the function name must be given in UTF-8, not
>> using universal character names.
>> 
>> +@item -fprolog-pad=N
>> +@opindex fprolog-pad
>> +Generate a pad of N nops right at the beginning
>> +of each function, which can be used to patch in any desired
>> +instrumentation at run time, provided that the code segment
>> +is writeable. For run time identification, the starting addresses
>> +of these pads, which correspond to their respective functions,
>> +are additionally collected in the @code{__prolog_pads_loc} section
>> +of the resulting binary.
> 
> ... Note that value of @code{__attribute__((prolog_pad(N)))} takes precedence over command-line option -fprolog_pad=N.
> 
>> +
>> @end table
>> 
>> 
>> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
>> index 745910f..f6f9d59 100644
>> --- a/gcc/doc/tm.texi
>> +++ b/gcc/doc/tm.texi
>> @@ -4553,6 +4553,10 @@ will select the smallest suitable mode.
>> This section describes the macros that output function entry
>> (@dfn{prologue}) and exit (@dfn{epilogue}) code.
>> 
>> +@deftypefn {Target Hook} void TARGET_ASM_PRINT_PROLOG_PAD (FILE *@var{file}, unsigned HOST_WIDE_INT @var{pad_size}, bool @var{record_p})
>> +Generate prologue pad
>> +@end deftypefn
>> +
>> @deftypefn {Target Hook} void TARGET_ASM_FUNCTION_PROLOGUE (FILE *@var{file}, HOST_WIDE_INT @var{size})
>> If defined, a function that outputs the assembler code for entry to a
>> function.  The prologue is responsible for setting up the stack frame,
>> diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
>> index f31c763..b9585da 100644
>> --- a/gcc/doc/tm.texi.in
>> +++ b/gcc/doc/tm.texi.in
>> @@ -3662,6 +3662,8 @@ will select the smallest suitable mode.
>> This section describes the macros that output function entry
>> (@dfn{prologue}) and exit (@dfn{epilogue}) code.
>> 
>> +@hook TARGET_ASM_PRINT_PROLOG_PAD
>> +
>> @hook TARGET_ASM_FUNCTION_PROLOGUE
>> 
>> @hook TARGET_ASM_FUNCTION_END_PROLOGUE
>> diff --git a/gcc/final.c b/gcc/final.c
>> index 55cf509..94bbf8f 100644
>> --- a/gcc/final.c
>> +++ b/gcc/final.c
>> @@ -1754,6 +1754,7 @@ void
>> final_start_function (rtx_insn *first, FILE *file,
>> 		      int optimize_p ATTRIBUTE_UNUSED)
>> {
>> +  unsigned HOST_WIDE_INT pad_size = prolog_nop_pad_size;
> 
> Declaration of pad_size can be moved under "if (prolog_pad_attr)" clause, since we don't need value of prolog_nop_pad_size here anymore (it's already encoded in the __attribute__).
> 
>>  block_depth = 0;
>> 
>>  this_is_asm_operands = 0;
>> @@ -1766,6 +1767,21 @@ final_start_function (rtx_insn *first, FILE *file,
>> 
>>  high_block_linenum = high_function_linenum = last_linenum;
>> 
>> +  tree prolog_pad_attr
>> +    = lookup_attribute ("prolog_pad", TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)));
>> +  if (prolog_pad_attr)
>> +    {
>> +      tree prolog_pad_value = TREE_VALUE (TREE_VALUE (prolog_pad_attr));
>> +
>> +      if (tree_fits_uhwi_p (prolog_pad_value))
>> +	pad_size = tree_to_uhwi (prolog_pad_value);
>> +      else
>> +	gcc_unreachable ();
>> +
>> +    }
>> +  if (pad_size > 0)
>> +    targetm.asm_out.print_prolog_pad (file, pad_size, true);
> 
> Similarly, this clause can be moved inside "if (prolog_pad_attr)" clause.
> 
>> +
>>  if (flag_sanitize & SANITIZE_ADDRESS)
>>    asan_function_start ();
>> 
>> diff --git a/gcc/target.def b/gcc/target.def
>> index 20f2b32..e0a4cc4 100644
>> --- a/gcc/target.def
>> +++ b/gcc/target.def
>> @@ -288,6 +288,12 @@ hidden, protected or internal visibility as specified by @var{visibility}.",
>> void, (tree decl, int visibility),
>> default_assemble_visibility)
>> 
>> +DEFHOOK
>> +(print_prolog_pad,
>> + "Generate prologue pad",
>> + void, (FILE *file, unsigned HOST_WIDE_INT pad_size, bool record_p),
>> + default_print_prolog_pad)
>> +
>> /* Output the assembler code for entry to a function.  */
>> DEFHOOK
>> (function_prologue,
>> diff --git a/gcc/targhooks.c b/gcc/targhooks.c
>> index a342277..41e9850 100644
>> --- a/gcc/targhooks.c
>> +++ b/gcc/targhooks.c
>> @@ -1503,6 +1503,22 @@ default_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
>>  return move_by_pieces_ninsns (size, alignment, max_size + 1) < ratio;
>> }
>> 
>> +void
>> +default_print_prolog_pad (FILE *file, unsigned HOST_WIDE_INT pad_size,
>> +			  bool record_p)
>> +{
>> +  if (record_p)
>> +    fprintf (file, "1:");
>> +  for (unsigned i = 0; i < pad_size; ++i)
>> +    fprintf (file, "\tnop\n");
>> +  if (record_p)
>> +    {
>> +      fprintf (file, "\t.section __prolog_pads_loc, \"a\",@progbits\n");
>> +      fprintf (file, "\t.quad 1b\n");
>> +      fprintf (file, "\t.previous\n");
>> +    }
>> +}
>> +
>> bool
>> default_profile_before_prologue (void)
>> {
>> diff --git a/gcc/targhooks.h b/gcc/targhooks.h
>> index 7687c39..6bb41c4 100644
>> --- a/gcc/targhooks.h
>> +++ b/gcc/targhooks.h
>> @@ -200,6 +200,7 @@ extern bool default_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
>> 						    enum by_pieces_operation,
>> 						    bool);
>> 
>> +extern void default_print_prolog_pad (FILE *, unsigned HOST_WIDE_INT , bool);
>> extern bool default_profile_before_prologue (void);
>> extern reg_class_t default_preferred_reload_class (rtx, reg_class_t);
>> extern reg_class_t default_preferred_output_reload_class (rtx, reg_class_t);
>> diff --git a/gcc/testsuite/c-c++-common/attribute-prolog_pad-1.c b/gcc/testsuite/c-c++-common/attribute-prolog_pad-1.c
>> new file mode 100644
>> index 0000000..7404dc5
>> --- /dev/null
>> +++ b/gcc/testsuite/c-c++-common/attribute-prolog_pad-1.c
>> @@ -0,0 +1,15 @@
>> +/* { dg-do compile } */
>> +
>> +void f1(void) __attribute__((prolog_pad(1)));
>> +void f2(void) __attribute__((prolog_pad(2)));
>> +
>> +void
>> +f1 (void)
>> +{
>> +  f2 ();
>> +}
>> +
>> +void f2 (void)
>> +{
>> +  f1 ();
>> +}
>> diff --git a/gcc/toplev.c b/gcc/toplev.c
>> index 8979d26..2339ce8 100644
>> --- a/gcc/toplev.c
>> +++ b/gcc/toplev.c
>> @@ -1580,8 +1580,10 @@ process_options (void)
>>    }
>> 
>> /* Do not use IPA optimizations for register allocation if profiler is active
>> +    or prolog-pads are inserted for run-time instrumentation
>>    or port does not emit prologue and epilogue as RTL.  */
>> -  if (profile_flag || !targetm.have_prologue () || !targetm.have_epilogue ())
>> +  if (profile_flag || prolog_nop_pad_size ||
>> +      !targetm.have_prologue () || !targetm.have_epilogue ())
>>    flag_ipa_ra = 0;
>> 
>>  /* Enable -Werror=coverage-mismatch when -Werror and -Wno-error
> 
> Thanks!
> 
> --
> Maxim Kuvyrkov
> www.linaro.org

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2016-12-06 14:02 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-04-27 15:22 [PATCH] add -fprolog-pad=N option to c-family Torsten Duwe
2016-04-27 16:26 ` Szabolcs Nagy
2016-04-28  8:48   ` Maxim Kuvyrkov
2016-04-28 10:58     ` Szabolcs Nagy
2016-04-28 11:32       ` Torsten Duwe
2016-05-09  5:53       ` AKASHI Takahiro
2016-09-29 15:18     ` [PATCH v2] " Torsten Duwe
2016-09-30  9:59       ` Jose E. Marchesi
2016-09-30 10:22         ` Torsten Duwe
2016-10-03  9:45         ` AKASHI Takahiro
2016-10-04 21:46       ` Maxim Kuvyrkov
2016-12-06 14:02         ` Maxim Kuvyrkov
2016-04-28  8:40 ` [PATCH] " Maxim Kuvyrkov
2016-04-28 11:18   ` Torsten Duwe
2016-04-28 15:07     ` Jeff Law

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).