public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] PR target/68937: i686: -fno-plt produces wrong code (maybe only with tailcall
@ 2015-12-16 23:29 H.J. Lu
  2015-12-17 10:04 ` Uros Bizjak
  0 siblings, 1 reply; 11+ messages in thread
From: H.J. Lu @ 2015-12-16 23:29 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Since sibcall never returns, we can only use call-clobbered register
as GOT base.  Otherwise, callee-saved register used as GOT base won't
be properly restored.

Tested on x86-64 with -m32.  OK for trunk?


H.J.
---
gcc/

	PR target/68937
	* config/i386/i386.c (ix86_function_ok_for_sibcall): Count
	call via GOT slot as indirect call.
	(ix86_expand_call): Mark PIC register used for sibcall as
	call-clobbered.
	* config/i386/i386.md (*sibcall_GOT_32): New pattern.
	(*sibcall_value_GOT_32): Likewise.

gcc/testsuite/

	PR target/68937
	* gcc.target/i386/pr68937-1.c: New test.
	* gcc.target/i386/pr68937-2.c: Likewise.
	* gcc.target/i386/pr68937-3.c: Likewise.
	* gcc.target/i386/pr68937-4.c: Likewise.
---
 gcc/config/i386/i386.c                    | 15 ++++++++---
 gcc/config/i386/i386.md                   | 45 +++++++++++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr68937-1.c | 13 +++++++++
 gcc/testsuite/gcc.target/i386/pr68937-2.c | 13 +++++++++
 gcc/testsuite/gcc.target/i386/pr68937-3.c | 13 +++++++++
 gcc/testsuite/gcc.target/i386/pr68937-4.c | 13 +++++++++
 6 files changed, 109 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr68937-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr68937-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr68937-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr68937-4.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index cecea24..ebc9d09 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -6723,8 +6723,10 @@ ix86_function_ok_for_sibcall (tree decl, tree exp)
       /* If this call is indirect, we'll need to be able to use a
 	 call-clobbered register for the address of the target function.
 	 Make sure that all such registers are not used for passing
-	 parameters.  Note that DLLIMPORT functions are indirect.  */
+	 parameters.  Note that DLLIMPORT functions and call via GOT
+	 slot are indirect.  */
       if (!decl
+	  || (flag_pic && !flag_plt)
 	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
 	{
 	  /* Check if regparm >= 3 since arg_reg_available is set to
@@ -27019,8 +27021,8 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
 		  rtx callarg2,
 		  rtx pop, bool sibcall)
 {
-  rtx vec[3];
-  rtx use = NULL, call;
+  rtx vec[4];
+  rtx use = NULL, call, clobber = NULL;
   unsigned int vec_len = 0;
 
   if (pop == const0_rtx)
@@ -27075,6 +27077,10 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
 		  fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
 					   UNSPEC_GOT);
 		  fnaddr = gen_rtx_CONST (Pmode, fnaddr);
+		  /* Since sibcall never returns, mark PIC register as
+		     call-clobbered.  */
+		  if (sibcall)
+		    clobber = pic_offset_table_rtx;
 		  fnaddr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
 					 fnaddr);
 		}
@@ -27151,6 +27157,9 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
     }
   vec[vec_len++] = call;
 
+  if (clobber)
+    vec[vec_len++] = gen_rtx_CLOBBER (VOIDmode, clobber);
+
   if (pop)
     {
       pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 49b2216..65c1534 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -11865,6 +11865,28 @@
   "* return ix86_output_call_insn (insn, operands[0]);"
   [(set_attr "type" "call")])
 
+;; Since sibcall never returns, we can only use call-clobbered register
+;; as GOT base.
+(define_insn "*sibcall_GOT_32"
+  [(call (mem:QI
+	   (mem:SI (plus:SI
+		     (match_operand:SI 0 "register_operand" "U")
+		     (const:SI
+		       (unspec:SI [(match_operand:SI 1 "symbol_operand")]
+			UNSPEC_GOT)))))
+	 (match_operand 2))
+   (clobber (match_dup 0))]
+  "!TARGET_MACHO && !TARGET_64BIT && SIBLING_CALL_P (insn)"
+{
+  rtx fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, operands[1]),
+			       UNSPEC_GOT);
+  fnaddr = gen_rtx_CONST (Pmode, fnaddr);
+  fnaddr = gen_rtx_PLUS (Pmode, operands[0], fnaddr);
+  fnaddr = gen_const_mem (Pmode, fnaddr);
+  return ix86_output_call_insn (insn, fnaddr);
+}
+  [(set_attr "type" "call")])
+
 (define_insn "*sibcall"
   [(call (mem:QI (match_operand:W 0 "sibcall_insn_operand" "UBsBz"))
 	 (match_operand 1))]
@@ -12042,6 +12064,29 @@
   "* return ix86_output_call_insn (insn, operands[1]);"
   [(set_attr "type" "callv")])
 
+;; Since sibcall never returns, we can only use call-clobbered register
+;; as GOT base.
+(define_insn "*sibcall_value_GOT_32"
+  [(set (match_operand 0)
+        (call (mem:QI
+	        (mem:SI (plus:SI
+			  (match_operand:SI 1 "register_operand" "U")
+			  (const:SI
+			    (unspec:SI [(match_operand:SI 2 "symbol_operand")]
+			     UNSPEC_GOT)))))
+	 (match_operand 3)))
+   (clobber (match_dup 1))]
+  "!TARGET_MACHO && !TARGET_64BIT && SIBLING_CALL_P (insn)"
+{
+  rtx fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, operands[2]),
+			       UNSPEC_GOT);
+  fnaddr = gen_rtx_CONST (Pmode, fnaddr);
+  fnaddr = gen_rtx_PLUS (Pmode, operands[1], fnaddr);
+  fnaddr = gen_const_mem (Pmode, fnaddr);
+  return ix86_output_call_insn (insn, fnaddr);
+}
+  [(set_attr "type" "callv")])
+
 (define_insn "*sibcall_value"
   [(set (match_operand 0)
 	(call (mem:QI (match_operand:W 1 "sibcall_insn_operand" "UBsBz"))
diff --git a/gcc/testsuite/gcc.target/i386/pr68937-1.c b/gcc/testsuite/gcc.target/i386/pr68937-1.c
new file mode 100644
index 0000000..166c309
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr68937-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { *-*-linux* && ia32 } } } */
+/* { dg-options "-O2 -fpic -fno-plt -mregparm=3" } */
+
+extern void bar (int);
+
+void
+foo (int b)
+{
+  bar (b);
+  bar (b);
+}
+
+/* { dg-final { scan-assembler "jmp\[ \t\]*.bar@GOT\\(%e(a|c|d)x\\)" { target ia32 } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr68937-2.c b/gcc/testsuite/gcc.target/i386/pr68937-2.c
new file mode 100644
index 0000000..9aa4f4d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr68937-2.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { *-*-linux* && ia32 } } } */
+/* { dg-options "-O2 -fpic -fno-plt -mregparm=3" } */
+
+extern void bar (int, int);
+
+void
+foo (int a, int b)
+{
+  bar (a, b);
+  bar (a, b);
+}
+
+/* { dg-final { scan-assembler "jmp\[ \t\]*.bar@GOT\\(%e(a|c|d)x\\)" { target ia32 } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr68937-3.c b/gcc/testsuite/gcc.target/i386/pr68937-3.c
new file mode 100644
index 0000000..6d8e40f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr68937-3.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { *-*-linux* && ia32 } } } */
+/* { dg-options "-O2 -fpic -fno-plt -mregparm=3" } */
+
+extern void bar (int, int, int);
+
+void
+foo (int a, int b, int c)
+{
+  bar (a, b, c);
+  bar (a, b, c);
+}
+
+/* { dg-final { scan-assembler-not "jmp\[ \t\]*.bar@GOT" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr68937-4.c b/gcc/testsuite/gcc.target/i386/pr68937-4.c
new file mode 100644
index 0000000..fc5f2a6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr68937-4.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { *-*-linux* && ia32 } } } */
+/* { dg-options "-O2 -fpic -fno-plt -mregparm=3" } */
+
+extern int bar (int, int);
+
+int
+foo (int a, int b)
+{
+  (void) bar (a, b);
+  return bar (a, b);
+}
+
+/* { dg-final { scan-assembler "jmp\[ \t\]*.bar@GOT\\(%e(a|c|d)x\\)" { target ia32 } } } */
-- 
2.5.0


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] PR target/68937: i686: -fno-plt produces wrong code (maybe only with tailcall
  2015-12-16 23:29 [PATCH] PR target/68937: i686: -fno-plt produces wrong code (maybe only with tailcall H.J. Lu
@ 2015-12-17 10:04 ` Uros Bizjak
  2015-12-17 13:00   ` H.J. Lu
  0 siblings, 1 reply; 11+ messages in thread
From: Uros Bizjak @ 2015-12-17 10:04 UTC (permalink / raw)
  To: H.J. Lu; +Cc: gcc-patches

On Thu, Dec 17, 2015 at 12:29 AM, H.J. Lu <hongjiu.lu@intel.com> wrote:
> Since sibcall never returns, we can only use call-clobbered register
> as GOT base.  Otherwise, callee-saved register used as GOT base won't
> be properly restored.
>
> Tested on x86-64 with -m32.  OK for trunk?

You don't have to add explicit clobber for members of "CLOBBERED_REGS"
class, and register_no_elim_operand predicate should be used with "U"
constraint. Also, please introduce new predicate, similar to how
GOT_memory_operand is defined and handled.

Uros.

>
> H.J.
> ---
> gcc/
>
>         PR target/68937
>         * config/i386/i386.c (ix86_function_ok_for_sibcall): Count
>         call via GOT slot as indirect call.
>         (ix86_expand_call): Mark PIC register used for sibcall as
>         call-clobbered.
>         * config/i386/i386.md (*sibcall_GOT_32): New pattern.
>         (*sibcall_value_GOT_32): Likewise.
>
> gcc/testsuite/
>
>         PR target/68937
>         * gcc.target/i386/pr68937-1.c: New test.
>         * gcc.target/i386/pr68937-2.c: Likewise.
>         * gcc.target/i386/pr68937-3.c: Likewise.
>         * gcc.target/i386/pr68937-4.c: Likewise.
> ---
>  gcc/config/i386/i386.c                    | 15 ++++++++---
>  gcc/config/i386/i386.md                   | 45 +++++++++++++++++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr68937-1.c | 13 +++++++++
>  gcc/testsuite/gcc.target/i386/pr68937-2.c | 13 +++++++++
>  gcc/testsuite/gcc.target/i386/pr68937-3.c | 13 +++++++++
>  gcc/testsuite/gcc.target/i386/pr68937-4.c | 13 +++++++++
>  6 files changed, 109 insertions(+), 3 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr68937-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr68937-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr68937-3.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr68937-4.c
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index cecea24..ebc9d09 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -6723,8 +6723,10 @@ ix86_function_ok_for_sibcall (tree decl, tree exp)
>        /* If this call is indirect, we'll need to be able to use a
>          call-clobbered register for the address of the target function.
>          Make sure that all such registers are not used for passing
> -        parameters.  Note that DLLIMPORT functions are indirect.  */
> +        parameters.  Note that DLLIMPORT functions and call via GOT
> +        slot are indirect.  */
>        if (!decl
> +         || (flag_pic && !flag_plt)
>           || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
>         {
>           /* Check if regparm >= 3 since arg_reg_available is set to
> @@ -27019,8 +27021,8 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
>                   rtx callarg2,
>                   rtx pop, bool sibcall)
>  {
> -  rtx vec[3];
> -  rtx use = NULL, call;
> +  rtx vec[4];
> +  rtx use = NULL, call, clobber = NULL;
>    unsigned int vec_len = 0;
>
>    if (pop == const0_rtx)
> @@ -27075,6 +27077,10 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
>                   fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
>                                            UNSPEC_GOT);
>                   fnaddr = gen_rtx_CONST (Pmode, fnaddr);
> +                 /* Since sibcall never returns, mark PIC register as
> +                    call-clobbered.  */
> +                 if (sibcall)
> +                   clobber = pic_offset_table_rtx;
>                   fnaddr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
>                                          fnaddr);
>                 }
> @@ -27151,6 +27157,9 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
>      }
>    vec[vec_len++] = call;
>
> +  if (clobber)
> +    vec[vec_len++] = gen_rtx_CLOBBER (VOIDmode, clobber);
> +
>    if (pop)
>      {
>        pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 49b2216..65c1534 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -11865,6 +11865,28 @@
>    "* return ix86_output_call_insn (insn, operands[0]);"
>    [(set_attr "type" "call")])
>
> +;; Since sibcall never returns, we can only use call-clobbered register
> +;; as GOT base.
> +(define_insn "*sibcall_GOT_32"
> +  [(call (mem:QI
> +          (mem:SI (plus:SI
> +                    (match_operand:SI 0 "register_operand" "U")
> +                    (const:SI
> +                      (unspec:SI [(match_operand:SI 1 "symbol_operand")]
> +                       UNSPEC_GOT)))))
> +        (match_operand 2))
> +   (clobber (match_dup 0))]
> +  "!TARGET_MACHO && !TARGET_64BIT && SIBLING_CALL_P (insn)"
> +{
> +  rtx fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, operands[1]),
> +                              UNSPEC_GOT);
> +  fnaddr = gen_rtx_CONST (Pmode, fnaddr);
> +  fnaddr = gen_rtx_PLUS (Pmode, operands[0], fnaddr);
> +  fnaddr = gen_const_mem (Pmode, fnaddr);
> +  return ix86_output_call_insn (insn, fnaddr);
> +}
> +  [(set_attr "type" "call")])
> +
>  (define_insn "*sibcall"
>    [(call (mem:QI (match_operand:W 0 "sibcall_insn_operand" "UBsBz"))
>          (match_operand 1))]
> @@ -12042,6 +12064,29 @@
>    "* return ix86_output_call_insn (insn, operands[1]);"
>    [(set_attr "type" "callv")])
>
> +;; Since sibcall never returns, we can only use call-clobbered register
> +;; as GOT base.
> +(define_insn "*sibcall_value_GOT_32"
> +  [(set (match_operand 0)
> +        (call (mem:QI
> +               (mem:SI (plus:SI
> +                         (match_operand:SI 1 "register_operand" "U")
> +                         (const:SI
> +                           (unspec:SI [(match_operand:SI 2 "symbol_operand")]
> +                            UNSPEC_GOT)))))
> +        (match_operand 3)))
> +   (clobber (match_dup 1))]
> +  "!TARGET_MACHO && !TARGET_64BIT && SIBLING_CALL_P (insn)"
> +{
> +  rtx fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, operands[2]),
> +                              UNSPEC_GOT);
> +  fnaddr = gen_rtx_CONST (Pmode, fnaddr);
> +  fnaddr = gen_rtx_PLUS (Pmode, operands[1], fnaddr);
> +  fnaddr = gen_const_mem (Pmode, fnaddr);
> +  return ix86_output_call_insn (insn, fnaddr);
> +}
> +  [(set_attr "type" "callv")])
> +
>  (define_insn "*sibcall_value"
>    [(set (match_operand 0)
>         (call (mem:QI (match_operand:W 1 "sibcall_insn_operand" "UBsBz"))
> diff --git a/gcc/testsuite/gcc.target/i386/pr68937-1.c b/gcc/testsuite/gcc.target/i386/pr68937-1.c
> new file mode 100644
> index 0000000..166c309
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr68937-1.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { *-*-linux* && ia32 } } } */
> +/* { dg-options "-O2 -fpic -fno-plt -mregparm=3" } */
> +
> +extern void bar (int);
> +
> +void
> +foo (int b)
> +{
> +  bar (b);
> +  bar (b);
> +}
> +
> +/* { dg-final { scan-assembler "jmp\[ \t\]*.bar@GOT\\(%e(a|c|d)x\\)" { target ia32 } } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr68937-2.c b/gcc/testsuite/gcc.target/i386/pr68937-2.c
> new file mode 100644
> index 0000000..9aa4f4d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr68937-2.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { *-*-linux* && ia32 } } } */
> +/* { dg-options "-O2 -fpic -fno-plt -mregparm=3" } */
> +
> +extern void bar (int, int);
> +
> +void
> +foo (int a, int b)
> +{
> +  bar (a, b);
> +  bar (a, b);
> +}
> +
> +/* { dg-final { scan-assembler "jmp\[ \t\]*.bar@GOT\\(%e(a|c|d)x\\)" { target ia32 } } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr68937-3.c b/gcc/testsuite/gcc.target/i386/pr68937-3.c
> new file mode 100644
> index 0000000..6d8e40f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr68937-3.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { *-*-linux* && ia32 } } } */
> +/* { dg-options "-O2 -fpic -fno-plt -mregparm=3" } */
> +
> +extern void bar (int, int, int);
> +
> +void
> +foo (int a, int b, int c)
> +{
> +  bar (a, b, c);
> +  bar (a, b, c);
> +}
> +
> +/* { dg-final { scan-assembler-not "jmp\[ \t\]*.bar@GOT" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr68937-4.c b/gcc/testsuite/gcc.target/i386/pr68937-4.c
> new file mode 100644
> index 0000000..fc5f2a6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr68937-4.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { *-*-linux* && ia32 } } } */
> +/* { dg-options "-O2 -fpic -fno-plt -mregparm=3" } */
> +
> +extern int bar (int, int);
> +
> +int
> +foo (int a, int b)
> +{
> +  (void) bar (a, b);
> +  return bar (a, b);
> +}
> +
> +/* { dg-final { scan-assembler "jmp\[ \t\]*.bar@GOT\\(%e(a|c|d)x\\)" { target ia32 } } } */
> --
> 2.5.0
>
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] PR target/68937: i686: -fno-plt produces wrong code (maybe only with tailcall
  2015-12-17 10:04 ` Uros Bizjak
@ 2015-12-17 13:00   ` H.J. Lu
  2015-12-17 13:42     ` Uros Bizjak
  0 siblings, 1 reply; 11+ messages in thread
From: H.J. Lu @ 2015-12-17 13:00 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1137 bytes --]

On Thu, Dec 17, 2015 at 2:04 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Thu, Dec 17, 2015 at 12:29 AM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>> Since sibcall never returns, we can only use call-clobbered register
>> as GOT base.  Otherwise, callee-saved register used as GOT base won't
>> be properly restored.
>>
>> Tested on x86-64 with -m32.  OK for trunk?
>
> You don't have to add explicit clobber for members of "CLOBBERED_REGS"
> class, and register_no_elim_operand predicate should be used with "U"
> constraint. Also, please introduce new predicate, similar to how
> GOT_memory_operand is defined and handled.
>

Here is the updated patch.  There is a predicate already,
sibcall_memory_operand.  It allows any registers to
be as GOT base, which is the root of our problem.
This patch removes GOT slot from it and handles
sibcall over GOT slot with *sibcall_GOT_32 and
*sibcall_value_GOT_32 patterns.  Since I need to
expose constraints on GOT base register to RA,
I have to use 2 operands, GOT base and function
symbol, to describe sibcall over 32-bit GOT slot.

OK for master if there is no regression.

Thanks.

-- 
H.J.

[-- Attachment #2: 0001-Use-call-clobbered-register-for-sibcall-via-GOT.patch --]
[-- Type: text/x-patch, Size: 8201 bytes --]

From e055e1ea71353897aa7ce4b38a5186c8b64ddc7c Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Wed, 16 Dec 2015 12:34:57 -0800
Subject: [PATCH] Use call-clobbered register for sibcall via GOT

Since sibcall never returns, we can only use call-clobbered register
as GOT base.  Otherwise, callee-saved register used as GOT base won't
be properly restored.

gcc/

	PR target/68937
	* config/i386/i386.c (ix86_function_ok_for_sibcall): Count
	call via GOT slot as indirect call.
	* config/i386/i386.md (*sibcall_GOT_32): New pattern.
	(*sibcall_value_GOT_32): Likewise.
	* config/i386/predicates.md (sibcall_memory_operand): Remove
	GOT slot.

gcc/testsuite/

	PR target/68937
	* gcc.target/i386/pr68937-1.c: New test.
	* gcc.target/i386/pr68937-2.c: Likewise.
	* gcc.target/i386/pr68937-3.c: Likewise.
	* gcc.target/i386/pr68937-4.c: Likewise.
	* gcc.target/i386/pr68937-5.c: Likewise.
---
 gcc/config/i386/i386.c                    |  4 ++-
 gcc/config/i386/i386.md                   | 43 +++++++++++++++++++++++++++++++
 gcc/config/i386/predicates.md             | 10 +++----
 gcc/testsuite/gcc.target/i386/pr68937-1.c | 13 ++++++++++
 gcc/testsuite/gcc.target/i386/pr68937-2.c | 13 ++++++++++
 gcc/testsuite/gcc.target/i386/pr68937-3.c | 13 ++++++++++
 gcc/testsuite/gcc.target/i386/pr68937-4.c | 13 ++++++++++
 gcc/testsuite/gcc.target/i386/pr68937-5.c |  9 +++++++
 8 files changed, 111 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr68937-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr68937-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr68937-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr68937-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr68937-5.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index cecea24..0e2bec3 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -6723,8 +6723,10 @@ ix86_function_ok_for_sibcall (tree decl, tree exp)
       /* If this call is indirect, we'll need to be able to use a
 	 call-clobbered register for the address of the target function.
 	 Make sure that all such registers are not used for passing
-	 parameters.  Note that DLLIMPORT functions are indirect.  */
+	 parameters.  Note that DLLIMPORT functions and call via GOT
+	 slot are indirect.  */
       if (!decl
+	  || (flag_pic && !flag_plt)
 	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
 	{
 	  /* Check if regparm >= 3 since arg_reg_available is set to
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 49b2216..7c62586 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -11865,6 +11865,27 @@
   "* return ix86_output_call_insn (insn, operands[0]);"
   [(set_attr "type" "call")])
 
+;; Since sibcall never returns, we can only use call-clobbered register
+;; as GOT base.
+(define_insn "*sibcall_GOT_32"
+  [(call (mem:QI
+	   (mem:SI (plus:SI
+		     (match_operand:SI 0 "register_no_elim_operand" "U")
+		     (const:SI
+		       (unspec:SI [(match_operand:SI 1 "symbol_operand")]
+			UNSPEC_GOT)))))
+	 (match_operand 2))]
+  "!TARGET_MACHO && !TARGET_64BIT && SIBLING_CALL_P (insn)"
+{
+  rtx fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, operands[1]),
+			       UNSPEC_GOT);
+  fnaddr = gen_rtx_CONST (Pmode, fnaddr);
+  fnaddr = gen_rtx_PLUS (Pmode, operands[0], fnaddr);
+  fnaddr = gen_const_mem (Pmode, fnaddr);
+  return ix86_output_call_insn (insn, fnaddr);
+}
+  [(set_attr "type" "call")])
+
 (define_insn "*sibcall"
   [(call (mem:QI (match_operand:W 0 "sibcall_insn_operand" "UBsBz"))
 	 (match_operand 1))]
@@ -12042,6 +12063,28 @@
   "* return ix86_output_call_insn (insn, operands[1]);"
   [(set_attr "type" "callv")])
 
+;; Since sibcall never returns, we can only use call-clobbered register
+;; as GOT base.
+(define_insn "*sibcall_value_GOT_32"
+  [(set (match_operand 0)
+        (call (mem:QI
+		(mem:SI (plus:SI
+			  (match_operand:SI 1 "register_no_elim_operand" "U")
+			  (const:SI
+			    (unspec:SI [(match_operand:SI 2 "symbol_operand")]
+			     UNSPEC_GOT)))))
+	 (match_operand 3)))]
+  "!TARGET_MACHO && !TARGET_64BIT && SIBLING_CALL_P (insn)"
+{
+  rtx fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, operands[2]),
+			       UNSPEC_GOT);
+  fnaddr = gen_rtx_CONST (Pmode, fnaddr);
+  fnaddr = gen_rtx_PLUS (Pmode, operands[1], fnaddr);
+  fnaddr = gen_const_mem (Pmode, fnaddr);
+  return ix86_output_call_insn (insn, fnaddr);
+}
+  [(set_attr "type" "callv")])
+
 (define_insn "*sibcall_value"
   [(set (match_operand 0)
 	(call (mem:QI (match_operand:W 1 "sibcall_insn_operand" "UBsBz"))
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 8bdd5d8..006bb2c 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -597,14 +597,12 @@
 	    (match_operand 0 "memory_operand"))))
 
 ;; Return true if OP is a memory operands that can be used in sibcalls.
+;; Since sibcall never returns, we can only use call-clobbered register
+;; as GOT base.  Don't allow GOT slot here.  Handle sibcall over GOT
+;; slot with *sibcall_GOT_32 and *sibcall_value_GOT_32 patterns.
 (define_predicate "sibcall_memory_operand"
   (and (match_operand 0 "memory_operand")
-       (match_test "CONSTANT_P (XEXP (op, 0))
-		    || (GET_CODE (XEXP (op, 0)) == PLUS
-			&& REG_P (XEXP (XEXP (op, 0), 0))
-			&& GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST
-			&& GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 0)) == UNSPEC
-			&& XINT (XEXP (XEXP (XEXP (op, 0), 1), 0), 1) == UNSPEC_GOT)")))
+       (match_test "CONSTANT_P (XEXP (op, 0))")))
 
 ;; Test for a valid operand for a call instruction.
 ;; Allow constant call address operands in Pmode only.
diff --git a/gcc/testsuite/gcc.target/i386/pr68937-1.c b/gcc/testsuite/gcc.target/i386/pr68937-1.c
new file mode 100644
index 0000000..897856b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr68937-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { *-*-linux* && ia32 } } } */
+/* { dg-options "-O2 -fpic -fno-plt -mregparm=3" } */
+
+extern void bar (int);
+
+void
+foo (int b)
+{
+  bar (b);
+  bar (b);
+}
+
+/* { dg-final { scan-assembler "jmp\[ \t\]*.bar@GOT\\(%e(a|c|d)x\\)" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr68937-2.c b/gcc/testsuite/gcc.target/i386/pr68937-2.c
new file mode 100644
index 0000000..257f4e2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr68937-2.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { *-*-linux* && ia32 } } } */
+/* { dg-options "-O2 -fpic -fno-plt -mregparm=3" } */
+
+extern void bar (int, int);
+
+void
+foo (int a, int b)
+{
+  bar (a, b);
+  bar (a, b);
+}
+
+/* { dg-final { scan-assembler "jmp\[ \t\]*.bar@GOT\\(%e(a|c|d)x\\)" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr68937-3.c b/gcc/testsuite/gcc.target/i386/pr68937-3.c
new file mode 100644
index 0000000..6d8e40f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr68937-3.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { *-*-linux* && ia32 } } } */
+/* { dg-options "-O2 -fpic -fno-plt -mregparm=3" } */
+
+extern void bar (int, int, int);
+
+void
+foo (int a, int b, int c)
+{
+  bar (a, b, c);
+  bar (a, b, c);
+}
+
+/* { dg-final { scan-assembler-not "jmp\[ \t\]*.bar@GOT" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr68937-4.c b/gcc/testsuite/gcc.target/i386/pr68937-4.c
new file mode 100644
index 0000000..9c19956
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr68937-4.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { *-*-linux* && ia32 } } } */
+/* { dg-options "-O2 -fpic -fno-plt -mregparm=3" } */
+
+extern int bar (int, int);
+
+int
+foo (int a, int b)
+{
+  (void) bar (a, b);
+  return bar (a, b);
+}
+
+/* { dg-final { scan-assembler "jmp\[ \t\]*.bar@GOT\\(%e(a|c|d)x\\)" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr68937-5.c b/gcc/testsuite/gcc.target/i386/pr68937-5.c
new file mode 100644
index 0000000..f7e3ec5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr68937-5.c
@@ -0,0 +1,9 @@
+/* { dg-do compile { target { *-*-linux* } } } */
+/* { dg-options "-O2 -fpic -fno-plt -funroll-loops" } */
+
+extern void *f();
+void dmi_scan_machine(void) {
+  char *p = f(), *q;
+  for (q = p; q < p + 10; q++)
+    ;
+}
-- 
2.5.0


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] PR target/68937: i686: -fno-plt produces wrong code (maybe only with tailcall
  2015-12-17 13:00   ` H.J. Lu
@ 2015-12-17 13:42     ` Uros Bizjak
  2015-12-17 15:50       ` H.J. Lu
  0 siblings, 1 reply; 11+ messages in thread
From: Uros Bizjak @ 2015-12-17 13:42 UTC (permalink / raw)
  To: H.J. Lu; +Cc: gcc-patches

On Thu, Dec 17, 2015 at 2:00 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Thu, Dec 17, 2015 at 2:04 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>> On Thu, Dec 17, 2015 at 12:29 AM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>> Since sibcall never returns, we can only use call-clobbered register
>>> as GOT base.  Otherwise, callee-saved register used as GOT base won't
>>> be properly restored.
>>>
>>> Tested on x86-64 with -m32.  OK for trunk?
>>
>> You don't have to add explicit clobber for members of "CLOBBERED_REGS"
>> class, and register_no_elim_operand predicate should be used with "U"
>> constraint. Also, please introduce new predicate, similar to how
>> GOT_memory_operand is defined and handled.
>>
>
> Here is the updated patch.  There is a predicate already,
> sibcall_memory_operand.  It allows any registers to
> be as GOT base, which is the root of our problem.
> This patch removes GOT slot from it and handles
> sibcall over GOT slot with *sibcall_GOT_32 and
> *sibcall_value_GOT_32 patterns.  Since I need to
> expose constraints on GOT base register to RA,
> I have to use 2 operands, GOT base and function
> symbol, to describe sibcall over 32-bit GOT slot.

Please use

       (mem:SI (plus:SI
             (match_operand:SI 0 "register_no_elim_operand" "U")
             (match_operand:SI 1 "GOT32_symbol_operand")))
...

to avoid manual rebuild of the operand.

Uros.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] PR target/68937: i686: -fno-plt produces wrong code (maybe only with tailcall
  2015-12-17 13:42     ` Uros Bizjak
@ 2015-12-17 15:50       ` H.J. Lu
  2015-12-17 16:12         ` H.J. Lu
  0 siblings, 1 reply; 11+ messages in thread
From: H.J. Lu @ 2015-12-17 15:50 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1531 bytes --]

On Thu, Dec 17, 2015 at 5:42 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Thu, Dec 17, 2015 at 2:00 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Thu, Dec 17, 2015 at 2:04 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>> On Thu, Dec 17, 2015 at 12:29 AM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>>> Since sibcall never returns, we can only use call-clobbered register
>>>> as GOT base.  Otherwise, callee-saved register used as GOT base won't
>>>> be properly restored.
>>>>
>>>> Tested on x86-64 with -m32.  OK for trunk?
>>>
>>> You don't have to add explicit clobber for members of "CLOBBERED_REGS"
>>> class, and register_no_elim_operand predicate should be used with "U"
>>> constraint. Also, please introduce new predicate, similar to how
>>> GOT_memory_operand is defined and handled.
>>>
>>
>> Here is the updated patch.  There is a predicate already,
>> sibcall_memory_operand.  It allows any registers to
>> be as GOT base, which is the root of our problem.
>> This patch removes GOT slot from it and handles
>> sibcall over GOT slot with *sibcall_GOT_32 and
>> *sibcall_value_GOT_32 patterns.  Since I need to
>> expose constraints on GOT base register to RA,
>> I have to use 2 operands, GOT base and function
>> symbol, to describe sibcall over 32-bit GOT slot.
>
> Please use
>
>        (mem:SI (plus:SI
>              (match_operand:SI 0 "register_no_elim_operand" "U")
>              (match_operand:SI 1 "GOT32_symbol_operand")))
> ...
>
> to avoid manual rebuild of the operand.
>

Is this OK?

Thanks.

-- 
H.J.

[-- Attachment #2: 0001-Use-call-clobbered-register-for-sibcall-via-GOT.patch --]
[-- Type: text/x-patch, Size: 8375 bytes --]

From 9a5818415f9de92454ee555e8d8c3bd675fe30dd Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Wed, 16 Dec 2015 12:34:57 -0800
Subject: [PATCH] Use call-clobbered register for sibcall via GOT

Since sibcall never returns, we can only use call-clobbered register
as GOT base.  Otherwise, callee-saved register used as GOT base won't
be properly restored.

gcc/

	PR target/68937
	* config/i386/i386.c (ix86_function_ok_for_sibcall): Count
	call via GOT slot as indirect call.
	* config/i386/i386.md (*sibcall_GOT_32): New pattern.
	(*sibcall_value_GOT_32): Likewise.
	* config/i386/predicates.md (sibcall_memory_operand): Remove
	GOT slot.
	(GOT32_symbol_operand): New predicate.

gcc/testsuite/

	PR target/68937
	* gcc.target/i386/pr68937-1.c: New test.
	* gcc.target/i386/pr68937-2.c: Likewise.
	* gcc.target/i386/pr68937-3.c: Likewise.
	* gcc.target/i386/pr68937-4.c: Likewise.
	* gcc.target/i386/pr68937-5.c: Likewise.
---
 gcc/config/i386/i386.c                    |  4 +++-
 gcc/config/i386/i386.md                   | 33 +++++++++++++++++++++++++++++++
 gcc/config/i386/predicates.md             | 16 +++++++++------
 gcc/testsuite/gcc.target/i386/pr68937-1.c | 13 ++++++++++++
 gcc/testsuite/gcc.target/i386/pr68937-2.c | 13 ++++++++++++
 gcc/testsuite/gcc.target/i386/pr68937-3.c | 13 ++++++++++++
 gcc/testsuite/gcc.target/i386/pr68937-4.c | 13 ++++++++++++
 gcc/testsuite/gcc.target/i386/pr68937-5.c |  9 +++++++++
 8 files changed, 107 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr68937-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr68937-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr68937-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr68937-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr68937-5.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index cecea24..0e2bec3 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -6723,8 +6723,10 @@ ix86_function_ok_for_sibcall (tree decl, tree exp)
       /* If this call is indirect, we'll need to be able to use a
 	 call-clobbered register for the address of the target function.
 	 Make sure that all such registers are not used for passing
-	 parameters.  Note that DLLIMPORT functions are indirect.  */
+	 parameters.  Note that DLLIMPORT functions and call via GOT
+	 slot are indirect.  */
       if (!decl
+	  || (flag_pic && !flag_plt)
 	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
 	{
 	  /* Check if regparm >= 3 since arg_reg_available is set to
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 49b2216..6ab8eaa 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -11865,6 +11865,22 @@
   "* return ix86_output_call_insn (insn, operands[0]);"
   [(set_attr "type" "call")])
 
+;; Since sibcall never returns, we can only use call-clobbered register
+;; as GOT base.
+(define_insn "*sibcall_GOT_32"
+  [(call (mem:QI
+	   (mem:SI (plus:SI
+		     (match_operand:SI 0 "register_no_elim_operand" "U")
+		     (match_operand:SI 1 "GOT32_symbol_operand"))))
+	 (match_operand 2))]
+  "!TARGET_MACHO && !TARGET_64BIT && SIBLING_CALL_P (insn)"
+{
+  rtx fnaddr = gen_rtx_PLUS (Pmode, operands[0], operands[1]);
+  fnaddr = gen_const_mem (Pmode, fnaddr);
+  return ix86_output_call_insn (insn, fnaddr);
+}
+  [(set_attr "type" "call")])
+
 (define_insn "*sibcall"
   [(call (mem:QI (match_operand:W 0 "sibcall_insn_operand" "UBsBz"))
 	 (match_operand 1))]
@@ -12042,6 +12058,23 @@
   "* return ix86_output_call_insn (insn, operands[1]);"
   [(set_attr "type" "callv")])
 
+;; Since sibcall never returns, we can only use call-clobbered register
+;; as GOT base.
+(define_insn "*sibcall_value_GOT_32"
+  [(set (match_operand 0)
+        (call (mem:QI
+		(mem:SI (plus:SI
+			  (match_operand:SI 1 "register_no_elim_operand" "U")
+			  (match_operand:SI 2 "GOT32_symbol_operand"))))
+	 (match_operand 3)))]
+  "!TARGET_MACHO && !TARGET_64BIT && SIBLING_CALL_P (insn)"
+{
+  rtx fnaddr = gen_rtx_PLUS (Pmode, operands[1], operands[2]);
+  fnaddr = gen_const_mem (Pmode, fnaddr);
+  return ix86_output_call_insn (insn, fnaddr);
+}
+  [(set_attr "type" "callv")])
+
 (define_insn "*sibcall_value"
   [(set (match_operand 0)
 	(call (mem:QI (match_operand:W 1 "sibcall_insn_operand" "UBsBz"))
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 8bdd5d8..e750e96 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -597,14 +597,12 @@
 	    (match_operand 0 "memory_operand"))))
 
 ;; Return true if OP is a memory operands that can be used in sibcalls.
+;; Since sibcall never returns, we can only use call-clobbered register
+;; as GOT base.  Don't allow GOT slot here.  Handle sibcall over GOT
+;; slot with *sibcall_GOT_32 and *sibcall_value_GOT_32 patterns.
 (define_predicate "sibcall_memory_operand"
   (and (match_operand 0 "memory_operand")
-       (match_test "CONSTANT_P (XEXP (op, 0))
-		    || (GET_CODE (XEXP (op, 0)) == PLUS
-			&& REG_P (XEXP (XEXP (op, 0), 0))
-			&& GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST
-			&& GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 0)) == UNSPEC
-			&& XINT (XEXP (XEXP (XEXP (op, 0), 1), 0), 1) == UNSPEC_GOT)")))
+       (match_test "CONSTANT_P (XEXP (op, 0))")))
 
 ;; Test for a valid operand for a call instruction.
 ;; Allow constant call address operands in Pmode only.
@@ -633,6 +631,12 @@
 	  && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL);
 })
 
+;; Return true if OP is a 32-bit GOT symbol operand.
+(define_predicate "GOT32_symbol_operand"
+  (match_test "GET_CODE (op) == CONST
+               && GET_CODE (XEXP (op, 0)) == UNSPEC
+               && XINT (XEXP (op, 0), 1) == UNSPEC_GOT"))
+
 ;; Match exactly zero.
 (define_predicate "const0_operand"
   (match_code "const_int,const_wide_int,const_double,const_vector")
diff --git a/gcc/testsuite/gcc.target/i386/pr68937-1.c b/gcc/testsuite/gcc.target/i386/pr68937-1.c
new file mode 100644
index 0000000..897856b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr68937-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { *-*-linux* && ia32 } } } */
+/* { dg-options "-O2 -fpic -fno-plt -mregparm=3" } */
+
+extern void bar (int);
+
+void
+foo (int b)
+{
+  bar (b);
+  bar (b);
+}
+
+/* { dg-final { scan-assembler "jmp\[ \t\]*.bar@GOT\\(%e(a|c|d)x\\)" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr68937-2.c b/gcc/testsuite/gcc.target/i386/pr68937-2.c
new file mode 100644
index 0000000..257f4e2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr68937-2.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { *-*-linux* && ia32 } } } */
+/* { dg-options "-O2 -fpic -fno-plt -mregparm=3" } */
+
+extern void bar (int, int);
+
+void
+foo (int a, int b)
+{
+  bar (a, b);
+  bar (a, b);
+}
+
+/* { dg-final { scan-assembler "jmp\[ \t\]*.bar@GOT\\(%e(a|c|d)x\\)" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr68937-3.c b/gcc/testsuite/gcc.target/i386/pr68937-3.c
new file mode 100644
index 0000000..6d8e40f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr68937-3.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { *-*-linux* && ia32 } } } */
+/* { dg-options "-O2 -fpic -fno-plt -mregparm=3" } */
+
+extern void bar (int, int, int);
+
+void
+foo (int a, int b, int c)
+{
+  bar (a, b, c);
+  bar (a, b, c);
+}
+
+/* { dg-final { scan-assembler-not "jmp\[ \t\]*.bar@GOT" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr68937-4.c b/gcc/testsuite/gcc.target/i386/pr68937-4.c
new file mode 100644
index 0000000..9c19956
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr68937-4.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { *-*-linux* && ia32 } } } */
+/* { dg-options "-O2 -fpic -fno-plt -mregparm=3" } */
+
+extern int bar (int, int);
+
+int
+foo (int a, int b)
+{
+  (void) bar (a, b);
+  return bar (a, b);
+}
+
+/* { dg-final { scan-assembler "jmp\[ \t\]*.bar@GOT\\(%e(a|c|d)x\\)" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr68937-5.c b/gcc/testsuite/gcc.target/i386/pr68937-5.c
new file mode 100644
index 0000000..f7e3ec5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr68937-5.c
@@ -0,0 +1,9 @@
+/* { dg-do compile { target { *-*-linux* } } } */
+/* { dg-options "-O2 -fpic -fno-plt -funroll-loops" } */
+
+extern void *f();
+void dmi_scan_machine(void) {
+  char *p = f(), *q;
+  for (q = p; q < p + 10; q++)
+    ;
+}
-- 
2.5.0


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] PR target/68937: i686: -fno-plt produces wrong code (maybe only with tailcall
  2015-12-17 15:50       ` H.J. Lu
@ 2015-12-17 16:12         ` H.J. Lu
  2015-12-17 18:09           ` H.J. Lu
  0 siblings, 1 reply; 11+ messages in thread
From: H.J. Lu @ 2015-12-17 16:12 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1753 bytes --]

On Thu, Dec 17, 2015 at 7:50 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Thu, Dec 17, 2015 at 5:42 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>> On Thu, Dec 17, 2015 at 2:00 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>> On Thu, Dec 17, 2015 at 2:04 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>>> On Thu, Dec 17, 2015 at 12:29 AM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>>>> Since sibcall never returns, we can only use call-clobbered register
>>>>> as GOT base.  Otherwise, callee-saved register used as GOT base won't
>>>>> be properly restored.
>>>>>
>>>>> Tested on x86-64 with -m32.  OK for trunk?
>>>>
>>>> You don't have to add explicit clobber for members of "CLOBBERED_REGS"
>>>> class, and register_no_elim_operand predicate should be used with "U"
>>>> constraint. Also, please introduce new predicate, similar to how
>>>> GOT_memory_operand is defined and handled.
>>>>
>>>
>>> Here is the updated patch.  There is a predicate already,
>>> sibcall_memory_operand.  It allows any registers to
>>> be as GOT base, which is the root of our problem.
>>> This patch removes GOT slot from it and handles
>>> sibcall over GOT slot with *sibcall_GOT_32 and
>>> *sibcall_value_GOT_32 patterns.  Since I need to
>>> expose constraints on GOT base register to RA,
>>> I have to use 2 operands, GOT base and function
>>> symbol, to describe sibcall over 32-bit GOT slot.
>>
>> Please use
>>
>>        (mem:SI (plus:SI
>>              (match_operand:SI 0 "register_no_elim_operand" "U")
>>              (match_operand:SI 1 "GOT32_symbol_operand")))
>> ...
>>
>> to avoid manual rebuild of the operand.
>>
>
> Is this OK?
>

An updated patch to allow sibcall_memory_operand for RTL
expansion.  OK for trunk if there is no regression?

Thanks.


-- 
H.J.

[-- Attachment #2: 0001-Use-call-clobbered-register-for-sibcall-via-GOT.patch --]
[-- Type: text/x-patch, Size: 8666 bytes --]

From dffd3a70b9788174f9b279ff27bf72dbc2384659 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Wed, 16 Dec 2015 12:34:57 -0800
Subject: [PATCH] Use call-clobbered register for sibcall via GOT

Since sibcall never returns, we can only use call-clobbered register as
GOT base.  Otherwise, callee-saved register used as GOT base won't be
properly restored.  sibcall_memory_operand is changed to allow 32-bit
GOT slot only with pseudo register as GOT base for RTL expansion.  2
new patterns, *sibcall_GOT_32 and *sibcall_value_GOT_32, are added to
expose GOT base register to register allocator so that call-clobbered
register will be used for GOT base.

gcc/

	PR target/68937
	* config/i386/i386.c (ix86_function_ok_for_sibcall): Count
	call via GOT slot as indirect call.
	* config/i386/i386.md (*sibcall_GOT_32): New pattern.
	(*sibcall_value_GOT_32): Likewise.
	* config/i386/predicates.md (sibcall_memory_operand): Allow
	32-bit GOT slot only with pseudo register as GOT base.
	(GOT32_symbol_operand): New predicate.

gcc/testsuite/

	PR target/68937
	* gcc.target/i386/pr68937-1.c: New test.
	* gcc.target/i386/pr68937-2.c: Likewise.
	* gcc.target/i386/pr68937-3.c: Likewise.
	* gcc.target/i386/pr68937-4.c: Likewise.
	* gcc.target/i386/pr68937-5.c: Likewise.
---
 gcc/config/i386/i386.c                    |  4 +++-
 gcc/config/i386/i386.md                   | 33 +++++++++++++++++++++++++++++++
 gcc/config/i386/predicates.md             | 12 +++++++++++
 gcc/testsuite/gcc.target/i386/pr68937-1.c | 13 ++++++++++++
 gcc/testsuite/gcc.target/i386/pr68937-2.c | 13 ++++++++++++
 gcc/testsuite/gcc.target/i386/pr68937-3.c | 13 ++++++++++++
 gcc/testsuite/gcc.target/i386/pr68937-4.c | 13 ++++++++++++
 gcc/testsuite/gcc.target/i386/pr68937-5.c |  9 +++++++++
 8 files changed, 109 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr68937-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr68937-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr68937-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr68937-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr68937-5.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index cecea24..0e2bec3 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -6723,8 +6723,10 @@ ix86_function_ok_for_sibcall (tree decl, tree exp)
       /* If this call is indirect, we'll need to be able to use a
 	 call-clobbered register for the address of the target function.
 	 Make sure that all such registers are not used for passing
-	 parameters.  Note that DLLIMPORT functions are indirect.  */
+	 parameters.  Note that DLLIMPORT functions and call via GOT
+	 slot are indirect.  */
       if (!decl
+	  || (flag_pic && !flag_plt)
 	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
 	{
 	  /* Check if regparm >= 3 since arg_reg_available is set to
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 49b2216..6ab8eaa 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -11865,6 +11865,22 @@
   "* return ix86_output_call_insn (insn, operands[0]);"
   [(set_attr "type" "call")])
 
+;; Since sibcall never returns, we can only use call-clobbered register
+;; as GOT base.
+(define_insn "*sibcall_GOT_32"
+  [(call (mem:QI
+	   (mem:SI (plus:SI
+		     (match_operand:SI 0 "register_no_elim_operand" "U")
+		     (match_operand:SI 1 "GOT32_symbol_operand"))))
+	 (match_operand 2))]
+  "!TARGET_MACHO && !TARGET_64BIT && SIBLING_CALL_P (insn)"
+{
+  rtx fnaddr = gen_rtx_PLUS (Pmode, operands[0], operands[1]);
+  fnaddr = gen_const_mem (Pmode, fnaddr);
+  return ix86_output_call_insn (insn, fnaddr);
+}
+  [(set_attr "type" "call")])
+
 (define_insn "*sibcall"
   [(call (mem:QI (match_operand:W 0 "sibcall_insn_operand" "UBsBz"))
 	 (match_operand 1))]
@@ -12042,6 +12058,23 @@
   "* return ix86_output_call_insn (insn, operands[1]);"
   [(set_attr "type" "callv")])
 
+;; Since sibcall never returns, we can only use call-clobbered register
+;; as GOT base.
+(define_insn "*sibcall_value_GOT_32"
+  [(set (match_operand 0)
+        (call (mem:QI
+		(mem:SI (plus:SI
+			  (match_operand:SI 1 "register_no_elim_operand" "U")
+			  (match_operand:SI 2 "GOT32_symbol_operand"))))
+	 (match_operand 3)))]
+  "!TARGET_MACHO && !TARGET_64BIT && SIBLING_CALL_P (insn)"
+{
+  rtx fnaddr = gen_rtx_PLUS (Pmode, operands[1], operands[2]);
+  fnaddr = gen_const_mem (Pmode, fnaddr);
+  return ix86_output_call_insn (insn, fnaddr);
+}
+  [(set_attr "type" "callv")])
+
 (define_insn "*sibcall_value"
   [(set (match_operand 0)
 	(call (mem:QI (match_operand:W 1 "sibcall_insn_operand" "UBsBz"))
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 8bdd5d8..f33a53a 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -597,11 +597,17 @@
 	    (match_operand 0 "memory_operand"))))
 
 ;; Return true if OP is a memory operands that can be used in sibcalls.
+;; Since sibcall never returns, we can only use call-clobbered register
+;; as GOT base.  Allow GOT slot here only with pseudo register as GOT
+;; base.  Properly handle sibcall over GOT slot with *sibcall_GOT_32
+;; and *sibcall_value_GOT_32 patterns.
 (define_predicate "sibcall_memory_operand"
   (and (match_operand 0 "memory_operand")
        (match_test "CONSTANT_P (XEXP (op, 0))
 		    || (GET_CODE (XEXP (op, 0)) == PLUS
 			&& REG_P (XEXP (XEXP (op, 0), 0))
+			&& (REGNO (XEXP (XEXP (op, 0), 0))
+			    >= FIRST_PSEUDO_REGISTER)
 			&& GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST
 			&& GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 0)) == UNSPEC
 			&& XINT (XEXP (XEXP (XEXP (op, 0), 1), 0), 1) == UNSPEC_GOT)")))
@@ -633,6 +639,12 @@
 	  && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL);
 })
 
+;; Return true if OP is a 32-bit GOT symbol operand.
+(define_predicate "GOT32_symbol_operand"
+  (match_test "GET_CODE (op) == CONST
+               && GET_CODE (XEXP (op, 0)) == UNSPEC
+               && XINT (XEXP (op, 0), 1) == UNSPEC_GOT"))
+
 ;; Match exactly zero.
 (define_predicate "const0_operand"
   (match_code "const_int,const_wide_int,const_double,const_vector")
diff --git a/gcc/testsuite/gcc.target/i386/pr68937-1.c b/gcc/testsuite/gcc.target/i386/pr68937-1.c
new file mode 100644
index 0000000..897856b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr68937-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { *-*-linux* && ia32 } } } */
+/* { dg-options "-O2 -fpic -fno-plt -mregparm=3" } */
+
+extern void bar (int);
+
+void
+foo (int b)
+{
+  bar (b);
+  bar (b);
+}
+
+/* { dg-final { scan-assembler "jmp\[ \t\]*.bar@GOT\\(%e(a|c|d)x\\)" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr68937-2.c b/gcc/testsuite/gcc.target/i386/pr68937-2.c
new file mode 100644
index 0000000..257f4e2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr68937-2.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { *-*-linux* && ia32 } } } */
+/* { dg-options "-O2 -fpic -fno-plt -mregparm=3" } */
+
+extern void bar (int, int);
+
+void
+foo (int a, int b)
+{
+  bar (a, b);
+  bar (a, b);
+}
+
+/* { dg-final { scan-assembler "jmp\[ \t\]*.bar@GOT\\(%e(a|c|d)x\\)" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr68937-3.c b/gcc/testsuite/gcc.target/i386/pr68937-3.c
new file mode 100644
index 0000000..6d8e40f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr68937-3.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { *-*-linux* && ia32 } } } */
+/* { dg-options "-O2 -fpic -fno-plt -mregparm=3" } */
+
+extern void bar (int, int, int);
+
+void
+foo (int a, int b, int c)
+{
+  bar (a, b, c);
+  bar (a, b, c);
+}
+
+/* { dg-final { scan-assembler-not "jmp\[ \t\]*.bar@GOT" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr68937-4.c b/gcc/testsuite/gcc.target/i386/pr68937-4.c
new file mode 100644
index 0000000..9c19956
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr68937-4.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { *-*-linux* && ia32 } } } */
+/* { dg-options "-O2 -fpic -fno-plt -mregparm=3" } */
+
+extern int bar (int, int);
+
+int
+foo (int a, int b)
+{
+  (void) bar (a, b);
+  return bar (a, b);
+}
+
+/* { dg-final { scan-assembler "jmp\[ \t\]*.bar@GOT\\(%e(a|c|d)x\\)" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr68937-5.c b/gcc/testsuite/gcc.target/i386/pr68937-5.c
new file mode 100644
index 0000000..f7e3ec5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr68937-5.c
@@ -0,0 +1,9 @@
+/* { dg-do compile { target { *-*-linux* } } } */
+/* { dg-options "-O2 -fpic -fno-plt -funroll-loops" } */
+
+extern void *f();
+void dmi_scan_machine(void) {
+  char *p = f(), *q;
+  for (q = p; q < p + 10; q++)
+    ;
+}
-- 
2.5.0


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] PR target/68937: i686: -fno-plt produces wrong code (maybe only with tailcall
  2015-12-17 16:12         ` H.J. Lu
@ 2015-12-17 18:09           ` H.J. Lu
  2015-12-17 21:21             ` Uros Bizjak
  0 siblings, 1 reply; 11+ messages in thread
From: H.J. Lu @ 2015-12-17 18:09 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches

On Thu, Dec 17, 2015 at 8:11 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Thu, Dec 17, 2015 at 7:50 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Thu, Dec 17, 2015 at 5:42 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>> On Thu, Dec 17, 2015 at 2:00 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>> On Thu, Dec 17, 2015 at 2:04 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>>>> On Thu, Dec 17, 2015 at 12:29 AM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>>>>> Since sibcall never returns, we can only use call-clobbered register
>>>>>> as GOT base.  Otherwise, callee-saved register used as GOT base won't
>>>>>> be properly restored.
>>>>>>
>>>>>> Tested on x86-64 with -m32.  OK for trunk?
>>>>>
>>>>> You don't have to add explicit clobber for members of "CLOBBERED_REGS"
>>>>> class, and register_no_elim_operand predicate should be used with "U"
>>>>> constraint. Also, please introduce new predicate, similar to how
>>>>> GOT_memory_operand is defined and handled.
>>>>>
>>>>
>>>> Here is the updated patch.  There is a predicate already,
>>>> sibcall_memory_operand.  It allows any registers to
>>>> be as GOT base, which is the root of our problem.
>>>> This patch removes GOT slot from it and handles
>>>> sibcall over GOT slot with *sibcall_GOT_32 and
>>>> *sibcall_value_GOT_32 patterns.  Since I need to
>>>> expose constraints on GOT base register to RA,
>>>> I have to use 2 operands, GOT base and function
>>>> symbol, to describe sibcall over 32-bit GOT slot.
>>>
>>> Please use
>>>
>>>        (mem:SI (plus:SI
>>>              (match_operand:SI 0 "register_no_elim_operand" "U")
>>>              (match_operand:SI 1 "GOT32_symbol_operand")))
>>> ...
>>>
>>> to avoid manual rebuild of the operand.
>>>
>>
>> Is this OK?
>>
>
> An updated patch to allow sibcall_memory_operand for RTL
> expansion.  OK for trunk if there is no regression?
>

There is no regressions on x86-64 with -m32.  OK for trunk?

-- 
H.J.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] PR target/68937: i686: -fno-plt produces wrong code (maybe only with tailcall
  2015-12-17 18:09           ` H.J. Lu
@ 2015-12-17 21:21             ` Uros Bizjak
  2015-12-17 21:59               ` H.J. Lu
  0 siblings, 1 reply; 11+ messages in thread
From: Uros Bizjak @ 2015-12-17 21:21 UTC (permalink / raw)
  To: H.J. Lu; +Cc: gcc-patches

On Thu, Dec 17, 2015 at 7:09 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Thu, Dec 17, 2015 at 8:11 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Thu, Dec 17, 2015 at 7:50 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>> On Thu, Dec 17, 2015 at 5:42 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>>> On Thu, Dec 17, 2015 at 2:00 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>> On Thu, Dec 17, 2015 at 2:04 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>>>>> On Thu, Dec 17, 2015 at 12:29 AM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>>>>>> Since sibcall never returns, we can only use call-clobbered register
>>>>>>> as GOT base.  Otherwise, callee-saved register used as GOT base won't
>>>>>>> be properly restored.
>>>>>>>
>>>>>>> Tested on x86-64 with -m32.  OK for trunk?
>>>>>>
>>>>>> You don't have to add explicit clobber for members of "CLOBBERED_REGS"
>>>>>> class, and register_no_elim_operand predicate should be used with "U"
>>>>>> constraint. Also, please introduce new predicate, similar to how
>>>>>> GOT_memory_operand is defined and handled.
>>>>>>
>>>>>
>>>>> Here is the updated patch.  There is a predicate already,
>>>>> sibcall_memory_operand.  It allows any registers to
>>>>> be as GOT base, which is the root of our problem.
>>>>> This patch removes GOT slot from it and handles
>>>>> sibcall over GOT slot with *sibcall_GOT_32 and
>>>>> *sibcall_value_GOT_32 patterns.  Since I need to
>>>>> expose constraints on GOT base register to RA,
>>>>> I have to use 2 operands, GOT base and function
>>>>> symbol, to describe sibcall over 32-bit GOT slot.
>>>>
>>>> Please use
>>>>
>>>>        (mem:SI (plus:SI
>>>>              (match_operand:SI 0 "register_no_elim_operand" "U")
>>>>              (match_operand:SI 1 "GOT32_symbol_operand")))
>>>> ...
>>>>
>>>> to avoid manual rebuild of the operand.
>>>>
>>>
>>> Is this OK?
>>>
>>
>> An updated patch to allow sibcall_memory_operand for RTL
>> expansion.  OK for trunk if there is no regression?
>>
>
> There is no regressions on x86-64 with -m32.  OK for trunk?

OK for mainline, with a following change:

@@ -597,11 +597,17 @@
         (match_operand 0 "memory_operand"))))

 ;; Return true if OP is a memory operands that can be used in sibcalls.
+;; Since sibcall never returns, we can only use call-clobbered register
+;; as GOT base.  Allow GOT slot here only with pseudo register as GOT
+;; base.  Properly handle sibcall over GOT slot with *sibcall_GOT_32
+;; and *sibcall_value_GOT_32 patterns.
 (define_predicate "sibcall_memory_operand"
   (and (match_operand 0 "memory_operand")
        (match_test "CONSTANT_P (XEXP (op, 0))
             || (GET_CODE (XEXP (op, 0)) == PLUS
             && REG_P (XEXP (XEXP (op, 0), 0))
+            && (REGNO (XEXP (XEXP (op, 0), 0))
+                >= FIRST_PSEUDO_REGISTER)
             && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST
             && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 0)) == UNSPEC
             && XINT (XEXP (XEXP (XEXP (op, 0), 1), 0), 1) == UNSPEC_GOT)")))

You can use (!HARD_REGISTER_NUM_P (...) || call_used_regs[...]) here.
Call-used hard regs are still allowed here.

Can you please also rewrite this horrible match_test as a block of C
code using GOT32_symbol_operand predicate?

Thanks,
Uros.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] PR target/68937: i686: -fno-plt produces wrong code (maybe only with tailcall
  2015-12-17 21:21             ` Uros Bizjak
@ 2015-12-17 21:59               ` H.J. Lu
  2015-12-18  0:55                 ` H.J. Lu
  0 siblings, 1 reply; 11+ messages in thread
From: H.J. Lu @ 2015-12-17 21:59 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches

On Thu, Dec 17, 2015 at 1:21 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Thu, Dec 17, 2015 at 7:09 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Thu, Dec 17, 2015 at 8:11 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>> On Thu, Dec 17, 2015 at 7:50 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>> On Thu, Dec 17, 2015 at 5:42 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>>>> On Thu, Dec 17, 2015 at 2:00 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>> On Thu, Dec 17, 2015 at 2:04 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>>>>>> On Thu, Dec 17, 2015 at 12:29 AM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>>>>>>> Since sibcall never returns, we can only use call-clobbered register
>>>>>>>> as GOT base.  Otherwise, callee-saved register used as GOT base won't
>>>>>>>> be properly restored.
>>>>>>>>
>>>>>>>> Tested on x86-64 with -m32.  OK for trunk?
>>>>>>>
>>>>>>> You don't have to add explicit clobber for members of "CLOBBERED_REGS"
>>>>>>> class, and register_no_elim_operand predicate should be used with "U"
>>>>>>> constraint. Also, please introduce new predicate, similar to how
>>>>>>> GOT_memory_operand is defined and handled.
>>>>>>>
>>>>>>
>>>>>> Here is the updated patch.  There is a predicate already,
>>>>>> sibcall_memory_operand.  It allows any registers to
>>>>>> be as GOT base, which is the root of our problem.
>>>>>> This patch removes GOT slot from it and handles
>>>>>> sibcall over GOT slot with *sibcall_GOT_32 and
>>>>>> *sibcall_value_GOT_32 patterns.  Since I need to
>>>>>> expose constraints on GOT base register to RA,
>>>>>> I have to use 2 operands, GOT base and function
>>>>>> symbol, to describe sibcall over 32-bit GOT slot.
>>>>>
>>>>> Please use
>>>>>
>>>>>        (mem:SI (plus:SI
>>>>>              (match_operand:SI 0 "register_no_elim_operand" "U")
>>>>>              (match_operand:SI 1 "GOT32_symbol_operand")))
>>>>> ...
>>>>>
>>>>> to avoid manual rebuild of the operand.
>>>>>
>>>>
>>>> Is this OK?
>>>>
>>>
>>> An updated patch to allow sibcall_memory_operand for RTL
>>> expansion.  OK for trunk if there is no regression?
>>>
>>
>> There is no regressions on x86-64 with -m32.  OK for trunk?
>
> OK for mainline, with a following change:
>
> @@ -597,11 +597,17 @@
>          (match_operand 0 "memory_operand"))))
>
>  ;; Return true if OP is a memory operands that can be used in sibcalls.
> +;; Since sibcall never returns, we can only use call-clobbered register
> +;; as GOT base.  Allow GOT slot here only with pseudo register as GOT
> +;; base.  Properly handle sibcall over GOT slot with *sibcall_GOT_32
> +;; and *sibcall_value_GOT_32 patterns.
>  (define_predicate "sibcall_memory_operand"
>    (and (match_operand 0 "memory_operand")
>         (match_test "CONSTANT_P (XEXP (op, 0))
>              || (GET_CODE (XEXP (op, 0)) == PLUS
>              && REG_P (XEXP (XEXP (op, 0), 0))
> +            && (REGNO (XEXP (XEXP (op, 0), 0))
> +                >= FIRST_PSEUDO_REGISTER)
>              && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST
>              && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 0)) == UNSPEC
>              && XINT (XEXP (XEXP (XEXP (op, 0), 1), 0), 1) == UNSPEC_GOT)")))
>
> You can use (!HARD_REGISTER_NUM_P (...) || call_used_regs[...]) here.
> Call-used hard regs are still allowed here.
>
> Can you please also rewrite this horrible match_test as a block of C
> code using GOT32_symbol_operand predicate?
>

I am retesting the patch with

;; Return true if OP is a memory operands that can be used in sibcalls.
;; Since sibcall never returns, we can only use call-clobbered register
;; as GOT base.  Allow GOT slot here only with pseudo register as GOT
;; base.  Properly handle sibcall over GOT slot with *sibcall_GOT_32
;; and *sibcall_value_GOT_32 patterns.
(define_predicate "sibcall_memory_operand"
  (match_operand 0 "memory_operand")
{
  op = XEXP (op, 0);
  if (CONSTANT_P (op))
    return true;
  if (GET_CODE (op) == PLUS && REG_P (XEXP (op, 0)))
    {
      int regno = REGNO (XEXP (op, 0));
      if (!HARD_REGISTER_NUM_P (regno) || call_used_regs[regno])
        {
          op = XEXP (op, 1);
          if (GOT32_symbol_operand (op, VOIDmode))
            return true;
        }
    }
  return false;
})


I will check it in if there is no regression.

Thanks.


-- 
H.J.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] PR target/68937: i686: -fno-plt produces wrong code (maybe only with tailcall
  2015-12-17 21:59               ` H.J. Lu
@ 2015-12-18  0:55                 ` H.J. Lu
  2015-12-19 16:19                   ` Uros Bizjak
  0 siblings, 1 reply; 11+ messages in thread
From: H.J. Lu @ 2015-12-18  0:55 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 5984 bytes --]

On Thu, Dec 17, 2015 at 1:59 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Thu, Dec 17, 2015 at 1:21 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
>> On Thu, Dec 17, 2015 at 7:09 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>> On Thu, Dec 17, 2015 at 8:11 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>> On Thu, Dec 17, 2015 at 7:50 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>> On Thu, Dec 17, 2015 at 5:42 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>>>>> On Thu, Dec 17, 2015 at 2:00 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>>> On Thu, Dec 17, 2015 at 2:04 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>>>>>>> On Thu, Dec 17, 2015 at 12:29 AM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>>>>>>>> Since sibcall never returns, we can only use call-clobbered register
>>>>>>>>> as GOT base.  Otherwise, callee-saved register used as GOT base won't
>>>>>>>>> be properly restored.
>>>>>>>>>
>>>>>>>>> Tested on x86-64 with -m32.  OK for trunk?
>>>>>>>>
>>>>>>>> You don't have to add explicit clobber for members of "CLOBBERED_REGS"
>>>>>>>> class, and register_no_elim_operand predicate should be used with "U"
>>>>>>>> constraint. Also, please introduce new predicate, similar to how
>>>>>>>> GOT_memory_operand is defined and handled.
>>>>>>>>
>>>>>>>
>>>>>>> Here is the updated patch.  There is a predicate already,
>>>>>>> sibcall_memory_operand.  It allows any registers to
>>>>>>> be as GOT base, which is the root of our problem.
>>>>>>> This patch removes GOT slot from it and handles
>>>>>>> sibcall over GOT slot with *sibcall_GOT_32 and
>>>>>>> *sibcall_value_GOT_32 patterns.  Since I need to
>>>>>>> expose constraints on GOT base register to RA,
>>>>>>> I have to use 2 operands, GOT base and function
>>>>>>> symbol, to describe sibcall over 32-bit GOT slot.
>>>>>>
>>>>>> Please use
>>>>>>
>>>>>>        (mem:SI (plus:SI
>>>>>>              (match_operand:SI 0 "register_no_elim_operand" "U")
>>>>>>              (match_operand:SI 1 "GOT32_symbol_operand")))
>>>>>> ...
>>>>>>
>>>>>> to avoid manual rebuild of the operand.
>>>>>>
>>>>>
>>>>> Is this OK?
>>>>>
>>>>
>>>> An updated patch to allow sibcall_memory_operand for RTL
>>>> expansion.  OK for trunk if there is no regression?
>>>>
>>>
>>> There is no regressions on x86-64 with -m32.  OK for trunk?
>>
>> OK for mainline, with a following change:
>>
>> @@ -597,11 +597,17 @@
>>          (match_operand 0 "memory_operand"))))
>>
>>  ;; Return true if OP is a memory operands that can be used in sibcalls.
>> +;; Since sibcall never returns, we can only use call-clobbered register
>> +;; as GOT base.  Allow GOT slot here only with pseudo register as GOT
>> +;; base.  Properly handle sibcall over GOT slot with *sibcall_GOT_32
>> +;; and *sibcall_value_GOT_32 patterns.
>>  (define_predicate "sibcall_memory_operand"
>>    (and (match_operand 0 "memory_operand")
>>         (match_test "CONSTANT_P (XEXP (op, 0))
>>              || (GET_CODE (XEXP (op, 0)) == PLUS
>>              && REG_P (XEXP (XEXP (op, 0), 0))
>> +            && (REGNO (XEXP (XEXP (op, 0), 0))
>> +                >= FIRST_PSEUDO_REGISTER)
>>              && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST
>>              && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 0)) == UNSPEC
>>              && XINT (XEXP (XEXP (XEXP (op, 0), 1), 0), 1) == UNSPEC_GOT)")))
>>
>> You can use (!HARD_REGISTER_NUM_P (...) || call_used_regs[...]) here.
>> Call-used hard regs are still allowed here.
>>
>> Can you please also rewrite this horrible match_test as a block of C
>> code using GOT32_symbol_operand predicate?
>>
>
> I am retesting the patch with
>
> ;; Return true if OP is a memory operands that can be used in sibcalls.
> ;; Since sibcall never returns, we can only use call-clobbered register
> ;; as GOT base.  Allow GOT slot here only with pseudo register as GOT
> ;; base.  Properly handle sibcall over GOT slot with *sibcall_GOT_32
> ;; and *sibcall_value_GOT_32 patterns.
> (define_predicate "sibcall_memory_operand"
>   (match_operand 0 "memory_operand")
> {
>   op = XEXP (op, 0);
>   if (CONSTANT_P (op))
>     return true;
>   if (GET_CODE (op) == PLUS && REG_P (XEXP (op, 0)))
>     {
>       int regno = REGNO (XEXP (op, 0));
>       if (!HARD_REGISTER_NUM_P (regno) || call_used_regs[regno])
>         {
>           op = XEXP (op, 1);
>           if (GOT32_symbol_operand (op, VOIDmode))
>             return true;
>         }
>     }
>   return false;
> })
>
>
> I will check it in if there is no regression.
>

There is no regression.  But I missed sibcall to local function
with -O2 -fPIC -m32 -fno-plt -mregparm=3:

extern void bar (int, int, int) __attribute__((visibility("hidden")));

void
foo (int a, int b, int c)
{
  bar (a, b, c);
  bar (a, b, c);
}

It doesn't need GOT.  This patch fixes it.

iff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 0e2bec3..691915f9 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -6657,6 +6657,7 @@ ix86_function_ok_for_sibcall (tree decl, tree exp)
 {
   tree type, decl_or_type;
   rtx a, b;
+  bool bind_global = decl && !targetm.binds_local_p (decl);

   /* If we are generating position-independent code, we cannot sibcall
      optimize direct calls to global functions, as the PLT requires
@@ -6665,7 +6666,7 @@ ix86_function_ok_for_sibcall (tree decl, tree exp)
       && !TARGET_64BIT
       && flag_pic
       && flag_plt
-      && decl && !targetm.binds_local_p (decl))
+      && bind_global)
     return false;

   /* If we need to align the outgoing stack, then sibcalling would
@@ -6726,7 +6727,7 @@ ix86_function_ok_for_sibcall (tree decl, tree exp)
  parameters.  Note that DLLIMPORT functions and call via GOT
  slot are indirect.  */
       if (!decl
-  || (flag_pic && !flag_plt)
+  || (bind_global && flag_pic && !flag_plt)
   || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
  {
   /* Check if regparm >= 3 since arg_reg_available is set to

Here is the complete patch with a testcase.  OK for trunk?

Thanks.


-- 
H.J.

[-- Attachment #2: 0001-Use-call-clobbered-register-for-sibcall-via-GOT.patch --]
[-- Type: text/x-patch, Size: 10583 bytes --]

From e4a7e066a8cdbb638fc5e1312c01f1043a2679fc Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Wed, 16 Dec 2015 12:34:57 -0800
Subject: [PATCH] Use call-clobbered register for sibcall via GOT

Since sibcall never returns, we can only use call-clobbered register as
GOT base.  Otherwise, callee-saved register used as GOT base won't be
properly restored.  sibcall_memory_operand is changed to allow 32-bit
GOT slot only with pseudo register as GOT base for RTL expansion.  2
new patterns, *sibcall_GOT_32 and *sibcall_value_GOT_32, are added to
expose GOT base register to register allocator so that call-clobbered
register will be used for GOT base.

gcc/

	PR target/68937
	* config/i386/i386.c (ix86_function_ok_for_sibcall): Count
	call to global function via GOT slot as indirect call.
	* config/i386/i386.md (*sibcall_GOT_32): New pattern.
	(*sibcall_value_GOT_32): Likewise.
	* config/i386/predicates.md (sibcall_memory_operand): Rewrite.
	Allow 32-bit GOT slot only with pseudo register as GOT base.
	(GOT32_symbol_operand): New predicate.

gcc/testsuite/

	PR target/68937
	* gcc.target/i386/pr68937-1.c: New test.
	* gcc.target/i386/pr68937-2.c: Likewise.
	* gcc.target/i386/pr68937-3.c: Likewise.
	* gcc.target/i386/pr68937-4.c: Likewise.
	* gcc.target/i386/pr68937-5.c: Likewise.
	* gcc.target/i386/pr68937-6.c: Likewise.
---
 gcc/config/i386/i386.c                    |  7 +++++--
 gcc/config/i386/i386.md                   | 33 ++++++++++++++++++++++++++++++
 gcc/config/i386/predicates.md             | 34 ++++++++++++++++++++++++-------
 gcc/testsuite/gcc.target/i386/pr68937-1.c | 13 ++++++++++++
 gcc/testsuite/gcc.target/i386/pr68937-2.c | 13 ++++++++++++
 gcc/testsuite/gcc.target/i386/pr68937-3.c | 13 ++++++++++++
 gcc/testsuite/gcc.target/i386/pr68937-4.c | 13 ++++++++++++
 gcc/testsuite/gcc.target/i386/pr68937-5.c |  9 ++++++++
 gcc/testsuite/gcc.target/i386/pr68937-6.c | 16 +++++++++++++++
 9 files changed, 142 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr68937-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr68937-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr68937-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr68937-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr68937-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr68937-6.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index cecea24..f5d23d9 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -6657,6 +6657,7 @@ ix86_function_ok_for_sibcall (tree decl, tree exp)
 {
   tree type, decl_or_type;
   rtx a, b;
+  bool bind_global = decl && !targetm.binds_local_p (decl);
 
   /* If we are generating position-independent code, we cannot sibcall
      optimize direct calls to global functions, as the PLT requires
@@ -6665,7 +6666,7 @@ ix86_function_ok_for_sibcall (tree decl, tree exp)
       && !TARGET_64BIT
       && flag_pic
       && flag_plt
-      && decl && !targetm.binds_local_p (decl))
+      && bind_global)
     return false;
 
   /* If we need to align the outgoing stack, then sibcalling would
@@ -6723,8 +6724,10 @@ ix86_function_ok_for_sibcall (tree decl, tree exp)
       /* If this call is indirect, we'll need to be able to use a
 	 call-clobbered register for the address of the target function.
 	 Make sure that all such registers are not used for passing
-	 parameters.  Note that DLLIMPORT functions are indirect.  */
+	 parameters.  Note that DLLIMPORT functions and call to global
+	 function via GOT slot are indirect.  */
       if (!decl
+	  || (bind_global && flag_pic && !flag_plt)
 	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
 	{
 	  /* Check if regparm >= 3 since arg_reg_available is set to
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 49b2216..6ab8eaa 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -11865,6 +11865,22 @@
   "* return ix86_output_call_insn (insn, operands[0]);"
   [(set_attr "type" "call")])
 
+;; Since sibcall never returns, we can only use call-clobbered register
+;; as GOT base.
+(define_insn "*sibcall_GOT_32"
+  [(call (mem:QI
+	   (mem:SI (plus:SI
+		     (match_operand:SI 0 "register_no_elim_operand" "U")
+		     (match_operand:SI 1 "GOT32_symbol_operand"))))
+	 (match_operand 2))]
+  "!TARGET_MACHO && !TARGET_64BIT && SIBLING_CALL_P (insn)"
+{
+  rtx fnaddr = gen_rtx_PLUS (Pmode, operands[0], operands[1]);
+  fnaddr = gen_const_mem (Pmode, fnaddr);
+  return ix86_output_call_insn (insn, fnaddr);
+}
+  [(set_attr "type" "call")])
+
 (define_insn "*sibcall"
   [(call (mem:QI (match_operand:W 0 "sibcall_insn_operand" "UBsBz"))
 	 (match_operand 1))]
@@ -12042,6 +12058,23 @@
   "* return ix86_output_call_insn (insn, operands[1]);"
   [(set_attr "type" "callv")])
 
+;; Since sibcall never returns, we can only use call-clobbered register
+;; as GOT base.
+(define_insn "*sibcall_value_GOT_32"
+  [(set (match_operand 0)
+        (call (mem:QI
+		(mem:SI (plus:SI
+			  (match_operand:SI 1 "register_no_elim_operand" "U")
+			  (match_operand:SI 2 "GOT32_symbol_operand"))))
+	 (match_operand 3)))]
+  "!TARGET_MACHO && !TARGET_64BIT && SIBLING_CALL_P (insn)"
+{
+  rtx fnaddr = gen_rtx_PLUS (Pmode, operands[1], operands[2]);
+  fnaddr = gen_const_mem (Pmode, fnaddr);
+  return ix86_output_call_insn (insn, fnaddr);
+}
+  [(set_attr "type" "callv")])
+
 (define_insn "*sibcall_value"
   [(set (match_operand 0)
 	(call (mem:QI (match_operand:W 1 "sibcall_insn_operand" "UBsBz"))
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 8bdd5d8..96d946c 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -597,14 +597,28 @@
 	    (match_operand 0 "memory_operand"))))
 
 ;; Return true if OP is a memory operands that can be used in sibcalls.
+;; Since sibcall never returns, we can only use call-clobbered register
+;; as GOT base.  Allow GOT slot here only with pseudo register as GOT
+;; base.  Properly handle sibcall over GOT slot with *sibcall_GOT_32
+;; and *sibcall_value_GOT_32 patterns.
 (define_predicate "sibcall_memory_operand"
-  (and (match_operand 0 "memory_operand")
-       (match_test "CONSTANT_P (XEXP (op, 0))
-		    || (GET_CODE (XEXP (op, 0)) == PLUS
-			&& REG_P (XEXP (XEXP (op, 0), 0))
-			&& GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST
-			&& GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 0)) == UNSPEC
-			&& XINT (XEXP (XEXP (XEXP (op, 0), 1), 0), 1) == UNSPEC_GOT)")))
+  (match_operand 0 "memory_operand")
+{
+  op = XEXP (op, 0);
+  if (CONSTANT_P (op))
+    return true;
+  if (GET_CODE (op) == PLUS && REG_P (XEXP (op, 0)))
+    {
+      int regno = REGNO (XEXP (op, 0));
+      if (!HARD_REGISTER_NUM_P (regno) || call_used_regs[regno])
+	{
+	  op = XEXP (op, 1);
+	  if (GOT32_symbol_operand (op, VOIDmode))
+	    return true;
+	}
+    }
+  return false;
+})
 
 ;; Test for a valid operand for a call instruction.
 ;; Allow constant call address operands in Pmode only.
@@ -633,6 +647,12 @@
 	  && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL);
 })
 
+;; Return true if OP is a 32-bit GOT symbol operand.
+(define_predicate "GOT32_symbol_operand"
+  (match_test "GET_CODE (op) == CONST
+               && GET_CODE (XEXP (op, 0)) == UNSPEC
+               && XINT (XEXP (op, 0), 1) == UNSPEC_GOT"))
+
 ;; Match exactly zero.
 (define_predicate "const0_operand"
   (match_code "const_int,const_wide_int,const_double,const_vector")
diff --git a/gcc/testsuite/gcc.target/i386/pr68937-1.c b/gcc/testsuite/gcc.target/i386/pr68937-1.c
new file mode 100644
index 0000000..897856b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr68937-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { *-*-linux* && ia32 } } } */
+/* { dg-options "-O2 -fpic -fno-plt -mregparm=3" } */
+
+extern void bar (int);
+
+void
+foo (int b)
+{
+  bar (b);
+  bar (b);
+}
+
+/* { dg-final { scan-assembler "jmp\[ \t\]*.bar@GOT\\(%e(a|c|d)x\\)" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr68937-2.c b/gcc/testsuite/gcc.target/i386/pr68937-2.c
new file mode 100644
index 0000000..257f4e2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr68937-2.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { *-*-linux* && ia32 } } } */
+/* { dg-options "-O2 -fpic -fno-plt -mregparm=3" } */
+
+extern void bar (int, int);
+
+void
+foo (int a, int b)
+{
+  bar (a, b);
+  bar (a, b);
+}
+
+/* { dg-final { scan-assembler "jmp\[ \t\]*.bar@GOT\\(%e(a|c|d)x\\)" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr68937-3.c b/gcc/testsuite/gcc.target/i386/pr68937-3.c
new file mode 100644
index 0000000..6d8e40f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr68937-3.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { *-*-linux* && ia32 } } } */
+/* { dg-options "-O2 -fpic -fno-plt -mregparm=3" } */
+
+extern void bar (int, int, int);
+
+void
+foo (int a, int b, int c)
+{
+  bar (a, b, c);
+  bar (a, b, c);
+}
+
+/* { dg-final { scan-assembler-not "jmp\[ \t\]*.bar@GOT" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr68937-4.c b/gcc/testsuite/gcc.target/i386/pr68937-4.c
new file mode 100644
index 0000000..9c19956
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr68937-4.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { *-*-linux* && ia32 } } } */
+/* { dg-options "-O2 -fpic -fno-plt -mregparm=3" } */
+
+extern int bar (int, int);
+
+int
+foo (int a, int b)
+{
+  (void) bar (a, b);
+  return bar (a, b);
+}
+
+/* { dg-final { scan-assembler "jmp\[ \t\]*.bar@GOT\\(%e(a|c|d)x\\)" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr68937-5.c b/gcc/testsuite/gcc.target/i386/pr68937-5.c
new file mode 100644
index 0000000..f7e3ec5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr68937-5.c
@@ -0,0 +1,9 @@
+/* { dg-do compile { target { *-*-linux* } } } */
+/* { dg-options "-O2 -fpic -fno-plt -funroll-loops" } */
+
+extern void *f();
+void dmi_scan_machine(void) {
+  char *p = f(), *q;
+  for (q = p; q < p + 10; q++)
+    ;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr68937-6.c b/gcc/testsuite/gcc.target/i386/pr68937-6.c
new file mode 100644
index 0000000..406ce28
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr68937-6.c
@@ -0,0 +1,16 @@
+/* { dg-do compile { target { *-*-linux* && ia32 } } } */
+/* { dg-options "-O2 -fpic -fno-plt -mregparm=3" } */
+
+extern void foo (int, int, int);
+extern void bar (int, int, int) __attribute__((visibility("hidden")));
+
+void
+foo (int a, int b, int c)
+{
+  foo (a, b, c);
+  bar (a, b, c);
+  foo (a, b, c);
+  bar (a, b, c);
+}
+
+/* { dg-final { scan-assembler "jmp\[ \t\]bar" } } */
-- 
2.5.0


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] PR target/68937: i686: -fno-plt produces wrong code (maybe only with tailcall
  2015-12-18  0:55                 ` H.J. Lu
@ 2015-12-19 16:19                   ` Uros Bizjak
  0 siblings, 0 replies; 11+ messages in thread
From: Uros Bizjak @ 2015-12-19 16:19 UTC (permalink / raw)
  To: H.J. Lu; +Cc: gcc-patches, Jakub Jelinek

On Fri, Dec 18, 2015 at 1:55 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Thu, Dec 17, 2015 at 1:59 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Thu, Dec 17, 2015 at 1:21 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>> On Thu, Dec 17, 2015 at 7:09 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>> On Thu, Dec 17, 2015 at 8:11 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>> On Thu, Dec 17, 2015 at 7:50 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>> On Thu, Dec 17, 2015 at 5:42 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>>>>>> On Thu, Dec 17, 2015 at 2:00 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>>>> On Thu, Dec 17, 2015 at 2:04 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>>>>>>>> On Thu, Dec 17, 2015 at 12:29 AM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>>>>>>>>> Since sibcall never returns, we can only use call-clobbered register
>>>>>>>>>> as GOT base.  Otherwise, callee-saved register used as GOT base won't
>>>>>>>>>> be properly restored.
>>>>>>>>>>
>>>>>>>>>> Tested on x86-64 with -m32.  OK for trunk?
>>>>>>>>>
>>>>>>>>> You don't have to add explicit clobber for members of "CLOBBERED_REGS"
>>>>>>>>> class, and register_no_elim_operand predicate should be used with "U"
>>>>>>>>> constraint. Also, please introduce new predicate, similar to how
>>>>>>>>> GOT_memory_operand is defined and handled.
>>>>>>>>>
>>>>>>>>
>>>>>>>> Here is the updated patch.  There is a predicate already,
>>>>>>>> sibcall_memory_operand.  It allows any registers to
>>>>>>>> be as GOT base, which is the root of our problem.
>>>>>>>> This patch removes GOT slot from it and handles
>>>>>>>> sibcall over GOT slot with *sibcall_GOT_32 and
>>>>>>>> *sibcall_value_GOT_32 patterns.  Since I need to
>>>>>>>> expose constraints on GOT base register to RA,
>>>>>>>> I have to use 2 operands, GOT base and function
>>>>>>>> symbol, to describe sibcall over 32-bit GOT slot.
>>>>>>>
>>>>>>> Please use
>>>>>>>
>>>>>>>        (mem:SI (plus:SI
>>>>>>>              (match_operand:SI 0 "register_no_elim_operand" "U")
>>>>>>>              (match_operand:SI 1 "GOT32_symbol_operand")))
>>>>>>> ...
>>>>>>>
>>>>>>> to avoid manual rebuild of the operand.
>>>>>>>
>>>>>>
>>>>>> Is this OK?
>>>>>>
>>>>>
>>>>> An updated patch to allow sibcall_memory_operand for RTL
>>>>> expansion.  OK for trunk if there is no regression?
>>>>>
>>>>
>>>> There is no regressions on x86-64 with -m32.  OK for trunk?
>>>
>>> OK for mainline, with a following change:
>>>
>>> @@ -597,11 +597,17 @@
>>>          (match_operand 0 "memory_operand"))))
>>>
>>>  ;; Return true if OP is a memory operands that can be used in sibcalls.
>>> +;; Since sibcall never returns, we can only use call-clobbered register
>>> +;; as GOT base.  Allow GOT slot here only with pseudo register as GOT
>>> +;; base.  Properly handle sibcall over GOT slot with *sibcall_GOT_32
>>> +;; and *sibcall_value_GOT_32 patterns.
>>>  (define_predicate "sibcall_memory_operand"
>>>    (and (match_operand 0 "memory_operand")
>>>         (match_test "CONSTANT_P (XEXP (op, 0))
>>>              || (GET_CODE (XEXP (op, 0)) == PLUS
>>>              && REG_P (XEXP (XEXP (op, 0), 0))
>>> +            && (REGNO (XEXP (XEXP (op, 0), 0))
>>> +                >= FIRST_PSEUDO_REGISTER)
>>>              && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST
>>>              && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 0)) == UNSPEC
>>>              && XINT (XEXP (XEXP (XEXP (op, 0), 1), 0), 1) == UNSPEC_GOT)")))
>>>
>>> You can use (!HARD_REGISTER_NUM_P (...) || call_used_regs[...]) here.
>>> Call-used hard regs are still allowed here.
>>>
>>> Can you please also rewrite this horrible match_test as a block of C
>>> code using GOT32_symbol_operand predicate?
>>>
>>
>> I am retesting the patch with
>>
>> ;; Return true if OP is a memory operands that can be used in sibcalls.
>> ;; Since sibcall never returns, we can only use call-clobbered register
>> ;; as GOT base.  Allow GOT slot here only with pseudo register as GOT
>> ;; base.  Properly handle sibcall over GOT slot with *sibcall_GOT_32
>> ;; and *sibcall_value_GOT_32 patterns.
>> (define_predicate "sibcall_memory_operand"
>>   (match_operand 0 "memory_operand")
>> {
>>   op = XEXP (op, 0);
>>   if (CONSTANT_P (op))
>>     return true;
>>   if (GET_CODE (op) == PLUS && REG_P (XEXP (op, 0)))
>>     {
>>       int regno = REGNO (XEXP (op, 0));
>>       if (!HARD_REGISTER_NUM_P (regno) || call_used_regs[regno])
>>         {
>>           op = XEXP (op, 1);
>>           if (GOT32_symbol_operand (op, VOIDmode))
>>             return true;
>>         }
>>     }
>>   return false;
>> })
>>
>>
>> I will check it in if there is no regression.
>>
>
> There is no regression.  But I missed sibcall to local function
> with -O2 -fPIC -m32 -fno-plt -mregparm=3:
>
> extern void bar (int, int, int) __attribute__((visibility("hidden")));
>
> void
> foo (int a, int b, int c)
> {
>   bar (a, b, c);
>   bar (a, b, c);
> }
>
> It doesn't need GOT.  This patch fixes it.
>
> iff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 0e2bec3..691915f9 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -6657,6 +6657,7 @@ ix86_function_ok_for_sibcall (tree decl, tree exp)
>  {
>    tree type, decl_or_type;
>    rtx a, b;
> +  bool bind_global = decl && !targetm.binds_local_p (decl);
>
>    /* If we are generating position-independent code, we cannot sibcall
>       optimize direct calls to global functions, as the PLT requires
> @@ -6665,7 +6666,7 @@ ix86_function_ok_for_sibcall (tree decl, tree exp)
>        && !TARGET_64BIT
>        && flag_pic
>        && flag_plt
> -      && decl && !targetm.binds_local_p (decl))
> +      && bind_global)
>      return false;
>
>    /* If we need to align the outgoing stack, then sibcalling would
> @@ -6726,7 +6727,7 @@ ix86_function_ok_for_sibcall (tree decl, tree exp)
>   parameters.  Note that DLLIMPORT functions and call via GOT
>   slot are indirect.  */
>        if (!decl
> -  || (flag_pic && !flag_plt)
> +  || (bind_global && flag_pic && !flag_plt)
>    || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
>   {
>    /* Check if regparm >= 3 since arg_reg_available is set to
>
> Here is the complete patch with a testcase.  OK for trunk?

LGTM, but please allow Jakub (CC'd) a couple of days for his eventual objection.

Otherwise, OK for mainline after this period.

Uros.

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2015-12-19 16:19 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-12-16 23:29 [PATCH] PR target/68937: i686: -fno-plt produces wrong code (maybe only with tailcall H.J. Lu
2015-12-17 10:04 ` Uros Bizjak
2015-12-17 13:00   ` H.J. Lu
2015-12-17 13:42     ` Uros Bizjak
2015-12-17 15:50       ` H.J. Lu
2015-12-17 16:12         ` H.J. Lu
2015-12-17 18:09           ` H.J. Lu
2015-12-17 21:21             ` Uros Bizjak
2015-12-17 21:59               ` H.J. Lu
2015-12-18  0:55                 ` H.J. Lu
2015-12-19 16:19                   ` Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).