public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* RFC: [PATCH] PR target/67215: -fno-plt needs improvements for x86
@ 2015-08-16 18:44 H.J. Lu
  2015-08-16 19:39 ` Alexander Monakov
  0 siblings, 1 reply; 7+ messages in thread
From: H.J. Lu @ 2015-08-16 18:44 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak, Sriraman Tallam, Alexander Monakov

prepare_call_address in calls.c is the wrong place to handle -fno-plt.
We shoudn't force function address into register and hope that load
function address via GOT and indirect call via register will be folded
into indirect call via GOT, which doesn't always happen.  Allso non-PIC
case can only be handled in backend.  Instead, backend should expand
external function call into indirect call via GOT for -fno-plt.

This patch reverts -fno-plt in prepare_call_address and handles it in
ix86_expand_call.  Other backends may need similar changes to support
-fno-plt.  Alternately, we can introduce a target hook to indicate
whether an external function should be called via register for -fno-plt
so that i386 backend can disable it in prepare_call_address.

Any comments?

H.J.
--
gcc/

	PR target/67215
	* calls.c (prepare_call_address): Don't handle -fno-plt here.
	* config/i386/i386.c (ix86_expand_call): Generate indirect call
	via GOT for -fno-plt.  Support indirect call via GOT for x32.

gcc/testsuite/

	PR target/67215
	* gcc.target/i386/pr67215-1.c: New test.
	* gcc.target/i386/pr67215-2.c: Likewise.
---
 gcc/calls.c                               | 12 ------
 gcc/config/i386/i386.c                    | 65 +++++++++++++++++++++++--------
 gcc/testsuite/gcc.target/i386/pr67215-1.c | 20 ++++++++++
 gcc/testsuite/gcc.target/i386/pr67215-2.c | 20 ++++++++++
 4 files changed, 89 insertions(+), 28 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr67215-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr67215-2.c

diff --git a/gcc/calls.c b/gcc/calls.c
index 5636725..7cce9be 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -203,18 +203,6 @@ prepare_call_address (tree fndecl_or_type, rtx funexp, rtx static_chain_value,
 	       && targetm.small_register_classes_for_mode_p (FUNCTION_MODE))
 	      ? force_not_mem (memory_address (FUNCTION_MODE, funexp))
 	      : memory_address (FUNCTION_MODE, funexp));
-  else if (flag_pic
-	   && fndecl_or_type
-	   && TREE_CODE (fndecl_or_type) == FUNCTION_DECL
-	   && (!flag_plt
-	       || lookup_attribute ("noplt", DECL_ATTRIBUTES (fndecl_or_type)))
-	   && !targetm.binds_local_p (fndecl_or_type))
-    {
-      /* This is done only for PIC code.  There is no easy interface to force the
-	 function address into GOT for non-PIC case.  non-PIC case needs to be
-	 handled specially by the backend.  */
-      funexp = force_reg (Pmode, funexp);
-    }
   else if (! sibcallp)
     {
       if (!NO_FUNCTION_CSE && optimize && ! flag_no_function_cse)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 4a0986c..bf8a21d 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -25650,21 +25650,50 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
       /* Static functions and indirect calls don't need the pic register.  Also,
 	 check if PLT was explicitly avoided via no-plt or "noplt" attribute, making
 	 it an indirect call.  */
+      rtx addr = XEXP (fnaddr, 0);
       if (flag_pic
-	  && (!TARGET_64BIT
-	      || (ix86_cmodel == CM_LARGE_PIC
-		  && DEFAULT_ABI != MS_ABI))
-	  && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
-	  && !SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0))
-	  && flag_plt
-	  && (SYMBOL_REF_DECL ((XEXP (fnaddr, 0))) == NULL_TREE
-	      || !lookup_attribute ("noplt",
-		     DECL_ATTRIBUTES (SYMBOL_REF_DECL (XEXP (fnaddr, 0))))))
+	  && GET_CODE (addr) == SYMBOL_REF
+	  && !SYMBOL_REF_LOCAL_P (addr))
 	{
-	  use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
-	  if (ix86_use_pseudo_pic_reg ())
-	    emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
-			    pic_offset_table_rtx);
+	  if (flag_plt
+	      && (SYMBOL_REF_DECL (addr) == NULL_TREE
+		  || !lookup_attribute ("noplt",
+					DECL_ATTRIBUTES (SYMBOL_REF_DECL (addr)))))
+	    {
+	      if (!TARGET_64BIT
+		  || (ix86_cmodel == CM_LARGE_PIC
+		      && DEFAULT_ABI != MS_ABI))
+		{
+		  use_reg (&use, gen_rtx_REG (Pmode,
+					      REAL_PIC_OFFSET_TABLE_REGNUM));
+		  if (ix86_use_pseudo_pic_reg ())
+		    emit_move_insn (gen_rtx_REG (Pmode,
+						 REAL_PIC_OFFSET_TABLE_REGNUM),
+				    pic_offset_table_rtx);
+		}
+	    }
+	  else if (!TARGET_PECOFF && !TARGET_MACHO)
+	    {
+	      if (TARGET_64BIT)
+		{
+		  fnaddr = gen_rtx_UNSPEC (Pmode,
+					   gen_rtvec (1, addr),
+					   UNSPEC_GOTPCREL);
+		  fnaddr = gen_rtx_CONST (Pmode, fnaddr);
+		}
+	      else
+		{
+		  fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
+					   UNSPEC_GOT);
+		  fnaddr = gen_rtx_CONST (Pmode, fnaddr);
+		  fnaddr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
+					 fnaddr);
+		}
+	      fnaddr = gen_const_mem (Pmode, fnaddr);
+	      if (GET_MODE (fnaddr) != word_mode)
+		fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
+	      fnaddr = gen_rtx_MEM (QImode, fnaddr);
+	    }
 	}
     }
 
@@ -25686,9 +25715,13 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
       && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
       && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
     fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
-  else if (sibcall
-	   ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
-	   : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
+  else if (!(TARGET_X32
+	     && MEM_P (fnaddr)
+	     && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND
+	     && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode))
+	   && (sibcall
+	       ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
+	       : !call_insn_operand (XEXP (fnaddr, 0), word_mode)))
     {
       fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
       fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
diff --git a/gcc/testsuite/gcc.target/i386/pr67215-1.c b/gcc/testsuite/gcc.target/i386/pr67215-1.c
new file mode 100644
index 0000000..fd37f8e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr67215-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fno-plt" } */
+
+extern char* bar (int);
+extern char* arr[32];
+
+void
+foo (void)
+{
+  int i;
+
+  for (i = 0; i < 32; i++)
+    arr[i] = bar (128);
+}
+
+/* { dg-final { scan-assembler "call\[ \t\]*.bar@GOTPCREL" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "call\[ \t\]*.bar@GOT\\(" { target ia32 } } } */
+/* { dg-final { scan-assembler-not "mov(l|q)\[ \t\]*.bar@GOTPCREL" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-not "movl\[ \t\]*.bar@GOT\\(" { target ia32 } } } */
+/* { dg-final { scan-assembler-not "call\[ \t\]*.bar@PLT" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr67215-2.c b/gcc/testsuite/gcc.target/i386/pr67215-2.c
new file mode 100644
index 0000000..ebf2919
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr67215-2.c
@@ -0,0 +1,20 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic" } */
+
+extern char* bar (int) __attribute__ ((noplt));
+extern char* arr[32];
+
+void
+foo (void)
+{
+  int i;
+
+  for (i = 0; i < 32; i++)
+    arr[i] = bar (128);
+}
+
+/* { dg-final { scan-assembler "call\[ \t\]*.bar@GOTPCREL" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "call\[ \t\]*.bar@GOT\\(" { target ia32 } } } */
+/* { dg-final { scan-assembler-not "mov(l|q)\[ \t\]*.bar@GOTPCREL" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-not "movl\[ \t\]*.bar@GOT\\(" { target ia32 } } } */
+/* { dg-final { scan-assembler-not "call\[ \t\]*.bar@PLT" } } */
-- 
2.4.3

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: RFC: [PATCH] PR target/67215: -fno-plt needs improvements for x86
  2015-08-16 18:44 RFC: [PATCH] PR target/67215: -fno-plt needs improvements for x86 H.J. Lu
@ 2015-08-16 19:39 ` Alexander Monakov
  2015-08-16 19:50   ` H.J. Lu
  0 siblings, 1 reply; 7+ messages in thread
From: Alexander Monakov @ 2015-08-16 19:39 UTC (permalink / raw)
  To: H.J. Lu; +Cc: gcc-patches, Uros Bizjak, Sriraman Tallam

On Sun, 16 Aug 2015, H.J. Lu wrote:

> prepare_call_address in calls.c is the wrong place to handle -fno-plt.
> We shoudn't force function address into register and hope that load
> function address via GOT and indirect call via register will be folded
> into indirect call via GOT, which doesn't always happen.

When the address load initially exists separately from the indirect call, the
load can be scheduled or be subject to loop invariant motion.  What is your
reason to have them fused right from the start?

In PR 67215, when you asked for an -O1 testcase, the reporter responded with a
case that demonstrates loop invariant motion on the call address.  Why should
that be avoided?  (I think it shouldn't, at least not generally)

> Allso non-PIC
> case can only be handled in backend.  Instead, backend should expand
> external function call into indirect call via GOT for -fno-plt.
> 
> This patch reverts -fno-plt in prepare_call_address and handles it in
> ix86_expand_call.  Other backends may need similar changes to support
> -fno-plt.  Alternately, we can introduce a target hook to indicate
> whether an external function should be called via register for -fno-plt
> so that i386 backend can disable it in prepare_call_address.
> 
> Any comments?

Initially my patch was x86-only, but then I opted for the generic calls.c
change, expecting that it would work fine in a machine-independent manner
(which didn't work out, as ARM and AArch64 experience demonstrated).  My
initial i386 backend patch was much smaller:

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 3263656..cd5f246 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -25577,15 +25578,23 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
       /* Static functions and indirect calls don't need the pic register.  */
       if (flag_pic
          && (!TARGET_64BIT
+             || !flag_plt
              || (ix86_cmodel == CM_LARGE_PIC
                  && DEFAULT_ABI != MS_ABI))
          && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
          && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
        {
-         use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
-         if (ix86_use_pseudo_pic_reg ())
-           emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
-                           pic_offset_table_rtx);
+         if (flag_plt)
+           {
+             use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
+             if (ix86_use_pseudo_pic_reg ())
+               emit_move_insn (gen_rtx_REG (Pmode,
+                                            REAL_PIC_OFFSET_TABLE_REGNUM),
+                               pic_offset_table_rtx);
+           }
+         else
+           fnaddr = gen_rtx_MEM (QImode,
+                                 legitimize_pic_address (XEXP (fnaddr, 0), 0));
        }
     }

(it doesn't apply to current trunk and doesn't handle all cases your patch
handles, but at least it shows how do achieve the goal for "unfused" codegen)

Couple more comments on your patch below.

> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 4a0986c..bf8a21d 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -25650,21 +25650,50 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
>        /* Static functions and indirect calls don't need the pic register.  Also,
>  	 check if PLT was explicitly avoided via no-plt or "noplt" attribute, making
>  	 it an indirect call.  */
> +      rtx addr = XEXP (fnaddr, 0);
>        if (flag_pic
> -	  && (!TARGET_64BIT
> -	      || (ix86_cmodel == CM_LARGE_PIC
> -		  && DEFAULT_ABI != MS_ABI))
> -	  && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
> -	  && !SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0))
> -	  && flag_plt
> -	  && (SYMBOL_REF_DECL ((XEXP (fnaddr, 0))) == NULL_TREE
> -	      || !lookup_attribute ("noplt",
> -		     DECL_ATTRIBUTES (SYMBOL_REF_DECL (XEXP (fnaddr, 0))))))
> +	  && GET_CODE (addr) == SYMBOL_REF
> +	  && !SYMBOL_REF_LOCAL_P (addr))
>  	{
> -	  use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
> -	  if (ix86_use_pseudo_pic_reg ())
> -	    emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
> -			    pic_offset_table_rtx);
> +	  if (flag_plt
> +	      && (SYMBOL_REF_DECL (addr) == NULL_TREE
> +		  || !lookup_attribute ("noplt",
> +					DECL_ATTRIBUTES (SYMBOL_REF_DECL (addr)))))
> +	    {

Under what circumstances can SYMBOL_REF_DECL be NULL here?  For libcalls?
(I realize that your patch doesn't change the treatment; I just want to know)

> +	      if (!TARGET_64BIT
> +		  || (ix86_cmodel == CM_LARGE_PIC
> +		      && DEFAULT_ABI != MS_ABI))
> +		{
> +		  use_reg (&use, gen_rtx_REG (Pmode,
> +					      REAL_PIC_OFFSET_TABLE_REGNUM));
> +		  if (ix86_use_pseudo_pic_reg ())
> +		    emit_move_insn (gen_rtx_REG (Pmode,
> +						 REAL_PIC_OFFSET_TABLE_REGNUM),
> +				    pic_offset_table_rtx);
> +		}
> +	    }
> +	  else if (!TARGET_PECOFF && !TARGET_MACHO)
> +	    {
> +	      if (TARGET_64BIT)
> +		{
> +		  fnaddr = gen_rtx_UNSPEC (Pmode,
> +					   gen_rtvec (1, addr),
> +					   UNSPEC_GOTPCREL);
> +		  fnaddr = gen_rtx_CONST (Pmode, fnaddr);
> +		}
> +	      else
> +		{
> +		  fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
> +					   UNSPEC_GOT);
> +		  fnaddr = gen_rtx_CONST (Pmode, fnaddr);
> +		  fnaddr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
> +					 fnaddr);
> +		}
> +	      fnaddr = gen_const_mem (Pmode, fnaddr);
> +	      if (GET_MODE (fnaddr) != word_mode)
> +		fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
> +	      fnaddr = gen_rtx_MEM (QImode, fnaddr);
> +	    }
>  	}
>      }
>  
> @@ -25686,9 +25715,13 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
>        && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
>        && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
>      fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
> -  else if (sibcall
> -	   ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
> -	   : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
> +  else if (!(TARGET_X32
> +	     && MEM_P (fnaddr)
> +	     && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND
> +	     && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode))
> +	   && (sibcall
> +	       ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
> +	       : !call_insn_operand (XEXP (fnaddr, 0), word_mode)))
>      {
>        fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
>        fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));

Perhaps add a comment that GOT slots are 64-bit on x32?

Thanks.
Alexander

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: RFC: [PATCH] PR target/67215: -fno-plt needs improvements for x86
  2015-08-16 19:39 ` Alexander Monakov
@ 2015-08-16 19:50   ` H.J. Lu
  2015-08-17  1:14     ` H.J. Lu
  0 siblings, 1 reply; 7+ messages in thread
From: H.J. Lu @ 2015-08-16 19:50 UTC (permalink / raw)
  To: Alexander Monakov; +Cc: GCC Patches, Uros Bizjak, Sriraman Tallam

On Sun, Aug 16, 2015 at 12:24 PM, Alexander Monakov <amonakov@ispras.ru> wrote:
> On Sun, 16 Aug 2015, H.J. Lu wrote:
>
>> prepare_call_address in calls.c is the wrong place to handle -fno-plt.
>> We shoudn't force function address into register and hope that load
>> function address via GOT and indirect call via register will be folded
>> into indirect call via GOT, which doesn't always happen.
>
> When the address load initially exists separately from the indirect call, the
> load can be scheduled or be subject to loop invariant motion.  What is your
> reason to have them fused right from the start?
>
> In PR 67215, when you asked for an -O1 testcase, the reporter responded with a
> case that demonstrates loop invariant motion on the call address.  Why should
> that be avoided?  (I think it shouldn't, at least not generally)

Load it into a register avoids one load.  But using a register for it
is bad for x86 which has few registers.  Change

call foo@PLT

to

call *foo@GOT

to avoid one extra direct branch to PLT is always good for both x86
and x86-64.

>> Allso non-PIC
>> case can only be handled in backend.  Instead, backend should expand
>> external function call into indirect call via GOT for -fno-plt.
>>
>> This patch reverts -fno-plt in prepare_call_address and handles it in
>> ix86_expand_call.  Other backends may need similar changes to support
>> -fno-plt.  Alternately, we can introduce a target hook to indicate
>> whether an external function should be called via register for -fno-plt
>> so that i386 backend can disable it in prepare_call_address.
>>
>> Any comments?
>
> Initially my patch was x86-only, but then I opted for the generic calls.c
> change, expecting that it would work fine in a machine-independent manner
> (which didn't work out, as ARM and AArch64 experience demonstrated).  My

Nor for PR 67215 on x86.

> initial i386 backend patch was much smaller:
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 3263656..cd5f246 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -25577,15 +25578,23 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
>        /* Static functions and indirect calls don't need the pic register.  */
>        if (flag_pic
>           && (!TARGET_64BIT
> +             || !flag_plt
>               || (ix86_cmodel == CM_LARGE_PIC
>                   && DEFAULT_ABI != MS_ABI))
>           && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
>           && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
>         {
> -         use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
> -         if (ix86_use_pseudo_pic_reg ())
> -           emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
> -                           pic_offset_table_rtx);
> +         if (flag_plt)
> +           {
> +             use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
> +             if (ix86_use_pseudo_pic_reg ())
> +               emit_move_insn (gen_rtx_REG (Pmode,
> +                                            REAL_PIC_OFFSET_TABLE_REGNUM),
> +                               pic_offset_table_rtx);
> +           }
> +         else
> +           fnaddr = gen_rtx_MEM (QImode,
> +                                 legitimize_pic_address (XEXP (fnaddr, 0), 0));
>         }
>      }
>
> (it doesn't apply to current trunk and doesn't handle all cases your patch
> handles, but at least it shows how do achieve the goal for "unfused" codegen)

But the fused indirect call is what we want for x86.

> Couple more comments on your patch below.
>
>> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
>> index 4a0986c..bf8a21d 100644
>> --- a/gcc/config/i386/i386.c
>> +++ b/gcc/config/i386/i386.c
>> @@ -25650,21 +25650,50 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
>>        /* Static functions and indirect calls don't need the pic register.  Also,
>>        check if PLT was explicitly avoided via no-plt or "noplt" attribute, making
>>        it an indirect call.  */
>> +      rtx addr = XEXP (fnaddr, 0);
>>        if (flag_pic
>> -       && (!TARGET_64BIT
>> -           || (ix86_cmodel == CM_LARGE_PIC
>> -               && DEFAULT_ABI != MS_ABI))
>> -       && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
>> -       && !SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0))
>> -       && flag_plt
>> -       && (SYMBOL_REF_DECL ((XEXP (fnaddr, 0))) == NULL_TREE
>> -           || !lookup_attribute ("noplt",
>> -                  DECL_ATTRIBUTES (SYMBOL_REF_DECL (XEXP (fnaddr, 0))))))
>> +       && GET_CODE (addr) == SYMBOL_REF
>> +       && !SYMBOL_REF_LOCAL_P (addr))
>>       {
>> -       use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
>> -       if (ix86_use_pseudo_pic_reg ())
>> -         emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
>> -                         pic_offset_table_rtx);
>> +       if (flag_plt
>> +           && (SYMBOL_REF_DECL (addr) == NULL_TREE
>> +               || !lookup_attribute ("noplt",
>> +                                     DECL_ATTRIBUTES (SYMBOL_REF_DECL (addr)))))
>> +         {
>
> Under what circumstances can SYMBOL_REF_DECL be NULL here?  For libcalls?
> (I realize that your patch doesn't change the treatment; I just want to know)

I don't think it can happen.  But it is a separate issue and I want to
limit my change to one issue.

>> +           if (!TARGET_64BIT
>> +               || (ix86_cmodel == CM_LARGE_PIC
>> +                   && DEFAULT_ABI != MS_ABI))
>> +             {
>> +               use_reg (&use, gen_rtx_REG (Pmode,
>> +                                           REAL_PIC_OFFSET_TABLE_REGNUM));
>> +               if (ix86_use_pseudo_pic_reg ())
>> +                 emit_move_insn (gen_rtx_REG (Pmode,
>> +                                              REAL_PIC_OFFSET_TABLE_REGNUM),
>> +                                 pic_offset_table_rtx);
>> +             }
>> +         }
>> +       else if (!TARGET_PECOFF && !TARGET_MACHO)
>> +         {
>> +           if (TARGET_64BIT)
>> +             {
>> +               fnaddr = gen_rtx_UNSPEC (Pmode,
>> +                                        gen_rtvec (1, addr),
>> +                                        UNSPEC_GOTPCREL);
>> +               fnaddr = gen_rtx_CONST (Pmode, fnaddr);
>> +             }
>> +           else
>> +             {
>> +               fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
>> +                                        UNSPEC_GOT);
>> +               fnaddr = gen_rtx_CONST (Pmode, fnaddr);
>> +               fnaddr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
>> +                                      fnaddr);
>> +             }
>> +           fnaddr = gen_const_mem (Pmode, fnaddr);
>> +           if (GET_MODE (fnaddr) != word_mode)
>> +             fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
>> +           fnaddr = gen_rtx_MEM (QImode, fnaddr);
>> +         }
>>       }
>>      }
>>
>> @@ -25686,9 +25715,13 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
>>        && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
>>        && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
>>      fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
>> -  else if (sibcall
>> -        ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
>> -        : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
>> +  else if (!(TARGET_X32
>> +          && MEM_P (fnaddr)
>> +          && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND
>> +          && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode))
>> +        && (sibcall
>> +            ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
>> +            : !call_insn_operand (XEXP (fnaddr, 0), word_mode)))
>>      {
>>        fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
>>        fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
>
> Perhaps add a comment that GOT slots are 64-bit on x32?
>

Good idea.  I will update my patch.

Thanks.

-- 
H.J.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: RFC: [PATCH] PR target/67215: -fno-plt needs improvements for x86
  2015-08-16 19:50   ` H.J. Lu
@ 2015-08-17  1:14     ` H.J. Lu
  2015-08-17 17:17       ` Alexander Monakov
  0 siblings, 1 reply; 7+ messages in thread
From: H.J. Lu @ 2015-08-17  1:14 UTC (permalink / raw)
  To: Alexander Monakov; +Cc: GCC Patches, Uros Bizjak, Sriraman Tallam

On Sun, Aug 16, 2015 at 12:39 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Sun, Aug 16, 2015 at 12:24 PM, Alexander Monakov <amonakov@ispras.ru> wrote:
>
>>> +           if (!TARGET_64BIT
>>> +               || (ix86_cmodel == CM_LARGE_PIC
>>> +                   && DEFAULT_ABI != MS_ABI))
>>> +             {
>>> +               use_reg (&use, gen_rtx_REG (Pmode,
>>> +                                           REAL_PIC_OFFSET_TABLE_REGNUM));
>>> +               if (ix86_use_pseudo_pic_reg ())
>>> +                 emit_move_insn (gen_rtx_REG (Pmode,
>>> +                                              REAL_PIC_OFFSET_TABLE_REGNUM),
>>> +                                 pic_offset_table_rtx);
>>> +             }
>>> +         }
>>> +       else if (!TARGET_PECOFF && !TARGET_MACHO)
>>> +         {
>>> +           if (TARGET_64BIT)
>>> +             {
>>> +               fnaddr = gen_rtx_UNSPEC (Pmode,
>>> +                                        gen_rtvec (1, addr),
>>> +                                        UNSPEC_GOTPCREL);
>>> +               fnaddr = gen_rtx_CONST (Pmode, fnaddr);
>>> +             }
>>> +           else
>>> +             {
>>> +               fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
>>> +                                        UNSPEC_GOT);
>>> +               fnaddr = gen_rtx_CONST (Pmode, fnaddr);
>>> +               fnaddr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
>>> +                                      fnaddr);
>>> +             }
>>> +           fnaddr = gen_const_mem (Pmode, fnaddr);
>>> +           if (GET_MODE (fnaddr) != word_mode)
>>> +             fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
>>> +           fnaddr = gen_rtx_MEM (QImode, fnaddr);
>>> +         }
>>>       }
>>>      }
>>>
>>> @@ -25686,9 +25715,13 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
>>>        && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
>>>        && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
>>>      fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
>>> -  else if (sibcall
>>> -        ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
>>> -        : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
>>> +  else if (!(TARGET_X32
>>> +          && MEM_P (fnaddr)
>>> +          && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND
>>> +          && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode))
>>> +        && (sibcall
>>> +            ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
>>> +            : !call_insn_operand (XEXP (fnaddr, 0), word_mode)))
>>>      {
>>>        fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
>>>        fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
>>
>> Perhaps add a comment that GOT slots are 64-bit on x32?
>>
>
> Good idea.  I will update my patch.
>

How about this?


diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index bf8a21d..216dee6 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -25690,6 +25690,10 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
  fnaddr);
  }
       fnaddr = gen_const_mem (Pmode, fnaddr);
+      /* Pmode may not be the same as word_mode for x32, which
+ doesn't support indirect branch va 32-bit memory slot.
+ Since x32 GOT slot is 64 bit with zero upper 32 bits,
+ indirect branch via x32 GOT slot is OK.  */
       if (GET_MODE (fnaddr) != word_mode)
  fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
       fnaddr = gen_rtx_MEM (QImode, fnaddr);
@@ -25715,7 +25719,9 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
       && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
       && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
     fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
-  else if (!(TARGET_X32
+  /* Since x32 GOT slot is 64 bit with zero upper 32 bits, indirect
+     branch via x32 GOT slot is OK.  */
+  else if (!(Pmode != word_mode
      && MEM_P (fnaddr)
      && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND
      && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode))



-- 
H.J.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: RFC: [PATCH] PR target/67215: -fno-plt needs improvements for x86
  2015-08-17  1:14     ` H.J. Lu
@ 2015-08-17 17:17       ` Alexander Monakov
  2015-08-17 17:32         ` H.J. Lu
  0 siblings, 1 reply; 7+ messages in thread
From: Alexander Monakov @ 2015-08-17 17:17 UTC (permalink / raw)
  To: H.J. Lu; +Cc: GCC Patches, Uros Bizjak, Sriraman Tallam

> >> Perhaps add a comment that GOT slots are 64-bit on x32?
> >>
> >
> > Good idea.  I will update my patch.
> >
> 
> How about this?
> 
> 
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index bf8a21d..216dee6 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -25690,6 +25690,10 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
>   fnaddr);
>   }
>        fnaddr = gen_const_mem (Pmode, fnaddr);
> +      /* Pmode may not be the same as word_mode for x32, which

I think 'Pmode is not the same as word_mode on x32' is more appropriate here.

> + doesn't support indirect branch va 32-bit memory slot.

Typo: s/va/via.

Thanks.
Alexander

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: RFC: [PATCH] PR target/67215: -fno-plt needs improvements for x86
  2015-08-17 17:17       ` Alexander Monakov
@ 2015-08-17 17:32         ` H.J. Lu
  2015-08-19 12:50           ` H.J. Lu
  0 siblings, 1 reply; 7+ messages in thread
From: H.J. Lu @ 2015-08-17 17:32 UTC (permalink / raw)
  To: Alexander Monakov; +Cc: GCC Patches, Uros Bizjak, Sriraman Tallam

[-- Attachment #1: Type: text/plain, Size: 885 bytes --]

On Mon, Aug 17, 2015 at 10:08 AM, Alexander Monakov <amonakov@ispras.ru> wrote:
>> >> Perhaps add a comment that GOT slots are 64-bit on x32?
>> >>
>> >
>> > Good idea.  I will update my patch.
>> >
>>
>> How about this?
>>
>>
>> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
>> index bf8a21d..216dee6 100644
>> --- a/gcc/config/i386/i386.c
>> +++ b/gcc/config/i386/i386.c
>> @@ -25690,6 +25690,10 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
>>   fnaddr);
>>   }
>>        fnaddr = gen_const_mem (Pmode, fnaddr);
>> +      /* Pmode may not be the same as word_mode for x32, which
>
> I think 'Pmode is not the same as word_mode on x32' is more appropriate here.

"-maddress-mode=long -mx32" makes Pmode == word_mode.

>> + doesn't support indirect branch va 32-bit memory slot.
>
> Typo: s/va/via.
>

Fixed.

Here is the updated patch.

Thanks.

-- 
H.J.

[-- Attachment #2: 0001-Properly-handle-fno-plt-in-ix86_expand_call.patch --]
[-- Type: text/x-patch, Size: 7999 bytes --]

From 04258b418d2ea105249d371a06805122d8953816 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Sun, 16 Aug 2015 04:46:20 -0700
Subject: [PATCH] Properly handle -fno-plt in ix86_expand_call

prepare_call_address in calls.c is the wrong place to handle -fno-plt.
We shoudn't force function address into register and hope that load
function address via GOT and indirect call via register will be folded
into indirect call via GOT, which doesn't always happen.  Also non-PIC
case can only be handled in backend.  Instead, backend should expand
external function call into indirect call via GOT for -fno-plt.

This patch reverts -fno-plt in prepare_call_address and handles it in
ix86_expand_call.  Other backends may need similar changes to support
-fno-plt.  Alternately, we can introduce a target hook to indicate
whether an external function should be called via register for -fno-plt
so that i386 backend can disable it in prepare_call_address.

gcc/

	PR target/67215
	* calls.c (prepare_call_address): Don't handle -fno-plt here.
	* config/i386/i386.c (ix86_expand_call): Generate indirect call
	via GOT for -fno-plt.  Support indirect call via GOT for x32.

gcc/testsuite/

	PR target/67215
	* gcc.target/i386/pr67215-1.c: New test.
	* gcc.target/i386/pr67215-2.c: Likewise.
---
 gcc/calls.c                               | 12 ------
 gcc/config/i386/i386.c                    | 71 ++++++++++++++++++++++++-------
 gcc/testsuite/gcc.target/i386/pr67215-1.c | 20 +++++++++
 gcc/testsuite/gcc.target/i386/pr67215-2.c | 20 +++++++++
 4 files changed, 95 insertions(+), 28 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr67215-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr67215-2.c

diff --git a/gcc/calls.c b/gcc/calls.c
index 5636725..7cce9be 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -203,18 +203,6 @@ prepare_call_address (tree fndecl_or_type, rtx funexp, rtx static_chain_value,
 	       && targetm.small_register_classes_for_mode_p (FUNCTION_MODE))
 	      ? force_not_mem (memory_address (FUNCTION_MODE, funexp))
 	      : memory_address (FUNCTION_MODE, funexp));
-  else if (flag_pic
-	   && fndecl_or_type
-	   && TREE_CODE (fndecl_or_type) == FUNCTION_DECL
-	   && (!flag_plt
-	       || lookup_attribute ("noplt", DECL_ATTRIBUTES (fndecl_or_type)))
-	   && !targetm.binds_local_p (fndecl_or_type))
-    {
-      /* This is done only for PIC code.  There is no easy interface to force the
-	 function address into GOT for non-PIC case.  non-PIC case needs to be
-	 handled specially by the backend.  */
-      funexp = force_reg (Pmode, funexp);
-    }
   else if (! sibcallp)
     {
       if (!NO_FUNCTION_CSE && optimize && ! flag_no_function_cse)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 05fa5e1..ac9a6c4 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -25649,21 +25649,54 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
       /* Static functions and indirect calls don't need the pic register.  Also,
 	 check if PLT was explicitly avoided via no-plt or "noplt" attribute, making
 	 it an indirect call.  */
+      rtx addr = XEXP (fnaddr, 0);
       if (flag_pic
-	  && (!TARGET_64BIT
-	      || (ix86_cmodel == CM_LARGE_PIC
-		  && DEFAULT_ABI != MS_ABI))
-	  && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
-	  && !SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0))
-	  && flag_plt
-	  && (SYMBOL_REF_DECL ((XEXP (fnaddr, 0))) == NULL_TREE
-	      || !lookup_attribute ("noplt",
-		     DECL_ATTRIBUTES (SYMBOL_REF_DECL (XEXP (fnaddr, 0))))))
+	  && GET_CODE (addr) == SYMBOL_REF
+	  && !SYMBOL_REF_LOCAL_P (addr))
 	{
-	  use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
-	  if (ix86_use_pseudo_pic_reg ())
-	    emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
-			    pic_offset_table_rtx);
+	  if (flag_plt
+	      && (SYMBOL_REF_DECL (addr) == NULL_TREE
+		  || !lookup_attribute ("noplt",
+					DECL_ATTRIBUTES (SYMBOL_REF_DECL (addr)))))
+	    {
+	      if (!TARGET_64BIT
+		  || (ix86_cmodel == CM_LARGE_PIC
+		      && DEFAULT_ABI != MS_ABI))
+		{
+		  use_reg (&use, gen_rtx_REG (Pmode,
+					      REAL_PIC_OFFSET_TABLE_REGNUM));
+		  if (ix86_use_pseudo_pic_reg ())
+		    emit_move_insn (gen_rtx_REG (Pmode,
+						 REAL_PIC_OFFSET_TABLE_REGNUM),
+				    pic_offset_table_rtx);
+		}
+	    }
+	  else if (!TARGET_PECOFF && !TARGET_MACHO)
+	    {
+	      if (TARGET_64BIT)
+		{
+		  fnaddr = gen_rtx_UNSPEC (Pmode,
+					   gen_rtvec (1, addr),
+					   UNSPEC_GOTPCREL);
+		  fnaddr = gen_rtx_CONST (Pmode, fnaddr);
+		}
+	      else
+		{
+		  fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
+					   UNSPEC_GOT);
+		  fnaddr = gen_rtx_CONST (Pmode, fnaddr);
+		  fnaddr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
+					 fnaddr);
+		}
+	      fnaddr = gen_const_mem (Pmode, fnaddr);
+	      /* Pmode may not be the same as word_mode for x32, which
+		 doesn't support indirect branch via 32-bit memory slot.
+		 Since x32 GOT slot is 64 bit with zero upper 32 bits,
+		 indirect branch via x32 GOT slot is OK.  */
+	      if (GET_MODE (fnaddr) != word_mode)
+		fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
+	      fnaddr = gen_rtx_MEM (QImode, fnaddr);
+	    }
 	}
     }
 
@@ -25685,9 +25718,15 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
       && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
       && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
     fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
-  else if (sibcall
-	   ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
-	   : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
+  /* Since x32 GOT slot is 64 bit with zero upper 32 bits, indirect
+     branch via x32 GOT slot is OK.  */
+  else if (!(TARGET_X32
+	     && MEM_P (fnaddr)
+	     && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND
+	     && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode))
+	   && (sibcall
+	       ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
+	       : !call_insn_operand (XEXP (fnaddr, 0), word_mode)))
     {
       fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
       fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
diff --git a/gcc/testsuite/gcc.target/i386/pr67215-1.c b/gcc/testsuite/gcc.target/i386/pr67215-1.c
new file mode 100644
index 0000000..fd37f8e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr67215-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fno-plt" } */
+
+extern char* bar (int);
+extern char* arr[32];
+
+void
+foo (void)
+{
+  int i;
+
+  for (i = 0; i < 32; i++)
+    arr[i] = bar (128);
+}
+
+/* { dg-final { scan-assembler "call\[ \t\]*.bar@GOTPCREL" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "call\[ \t\]*.bar@GOT\\(" { target ia32 } } } */
+/* { dg-final { scan-assembler-not "mov(l|q)\[ \t\]*.bar@GOTPCREL" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-not "movl\[ \t\]*.bar@GOT\\(" { target ia32 } } } */
+/* { dg-final { scan-assembler-not "call\[ \t\]*.bar@PLT" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr67215-2.c b/gcc/testsuite/gcc.target/i386/pr67215-2.c
new file mode 100644
index 0000000..ebf2919
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr67215-2.c
@@ -0,0 +1,20 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic" } */
+
+extern char* bar (int) __attribute__ ((noplt));
+extern char* arr[32];
+
+void
+foo (void)
+{
+  int i;
+
+  for (i = 0; i < 32; i++)
+    arr[i] = bar (128);
+}
+
+/* { dg-final { scan-assembler "call\[ \t\]*.bar@GOTPCREL" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "call\[ \t\]*.bar@GOT\\(" { target ia32 } } } */
+/* { dg-final { scan-assembler-not "mov(l|q)\[ \t\]*.bar@GOTPCREL" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-not "movl\[ \t\]*.bar@GOT\\(" { target ia32 } } } */
+/* { dg-final { scan-assembler-not "call\[ \t\]*.bar@PLT" } } */
-- 
2.4.3


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: RFC: [PATCH] PR target/67215: -fno-plt needs improvements for x86
  2015-08-17 17:32         ` H.J. Lu
@ 2015-08-19 12:50           ` H.J. Lu
  0 siblings, 0 replies; 7+ messages in thread
From: H.J. Lu @ 2015-08-19 12:50 UTC (permalink / raw)
  To: Alexander Monakov, GCC Patches, Uros Bizjak, Sriraman Tallam, Jeff Law

On Mon, Aug 17, 2015 at 10:17:00AM -0700, H.J. Lu wrote:
> On Mon, Aug 17, 2015 at 10:08 AM, Alexander Monakov <amonakov@ispras.ru> wrote:
> >> >> Perhaps add a comment that GOT slots are 64-bit on x32?
> >> >>
> >> >
> >> > Good idea.  I will update my patch.
> >> >
> >>
> >> How about this?
> >>
> >>
> >> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> >> index bf8a21d..216dee6 100644
> >> --- a/gcc/config/i386/i386.c
> >> +++ b/gcc/config/i386/i386.c
> >> @@ -25690,6 +25690,10 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
> >>   fnaddr);
> >>   }
> >>        fnaddr = gen_const_mem (Pmode, fnaddr);
> >> +      /* Pmode may not be the same as word_mode for x32, which
> >
> > I think 'Pmode is not the same as word_mode on x32' is more appropriate here.
> 
> "-maddress-mode=long -mx32" makes Pmode == word_mode.
> 
> >> + doesn't support indirect branch va 32-bit memory slot.
> >
> > Typo: s/va/via.
> >
> 
> Fixed.
> 
> Here is the updated patch.
> 

Hi Jefff,

Can you review this?

Thanks.


H.J.
---
It boils down to that -fno-plt should convert calling an external
function, foo, from

call foo@PLT

to

call *foo@GOT

to avoid one extra direct branch to PLT.  The proper place for this is
in backend during expanding a function call.  The backend already takes
of many details for calling an external function, like setting up a PIC
register.  Using the GOT slot instead of PLT slot, just one of those
details.  For x86, it should be done in ix86_expand_call, not
prepare_call_address and hope for the best, which doesn't always
happen.  Also non-PIC case can only be handled in backend.

This patch reverts -fno-plt in prepare_call_address and handles it in
ix86_expand_call.  Other backends may need similar changes to support
-fno-plt.  Alternately, we can introduce a target hook to indicate
whether an external function should be called via register for -fno-plt
so that i386 backend can disable it in prepare_call_address.

sibcall_memory_operand is also updated to accept the GOT slot so that

call *foo@GOT(%reg)

can be generated by ix86_expand_call for 32-bit and 64-bit large model.

gcc/

	PR target/67215
	* calls.c (prepare_call_address): Don't handle -fno-plt here.
	* config/i386/i386.c (ix86_expand_call): Generate indirect call
	via GOT for -fno-plt.  Support indirect call via GOT for x32.
	* config/i386/predicates.md (sibcall_memory_operand): Allow
	GOT memory operand.

gcc/testsuite/

	PR target/67215
	* gcc.target/i386/pr67215-1.c: New test.
	* gcc.target/i386/pr67215-2.c: Likewise.
	* gcc.target/i386/pr67215-3.c: Likewise.
---
 gcc/calls.c                               | 12 ------
 gcc/config/i386/i386.c                    | 71 ++++++++++++++++++++++++-------
 gcc/config/i386/predicates.md             |  7 ++-
 gcc/testsuite/gcc.target/i386/pr67215-1.c | 20 +++++++++
 gcc/testsuite/gcc.target/i386/pr67215-2.c | 20 +++++++++
 gcc/testsuite/gcc.target/i386/pr67215-3.c | 12 ++++++
 6 files changed, 113 insertions(+), 29 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr67215-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr67215-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr67215-3.c

diff --git a/gcc/calls.c b/gcc/calls.c
index 5636725..7cce9be 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -203,18 +203,6 @@ prepare_call_address (tree fndecl_or_type, rtx funexp, rtx static_chain_value,
 	       && targetm.small_register_classes_for_mode_p (FUNCTION_MODE))
 	      ? force_not_mem (memory_address (FUNCTION_MODE, funexp))
 	      : memory_address (FUNCTION_MODE, funexp));
-  else if (flag_pic
-	   && fndecl_or_type
-	   && TREE_CODE (fndecl_or_type) == FUNCTION_DECL
-	   && (!flag_plt
-	       || lookup_attribute ("noplt", DECL_ATTRIBUTES (fndecl_or_type)))
-	   && !targetm.binds_local_p (fndecl_or_type))
-    {
-      /* This is done only for PIC code.  There is no easy interface to force the
-	 function address into GOT for non-PIC case.  non-PIC case needs to be
-	 handled specially by the backend.  */
-      funexp = force_reg (Pmode, funexp);
-    }
   else if (! sibcallp)
     {
       if (!NO_FUNCTION_CSE && optimize && ! flag_no_function_cse)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 05fa5e1..ac9a6c4 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -25649,21 +25649,54 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
       /* Static functions and indirect calls don't need the pic register.  Also,
 	 check if PLT was explicitly avoided via no-plt or "noplt" attribute, making
 	 it an indirect call.  */
+      rtx addr = XEXP (fnaddr, 0);
       if (flag_pic
-	  && (!TARGET_64BIT
-	      || (ix86_cmodel == CM_LARGE_PIC
-		  && DEFAULT_ABI != MS_ABI))
-	  && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
-	  && !SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0))
-	  && flag_plt
-	  && (SYMBOL_REF_DECL ((XEXP (fnaddr, 0))) == NULL_TREE
-	      || !lookup_attribute ("noplt",
-		     DECL_ATTRIBUTES (SYMBOL_REF_DECL (XEXP (fnaddr, 0))))))
+	  && GET_CODE (addr) == SYMBOL_REF
+	  && !SYMBOL_REF_LOCAL_P (addr))
 	{
-	  use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
-	  if (ix86_use_pseudo_pic_reg ())
-	    emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
-			    pic_offset_table_rtx);
+	  if (flag_plt
+	      && (SYMBOL_REF_DECL (addr) == NULL_TREE
+		  || !lookup_attribute ("noplt",
+					DECL_ATTRIBUTES (SYMBOL_REF_DECL (addr)))))
+	    {
+	      if (!TARGET_64BIT
+		  || (ix86_cmodel == CM_LARGE_PIC
+		      && DEFAULT_ABI != MS_ABI))
+		{
+		  use_reg (&use, gen_rtx_REG (Pmode,
+					      REAL_PIC_OFFSET_TABLE_REGNUM));
+		  if (ix86_use_pseudo_pic_reg ())
+		    emit_move_insn (gen_rtx_REG (Pmode,
+						 REAL_PIC_OFFSET_TABLE_REGNUM),
+				    pic_offset_table_rtx);
+		}
+	    }
+	  else if (!TARGET_PECOFF && !TARGET_MACHO)
+	    {
+	      if (TARGET_64BIT)
+		{
+		  fnaddr = gen_rtx_UNSPEC (Pmode,
+					   gen_rtvec (1, addr),
+					   UNSPEC_GOTPCREL);
+		  fnaddr = gen_rtx_CONST (Pmode, fnaddr);
+		}
+	      else
+		{
+		  fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
+					   UNSPEC_GOT);
+		  fnaddr = gen_rtx_CONST (Pmode, fnaddr);
+		  fnaddr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
+					 fnaddr);
+		}
+	      fnaddr = gen_const_mem (Pmode, fnaddr);
+	      /* Pmode may not be the same as word_mode for x32, which
+		 doesn't support indirect branch via 32-bit memory slot.
+		 Since x32 GOT slot is 64 bit with zero upper 32 bits,
+		 indirect branch via x32 GOT slot is OK.  */
+	      if (GET_MODE (fnaddr) != word_mode)
+		fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
+	      fnaddr = gen_rtx_MEM (QImode, fnaddr);
+	    }
 	}
     }
 
@@ -25685,9 +25718,15 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
       && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
       && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
     fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
-  else if (sibcall
-	   ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
-	   : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
+  /* Since x32 GOT slot is 64 bit with zero upper 32 bits, indirect
+     branch via x32 GOT slot is OK.  */
+  else if (!(TARGET_X32
+	     && MEM_P (fnaddr)
+	     && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND
+	     && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode))
+	   && (sibcall
+	       ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
+	       : !call_insn_operand (XEXP (fnaddr, 0), word_mode)))
     {
       fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
       fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index a9c8623..be2df76 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -593,7 +593,12 @@
 ;; Return true if OP is a memory operands that can be used in sibcalls.
 (define_predicate "sibcall_memory_operand"
   (and (match_operand 0 "memory_operand")
-       (match_test "CONSTANT_P (XEXP (op, 0))")))
+       (match_test "CONSTANT_P (XEXP (op, 0))
+		    || (GET_CODE (XEXP (op, 0)) == PLUS
+			&& REG_P (XEXP (XEXP (op, 0), 0))
+			&& GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST
+			&& GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 0)) == UNSPEC
+			&& XINT (XEXP (XEXP (XEXP (op, 0), 1), 0), 1) == UNSPEC_GOT)")))
 
 ;; Test for a valid operand for a call instruction.
 ;; Allow constant call address operands in Pmode only.
diff --git a/gcc/testsuite/gcc.target/i386/pr67215-1.c b/gcc/testsuite/gcc.target/i386/pr67215-1.c
new file mode 100644
index 0000000..fd37f8e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr67215-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fno-plt" } */
+
+extern char* bar (int);
+extern char* arr[32];
+
+void
+foo (void)
+{
+  int i;
+
+  for (i = 0; i < 32; i++)
+    arr[i] = bar (128);
+}
+
+/* { dg-final { scan-assembler "call\[ \t\]*.bar@GOTPCREL" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "call\[ \t\]*.bar@GOT\\(" { target ia32 } } } */
+/* { dg-final { scan-assembler-not "mov(l|q)\[ \t\]*.bar@GOTPCREL" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-not "movl\[ \t\]*.bar@GOT\\(" { target ia32 } } } */
+/* { dg-final { scan-assembler-not "call\[ \t\]*.bar@PLT" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr67215-2.c b/gcc/testsuite/gcc.target/i386/pr67215-2.c
new file mode 100644
index 0000000..ebf2919
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr67215-2.c
@@ -0,0 +1,20 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic" } */
+
+extern char* bar (int) __attribute__ ((noplt));
+extern char* arr[32];
+
+void
+foo (void)
+{
+  int i;
+
+  for (i = 0; i < 32; i++)
+    arr[i] = bar (128);
+}
+
+/* { dg-final { scan-assembler "call\[ \t\]*.bar@GOTPCREL" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "call\[ \t\]*.bar@GOT\\(" { target ia32 } } } */
+/* { dg-final { scan-assembler-not "mov(l|q)\[ \t\]*.bar@GOTPCREL" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-not "movl\[ \t\]*.bar@GOT\\(" { target ia32 } } } */
+/* { dg-final { scan-assembler-not "call\[ \t\]*.bar@PLT" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr67215-3.c b/gcc/testsuite/gcc.target/i386/pr67215-3.c
new file mode 100644
index 0000000..dbd9a2f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr67215-3.c
@@ -0,0 +1,12 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fno-plt -fdump-rtl-expand" } */
+
+extern int bar (void);
+
+int
+foo (void)
+{
+  return bar ();
+}
+
+/* { dg-final { scan-rtl-dump "\\(call \\(mem:QI \\(mem/u/c:" "expand" } } */
-- 
2.4.3

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2015-08-19 12:50 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-08-16 18:44 RFC: [PATCH] PR target/67215: -fno-plt needs improvements for x86 H.J. Lu
2015-08-16 19:39 ` Alexander Monakov
2015-08-16 19:50   ` H.J. Lu
2015-08-17  1:14     ` H.J. Lu
2015-08-17 17:17       ` Alexander Monakov
2015-08-17 17:32         ` H.J. Lu
2015-08-19 12:50           ` H.J. Lu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).