* [PATCH] x86-64: Load external function address via GOT slot
@ 2016-06-20 17:05 H.J. Lu
2016-06-20 19:13 ` Uros Bizjak
0 siblings, 1 reply; 10+ messages in thread
From: H.J. Lu @ 2016-06-20 17:05 UTC (permalink / raw)
To: gcc-patches; +Cc: Jakub Jelinek, Uros Bizjak
Hi,
This patch implements the alternate code sequence recommended in
https://groups.google.com/forum/#!topic/x86-64-abi/de5_KnLHxtI
to load external function address via GOT slot with
movq func@GOTPCREL(%rip), %rax
so that the linker won't create a PLT entry for the external function
address.
Tested on x86-64. OK for trunk?
H.J.
--
gcc/
PR target/67400
* config/i386/i386-protos.h (ix86_force_load_from_GOT_p): New.
* config/i386/i386.c (ix86_force_load_from_GOT_p): New function.
(ix86_legitimate_address_p): Allow UNSPEC_GOTPCREL if
ix86_force_load_from_GOT_p returns true.
(ix86_print_operand_address): Support UNSPEC_GOTPCREL if
ix86_force_load_from_GOT_p returns true.
(ix86_expand_move): Load the external function address via the
GOT slot if ix86_force_load_from_GOT_p returns true.
* config/i386/predicates.md (x86_64_immediate_operand): Return
false if ix86_force_load_from_GOT_p returns true.
gcc/testsuite/
PR target/67400
* gcc.target/i386/pr67400-1.c: New test.
* gcc.target/i386/pr67400-2.c: Likewise.
* gcc.target/i386/pr67400-3.c: Likewise.
* gcc.target/i386/pr67400-4.c: Likewise.
* gcc.target/i386/pr67400-5.c: Likewise.
* gcc.target/i386/pr67400-6.c: Likewise.
---
gcc/config/i386/i386-protos.h | 1 +
gcc/config/i386/i386.c | 51 +++++++++++++++++++++++++++++++
gcc/config/i386/predicates.md | 4 +++
gcc/testsuite/gcc.target/i386/pr67400-1.c | 13 ++++++++
gcc/testsuite/gcc.target/i386/pr67400-2.c | 14 +++++++++
gcc/testsuite/gcc.target/i386/pr67400-3.c | 16 ++++++++++
gcc/testsuite/gcc.target/i386/pr67400-4.c | 13 ++++++++
gcc/testsuite/gcc.target/i386/pr67400-5.c | 11 +++++++
gcc/testsuite/gcc.target/i386/pr67400-6.c | 13 ++++++++
9 files changed, 136 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/i386/pr67400-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr67400-2.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr67400-3.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr67400-4.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr67400-5.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr67400-6.c
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 9fd14f6..8130161 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -70,6 +70,7 @@ extern bool ix86_expand_set_or_movmem (rtx, rtx, rtx, rtx, rtx, rtx,
extern bool constant_address_p (rtx);
extern bool legitimate_pic_operand_p (rtx);
extern bool legitimate_pic_address_disp_p (rtx);
+extern bool ix86_force_load_from_GOT_p (rtx);
extern void print_reg (rtx, int, FILE*);
extern void ix86_print_operand (FILE *, rtx, int);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 56a5b9c..c8c5081 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -15182,6 +15182,24 @@ ix86_legitimate_constant_p (machine_mode mode, rtx x)
return true;
}
+/* True if operand X should be loaded from GOT. */
+
+bool
+ix86_force_load_from_GOT_p (rtx x)
+{
+ /* External function symbol should be loaded via the GOT slot for
+ -fno-plt. */
+ return (!flag_plt
+ && !flag_pic
+ && ix86_cmodel != CM_LARGE
+ && TARGET_64BIT
+ && !TARGET_PECOFF
+ && !TARGET_MACHO
+ && GET_CODE (x) == SYMBOL_REF
+ && SYMBOL_REF_FUNCTION_P (x)
+ && !SYMBOL_REF_LOCAL_P (x));
+}
+
/* Determine if it's legal to put X into the constant pool. This
is not possible for the address of thread-local symbols, which
is checked above. */
@@ -15560,6 +15578,10 @@ ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
return false;
case UNSPEC_GOTPCREL:
+ gcc_assert (flag_pic
+ || ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)));
+ goto is_legitimate_pic;
+
case UNSPEC_PCREL:
gcc_assert (flag_pic);
goto is_legitimate_pic;
@@ -18130,6 +18152,12 @@ ix86_print_operand_address_as (FILE *file, rtx addr,
}
else if (flag_pic)
output_pic_addr_const (file, disp, 0);
+ else if (GET_CODE (disp) == CONST
+ && GET_CODE (XEXP (disp, 0)) == UNSPEC
+ && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
+ || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
+ && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
+ output_pic_addr_const (file, XEXP (disp, 0), code);
else
output_addr_const (file, disp);
}
@@ -19448,6 +19476,29 @@ ix86_expand_move (machine_mode mode, rtx operands[])
op1 = convert_to_mode (mode, op1, 1);
}
}
+ }
+ else if (ix86_force_load_from_GOT_p (op1))
+ {
+ /* Load the external function address via the GOT slot to
+ avoid PLT. */
+ op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1),
+ (TARGET_64BIT
+ ? UNSPEC_GOTPCREL
+ : UNSPEC_GOT));
+ op1 = gen_rtx_CONST (Pmode, op1);
+ op1 = gen_const_mem (Pmode, op1);
+ /* This symbol must be referenced via a load from the Global
+ Offset Table. */
+ set_mem_alias_set (op1, ix86_GOT_alias_set ());
+ op1 = convert_to_mode (mode, op1, 1);
+ op1 = force_reg (mode, op1);
+ emit_insn (gen_rtx_SET (op0, op1));
+ /* Generate a CLOBBER so that there will be no REG_EQUAL note
+ on the last insn to prevent cse and fwprop from replacing
+ a GOT load with a constant. */
+ rtx tmp = gen_reg_rtx (Pmode);
+ emit_clobber (tmp);
+ return;
}
else
{
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index b3cf2a3..06a0002 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -149,6 +149,10 @@
(define_predicate "x86_64_immediate_operand"
(match_code "const_int,symbol_ref,label_ref,const")
{
+ /* Load the external function address via the GOT slot to avoid PLT. */
+ if (ix86_force_load_from_GOT_p (op))
+ return false;
+
if (!TARGET_64BIT)
return immediate_operand (op, mode);
diff --git a/gcc/testsuite/gcc.target/i386/pr67400-1.c b/gcc/testsuite/gcc.target/i386/pr67400-1.c
new file mode 100644
index 0000000..a875b76
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr67400-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fno-pic -fno-plt" } */
+
+extern void bar (void);
+
+void *
+foo (void)
+{
+ return &bar;
+}
+
+/* { dg-final { scan-assembler "mov\(l|q\)\[ \t\]*bar@GOTPCREL" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-not "mov\(l|q\)\[ \t\]*\\\$bar," { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr67400-2.c b/gcc/testsuite/gcc.target/i386/pr67400-2.c
new file mode 100644
index 0000000..9f3f4bc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr67400-2.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fno-pic -fno-plt" } */
+
+extern void bar (void);
+extern void *p;
+
+void
+foo (void)
+{
+ p = &bar;
+}
+
+/* { dg-final { scan-assembler "mov\(l|q\)\[ \t\]*bar@GOTPCREL" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-not "mov\(l|q\)\[ \t\]*\\\$bar," { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr67400-3.c b/gcc/testsuite/gcc.target/i386/pr67400-3.c
new file mode 100644
index 0000000..045974e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr67400-3.c
@@ -0,0 +1,16 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fno-pic -fno-plt" } */
+
+static void
+bar (void)
+{
+}
+
+void *
+foo (void)
+{
+ return &bar;
+}
+
+/* { dg-final { scan-assembler "mov\(l|q\)\[ \t\]*\\\$bar," } } */
+/* { dg-final { scan-assembler-not "mov\(l|q\)\[ \t\]*bar@GOTPCREL" { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr67400-4.c b/gcc/testsuite/gcc.target/i386/pr67400-4.c
new file mode 100644
index 0000000..fd373db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr67400-4.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fno-pic -fno-plt" } */
+
+extern void bar (void) __attribute__ ((visibility ("hidden")));
+
+void *
+foo (void)
+{
+ return &bar;
+}
+
+/* { dg-final { scan-assembler "mov\(l|q\)\[ \t\]*\\\$bar," } } */
+/* { dg-final { scan-assembler-not "mov\(l|q\)\[ \t\]*bar@GOTPCREL" { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr67400-5.c b/gcc/testsuite/gcc.target/i386/pr67400-5.c
new file mode 100644
index 0000000..9bb98dc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr67400-5.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fno-pic -fno-plt" } */
+
+extern void foo (void);
+extern void bar (int, int, int, int, int, int, void *);
+
+void
+x (void)
+{
+ bar (1, 2, 3, 4, 5, 6, foo);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr67400-6.c b/gcc/testsuite/gcc.target/i386/pr67400-6.c
new file mode 100644
index 0000000..b84196a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr67400-6.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fno-pic -fno-plt" } */
+
+extern int bar (void);
+
+int
+check (void *p)
+{
+ return p != &bar;
+}
+
+/* { dg-final { scan-assembler "cmp\(l|q\)\[ \t\]*.*bar@GOTPCREL" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-not "mov\(l|q\)\[ \t\]*\\\$bar," { target { ! ia32 } } } } */
--
2.5.5
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] x86-64: Load external function address via GOT slot
2016-06-20 17:05 [PATCH] x86-64: Load external function address via GOT slot H.J. Lu
@ 2016-06-20 19:13 ` Uros Bizjak
2016-06-20 19:19 ` H.J. Lu
0 siblings, 1 reply; 10+ messages in thread
From: Uros Bizjak @ 2016-06-20 19:13 UTC (permalink / raw)
To: H.J. Lu; +Cc: gcc-patches, Jakub Jelinek
On Mon, Jun 20, 2016 at 7:05 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:
> Hi,
>
> This patch implements the alternate code sequence recommended in
>
> https://groups.google.com/forum/#!topic/x86-64-abi/de5_KnLHxtI
>
> to load external function address via GOT slot with
>
> movq func@GOTPCREL(%rip), %rax
>
> so that linker won't create an PLT entry for extern function
> address.
>
> Tested on x86-64. OK for trunk?
> + else if (ix86_force_load_from_GOT_p (op1))
> + {
> + /* Load the external function address via the GOT slot to
> + avoid PLT. */
> + op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1),
> + (TARGET_64BIT
> + ? UNSPEC_GOTPCREL
> + : UNSPEC_GOT));
> + op1 = gen_rtx_CONST (Pmode, op1);
> + op1 = gen_const_mem (Pmode, op1);
> + /* This symbol must be referenced via a load from the Global
> + Offset Table. */
> + set_mem_alias_set (op1, ix86_GOT_alias_set ());
> + op1 = convert_to_mode (mode, op1, 1);
> + op1 = force_reg (mode, op1);
> + emit_insn (gen_rtx_SET (op0, op1));
> + /* Generate a CLOBBER so that there will be no REG_EQUAL note
> + on the last insn to prevent cse and fwprop from replacing
> + a GOT load with a constant. */
> + rtx tmp = gen_reg_rtx (Pmode);
> + emit_clobber (tmp);
> + return;
Jeff, is this the recommended way to prevent CSE, as far as RTL
infrastructure is concerned? I didn't find any example of this
approach with other targets.
Uros.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] x86-64: Load external function address via GOT slot
2016-06-20 19:13 ` Uros Bizjak
@ 2016-06-20 19:19 ` H.J. Lu
2016-06-20 19:27 ` Uros Bizjak
0 siblings, 1 reply; 10+ messages in thread
From: H.J. Lu @ 2016-06-20 19:19 UTC (permalink / raw)
To: Uros Bizjak; +Cc: gcc-patches, Jakub Jelinek
On Mon, Jun 20, 2016 at 12:13 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Mon, Jun 20, 2016 at 7:05 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>> Hi,
>>
>> This patch implements the alternate code sequence recommended in
>>
>> https://groups.google.com/forum/#!topic/x86-64-abi/de5_KnLHxtI
>>
>> to load external function address via GOT slot with
>>
>> movq func@GOTPCREL(%rip), %rax
>>
>> so that linker won't create an PLT entry for extern function
>> address.
>>
>> Tested on x86-64. OK for trunk?
>
>> + else if (ix86_force_load_from_GOT_p (op1))
>> + {
>> + /* Load the external function address via the GOT slot to
>> + avoid PLT. */
>> + op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1),
>> + (TARGET_64BIT
>> + ? UNSPEC_GOTPCREL
>> + : UNSPEC_GOT));
>> + op1 = gen_rtx_CONST (Pmode, op1);
>> + op1 = gen_const_mem (Pmode, op1);
>> + /* This symbol must be referenced via a load from the Global
>> + Offset Table. */
>> + set_mem_alias_set (op1, ix86_GOT_alias_set ());
>> + op1 = convert_to_mode (mode, op1, 1);
>> + op1 = force_reg (mode, op1);
>> + emit_insn (gen_rtx_SET (op0, op1));
>> + /* Generate a CLOBBER so that there will be no REG_EQUAL note
>> + on the last insn to prevent cse and fwprop from replacing
>> + a GOT load with a constant. */
>> + rtx tmp = gen_reg_rtx (Pmode);
>> + emit_clobber (tmp);
>> + return;
>
> Jeff, is this the recommended way to prevent CSE, as far as RTL
> infrastructure is concerned? I didn't find any example of this
> approach with other targets.
>
FWIW, a similar approach is used in ix86_expand_vector_move_misalign,
ix86_expand_convert_uns_didf_sse and ix86_expand_vector_init_general,
as well as in other targets:
frv/frv.c: emit_clobber (op0);
frv/frv.c: emit_clobber (op1);
m32c/m32c.c: /* emit_clobber (gen_rtx_REG (HImode, R0L_REGNO)); */
s390/s390.c: emit_clobber (addr);
s390/s390.md: emit_clobber (reg0);
s390/s390.md: emit_clobber (reg1);
s390/s390.md: emit_clobber (reg0);
s390/s390.md: emit_clobber (reg0);
s390/s390.md: emit_clobber (reg1);
--
H.J.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] x86-64: Load external function address via GOT slot
2016-06-20 19:19 ` H.J. Lu
@ 2016-06-20 19:27 ` Uros Bizjak
2016-06-20 19:47 ` Richard Sandiford
0 siblings, 1 reply; 10+ messages in thread
From: Uros Bizjak @ 2016-06-20 19:27 UTC (permalink / raw)
To: H.J. Lu; +Cc: gcc-patches, Jakub Jelinek
On Mon, Jun 20, 2016 at 9:19 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Mon, Jun 20, 2016 at 12:13 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
>> On Mon, Jun 20, 2016 at 7:05 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>> Hi,
>>>
>>> This patch implements the alternate code sequence recommended in
>>>
>>> https://groups.google.com/forum/#!topic/x86-64-abi/de5_KnLHxtI
>>>
>>> to load external function address via GOT slot with
>>>
>>> movq func@GOTPCREL(%rip), %rax
>>>
>>> so that linker won't create an PLT entry for extern function
>>> address.
>>>
>>> Tested on x86-64. OK for trunk?
>>
>>> + else if (ix86_force_load_from_GOT_p (op1))
>>> + {
>>> + /* Load the external function address via the GOT slot to
>>> + avoid PLT. */
>>> + op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1),
>>> + (TARGET_64BIT
>>> + ? UNSPEC_GOTPCREL
>>> + : UNSPEC_GOT));
>>> + op1 = gen_rtx_CONST (Pmode, op1);
>>> + op1 = gen_const_mem (Pmode, op1);
>>> + /* This symbol must be referenced via a load from the Global
>>> + Offset Table. */
>>> + set_mem_alias_set (op1, ix86_GOT_alias_set ());
>>> + op1 = convert_to_mode (mode, op1, 1);
>>> + op1 = force_reg (mode, op1);
>>> + emit_insn (gen_rtx_SET (op0, op1));
>>> + /* Generate a CLOBBER so that there will be no REG_EQUAL note
>>> + on the last insn to prevent cse and fwprop from replacing
>>> + a GOT load with a constant. */
>>> + rtx tmp = gen_reg_rtx (Pmode);
>>> + emit_clobber (tmp);
>>> + return;
>>
>> Jeff, is this the recommended way to prevent CSE, as far as RTL
>> infrastructure is concerned? I didn't find any example of this
>> approach with other targets.
>>
>
> FWIW, the similar approach is used in ix86_expand_vector_move_misalign,
> ix86_expand_convert_uns_didf_sse and ix86_expand_vector_init_general
> as well as other targets:
>
> frv/frv.c: emit_clobber (op0);
> frv/frv.c: emit_clobber (op1);
> im32c/m32c.c: /* emit_clobber (gen_rtx_REG (HImode, R0L_REGNO)); */
> s390/s390.c: emit_clobber (addr);
> s390/s390.md: emit_clobber (reg0);
> s390/s390.md: emit_clobber (reg1);
> s390/s390.md: emit_clobber (reg0);
> s390/s390.md: emit_clobber (reg0);
> s390/s390.md: emit_clobber (reg1);
These usages mark the whole register as being "clobbered"
(=undefined) before only a part of the register is written, e.g.:
emit_clobber (int_xmm);
emit_move_insn (gen_lowpart (DImode, int_xmm), input);
They aren't used to prevent unwanted CSE.
Uros.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] x86-64: Load external function address via GOT slot
2016-06-20 19:27 ` Uros Bizjak
@ 2016-06-20 19:47 ` Richard Sandiford
2016-06-20 20:34 ` H.J. Lu
2016-06-21 12:40 ` H.J. Lu
0 siblings, 2 replies; 10+ messages in thread
From: Richard Sandiford @ 2016-06-20 19:47 UTC (permalink / raw)
To: Uros Bizjak; +Cc: H.J. Lu, gcc-patches, Jakub Jelinek
Uros Bizjak <ubizjak@gmail.com> writes:
> On Mon, Jun 20, 2016 at 9:19 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Mon, Jun 20, 2016 at 12:13 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>> On Mon, Jun 20, 2016 at 7:05 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>>> Hi,
>>>>
>>>> This patch implements the alternate code sequence recommended in
>>>>
>>>> https://groups.google.com/forum/#!topic/x86-64-abi/de5_KnLHxtI
>>>>
>>>> to load external function address via GOT slot with
>>>>
>>>> movq func@GOTPCREL(%rip), %rax
>>>>
>>>> so that linker won't create an PLT entry for extern function
>>>> address.
>>>>
>>>> Tested on x86-64. OK for trunk?
>>>
>>>> + else if (ix86_force_load_from_GOT_p (op1))
>>>> + {
>>>> + /* Load the external function address via the GOT slot to
>>>> + avoid PLT. */
>>>> + op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1),
>>>> + (TARGET_64BIT
>>>> + ? UNSPEC_GOTPCREL
>>>> + : UNSPEC_GOT));
>>>> + op1 = gen_rtx_CONST (Pmode, op1);
>>>> + op1 = gen_const_mem (Pmode, op1);
>>>> + /* This symbol must be referenced via a load from the Global
>>>> + Offset Table. */
>>>> + set_mem_alias_set (op1, ix86_GOT_alias_set ());
>>>> + op1 = convert_to_mode (mode, op1, 1);
>>>> + op1 = force_reg (mode, op1);
>>>> + emit_insn (gen_rtx_SET (op0, op1));
>>>> + /* Generate a CLOBBER so that there will be no REG_EQUAL note
>>>> + on the last insn to prevent cse and fwprop from replacing
>>>> + a GOT load with a constant. */
>>>> + rtx tmp = gen_reg_rtx (Pmode);
>>>> + emit_clobber (tmp);
>>>> + return;
>>>
>>> Jeff, is this the recommended way to prevent CSE, as far as RTL
>>> infrastructure is concerned? I didn't find any example of this
>>> approach with other targets.
>>>
>>
>> FWIW, the similar approach is used in ix86_expand_vector_move_misalign,
>> ix86_expand_convert_uns_didf_sse and ix86_expand_vector_init_general
>> as well as other targets:
>>
>> frv/frv.c: emit_clobber (op0);
>> frv/frv.c: emit_clobber (op1);
>> im32c/m32c.c: /* emit_clobber (gen_rtx_REG (HImode, R0L_REGNO)); */
>> s390/s390.c: emit_clobber (addr);
>> s390/s390.md: emit_clobber (reg0);
>> s390/s390.md: emit_clobber (reg1);
>> s390/s390.md: emit_clobber (reg0);
>> s390/s390.md: emit_clobber (reg0);
>> s390/s390.md: emit_clobber (reg1);
>
> These usages mark the whole register as being "clobbered"
> (=undefined), before only a part of register is written, e.g.:
>
> emit_clobber (int_xmm);
> emit_move_insn (gen_lowpart (DImode, int_xmm), input);
>
> They aren't used to prevent unwanted CSE.
Since it's being called in the move expander, I thought the normal
way of preventing the constant being rematerialised would be to reject
it in the move define_insn predicates.
FWIW, I agree that using a clobber for this is going to be fragile.
Thanks,
Richard
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] x86-64: Load external function address via GOT slot
2016-06-20 19:47 ` Richard Sandiford
@ 2016-06-20 20:34 ` H.J. Lu
2016-06-21 12:40 ` H.J. Lu
1 sibling, 0 replies; 10+ messages in thread
From: H.J. Lu @ 2016-06-20 20:34 UTC (permalink / raw)
To: Uros Bizjak, gcc-patches, Jakub Jelinek, Richard Sandiford
On Mon, Jun 20, 2016 at 12:46 PM, Richard Sandiford
<rdsandiford@googlemail.com> wrote:
> Uros Bizjak <ubizjak@gmail.com> writes:
>> On Mon, Jun 20, 2016 at 9:19 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>> On Mon, Jun 20, 2016 at 12:13 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>>> On Mon, Jun 20, 2016 at 7:05 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>>>> Hi,
>>>>>
>>>>> This patch implements the alternate code sequence recommended in
>>>>>
>>>>> https://groups.google.com/forum/#!topic/x86-64-abi/de5_KnLHxtI
>>>>>
>>>>> to load external function address via GOT slot with
>>>>>
>>>>> movq func@GOTPCREL(%rip), %rax
>>>>>
>>>>> so that linker won't create an PLT entry for extern function
>>>>> address.
>>>>>
>>>>> Tested on x86-64. OK for trunk?
>>>>
>>>>> + else if (ix86_force_load_from_GOT_p (op1))
>>>>> + {
>>>>> + /* Load the external function address via the GOT slot to
>>>>> + avoid PLT. */
>>>>> + op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1),
>>>>> + (TARGET_64BIT
>>>>> + ? UNSPEC_GOTPCREL
>>>>> + : UNSPEC_GOT));
>>>>> + op1 = gen_rtx_CONST (Pmode, op1);
>>>>> + op1 = gen_const_mem (Pmode, op1);
>>>>> + /* This symbol must be referenced via a load from the Global
>>>>> + Offset Table. */
>>>>> + set_mem_alias_set (op1, ix86_GOT_alias_set ());
>>>>> + op1 = convert_to_mode (mode, op1, 1);
>>>>> + op1 = force_reg (mode, op1);
>>>>> + emit_insn (gen_rtx_SET (op0, op1));
>>>>> + /* Generate a CLOBBER so that there will be no REG_EQUAL note
>>>>> + on the last insn to prevent cse and fwprop from replacing
>>>>> + a GOT load with a constant. */
>>>>> + rtx tmp = gen_reg_rtx (Pmode);
>>>>> + emit_clobber (tmp);
>>>>> + return;
>>>>
>>>> Jeff, is this the recommended way to prevent CSE, as far as RTL
>>>> infrastructure is concerned? I didn't find any example of this
>>>> approach with other targets.
>>>>
>>>
>>> FWIW, the similar approach is used in ix86_expand_vector_move_misalign,
>>> ix86_expand_convert_uns_didf_sse and ix86_expand_vector_init_general
>>> as well as other targets:
>>>
>>> frv/frv.c: emit_clobber (op0);
>>> frv/frv.c: emit_clobber (op1);
>>> im32c/m32c.c: /* emit_clobber (gen_rtx_REG (HImode, R0L_REGNO)); */
>>> s390/s390.c: emit_clobber (addr);
>>> s390/s390.md: emit_clobber (reg0);
>>> s390/s390.md: emit_clobber (reg1);
>>> s390/s390.md: emit_clobber (reg0);
>>> s390/s390.md: emit_clobber (reg0);
>>> s390/s390.md: emit_clobber (reg1);
>>
>> These usages mark the whole register as being "clobbered"
>> (=undefined), before only a part of register is written, e.g.:
>>
>> emit_clobber (int_xmm);
>> emit_move_insn (gen_lowpart (DImode, int_xmm), input);
>>
>> They aren't used to prevent unwanted CSE.
>
> Since it's being called in the move expander, I thought the normal
> way of preventing the constant being rematerialised would be to reject
> it in the move define_insn predicates.
>
> FWIW, I agree that using a clobber for this is going to be fragile.
>
Here is an alternative to the clobber.
--
H.J.
--
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index a68983c..79999df 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2347,7 +2347,7 @@
(define_insn "*movsi_internal"
[(set (match_operand:SI 0 "nonimmediate_operand"
"=r,m ,*y,*y,?rm,?*y,*v,*v,*v,m ,?r ,?r,?*Yi,*k ,*rm")
- (match_operand:SI 1 "general_operand"
+ (match_operand:SI 1 "ix86_general_operand"
"g ,re,C ,*y,*y ,rm ,C ,*v,m ,*v,*Yj,*v,r ,*krm,*k"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
@@ -2564,7 +2564,7 @@
(define_insn "*movqi_internal"
[(set (match_operand:QI 0 "nonimmediate_operand"
"=q,q ,q ,r,r ,?r,m ,k,k,r ,m,k")
- (match_operand:QI 1 "general_operand"
+ (match_operand:QI 1 "ix86_general_operand"
"q ,qn,qm,q,rn,qm,qn,r ,k,k,k,m"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 06a0002..a471deb 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -367,6 +367,12 @@
}
})
+;; Return true if OP is general operand representable on ix86
+(define_predicate "ix86_general_operand"
+ (and (match_operand 0 "general_operand")
+ (ior (not (match_code "symbol_ref"))
+ (match_test "!ix86_force_load_from_GOT_p (op)"))))
+
;; Return true if size of VALUE can be stored in a sign
;; extended immediate field.
(define_predicate "x86_64_immediate_size_operand"
@@ -1036,6 +1042,9 @@
struct ix86_address parts;
int ok;
+ if (ix86_force_load_from_GOT_p (op))
+ return false;
+
if (!CONST_INT_P (op)
&& mode != VOIDmode
&& GET_MODE (op) != mode)
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] x86-64: Load external function address via GOT slot
2016-06-20 19:47 ` Richard Sandiford
2016-06-20 20:34 ` H.J. Lu
@ 2016-06-21 12:40 ` H.J. Lu
2016-06-21 18:22 ` Uros Bizjak
1 sibling, 1 reply; 10+ messages in thread
From: H.J. Lu @ 2016-06-21 12:40 UTC (permalink / raw)
To: Uros Bizjak, gcc-patches, Jakub Jelinek, Richard Sandiford
[-- Attachment #1: Type: text/plain, Size: 3220 bytes --]
On Mon, Jun 20, 2016 at 12:46 PM, Richard Sandiford
<rdsandiford@googlemail.com> wrote:
> Uros Bizjak <ubizjak@gmail.com> writes:
>> On Mon, Jun 20, 2016 at 9:19 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>> On Mon, Jun 20, 2016 at 12:13 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>>> On Mon, Jun 20, 2016 at 7:05 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>>>> Hi,
>>>>>
>>>>> This patch implements the alternate code sequence recommended in
>>>>>
>>>>> https://groups.google.com/forum/#!topic/x86-64-abi/de5_KnLHxtI
>>>>>
>>>>> to load external function address via GOT slot with
>>>>>
>>>>> movq func@GOTPCREL(%rip), %rax
>>>>>
>>>>> so that linker won't create an PLT entry for extern function
>>>>> address.
>>>>>
>>>>> Tested on x86-64. OK for trunk?
>>>>
>>>>> + else if (ix86_force_load_from_GOT_p (op1))
>>>>> + {
>>>>> + /* Load the external function address via the GOT slot to
>>>>> + avoid PLT. */
>>>>> + op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1),
>>>>> + (TARGET_64BIT
>>>>> + ? UNSPEC_GOTPCREL
>>>>> + : UNSPEC_GOT));
>>>>> + op1 = gen_rtx_CONST (Pmode, op1);
>>>>> + op1 = gen_const_mem (Pmode, op1);
>>>>> + /* This symbol must be referenced via a load from the Global
>>>>> + Offset Table. */
>>>>> + set_mem_alias_set (op1, ix86_GOT_alias_set ());
>>>>> + op1 = convert_to_mode (mode, op1, 1);
>>>>> + op1 = force_reg (mode, op1);
>>>>> + emit_insn (gen_rtx_SET (op0, op1));
>>>>> + /* Generate a CLOBBER so that there will be no REG_EQUAL note
>>>>> + on the last insn to prevent cse and fwprop from replacing
>>>>> + a GOT load with a constant. */
>>>>> + rtx tmp = gen_reg_rtx (Pmode);
>>>>> + emit_clobber (tmp);
>>>>> + return;
>>>>
>>>> Jeff, is this the recommended way to prevent CSE, as far as RTL
>>>> infrastructure is concerned? I didn't find any example of this
>>>> approach with other targets.
>>>>
>>>
>>> FWIW, the similar approach is used in ix86_expand_vector_move_misalign,
>>> ix86_expand_convert_uns_didf_sse and ix86_expand_vector_init_general
>>> as well as other targets:
>>>
>>> frv/frv.c: emit_clobber (op0);
>>> frv/frv.c: emit_clobber (op1);
>>> im32c/m32c.c: /* emit_clobber (gen_rtx_REG (HImode, R0L_REGNO)); */
>>> s390/s390.c: emit_clobber (addr);
>>> s390/s390.md: emit_clobber (reg0);
>>> s390/s390.md: emit_clobber (reg1);
>>> s390/s390.md: emit_clobber (reg0);
>>> s390/s390.md: emit_clobber (reg0);
>>> s390/s390.md: emit_clobber (reg1);
>>
>> These usages mark the whole register as being "clobbered"
>> (=undefined), before only a part of register is written, e.g.:
>>
>> emit_clobber (int_xmm);
>> emit_move_insn (gen_lowpart (DImode, int_xmm), input);
>>
>> They aren't used to prevent unwanted CSE.
>
> Since it's being called in the move expander, I thought the normal
> way of preventing the constant being rematerialised would be to reject
> it in the move define_insn predicates.
>
> FWIW, I agree that using a clobber for this is going to be fragile.
>
Here is the patch without clobber. Tested on x86-64. OK for
trunk?
Thanks.
--
H.J.
[-- Attachment #2: 0001-x86-64-Load-external-function-address-via-GOT-slot.patch --]
[-- Type: text/x-patch, Size: 11096 bytes --]
From 55ab339cc4173565095b66c0fc2ffa4267b55606 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Fri, 28 Aug 2015 19:14:49 -0700
Subject: [PATCH] x86-64: Load external function address via GOT slot
This patch implements the alternate code sequence recommended in
https://groups.google.com/forum/#!topic/x86-64-abi/de5_KnLHxtI
to load external function address via GOT slot with
movq func@GOTPCREL(%rip), %rax
so that the linker won't create a PLT entry for the external function address.
gcc/
PR target/67400
* config/i386/i386-protos.h (ix86_force_load_from_GOT_p): New.
* config/i386/i386.c (ix86_force_load_from_GOT_p): New function.
(ix86_legitimate_address_p): Allow UNSPEC_GOTPCREL if
ix86_force_load_from_GOT_p returns true.
(ix86_print_operand_address): Support UNSPEC_GOTPCREL if
ix86_force_load_from_GOT_p returns true.
(ix86_expand_move): Load the external function address via the
GOT slot if ix86_force_load_from_GOT_p returns true.
* config/i386/i386.md (*movsi_internal): Replace general_operand
with ix86_general_operand.
(*movqi_internal): Likewise.
* config/i386/predicates.md (x86_64_immediate_operand): Return
false if ix86_force_load_from_GOT_p returns true.
(address_no_seg_operand): Likewise.
(ix86_general_operand): New predicate.
gcc/testsuite/
PR target/67400
* gcc.target/i386/pr67400-1.c: New test.
* gcc.target/i386/pr67400-2.c: Likewise.
* gcc.target/i386/pr67400-3.c: Likewise.
* gcc.target/i386/pr67400-4.c: Likewise.
* gcc.target/i386/pr67400-5.c: Likewise.
* gcc.target/i386/pr67400-6.c: Likewise.
---
gcc/config/i386/i386-protos.h | 1 +
gcc/config/i386/i386.c | 44 +++++++++++++++++++++++++++++++
gcc/config/i386/i386.md | 4 +--
gcc/config/i386/predicates.md | 15 +++++++++++
gcc/testsuite/gcc.target/i386/pr67400-1.c | 13 +++++++++
gcc/testsuite/gcc.target/i386/pr67400-2.c | 14 ++++++++++
gcc/testsuite/gcc.target/i386/pr67400-3.c | 16 +++++++++++
gcc/testsuite/gcc.target/i386/pr67400-4.c | 13 +++++++++
gcc/testsuite/gcc.target/i386/pr67400-5.c | 11 ++++++++
gcc/testsuite/gcc.target/i386/pr67400-6.c | 13 +++++++++
10 files changed, 142 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/pr67400-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr67400-2.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr67400-3.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr67400-4.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr67400-5.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr67400-6.c
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 9fd14f6..8130161 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -70,6 +70,7 @@ extern bool ix86_expand_set_or_movmem (rtx, rtx, rtx, rtx, rtx, rtx,
extern bool constant_address_p (rtx);
extern bool legitimate_pic_operand_p (rtx);
extern bool legitimate_pic_address_disp_p (rtx);
+extern bool ix86_force_load_from_GOT_p (rtx);
extern void print_reg (rtx, int, FILE*);
extern void ix86_print_operand (FILE *, rtx, int);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 56a5b9c..6912e8c 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -15182,6 +15182,24 @@ ix86_legitimate_constant_p (machine_mode mode, rtx x)
return true;
}
+/* True if operand X should be loaded from GOT. */
+
+bool
+ix86_force_load_from_GOT_p (rtx x)
+{
+ /* External function symbol should be loaded via the GOT slot for
+ -fno-plt. */
+ return (!flag_plt
+ && !flag_pic
+ && ix86_cmodel != CM_LARGE
+ && TARGET_64BIT
+ && !TARGET_PECOFF
+ && !TARGET_MACHO
+ && GET_CODE (x) == SYMBOL_REF
+ && SYMBOL_REF_FUNCTION_P (x)
+ && !SYMBOL_REF_LOCAL_P (x));
+}
+
/* Determine if it's legal to put X into the constant pool. This
is not possible for the address of thread-local symbols, which
is checked above. */
@@ -15560,6 +15578,10 @@ ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
return false;
case UNSPEC_GOTPCREL:
+ gcc_assert (flag_pic
+ || ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)));
+ goto is_legitimate_pic;
+
case UNSPEC_PCREL:
gcc_assert (flag_pic);
goto is_legitimate_pic;
@@ -18130,6 +18152,12 @@ ix86_print_operand_address_as (FILE *file, rtx addr,
}
else if (flag_pic)
output_pic_addr_const (file, disp, 0);
+ else if (GET_CODE (disp) == CONST
+ && GET_CODE (XEXP (disp, 0)) == UNSPEC
+ && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
+ || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
+ && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
+ output_pic_addr_const (file, XEXP (disp, 0), code);
else
output_addr_const (file, disp);
}
@@ -19448,6 +19476,22 @@ ix86_expand_move (machine_mode mode, rtx operands[])
op1 = convert_to_mode (mode, op1, 1);
}
}
+ }
+ else if (ix86_force_load_from_GOT_p (op1))
+ {
+ /* Load the external function address via the GOT slot to
+ avoid PLT. */
+ op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1),
+ (TARGET_64BIT
+ ? UNSPEC_GOTPCREL
+ : UNSPEC_GOT));
+ op1 = gen_rtx_CONST (Pmode, op1);
+ op1 = gen_const_mem (Pmode, op1);
+ /* This symbol must be referenced via a load from the Global
+ Offset Table. */
+ set_mem_alias_set (op1, ix86_GOT_alias_set ());
+ op1 = convert_to_mode (mode, op1, 1);
+ op1 = force_reg (mode, op1);
}
else
{
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 85dda3f..8f3227f 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2347,7 +2347,7 @@
(define_insn "*movsi_internal"
[(set (match_operand:SI 0 "nonimmediate_operand"
"=r,m ,*y,*y,?rm,?*y,*v,*v,*v,m ,?r ,?r,?*Yi,*k ,*rm")
- (match_operand:SI 1 "general_operand"
+ (match_operand:SI 1 "ix86_general_operand"
"g ,re,C ,*y,*y ,rm ,C ,*v,m ,*v,*Yj,*v,r ,*krm,*k"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
@@ -2564,7 +2564,7 @@
(define_insn "*movqi_internal"
[(set (match_operand:QI 0 "nonimmediate_operand"
"=q,q ,q ,r,r ,?r,m ,k,k,r ,m,k")
- (match_operand:QI 1 "general_operand"
+ (match_operand:QI 1 "ix86_general_operand"
"q ,qn,qm,q,rn,qm,qn,r ,k,k,k,m"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index b3cf2a3..6c74e7e 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -149,6 +149,10 @@
(define_predicate "x86_64_immediate_operand"
(match_code "const_int,symbol_ref,label_ref,const")
{
+ /* Load the external function address via the GOT slot to avoid PLT. */
+ if (ix86_force_load_from_GOT_p (op))
+ return false;
+
if (!TARGET_64BIT)
return immediate_operand (op, mode);
@@ -363,6 +367,13 @@
}
})
+;; Return true if OP is general operand, excluding the external function
+;; symbol if it should be loaded via the GOT slot to avoid PLT.
+(define_predicate "ix86_general_operand"
+ (and (match_operand 0 "general_operand")
+ (ior (not (match_code "symbol_ref"))
+ (match_test "!ix86_force_load_from_GOT_p (op)"))))
+
;; Return true if size of VALUE can be stored in a sign
;; extended immediate field.
(define_predicate "x86_64_immediate_size_operand"
@@ -1032,6 +1043,10 @@
struct ix86_address parts;
int ok;
+ /* Load the external function address via the GOT slot to avoid PLT. */
+ if (ix86_force_load_from_GOT_p (op))
+ return false;
+
if (!CONST_INT_P (op)
&& mode != VOIDmode
&& GET_MODE (op) != mode)
diff --git a/gcc/testsuite/gcc.target/i386/pr67400-1.c b/gcc/testsuite/gcc.target/i386/pr67400-1.c
new file mode 100644
index 0000000..4dd5e35
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr67400-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fno-pic -fno-plt" } */
+
+extern void bar (void);
+
+void *
+foo (void)
+{
+ return &bar;
+}
+
+/* { dg-final { scan-assembler "mov\(l|q\)\[ \t\]*bar@GOTPCREL" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-not "\(mov|lea\)\(l|q\)\[ \t\]*\(\\\$|\)bar," { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr67400-2.c b/gcc/testsuite/gcc.target/i386/pr67400-2.c
new file mode 100644
index 0000000..9f3f4bc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr67400-2.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fno-pic -fno-plt" } */
+
+extern void bar (void);
+extern void *p;
+
+void
+foo (void)
+{
+ p = &bar;
+}
+
+/* { dg-final { scan-assembler "mov\(l|q\)\[ \t\]*bar@GOTPCREL" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-not "mov\(l|q\)\[ \t\]*\\\$bar," { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr67400-3.c b/gcc/testsuite/gcc.target/i386/pr67400-3.c
new file mode 100644
index 0000000..045974e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr67400-3.c
@@ -0,0 +1,16 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fno-pic -fno-plt" } */
+
+static void
+bar (void)
+{
+}
+
+void *
+foo (void)
+{
+ return &bar;
+}
+
+/* { dg-final { scan-assembler "mov\(l|q\)\[ \t\]*\\\$bar," } } */
+/* { dg-final { scan-assembler-not "mov\(l|q\)\[ \t\]*bar@GOTPCREL" { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr67400-4.c b/gcc/testsuite/gcc.target/i386/pr67400-4.c
new file mode 100644
index 0000000..fd373db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr67400-4.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fno-pic -fno-plt" } */
+
+extern void bar (void) __attribute__ ((visibility ("hidden")));
+
+void *
+foo (void)
+{
+ return &bar;
+}
+
+/* { dg-final { scan-assembler "mov\(l|q\)\[ \t\]*\\\$bar," } } */
+/* { dg-final { scan-assembler-not "mov\(l|q\)\[ \t\]*bar@GOTPCREL" { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr67400-5.c b/gcc/testsuite/gcc.target/i386/pr67400-5.c
new file mode 100644
index 0000000..9bb98dc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr67400-5.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fno-pic -fno-plt" } */
+
+extern void foo (void);
+extern void bar (int, int, int, int, int, int, void *);
+
+void
+x (void)
+{
+ bar (1, 2, 3, 4, 5, 6, foo);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr67400-6.c b/gcc/testsuite/gcc.target/i386/pr67400-6.c
new file mode 100644
index 0000000..b84196a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr67400-6.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fno-pic -fno-plt" } */
+
+extern int bar (void);
+
+int
+check (void *p)
+{
+ return p != &bar;
+}
+
+/* { dg-final { scan-assembler "cmp\(l|q\)\[ \t\]*.*bar@GOTPCREL" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-not "mov\(l|q\)\[ \t\]*\\\$bar," { target { ! ia32 } } } } */
--
2.5.5
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] x86-64: Load external function address via GOT slot
2016-06-21 12:40 ` H.J. Lu
@ 2016-06-21 18:22 ` Uros Bizjak
2016-06-21 19:51 ` H.J. Lu
0 siblings, 1 reply; 10+ messages in thread
From: Uros Bizjak @ 2016-06-21 18:22 UTC (permalink / raw)
To: H.J. Lu; +Cc: gcc-patches, Jakub Jelinek, Richard Sandiford
[-- Attachment #1: Type: text/plain, Size: 4052 bytes --]
On Tue, Jun 21, 2016 at 2:40 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Mon, Jun 20, 2016 at 12:46 PM, Richard Sandiford
> <rdsandiford@googlemail.com> wrote:
>> Uros Bizjak <ubizjak@gmail.com> writes:
>>> On Mon, Jun 20, 2016 at 9:19 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>> On Mon, Jun 20, 2016 at 12:13 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>>>> On Mon, Jun 20, 2016 at 7:05 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>>>>> Hi,
>>>>>>
>>>>>> This patch implements the alternate code sequence recommended in
>>>>>>
>>>>>> https://groups.google.com/forum/#!topic/x86-64-abi/de5_KnLHxtI
>>>>>>
>>>>>> to load external function address via GOT slot with
>>>>>>
>>>>>> movq func@GOTPCREL(%rip), %rax
>>>>>>
>>>>>> so that linker won't create a PLT entry for extern function
>>>>>> address.
>>>>>>
>>>>>> Tested on x86-64. OK for trunk?
>>>>>
>>>>>> + else if (ix86_force_load_from_GOT_p (op1))
>>>>>> + {
>>>>>> + /* Load the external function address via the GOT slot to
>>>>>> + avoid PLT. */
>>>>>> + op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1),
>>>>>> + (TARGET_64BIT
>>>>>> + ? UNSPEC_GOTPCREL
>>>>>> + : UNSPEC_GOT));
>>>>>> + op1 = gen_rtx_CONST (Pmode, op1);
>>>>>> + op1 = gen_const_mem (Pmode, op1);
>>>>>> + /* This symbol must be referenced via a load from the Global
>>>>>> + Offset Table. */
>>>>>> + set_mem_alias_set (op1, ix86_GOT_alias_set ());
>>>>>> + op1 = convert_to_mode (mode, op1, 1);
>>>>>> + op1 = force_reg (mode, op1);
>>>>>> + emit_insn (gen_rtx_SET (op0, op1));
>>>>>> + /* Generate a CLOBBER so that there will be no REG_EQUAL note
>>>>>> + on the last insn to prevent cse and fwprop from replacing
>>>>>> + a GOT load with a constant. */
>>>>>> + rtx tmp = gen_reg_rtx (Pmode);
>>>>>> + emit_clobber (tmp);
>>>>>> + return;
>>>>>
>>>>> Jeff, is this the recommended way to prevent CSE, as far as RTL
>>>>> infrastructure is concerned? I didn't find any example of this
>>>>> approach with other targets.
>>>>>
>>>>
>>>> FWIW, the similar approach is used in ix86_expand_vector_move_misalign,
>>>> ix86_expand_convert_uns_didf_sse and ix86_expand_vector_init_general
>>>> as well as other targets:
>>>>
>>>> frv/frv.c: emit_clobber (op0);
>>>> frv/frv.c: emit_clobber (op1);
>>>> m32c/m32c.c: /* emit_clobber (gen_rtx_REG (HImode, R0L_REGNO)); */
>>>> s390/s390.c: emit_clobber (addr);
>>>> s390/s390.md: emit_clobber (reg0);
>>>> s390/s390.md: emit_clobber (reg1);
>>>> s390/s390.md: emit_clobber (reg0);
>>>> s390/s390.md: emit_clobber (reg0);
>>>> s390/s390.md: emit_clobber (reg1);
>>>
>>> These usages mark the whole register as being "clobbered"
>>> (=undefined), before only a part of register is written, e.g.:
>>>
>>> emit_clobber (int_xmm);
>>> emit_move_insn (gen_lowpart (DImode, int_xmm), input);
>>>
>>> They aren't used to prevent unwanted CSE.
>>
>> Since it's being called in the move expander, I thought the normal
>> way of preventing the constant being rematerialised would be to reject
>> it in the move define_insn predicates.
>>
>> FWIW, I agree that using a clobber for this is going to be fragile.
>>
>
> Here is the patch without clobber. Tested on x86-64. OK for
> trunk?
No, your patch has multiple problems:
1. It won't work for e.g. &bar+1, since you have to legitimize the
symbol in two places of ix86_expand_move. Also, why use TARGET_64BIT
in
+ op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1),
+ (TARGET_64BIT
+ ? UNSPEC_GOTPCREL
+ : UNSPEC_GOT));
when ix86_force_load_from_GOT_p rejects non-64bit targets?
2. New check should be added to ix86_legitimate_constant_p, not to
predicates of move insn patterns. Unfortunately, we still have to
change x86_64_immediate_operand in two places.
I have attached my version of the patch. It handles all your
testcases, plus &foo+1 case. Bootstrap is still running.
Does the patch work for you?
Uros.
[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 5549 bytes --]
Index: i386-protos.h
===================================================================
--- i386-protos.h (revision 237653)
+++ i386-protos.h (working copy)
@@ -70,6 +70,7 @@ extern bool ix86_expand_set_or_movmem (rtx, rtx, r
extern bool constant_address_p (rtx);
extern bool legitimate_pic_operand_p (rtx);
extern bool legitimate_pic_address_disp_p (rtx);
+extern bool ix86_force_load_from_GOT_p (rtx);
extern void print_reg (rtx, int, FILE*);
extern void ix86_print_operand (FILE *, rtx, int);
Index: i386.c
===================================================================
--- i386.c (revision 237653)
+++ i386.c (working copy)
@@ -15120,6 +15120,19 @@ darwin_local_data_pic (rtx disp)
&& XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
}
+/* True if operand X should be loaded from GOT. */
+
+bool
+ix86_force_load_from_GOT_p (rtx x)
+{
+ return (TARGET_64BIT && !TARGET_PECOFF && !TARGET_MACHO
+ && !flag_plt && !flag_pic
+ && ix86_cmodel != CM_LARGE
+ && GET_CODE (x) == SYMBOL_REF
+ && SYMBOL_REF_FUNCTION_P (x)
+ && !SYMBOL_REF_LOCAL_P (x));
+}
+
/* Determine if a given RTX is a valid constant. We already know this
satisfies CONSTANT_P. */
@@ -15188,6 +15201,12 @@ ix86_legitimate_constant_p (machine_mode mode, rtx
if (MACHO_DYNAMIC_NO_PIC_P)
return machopic_symbol_defined_p (x);
#endif
+
+ /* External function address should be loaded
+ via the GOT slot to avoid PLT. */
+ if (ix86_force_load_from_GOT_p (x))
+ return false;
+
break;
CASE_CONST_SCALAR_INT:
@@ -15596,6 +15615,9 @@ ix86_legitimate_address_p (machine_mode, rtx addr,
return false;
case UNSPEC_GOTPCREL:
+ if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
+ goto is_legitimate_pic;
+ /* FALLTHRU */
case UNSPEC_PCREL:
gcc_assert (flag_pic);
goto is_legitimate_pic;
@@ -18164,6 +18186,12 @@ ix86_print_operand_address_as (FILE *file, rtx add
fputs ("ds:", file);
fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
}
+ /* Load the external function address via the GOT slot to avoid PLT. */
+ else if (GET_CODE (disp) == CONST
+ && GET_CODE (XEXP (disp, 0)) == UNSPEC
+ && XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
+ && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
+ output_pic_addr_const (file, disp, 0);
else if (flag_pic)
output_pic_addr_const (file, disp, 0);
else
@@ -19406,6 +19434,15 @@ ix86_expand_move (machine_mode mode, rtx operands[
return;
op1 = convert_to_mode (mode, op1, 1);
}
+ else if (ix86_force_load_from_GOT_p (op1))
+ {
+ /* Load the external function address via GOT slot to avoid PLT. */
+ op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1),
+ UNSPEC_GOTPCREL);
+ op1 = gen_rtx_CONST (Pmode, op1);
+ op1 = gen_const_mem (Pmode, op1);
+ set_mem_alias_set (op1, ix86_GOT_alias_set ());
+ }
else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
op1 = tmp;
}
@@ -19420,6 +19457,15 @@ ix86_expand_move (machine_mode mode, rtx operands[
model = SYMBOL_REF_TLS_MODEL (symbol);
if (model)
tmp = legitimize_tls_address (symbol, model, true);
+ else if (ix86_force_load_from_GOT_p (symbol))
+ {
+ /* Load the external function address via GOT slot to avoid PLT. */
+ tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol),
+ UNSPEC_GOTPCREL);
+ tmp = gen_rtx_CONST (Pmode, tmp);
+ tmp = gen_const_mem (Pmode, tmp);
+ set_mem_alias_set (tmp, ix86_GOT_alias_set ());
+ }
else
tmp = legitimize_pe_coff_symbol (symbol, true);
Index: predicates.md
===================================================================
--- predicates.md (revision 237653)
+++ predicates.md (working copy)
@@ -160,13 +160,18 @@
return trunc_int_for_mode (val, SImode) == val;
}
case SYMBOL_REF:
+ /* TLS symbols are not constant. */
+ if (SYMBOL_REF_TLS_MODEL (op))
+ return false;
+
+ /* Load the external function address via the GOT slot. */
+ if (ix86_force_load_from_GOT_p (op))
+ return false;
+
/* For certain code models, the symbolic references are known to fit.
in CM_SMALL_PIC model we know it fits if it is local to the shared
library. Don't count TLS SYMBOL_REFs here, since they should fit
only if inside of UNSPEC handled below. */
- /* TLS symbols are not constant. */
- if (SYMBOL_REF_TLS_MODEL (op))
- return false;
return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL
|| (ix86_cmodel == CM_MEDIUM && !SYMBOL_REF_FAR_ADDR_P (op)));
@@ -207,6 +212,11 @@
/* TLS symbols are not constant. */
if (SYMBOL_REF_TLS_MODEL (op1))
return false;
+
+ /* Load the external function address via the GOT slot. */
+ if (ix86_force_load_from_GOT_p (op))
+ return false;
+
/* For CM_SMALL assume that latest object is 16MB before
end of 31bits boundary. We may also accept pretty
large negative constants knowing that all objects are
@@ -273,10 +283,11 @@
return !(INTVAL (op) & ~(HOST_WIDE_INT) 0xffffffff);
case SYMBOL_REF:
- /* For certain code models, the symbolic references are known to fit. */
/* TLS symbols are not constant. */
if (SYMBOL_REF_TLS_MODEL (op))
return false;
+
+ /* For certain code models, the symbolic references are known to fit. */
return (ix86_cmodel == CM_SMALL
|| (ix86_cmodel == CM_MEDIUM
&& !SYMBOL_REF_FAR_ADDR_P (op)));
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] x86-64: Load external function address via GOT slot
2016-06-21 18:22 ` Uros Bizjak
@ 2016-06-21 19:51 ` H.J. Lu
2016-06-22 22:12 ` Uros Bizjak
0 siblings, 1 reply; 10+ messages in thread
From: H.J. Lu @ 2016-06-21 19:51 UTC (permalink / raw)
To: Uros Bizjak; +Cc: gcc-patches, Jakub Jelinek, Richard Sandiford
On Tue, Jun 21, 2016 at 11:22 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Tue, Jun 21, 2016 at 2:40 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Mon, Jun 20, 2016 at 12:46 PM, Richard Sandiford
>> <rdsandiford@googlemail.com> wrote:
>>> Uros Bizjak <ubizjak@gmail.com> writes:
>>>> On Mon, Jun 20, 2016 at 9:19 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>> On Mon, Jun 20, 2016 at 12:13 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>>>>> On Mon, Jun 20, 2016 at 7:05 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>>>>>> Hi,
>>>>>>>
>>>>>>> This patch implements the alternate code sequence recommended in
>>>>>>>
>>>>>>> https://groups.google.com/forum/#!topic/x86-64-abi/de5_KnLHxtI
>>>>>>>
>>>>>>> to load external function address via GOT slot with
>>>>>>>
>>>>>>> movq func@GOTPCREL(%rip), %rax
>>>>>>>
>>>>>>> so that linker won't create a PLT entry for extern function
>>>>>>> address.
>>>>>>>
>>>>>>> Tested on x86-64. OK for trunk?
>>>>>>
>>>>>>> + else if (ix86_force_load_from_GOT_p (op1))
>>>>>>> + {
>>>>>>> + /* Load the external function address via the GOT slot to
>>>>>>> + avoid PLT. */
>>>>>>> + op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1),
>>>>>>> + (TARGET_64BIT
>>>>>>> + ? UNSPEC_GOTPCREL
>>>>>>> + : UNSPEC_GOT));
>>>>>>> + op1 = gen_rtx_CONST (Pmode, op1);
>>>>>>> + op1 = gen_const_mem (Pmode, op1);
>>>>>>> + /* This symbol must be referenced via a load from the Global
>>>>>>> + Offset Table. */
>>>>>>> + set_mem_alias_set (op1, ix86_GOT_alias_set ());
>>>>>>> + op1 = convert_to_mode (mode, op1, 1);
>>>>>>> + op1 = force_reg (mode, op1);
>>>>>>> + emit_insn (gen_rtx_SET (op0, op1));
>>>>>>> + /* Generate a CLOBBER so that there will be no REG_EQUAL note
>>>>>>> + on the last insn to prevent cse and fwprop from replacing
>>>>>>> + a GOT load with a constant. */
>>>>>>> + rtx tmp = gen_reg_rtx (Pmode);
>>>>>>> + emit_clobber (tmp);
>>>>>>> + return;
>>>>>>
>>>>>> Jeff, is this the recommended way to prevent CSE, as far as RTL
>>>>>> infrastructure is concerned? I didn't find any example of this
>>>>>> approach with other targets.
>>>>>>
>>>>>
>>>>> FWIW, the similar approach is used in ix86_expand_vector_move_misalign,
>>>>> ix86_expand_convert_uns_didf_sse and ix86_expand_vector_init_general
>>>>> as well as other targets:
>>>>>
>>>>> frv/frv.c: emit_clobber (op0);
>>>>> frv/frv.c: emit_clobber (op1);
>>>>> m32c/m32c.c: /* emit_clobber (gen_rtx_REG (HImode, R0L_REGNO)); */
>>>>> s390/s390.c: emit_clobber (addr);
>>>>> s390/s390.md: emit_clobber (reg0);
>>>>> s390/s390.md: emit_clobber (reg1);
>>>>> s390/s390.md: emit_clobber (reg0);
>>>>> s390/s390.md: emit_clobber (reg0);
>>>>> s390/s390.md: emit_clobber (reg1);
>>>>
>>>> These usages mark the whole register as being "clobbered"
>>>> (=undefined), before only a part of register is written, e.g.:
>>>>
>>>> emit_clobber (int_xmm);
>>>> emit_move_insn (gen_lowpart (DImode, int_xmm), input);
>>>>
>>>> They aren't used to prevent unwanted CSE.
>>>
>>> Since it's being called in the move expander, I thought the normal
>>> way of preventing the constant being rematerialised would be to reject
>>> it in the move define_insn predicates.
>>>
>>> FWIW, I agree that using a clobber for this is going to be fragile.
>>>
>>
>> Here is the patch without clobber. Tested on x86-64. OK for
>> trunk?
>
> No, your patch has multiple problems:
>
> 1. It won't work for e.g. &bar+1, since you have to legitimize the
> symbol in two places of ix86_expand_move. Also, why use TARGET_64BIT
> in
>
> + op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1),
> + (TARGET_64BIT
> + ? UNSPEC_GOTPCREL
> + : UNSPEC_GOT));
>
> when ix86_force_load_from_GOT_p rejects non-64bit targets?
>
> 2. New check should be added to ix86_legitimate_constant_p, not to
> predicates of move insn patterns. Unfortunately, we still have to
> change x86_64_immediate_operand in two places.
>
> I have attached my version of the patch. It handles all your
> testcases, plus &foo+1 case. Bootstrap is still running.
>
> Does the patch work for you?
It works.
Thanks.
--
H.J.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] x86-64: Load external function address via GOT slot
2016-06-21 19:51 ` H.J. Lu
@ 2016-06-22 22:12 ` Uros Bizjak
0 siblings, 0 replies; 10+ messages in thread
From: Uros Bizjak @ 2016-06-22 22:12 UTC (permalink / raw)
To: H.J. Lu; +Cc: gcc-patches, Jakub Jelinek, Richard Sandiford
[-- Attachment #1: Type: text/plain, Size: 1830 bytes --]
On Tue, Jun 21, 2016 at 9:51 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> I have attached my version of the patch. It handles all your
>> testcases, plus &foo+1 case. Bootstrap is still running.
>>
>> Does the patch work for you?
>
> It works.
The attached version of the patch was committed to mainline SVN. Regarding
the testcases: I have restricted them to compile on non-ia32 targets only
for now. Let's change them when ia32 support is added (it is a trivial change).
2016-06-23 Uros Bizjak <ubizjak@gmail.com>
H.J. Lu <hongjiu.lu@intel.com>
PR target/67400
* config/i386/i386-protos.h (ix86_force_load_from_GOT_p): New.
* config/i386/i386.c (ix86_force_load_from_GOT_p): New function.
(ix86_legitimate_constant_p): Return false for SYMBOL_REFs where
ix86_force_load_from_GOT_p returns true.
(ix86_legitimate_address_p): Allow UNSPEC_GOTPCREL if
ix86_force_load_from_GOT_p returns true.
(ix86_print_operand_address_as): Support UNSPEC_GOTPCREL if
ix86_force_load_from_GOT_p returns true.
(ix86_expand_move): Load the external function address via the
GOT slot if ix86_force_load_from_GOT_p returns true.
* config/i386/predicates.md (x86_64_immediate_operand): Return
false for SYMBOL_REFs where ix86_force_load_from_GOT_p returns true.
(x86_64_zext_immediate_operand): Ditto.
testsuite/ChangeLog:
2016-06-23 H.J. Lu <hongjiu.lu@intel.com>
PR target/67400
* gcc.target/i386/pr67400-1.c: New test.
* gcc.target/i386/pr67400-2.c: Likewise.
* gcc.target/i386/pr67400-3.c: Likewise.
* gcc.target/i386/pr67400-4.c: Likewise.
* gcc.target/i386/pr67400-5.c: Likewise.
* gcc.target/i386/pr67400-6.c: Likewise.
* gcc.target/i386/pr67400-7.c: Likewise.
Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.
Committed to mainline SVN.
Uros.
[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 9639 bytes --]
Index: config/i386/i386-protos.h
===================================================================
--- config/i386/i386-protos.h (revision 237716)
+++ config/i386/i386-protos.h (working copy)
@@ -70,6 +70,7 @@ extern bool ix86_expand_set_or_movmem (rtx, rtx, r
extern bool constant_address_p (rtx);
extern bool legitimate_pic_operand_p (rtx);
extern bool legitimate_pic_address_disp_p (rtx);
+extern bool ix86_force_load_from_GOT_p (rtx);
extern void print_reg (rtx, int, FILE*);
extern void ix86_print_operand (FILE *, rtx, int);
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c (revision 237716)
+++ config/i386/i386.c (working copy)
@@ -15120,6 +15120,19 @@ darwin_local_data_pic (rtx disp)
&& XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
}
+/* True if operand X should be loaded from GOT. */
+
+bool
+ix86_force_load_from_GOT_p (rtx x)
+{
+ return (TARGET_64BIT && !TARGET_PECOFF && !TARGET_MACHO
+ && !flag_plt && !flag_pic
+ && ix86_cmodel != CM_LARGE
+ && GET_CODE (x) == SYMBOL_REF
+ && SYMBOL_REF_FUNCTION_P (x)
+ && !SYMBOL_REF_LOCAL_P (x));
+}
+
/* Determine if a given RTX is a valid constant. We already know this
satisfies CONSTANT_P. */
@@ -15188,6 +15201,12 @@ ix86_legitimate_constant_p (machine_mode mode, rtx
if (MACHO_DYNAMIC_NO_PIC_P)
return machopic_symbol_defined_p (x);
#endif
+
+ /* External function address should be loaded
+ via the GOT slot to avoid PLT. */
+ if (ix86_force_load_from_GOT_p (x))
+ return false;
+
break;
CASE_CONST_SCALAR_INT:
@@ -15596,6 +15615,9 @@ ix86_legitimate_address_p (machine_mode, rtx addr,
return false;
case UNSPEC_GOTPCREL:
+ if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
+ goto is_legitimate_pic;
+ /* FALLTHRU */
case UNSPEC_PCREL:
gcc_assert (flag_pic);
goto is_legitimate_pic;
@@ -18169,6 +18191,12 @@ ix86_print_operand_address_as (FILE *file, rtx add
fputs ("ds:", file);
fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
}
+ /* Load the external function address via the GOT slot to avoid PLT. */
+ else if (GET_CODE (disp) == CONST
+ && GET_CODE (XEXP (disp, 0)) == UNSPEC
+ && XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
+ && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
+ output_pic_addr_const (file, disp, 0);
else if (flag_pic)
output_pic_addr_const (file, disp, 0);
else
@@ -19417,6 +19445,15 @@ ix86_expand_move (machine_mode mode, rtx operands[
if (model)
op1 = legitimize_tls_address (op1, model, true);
+ else if (ix86_force_load_from_GOT_p (op1))
+ {
+ /* Load the external function address via GOT slot to avoid PLT. */
+ op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1),
+ UNSPEC_GOTPCREL);
+ op1 = gen_rtx_CONST (Pmode, op1);
+ op1 = gen_const_mem (Pmode, op1);
+ set_mem_alias_set (op1, ix86_GOT_alias_set ());
+ }
else
{
tmp = legitimize_pe_coff_symbol (op1, addend != NULL_RTX);
Index: config/i386/predicates.md
===================================================================
--- config/i386/predicates.md (revision 237716)
+++ config/i386/predicates.md (working copy)
@@ -160,13 +160,18 @@
return trunc_int_for_mode (val, SImode) == val;
}
case SYMBOL_REF:
+ /* TLS symbols are not constant. */
+ if (SYMBOL_REF_TLS_MODEL (op))
+ return false;
+
+ /* Load the external function address via the GOT slot. */
+ if (ix86_force_load_from_GOT_p (op))
+ return false;
+
/* For certain code models, the symbolic references are known to fit.
in CM_SMALL_PIC model we know it fits if it is local to the shared
library. Don't count TLS SYMBOL_REFs here, since they should fit
only if inside of UNSPEC handled below. */
- /* TLS symbols are not constant. */
- if (SYMBOL_REF_TLS_MODEL (op))
- return false;
return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL
|| (ix86_cmodel == CM_MEDIUM && !SYMBOL_REF_FAR_ADDR_P (op)));
@@ -207,6 +212,11 @@
/* TLS symbols are not constant. */
if (SYMBOL_REF_TLS_MODEL (op1))
return false;
+
+ /* Load the external function address via the GOT slot. */
+ if (ix86_force_load_from_GOT_p (op1))
+ return false;
+
/* For CM_SMALL assume that latest object is 16MB before
end of 31bits boundary. We may also accept pretty
large negative constants knowing that all objects are
@@ -273,10 +283,15 @@
return !(INTVAL (op) & ~(HOST_WIDE_INT) 0xffffffff);
case SYMBOL_REF:
- /* For certain code models, the symbolic references are known to fit. */
/* TLS symbols are not constant. */
if (SYMBOL_REF_TLS_MODEL (op))
return false;
+
+ /* Load the external function address via the GOT slot. */
+ if (ix86_force_load_from_GOT_p (op))
+ return false;
+
+ /* For certain code models, the symbolic references are known to fit. */
return (ix86_cmodel == CM_SMALL
|| (ix86_cmodel == CM_MEDIUM
&& !SYMBOL_REF_FAR_ADDR_P (op)));
@@ -301,6 +316,11 @@
/* TLS symbols are not constant. */
if (SYMBOL_REF_TLS_MODEL (op1))
return false;
+
+ /* Load the external function address via the GOT slot. */
+ if (ix86_force_load_from_GOT_p (op1))
+ return false;
+
/* For small code model we may accept pretty large positive
offsets, since one bit is available for free. Negative
offsets are limited by the size of NULL pointer area
Index: testsuite/gcc.target/i386/pr67400-1.c
===================================================================
--- testsuite/gcc.target/i386/pr67400-1.c (nonexistent)
+++ testsuite/gcc.target/i386/pr67400-1.c (working copy)
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { *-*-linux* && { ! ia32 } } } } */
+/* { dg-options "-O2 -fno-pic -fno-plt" } */
+
+extern void bar (void);
+
+void *
+foo (void)
+{
+ return &bar;
+}
+
+/* { dg-final { scan-assembler "mov\(l|q\)\[ \t\]*bar@GOTPCREL" } } */
+/* { dg-final { scan-assembler-not "\(mov|lea\)\(l|q\)\[ \t\]*\(\\\$|\)bar," } } */
Index: testsuite/gcc.target/i386/pr67400-2.c
===================================================================
--- testsuite/gcc.target/i386/pr67400-2.c (nonexistent)
+++ testsuite/gcc.target/i386/pr67400-2.c (working copy)
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { *-*-linux* && { ! ia32 } } } } */
+/* { dg-options "-O2 -fno-pic -fno-plt" } */
+
+extern void bar (void);
+extern void *p;
+
+void
+foo (void)
+{
+ p = &bar;
+}
+
+/* { dg-final { scan-assembler "mov\(l|q\)\[ \t\]*bar@GOTPCREL" } } */
+/* { dg-final { scan-assembler-not "mov\(l|q\)\[ \t\]*\\\$bar," } } */
Index: testsuite/gcc.target/i386/pr67400-3.c
===================================================================
--- testsuite/gcc.target/i386/pr67400-3.c (nonexistent)
+++ testsuite/gcc.target/i386/pr67400-3.c (working copy)
@@ -0,0 +1,16 @@
+/* { dg-do compile { target { *-*-linux* && { ! ia32 } } } } */
+/* { dg-options "-O2 -fno-pic -fno-plt" } */
+
+static void
+bar (void)
+{
+}
+
+void *
+foo (void)
+{
+ return &bar;
+}
+
+/* { dg-final { scan-assembler "mov\(l|q\)\[ \t\]*\\\$bar," } } */
+/* { dg-final { scan-assembler-not "mov\(l|q\)\[ \t\]*bar@GOTPCREL" } } */
Index: testsuite/gcc.target/i386/pr67400-4.c
===================================================================
--- testsuite/gcc.target/i386/pr67400-4.c (nonexistent)
+++ testsuite/gcc.target/i386/pr67400-4.c (working copy)
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { *-*-linux* && { ! ia32 } } } } */
+/* { dg-options "-O2 -fno-pic -fno-plt" } */
+
+extern void bar (void) __attribute__ ((visibility ("hidden")));
+
+void *
+foo (void)
+{
+ return &bar;
+}
+
+/* { dg-final { scan-assembler "mov\(l|q\)\[ \t\]*\\\$bar," } } */
+/* { dg-final { scan-assembler-not "mov\(l|q\)\[ \t\]*bar@GOTPCREL" } } */
Index: testsuite/gcc.target/i386/pr67400-5.c
===================================================================
--- testsuite/gcc.target/i386/pr67400-5.c (nonexistent)
+++ testsuite/gcc.target/i386/pr67400-5.c (working copy)
@@ -0,0 +1,11 @@
+/* { dg-do compile { target { *-*-linux* && { ! ia32 } } } } */
+/* { dg-options "-O2 -fno-pic -fno-plt" } */
+
+extern void foo (void);
+extern void bar (int, int, int, int, int, int, void *);
+
+void
+x (void)
+{
+ bar (1, 2, 3, 4, 5, 6, foo);
+}
Index: testsuite/gcc.target/i386/pr67400-6.c
===================================================================
--- testsuite/gcc.target/i386/pr67400-6.c (nonexistent)
+++ testsuite/gcc.target/i386/pr67400-6.c (working copy)
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { *-*-linux* && { ! ia32 } } } } */
+/* { dg-options "-O2 -fno-pic -fno-plt" } */
+
+extern int bar (void);
+
+int
+check (void *p)
+{
+ return p != &bar;
+}
+
+/* { dg-final { scan-assembler "cmp\(l|q\)\[ \t\]*.*bar@GOTPCREL" } } */
+/* { dg-final { scan-assembler-not "mov\(l|q\)\[ \t\]*\\\$bar," } } */
Index: testsuite/gcc.target/i386/pr67400-7.c
===================================================================
--- testsuite/gcc.target/i386/pr67400-7.c (nonexistent)
+++ testsuite/gcc.target/i386/pr67400-7.c (working copy)
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { *-*-linux* && { ! ia32 } } } } */
+/* { dg-options "-O2 -fno-pic -fno-plt" } */
+
+extern void bar (void);
+
+void *
+foo (void)
+{
+ return &bar+1;
+}
+
+/* { dg-final { scan-assembler "mov\(l|q\)\[ \t\]*bar@GOTPCREL" } } */
+/* { dg-final { scan-assembler-not "\(mov|lea\)\(l|q\)\[ \t\]*\(\\\$|\)bar," } } */
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2016-06-22 22:12 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-06-20 17:05 [PATCH] x86-64: Load external function address via GOT slot H.J. Lu
2016-06-20 19:13 ` Uros Bizjak
2016-06-20 19:19 ` H.J. Lu
2016-06-20 19:27 ` Uros Bizjak
2016-06-20 19:47 ` Richard Sandiford
2016-06-20 20:34 ` H.J. Lu
2016-06-21 12:40 ` H.J. Lu
2016-06-21 18:22 ` Uros Bizjak
2016-06-21 19:51 ` H.J. Lu
2016-06-22 22:12 ` Uros Bizjak
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).