* [PATCH] x86-64: Load external function address via GOT slot @ 2016-06-20 17:05 H.J. Lu 2016-06-20 19:13 ` Uros Bizjak 0 siblings, 1 reply; 10+ messages in thread From: H.J. Lu @ 2016-06-20 17:05 UTC (permalink / raw) To: gcc-patches; +Cc: Jakub Jelinek, Uros Bizjak Hi, This patch implements the alternate code sequence recommended in https://groups.google.com/forum/#!topic/x86-64-abi/de5_KnLHxtI to load an external function address via its GOT slot with movq func@GOTPCREL(%rip), %rax so that the linker won't create a PLT entry for an external function address. Tested on x86-64. OK for trunk? H.J. -- gcc/ PR target/67400 * config/i386/i386-protos.h (ix86_force_load_from_GOT_p): New. * config/i386/i386.c (ix86_force_load_from_GOT_p): New function. (ix86_legitimate_address_p): Allow UNSPEC_GOTPCREL if ix86_force_load_from_GOT_p returns true. (ix86_print_operand_address_as): Support UNSPEC_GOTPCREL if ix86_force_load_from_GOT_p returns true. (ix86_expand_move): Load the external function address via the GOT slot if ix86_force_load_from_GOT_p returns true. * config/i386/predicates.md (x86_64_immediate_operand): Return false if ix86_force_load_from_GOT_p returns true. gcc/testsuite/ PR target/67400 * gcc.target/i386/pr67400-1.c: New test. * gcc.target/i386/pr67400-2.c: Likewise. * gcc.target/i386/pr67400-3.c: Likewise. * gcc.target/i386/pr67400-4.c: Likewise. * gcc.target/i386/pr67400-5.c: Likewise. * gcc.target/i386/pr67400-6.c: Likewise. 
--- gcc/config/i386/i386-protos.h | 1 + gcc/config/i386/i386.c | 51 +++++++++++++++++++++++++++++++ gcc/config/i386/predicates.md | 4 +++ gcc/testsuite/gcc.target/i386/pr67400-1.c | 13 ++++++++ gcc/testsuite/gcc.target/i386/pr67400-2.c | 14 +++++++++ gcc/testsuite/gcc.target/i386/pr67400-3.c | 16 ++++++++++ gcc/testsuite/gcc.target/i386/pr67400-4.c | 13 ++++++++ gcc/testsuite/gcc.target/i386/pr67400-5.c | 11 +++++++ gcc/testsuite/gcc.target/i386/pr67400-6.c | 13 ++++++++ 9 files changed, 136 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/pr67400-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr67400-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr67400-3.c create mode 100644 gcc/testsuite/gcc.target/i386/pr67400-4.c create mode 100644 gcc/testsuite/gcc.target/i386/pr67400-5.c create mode 100644 gcc/testsuite/gcc.target/i386/pr67400-6.c diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 9fd14f6..8130161 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -70,6 +70,7 @@ extern bool ix86_expand_set_or_movmem (rtx, rtx, rtx, rtx, rtx, rtx, extern bool constant_address_p (rtx); extern bool legitimate_pic_operand_p (rtx); extern bool legitimate_pic_address_disp_p (rtx); +extern bool ix86_force_load_from_GOT_p (rtx); extern void print_reg (rtx, int, FILE*); extern void ix86_print_operand (FILE *, rtx, int); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 56a5b9c..c8c5081 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -15182,6 +15182,24 @@ ix86_legitimate_constant_p (machine_mode mode, rtx x) return true; } +/* True if operand X should be loaded from GOT. */ + +bool +ix86_force_load_from_GOT_p (rtx x) +{ + /* External function symbol should be loaded via the GOT slot for + -fno-plt. 
*/ + return (!flag_plt + && !flag_pic + && ix86_cmodel != CM_LARGE + && TARGET_64BIT + && !TARGET_PECOFF + && !TARGET_MACHO + && GET_CODE (x) == SYMBOL_REF + && SYMBOL_REF_FUNCTION_P (x) + && !SYMBOL_REF_LOCAL_P (x)); +} + /* Determine if it's legal to put X into the constant pool. This is not possible for the address of thread-local symbols, which is checked above. */ @@ -15560,6 +15578,10 @@ ix86_legitimate_address_p (machine_mode, rtx addr, bool strict) return false; case UNSPEC_GOTPCREL: + gcc_assert (flag_pic + || ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0))); + goto is_legitimate_pic; + case UNSPEC_PCREL: gcc_assert (flag_pic); goto is_legitimate_pic; @@ -18130,6 +18152,12 @@ ix86_print_operand_address_as (FILE *file, rtx addr, } else if (flag_pic) output_pic_addr_const (file, disp, 0); + else if (GET_CODE (disp) == CONST + && GET_CODE (XEXP (disp, 0)) == UNSPEC + && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL + || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT) + && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0))) + output_pic_addr_const (file, XEXP (disp, 0), code); else output_addr_const (file, disp); } @@ -19448,6 +19476,29 @@ ix86_expand_move (machine_mode mode, rtx operands[]) op1 = convert_to_mode (mode, op1, 1); } } + } + else if (ix86_force_load_from_GOT_p (op1)) + { + /* Load the external function address via the GOT slot to + avoid PLT. */ + op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1), + (TARGET_64BIT + ? UNSPEC_GOTPCREL + : UNSPEC_GOT)); + op1 = gen_rtx_CONST (Pmode, op1); + op1 = gen_const_mem (Pmode, op1); + /* This symbol must be referenced via a load from the Global + Offset Table. */ + set_mem_alias_set (op1, ix86_GOT_alias_set ()); + op1 = convert_to_mode (mode, op1, 1); + op1 = force_reg (mode, op1); + emit_insn (gen_rtx_SET (op0, op1)); + /* Generate a CLOBBER so that there will be no REG_EQUAL note + on the last insn to prevent cse and fwprop from replacing + a GOT load with a constant. 
*/ + rtx tmp = gen_reg_rtx (Pmode); + emit_clobber (tmp); + return; } else { diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index b3cf2a3..06a0002 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -149,6 +149,10 @@ (define_predicate "x86_64_immediate_operand" (match_code "const_int,symbol_ref,label_ref,const") { + /* Load the external function address via the GOT slot to avoid PLT. */ + if (ix86_force_load_from_GOT_p (op)) + return false; + if (!TARGET_64BIT) return immediate_operand (op, mode); diff --git a/gcc/testsuite/gcc.target/i386/pr67400-1.c b/gcc/testsuite/gcc.target/i386/pr67400-1.c new file mode 100644 index 0000000..a875b76 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr67400-1.c @@ -0,0 +1,13 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fno-pic -fno-plt" } */ + +extern void bar (void); + +void * +foo (void) +{ + return &bar; +} + +/* { dg-final { scan-assembler "mov\(l|q\)\[ \t\]*bar@GOTPCREL" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-not "mov\(l|q\)\[ \t\]*\\\$bar," { target { ! ia32 } } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr67400-2.c b/gcc/testsuite/gcc.target/i386/pr67400-2.c new file mode 100644 index 0000000..9f3f4bc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr67400-2.c @@ -0,0 +1,14 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fno-pic -fno-plt" } */ + +extern void bar (void); +extern void *p; + +void +foo (void) +{ + p = &bar; +} + +/* { dg-final { scan-assembler "mov\(l|q\)\[ \t\]*bar@GOTPCREL" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-not "mov\(l|q\)\[ \t\]*\\\$bar," { target { ! 
ia32 } } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr67400-3.c b/gcc/testsuite/gcc.target/i386/pr67400-3.c new file mode 100644 index 0000000..045974e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr67400-3.c @@ -0,0 +1,16 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fno-pic -fno-plt" } */ + +static void +bar (void) +{ +} + +void * +foo (void) +{ + return &bar; +} + +/* { dg-final { scan-assembler "mov\(l|q\)\[ \t\]*\\\$bar," } } */ +/* { dg-final { scan-assembler-not "mov\(l|q\)\[ \t\]*bar@GOTPCREL" { target { ! ia32 } } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr67400-4.c b/gcc/testsuite/gcc.target/i386/pr67400-4.c new file mode 100644 index 0000000..fd373db --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr67400-4.c @@ -0,0 +1,13 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fno-pic -fno-plt" } */ + +extern void bar (void) __attribute__ ((visibility ("hidden"))); + +void * +foo (void) +{ + return &bar; +} + +/* { dg-final { scan-assembler "mov\(l|q\)\[ \t\]*\\\$bar," } } */ +/* { dg-final { scan-assembler-not "mov\(l|q\)\[ \t\]*bar@GOTPCREL" { target { ! 
ia32 } } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr67400-5.c b/gcc/testsuite/gcc.target/i386/pr67400-5.c new file mode 100644 index 0000000..9bb98dc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr67400-5.c @@ -0,0 +1,11 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fno-pic -fno-plt" } */ + +extern void foo (void); +extern void bar (int, int, int, int, int, int, void *); + +void +x (void) +{ + bar (1, 2, 3, 4, 5, 6, foo); +} diff --git a/gcc/testsuite/gcc.target/i386/pr67400-6.c b/gcc/testsuite/gcc.target/i386/pr67400-6.c new file mode 100644 index 0000000..b84196a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr67400-6.c @@ -0,0 +1,13 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fno-pic -fno-plt" } */ + +extern int bar (void); + +int +check (void *p) +{ + return p != &bar; +} + +/* { dg-final { scan-assembler "cmp\(l|q\)\[ \t\]*.*bar@GOTPCREL" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-not "mov\(l|q\)\[ \t\]*\\\$bar," { target { ! ia32 } } } } */ -- 2.5.5 ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] x86-64: Load external function address via GOT slot 2016-06-20 17:05 [PATCH] x86-64: Load external function address via GOT slot H.J. Lu @ 2016-06-20 19:13 ` Uros Bizjak 2016-06-20 19:19 ` H.J. Lu 0 siblings, 1 reply; 10+ messages in thread From: Uros Bizjak @ 2016-06-20 19:13 UTC (permalink / raw) To: H.J. Lu; +Cc: gcc-patches, Jakub Jelinek On Mon, Jun 20, 2016 at 7:05 PM, H.J. Lu <hongjiu.lu@intel.com> wrote: > Hi, > > This patch implements the alternate code sequence recommended in > > https://groups.google.com/forum/#!topic/x86-64-abi/de5_KnLHxtI > > to load external function address via GOT slot with > > movq func@GOTPCREL(%rip), %rax > > so that linker won't create an PLT entry for extern function > address. > > Tested on x86-64. OK for trunk? > + else if (ix86_force_load_from_GOT_p (op1)) > + { > + /* Load the external function address via the GOT slot to > + avoid PLT. */ > + op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1), > + (TARGET_64BIT > + ? UNSPEC_GOTPCREL > + : UNSPEC_GOT)); > + op1 = gen_rtx_CONST (Pmode, op1); > + op1 = gen_const_mem (Pmode, op1); > + /* This symbol must be referenced via a load from the Global > + Offset Table. */ > + set_mem_alias_set (op1, ix86_GOT_alias_set ()); > + op1 = convert_to_mode (mode, op1, 1); > + op1 = force_reg (mode, op1); > + emit_insn (gen_rtx_SET (op0, op1)); > + /* Generate a CLOBBER so that there will be no REG_EQUAL note > + on the last insn to prevent cse and fwprop from replacing > + a GOT load with a constant. */ > + rtx tmp = gen_reg_rtx (Pmode); > + emit_clobber (tmp); > + return; Jeff, is this the recommended way to prevent CSE, as far as RTL infrastructure is concerned? I didn't find any example of this approach with other targets. Uros. ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] x86-64: Load external function address via GOT slot 2016-06-20 19:13 ` Uros Bizjak @ 2016-06-20 19:19 ` H.J. Lu 2016-06-20 19:27 ` Uros Bizjak 0 siblings, 1 reply; 10+ messages in thread From: H.J. Lu @ 2016-06-20 19:19 UTC (permalink / raw) To: Uros Bizjak; +Cc: gcc-patches, Jakub Jelinek On Mon, Jun 20, 2016 at 12:13 PM, Uros Bizjak <ubizjak@gmail.com> wrote: > On Mon, Jun 20, 2016 at 7:05 PM, H.J. Lu <hongjiu.lu@intel.com> wrote: >> Hi, >> >> This patch implements the alternate code sequence recommended in >> >> https://groups.google.com/forum/#!topic/x86-64-abi/de5_KnLHxtI >> >> to load external function address via GOT slot with >> >> movq func@GOTPCREL(%rip), %rax >> >> so that linker won't create an PLT entry for extern function >> address. >> >> Tested on x86-64. OK for trunk? > >> + else if (ix86_force_load_from_GOT_p (op1)) >> + { >> + /* Load the external function address via the GOT slot to >> + avoid PLT. */ >> + op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1), >> + (TARGET_64BIT >> + ? UNSPEC_GOTPCREL >> + : UNSPEC_GOT)); >> + op1 = gen_rtx_CONST (Pmode, op1); >> + op1 = gen_const_mem (Pmode, op1); >> + /* This symbol must be referenced via a load from the Global >> + Offset Table. */ >> + set_mem_alias_set (op1, ix86_GOT_alias_set ()); >> + op1 = convert_to_mode (mode, op1, 1); >> + op1 = force_reg (mode, op1); >> + emit_insn (gen_rtx_SET (op0, op1)); >> + /* Generate a CLOBBER so that there will be no REG_EQUAL note >> + on the last insn to prevent cse and fwprop from replacing >> + a GOT load with a constant. */ >> + rtx tmp = gen_reg_rtx (Pmode); >> + emit_clobber (tmp); >> + return; > > Jeff, is this the recommended way to prevent CSE, as far as RTL > infrastructure is concerned? I didn't find any example of this > approach with other targets. 
> FWIW, a similar approach is used in ix86_expand_vector_move_misalign, ix86_expand_convert_uns_didf_sse and ix86_expand_vector_init_general as well as other targets: frv/frv.c: emit_clobber (op0); frv/frv.c: emit_clobber (op1); m32c/m32c.c: /* emit_clobber (gen_rtx_REG (HImode, R0L_REGNO)); */ s390/s390.c: emit_clobber (addr); s390/s390.md: emit_clobber (reg0); s390/s390.md: emit_clobber (reg1); s390/s390.md: emit_clobber (reg0); s390/s390.md: emit_clobber (reg0); s390/s390.md: emit_clobber (reg1); -- H.J. ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] x86-64: Load external function address via GOT slot 2016-06-20 19:19 ` H.J. Lu @ 2016-06-20 19:27 ` Uros Bizjak 2016-06-20 19:47 ` Richard Sandiford 0 siblings, 1 reply; 10+ messages in thread From: Uros Bizjak @ 2016-06-20 19:27 UTC (permalink / raw) To: H.J. Lu; +Cc: gcc-patches, Jakub Jelinek On Mon, Jun 20, 2016 at 9:19 PM, H.J. Lu <hjl.tools@gmail.com> wrote: > On Mon, Jun 20, 2016 at 12:13 PM, Uros Bizjak <ubizjak@gmail.com> wrote: >> On Mon, Jun 20, 2016 at 7:05 PM, H.J. Lu <hongjiu.lu@intel.com> wrote: >>> Hi, >>> >>> This patch implements the alternate code sequence recommended in >>> >>> https://groups.google.com/forum/#!topic/x86-64-abi/de5_KnLHxtI >>> >>> to load external function address via GOT slot with >>> >>> movq func@GOTPCREL(%rip), %rax >>> >>> so that linker won't create an PLT entry for extern function >>> address. >>> >>> Tested on x86-64. OK for trunk? >> >>> + else if (ix86_force_load_from_GOT_p (op1)) >>> + { >>> + /* Load the external function address via the GOT slot to >>> + avoid PLT. */ >>> + op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1), >>> + (TARGET_64BIT >>> + ? UNSPEC_GOTPCREL >>> + : UNSPEC_GOT)); >>> + op1 = gen_rtx_CONST (Pmode, op1); >>> + op1 = gen_const_mem (Pmode, op1); >>> + /* This symbol must be referenced via a load from the Global >>> + Offset Table. */ >>> + set_mem_alias_set (op1, ix86_GOT_alias_set ()); >>> + op1 = convert_to_mode (mode, op1, 1); >>> + op1 = force_reg (mode, op1); >>> + emit_insn (gen_rtx_SET (op0, op1)); >>> + /* Generate a CLOBBER so that there will be no REG_EQUAL note >>> + on the last insn to prevent cse and fwprop from replacing >>> + a GOT load with a constant. */ >>> + rtx tmp = gen_reg_rtx (Pmode); >>> + emit_clobber (tmp); >>> + return; >> >> Jeff, is this the recommended way to prevent CSE, as far as RTL >> infrastructure is concerned? I didn't find any example of this >> approach with other targets. 
>> > > FWIW, the similar approach is used in ix86_expand_vector_move_misalign, > ix86_expand_convert_uns_didf_sse and ix86_expand_vector_init_general > as well as other targets: > > frv/frv.c: emit_clobber (op0); > frv/frv.c: emit_clobber (op1); > im32c/m32c.c: /* emit_clobber (gen_rtx_REG (HImode, R0L_REGNO)); */ > s390/s390.c: emit_clobber (addr); > s390/s390.md: emit_clobber (reg0); > s390/s390.md: emit_clobber (reg1); > s390/s390.md: emit_clobber (reg0); > s390/s390.md: emit_clobber (reg0); > s390/s390.md: emit_clobber (reg1); These usages mark the whole register as being "clobbered" (=undefined), before only a part of register is written, e.g.: emit_clobber (int_xmm); emit_move_insn (gen_lowpart (DImode, int_xmm), input); They aren't used to prevent unwanted CSE. Uros. ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] x86-64: Load external function address via GOT slot 2016-06-20 19:27 ` Uros Bizjak @ 2016-06-20 19:47 ` Richard Sandiford 2016-06-20 20:34 ` H.J. Lu 2016-06-21 12:40 ` H.J. Lu 0 siblings, 2 replies; 10+ messages in thread From: Richard Sandiford @ 2016-06-20 19:47 UTC (permalink / raw) To: Uros Bizjak; +Cc: H.J. Lu, gcc-patches, Jakub Jelinek Uros Bizjak <ubizjak@gmail.com> writes: > On Mon, Jun 20, 2016 at 9:19 PM, H.J. Lu <hjl.tools@gmail.com> wrote: >> On Mon, Jun 20, 2016 at 12:13 PM, Uros Bizjak <ubizjak@gmail.com> wrote: >>> On Mon, Jun 20, 2016 at 7:05 PM, H.J. Lu <hongjiu.lu@intel.com> wrote: >>>> Hi, >>>> >>>> This patch implements the alternate code sequence recommended in >>>> >>>> https://groups.google.com/forum/#!topic/x86-64-abi/de5_KnLHxtI >>>> >>>> to load external function address via GOT slot with >>>> >>>> movq func@GOTPCREL(%rip), %rax >>>> >>>> so that linker won't create an PLT entry for extern function >>>> address. >>>> >>>> Tested on x86-64. OK for trunk? >>> >>>> + else if (ix86_force_load_from_GOT_p (op1)) >>>> + { >>>> + /* Load the external function address via the GOT slot to >>>> + avoid PLT. */ >>>> + op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1), >>>> + (TARGET_64BIT >>>> + ? UNSPEC_GOTPCREL >>>> + : UNSPEC_GOT)); >>>> + op1 = gen_rtx_CONST (Pmode, op1); >>>> + op1 = gen_const_mem (Pmode, op1); >>>> + /* This symbol must be referenced via a load from the Global >>>> + Offset Table. */ >>>> + set_mem_alias_set (op1, ix86_GOT_alias_set ()); >>>> + op1 = convert_to_mode (mode, op1, 1); >>>> + op1 = force_reg (mode, op1); >>>> + emit_insn (gen_rtx_SET (op0, op1)); >>>> + /* Generate a CLOBBER so that there will be no REG_EQUAL note >>>> + on the last insn to prevent cse and fwprop from replacing >>>> + a GOT load with a constant. */ >>>> + rtx tmp = gen_reg_rtx (Pmode); >>>> + emit_clobber (tmp); >>>> + return; >>> >>> Jeff, is this the recommended way to prevent CSE, as far as RTL >>> infrastructure is concerned? 
I didn't find any example of this >>> approach with other targets. >>> >> >> FWIW, the similar approach is used in ix86_expand_vector_move_misalign, >> ix86_expand_convert_uns_didf_sse and ix86_expand_vector_init_general >> as well as other targets: >> >> frv/frv.c: emit_clobber (op0); >> frv/frv.c: emit_clobber (op1); >> im32c/m32c.c: /* emit_clobber (gen_rtx_REG (HImode, R0L_REGNO)); */ >> s390/s390.c: emit_clobber (addr); >> s390/s390.md: emit_clobber (reg0); >> s390/s390.md: emit_clobber (reg1); >> s390/s390.md: emit_clobber (reg0); >> s390/s390.md: emit_clobber (reg0); >> s390/s390.md: emit_clobber (reg1); > > These usages mark the whole register as being "clobbered" > (=undefined), before only a part of register is written, e.g.: > > emit_clobber (int_xmm); > emit_move_insn (gen_lowpart (DImode, int_xmm), input); > > They aren't used to prevent unwanted CSE. Since it's being called in the move expander, I thought the normal way of preventing the constant being rematerialised would be to reject it in the move define_insn predicates. FWIW, I agree that using a clobber for this is going to be fragile. Thanks, Richard ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] x86-64: Load external function address via GOT slot 2016-06-20 19:47 ` Richard Sandiford @ 2016-06-20 20:34 ` H.J. Lu 2016-06-21 12:40 ` H.J. Lu 1 sibling, 0 replies; 10+ messages in thread From: H.J. Lu @ 2016-06-20 20:34 UTC (permalink / raw) To: Uros Bizjak, gcc-patches, Jakub Jelinek, Richard Sandiford On Mon, Jun 20, 2016 at 12:46 PM, Richard Sandiford <rdsandiford@googlemail.com> wrote: > Uros Bizjak <ubizjak@gmail.com> writes: >> On Mon, Jun 20, 2016 at 9:19 PM, H.J. Lu <hjl.tools@gmail.com> wrote: >>> On Mon, Jun 20, 2016 at 12:13 PM, Uros Bizjak <ubizjak@gmail.com> wrote: >>>> On Mon, Jun 20, 2016 at 7:05 PM, H.J. Lu <hongjiu.lu@intel.com> wrote: >>>>> Hi, >>>>> >>>>> This patch implements the alternate code sequence recommended in >>>>> >>>>> https://groups.google.com/forum/#!topic/x86-64-abi/de5_KnLHxtI >>>>> >>>>> to load external function address via GOT slot with >>>>> >>>>> movq func@GOTPCREL(%rip), %rax >>>>> >>>>> so that linker won't create an PLT entry for extern function >>>>> address. >>>>> >>>>> Tested on x86-64. OK for trunk? >>>> >>>>> + else if (ix86_force_load_from_GOT_p (op1)) >>>>> + { >>>>> + /* Load the external function address via the GOT slot to >>>>> + avoid PLT. */ >>>>> + op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1), >>>>> + (TARGET_64BIT >>>>> + ? UNSPEC_GOTPCREL >>>>> + : UNSPEC_GOT)); >>>>> + op1 = gen_rtx_CONST (Pmode, op1); >>>>> + op1 = gen_const_mem (Pmode, op1); >>>>> + /* This symbol must be referenced via a load from the Global >>>>> + Offset Table. */ >>>>> + set_mem_alias_set (op1, ix86_GOT_alias_set ()); >>>>> + op1 = convert_to_mode (mode, op1, 1); >>>>> + op1 = force_reg (mode, op1); >>>>> + emit_insn (gen_rtx_SET (op0, op1)); >>>>> + /* Generate a CLOBBER so that there will be no REG_EQUAL note >>>>> + on the last insn to prevent cse and fwprop from replacing >>>>> + a GOT load with a constant. 
*/ >>>>> + rtx tmp = gen_reg_rtx (Pmode); >>>>> + emit_clobber (tmp); >>>>> + return; >>>> >>>> Jeff, is this the recommended way to prevent CSE, as far as RTL >>>> infrastructure is concerned? I didn't find any example of this >>>> approach with other targets. >>>> >>> >>> FWIW, the similar approach is used in ix86_expand_vector_move_misalign, >>> ix86_expand_convert_uns_didf_sse and ix86_expand_vector_init_general >>> as well as other targets: >>> >>> frv/frv.c: emit_clobber (op0); >>> frv/frv.c: emit_clobber (op1); >>> im32c/m32c.c: /* emit_clobber (gen_rtx_REG (HImode, R0L_REGNO)); */ >>> s390/s390.c: emit_clobber (addr); >>> s390/s390.md: emit_clobber (reg0); >>> s390/s390.md: emit_clobber (reg1); >>> s390/s390.md: emit_clobber (reg0); >>> s390/s390.md: emit_clobber (reg0); >>> s390/s390.md: emit_clobber (reg1); >> >> These usages mark the whole register as being "clobbered" >> (=undefined), before only a part of register is written, e.g.: >> >> emit_clobber (int_xmm); >> emit_move_insn (gen_lowpart (DImode, int_xmm), input); >> >> They aren't used to prevent unwanted CSE. > > Since it's being called in the move expander, I thought the normal > way of preventing the constant being rematerialised would be to reject > it in the move define_insn predicates. > > FWIW, I agree that using a clobber for this is going to be fragile. > Here is the alternative from clobber. -- H.J. 
-- diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index a68983c..79999df 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -2347,7 +2347,7 @@ (define_insn "*movsi_internal" [(set (match_operand:SI 0 "nonimmediate_operand" "=r,m ,*y,*y,?rm,?*y,*v,*v,*v,m ,?r ,?r,?*Yi,*k ,*rm") - (match_operand:SI 1 "general_operand" + (match_operand:SI 1 "ix86_general_operand" "g ,re,C ,*y,*y ,rm ,C ,*v,m ,*v,*Yj,*v,r ,*krm,*k"))] "!(MEM_P (operands[0]) && MEM_P (operands[1]))" { @@ -2564,7 +2564,7 @@ (define_insn "*movqi_internal" [(set (match_operand:QI 0 "nonimmediate_operand" "=q,q ,q ,r,r ,?r,m ,k,k,r ,m,k") - (match_operand:QI 1 "general_operand" + (match_operand:QI 1 "ix86_general_operand" "q ,qn,qm,q,rn,qm,qn,r ,k,k,k,m"))] "!(MEM_P (operands[0]) && MEM_P (operands[1]))" { diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index 06a0002..a471deb 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -367,6 +367,12 @@ } }) +;; Return true if OP is general operand representable on ix86 +(define_predicate "ix86_general_operand" + (and (match_operand 0 "general_operand") + (ior (not (match_code "symbol_ref")) + (match_test "!ix86_force_load_from_GOT_p (op)")))) + ;; Return true if size of VALUE can be stored in a sign ;; extended immediate field. (define_predicate "x86_64_immediate_size_operand" @@ -1036,6 +1042,9 @@ struct ix86_address parts; int ok; + if (ix86_force_load_from_GOT_p (op)) + return false; + if (!CONST_INT_P (op) && mode != VOIDmode && GET_MODE (op) != mode) ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] x86-64: Load external function address via GOT slot 2016-06-20 19:47 ` Richard Sandiford 2016-06-20 20:34 ` H.J. Lu @ 2016-06-21 12:40 ` H.J. Lu 2016-06-21 18:22 ` Uros Bizjak 1 sibling, 1 reply; 10+ messages in thread From: H.J. Lu @ 2016-06-21 12:40 UTC (permalink / raw) To: Uros Bizjak, gcc-patches, Jakub Jelinek, Richard Sandiford [-- Attachment #1: Type: text/plain, Size: 3220 bytes --] On Mon, Jun 20, 2016 at 12:46 PM, Richard Sandiford <rdsandiford@googlemail.com> wrote: > Uros Bizjak <ubizjak@gmail.com> writes: >> On Mon, Jun 20, 2016 at 9:19 PM, H.J. Lu <hjl.tools@gmail.com> wrote: >>> On Mon, Jun 20, 2016 at 12:13 PM, Uros Bizjak <ubizjak@gmail.com> wrote: >>>> On Mon, Jun 20, 2016 at 7:05 PM, H.J. Lu <hongjiu.lu@intel.com> wrote: >>>>> Hi, >>>>> >>>>> This patch implements the alternate code sequence recommended in >>>>> >>>>> https://groups.google.com/forum/#!topic/x86-64-abi/de5_KnLHxtI >>>>> >>>>> to load external function address via GOT slot with >>>>> >>>>> movq func@GOTPCREL(%rip), %rax >>>>> >>>>> so that linker won't create an PLT entry for extern function >>>>> address. >>>>> >>>>> Tested on x86-64. OK for trunk? >>>> >>>>> + else if (ix86_force_load_from_GOT_p (op1)) >>>>> + { >>>>> + /* Load the external function address via the GOT slot to >>>>> + avoid PLT. */ >>>>> + op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1), >>>>> + (TARGET_64BIT >>>>> + ? UNSPEC_GOTPCREL >>>>> + : UNSPEC_GOT)); >>>>> + op1 = gen_rtx_CONST (Pmode, op1); >>>>> + op1 = gen_const_mem (Pmode, op1); >>>>> + /* This symbol must be referenced via a load from the Global >>>>> + Offset Table. */ >>>>> + set_mem_alias_set (op1, ix86_GOT_alias_set ()); >>>>> + op1 = convert_to_mode (mode, op1, 1); >>>>> + op1 = force_reg (mode, op1); >>>>> + emit_insn (gen_rtx_SET (op0, op1)); >>>>> + /* Generate a CLOBBER so that there will be no REG_EQUAL note >>>>> + on the last insn to prevent cse and fwprop from replacing >>>>> + a GOT load with a constant. 
*/ >>>>> + rtx tmp = gen_reg_rtx (Pmode); >>>>> + emit_clobber (tmp); >>>>> + return; >>>> >>>> Jeff, is this the recommended way to prevent CSE, as far as RTL >>>> infrastructure is concerned? I didn't find any example of this >>>> approach with other targets. >>>> >>> >>> FWIW, the similar approach is used in ix86_expand_vector_move_misalign, >>> ix86_expand_convert_uns_didf_sse and ix86_expand_vector_init_general >>> as well as other targets: >>> >>> frv/frv.c: emit_clobber (op0); >>> frv/frv.c: emit_clobber (op1); >>> im32c/m32c.c: /* emit_clobber (gen_rtx_REG (HImode, R0L_REGNO)); */ >>> s390/s390.c: emit_clobber (addr); >>> s390/s390.md: emit_clobber (reg0); >>> s390/s390.md: emit_clobber (reg1); >>> s390/s390.md: emit_clobber (reg0); >>> s390/s390.md: emit_clobber (reg0); >>> s390/s390.md: emit_clobber (reg1); >> >> These usages mark the whole register as being "clobbered" >> (=undefined), before only a part of register is written, e.g.: >> >> emit_clobber (int_xmm); >> emit_move_insn (gen_lowpart (DImode, int_xmm), input); >> >> They aren't used to prevent unwanted CSE. > > Since it's being called in the move expander, I thought the normal > way of preventing the constant being rematerialised would be to reject > it in the move define_insn predicates. > > FWIW, I agree that using a clobber for this is going to be fragile. > Here is the patch without clobber. Tested on x86-64. OK for trunk? Thanks. -- H.J. [-- Attachment #2: 0001-x86-64-Load-external-function-address-via-GOT-slot.patch --] [-- Type: text/x-patch, Size: 11096 bytes --] From 55ab339cc4173565095b66c0fc2ffa4267b55606 Mon Sep 17 00:00:00 2001 From: "H.J. 
Lu" <hjl.tools@gmail.com> Date: Fri, 28 Aug 2015 19:14:49 -0700 Subject: [PATCH] x86-64: Load external function address via GOT slot This patch implements the alternate code sequence recommended in https://groups.google.com/forum/#!topic/x86-64-abi/de5_KnLHxtI to load external function address via GOT slot with movq func@GOTPCREL(%rip), %rax so that linker won't create an PLT entry for extern function address. gcc/ PR target/67400 * config/i386/i386-protos.h (ix86_force_load_from_GOT_p): New. * config/i386/i386.c (ix86_force_load_from_GOT_p): New function. (ix86_legitimate_address_p): Allow UNSPEC_GOTPCREL if ix86_force_load_from_GOT_p returns true. (ix86_print_operand_address): Support UNSPEC_GOTPCREL if ix86_force_load_from_GOT_p returns true. (ix86_expand_move): Load the external function address via the GOT slot if ix86_force_load_from_GOT_p returns true. * config/i386/i386.md (*movsi_internal): Replace general_operand with ix86_general_operand. (*movqi_internal): Likewise. * config/i386/predicates.md (x86_64_immediate_operand): Return false if ix86_force_load_from_GOT_p returns true. (address_no_seg_operand): Likewise. (ix86_general_operand): New predicate. gcc/testsuite/ PR target/67400 * gcc.target/i386/pr67400-1.c: New test. * gcc.target/i386/pr67400-2.c: Likewise. * gcc.target/i386/pr67400-3.c: Likewise. * gcc.target/i386/pr67400-4.c: Likewise. * gcc.target/i386/pr67400-5.c: Likewise. * gcc.target/i386/pr67400-6.c: Likewise. 
--- gcc/config/i386/i386-protos.h | 1 + gcc/config/i386/i386.c | 44 +++++++++++++++++++++++++++++++ gcc/config/i386/i386.md | 4 +-- gcc/config/i386/predicates.md | 15 +++++++++++ gcc/testsuite/gcc.target/i386/pr67400-1.c | 13 +++++++++ gcc/testsuite/gcc.target/i386/pr67400-2.c | 14 ++++++++++ gcc/testsuite/gcc.target/i386/pr67400-3.c | 16 +++++++++++ gcc/testsuite/gcc.target/i386/pr67400-4.c | 13 +++++++++ gcc/testsuite/gcc.target/i386/pr67400-5.c | 11 ++++++++ gcc/testsuite/gcc.target/i386/pr67400-6.c | 13 +++++++++ 10 files changed, 142 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr67400-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr67400-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr67400-3.c create mode 100644 gcc/testsuite/gcc.target/i386/pr67400-4.c create mode 100644 gcc/testsuite/gcc.target/i386/pr67400-5.c create mode 100644 gcc/testsuite/gcc.target/i386/pr67400-6.c diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 9fd14f6..8130161 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -70,6 +70,7 @@ extern bool ix86_expand_set_or_movmem (rtx, rtx, rtx, rtx, rtx, rtx, extern bool constant_address_p (rtx); extern bool legitimate_pic_operand_p (rtx); extern bool legitimate_pic_address_disp_p (rtx); +extern bool ix86_force_load_from_GOT_p (rtx); extern void print_reg (rtx, int, FILE*); extern void ix86_print_operand (FILE *, rtx, int); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 56a5b9c..6912e8c 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -15182,6 +15182,24 @@ ix86_legitimate_constant_p (machine_mode mode, rtx x) return true; } +/* True if operand X should be loaded from GOT. */ + +bool +ix86_force_load_from_GOT_p (rtx x) +{ + /* External function symbol should be loaded via the GOT slot for + -fno-plt. 
*/ + return (!flag_plt + && !flag_pic + && ix86_cmodel != CM_LARGE + && TARGET_64BIT + && !TARGET_PECOFF + && !TARGET_MACHO + && GET_CODE (x) == SYMBOL_REF + && SYMBOL_REF_FUNCTION_P (x) + && !SYMBOL_REF_LOCAL_P (x)); +} + /* Determine if it's legal to put X into the constant pool. This is not possible for the address of thread-local symbols, which is checked above. */ @@ -15560,6 +15578,10 @@ ix86_legitimate_address_p (machine_mode, rtx addr, bool strict) return false; case UNSPEC_GOTPCREL: + gcc_assert (flag_pic + || ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0))); + goto is_legitimate_pic; + case UNSPEC_PCREL: gcc_assert (flag_pic); goto is_legitimate_pic; @@ -18130,6 +18152,12 @@ ix86_print_operand_address_as (FILE *file, rtx addr, } else if (flag_pic) output_pic_addr_const (file, disp, 0); + else if (GET_CODE (disp) == CONST + && GET_CODE (XEXP (disp, 0)) == UNSPEC + && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL + || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT) + && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0))) + output_pic_addr_const (file, XEXP (disp, 0), code); else output_addr_const (file, disp); } @@ -19448,6 +19476,22 @@ ix86_expand_move (machine_mode mode, rtx operands[]) op1 = convert_to_mode (mode, op1, 1); } } + } + else if (ix86_force_load_from_GOT_p (op1)) + { + /* Load the external function address via the GOT slot to + avoid PLT. */ + op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1), + (TARGET_64BIT + ? UNSPEC_GOTPCREL + : UNSPEC_GOT)); + op1 = gen_rtx_CONST (Pmode, op1); + op1 = gen_const_mem (Pmode, op1); + /* This symbol must be referenced via a load from the Global + Offset Table. 
*/ + set_mem_alias_set (op1, ix86_GOT_alias_set ()); + op1 = convert_to_mode (mode, op1, 1); + op1 = force_reg (mode, op1); } else { diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 85dda3f..8f3227f 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -2347,7 +2347,7 @@ (define_insn "*movsi_internal" [(set (match_operand:SI 0 "nonimmediate_operand" "=r,m ,*y,*y,?rm,?*y,*v,*v,*v,m ,?r ,?r,?*Yi,*k ,*rm") - (match_operand:SI 1 "general_operand" + (match_operand:SI 1 "ix86_general_operand" "g ,re,C ,*y,*y ,rm ,C ,*v,m ,*v,*Yj,*v,r ,*krm,*k"))] "!(MEM_P (operands[0]) && MEM_P (operands[1]))" { @@ -2564,7 +2564,7 @@ (define_insn "*movqi_internal" [(set (match_operand:QI 0 "nonimmediate_operand" "=q,q ,q ,r,r ,?r,m ,k,k,r ,m,k") - (match_operand:QI 1 "general_operand" + (match_operand:QI 1 "ix86_general_operand" "q ,qn,qm,q,rn,qm,qn,r ,k,k,k,m"))] "!(MEM_P (operands[0]) && MEM_P (operands[1]))" { diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index b3cf2a3..6c74e7e 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -149,6 +149,10 @@ (define_predicate "x86_64_immediate_operand" (match_code "const_int,symbol_ref,label_ref,const") { + /* Load the external function address via the GOT slot to avoid PLT. */ + if (ix86_force_load_from_GOT_p (op)) + return false; + if (!TARGET_64BIT) return immediate_operand (op, mode); @@ -363,6 +367,13 @@ } }) +;; Return true if OP is general operand, excluding the external function +;; symbol if it should be loaded via the GOT slot to avoid PLT. +(define_predicate "ix86_general_operand" + (and (match_operand 0 "general_operand") + (ior (not (match_code "symbol_ref")) + (match_test "!ix86_force_load_from_GOT_p (op)")))) + ;; Return true if size of VALUE can be stored in a sign ;; extended immediate field. 
(define_predicate "x86_64_immediate_size_operand" @@ -1032,6 +1043,10 @@ struct ix86_address parts; int ok; + /* Load the external function address via the GOT slot to avoid PLT. */ + if (ix86_force_load_from_GOT_p (op)) + return false; + if (!CONST_INT_P (op) && mode != VOIDmode && GET_MODE (op) != mode) diff --git a/gcc/testsuite/gcc.target/i386/pr67400-1.c b/gcc/testsuite/gcc.target/i386/pr67400-1.c new file mode 100644 index 0000000..4dd5e35 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr67400-1.c @@ -0,0 +1,13 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fno-pic -fno-plt" } */ + +extern void bar (void); + +void * +foo (void) +{ + return &bar; +} + +/* { dg-final { scan-assembler "mov\(l|q\)\[ \t\]*bar@GOTPCREL" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-not "\(mov|lea\)\(l|q\)\[ \t\]*\(\\\$|\)bar," { target { ! ia32 } } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr67400-2.c b/gcc/testsuite/gcc.target/i386/pr67400-2.c new file mode 100644 index 0000000..9f3f4bc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr67400-2.c @@ -0,0 +1,14 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fno-pic -fno-plt" } */ + +extern void bar (void); +extern void *p; + +void +foo (void) +{ + p = &bar; +} + +/* { dg-final { scan-assembler "mov\(l|q\)\[ \t\]*bar@GOTPCREL" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-not "mov\(l|q\)\[ \t\]*\\\$bar," { target { ! 
ia32 } } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr67400-3.c b/gcc/testsuite/gcc.target/i386/pr67400-3.c new file mode 100644 index 0000000..045974e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr67400-3.c @@ -0,0 +1,16 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fno-pic -fno-plt" } */ + +static void +bar (void) +{ +} + +void * +foo (void) +{ + return &bar; +} + +/* { dg-final { scan-assembler "mov\(l|q\)\[ \t\]*\\\$bar," } } */ +/* { dg-final { scan-assembler-not "mov\(l|q\)\[ \t\]*bar@GOTPCREL" { target { ! ia32 } } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr67400-4.c b/gcc/testsuite/gcc.target/i386/pr67400-4.c new file mode 100644 index 0000000..fd373db --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr67400-4.c @@ -0,0 +1,13 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fno-pic -fno-plt" } */ + +extern void bar (void) __attribute__ ((visibility ("hidden"))); + +void * +foo (void) +{ + return &bar; +} + +/* { dg-final { scan-assembler "mov\(l|q\)\[ \t\]*\\\$bar," } } */ +/* { dg-final { scan-assembler-not "mov\(l|q\)\[ \t\]*bar@GOTPCREL" { target { ! 
ia32 } } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr67400-5.c b/gcc/testsuite/gcc.target/i386/pr67400-5.c new file mode 100644 index 0000000..9bb98dc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr67400-5.c @@ -0,0 +1,11 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fno-pic -fno-plt" } */ + +extern void foo (void); +extern void bar (int, int, int, int, int, int, void *); + +void +x (void) +{ + bar (1, 2, 3, 4, 5, 6, foo); +} diff --git a/gcc/testsuite/gcc.target/i386/pr67400-6.c b/gcc/testsuite/gcc.target/i386/pr67400-6.c new file mode 100644 index 0000000..b84196a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr67400-6.c @@ -0,0 +1,13 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fno-pic -fno-plt" } */ + +extern int bar (void); + +int +check (void *p) +{ + return p != &bar; +} + +/* { dg-final { scan-assembler "cmp\(l|q\)\[ \t\]*.*bar@GOTPCREL" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-not "mov\(l|q\)\[ \t\]*\\\$bar," { target { ! ia32 } } } } */ -- 2.5.5 ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] x86-64: Load external function address via GOT slot 2016-06-21 12:40 ` H.J. Lu @ 2016-06-21 18:22 ` Uros Bizjak 2016-06-21 19:51 ` H.J. Lu 0 siblings, 1 reply; 10+ messages in thread From: Uros Bizjak @ 2016-06-21 18:22 UTC (permalink / raw) To: H.J. Lu; +Cc: gcc-patches, Jakub Jelinek, Richard Sandiford [-- Attachment #1: Type: text/plain, Size: 4052 bytes --] On Tue, Jun 21, 2016 at 2:40 PM, H.J. Lu <hjl.tools@gmail.com> wrote: > On Mon, Jun 20, 2016 at 12:46 PM, Richard Sandiford > <rdsandiford@googlemail.com> wrote: >> Uros Bizjak <ubizjak@gmail.com> writes: >>> On Mon, Jun 20, 2016 at 9:19 PM, H.J. Lu <hjl.tools@gmail.com> wrote: >>>> On Mon, Jun 20, 2016 at 12:13 PM, Uros Bizjak <ubizjak@gmail.com> wrote: >>>>> On Mon, Jun 20, 2016 at 7:05 PM, H.J. Lu <hongjiu.lu@intel.com> wrote: >>>>>> Hi, >>>>>> >>>>>> This patch implements the alternate code sequence recommended in >>>>>> >>>>>> https://groups.google.com/forum/#!topic/x86-64-abi/de5_KnLHxtI >>>>>> >>>>>> to load external function address via GOT slot with >>>>>> >>>>>> movq func@GOTPCREL(%rip), %rax >>>>>> >>>>>> so that linker won't create an PLT entry for extern function >>>>>> address. >>>>>> >>>>>> Tested on x86-64. OK for trunk? >>>>> >>>>>> + else if (ix86_force_load_from_GOT_p (op1)) >>>>>> + { >>>>>> + /* Load the external function address via the GOT slot to >>>>>> + avoid PLT. */ >>>>>> + op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1), >>>>>> + (TARGET_64BIT >>>>>> + ? UNSPEC_GOTPCREL >>>>>> + : UNSPEC_GOT)); >>>>>> + op1 = gen_rtx_CONST (Pmode, op1); >>>>>> + op1 = gen_const_mem (Pmode, op1); >>>>>> + /* This symbol must be referenced via a load from the Global >>>>>> + Offset Table. 
*/ >>>>>> + set_mem_alias_set (op1, ix86_GOT_alias_set ()); >>>>>> + op1 = convert_to_mode (mode, op1, 1); >>>>>> + op1 = force_reg (mode, op1); >>>>>> + emit_insn (gen_rtx_SET (op0, op1)); >>>>>> + /* Generate a CLOBBER so that there will be no REG_EQUAL note >>>>>> + on the last insn to prevent cse and fwprop from replacing >>>>>> + a GOT load with a constant. */ >>>>>> + rtx tmp = gen_reg_rtx (Pmode); >>>>>> + emit_clobber (tmp); >>>>>> + return; >>>>> >>>>> Jeff, is this the recommended way to prevent CSE, as far as RTL >>>>> infrastructure is concerned? I didn't find any example of this >>>>> approach with other targets. >>>>> >>>> >>>> FWIW, the similar approach is used in ix86_expand_vector_move_misalign, >>>> ix86_expand_convert_uns_didf_sse and ix86_expand_vector_init_general >>>> as well as other targets: >>>> >>>> frv/frv.c: emit_clobber (op0); >>>> frv/frv.c: emit_clobber (op1); >>>> im32c/m32c.c: /* emit_clobber (gen_rtx_REG (HImode, R0L_REGNO)); */ >>>> s390/s390.c: emit_clobber (addr); >>>> s390/s390.md: emit_clobber (reg0); >>>> s390/s390.md: emit_clobber (reg1); >>>> s390/s390.md: emit_clobber (reg0); >>>> s390/s390.md: emit_clobber (reg0); >>>> s390/s390.md: emit_clobber (reg1); >>> >>> These usages mark the whole register as being "clobbered" >>> (=undefined), before only a part of register is written, e.g.: >>> >>> emit_clobber (int_xmm); >>> emit_move_insn (gen_lowpart (DImode, int_xmm), input); >>> >>> They aren't used to prevent unwanted CSE. >> >> Since it's being called in the move expander, I thought the normal >> way of preventing the constant being rematerialised would be to reject >> it in the move define_insn predicates. >> >> FWIW, I agree that using a clobber for this is going to be fragile. >> > > Here is the patch without clobber. Tested on x86-64. OK for > trunk? No, your patch has multiple problems: 1. It won't work for e.g. &bar+1, since you have to legitimize the symbol in two places of ix86_expand_move. 
Also, why use TARGET_64BIT in + op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1), + (TARGET_64BIT + ? UNSPEC_GOTPCREL + : UNSPEC_GOT)); when ix86_force_load_from_GOT_p rejects non-64bit targets? 2. New check should be added to ix86_legitimate_constant_p, not to predicates of move insn patterns. Unfortunately, we still have to change x86_64_immediate_operand in two places. I have attached my version of the patch. It handles all your testcases, plus &foo+1 case. Bootstrap is still running. Does the patch work for you? Uros. [-- Attachment #2: p.diff.txt --] [-- Type: text/plain, Size: 5549 bytes --] Index: i386-protos.h =================================================================== --- i386-protos.h (revision 237653) +++ i386-protos.h (working copy) @@ -70,6 +70,7 @@ extern bool ix86_expand_set_or_movmem (rtx, rtx, r extern bool constant_address_p (rtx); extern bool legitimate_pic_operand_p (rtx); extern bool legitimate_pic_address_disp_p (rtx); +extern bool ix86_force_load_from_GOT_p (rtx); extern void print_reg (rtx, int, FILE*); extern void ix86_print_operand (FILE *, rtx, int); Index: i386.c =================================================================== --- i386.c (revision 237653) +++ i386.c (working copy) @@ -15120,6 +15120,19 @@ darwin_local_data_pic (rtx disp) && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET); } +/* True if operand X should be loaded from GOT. */ + +bool +ix86_force_load_from_GOT_p (rtx x) +{ + return (TARGET_64BIT && !TARGET_PECOFF && !TARGET_MACHO + && !flag_plt && !flag_pic + && ix86_cmodel != CM_LARGE + && GET_CODE (x) == SYMBOL_REF + && SYMBOL_REF_FUNCTION_P (x) + && !SYMBOL_REF_LOCAL_P (x)); +} + /* Determine if a given RTX is a valid constant. We already know this satisfies CONSTANT_P. */ @@ -15188,6 +15201,12 @@ ix86_legitimate_constant_p (machine_mode mode, rtx if (MACHO_DYNAMIC_NO_PIC_P) return machopic_symbol_defined_p (x); #endif + + /* External function address should be loaded + via the GOT slot to avoid PLT. 
*/ + if (ix86_force_load_from_GOT_p (x)) + return false; + break; CASE_CONST_SCALAR_INT: @@ -15596,6 +15615,9 @@ ix86_legitimate_address_p (machine_mode, rtx addr, return false; case UNSPEC_GOTPCREL: + if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0))) + goto is_legitimate_pic; + /* FALLTHRU */ case UNSPEC_PCREL: gcc_assert (flag_pic); goto is_legitimate_pic; @@ -18164,6 +18186,12 @@ ix86_print_operand_address_as (FILE *file, rtx add fputs ("ds:", file); fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp)); } + /* Load the external function address via the GOT slot to avoid PLT. */ + else if (GET_CODE (disp) == CONST + && GET_CODE (XEXP (disp, 0)) == UNSPEC + && XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL + && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0))) + output_pic_addr_const (file, disp, 0); else if (flag_pic) output_pic_addr_const (file, disp, 0); else @@ -19406,6 +19434,15 @@ ix86_expand_move (machine_mode mode, rtx operands[ return; op1 = convert_to_mode (mode, op1, 1); } + else if (ix86_force_load_from_GOT_p (op1)) + { + /* Load the external function address via GOT slot to avoid PLT. */ + op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1), + UNSPEC_GOTPCREL); + op1 = gen_rtx_CONST (Pmode, op1); + op1 = gen_const_mem (Pmode, op1); + set_mem_alias_set (op1, ix86_GOT_alias_set ()); + } else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX) op1 = tmp; } @@ -19420,6 +19457,15 @@ ix86_expand_move (machine_mode mode, rtx operands[ model = SYMBOL_REF_TLS_MODEL (symbol); if (model) tmp = legitimize_tls_address (symbol, model, true); + else if (ix86_force_load_from_GOT_p (symbol)) + { + /* Load the external function address via GOT slot to avoid PLT. 
*/ + tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), + UNSPEC_GOTPCREL); + tmp = gen_rtx_CONST (Pmode, tmp); + tmp = gen_const_mem (Pmode, tmp); + set_mem_alias_set (tmp, ix86_GOT_alias_set ()); + } else tmp = legitimize_pe_coff_symbol (symbol, true); Index: predicates.md =================================================================== --- predicates.md (revision 237653) +++ predicates.md (working copy) @@ -160,13 +160,18 @@ return trunc_int_for_mode (val, SImode) == val; } case SYMBOL_REF: + /* TLS symbols are not constant. */ + if (SYMBOL_REF_TLS_MODEL (op)) + return false; + + /* Load the external function address via the GOT slot. */ + if (ix86_force_load_from_GOT_p (op)) + return false; + /* For certain code models, the symbolic references are known to fit. in CM_SMALL_PIC model we know it fits if it is local to the shared library. Don't count TLS SYMBOL_REFs here, since they should fit only if inside of UNSPEC handled below. */ - /* TLS symbols are not constant. */ - if (SYMBOL_REF_TLS_MODEL (op)) - return false; return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL || (ix86_cmodel == CM_MEDIUM && !SYMBOL_REF_FAR_ADDR_P (op))); @@ -207,6 +212,11 @@ /* TLS symbols are not constant. */ if (SYMBOL_REF_TLS_MODEL (op1)) return false; + + /* Load the external function address via the GOT slot. */ + if (ix86_force_load_from_GOT_p (op)) + return false; + /* For CM_SMALL assume that latest object is 16MB before end of 31bits boundary. We may also accept pretty large negative constants knowing that all objects are @@ -273,10 +283,11 @@ return !(INTVAL (op) & ~(HOST_WIDE_INT) 0xffffffff); case SYMBOL_REF: - /* For certain code models, the symbolic references are known to fit. */ /* TLS symbols are not constant. */ if (SYMBOL_REF_TLS_MODEL (op)) return false; + + /* For certain code models, the symbolic references are known to fit. 
*/ return (ix86_cmodel == CM_SMALL || (ix86_cmodel == CM_MEDIUM && !SYMBOL_REF_FAR_ADDR_P (op))); ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] x86-64: Load external function address via GOT slot 2016-06-21 18:22 ` Uros Bizjak @ 2016-06-21 19:51 ` H.J. Lu 2016-06-22 22:12 ` Uros Bizjak 0 siblings, 1 reply; 10+ messages in thread From: H.J. Lu @ 2016-06-21 19:51 UTC (permalink / raw) To: Uros Bizjak; +Cc: gcc-patches, Jakub Jelinek, Richard Sandiford On Tue, Jun 21, 2016 at 11:22 AM, Uros Bizjak <ubizjak@gmail.com> wrote: > On Tue, Jun 21, 2016 at 2:40 PM, H.J. Lu <hjl.tools@gmail.com> wrote: >> On Mon, Jun 20, 2016 at 12:46 PM, Richard Sandiford >> <rdsandiford@googlemail.com> wrote: >>> Uros Bizjak <ubizjak@gmail.com> writes: >>>> On Mon, Jun 20, 2016 at 9:19 PM, H.J. Lu <hjl.tools@gmail.com> wrote: >>>>> On Mon, Jun 20, 2016 at 12:13 PM, Uros Bizjak <ubizjak@gmail.com> wrote: >>>>>> On Mon, Jun 20, 2016 at 7:05 PM, H.J. Lu <hongjiu.lu@intel.com> wrote: >>>>>>> Hi, >>>>>>> >>>>>>> This patch implements the alternate code sequence recommended in >>>>>>> >>>>>>> https://groups.google.com/forum/#!topic/x86-64-abi/de5_KnLHxtI >>>>>>> >>>>>>> to load external function address via GOT slot with >>>>>>> >>>>>>> movq func@GOTPCREL(%rip), %rax >>>>>>> >>>>>>> so that linker won't create an PLT entry for extern function >>>>>>> address. >>>>>>> >>>>>>> Tested on x86-64. OK for trunk? >>>>>> >>>>>>> + else if (ix86_force_load_from_GOT_p (op1)) >>>>>>> + { >>>>>>> + /* Load the external function address via the GOT slot to >>>>>>> + avoid PLT. */ >>>>>>> + op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1), >>>>>>> + (TARGET_64BIT >>>>>>> + ? UNSPEC_GOTPCREL >>>>>>> + : UNSPEC_GOT)); >>>>>>> + op1 = gen_rtx_CONST (Pmode, op1); >>>>>>> + op1 = gen_const_mem (Pmode, op1); >>>>>>> + /* This symbol must be referenced via a load from the Global >>>>>>> + Offset Table. 
*/ >>>>>>> + set_mem_alias_set (op1, ix86_GOT_alias_set ()); >>>>>>> + op1 = convert_to_mode (mode, op1, 1); >>>>>>> + op1 = force_reg (mode, op1); >>>>>>> + emit_insn (gen_rtx_SET (op0, op1)); >>>>>>> + /* Generate a CLOBBER so that there will be no REG_EQUAL note >>>>>>> + on the last insn to prevent cse and fwprop from replacing >>>>>>> + a GOT load with a constant. */ >>>>>>> + rtx tmp = gen_reg_rtx (Pmode); >>>>>>> + emit_clobber (tmp); >>>>>>> + return; >>>>>> >>>>>> Jeff, is this the recommended way to prevent CSE, as far as RTL >>>>>> infrastructure is concerned? I didn't find any example of this >>>>>> approach with other targets. >>>>>> >>>>> >>>>> FWIW, the similar approach is used in ix86_expand_vector_move_misalign, >>>>> ix86_expand_convert_uns_didf_sse and ix86_expand_vector_init_general >>>>> as well as other targets: >>>>> >>>>> frv/frv.c: emit_clobber (op0); >>>>> frv/frv.c: emit_clobber (op1); >>>>> im32c/m32c.c: /* emit_clobber (gen_rtx_REG (HImode, R0L_REGNO)); */ >>>>> s390/s390.c: emit_clobber (addr); >>>>> s390/s390.md: emit_clobber (reg0); >>>>> s390/s390.md: emit_clobber (reg1); >>>>> s390/s390.md: emit_clobber (reg0); >>>>> s390/s390.md: emit_clobber (reg0); >>>>> s390/s390.md: emit_clobber (reg1); >>>> >>>> These usages mark the whole register as being "clobbered" >>>> (=undefined), before only a part of register is written, e.g.: >>>> >>>> emit_clobber (int_xmm); >>>> emit_move_insn (gen_lowpart (DImode, int_xmm), input); >>>> >>>> They aren't used to prevent unwanted CSE. >>> >>> Since it's being called in the move expander, I thought the normal >>> way of preventing the constant being rematerialised would be to reject >>> it in the move define_insn predicates. >>> >>> FWIW, I agree that using a clobber for this is going to be fragile. >>> >> >> Here is the patch without clobber. Tested on x86-64. OK for >> trunk? > > No, your patch has multiple problems: > > 1. It won't work for e.g. 
&bar+1, since you have to legitimize the > symbol in two places of ix86_expand_move. Also, why use TARGET_64BIT > in > > + op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1), > + (TARGET_64BIT > + ? UNSPEC_GOTPCREL > + : UNSPEC_GOT)); > > when ix86_force_load_from_GOT_p rejects non-64bit targets? > > 2. New check should be added to ix86_legitimate_constant_p, not to > predicates of move insn patterns. Unfortunately, we still have to > change x86_64_immediate_operand in two places. > > I have attached my version of the patch. It handles all your > testcases, plus &foo+1 case. Bootstrap is still running. > > Does the patch work for you? It works. Thanks. -- H.J. ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] x86-64: Load external function address via GOT slot 2016-06-21 19:51 ` H.J. Lu @ 2016-06-22 22:12 ` Uros Bizjak 0 siblings, 0 replies; 10+ messages in thread From: Uros Bizjak @ 2016-06-22 22:12 UTC (permalink / raw) To: H.J. Lu; +Cc: gcc-patches, Jakub Jelinek, Richard Sandiford [-- Attachment #1: Type: text/plain, Size: 1830 bytes --] On Tue, Jun 21, 2016 at 9:51 PM, H.J. Lu <hjl.tools@gmail.com> wrote: >> I have attached my version of the patch. It handles all your >> testcases, plus &foo+1 case. Bootstrap is still running. >> >> Does the patch work for you? > > It works. The attached version of the patch was committed to mainline SVN. Regarding the testcases - for now I have made them compile only on non-ia32 targets. Let's change them when ia32 support is added (it is a trivial change). 2016-06-23 Uros Bizjak <ubizjak@gmail.com> H.J. Lu <hongjiu.lu@intel.com> PR target/67400 * config/i386/i386-protos.h (ix86_force_load_from_GOT_p): New. * config/i386/i386.c (ix86_force_load_from_GOT_p): New function. (ix86_legitimate_constant_p): Return false for SYMBOL_REFs where ix86_force_load_from_GOT_p returns true. (ix86_legitimate_address_p): Allow UNSPEC_GOTPCREL if ix86_force_load_from_GOT_p returns true. (ix86_print_operand_address_as): Support UNSPEC_GOTPCREL if ix86_force_load_from_GOT_p returns true. (ix86_expand_move): Load the external function address via the GOT slot if ix86_force_load_from_GOT_p returns true. * config/i386/predicates.md (x86_64_immediate_operand): Return false for SYMBOL_REFs where ix86_force_load_from_GOT_p returns true. (x86_64_zext_immediate_operand): Ditto. testsuite/ChangeLog: 2016-06-23 H.J. Lu <hongjiu.lu@intel.com> PR target/67400 * gcc.target/i386/pr67400-1.c: New test. * gcc.target/i386/pr67400-2.c: Likewise. * gcc.target/i386/pr67400-3.c: Likewise. * gcc.target/i386/pr67400-4.c: Likewise. * gcc.target/i386/pr67400-5.c: Likewise. * gcc.target/i386/pr67400-6.c: Likewise. * gcc.target/i386/pr67400-7.c: Likewise. 
Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}. Committed to mainline SVN. Uros. [-- Attachment #2: p.diff.txt --] [-- Type: text/plain, Size: 9639 bytes --] Index: config/i386/i386-protos.h =================================================================== --- config/i386/i386-protos.h (revision 237716) +++ config/i386/i386-protos.h (working copy) @@ -70,6 +70,7 @@ extern bool ix86_expand_set_or_movmem (rtx, rtx, r extern bool constant_address_p (rtx); extern bool legitimate_pic_operand_p (rtx); extern bool legitimate_pic_address_disp_p (rtx); +extern bool ix86_force_load_from_GOT_p (rtx); extern void print_reg (rtx, int, FILE*); extern void ix86_print_operand (FILE *, rtx, int); Index: config/i386/i386.c =================================================================== --- config/i386/i386.c (revision 237716) +++ config/i386/i386.c (working copy) @@ -15120,6 +15120,19 @@ darwin_local_data_pic (rtx disp) && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET); } +/* True if operand X should be loaded from GOT. */ + +bool +ix86_force_load_from_GOT_p (rtx x) +{ + return (TARGET_64BIT && !TARGET_PECOFF && !TARGET_MACHO + && !flag_plt && !flag_pic + && ix86_cmodel != CM_LARGE + && GET_CODE (x) == SYMBOL_REF + && SYMBOL_REF_FUNCTION_P (x) + && !SYMBOL_REF_LOCAL_P (x)); +} + /* Determine if a given RTX is a valid constant. We already know this satisfies CONSTANT_P. */ @@ -15188,6 +15201,12 @@ ix86_legitimate_constant_p (machine_mode mode, rtx if (MACHO_DYNAMIC_NO_PIC_P) return machopic_symbol_defined_p (x); #endif + + /* External function address should be loaded + via the GOT slot to avoid PLT. 
*/ + if (ix86_force_load_from_GOT_p (x)) + return false; + break; CASE_CONST_SCALAR_INT: @@ -15596,6 +15615,9 @@ ix86_legitimate_address_p (machine_mode, rtx addr, return false; case UNSPEC_GOTPCREL: + if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0))) + goto is_legitimate_pic; + /* FALLTHRU */ case UNSPEC_PCREL: gcc_assert (flag_pic); goto is_legitimate_pic; @@ -18169,6 +18191,12 @@ ix86_print_operand_address_as (FILE *file, rtx add fputs ("ds:", file); fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp)); } + /* Load the external function address via the GOT slot to avoid PLT. */ + else if (GET_CODE (disp) == CONST + && GET_CODE (XEXP (disp, 0)) == UNSPEC + && XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL + && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0))) + output_pic_addr_const (file, disp, 0); else if (flag_pic) output_pic_addr_const (file, disp, 0); else @@ -19417,6 +19445,15 @@ ix86_expand_move (machine_mode mode, rtx operands[ if (model) op1 = legitimize_tls_address (op1, model, true); + else if (ix86_force_load_from_GOT_p (op1)) + { + /* Load the external function address via GOT slot to avoid PLT. */ + op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1), + UNSPEC_GOTPCREL); + op1 = gen_rtx_CONST (Pmode, op1); + op1 = gen_const_mem (Pmode, op1); + set_mem_alias_set (op1, ix86_GOT_alias_set ()); + } else { tmp = legitimize_pe_coff_symbol (op1, addend != NULL_RTX); Index: config/i386/predicates.md =================================================================== --- config/i386/predicates.md (revision 237716) +++ config/i386/predicates.md (working copy) @@ -160,13 +160,18 @@ return trunc_int_for_mode (val, SImode) == val; } case SYMBOL_REF: + /* TLS symbols are not constant. */ + if (SYMBOL_REF_TLS_MODEL (op)) + return false; + + /* Load the external function address via the GOT slot. */ + if (ix86_force_load_from_GOT_p (op)) + return false; + /* For certain code models, the symbolic references are known to fit. 
in CM_SMALL_PIC model we know it fits if it is local to the shared library. Don't count TLS SYMBOL_REFs here, since they should fit only if inside of UNSPEC handled below. */ - /* TLS symbols are not constant. */ - if (SYMBOL_REF_TLS_MODEL (op)) - return false; return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL || (ix86_cmodel == CM_MEDIUM && !SYMBOL_REF_FAR_ADDR_P (op))); @@ -207,6 +212,11 @@ /* TLS symbols are not constant. */ if (SYMBOL_REF_TLS_MODEL (op1)) return false; + + /* Load the external function address via the GOT slot. */ + if (ix86_force_load_from_GOT_p (op1)) + return false; + /* For CM_SMALL assume that latest object is 16MB before end of 31bits boundary. We may also accept pretty large negative constants knowing that all objects are @@ -273,10 +283,15 @@ return !(INTVAL (op) & ~(HOST_WIDE_INT) 0xffffffff); case SYMBOL_REF: - /* For certain code models, the symbolic references are known to fit. */ /* TLS symbols are not constant. */ if (SYMBOL_REF_TLS_MODEL (op)) return false; + + /* Load the external function address via the GOT slot. */ + if (ix86_force_load_from_GOT_p (op)) + return false; + + /* For certain code models, the symbolic references are known to fit. */ return (ix86_cmodel == CM_SMALL || (ix86_cmodel == CM_MEDIUM && !SYMBOL_REF_FAR_ADDR_P (op))); @@ -301,6 +316,11 @@ /* TLS symbols are not constant. */ if (SYMBOL_REF_TLS_MODEL (op1)) return false; + + /* Load the external function address via the GOT slot. */ + if (ix86_force_load_from_GOT_p (op1)) + return false; + /* For small code model we may accept pretty large positive offsets, since one bit is available for free. Negative offsets are limited by the size of NULL pointer area Index: testsuite/gcc.target/i386/pr67400-1.c =================================================================== --- testsuite/gcc.target/i386/pr67400-1.c (nonexistent) +++ testsuite/gcc.target/i386/pr67400-1.c (working copy) @@ -0,0 +1,13 @@ +/* { dg-do compile { target { *-*-linux* && { ! 
ia32 } } } } */ +/* { dg-options "-O2 -fno-pic -fno-plt" } */ + +extern void bar (void); + +void * +foo (void) +{ + return &bar; +} + +/* { dg-final { scan-assembler "mov\(l|q\)\[ \t\]*bar@GOTPCREL" } } */ +/* { dg-final { scan-assembler-not "\(mov|lea\)\(l|q\)\[ \t\]*\(\\\$|\)bar," } } */ Index: testsuite/gcc.target/i386/pr67400-2.c =================================================================== --- testsuite/gcc.target/i386/pr67400-2.c (nonexistent) +++ testsuite/gcc.target/i386/pr67400-2.c (working copy) @@ -0,0 +1,14 @@ +/* { dg-do compile { target { *-*-linux* && { ! ia32 } } } } */ +/* { dg-options "-O2 -fno-pic -fno-plt" } */ + +extern void bar (void); +extern void *p; + +void +foo (void) +{ + p = &bar; +} + +/* { dg-final { scan-assembler "mov\(l|q\)\[ \t\]*bar@GOTPCREL" } } */ +/* { dg-final { scan-assembler-not "mov\(l|q\)\[ \t\]*\\\$bar," } } */ Index: testsuite/gcc.target/i386/pr67400-3.c =================================================================== --- testsuite/gcc.target/i386/pr67400-3.c (nonexistent) +++ testsuite/gcc.target/i386/pr67400-3.c (working copy) @@ -0,0 +1,16 @@ +/* { dg-do compile { target { *-*-linux* && { ! ia32 } } } } */ +/* { dg-options "-O2 -fno-pic -fno-plt" } */ + +static void +bar (void) +{ +} + +void * +foo (void) +{ + return &bar; +} + +/* { dg-final { scan-assembler "mov\(l|q\)\[ \t\]*\\\$bar," } } */ +/* { dg-final { scan-assembler-not "mov\(l|q\)\[ \t\]*bar@GOTPCREL" } } */ Index: testsuite/gcc.target/i386/pr67400-4.c =================================================================== --- testsuite/gcc.target/i386/pr67400-4.c (nonexistent) +++ testsuite/gcc.target/i386/pr67400-4.c (working copy) @@ -0,0 +1,13 @@ +/* { dg-do compile { target { *-*-linux* && { ! 
ia32 } } } } */ +/* { dg-options "-O2 -fno-pic -fno-plt" } */ + +extern void bar (void) __attribute__ ((visibility ("hidden"))); + +void * +foo (void) +{ + return &bar; +} + +/* { dg-final { scan-assembler "mov\(l|q\)\[ \t\]*\\\$bar," } } */ +/* { dg-final { scan-assembler-not "mov\(l|q\)\[ \t\]*bar@GOTPCREL" } } */ Index: testsuite/gcc.target/i386/pr67400-5.c =================================================================== --- testsuite/gcc.target/i386/pr67400-5.c (nonexistent) +++ testsuite/gcc.target/i386/pr67400-5.c (working copy) @@ -0,0 +1,11 @@ +/* { dg-do compile { target { *-*-linux* && { ! ia32 } } } } */ +/* { dg-options "-O2 -fno-pic -fno-plt" } */ + +extern void foo (void); +extern void bar (int, int, int, int, int, int, void *); + +void +x (void) +{ + bar (1, 2, 3, 4, 5, 6, foo); +} Index: testsuite/gcc.target/i386/pr67400-6.c =================================================================== --- testsuite/gcc.target/i386/pr67400-6.c (nonexistent) +++ testsuite/gcc.target/i386/pr67400-6.c (working copy) @@ -0,0 +1,13 @@ +/* { dg-do compile { target { *-*-linux* && { ! ia32 } } } } */ +/* { dg-options "-O2 -fno-pic -fno-plt" } */ + +extern int bar (void); + +int +check (void *p) +{ + return p != &bar; +} + +/* { dg-final { scan-assembler "cmp\(l|q\)\[ \t\]*.*bar@GOTPCREL" } } */ +/* { dg-final { scan-assembler-not "mov\(l|q\)\[ \t\]*\\\$bar," } } */ Index: testsuite/gcc.target/i386/pr67400-7.c =================================================================== --- testsuite/gcc.target/i386/pr67400-7.c (nonexistent) +++ testsuite/gcc.target/i386/pr67400-7.c (working copy) @@ -0,0 +1,13 @@ +/* { dg-do compile { target { *-*-linux* && { ! 
ia32 } } } } */ +/* { dg-options "-O2 -fno-pic -fno-plt" } */ + +extern void bar (void); + +void * +foo (void) +{ + return &bar+1; +} + +/* { dg-final { scan-assembler "mov\(l|q\)\[ \t\]*bar@GOTPCREL" } } */ +/* { dg-final { scan-assembler-not "\(mov|lea\)\(l|q\)\[ \t\]*\(\\\$|\)bar," } } */ ^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2016-06-22 22:12 UTC | newest] Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2016-06-20 17:05 [PATCH] x86-64: Load external function address via GOT slot H.J. Lu 2016-06-20 19:13 ` Uros Bizjak 2016-06-20 19:19 ` H.J. Lu 2016-06-20 19:27 ` Uros Bizjak 2016-06-20 19:47 ` Richard Sandiford 2016-06-20 20:34 ` H.J. Lu 2016-06-21 12:40 ` H.J. Lu 2016-06-21 18:22 ` Uros Bizjak 2016-06-21 19:51 ` H.J. Lu 2016-06-22 22:12 ` Uros Bizjak
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as the URLs for read-only IMAP folder(s) and NNTP newsgroup(s).