From e9d789f8dcb52984b0f894fdecc402a49c5ad6d7 Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Fri, 5 Jan 2024 18:40:06 +0800 Subject: [PATCH v2] LoongArch: Don't split the instructions containing relocs for extreme code model The ABI mandates the pcalau12i/addi.d/lu32i.d/lu52i.d instructions for addressing a symbol to be adjacent. So model them as "one large instruction", i.e. define_insn, with two output registers. The real address is the sum of these two registers. The advantage of this approach is the RTL passes can still use ldx/stx instructions to skip an addi.d instruction. gcc/ChangeLog: * config/loongarch/loongarch.md (unspec): Add UNSPEC_LA_PCREL_64_PART1 and UNSPEC_LA_PCREL_64_PART2. (la_pcrel64_two_parts): New define_insn. * config/loongarch/loongarch.cc (loongarch_tls_symbol): Fix a typo in the comment. (loongarch_call_tls_get_addr): If TARGET_CMODEL_EXTREME, use la_pcrel64_two_parts for addressing the TLS symbol and __tls_get_addr. Emit an REG_EQUAL note to allow CSE addressing __tls_get_addr. (loongarch_legitimize_tls_address): If TARGET_CMODEL_EXTREME, address TLS IE symbols with la_pcrel64_two_parts. (loongarch_split_symbol): If TARGET_CMODEL_EXTREME, address symbols with la_pcrel64_two_parts. (TARGET_DELEGITIMIZE_ADDRESS): Define. (loongarch_delegitimize_address): Implement the target hook. gcc/testsuite/ChangeLog: * gcc.target/loongarch/func-call-extreme-1.c (dg-options): Use -O2 instead of -O0 to ensure the pcalau12i/addi/lu32i/lu52i instruction sequences are not reordered by the compiler. (NOIPA): Disallow interprocedural optimizations. * gcc.target/loongarch/func-call-extreme-2.c: Remove the content duplicated from func-call-extreme-1.c, include it instead. (dg-options): Likewise. * gcc.target/loongarch/func-call-extreme-3.c (dg-options): Likewise. * gcc.target/loongarch/func-call-extreme-4.c (dg-options): Likewise. * gcc.target/loongarch/cmodel-extreme-1.c: New test. * gcc.target/loongarch/cmodel-extreme-2.c: New test. --- gcc/config/loongarch/loongarch.cc | 135 +++++++++++------- gcc/config/loongarch/loongarch.md | 21 +++ .../gcc.target/loongarch/cmodel-extreme-1.c | 18 +++ .../gcc.target/loongarch/cmodel-extreme-2.c | 7 + .../loongarch/func-call-extreme-1.c | 14 +- .../loongarch/func-call-extreme-2.c | 29 +--- .../loongarch/func-call-extreme-3.c | 2 +- .../loongarch/func-call-extreme-4.c | 2 +- 8 files changed, 144 insertions(+), 84 deletions(-) create mode 100644 gcc/testsuite/gcc.target/loongarch/cmodel-extreme-1.c create mode 100644 gcc/testsuite/gcc.target/loongarch/cmodel-extreme-2.c diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index 82467474288..358d2f8f3f5 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -2733,7 +2733,7 @@ loongarch_add_offset (rtx temp, rtx reg, HOST_WIDE_INT offset) return plus_constant (Pmode, reg, offset); } -/* The __tls_get_attr symbol. */ +/* The __tls_get_addr symbol. */ static GTY (()) rtx loongarch_tls_symbol; /* Load an entry from the GOT for a TLS GD access. */ @@ -2791,22 +2791,22 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) { - /* Split tls symbol to high and low. */ - rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc)); - high = loongarch_force_temporary (tmp, high); - if (TARGET_CMODEL_EXTREME) { - gcc_assert (TARGET_EXPLICIT_RELOCS); + rtx part1 = gen_reg_rtx (Pmode); + rtx part2 = gen_reg_rtx (Pmode); - rtx tmp1 = gen_reg_rtx (Pmode); - emit_insn (gen_tls_low (Pmode, tmp1, gen_rtx_REG (Pmode, 0), loc)); - emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loc)); - emit_insn (gen_lui_h_hi12 (tmp1, tmp1, loc)); - emit_move_insn (a0, gen_rtx_PLUS (Pmode, high, tmp1)); + emit_insn (gen_la_pcrel64_two_parts (part1, part2, loc)); + emit_move_insn (a0, gen_rtx_PLUS (Pmode, part1, part2)); } else - emit_insn (gen_tls_low (Pmode, a0, high, loc)); + { + /* Split tls symbol to high and low. */ + rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc)); + + high = loongarch_force_temporary (tmp, high); + emit_insn (gen_tls_low (Pmode, a0, high, loc)); + } } else { @@ -2883,22 +2883,27 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) case CMODEL_EXTREME: { - gcc_assert (TARGET_EXPLICIT_RELOCS); - - rtx tmp1 = gen_reg_rtx (Pmode); - rtx high = gen_reg_rtx (Pmode); - - loongarch_emit_move (high, - gen_rtx_HIGH (Pmode, loongarch_tls_symbol)); - loongarch_emit_move (tmp1, gen_rtx_LO_SUM (Pmode, - gen_rtx_REG (Pmode, 0), - loongarch_tls_symbol)); - emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loongarch_tls_symbol)); - emit_insn (gen_lui_h_hi12 (tmp1, tmp1, loongarch_tls_symbol)); + gcc_assert (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE); + + rtx part1 = gen_reg_rtx (Pmode); + rtx part2 = gen_reg_rtx (Pmode); + + emit_insn (gen_la_pcrel64_two_parts (part1, part2, + loongarch_tls_symbol)); loongarch_emit_move (dest, gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, - high, tmp1))); + part1, + part2))); + + /* Put an REG_EQUAL note here to allow CSE (storing + part1 + part2, i.e. the address of tls_get_addr into a + saved register and use it for multiple TLS accesses). */ + rtx sum = gen_rtx_UNSPEC ( + Pmode, gen_rtvec (1, loongarch_tls_symbol), + UNSPEC_ADDRESS_FIRST + + loongarch_classify_symbol (loongarch_tls_symbol)); + set_unique_reg_note (get_last_insn (), REG_EQUAL, sum); } break; @@ -2957,26 +2962,31 @@ loongarch_legitimize_tls_address (rtx loc) if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) { tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_IE); - tmp3 = gen_reg_rtx (Pmode); - rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2)); - high = loongarch_force_temporary (tmp3, high); if (TARGET_CMODEL_EXTREME) { - gcc_assert (TARGET_EXPLICIT_RELOCS); + gcc_assert (la_opt_explicit_relocs + != EXPLICIT_RELOCS_NONE); + + rtx part1 = gen_reg_rtx (Pmode); + rtx part2 = gen_reg_rtx (Pmode); - rtx tmp3 = gen_reg_rtx (Pmode); - emit_insn (gen_tls_low (Pmode, tmp3, - gen_rtx_REG (Pmode, 0), tmp2)); - emit_insn (gen_lui_h_lo20 (tmp3, tmp3, tmp2)); - emit_insn (gen_lui_h_hi12 (tmp3, tmp3, tmp2)); + emit_insn (gen_la_pcrel64_two_parts (part1, part2, + tmp2)); emit_move_insn (tmp1, gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, - high, tmp3))); + part1, + part2))); } else - emit_insn (gen_ld_from_got (Pmode, tmp1, high, tmp2)); + { + tmp3 = gen_reg_rtx (Pmode); + rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2)); + + high = loongarch_force_temporary (tmp3, high); + emit_insn (gen_ld_from_got (Pmode, tmp1, high, tmp2)); + } } else emit_insn (loongarch_got_load_tls_ie (tmp1, loc)); @@ -3036,7 +3046,8 @@ loongarch_legitimize_tls_address (rtx loc) if (TARGET_CMODEL_EXTREME) { - gcc_assert (TARGET_EXPLICIT_RELOCS); + gcc_assert (la_opt_explicit_relocs + != EXPLICIT_RELOCS_NONE); emit_insn (gen_lui_h_lo20 (tmp1, tmp1, tmp2)); emit_insn (gen_lui_h_hi12 (tmp1, tmp1, tmp2)); @@ -3157,24 +3168,23 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out) || !loongarch_split_symbol_type (symbol_type)) return false; - rtx high, temp1 = NULL; + rtx high; if (temp == NULL) temp = gen_reg_rtx (Pmode); - /* Get the 12-31 bits of the address. */ - high = gen_rtx_HIGH (Pmode, copy_rtx (addr)); - high = loongarch_force_temporary (temp, high); - if (loongarch_symbol_extreme_p (symbol_type) && can_create_pseudo_p ()) { gcc_assert (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE); - temp1 = gen_reg_rtx (Pmode); - emit_move_insn (temp1, gen_rtx_LO_SUM (Pmode, gen_rtx_REG (Pmode, 0), - addr)); - emit_insn (gen_lui_h_lo20 (temp1, temp1, addr)); - emit_insn (gen_lui_h_hi12 (temp1, temp1, addr)); + high = gen_reg_rtx (Pmode); + emit_insn (gen_la_pcrel64_two_parts (high, temp, addr)); + } + else + { + /* Get the 12-31 bits of the address. */ + high = gen_rtx_HIGH (Pmode, copy_rtx (addr)); + high = loongarch_force_temporary (temp, high); } if (low_out) @@ -3183,7 +3193,7 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out) case SYMBOL_PCREL64: if (can_create_pseudo_p ()) { - *low_out = gen_rtx_PLUS (Pmode, high, temp1); + *low_out = gen_rtx_PLUS (Pmode, high, temp); break; } /* fall through */ @@ -3195,7 +3205,8 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out) /* SYMBOL_GOT_DISP symbols are loaded from the GOT. */ { if (TARGET_CMODEL_EXTREME && can_create_pseudo_p ()) - *low_out = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, high, temp1)); + *low_out = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, high, + temp)); else { rtx low = gen_rtx_LO_SUM (Pmode, high, addr); @@ -10932,6 +10943,28 @@ loongarch_asm_code_end (void) #undef DUMP_FEATURE } +static rtx +loongarch_delegitimize_address (rtx op) +{ + /* Call the default address delegitimizer first. */ + op = delegitimize_mem_from_attrs (op); + + if (GET_CODE (op) == UNSPEC) + { + int unspec = XINT (op, 1); + switch (unspec) + { + case UNSPEC_LA_PCREL_64_PART1: + case UNSPEC_LA_PCREL_64_PART2: + return XVECEXP (op, 0, 0); + default: + return op; + } + } + + return op; +} + /* Initialize the GCC target structure. */ #undef TARGET_ASM_ALIGNED_HI_OP #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" @@ -11197,6 +11230,10 @@ loongarch_asm_code_end (void) #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \ loongarch_builtin_support_vector_misalignment +#undef TARGET_DELEGITIMIZE_ADDRESS +#define TARGET_DELEGITIMIZE_ADDRESS \ + loongarch_delegitimize_address + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-loongarch.h" diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md index dda3cdf8be5..6d8eda4dff4 100644 --- a/gcc/config/loongarch/loongarch.md +++ b/gcc/config/loongarch/loongarch.md @@ -85,6 +85,9 @@ (define_c_enum "unspec" [ UNSPEC_SIBCALL_VALUE_MULTIPLE_INTERNAL_1 UNSPEC_CALL_VALUE_MULTIPLE_INTERNAL_1 + + UNSPEC_LA_PCREL_64_PART1 + UNSPEC_LA_PCREL_64_PART2 ]) (define_c_enum "unspecv" [ @@ -2185,6 +2188,24 @@ (define_insn_and_split "*movdi_64bit" [(set_attr "move_type" "move,const,load,store,mgtf,fpload,mftg,fpstore") (set_attr "mode" "DI")]) +;; The 64-bit PC-relative part of address loading. +;; Note that the psABI does not allow splitting it. +(define_insn "la_pcrel64_two_parts" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 2 "") (pc)] UNSPEC_LA_PCREL_64_PART1)) + (set (match_operand:DI 1 "register_operand" "=r") + (unspec:DI [(match_dup 2) (pc)] UNSPEC_LA_PCREL_64_PART2))] + "TARGET_ABI_LP64 && la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE" + { + return "pcalau12i\t%0,%r2\n\t" + "addi.d\t%1,$r0,%L2\n\t" + "lu32i.d\t%1,%R2\n\t" + "lu52i.d\t%1,%1,%H2"; + } + [(set_attr "move_type" "move") + (set_attr "mode" "DI") + (set_attr "length" "16")]) + ;; 32-bit Integer moves (define_expand "movsi" diff --git a/gcc/testsuite/gcc.target/loongarch/cmodel-extreme-1.c b/gcc/testsuite/gcc.target/loongarch/cmodel-extreme-1.c new file mode 100644 index 00000000000..564ee4017f7 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/cmodel-extreme-1.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-march=loongarch64 -mabi=lp64d -O2 -mcmodel=extreme -fno-plt -mexplicit-relocs=always -fdump-rtl-final" } */ + +int a; +extern int b; +__thread int c __attribute__ ((tls_model ("local-exec"))); +__thread int d __attribute__ ((tls_model ("initial-exec"))); +__thread int e __attribute__ ((tls_model ("local-dynamic"))); +__thread int f __attribute__ ((tls_model ("global-dynamic"))); + +void +test (void) +{ + a = b + c + d + e + f; +} + +/* a, b, d, e, f, and __tls_get_addr. */ +/* { dg-final { scan-rtl-dump-times "la_pcrel64_two_parts" 6 "final" } } */ diff --git a/gcc/testsuite/gcc.target/loongarch/cmodel-extreme-2.c b/gcc/testsuite/gcc.target/loongarch/cmodel-extreme-2.c new file mode 100644 index 00000000000..ce834805f38 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/cmodel-extreme-2.c @@ -0,0 +1,7 @@ +/* { dg-do compile } */ +/* { dg-options "-march=loongarch64 -mabi=lp64d -O2 -mcmodel=extreme -fno-plt -mexplicit-relocs=auto -fdump-rtl-final" } */ + +#include "cmodel-extreme-1.c" + +/* a, b, d, e, f, and __tls_get_addr. */ +/* { dg-final { scan-rtl-dump-times "la_pcrel64_two_parts" 6 "final" } } */ diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-1.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-1.c index db1e0f85396..fdb4cf1ff7f 100644 --- a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-1.c +++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-1.c @@ -1,31 +1,33 @@ /* { dg-do compile } */ -/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mexplicit-relocs -mcmodel=extreme" } */ +/* { dg-options "-mabi=lp64d -O2 -fno-pic -fno-plt -mexplicit-relocs -mcmodel=extreme" } */ /* { dg-final { scan-assembler "test:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */ /* { dg-final { scan-assembler "test1:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */ /* { dg-final { scan-assembler "test2:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */ +#define NOIPA __attribute__ ((noipa)) + extern void g (void); -void +NOIPA void f (void) {} -static void +NOIPA static void l (void) {} -void +NOIPA void test (void) { g (); } -void +NOIPA void test1 (void) { f (); } -void +NOIPA void test2 (void) { l (); diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-2.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-2.c index 21bf81ae837..dfba3882b97 100644 --- a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-2.c +++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-2.c @@ -1,32 +1,7 @@ /* { dg-do compile } */ -/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mexplicit-relocs -mcmodel=extreme" } */ +/* { dg-options "-mabi=lp64d -O2 -fpic -fno-plt -mexplicit-relocs -mcmodel=extreme" } */ /* { dg-final { scan-assembler "test:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */ /* { dg-final { scan-assembler "test1:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */ /* { dg-final { scan-assembler "test2:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */ -extern void g (void); -void -f (void) -{} - -static void -l (void) -{} - -void -test (void) -{ - g (); -} - -void -test1 (void) -{ - f (); -} - -void -test2 (void) -{ - l (); -} +#include "func-call-extreme-1.c" diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c index a4da44b4a3d..1f5234f83d1 100644 --- a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c +++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mexplicit-relocs=auto -mcmodel=extreme" } */ +/* { dg-options "-mabi=lp64d -O2 -fno-pic -fno-plt -mexplicit-relocs=auto -mcmodel=extreme" } */ /* { dg-final { scan-assembler "test:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */ /* { dg-final { scan-assembler "test1:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */ /* { dg-final { scan-assembler "test2:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */ diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c index 16b00f4c5f2..c4228500635 100644 --- a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c +++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mexplicit-relocs=auto -mcmodel=extreme" } */ +/* { dg-options "-mabi=lp64d -O2 -fpic -fno-plt -mexplicit-relocs=auto -mcmodel=extreme" } */ /* { dg-final { scan-assembler "test:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */ /* { dg-final { scan-assembler "test1:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */ /* { dg-final { scan-assembler "test2:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */ -- 2.43.0