* [PATCH] LoongArch: Add new relocation R_LARCH_CALL36
@ 2023-11-07 11:52 mengqinggang
2023-11-07 12:16 ` Xi Ruoyao
0 siblings, 1 reply; 8+ messages in thread
From: mengqinggang @ 2023-11-07 11:52 UTC (permalink / raw)
To: binutils
Cc: xuchenghua, chenglulu, liuzhensong, xry111, i.swmail, maskray,
cailulu, luweining, mengqinggang
R_LARCH_CALL36 is used for the medium code model function call sequence
pcaddu18i+jirl, and these two instructions must be adjacent.
The LoongArch ABI v2.20 is available here: https://github.com/loongson/la-abi-specs.
---
bfd/bfd-in2.h | 1 +
bfd/elfnn-loongarch.c | 19 ++++++++++-----
bfd/elfxx-loongarch.c | 24 +++++++++++++++++++
bfd/libbfd.h | 1 +
bfd/reloc.c | 3 +++
gas/config/tc-loongarch.c | 6 ++++-
gas/testsuite/gas/loongarch/medium-call.d | 15 ++++++++++++
gas/testsuite/gas/loongarch/medium-call.s | 6 +++++
include/elf/loongarch.h | 2 ++
.../ld-loongarch-elf/ld-loongarch-elf.exp | 12 ++++++++++
ld/testsuite/ld-loongarch-elf/medium-call.s | 7 ++++++
11 files changed, 89 insertions(+), 7 deletions(-)
create mode 100644 gas/testsuite/gas/loongarch/medium-call.d
create mode 100644 gas/testsuite/gas/loongarch/medium-call.s
create mode 100644 ld/testsuite/ld-loongarch-elf/medium-call.s
diff --git a/bfd/bfd-in2.h b/bfd/bfd-in2.h
index 96eef92fdc7..83fbc6f0732 100644
--- a/bfd/bfd-in2.h
+++ b/bfd/bfd-in2.h
@@ -7449,6 +7449,7 @@ enum bfd_reloc_code_real
BFD_RELOC_LARCH_ADD_ULEB128,
BFD_RELOC_LARCH_SUB_ULEB128,
BFD_RELOC_LARCH_64_PCREL,
+ BFD_RELOC_LARCH_CALL36,
BFD_RELOC_UNUSED
};
typedef enum bfd_reloc_code_real bfd_reloc_code_real_type;
diff --git a/bfd/elfnn-loongarch.c b/bfd/elfnn-loongarch.c
index 18ad3cc91ca..c30d1e181e7 100644
--- a/bfd/elfnn-loongarch.c
+++ b/bfd/elfnn-loongarch.c
@@ -780,6 +780,7 @@ loongarch_elf_check_relocs (bfd *abfd, struct bfd_link_info *info,
case R_LARCH_B16:
case R_LARCH_B21:
case R_LARCH_B26:
+ case R_LARCH_CALL36:
if (h != NULL)
{
h->needs_plt = 1;
@@ -1884,20 +1885,24 @@ loongarch_check_offset (const Elf_Internal_Rela *rel,
ret; \
})
+/* Write immediate to instructions. */
+
static bfd_reloc_status_type
loongarch_reloc_rewrite_imm_insn (const Elf_Internal_Rela *rel,
const asection *input_section ATTRIBUTE_UNUSED,
reloc_howto_type *howto, bfd *input_bfd,
bfd_byte *contents, bfd_vma reloc_val)
{
- int bits = bfd_get_reloc_size (howto) * 8;
- uint32_t insn = bfd_get (bits, input_bfd, contents + rel->r_offset);
-
+ /* Adjust the immediate based on alignment and
+ its position in the instruction. */
if (!loongarch_adjust_reloc_bitsfield (input_bfd, howto, &reloc_val))
return bfd_reloc_overflow;
- insn = (insn & (uint32_t)howto->src_mask)
- | ((insn & (~(uint32_t)howto->dst_mask)) | reloc_val);
+ int bits = bfd_get_reloc_size (howto) * 8;
+ uint64_t insn = bfd_get (bits, input_bfd, contents + rel->r_offset);
+
+ /* Write immediate to instruction. */
+ insn = (insn & ~howto->dst_mask) | (reloc_val & howto->dst_mask);
bfd_put (bits, input_bfd, insn, contents + rel->r_offset);
@@ -2120,6 +2125,7 @@ perform_relocation (const Elf_Internal_Rela *rel, asection *input_section,
case R_LARCH_TLS_GD_PC_HI20:
case R_LARCH_TLS_GD_HI20:
case R_LARCH_PCREL20_S2:
+ case R_LARCH_CALL36:
r = loongarch_check_offset (rel, input_section);
if (r != bfd_reloc_ok)
break;
@@ -3127,9 +3133,10 @@ loongarch_elf_relocate_section (bfd *output_bfd, struct bfd_link_info *info,
break;
/* New reloc types. */
+ case R_LARCH_B16:
case R_LARCH_B21:
case R_LARCH_B26:
- case R_LARCH_B16:
+ case R_LARCH_CALL36:
unresolved_reloc = false;
if (is_undefweak)
{
diff --git a/bfd/elfxx-loongarch.c b/bfd/elfxx-loongarch.c
index a970a257aa9..bf8bf3fc83d 100644
--- a/bfd/elfxx-loongarch.c
+++ b/bfd/elfxx-loongarch.c
@@ -1547,6 +1547,24 @@ static loongarch_reloc_howto_type loongarch_howto_table[] =
NULL, /* adjust_reloc_bits */
NULL), /* larch_reloc_type_name */
+ /* Used for the medium code model function call pcaddu18i+jirl;
+ these two instructions must be adjacent. */
+ LOONGARCH_HOWTO (R_LARCH_CALL36, /* type (110). */
+ 2, /* rightshift. */
+ 8, /* size. */
+ 36, /* bitsize. */
+ true, /* pc_relative. */
+ 0, /* bitpos. */
+ complain_overflow_signed, /* complain_on_overflow. */
+ bfd_elf_generic_reloc, /* special_function. */
+ "R_LARCH_CALL36", /* name. */
+ false, /* partial_inplace. */
+ 0, /* src_mask. */
+ 0x03fffc0001ffffe0, /* dst_mask. */
+ false, /* pcrel_offset. */
+ BFD_RELOC_LARCH_CALL36, /* bfd_reloc_code_real_type. */
+ reloc_sign_bits, /* adjust_reloc_bits. */
+ "call36"), /* larch_reloc_type_name. */
};
reloc_howto_type *
@@ -1729,6 +1747,12 @@ reloc_sign_bits (bfd *abfd, reloc_howto_type *howto, bfd_vma *fix_val)
/* Perform insn bits field. 15:0<<10, 20:16>>16. */
val = ((val & 0xffff) << 10) | ((val >> 16) & 0x1f);
break;
+ case R_LARCH_CALL36:
+ /* 0x8000: If the low 16-bit immediate is greater than 0x7fff,
+ it becomes a negative number due to sign extension,
+ so 0x8000 must be added before extracting the high part. */
+ val = (((val + 0x8000) >> 16) << 5) | (((val & 0xffff) << 10) << 32);
+ break;
default:
val <<= howto->bitpos;
break;
diff --git a/bfd/libbfd.h b/bfd/libbfd.h
index fce0680f3db..6583c5601a7 100644
--- a/bfd/libbfd.h
+++ b/bfd/libbfd.h
@@ -3596,6 +3596,7 @@ static const char *const bfd_reloc_code_real_names[] = { "@@uninitialized@@",
"BFD_RELOC_LARCH_ADD_ULEB128",
"BFD_RELOC_LARCH_SUB_ULEB128",
"BFD_RELOC_LARCH_64_PCREL",
+ "BFD_RELOC_LARCH_CALL36",
"@@overflow: BFD_RELOC_UNUSED@@",
};
#endif
diff --git a/bfd/reloc.c b/bfd/reloc.c
index 93ebad879e0..4d3ac4c1096 100644
--- a/bfd/reloc.c
+++ b/bfd/reloc.c
@@ -8292,6 +8292,9 @@ ENUMX
ENUMX
BFD_RELOC_LARCH_64_PCREL
+ENUMX
+ BFD_RELOC_LARCH_CALL36
+
ENUMDOC
LARCH relocations.
diff --git a/gas/config/tc-loongarch.c b/gas/config/tc-loongarch.c
index 33f3e71ce2f..7ec1ed3fb8f 100644
--- a/gas/config/tc-loongarch.c
+++ b/gas/config/tc-loongarch.c
@@ -687,7 +687,7 @@ loongarch_args_parser_can_match_arg_helper (char esc_ch1, char esc_ch2,
esc_ch1, esc_ch2, bit_field, arg);
if (ip->reloc_info[0].type >= BFD_RELOC_LARCH_B16
- && ip->reloc_info[0].type < BFD_RELOC_LARCH_64_PCREL)
+ && ip->reloc_info[0].type < BFD_RELOC_UNUSED)
{
/* As we compact stack-relocs, it is no need for pop operation.
But break out until here in order to check the imm field.
@@ -959,6 +959,10 @@ move_insn (struct loongarch_cl_insn *insn, fragS *frag, long where)
static void
append_fixed_insn (struct loongarch_cl_insn *insn)
{
+ /* Ensure the jirl is emitted to the same frag as the pcaddu18i. */
+ if (BFD_RELOC_LARCH_CALL36 == insn->reloc_info[0].type)
+ frag_grow (8);
+
char *f = frag_more (insn->insn_length);
move_insn (insn, frag_now, f - frag_now->fr_literal);
}
diff --git a/gas/testsuite/gas/loongarch/medium-call.d b/gas/testsuite/gas/loongarch/medium-call.d
new file mode 100644
index 00000000000..4183818cb4f
--- /dev/null
+++ b/gas/testsuite/gas/loongarch/medium-call.d
@@ -0,0 +1,15 @@
+#as:
+#objdump: -dr
+
+.*:[ ]+file format .*
+
+
+Disassembly of section .text:
+
+.* <.text>:
+[ ]+0:[ ]+1e000001[ ]+pcaddu18i[ ]+\$ra, 0
+[ ]+0: R_LARCH_CALL36[ ]+a
+[ ]+4:[ ]+4c000021[ ]+jirl[ ]+\$ra, \$ra, 0
+[ ]+8:[ ]+1e00000c[ ]+pcaddu18i[ ]+\$t0, 0
+[ ]+8: R_LARCH_CALL36[ ]+a
+[ ]+c:[ ]+4c000180[ ]+jr[ ]+\$t0
diff --git a/gas/testsuite/gas/loongarch/medium-call.s b/gas/testsuite/gas/loongarch/medium-call.s
new file mode 100644
index 00000000000..f2977d1c6d7
--- /dev/null
+++ b/gas/testsuite/gas/loongarch/medium-call.s
@@ -0,0 +1,6 @@
+ # call .L1, r1(ra) temp register, r1(ra) return register.
+ pcaddu18i $r1, %call36(a)
+ jirl $r1, $r1, 0
+ # tail .L1, r12(t0) temp register, r0(zero) return register.
+ pcaddu18i $r12, %call36(a)
+ jirl $r0, $r12, 0
diff --git a/include/elf/loongarch.h b/include/elf/loongarch.h
index e31395e13d5..34719ee8b8c 100644
--- a/include/elf/loongarch.h
+++ b/include/elf/loongarch.h
@@ -251,6 +251,8 @@ RELOC_NUMBER (R_LARCH_SUB_ULEB128, 108)
RELOC_NUMBER (R_LARCH_64_PCREL, 109)
+RELOC_NUMBER (R_LARCH_CALL36, 110)
+
END_RELOC_NUMBERS (R_LARCH_count)
/* Processor specific flags for the ELF header e_flags field. */
diff --git a/ld/testsuite/ld-loongarch-elf/ld-loongarch-elf.exp b/ld/testsuite/ld-loongarch-elf/ld-loongarch-elf.exp
index b95cc53e597..1fc70d0a61e 100644
--- a/ld/testsuite/ld-loongarch-elf/ld-loongarch-elf.exp
+++ b/ld/testsuite/ld-loongarch-elf/ld-loongarch-elf.exp
@@ -55,4 +55,16 @@ if [istarget "loongarch64-*-*"] {
"64_pcrel" \
] \
]
+
+ run_ld_link_tests \
+ [list \
+ [list \
+ "medium code model call" \
+ "-e 0x0" "" \
+ "" \
+ {medium-call.s} \
+ {} \
+ "medium-call" \
+ ] \
+ ]
}
diff --git a/ld/testsuite/ld-loongarch-elf/medium-call.s b/ld/testsuite/ld-loongarch-elf/medium-call.s
new file mode 100644
index 00000000000..4d1888b76a0
--- /dev/null
+++ b/ld/testsuite/ld-loongarch-elf/medium-call.s
@@ -0,0 +1,7 @@
+.L1:
+ # call .L1, r1(ra) temp register, r1(ra) return register.
+ pcaddu18i $r1, %call36(.L1)
+ jirl $r1, $r1, 0
+ # tail .L1, r12(t0) temp register, r0(zero) return register.
+ pcaddu18i $r12, %call36(.L1)
+ jirl $r0, $r12, 0
--
2.31.1
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] LoongArch: Add new relocation R_LARCH_CALL36
2023-11-07 11:52 [PATCH] LoongArch: Add new relocation R_LARCH_CALL36 mengqinggang
@ 2023-11-07 12:16 ` Xi Ruoyao
2023-11-07 17:34 ` WANG Xuerui
0 siblings, 1 reply; 8+ messages in thread
From: Xi Ruoyao @ 2023-11-07 12:16 UTC (permalink / raw)
To: mengqinggang, binutils
Cc: xuchenghua, chenglulu, liuzhensong, i.swmail, maskray, cailulu,
luweining
I'd like to have some pseudo instructions:
- call36 r0, t0, func -> pcaddu18i t0, func + jirl r0, t0, func
- call36 t0, func -> pcaddu18i t0, func + jirl t0, t0, func
- call36 func -> pcaddu18i ra, func + jirl ra, ra, func
These will make the work of the compiler (or assembly programmer)
easier. Anyway with R_LARCH_CALL36 the pcaddu18i instruction and the
jirl instruction must be adjacent, so there is no benefit for the
compiler or programmer to use them separately to make a function call.
And the last one can be annotated with R_LARCH_RELAX so we may relax it
into a bl instruction if possible.
On Tue, 2023-11-07 at 19:52 +0800, mengqinggang wrote:
> R_LARCH_CALL36 is used for medium code model function call pcaddu18i+jirl, and
> these two instructions must adjacent.
>
> The LoongArch ABI v2.20 at here: https://github.com/loongson/la-abi-specs.
> ---
> bfd/bfd-in2.h | 1 +
> bfd/elfnn-loongarch.c | 19 ++++++++++-----
> bfd/elfxx-loongarch.c | 24 +++++++++++++++++++
> bfd/libbfd.h | 1 +
> bfd/reloc.c | 3 +++
> gas/config/tc-loongarch.c | 6 ++++-
> gas/testsuite/gas/loongarch/medium-call.d | 15 ++++++++++++
> gas/testsuite/gas/loongarch/medium-call.s | 6 +++++
> include/elf/loongarch.h | 2 ++
> .../ld-loongarch-elf/ld-loongarch-elf.exp | 12 ++++++++++
> ld/testsuite/ld-loongarch-elf/medium-call.s | 7 ++++++
> 11 files changed, 89 insertions(+), 7 deletions(-)
> create mode 100644 gas/testsuite/gas/loongarch/medium-call.d
> create mode 100644 gas/testsuite/gas/loongarch/medium-call.s
> create mode 100644 ld/testsuite/ld-loongarch-elf/medium-call.s
>
> diff --git a/bfd/bfd-in2.h b/bfd/bfd-in2.h
> index 96eef92fdc7..83fbc6f0732 100644
> --- a/bfd/bfd-in2.h
> +++ b/bfd/bfd-in2.h
> @@ -7449,6 +7449,7 @@ enum bfd_reloc_code_real
> BFD_RELOC_LARCH_ADD_ULEB128,
> BFD_RELOC_LARCH_SUB_ULEB128,
> BFD_RELOC_LARCH_64_PCREL,
> + BFD_RELOC_LARCH_CALL36,
> BFD_RELOC_UNUSED
> };
> typedef enum bfd_reloc_code_real bfd_reloc_code_real_type;
> diff --git a/bfd/elfnn-loongarch.c b/bfd/elfnn-loongarch.c
> index 18ad3cc91ca..c30d1e181e7 100644
> --- a/bfd/elfnn-loongarch.c
> +++ b/bfd/elfnn-loongarch.c
> @@ -780,6 +780,7 @@ loongarch_elf_check_relocs (bfd *abfd, struct bfd_link_info *info,
> case R_LARCH_B16:
> case R_LARCH_B21:
> case R_LARCH_B26:
> + case R_LARCH_CALL36:
> if (h != NULL)
> {
> h->needs_plt = 1;
> @@ -1884,20 +1885,24 @@ loongarch_check_offset (const Elf_Internal_Rela *rel,
> ret; \
> })
>
> +/* Write immediate to instructions. */
> +
> static bfd_reloc_status_type
> loongarch_reloc_rewrite_imm_insn (const Elf_Internal_Rela *rel,
> const asection *input_section ATTRIBUTE_UNUSED,
> reloc_howto_type *howto, bfd *input_bfd,
> bfd_byte *contents, bfd_vma reloc_val)
> {
> - int bits = bfd_get_reloc_size (howto) * 8;
> - uint32_t insn = bfd_get (bits, input_bfd, contents + rel->r_offset);
> -
> + /* Adjust the immediate based on alignment and
> + its position in the instruction. */
> if (!loongarch_adjust_reloc_bitsfield (input_bfd, howto, &reloc_val))
> return bfd_reloc_overflow;
>
> - insn = (insn & (uint32_t)howto->src_mask)
> - | ((insn & (~(uint32_t)howto->dst_mask)) | reloc_val);
> + int bits = bfd_get_reloc_size (howto) * 8;
> + uint64_t insn = bfd_get (bits, input_bfd, contents + rel->r_offset);
> +
> + /* Write immediate to instruction. */
> + insn = (insn & ~howto->dst_mask) | (reloc_val & howto->dst_mask);
>
> bfd_put (bits, input_bfd, insn, contents + rel->r_offset);
>
> @@ -2120,6 +2125,7 @@ perform_relocation (const Elf_Internal_Rela *rel, asection *input_section,
> case R_LARCH_TLS_GD_PC_HI20:
> case R_LARCH_TLS_GD_HI20:
> case R_LARCH_PCREL20_S2:
> + case R_LARCH_CALL36:
> r = loongarch_check_offset (rel, input_section);
> if (r != bfd_reloc_ok)
> break;
> @@ -3127,9 +3133,10 @@ loongarch_elf_relocate_section (bfd *output_bfd, struct bfd_link_info *info,
> break;
>
> /* New reloc types. */
> + case R_LARCH_B16:
> case R_LARCH_B21:
> case R_LARCH_B26:
> - case R_LARCH_B16:
> + case R_LARCH_CALL36:
> unresolved_reloc = false;
> if (is_undefweak)
> {
> diff --git a/bfd/elfxx-loongarch.c b/bfd/elfxx-loongarch.c
> index a970a257aa9..bf8bf3fc83d 100644
> --- a/bfd/elfxx-loongarch.c
> +++ b/bfd/elfxx-loongarch.c
> @@ -1547,6 +1547,24 @@ static loongarch_reloc_howto_type loongarch_howto_table[] =
> NULL, /* adjust_reloc_bits */
> NULL), /* larch_reloc_type_name */
>
> + /* Used for medium code model function call pcaddu18i+jirl,
> + these two instructions must adjacent. */
> + LOONGARCH_HOWTO (R_LARCH_CALL36, /* type (110). */
> + 2, /* rightshift. */
> + 8, /* size. */
> + 36, /* bitsize. */
> + true, /* pc_relative. */
> + 0, /* bitpos. */
> + complain_overflow_signed, /* complain_on_overflow. */
> + bfd_elf_generic_reloc, /* special_function. */
> + "R_LARCH_CALL36", /* name. */
> + false, /* partial_inplace. */
> + 0, /* src_mask. */
> + 0x03fffc0001ffffe0, /* dst_mask. */
> + false, /* pcrel_offset. */
> + BFD_RELOC_LARCH_CALL36, /* bfd_reloc_code_real_type. */
> + reloc_sign_bits, /* adjust_reloc_bits. */
> + "call36"), /* larch_reloc_type_name. */
> };
>
> reloc_howto_type *
> @@ -1729,6 +1747,12 @@ reloc_sign_bits (bfd *abfd, reloc_howto_type *howto, bfd_vma *fix_val)
> /* Perform insn bits field. 15:0<<10, 20:16>>16. */
> val = ((val & 0xffff) << 10) | ((val >> 16) & 0x1f);
> break;
> + case R_LARCH_CALL36:
> + /* 0x8000: If low 16-bit immediate greater than 0x7fff,
> + it become to a negative number due to sign-extended,
> + so the high part need to add 0x8000. */
> + val = (((val + 0x8000) >> 16) << 5) | (((val & 0xffff) << 10) << 32);
> + break;
> default:
> val <<= howto->bitpos;
> break;
> diff --git a/bfd/libbfd.h b/bfd/libbfd.h
> index fce0680f3db..6583c5601a7 100644
> --- a/bfd/libbfd.h
> +++ b/bfd/libbfd.h
> @@ -3596,6 +3596,7 @@ static const char *const bfd_reloc_code_real_names[] = { "@@uninitialized@@",
> "BFD_RELOC_LARCH_ADD_ULEB128",
> "BFD_RELOC_LARCH_SUB_ULEB128",
> "BFD_RELOC_LARCH_64_PCREL",
> + "BFD_RELOC_LARCH_CALL36",
> "@@overflow: BFD_RELOC_UNUSED@@",
> };
> #endif
> diff --git a/bfd/reloc.c b/bfd/reloc.c
> index 93ebad879e0..4d3ac4c1096 100644
> --- a/bfd/reloc.c
> +++ b/bfd/reloc.c
> @@ -8292,6 +8292,9 @@ ENUMX
> ENUMX
> BFD_RELOC_LARCH_64_PCREL
>
> +ENUMX
> + BFD_RELOC_LARCH_CALL36
> +
> ENUMDOC
> LARCH relocations.
>
> diff --git a/gas/config/tc-loongarch.c b/gas/config/tc-loongarch.c
> index 33f3e71ce2f..7ec1ed3fb8f 100644
> --- a/gas/config/tc-loongarch.c
> +++ b/gas/config/tc-loongarch.c
> @@ -687,7 +687,7 @@ loongarch_args_parser_can_match_arg_helper (char esc_ch1, char esc_ch2,
> esc_ch1, esc_ch2, bit_field, arg);
>
> if (ip->reloc_info[0].type >= BFD_RELOC_LARCH_B16
> - && ip->reloc_info[0].type < BFD_RELOC_LARCH_64_PCREL)
> + && ip->reloc_info[0].type < BFD_RELOC_UNUSED)
> {
> /* As we compact stack-relocs, it is no need for pop operation.
> But break out until here in order to check the imm field.
> @@ -959,6 +959,10 @@ move_insn (struct loongarch_cl_insn *insn, fragS *frag, long where)
> static void
> append_fixed_insn (struct loongarch_cl_insn *insn)
> {
> + /* Ensure the jirl is emitted to the same frag as the pcaddu18i. */
> + if (BFD_RELOC_LARCH_CALL36 == insn->reloc_info[0].type)
> + frag_grow (8);
> +
> char *f = frag_more (insn->insn_length);
> move_insn (insn, frag_now, f - frag_now->fr_literal);
> }
> diff --git a/gas/testsuite/gas/loongarch/medium-call.d b/gas/testsuite/gas/loongarch/medium-call.d
> new file mode 100644
> index 00000000000..4183818cb4f
> --- /dev/null
> +++ b/gas/testsuite/gas/loongarch/medium-call.d
> @@ -0,0 +1,15 @@
> +#as:
> +#objdump: -dr
> +
> +.*:[ ]+file format .*
> +
> +
> +Disassembly of section .text:
> +
> +.* <.text>:
> +[ ]+0:[ ]+1e000001[ ]+pcaddu18i[ ]+\$ra, 0
> +[ ]+0: R_LARCH_CALL36[ ]+a
> +[ ]+4:[ ]+4c000021[ ]+jirl[ ]+\$ra, \$ra, 0
> +[ ]+8:[ ]+1e00000c[ ]+pcaddu18i[ ]+\$t0, 0
> +[ ]+8: R_LARCH_CALL36[ ]+a
> +[ ]+c:[ ]+4c000180[ ]+jr[ ]+\$t0
> diff --git a/gas/testsuite/gas/loongarch/medium-call.s b/gas/testsuite/gas/loongarch/medium-call.s
> new file mode 100644
> index 00000000000..f2977d1c6d7
> --- /dev/null
> +++ b/gas/testsuite/gas/loongarch/medium-call.s
> @@ -0,0 +1,6 @@
> + # call .L1, r1(ra) temp register, r1(ra) return register.
> + pcaddu18i $r1, %call36(a)
> + jirl $r1, $r1, 0
> + # tail .L1, r12(t0) temp register, r0(zero) return register.
> + pcaddu18i $r12, %call36(a)
> + jirl $r0, $r12, 0
> diff --git a/include/elf/loongarch.h b/include/elf/loongarch.h
> index e31395e13d5..34719ee8b8c 100644
> --- a/include/elf/loongarch.h
> +++ b/include/elf/loongarch.h
> @@ -251,6 +251,8 @@ RELOC_NUMBER (R_LARCH_SUB_ULEB128, 108)
>
> RELOC_NUMBER (R_LARCH_64_PCREL, 109)
>
> +RELOC_NUMBER (R_LARCH_CALL36, 110)
> +
> END_RELOC_NUMBERS (R_LARCH_count)
>
> /* Processor specific flags for the ELF header e_flags field. */
> diff --git a/ld/testsuite/ld-loongarch-elf/ld-loongarch-elf.exp b/ld/testsuite/ld-loongarch-elf/ld-loongarch-elf.exp
> index b95cc53e597..1fc70d0a61e 100644
> --- a/ld/testsuite/ld-loongarch-elf/ld-loongarch-elf.exp
> +++ b/ld/testsuite/ld-loongarch-elf/ld-loongarch-elf.exp
> @@ -55,4 +55,16 @@ if [istarget "loongarch64-*-*"] {
> "64_pcrel" \
> ] \
> ]
> +
> + run_ld_link_tests \
> + [list \
> + [list \
> + "medium code model call" \
> + "-e 0x0" "" \
> + "" \
> + {medium-call.s} \
> + {} \
> + "medium-call" \
> + ] \
> + ]
> }
> diff --git a/ld/testsuite/ld-loongarch-elf/medium-call.s b/ld/testsuite/ld-loongarch-elf/medium-call.s
> new file mode 100644
> index 00000000000..4d1888b76a0
> --- /dev/null
> +++ b/ld/testsuite/ld-loongarch-elf/medium-call.s
> @@ -0,0 +1,7 @@
> +.L1:
> + # call .L1, r1(ra) temp register, r1(ra) return register.
> + pcaddu18i $r1, %call36(.L1)
> + jirl $r1, $r1, 0
> + # tail .L1, r12(t0) temp register, r0(zero) return register.
> + pcaddu18i $r12, %call36(.L1)
> + jirl $r0, $r12, 0
--
Xi Ruoyao <xry111@xry111.site>
School of Aerospace Science and Technology, Xidian University
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] LoongArch: Add new relocation R_LARCH_CALL36
2023-11-07 12:16 ` Xi Ruoyao
@ 2023-11-07 17:34 ` WANG Xuerui
2023-11-08 11:04 ` Xi Ruoyao
0 siblings, 1 reply; 8+ messages in thread
From: WANG Xuerui @ 2023-11-07 17:34 UTC (permalink / raw)
To: Xi Ruoyao, mengqinggang, binutils
Cc: xuchenghua, chenglulu, liuzhensong, i.swmail, maskray, cailulu,
luweining
On 11/7/23 20:16, Xi Ruoyao wrote:
> I'd like to have some pseudo instructions:
>
> - call36 r0, t0, func -> pcaddu18i t0, func + jirl r0, t0, func
> - call36 t0, func -> pcaddu18i t0, func + jirl t0, t0, func
> - call36 func -> pcaddu18i ra, func + jirl ra, ra, func
>
> These will make the work of the compiler (or assembly programmer)
> easier. Anyway with R_LARCH_CALL36 the pcaddu18i instruction and the
> jirl instruction must be adjacent, so there is no benefit for the
> compiler or programmer to use them separately to make a function call.
>
> And the last one can be annotated with R_LARCH_RELAX so we may relax it
> into a bl instruction if possible.
The suggestion sounds reasonable! Although IMO a name like "call.36"
might look better, the name "call36" also works for me.
And regarding the proposed "call36 func" reusing the $ra for the
temporary -- IIUC this will break the subroutine return prediction. I
think we've been hit by similar regalloc in GCC and have since fixed
that: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=11013. So maybe it's
necessary to specify a different temp register after all...
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] LoongArch: Add new relocation R_LARCH_CALL36
2023-11-07 17:34 ` WANG Xuerui
@ 2023-11-08 11:04 ` Xi Ruoyao
2023-11-10 8:36 ` chenglulu
2023-11-17 1:40 ` mengqinggang
0 siblings, 2 replies; 8+ messages in thread
From: Xi Ruoyao @ 2023-11-08 11:04 UTC (permalink / raw)
To: WANG Xuerui, mengqinggang, binutils
Cc: xuchenghua, chenglulu, liuzhensong, maskray, cailulu, luweining
On Wed, 2023-11-08 at 01:34 +0800, WANG Xuerui wrote:
> On 11/7/23 20:16, Xi Ruoyao wrote:
> > I'd like to have some pseudo instructions:
> >
> > - call36 r0, t0, func -> pcaddu18i t0, func + jirl r0, t0, func
> > - call36 t0, func -> pcaddu18i t0, func + jirl t0, t0, func
> > - call36 func -> pcaddu18i ra, func + jirl ra, ra, func
> >
> > These will make the work of the compiler (or assembly programmer)
> > easier. Anyway with R_LARCH_CALL36 the pcaddu18i instruction and the
> > jirl instruction must be adjacent, so there is no benefit for the
> > compiler or programmer to use them separately to make a function call.
> >
> > And the last one can be annotated with R_LARCH_RELAX so we may relax it
> > into a bl instruction if possible.
>
> The suggestion sounds reasonable! Although IMO a name like "call.36"
> might look better, the name "call36" also works for me.
>
> And regarding the proposed "call36 func" reusing the $ra for the
> temporary -- IIUC this will break the subroutine return prediction. I
> think we've been hit by similar regalloc in GCC and have since fixed
> that: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=11013.
The URL is wrong; PR 11013 is 20 years old.
> So maybe it's necessary to specify a different temp register after
> all...
I think for a normal call (call36 $ra, $ra, func or just call36 $ra) it
should be OK, but for a sibcall (call36 $zero, $ra, func) it will
confuse the return predictor and we had better use another temp register.
--
Xi Ruoyao <xry111@xry111.site>
School of Aerospace Science and Technology, Xidian University
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] LoongArch: Add new relocation R_LARCH_CALL36
2023-11-08 11:04 ` Xi Ruoyao
@ 2023-11-10 8:36 ` chenglulu
2023-11-17 1:40 ` mengqinggang
1 sibling, 0 replies; 8+ messages in thread
From: chenglulu @ 2023-11-10 8:36 UTC (permalink / raw)
To: Xi Ruoyao, WANG Xuerui, mengqinggang, binutils
Cc: xuchenghua, liuzhensong, maskray, cailulu, luweining
在 2023/11/8 下午7:04, Xi Ruoyao 写道:
> On Wed, 2023-11-08 at 01:34 +0800, WANG Xuerui wrote:
>> On 11/7/23 20:16, Xi Ruoyao wrote:
>>> I'd like to have some pseudo instructions:
>>>
>>> - call36 r0, t0, func -> pcaddu18i t0, func + jirl r0, t0, func
>>> - call36 t0, func -> pcaddu18i t0, func + jirl t0, t0, func
>>> - call36 func -> pcaddu18i ra, func + jirl ra, ra, func
>>>
>>> These will make the work of the compiler (or assembly programmer)
>>> easier. Anyway with R_LARCH_CALL36 the pcaddu18i instruction and the
>>> jirl instruction must be adjacent, so there is no benefit for the
>>> compiler or programmer to use them separately to make a function call.
>>>
>>> And the last one can be annotated with R_LARCH_RELAX so we may relax it
>>> into a bl instruction if possible.
>> The suggestion sounds reasonable! Although IMO a name like "call.36"
>> might look better, the name "call36" also works for me.
>>
>> And regarding the proposed "call36 func" reusing the $ra for the
>> temporary -- IIUC this will break the subroutine return prediction. I
>> think we've been hit by similar regalloc in GCC and have since fixed
>> that: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=11013.
> The URL is wrong, PR 11013 is 20-years old.
>
>> So maybe it's necessary to specify a different temp register after
>> all...
> I think for a normal call (call36 $ra, $ra, func or just call36 $ra) it
> should be OK, but for a sibcall (call36 $zero, $ra, func) it will
Under what circumstances do we generate such a function call?:-[
> confuse the return predictor and we better use another temp register.
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] LoongArch: Add new relocation R_LARCH_CALL36
2023-11-08 11:04 ` Xi Ruoyao
2023-11-10 8:36 ` chenglulu
@ 2023-11-17 1:40 ` mengqinggang
2023-11-17 4:44 ` WANG Xuerui
1 sibling, 1 reply; 8+ messages in thread
From: mengqinggang @ 2023-11-17 1:40 UTC (permalink / raw)
To: Xi Ruoyao, WANG Xuerui, binutils
Cc: xuchenghua, chenglulu, liuzhensong, maskray, cailulu, luweining
[-- Attachment #1: Type: text/plain, Size: 1738 bytes --]
We will add call36 and tail36 pseudo instructions in gas:
call36 f -> pcaddu18i $ra, %call36(f) + jirl $ra, $ra, 0
tail36 f -> pcaddu18i $t0, %call36(f) + jirl $zero, $t0, 0
在 2023/11/8 下午7:04, Xi Ruoyao 写道:
> On Wed, 2023-11-08 at 01:34 +0800, WANG Xuerui wrote:
>> On 11/7/23 20:16, Xi Ruoyao wrote:
>>> I'd like to have some pseudo instructions:
>>>
>>> - call36 r0, t0, func -> pcaddu18i t0, func + jirl r0, t0, func
>>> - call36 t0, func -> pcaddu18i t0, func + jirl t0, t0, func
>>> - call36 func -> pcaddu18i ra, func + jirl ra, ra, func
>>>
>>> These will make the work of the compiler (or assembly programmer)
>>> easier. Anyway with R_LARCH_CALL36 the pcaddu18i instruction and the
>>> jirl instruction must be adjacent, so there is no benefit for the
>>> compiler or programmer to use them separately to make a function call.
>>>
>>> And the last one can be annotated with R_LARCH_RELAX so we may relax it
>>> into a bl instruction if possible.
>> The suggestion sounds reasonable! Although IMO a name like "call.36"
>> might look better, the name "call36" also works for me.
>>
>> And regarding the proposed "call36 func" reusing the $ra for the
>> temporary -- IIUC this will break the subroutine return prediction. I
>> think we've been hit by similar regalloc in GCC and have since fixed
>> that: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=11013.
> The URL is wrong, PR 11013 is 20-years old.
>
>> So maybe it's necessary to specify a different temp register after
>> all...
> I think for a normal call (call36 $ra, $ra, func or just call36 $ra) it
> should be OK, but for a sibcall (call36 $zero, $ra, func) it will
> confuse the return predictor and we better use another temp register.
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] LoongArch: Add new relocation R_LARCH_CALL36
2023-11-17 1:40 ` mengqinggang
@ 2023-11-17 4:44 ` WANG Xuerui
2023-11-17 9:00 ` chenglulu
0 siblings, 1 reply; 8+ messages in thread
From: WANG Xuerui @ 2023-11-17 4:44 UTC (permalink / raw)
To: mengqinggang, Xi Ruoyao, WANG Xuerui, binutils
Cc: xuchenghua, chenglulu, liuzhensong, maskray, cailulu, luweining
On 11/17/23 09:40, mengqinggang wrote:
> We will add call36 and tail36 pseudo instructions in gas:
> call36 f -> pcaddu18i $ra, %call36(f) + jirl $ra, $ra, 0
> tail36 f -> pcaddu18i $t0, %call36(f) + jirl $zero, $t0, 0
>
>
> 在 2023/11/8 下午7:04, Xi Ruoyao 写道:
>> On Wed, 2023-11-08 at 01:34 +0800, WANG Xuerui wrote:
>>> On 11/7/23 20:16, Xi Ruoyao wrote:
>>>> I'd like to have some pseudo instructions:
>>>>
>>>> - call36 r0, t0, func -> pcaddu18i t0, func + jirl r0, t0, func
>>>> - call36 t0, func -> pcaddu18i t0, func + jirl t0, t0, func
>>>> - call36 func -> pcaddu18i ra, func + jirl ra, ra, func
>>>>
>>>> These will make the work of the compiler (or assembly programmer)
>>>> easier. Anyway with R_LARCH_CALL36 the pcaddu18i instruction and the
>>>> jirl instruction must be adjacent, so there is no benefit for the
>>>> compiler or programmer to use them separately to make a function call.
>>>>
>>>> And the last one can be annotated with R_LARCH_RELAX so we may
>>>> relax it
>>>> into a bl instruction if possible.
>>> The suggestion sounds reasonable! Although IMO a name like "call.36"
>>> might look better, the name "call36" also works for me.
>>>
>>> And regarding the proposed "call36 func" reusing the $ra for the
>>> temporary -- IIUC this will break the subroutine return prediction. I
>>> think we've been hit by similar regalloc in GCC and have since fixed
>>> that: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=11013.
>> The URL is wrong, PR 11013 is 20-years old.
>>
>>> So maybe it's necessary to specify a different temp register after
>>> all...
>> I think for a normal call (call36 $ra, $ra, func or just call36 $ra) it
>> should be OK, but for a sibcall (call36 $zero, $ra, func) it will
>> confuse the return predictor and we better use another temp register.
Hmm I found out the correct bug number (it was a copy-paste mistake):
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110136
So the main point is: is any jirl in the "jirl <any>, $ra, <any>" form
able to trigger the return stack optimization, or is it just "jirl
$zero, $ra, 0" i.e. "ret"? If the branch predictors in popular LoongArch
models are smart enough to not consider "jirl $ra, $ra, 0" also as a
"ret", then re-using the $ra as scratch space is okay. Otherwise we have
to make the pseudo-insn take another temp register (and optionally
disallow/warn usage of $ra in that place).
BTW, could we have the RISC-V-like bare "call" and "tail" too? It could
be useful for more uniform asm among different code models ("bl" vs
"pcaddu18i + jirl" vs "lu12i + addi + lu32i + lu52i + jirl"), apart from
lowering the learning curve for those with some RISC-V asm background. We
could make the behavior of "call" and "tail" vary based on the code model
chosen, and also provide explicit control via more pseudo-insns like
"{call,tail}26" and "{call,tail}64" apart from "{call,tail}36".
(The above is a suggestion for you to consider; it's not immediately
necessary for the medium code model work at hand, just for symmetry and
enabling users to gain more control over their asm.)
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] LoongArch: Add new relocation R_LARCH_CALL36
2023-11-17 4:44 ` WANG Xuerui
@ 2023-11-17 9:00 ` chenglulu
0 siblings, 0 replies; 8+ messages in thread
From: chenglulu @ 2023-11-17 9:00 UTC (permalink / raw)
To: WANG Xuerui, mengqinggang, Xi Ruoyao, binutils
Cc: xuchenghua, liuzhensong, maskray, cailulu, luweining
在 2023/11/17 下午12:44, WANG Xuerui 写道:
> On 11/17/23 09:40, mengqinggang wrote:
>> We will add call36 and tail36 pseudo instructions in gas:
>> call36 f -> pcaddu18i $ra, %call36(f) + jirl $ra, $ra, 0
>> tail36 f -> pcaddu18i $t0, %call36(f) + jirl $zero, $t0, 0
>>
>>
>> 在 2023/11/8 下午7:04, Xi Ruoyao 写道:
>>> On Wed, 2023-11-08 at 01:34 +0800, WANG Xuerui wrote:
>>>> On 11/7/23 20:16, Xi Ruoyao wrote:
>>>>> I'd like to have some pseudo instructions:
>>>>>
>>>>> - call36 r0, t0, func -> pcaddu18i t0, func + jirl r0, t0, func
>>>>> - call36 t0, func -> pcaddu18i t0, func + jirl t0, t0, func
>>>>> - call36 func -> pcaddu18i ra, func + jirl ra, ra, func
>>>>>
>>>>> These will make the work of the compiler (or assembly programmer)
>>>>> easier. Anyway with R_LARCH_CALL36 the pcaddu18i instruction and the
>>>>> jirl instruction must be adjacent, so there is no benefit for the
>>>>> compiler or programmer to use them separately to make a function
>>>>> call.
>>>>>
>>>>> And the last one can be annotated with R_LARCH_RELAX so we may
>>>>> relax it
>>>>> into a bl instruction if possible.
>>>> The suggestion sounds reasonable! Although IMO a name like "call.36"
>>>> might look better, the name "call36" also works for me.
>>>>
>>>> And regarding the proposed "call36 func" reusing the $ra for the
>>>> temporary -- IIUC this will break the subroutine return prediction. I
>>>> think we've been hit by similar regalloc in GCC and have since fixed
>>>> that: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=11013.
>>> The URL is wrong, PR 11013 is 20-years old.
>>>
>>>> So maybe it's necessary to specify a different temp register after
>>>> all...
>>> I think for a normal call (call36 $ra, $ra, func or just call36 $ra) it
>>> should be OK, but for a sibcall (call36 $zero, $ra, func) it will
>>> confuse the return predictor and we better use another temp register.
>
> Hmm I found out the correct bug number (it was a copy-paste mistake):
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110136
>
> So the main point is: is any jirl in the "jirl <any>, $ra, <any>" form
> able to trigger the return stack optimization, or is it just "jirl
> $zero, $ra, 0" i.e. "ret"? If the branch predictors in popular
> LoongArch models are smart enough to not consider "jirl $ra, $ra, 0"
> also as a "ret", then re-using the $ra as scratch space is okay.
> Otherwise we have to make the pseudo-insn take another temp register
> (and optionally disallow/warn usage of $ra in that place).
I don't quite understand under what circumstances this worrying sequence of
instructions would be generated.
Now we have two scenarios:
1. call36 (pcaddu18i $ra, %call36(func)+jirl $ra, $ra,0)
2. tail36 (pcaddu18i $t0, %call36(func)+jirl $r0,$t0,0)
In neither case will 'jirl $r0,$ra,0' be generated.
>
> BTW, could we have the RISC-V-like bare "call" and "tail" too? It
> could be useful for more uniform asm among different code models ("bl"
> vs "pcaddu18i + jirl" vs "lu12i + addi + lu32i + lu52i + jirl"), apart
> from lowering learning curve for those with some RISC-V asm
> background. We could make "call" and "tail" behavior vary based on the
> code model chosen, and also provide explicit control via more
> pseudo-insns like "{call,tail}26" and "{call,tail}64" apart from
> "{call,tail}36".
>
> (The above is a suggestion for you to consider; it's not immediately
> necessary for the medium code model work at hand, just for symmetry
> and enabling users to gain more control over their asm.)
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2023-11-17 9:00 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-07 11:52 [PATCH] LoongArch: Add new relocation R_LARCH_CALL36 mengqinggang
2023-11-07 12:16 ` Xi Ruoyao
2023-11-07 17:34 ` WANG Xuerui
2023-11-08 11:04 ` Xi Ruoyao
2023-11-10 8:36 ` chenglulu
2023-11-17 1:40 ` mengqinggang
2023-11-17 4:44 ` WANG Xuerui
2023-11-17 9:00 ` chenglulu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).