public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] LoongArch: Add new relocation R_LARCH_CALL36
@ 2023-11-07 11:52 mengqinggang
  2023-11-07 12:16 ` Xi Ruoyao
  0 siblings, 1 reply; 8+ messages in thread
From: mengqinggang @ 2023-11-07 11:52 UTC (permalink / raw)
  To: binutils
  Cc: xuchenghua, chenglulu, liuzhensong, xry111, i.swmail, maskray,
	cailulu, luweining, mengqinggang

R_LARCH_CALL36 is used for the medium code model function call sequence
pcaddu18i+jirl; these two instructions must be adjacent.

The LoongArch ABI v2.20 is available here: https://github.com/loongson/la-abi-specs.
---
 bfd/bfd-in2.h                                 |  1 +
 bfd/elfnn-loongarch.c                         | 19 ++++++++++-----
 bfd/elfxx-loongarch.c                         | 24 +++++++++++++++++++
 bfd/libbfd.h                                  |  1 +
 bfd/reloc.c                                   |  3 +++
 gas/config/tc-loongarch.c                     |  6 ++++-
 gas/testsuite/gas/loongarch/medium-call.d     | 15 ++++++++++++
 gas/testsuite/gas/loongarch/medium-call.s     |  6 +++++
 include/elf/loongarch.h                       |  2 ++
 .../ld-loongarch-elf/ld-loongarch-elf.exp     | 12 ++++++++++
 ld/testsuite/ld-loongarch-elf/medium-call.s   |  7 ++++++
 11 files changed, 89 insertions(+), 7 deletions(-)
 create mode 100644 gas/testsuite/gas/loongarch/medium-call.d
 create mode 100644 gas/testsuite/gas/loongarch/medium-call.s
 create mode 100644 ld/testsuite/ld-loongarch-elf/medium-call.s

diff --git a/bfd/bfd-in2.h b/bfd/bfd-in2.h
index 96eef92fdc7..83fbc6f0732 100644
--- a/bfd/bfd-in2.h
+++ b/bfd/bfd-in2.h
@@ -7449,6 +7449,7 @@ enum bfd_reloc_code_real
   BFD_RELOC_LARCH_ADD_ULEB128,
   BFD_RELOC_LARCH_SUB_ULEB128,
   BFD_RELOC_LARCH_64_PCREL,
+  BFD_RELOC_LARCH_CALL36,
   BFD_RELOC_UNUSED
 };
 typedef enum bfd_reloc_code_real bfd_reloc_code_real_type;
diff --git a/bfd/elfnn-loongarch.c b/bfd/elfnn-loongarch.c
index 18ad3cc91ca..c30d1e181e7 100644
--- a/bfd/elfnn-loongarch.c
+++ b/bfd/elfnn-loongarch.c
@@ -780,6 +780,7 @@ loongarch_elf_check_relocs (bfd *abfd, struct bfd_link_info *info,
 	case R_LARCH_B16:
 	case R_LARCH_B21:
 	case R_LARCH_B26:
+	case R_LARCH_CALL36:
 	  if (h != NULL)
 	    {
 	      h->needs_plt = 1;
@@ -1884,20 +1885,24 @@ loongarch_check_offset (const Elf_Internal_Rela *rel,
     ret;					      \
    })
 
+/* Write immediate to instructions.  */
+
 static bfd_reloc_status_type
 loongarch_reloc_rewrite_imm_insn (const Elf_Internal_Rela *rel,
 				  const asection *input_section ATTRIBUTE_UNUSED,
 				  reloc_howto_type *howto, bfd *input_bfd,
 				  bfd_byte *contents, bfd_vma reloc_val)
 {
-  int bits = bfd_get_reloc_size (howto) * 8;
-  uint32_t insn = bfd_get (bits, input_bfd, contents + rel->r_offset);
-
+  /* Adjust the immediate based on alignment and
+     its position in the instruction.  */
   if (!loongarch_adjust_reloc_bitsfield (input_bfd, howto, &reloc_val))
     return bfd_reloc_overflow;
 
-  insn = (insn & (uint32_t)howto->src_mask)
-    | ((insn & (~(uint32_t)howto->dst_mask)) | reloc_val);
+  int bits = bfd_get_reloc_size (howto) * 8;
+  uint64_t insn = bfd_get (bits, input_bfd, contents + rel->r_offset);
+
+  /* Write immediate to instruction.  */
+  insn = (insn & ~howto->dst_mask) | (reloc_val & howto->dst_mask);
 
   bfd_put (bits, input_bfd, insn, contents + rel->r_offset);
 
@@ -2120,6 +2125,7 @@ perform_relocation (const Elf_Internal_Rela *rel, asection *input_section,
     case R_LARCH_TLS_GD_PC_HI20:
     case R_LARCH_TLS_GD_HI20:
     case R_LARCH_PCREL20_S2:
+    case R_LARCH_CALL36:
       r = loongarch_check_offset (rel, input_section);
       if (r != bfd_reloc_ok)
 	break;
@@ -3127,9 +3133,10 @@ loongarch_elf_relocate_section (bfd *output_bfd, struct bfd_link_info *info,
 	  break;
 
 	/* New reloc types.  */
+	case R_LARCH_B16:
 	case R_LARCH_B21:
 	case R_LARCH_B26:
-	case R_LARCH_B16:
+	case R_LARCH_CALL36:
 	  unresolved_reloc = false;
 	  if (is_undefweak)
 	    {
diff --git a/bfd/elfxx-loongarch.c b/bfd/elfxx-loongarch.c
index a970a257aa9..bf8bf3fc83d 100644
--- a/bfd/elfxx-loongarch.c
+++ b/bfd/elfxx-loongarch.c
@@ -1547,6 +1547,24 @@ static loongarch_reloc_howto_type loongarch_howto_table[] =
 	 NULL,					/* adjust_reloc_bits */
 	 NULL),					/* larch_reloc_type_name */
 
+  /* Used for the medium code model function call pcaddu18i+jirl;
+     these two instructions must be adjacent.  */
+  LOONGARCH_HOWTO (R_LARCH_CALL36,		/* type (110).  */
+	 2,					/* rightshift.  */
+	 8,					/* size.  */
+	 36,					/* bitsize.  */
+	 true,					/* pc_relative.  */
+	 0,					/* bitpos.  */
+	 complain_overflow_signed,		/* complain_on_overflow.  */
+	 bfd_elf_generic_reloc,			/* special_function.  */
+	 "R_LARCH_CALL36",			/* name.  */
+	 false,					/* partial_inplace.  */
+	 0,					/* src_mask.  */
+	 0x03fffc0001ffffe0,			/* dst_mask.  */
+	 false,					/* pcrel_offset.  */
+	 BFD_RELOC_LARCH_CALL36,		/* bfd_reloc_code_real_type.  */
+	 reloc_sign_bits,			/* adjust_reloc_bits.  */
+	 "call36"),				/* larch_reloc_type_name.  */
 };
 
 reloc_howto_type *
@@ -1729,6 +1747,12 @@ reloc_sign_bits (bfd *abfd, reloc_howto_type *howto, bfd_vma *fix_val)
       /* Perform insn bits field. 15:0<<10, 20:16>>16.  */
       val = ((val & 0xffff) << 10) | ((val >> 16) & 0x1f);
       break;
+    case R_LARCH_CALL36:
+      /* 0x8000: If the low 16-bit immediate is greater than 0x7fff,
+	 it becomes a negative number after sign extension,
+	 so 0x8000 must be added to the high part to compensate.  */
+      val = (((val + 0x8000) >> 16) << 5) | (((val & 0xffff) << 10) << 32);
+      break;
     default:
       val <<= howto->bitpos;
       break;
diff --git a/bfd/libbfd.h b/bfd/libbfd.h
index fce0680f3db..6583c5601a7 100644
--- a/bfd/libbfd.h
+++ b/bfd/libbfd.h
@@ -3596,6 +3596,7 @@ static const char *const bfd_reloc_code_real_names[] = { "@@uninitialized@@",
   "BFD_RELOC_LARCH_ADD_ULEB128",
   "BFD_RELOC_LARCH_SUB_ULEB128",
   "BFD_RELOC_LARCH_64_PCREL",
+  "BFD_RELOC_LARCH_CALL36",
  "@@overflow: BFD_RELOC_UNUSED@@",
 };
 #endif
diff --git a/bfd/reloc.c b/bfd/reloc.c
index 93ebad879e0..4d3ac4c1096 100644
--- a/bfd/reloc.c
+++ b/bfd/reloc.c
@@ -8292,6 +8292,9 @@ ENUMX
 ENUMX
   BFD_RELOC_LARCH_64_PCREL
 
+ENUMX
+  BFD_RELOC_LARCH_CALL36
+
 ENUMDOC
   LARCH relocations.
 
diff --git a/gas/config/tc-loongarch.c b/gas/config/tc-loongarch.c
index 33f3e71ce2f..7ec1ed3fb8f 100644
--- a/gas/config/tc-loongarch.c
+++ b/gas/config/tc-loongarch.c
@@ -687,7 +687,7 @@ loongarch_args_parser_can_match_arg_helper (char esc_ch1, char esc_ch2,
 		      esc_ch1, esc_ch2, bit_field, arg);
 
 	  if (ip->reloc_info[0].type >= BFD_RELOC_LARCH_B16
-	      && ip->reloc_info[0].type < BFD_RELOC_LARCH_64_PCREL)
+	      && ip->reloc_info[0].type < BFD_RELOC_UNUSED)
 	    {
 	      /* As we compact stack-relocs, it is no need for pop operation.
 		 But break out until here in order to check the imm field.
@@ -959,6 +959,10 @@ move_insn (struct loongarch_cl_insn *insn, fragS *frag, long where)
 static void
 append_fixed_insn (struct loongarch_cl_insn *insn)
 {
+  /* Ensure the jirl is emitted to the same frag as the pcaddu18i.  */
+  if (BFD_RELOC_LARCH_CALL36 == insn->reloc_info[0].type)
+    frag_grow (8);
+
   char *f = frag_more (insn->insn_length);
   move_insn (insn, frag_now, f - frag_now->fr_literal);
 }
diff --git a/gas/testsuite/gas/loongarch/medium-call.d b/gas/testsuite/gas/loongarch/medium-call.d
new file mode 100644
index 00000000000..4183818cb4f
--- /dev/null
+++ b/gas/testsuite/gas/loongarch/medium-call.d
@@ -0,0 +1,15 @@
+#as:
+#objdump: -dr
+
+.*:[    ]+file format .*
+
+
+Disassembly of section .text:
+
+.* <.text>:
+[ 	]+0:[ 	]+1e000001[ 	]+pcaddu18i[ 	]+\$ra, 0
+[ 	]+0: R_LARCH_CALL36[ 	]+a
+[ 	]+4:[ 	]+4c000021[ 	]+jirl[ 	]+\$ra, \$ra, 0
+[ 	]+8:[ 	]+1e00000c[ 	]+pcaddu18i[ 	]+\$t0, 0
+[ 	]+8: R_LARCH_CALL36[ 	]+a
+[ 	]+c:[ 	]+4c000180[ 	]+jr[ 	]+\$t0
diff --git a/gas/testsuite/gas/loongarch/medium-call.s b/gas/testsuite/gas/loongarch/medium-call.s
new file mode 100644
index 00000000000..f2977d1c6d7
--- /dev/null
+++ b/gas/testsuite/gas/loongarch/medium-call.s
@@ -0,0 +1,6 @@
+  # call a: r1(ra) is the temp register, r1(ra) is the return register.
+  pcaddu18i $r1, %call36(a)
+  jirl	    $r1, $r1, 0
+  # tail a: r12(t0) is the temp register, r0(zero) is the return register.
+  pcaddu18i $r12, %call36(a)
+  jirl	    $r0, $r12, 0
diff --git a/include/elf/loongarch.h b/include/elf/loongarch.h
index e31395e13d5..34719ee8b8c 100644
--- a/include/elf/loongarch.h
+++ b/include/elf/loongarch.h
@@ -251,6 +251,8 @@ RELOC_NUMBER (R_LARCH_SUB_ULEB128, 108)
 
 RELOC_NUMBER (R_LARCH_64_PCREL, 109)
 
+RELOC_NUMBER (R_LARCH_CALL36, 110)
+
 END_RELOC_NUMBERS (R_LARCH_count)
 
 /* Processor specific flags for the ELF header e_flags field.  */
diff --git a/ld/testsuite/ld-loongarch-elf/ld-loongarch-elf.exp b/ld/testsuite/ld-loongarch-elf/ld-loongarch-elf.exp
index b95cc53e597..1fc70d0a61e 100644
--- a/ld/testsuite/ld-loongarch-elf/ld-loongarch-elf.exp
+++ b/ld/testsuite/ld-loongarch-elf/ld-loongarch-elf.exp
@@ -55,4 +55,16 @@ if [istarget "loongarch64-*-*"] {
 	      "64_pcrel" \
 	  ] \
       ]
+
+  run_ld_link_tests \
+      [list \
+	  [list \
+	      "medium code model call" \
+	      "-e 0x0" "" \
+	      "" \
+	      {medium-call.s} \
+	      {} \
+	      "medium-call" \
+	  ] \
+      ]
 }
diff --git a/ld/testsuite/ld-loongarch-elf/medium-call.s b/ld/testsuite/ld-loongarch-elf/medium-call.s
new file mode 100644
index 00000000000..4d1888b76a0
--- /dev/null
+++ b/ld/testsuite/ld-loongarch-elf/medium-call.s
@@ -0,0 +1,7 @@
+.L1:
+  # call .L1, r1(ra) temp register, r1(ra) return register.
+  pcaddu18i $r1, %call36(.L1)
+  jirl	    $r1, $r1, 0
+  # tail .L1, r12(t0) temp register, r0(zero) return register.
+  pcaddu18i $r12, %call36(.L1)
+  jirl	    $r0, $r12, 0
-- 
2.31.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] LoongArch: Add new relocation R_LARCH_CALL36
  2023-11-07 11:52 [PATCH] LoongArch: Add new relocation R_LARCH_CALL36 mengqinggang
@ 2023-11-07 12:16 ` Xi Ruoyao
  2023-11-07 17:34   ` WANG Xuerui
  0 siblings, 1 reply; 8+ messages in thread
From: Xi Ruoyao @ 2023-11-07 12:16 UTC (permalink / raw)
  To: mengqinggang, binutils
  Cc: xuchenghua, chenglulu, liuzhensong, i.swmail, maskray, cailulu,
	luweining

I'd like to have some pseudo instructions:

- call36 r0, t0, func -> pcaddu18i t0, func + jirl r0, t0, func
- call36 t0, func -> pcaddu18i t0, func + jirl t0, t0, func
- call36 func -> pcaddu18i ra, func + jirl ra, ra, func

These will make the work of the compiler (or assembly programmer)
easier.  Anyway with R_LARCH_CALL36 the pcaddu18i instruction and the
jirl instruction must be adjacent, so there is no benefit for the
compiler or programmer to use them separately to make a function call.

And the last one can be annotated with R_LARCH_RELAX so we may relax it
into a bl instruction if possible.

On Tue, 2023-11-07 at 19:52 +0800, mengqinggang wrote:
> R_LARCH_CALL36 is used for medium code model function call pcaddu18i+jirl, and
> these two instructions must adjacent.
> 
> The LoongArch ABI v2.20 at here: https://github.com/loongson/la-abi-specs.
> ---
>  bfd/bfd-in2.h                                 |  1 +
>  bfd/elfnn-loongarch.c                         | 19 ++++++++++-----
>  bfd/elfxx-loongarch.c                         | 24 +++++++++++++++++++
>  bfd/libbfd.h                                  |  1 +
>  bfd/reloc.c                                   |  3 +++
>  gas/config/tc-loongarch.c                     |  6 ++++-
>  gas/testsuite/gas/loongarch/medium-call.d     | 15 ++++++++++++
>  gas/testsuite/gas/loongarch/medium-call.s     |  6 +++++
>  include/elf/loongarch.h                       |  2 ++
>  .../ld-loongarch-elf/ld-loongarch-elf.exp     | 12 ++++++++++
>  ld/testsuite/ld-loongarch-elf/medium-call.s   |  7 ++++++
>  11 files changed, 89 insertions(+), 7 deletions(-)
>  create mode 100644 gas/testsuite/gas/loongarch/medium-call.d
>  create mode 100644 gas/testsuite/gas/loongarch/medium-call.s
>  create mode 100644 ld/testsuite/ld-loongarch-elf/medium-call.s
> 
> diff --git a/bfd/bfd-in2.h b/bfd/bfd-in2.h
> index 96eef92fdc7..83fbc6f0732 100644
> --- a/bfd/bfd-in2.h
> +++ b/bfd/bfd-in2.h
> @@ -7449,6 +7449,7 @@ enum bfd_reloc_code_real
>    BFD_RELOC_LARCH_ADD_ULEB128,
>    BFD_RELOC_LARCH_SUB_ULEB128,
>    BFD_RELOC_LARCH_64_PCREL,
> +  BFD_RELOC_LARCH_CALL36,
>    BFD_RELOC_UNUSED
>  };
>  typedef enum bfd_reloc_code_real bfd_reloc_code_real_type;
> diff --git a/bfd/elfnn-loongarch.c b/bfd/elfnn-loongarch.c
> index 18ad3cc91ca..c30d1e181e7 100644
> --- a/bfd/elfnn-loongarch.c
> +++ b/bfd/elfnn-loongarch.c
> @@ -780,6 +780,7 @@ loongarch_elf_check_relocs (bfd *abfd, struct bfd_link_info *info,
>  	case R_LARCH_B16:
>  	case R_LARCH_B21:
>  	case R_LARCH_B26:
> +	case R_LARCH_CALL36:
>  	  if (h != NULL)
>  	    {
>  	      h->needs_plt = 1;
> @@ -1884,20 +1885,24 @@ loongarch_check_offset (const Elf_Internal_Rela *rel,
>      ret;					      \
>     })
>  
> +/* Write immediate to instructions.  */
> +
>  static bfd_reloc_status_type
>  loongarch_reloc_rewrite_imm_insn (const Elf_Internal_Rela *rel,
>  				  const asection *input_section ATTRIBUTE_UNUSED,
>  				  reloc_howto_type *howto, bfd *input_bfd,
>  				  bfd_byte *contents, bfd_vma reloc_val)
>  {
> -  int bits = bfd_get_reloc_size (howto) * 8;
> -  uint32_t insn = bfd_get (bits, input_bfd, contents + rel->r_offset);
> -
> +  /* Adjust the immediate based on alignment and
> +     its position in the instruction.  */
>    if (!loongarch_adjust_reloc_bitsfield (input_bfd, howto, &reloc_val))
>      return bfd_reloc_overflow;
>  
> -  insn = (insn & (uint32_t)howto->src_mask)
> -    | ((insn & (~(uint32_t)howto->dst_mask)) | reloc_val);
> +  int bits = bfd_get_reloc_size (howto) * 8;
> +  uint64_t insn = bfd_get (bits, input_bfd, contents + rel->r_offset);
> +
> +  /* Write immediate to instruction.  */
> +  insn = (insn & ~howto->dst_mask) | (reloc_val & howto->dst_mask);
>  
>    bfd_put (bits, input_bfd, insn, contents + rel->r_offset);
>  
> @@ -2120,6 +2125,7 @@ perform_relocation (const Elf_Internal_Rela *rel, asection *input_section,
>      case R_LARCH_TLS_GD_PC_HI20:
>      case R_LARCH_TLS_GD_HI20:
>      case R_LARCH_PCREL20_S2:
> +    case R_LARCH_CALL36:
>        r = loongarch_check_offset (rel, input_section);
>        if (r != bfd_reloc_ok)
>  	break;
> @@ -3127,9 +3133,10 @@ loongarch_elf_relocate_section (bfd *output_bfd, struct bfd_link_info *info,
>  	  break;
>  
>  	/* New reloc types.  */
> +	case R_LARCH_B16:
>  	case R_LARCH_B21:
>  	case R_LARCH_B26:
> -	case R_LARCH_B16:
> +	case R_LARCH_CALL36:
>  	  unresolved_reloc = false;
>  	  if (is_undefweak)
>  	    {
> diff --git a/bfd/elfxx-loongarch.c b/bfd/elfxx-loongarch.c
> index a970a257aa9..bf8bf3fc83d 100644
> --- a/bfd/elfxx-loongarch.c
> +++ b/bfd/elfxx-loongarch.c
> @@ -1547,6 +1547,24 @@ static loongarch_reloc_howto_type loongarch_howto_table[] =
>  	 NULL,					/* adjust_reloc_bits */
>  	 NULL),					/* larch_reloc_type_name */
>  
> +  /* Used for medium code model function call pcaddu18i+jirl,
> +     these two instructions must adjacent.  */
> +  LOONGARCH_HOWTO (R_LARCH_CALL36,		/* type (110).  */
> +	 2,					/* rightshift.  */
> +	 8,					/* size.  */
> +	 36,					/* bitsize.  */
> +	 true,					/* pc_relative.  */
> +	 0,					/* bitpos.  */
> +	 complain_overflow_signed,		/* complain_on_overflow.  */
> +	 bfd_elf_generic_reloc,			/* special_function.  */
> +	 "R_LARCH_CALL36",			/* name.  */
> +	 false,					/* partial_inplace.  */
> +	 0,					/* src_mask.  */
> +	 0x03fffc0001ffffe0,			/* dst_mask.  */
> +	 false,					/* pcrel_offset.  */
> +	 BFD_RELOC_LARCH_CALL36,		/* bfd_reloc_code_real_type.  */
> +	 reloc_sign_bits,			/* adjust_reloc_bits.  */
> +	 "call36"),				/* larch_reloc_type_name.  */
>  };
>  
>  reloc_howto_type *
> @@ -1729,6 +1747,12 @@ reloc_sign_bits (bfd *abfd, reloc_howto_type *howto, bfd_vma *fix_val)
>        /* Perform insn bits field. 15:0<<10, 20:16>>16.  */
>        val = ((val & 0xffff) << 10) | ((val >> 16) & 0x1f);
>        break;
> +    case R_LARCH_CALL36:
> +      /* 0x8000: If low 16-bit immediate greater than 0x7fff,
> +	 it become to a negative number due to sign-extended,
> +	 so the high part need to add 0x8000.  */
> +      val = (((val + 0x8000) >> 16) << 5) | (((val & 0xffff) << 10) << 32);
> +      break;
>      default:
>        val <<= howto->bitpos;
>        break;
> diff --git a/bfd/libbfd.h b/bfd/libbfd.h
> index fce0680f3db..6583c5601a7 100644
> --- a/bfd/libbfd.h
> +++ b/bfd/libbfd.h
> @@ -3596,6 +3596,7 @@ static const char *const bfd_reloc_code_real_names[] = { "@@uninitialized@@",
>    "BFD_RELOC_LARCH_ADD_ULEB128",
>    "BFD_RELOC_LARCH_SUB_ULEB128",
>    "BFD_RELOC_LARCH_64_PCREL",
> +  "BFD_RELOC_LARCH_CALL36",
>   "@@overflow: BFD_RELOC_UNUSED@@",
>  };
>  #endif
> diff --git a/bfd/reloc.c b/bfd/reloc.c
> index 93ebad879e0..4d3ac4c1096 100644
> --- a/bfd/reloc.c
> +++ b/bfd/reloc.c
> @@ -8292,6 +8292,9 @@ ENUMX
>  ENUMX
>    BFD_RELOC_LARCH_64_PCREL
>  
> +ENUMX
> +  BFD_RELOC_LARCH_CALL36
> +
>  ENUMDOC
>    LARCH relocations.
>  
> diff --git a/gas/config/tc-loongarch.c b/gas/config/tc-loongarch.c
> index 33f3e71ce2f..7ec1ed3fb8f 100644
> --- a/gas/config/tc-loongarch.c
> +++ b/gas/config/tc-loongarch.c
> @@ -687,7 +687,7 @@ loongarch_args_parser_can_match_arg_helper (char esc_ch1, char esc_ch2,
>  		      esc_ch1, esc_ch2, bit_field, arg);
>  
>  	  if (ip->reloc_info[0].type >= BFD_RELOC_LARCH_B16
> -	      && ip->reloc_info[0].type < BFD_RELOC_LARCH_64_PCREL)
> +	      && ip->reloc_info[0].type < BFD_RELOC_UNUSED)
>  	    {
>  	      /* As we compact stack-relocs, it is no need for pop operation.
>  		 But break out until here in order to check the imm field.
> @@ -959,6 +959,10 @@ move_insn (struct loongarch_cl_insn *insn, fragS *frag, long where)
>  static void
>  append_fixed_insn (struct loongarch_cl_insn *insn)
>  {
> +  /* Ensure the jirl is emitted to the same frag as the pcaddu18i.  */
> +  if (BFD_RELOC_LARCH_CALL36 == insn->reloc_info[0].type)
> +    frag_grow (8);
> +
>    char *f = frag_more (insn->insn_length);
>    move_insn (insn, frag_now, f - frag_now->fr_literal);
>  }
> diff --git a/gas/testsuite/gas/loongarch/medium-call.d b/gas/testsuite/gas/loongarch/medium-call.d
> new file mode 100644
> index 00000000000..4183818cb4f
> --- /dev/null
> +++ b/gas/testsuite/gas/loongarch/medium-call.d
> @@ -0,0 +1,15 @@
> +#as:
> +#objdump: -dr
> +
> +.*:[    ]+file format .*
> +
> +
> +Disassembly of section .text:
> +
> +.* <.text>:
> +[ 	]+0:[ 	]+1e000001[ 	]+pcaddu18i[ 	]+\$ra, 0
> +[ 	]+0: R_LARCH_CALL36[ 	]+a
> +[ 	]+4:[ 	]+4c000021[ 	]+jirl[ 	]+\$ra, \$ra, 0
> +[ 	]+8:[ 	]+1e00000c[ 	]+pcaddu18i[ 	]+\$t0, 0
> +[ 	]+8: R_LARCH_CALL36[ 	]+a
> +[ 	]+c:[ 	]+4c000180[ 	]+jr[ 	]+\$t0
> diff --git a/gas/testsuite/gas/loongarch/medium-call.s b/gas/testsuite/gas/loongarch/medium-call.s
> new file mode 100644
> index 00000000000..f2977d1c6d7
> --- /dev/null
> +++ b/gas/testsuite/gas/loongarch/medium-call.s
> @@ -0,0 +1,6 @@
> +  # call .L1, r1(ra) temp register, r1(ra) return register.
> +  pcaddu18i $r1, %call36(a)
> +  jirl	    $r1, $r1, 0
> +  # tail .L1, r12(t0) temp register, r0(zero) return register.
> +  pcaddu18i $r12, %call36(a)
> +  jirl	    $r0, $r12, 0
> diff --git a/include/elf/loongarch.h b/include/elf/loongarch.h
> index e31395e13d5..34719ee8b8c 100644
> --- a/include/elf/loongarch.h
> +++ b/include/elf/loongarch.h
> @@ -251,6 +251,8 @@ RELOC_NUMBER (R_LARCH_SUB_ULEB128, 108)
>  
>  RELOC_NUMBER (R_LARCH_64_PCREL, 109)
>  
> +RELOC_NUMBER (R_LARCH_CALL36, 110)
> +
>  END_RELOC_NUMBERS (R_LARCH_count)
>  
>  /* Processor specific flags for the ELF header e_flags field.  */
> diff --git a/ld/testsuite/ld-loongarch-elf/ld-loongarch-elf.exp b/ld/testsuite/ld-loongarch-elf/ld-loongarch-elf.exp
> index b95cc53e597..1fc70d0a61e 100644
> --- a/ld/testsuite/ld-loongarch-elf/ld-loongarch-elf.exp
> +++ b/ld/testsuite/ld-loongarch-elf/ld-loongarch-elf.exp
> @@ -55,4 +55,16 @@ if [istarget "loongarch64-*-*"] {
>  	      "64_pcrel" \
>  	  ] \
>        ]
> +
> +  run_ld_link_tests \
> +      [list \
> +	  [list \
> +	      "medium code model call" \
> +	      "-e 0x0" "" \
> +	      "" \
> +	      {medium-call.s} \
> +	      {} \
> +	      "medium-call" \
> +	  ] \
> +      ]
>  }
> diff --git a/ld/testsuite/ld-loongarch-elf/medium-call.s b/ld/testsuite/ld-loongarch-elf/medium-call.s
> new file mode 100644
> index 00000000000..4d1888b76a0
> --- /dev/null
> +++ b/ld/testsuite/ld-loongarch-elf/medium-call.s
> @@ -0,0 +1,7 @@
> +.L1:
> +  # call .L1, r1(ra) temp register, r1(ra) return register.
> +  pcaddu18i $r1, %call36(.L1)
> +  jirl	    $r1, $r1, 0
> +  # tail .L1, r12(t0) temp register, r0(zero) return register.
> +  pcaddu18i $r12, %call36(.L1)
> +  jirl	    $r0, $r12, 0

-- 
Xi Ruoyao <xry111@xry111.site>
School of Aerospace Science and Technology, Xidian University

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] LoongArch: Add new relocation R_LARCH_CALL36
  2023-11-07 12:16 ` Xi Ruoyao
@ 2023-11-07 17:34   ` WANG Xuerui
  2023-11-08 11:04     ` Xi Ruoyao
  0 siblings, 1 reply; 8+ messages in thread
From: WANG Xuerui @ 2023-11-07 17:34 UTC (permalink / raw)
  To: Xi Ruoyao, mengqinggang, binutils
  Cc: xuchenghua, chenglulu, liuzhensong, i.swmail, maskray, cailulu,
	luweining

On 11/7/23 20:16, Xi Ruoyao wrote:
> I'd like to have some pseudo instructions:
>
> - call36 r0, t0, func -> pcaddu18i t0, func + jirl r0, t0, func
> - call36 t0, func -> pcaddu18i t0, func + jirl t0, t0, func
> - call36 func -> pcaddu18i ra, func + jirl ra, ra, func
>
> These will make the work of the compiler (or assembly programmer)
> easier.  Anyway with R_LARCH_CALL36 the pcaddu18i instruction and the
> jirl instruction must be adjacent, so there is no benefit for the
> compiler or programmer to use them separately to make a function call.
>
> And the last one can be annotated with R_LARCH_RELAX so we may relax it
> into a bl instruction if possible.

The suggestion sounds reasonable! Although IMO a name like "call.36" 
might look better, the name "call36" also works for me.

And regarding the proposed "call36 func" reusing the $ra for the 
temporary -- IIUC this will break the subroutine return prediction. I 
think we've been hit by similar regalloc in GCC and have since fixed 
that: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=11013. So maybe it's 
necessary to specify a different temp register after all...


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] LoongArch: Add new relocation R_LARCH_CALL36
  2023-11-07 17:34   ` WANG Xuerui
@ 2023-11-08 11:04     ` Xi Ruoyao
  2023-11-10  8:36       ` chenglulu
  2023-11-17  1:40       ` mengqinggang
  0 siblings, 2 replies; 8+ messages in thread
From: Xi Ruoyao @ 2023-11-08 11:04 UTC (permalink / raw)
  To: WANG Xuerui, mengqinggang, binutils
  Cc: xuchenghua, chenglulu, liuzhensong, maskray, cailulu, luweining

On Wed, 2023-11-08 at 01:34 +0800, WANG Xuerui wrote:
> On 11/7/23 20:16, Xi Ruoyao wrote:
> > I'd like to have some pseudo instructions:
> > 
> > - call36 r0, t0, func -> pcaddu18i t0, func + jirl r0, t0, func
> > - call36 t0, func -> pcaddu18i t0, func + jirl t0, t0, func
> > - call36 func -> pcaddu18i ra, func + jirl ra, ra, func
> > 
> > These will make the work of the compiler (or assembly programmer)
> > easier.  Anyway with R_LARCH_CALL36 the pcaddu18i instruction and the
> > jirl instruction must be adjacent, so there is no benefit for the
> > compiler or programmer to use them separately to make a function call.
> > 
> > And the last one can be annotated with R_LARCH_RELAX so we may relax it
> > into a bl instruction if possible.
> 
> The suggestion sounds reasonable! Although IMO a name like "call.36" 
> might look better, the name "call36" also works for me.
> 
> And regarding the proposed "call36 func" reusing the $ra for the 
> temporary -- IIUC this will break the subroutine return prediction. I 
> think we've been hit by similar regalloc in GCC and have since fixed 
> that: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=11013.

The URL is wrong, PR 11013 is 20-years old.

> So maybe it's necessary to specify a different temp register after
> all...

I think for a normal call (call36 $ra, $ra, func or just call36 $ra) it
should be OK, but for a sibcall (call36 $zero, $ra, func) it will
confuse the return predictor, so we had better use another temp register.

-- 
Xi Ruoyao <xry111@xry111.site>
School of Aerospace Science and Technology, Xidian University

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] LoongArch: Add new relocation R_LARCH_CALL36
  2023-11-08 11:04     ` Xi Ruoyao
@ 2023-11-10  8:36       ` chenglulu
  2023-11-17  1:40       ` mengqinggang
  1 sibling, 0 replies; 8+ messages in thread
From: chenglulu @ 2023-11-10  8:36 UTC (permalink / raw)
  To: Xi Ruoyao, WANG Xuerui, mengqinggang, binutils
  Cc: xuchenghua, liuzhensong, maskray, cailulu, luweining


在 2023/11/8 下午7:04, Xi Ruoyao 写道:
> On Wed, 2023-11-08 at 01:34 +0800, WANG Xuerui wrote:
>> On 11/7/23 20:16, Xi Ruoyao wrote:
>>> I'd like to have some pseudo instructions:
>>>
>>> - call36 r0, t0, func -> pcaddu18i t0, func + jirl r0, t0, func
>>> - call36 t0, func -> pcaddu18i t0, func + jirl t0, t0, func
>>> - call36 func -> pcaddu18i ra, func + jirl ra, ra, func
>>>
>>> These will make the work of the compiler (or assembly programmer)
>>> easier.  Anyway with R_LARCH_CALL36 the pcaddu18i instruction and the
>>> jirl instruction must be adjacent, so there is no benefit for the
>>> compiler or programmer to use them separately to make a function call.
>>>
>>> And the last one can be annotated with R_LARCH_RELAX so we may relax it
>>> into a bl instruction if possible.
>> The suggestion sounds reasonable! Although IMO a name like "call.36"
>> might look better, the name "call36" also works for me.
>>
>> And regarding the proposed "call36 func" reusing the $ra for the
>> temporary -- IIUC this will break the subroutine return prediction. I
>> think we've been hit by similar regalloc in GCC and have since fixed
>> that: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=11013.
> The URL is wrong, PR 11013 is 20-years old.
>
>> So maybe it's necessary to specify a different temp register after
>> all...
> I think for a normal call (call36 $ra, $ra, func or just call36 $ra) it
> should be OK, but for a sibcall (call36 $zero, $ra, func) it will
Under what circumstances do we generate such a function call?:-[
> confuse the return predictor and we better use another temp register.
>


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] LoongArch: Add new relocation R_LARCH_CALL36
  2023-11-08 11:04     ` Xi Ruoyao
  2023-11-10  8:36       ` chenglulu
@ 2023-11-17  1:40       ` mengqinggang
  2023-11-17  4:44         ` WANG Xuerui
  1 sibling, 1 reply; 8+ messages in thread
From: mengqinggang @ 2023-11-17  1:40 UTC (permalink / raw)
  To: Xi Ruoyao, WANG Xuerui, binutils
  Cc: xuchenghua, chenglulu, liuzhensong, maskray, cailulu, luweining

[-- Attachment #1: Type: text/plain, Size: 1738 bytes --]

We will add call36 and tail36 pseudo instructions in gas:
     call36 f -> pcaddu18i $ra, %call36(f) + jirl $ra, $ra, 0
     tail36 f -> pcaddu18i $t0, %call36(f) + jirl $zero, $t0, 0


在 2023/11/8 下午7:04, Xi Ruoyao 写道:
> On Wed, 2023-11-08 at 01:34 +0800, WANG Xuerui wrote:
>> On 11/7/23 20:16, Xi Ruoyao wrote:
>>> I'd like to have some pseudo instructions:
>>>
>>> - call36 r0, t0, func -> pcaddu18i t0, func + jirl r0, t0, func
>>> - call36 t0, func -> pcaddu18i t0, func + jirl t0, t0, func
>>> - call36 func -> pcaddu18i ra, func + jirl ra, ra, func
>>>
>>> These will make the work of the compiler (or assembly programmer)
>>> easier.  Anyway with R_LARCH_CALL36 the pcaddu18i instruction and the
>>> jirl instruction must be adjacent, so there is no benefit for the
>>> compiler or programmer to use them separately to make a function call.
>>>
>>> And the last one can be annotated with R_LARCH_RELAX so we may relax it
>>> into a bl instruction if possible.
>> The suggestion sounds reasonable! Although IMO a name like "call.36"
>> might look better, the name "call36" also works for me.
>>
>> And regarding the proposed "call36 func" reusing the $ra for the
>> temporary -- IIUC this will break the subroutine return prediction. I
>> think we've been hit by similar regalloc in GCC and have since fixed
>> that: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=11013.
> The URL is wrong, PR 11013 is 20-years old.
>
>> So maybe it's necessary to specify a different temp register after
>> all...
> I think for a normal call (call36 $ra, $ra, func or just call36 $ra) it
> should be OK, but for a sibcall (call36 $zero, $ra, func) it will
> confuse the return predictor and we better use another temp register.
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] LoongArch: Add new relocation R_LARCH_CALL36
  2023-11-17  1:40       ` mengqinggang
@ 2023-11-17  4:44         ` WANG Xuerui
  2023-11-17  9:00           ` chenglulu
  0 siblings, 1 reply; 8+ messages in thread
From: WANG Xuerui @ 2023-11-17  4:44 UTC (permalink / raw)
  To: mengqinggang, Xi Ruoyao, WANG Xuerui, binutils
  Cc: xuchenghua, chenglulu, liuzhensong, maskray, cailulu, luweining

On 11/17/23 09:40, mengqinggang wrote:
> We will add call36 and tail36 pseudo instructions in gas:
>     call36 f -> pcaddu18i $ra, %call36(f) + jirl $ra, $ra, 0
>     tail36 f -> pcaddu18i $t0, %call36(f) + jirl $zero, $t0, 0
>
>
> 在 2023/11/8 下午7:04, Xi Ruoyao 写道:
>> On Wed, 2023-11-08 at 01:34 +0800, WANG Xuerui wrote:
>>> On 11/7/23 20:16, Xi Ruoyao wrote:
>>>> I'd like to have some pseudo instructions:
>>>>
>>>> - call36 r0, t0, func -> pcaddu18i t0, func + jirl r0, t0, func
>>>> - call36 t0, func -> pcaddu18i t0, func + jirl t0, t0, func
>>>> - call36 func -> pcaddu18i ra, func + jirl ra, ra, func
>>>>
>>>> These will make the work of the compiler (or assembly programmer)
>>>> easier.  Anyway with R_LARCH_CALL36 the pcaddu18i instruction and the
>>>> jirl instruction must be adjacent, so there is no benefit for the
>>>> compiler or programmer to use them separately to make a function call.
>>>>
>>>> And the last one can be annotated with R_LARCH_RELAX so we may 
>>>> relax it
>>>> into a bl instruction if possible.
>>> The suggestion sounds reasonable! Although IMO a name like "call.36"
>>> might look better, the name "call36" also works for me.
>>>
>>> And regarding the proposed "call36 func" reusing the $ra for the
>>> temporary -- IIUC this will break the subroutine return prediction. I
>>> think we've been hit by similar regalloc in GCC and have since fixed
>>> that: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=11013.
>> The URL is wrong, PR 11013 is 20-years old.
>>
>>> So maybe it's necessary to specify a different temp register after
>>> all...
>> I think for a normal call (call36 $ra, $ra, func or just call36 $ra) it
>> should be OK, but for a sibcall (call36 $zero, $ra, func) it will
>> confuse the return predictor and we better use another temp register.

Hmm I found out the correct bug number (it was a copy-paste mistake): 
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110136

So the main point is: is any jirl in the "jirl <any>, $ra, <any>" form 
able to trigger the return stack optimization, or is it just "jirl 
$zero, $ra, 0" i.e. "ret"? If the branch predictors in popular LoongArch 
models are smart enough to not consider "jirl $ra, $ra, 0" also as a 
"ret", then re-using the $ra as scratch space is okay. Otherwise we have 
to make the pseudo-insn take another temp register (and optionally 
disallow/warn usage of $ra in that place).

BTW, could we have the RISC-V-like bare "call" and "tail" too? It could
be useful for more uniform asm among different code models ("bl" vs
"pcaddu18i + jirl" vs "lu12i + addi + lu32i + lu52i + jirl"), as well as
lowering the learning curve for those with some RISC-V asm background. We
could make "call" and "tail" behavior vary based on the code model 
chosen, and also provide explicit control via more pseudo-insns like 
"{call,tail}26" and "{call,tail}64" apart from "{call,tail}36".

(The above is a suggestion for you to consider; it's not immediately 
necessary for the medium code model work at hand, just for symmetry and 
enabling users to gain more control over their asm.)


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] LoongArch: Add new relocation R_LARCH_CALL36
  2023-11-17  4:44         ` WANG Xuerui
@ 2023-11-17  9:00           ` chenglulu
  0 siblings, 0 replies; 8+ messages in thread
From: chenglulu @ 2023-11-17  9:00 UTC (permalink / raw)
  To: WANG Xuerui, mengqinggang, Xi Ruoyao, binutils
  Cc: xuchenghua, liuzhensong, maskray, cailulu, luweining


在 2023/11/17 下午12:44, WANG Xuerui 写道:
> On 11/17/23 09:40, mengqinggang wrote:
>> We will add call36 and tail36 pseudo instructions in gas:
>>     call36 f -> pcaddu18i $ra, %call36(f) + jirl $ra, $ra, 0
>>     tail36 f -> pcaddu18i $t0, %call36(f) + jirl $zero, $t0, 0
>>
>>
>> 在 2023/11/8 下午7:04, Xi Ruoyao 写道:
>>> On Wed, 2023-11-08 at 01:34 +0800, WANG Xuerui wrote:
>>>> On 11/7/23 20:16, Xi Ruoyao wrote:
>>>>> I'd like to have some pseudo instructions:
>>>>>
>>>>> - call36 r0, t0, func -> pcaddu18i t0, func + jirl r0, t0, func
>>>>> - call36 t0, func -> pcaddu18i t0, func + jirl t0, t0, func
>>>>> - call36 func -> pcaddu18i ra, func + jirl ra, ra, func
>>>>>
>>>>> These will make the work of the compiler (or assembly programmer)
>>>>> easier.  Anyway with R_LARCH_CALL36 the pcaddu18i instruction and the
>>>>> jirl instruction must be adjacent, so there is no benefit for the
>>>>> compiler or programmer to use them separately to make a function 
>>>>> call.
>>>>>
>>>>> And the last one can be annotated with R_LARCH_RELAX so we may 
>>>>> relax it
>>>>> into a bl instruction if possible.
>>>> The suggestion sounds reasonable! Although IMO a name like "call.36"
>>>> might look better, the name "call36" also works for me.
>>>>
>>>> And regarding the proposed "call36 func" reusing the $ra for the
>>>> temporary -- IIUC this will break the subroutine return prediction. I
>>>> think we've been hit by similar regalloc in GCC and have since fixed
>>>> that: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=11013.
>>> The URL is wrong; PR 11013 is 20 years old.
>>>
>>>> So maybe it's necessary to specify a different temp register after
>>>> all...
>>> I think for a normal call (call36 $ra, $ra, func or just call36 $ra) it
>>> should be OK, but for a sibcall (call36 $zero, $ra, func) it will
>>> confuse the return predictor and we had better use another temp register.
>
> Hmm I found out the correct bug number (it was a copy-paste mistake): 
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110136
>
> So the main point is: is any jirl in the "jirl <any>, $ra, <any>" form 
> able to trigger the return stack optimization, or is it just "jirl 
> $zero, $ra, 0" i.e. "ret"? If the branch predictors in popular 
> LoongArch models are smart enough to not consider "jirl $ra, $ra, 0" 
> also as a "ret", then re-using the $ra as scratch space is okay. 
> Otherwise we have to make the pseudo-insn take another temp register 
> (and optionally disallow/warn usage of $ra in that place).

I don't quite understand under what circumstances this worrying 
instruction sequence would occur.

Now we have two scenarios:

1. call36 (pcaddu18i $ra, %call36(func) + jirl $ra, $ra, 0)

2. tail36 (pcaddu18i $t0, %call36(func) + jirl $r0, $t0, 0)

In neither case will 'jirl $r0, $ra, 0' be generated.

>
> BTW, could we have the RISC-V-like bare "call" and "tail" too? It 
> could be useful for more uniform asm among different code models ("bl" 
> vs "pcaddu18i + jirl" vs "lu12i + addi + lu32i + lu52i + jirl"), apart 
> from lowering the learning curve for those with some RISC-V asm 
> background. We could make the behavior of "call" and "tail" vary based 
> on the code model chosen, and also provide explicit control via more 
> pseudo-insns like "{call,tail}26" and "{call,tail}64" in addition to 
> "{call,tail}36".
>
> (The above is a suggestion for you to consider; it's not immediately 
> necessary for the medium code model work at hand, just for symmetry 
> and enabling users to gain more control over their asm.)


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2023-11-17  9:00 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-07 11:52 [PATCH] LoongArch: Add new relocation R_LARCH_CALL36 mengqinggang
2023-11-07 12:16 ` Xi Ruoyao
2023-11-07 17:34   ` WANG Xuerui
2023-11-08 11:04     ` Xi Ruoyao
2023-11-10  8:36       ` chenglulu
2023-11-17  1:40       ` mengqinggang
2023-11-17  4:44         ` WANG Xuerui
2023-11-17  9:00           ` chenglulu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).