public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH 2/5] xtensa: Add support for sibling call optimization
@ 2022-06-14  3:36 Takayuki 'January June' Suwa
  2022-06-14 20:17 ` Max Filippov
  0 siblings, 1 reply; 4+ messages in thread
From: Takayuki 'January June' Suwa @ 2022-06-14  3:36 UTC (permalink / raw)
  To: GCC Patches

This patch introduces support for sibling call optimization, when call0
ABI is in effect.

gcc/ChangeLog:

	* config/xtensa/xtensa-protos.h (xtensa_prepare_expand_call,
	xtensa_emit_sibcall): New prototypes.
	(xtensa_expand_epilogue): Add new argument that specifies whether
	or not sibling call.
	* config/xtensa/xtensa.cc (TARGET_FUNCTION_OK_FOR_SIBCALL):
	New macro definition.
	(xtensa_prepare_expand_call): New function in order to share
	the common code.
	(xtensa_emit_sibcall, xtensa_function_ok_for_sibcall):
	New functions.
	(xtensa_expand_epilogue): Add new argument sibcall_p and use it
	for sibling call handling.
	* config/xtensa/xtensa.md (call, call_value):
	Use xtensa_prepare_expand_call.
	(call_internal, call_value_internal):
	Add the condition in order to be disabled if sibling call.
	(sibcall, sibcall_value, sibcall_epilogue): New expansions.
	(sibcall_internal, sibcall_value_internal): New insn patterns.

gcc/testsuite/ChangeLog:

	* gcc.target/xtensa/sibcalls.c: New.
---
 gcc/config/xtensa/xtensa-protos.h          |  4 +-
 gcc/config/xtensa/xtensa.cc                | 63 ++++++++++++++++++--
 gcc/config/xtensa/xtensa.md                | 68 +++++++++++++++++-----
 gcc/testsuite/gcc.target/xtensa/sibcalls.c | 15 +++++
 4 files changed, 130 insertions(+), 20 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/xtensa/sibcalls.c

diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h
index 168ad70710b..e020a332b03 100644
--- a/gcc/config/xtensa/xtensa-protos.h
+++ b/gcc/config/xtensa/xtensa-protos.h
@@ -53,7 +53,9 @@ extern void xtensa_expand_atomic (enum rtx_code, rtx, rtx, rtx, bool);
 extern void xtensa_emit_loop_end (rtx_insn *, rtx *);
 extern char *xtensa_emit_branch (bool, rtx *);
 extern char *xtensa_emit_movcc (bool, bool, bool, rtx *);
+extern void xtensa_prepare_expand_call (int, rtx *);
 extern char *xtensa_emit_call (int, rtx *);
+extern char *xtensa_emit_sibcall (int, rtx *);
 extern bool xtensa_tls_referenced_p (rtx);
 extern enum rtx_code xtensa_shlrd_which_direction (rtx, rtx);
 
@@ -73,7 +75,7 @@ extern int xtensa_dbx_register_number (int);
 extern long compute_frame_size (poly_int64);
 extern bool xtensa_use_return_instruction_p (void);
 extern void xtensa_expand_prologue (void);
-extern void xtensa_expand_epilogue (void);
+extern void xtensa_expand_epilogue (bool);
 extern void order_regs_for_local_alloc (void);
 extern enum reg_class xtensa_regno_to_class (int regno);
 extern HOST_WIDE_INT xtensa_initial_elimination_offset (int from, int to);
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 58b6eb0b711..b97f37ac956 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -189,7 +189,7 @@ static bool xtensa_can_eliminate (const int from ATTRIBUTE_UNUSED,
 				  const int to);
 static HOST_WIDE_INT xtensa_starting_frame_offset (void);
 static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void);
-
+static bool xtensa_function_ok_for_sibcall (tree, tree);
 static rtx xtensa_delegitimize_address (rtx);
 
 \f
@@ -347,6 +347,9 @@ static rtx xtensa_delegitimize_address (rtx);
 #undef TARGET_DELEGITIMIZE_ADDRESS
 #define TARGET_DELEGITIMIZE_ADDRESS xtensa_delegitimize_address
 
+#undef TARGET_FUNCTION_OK_FOR_SIBCALL
+#define TARGET_FUNCTION_OK_FOR_SIBCALL xtensa_function_ok_for_sibcall
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 \f
@@ -2127,6 +2130,20 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands)
 }
 
 
+void
+xtensa_prepare_expand_call (int callop, rtx *operands)
+{
+  rtx addr = XEXP (operands[callop], 0);
+
+  if (flag_pic && SYMBOL_REF_P (addr)
+      && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr)))
+    addr = gen_sym_PLT (addr);
+
+  if (!call_insn_operand (addr, VOIDmode))
+    XEXP (operands[callop], 0) = copy_to_mode_reg (Pmode, addr);
+}
+
+
 char *
 xtensa_emit_call (int callop, rtx *operands)
 {
@@ -2145,6 +2162,24 @@ xtensa_emit_call (int callop, rtx *operands)
 }
 
 
+char *
+xtensa_emit_sibcall (int callop, rtx *operands)
+{
+  static char result[64];
+  rtx tgt = operands[callop];
+
+  if (GET_CODE (tgt) == CONST_INT)
+    sprintf (result, "j.l\t" HOST_WIDE_INT_PRINT_HEX ", a9",
+	     INTVAL (tgt));
+  else if (register_operand (tgt, VOIDmode))
+    sprintf (result, "jx\t%%%d", callop);
+  else
+    sprintf (result, "j.l\t%%%d, a9", callop);
+
+  return result;
+}
+
+
 bool
 xtensa_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
 {
@@ -3270,7 +3305,7 @@ xtensa_expand_prologue (void)
 }
 
 void
-xtensa_expand_epilogue (void)
+xtensa_expand_epilogue (bool sibcall_p)
 {
   if (!TARGET_WINDOWED_ABI)
     {
@@ -3304,10 +3339,13 @@ xtensa_expand_epilogue (void)
 	  if (xtensa_call_save_reg(regno))
 	    {
 	      rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset));
+	      rtx reg;
 
 	      offset -= UNITS_PER_WORD;
-	      emit_move_insn (gen_rtx_REG (SImode, regno),
+	      emit_move_insn (reg = gen_rtx_REG (SImode, regno),
 			      gen_frame_mem (SImode, x));
+	      if (regno == A0_REG && sibcall_p)
+		emit_use (reg);
 	    }
 	}
 
@@ -3342,7 +3380,8 @@ xtensa_expand_epilogue (void)
 				  EH_RETURN_STACKADJ_RTX));
     }
   cfun->machine->epilogue_done = true;
-  emit_jump_insn (gen_return ());
+  if (!sibcall_p)
+    emit_jump_insn (gen_return ());
 }
 
 bool
@@ -4869,6 +4908,22 @@ xtensa_asan_shadow_offset (void)
   return HOST_WIDE_INT_UC (0x10000000);
 }
 
+/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL.  */
+static bool
+xtensa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
+{
+  /* Do not allow tailcalls if the Windowed Register Option is
+     configured.  */
+  if (TARGET_WINDOWED_ABI)
+    return false;
+
+  /* Do not allow indirect tailcalls.  */
+  if (decl == NULL)
+    return false;
+
+  return true;
+}
+
 static rtx
 xtensa_delegitimize_address (rtx op)
 {
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 5d0f346b01a..181f935e3c3 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -2148,18 +2148,13 @@
 	 (match_operand 1 "" ""))]
   ""
 {
-  rtx addr = XEXP (operands[0], 0);
-  if (flag_pic && GET_CODE (addr) == SYMBOL_REF
-      && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr)))
-    addr = gen_sym_PLT (addr);
-  if (!call_insn_operand (addr, VOIDmode))
-    XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, addr);
+  xtensa_prepare_expand_call (0, operands);
 })
 
 (define_insn "call_internal"
   [(call (mem (match_operand:SI 0 "call_insn_operand" "nir"))
 	 (match_operand 1 "" "i"))]
-  ""
+  "!SIBLING_CALL_P (insn)"
 {
   return xtensa_emit_call (0, operands);
 }
@@ -2173,19 +2168,14 @@
 	      (match_operand 2 "" "")))]
   ""
 {
-  rtx addr = XEXP (operands[1], 0);
-  if (flag_pic && GET_CODE (addr) == SYMBOL_REF
-      && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr)))
-    addr = gen_sym_PLT (addr);
-  if (!call_insn_operand (addr, VOIDmode))
-    XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, addr);
+  xtensa_prepare_expand_call (1, operands);
 })
 
 (define_insn "call_value_internal"
   [(set (match_operand 0 "register_operand" "=a")
         (call (mem (match_operand:SI 1 "call_insn_operand" "nir"))
               (match_operand 2 "" "i")))]
-  ""
+  "!SIBLING_CALL_P (insn)"
 {
   return xtensa_emit_call (1, operands);
 }
@@ -2193,6 +2183,46 @@
    (set_attr "mode"	"none")
    (set_attr "length"	"3")])
 
+(define_expand "sibcall"
+  [(call (match_operand 0 "memory_operand" "")
+	 (match_operand 1 "" ""))]
+  "!TARGET_WINDOWED_ABI"
+{
+  xtensa_prepare_expand_call (0, operands);
+})
+
+(define_insn "sibcall_internal"
+  [(call (mem:SI (match_operand:SI 0 "call_insn_operand" "nir"))
+	 (match_operand 1 "" "i"))]
+  "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)"
+{
+  return xtensa_emit_sibcall (0, operands);
+}
+  [(set_attr "type"	"call")
+   (set_attr "mode"	"none")
+   (set_attr "length"	"3")])
+
+(define_expand "sibcall_value"
+  [(set (match_operand 0 "register_operand" "")
+	(call (match_operand 1 "memory_operand" "")
+	      (match_operand 2 "" "")))]
+  "!TARGET_WINDOWED_ABI"
+{
+  xtensa_prepare_expand_call (1, operands);
+})
+
+(define_insn "sibcall_value_internal"
+  [(set (match_operand 0 "register_operand" "=a")
+	(call (mem:SI (match_operand:SI 1 "call_insn_operand" "nir"))
+	      (match_operand 2 "" "i")))]
+  "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)"
+{
+  return xtensa_emit_sibcall (1, operands);
+}
+  [(set_attr "type"	"call")
+   (set_attr "mode"	"none")
+   (set_attr "length"	"3")])
+
 (define_insn "entry"
   [(set (reg:SI A1_REG)
 	(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "i")]
@@ -2260,7 +2290,15 @@
   [(return)]
   ""
 {
-  xtensa_expand_epilogue ();
+  xtensa_expand_epilogue (false);
+  DONE;
+})
+
+(define_expand "sibcall_epilogue"
+  [(return)]
+  "!TARGET_WINDOWED_ABI"
+{
+  xtensa_expand_epilogue (true);
   DONE;
 })
 
diff --git a/gcc/testsuite/gcc.target/xtensa/sibcalls.c b/gcc/testsuite/gcc.target/xtensa/sibcalls.c
new file mode 100644
index 00000000000..50a7b1aa431
--- /dev/null
+++ b/gcc/testsuite/gcc.target/xtensa/sibcalls.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mabi=call0 -foptimize-sibling-calls" } */
+
+extern int foo(int);
+extern void bar(int);
+
+int test_0(int a) {
+    return foo(a);
+}
+
+void test_1(int a) {
+    bar(a);
+}
+
+/* { dg-final { scan-assembler-not "ret" } } */
-- 
2.20.1

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 2/5] xtensa: Add support for sibling call optimization
  2022-06-14  3:36 [PATCH 2/5] xtensa: Add support for sibling call optimization Takayuki 'January June' Suwa
@ 2022-06-14 20:17 ` Max Filippov
  2022-06-15 12:21   ` [PATCH v2 " Takayuki 'January June' Suwa
  0 siblings, 1 reply; 4+ messages in thread
From: Max Filippov @ 2022-06-14 20:17 UTC (permalink / raw)
  To: Takayuki 'January June' Suwa; +Cc: GCC Patches

Hi Suwa-san,

On Mon, Jun 13, 2022 at 8:54 PM Takayuki 'January June' Suwa
<jjsuwa_sys3175@yahoo.co.jp> wrote:
>
> This patch introduces support for sibling call optimization, when call0
> ABI is in effect.
>
> gcc/ChangeLog:
>
>         * config/xtensa/xtensa-protos.h (xtensa_prepare_expand_call,
>         xtensa_emit_sibcall): New prototypes.
>         (xtensa_expand_epilogue): Add new argument that specifies whether
>         or not sibling call.
>         * config/xtensa/xtensa.cc (TARGET_FUNCTION_OK_FOR_SIBCALL):
>         New macro definition.
>         (xtensa_prepare_expand_call): New function in order to share
>         the common code.
>         (xtensa_emit_sibcall, xtensa_function_ok_for_sibcall):
>         New functions.
>         (xtensa_expand_epilogue): Add new argument sibcall_p and use it
>         for sibling call handling.
>         * config/xtensa/xtensa.md (call, call_value):
>         Use xtensa_prepare_expand_call.
>         (call_internal, call_value_internal):
>         Add the condition in order to be disabled if sibling call.
>         (sibcall, sibcall_value, sibcall_epilogue): New expansions.
>         (sibcall_internal, sibcall_value_internal): New insn patterns.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/xtensa/sibcalls.c: New.
> ---
>  gcc/config/xtensa/xtensa-protos.h          |  4 +-
>  gcc/config/xtensa/xtensa.cc                | 63 ++++++++++++++++++--
>  gcc/config/xtensa/xtensa.md                | 68 +++++++++++++++++-----
>  gcc/testsuite/gcc.target/xtensa/sibcalls.c | 15 +++++
>  4 files changed, 130 insertions(+), 20 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/xtensa/sibcalls.c

This change results in a bunch of new regression test failures:

FAIL: gcc.c-torture/execute/builtins/fprintf.c execution,  -O2
FAIL: gcc.c-torture/execute/builtins/fprintf.c execution,  -O3 -g
FAIL: gcc.c-torture/execute/builtins/fprintf.c execution,  -Os
FAIL: gcc.c-torture/execute/builtins/fprintf.c execution,  -O2 -flto
-fno-use-linker-plugin -flto-partition=none
FAIL: gcc.c-torture/execute/builtins/memset.c execution,  -O2
FAIL: gcc.c-torture/execute/builtins/memset.c execution,  -O3 -g
FAIL: gcc.c-torture/execute/builtins/memset.c execution,  -Os
FAIL: gcc.c-torture/execute/builtins/memset.c execution,  -O2 -flto
-fno-use-linker-plugin -flto-partition=none
FAIL: gcc.c-torture/execute/builtins/printf.c execution,  -O2
FAIL: gcc.c-torture/execute/builtins/printf.c execution,  -O3 -g
FAIL: gcc.c-torture/execute/builtins/printf.c execution,  -Os
FAIL: gcc.c-torture/execute/builtins/printf.c execution,  -O2 -flto
-fno-use-linker-plugin -flto-partition=none
FAIL: gcc.c-torture/execute/builtins/strcpy-2.c execution,  -O2
FAIL: gcc.c-torture/execute/builtins/strcpy-2.c execution,  -O2 -flto
-fno-use-linker-plugin -flto-partition=none
FAIL: gcc.c-torture/execute/builtins/strcpy-2.c execution,  -O2 -flto
-fuse-linker-plugin -fno-fat-lto-objects
FAIL: gcc.c-torture/execute/20000121-1.c   -O2  execution test
FAIL: gcc.c-torture/execute/20000121-1.c   -O3 -g  execution test
FAIL: gcc.c-torture/execute/20000121-1.c   -Os  execution test
FAIL: gcc.c-torture/execute/20000121-1.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
FAIL: gcc.c-torture/execute/921208-2.c   -O2  execution test
FAIL: gcc.c-torture/execute/921208-2.c   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution
test
FAIL: gcc.c-torture/execute/921208-2.c   -O3 -g  execution test
FAIL: gcc.c-torture/execute/921208-2.c   -Os  execution test
FAIL: gcc.c-torture/execute/921208-2.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
FAIL: gcc.c-torture/execute/pr33992.c   -O2  execution test
FAIL: gcc.c-torture/execute/pr33992.c   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution
test
FAIL: gcc.c-torture/execute/pr33992.c   -O3 -g  execution test
FAIL: gcc.c-torture/execute/pr33992.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
FAIL: gcc.c-torture/execute/pr49161.c   -O2  execution test
FAIL: gcc.c-torture/execute/pr49161.c   -O3 -g  execution test
FAIL: gcc.c-torture/execute/pr49161.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
FAIL: gcc.c-torture/execute/pr88714.c   -O2  execution test
FAIL: gcc.c-torture/execute/pr88714.c   -O3 -g  execution test
FAIL: gcc.c-torture/execute/pr88714.c   -Os  execution test
FAIL: gcc.c-torture/execute/pr88714.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
FAIL: gcc.c-torture/execute/pr88714.c   -O2 -flto -fuse-linker-plugin
-fno-fat-lto-objects  execution test
FAIL: gcc.c-torture/execute/pr88739.c   -O2  execution test
FAIL: gcc.c-torture/execute/pr88739.c   -O3 -g  execution test
FAIL: gcc.c-torture/execute/pr88739.c   -Os  execution test
FAIL: gcc.c-torture/execute/pr88739.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
FAIL: gcc.c-torture/execute/pr90949.c   -O2  execution test
FAIL: gcc.c-torture/execute/pr90949.c   -O3 -g  execution test
FAIL: gcc.c-torture/execute/pr90949.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
FAIL: gcc.c-torture/execute/pr90949.c   -O2 -flto -fuse-linker-plugin
-fno-fat-lto-objects  execution test
FAIL: gcc.c-torture/execute/printf-2.c   -O2  execution test
FAIL: gcc.c-torture/execute/printf-2.c   -O3 -g  execution test
FAIL: gcc.c-torture/execute/printf-2.c   -Os  execution test
FAIL: gcc.c-torture/execute/printf-2.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
FAIL: gcc.c-torture/execute/printf-2.c   -O2 -flto -fuse-linker-plugin
-fno-fat-lto-objects  execution test
FAIL: gcc.dg/packed-array.c execution test
FAIL: gcc.dg/pr20115.c execution test
FAIL: gcc.dg/pr44404.c execution test
FAIL: gcc.dg/pr81292-2.c execution test
FAIL: gcc.dg/strlenopt-31.c execution test
FAIL: gcc.dg/strlenopt-81.c execution test
FAIL: gcc.dg/torture/builtin-complex-1.c   -O2  execution test
FAIL: gcc.dg/torture/builtin-complex-1.c   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution
test
FAIL: gcc.dg/torture/builtin-complex-1.c   -O3 -g  execution test
FAIL: gcc.dg/torture/builtin-complex-1.c   -Os  execution test
FAIL: gcc.dg/torture/builtin-complex-1.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
FAIL: gcc.dg/torture/pr56661.c   -Os  execution test
FAIL: gcc.dg/torture/pr65077.c   -O2  execution test
FAIL: gcc.dg/torture/pr65077.c   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution
test
FAIL: gcc.dg/torture/pr65077.c   -O3 -g  execution test
FAIL: gcc.dg/torture/pr65077.c   -Os  execution test
FAIL: gcc.dg/torture/pr65077.c   -O2 -flto -fno-use-linker-plugin
-flto-partition=none  execution test
FAIL: gcc.dg/torture/pr67916.c   -O2  execution test
FAIL: gcc.dg/torture/pr67916.c   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution
test
FAIL: gcc.dg/torture/pr67916.c   -O3 -g  execution test
FAIL: gcc.dg/torture/pr67916.c   -Os  execution test
FAIL: gcc.dg/torture/pr67916.c   -O2 -flto -fno-use-linker-plugin
-flto-partition=none  execution test
FAIL: gcc.dg/tree-ssa/cswtch-3.c execution test
FAIL: gcc.dg/tree-ssa/predcom-dse-5.c execution test
FAIL: gcc.dg/tree-ssa/predcom-dse-6.c execution test
FAIL: gcc.dg/tree-ssa/predcom-dse-7.c execution test

The code generated for e.g. gcc.c-torture/execute/921208-2.c looks like this:

       .file   "921208-2.c"
       .text
       .literal_position
       .align  4
       .global g
       .type   g, @function
g:
       ret.n
       .size   g, .-g
       .literal_position
       .literal .LC1, g@PLT
       .literal .LC3, 1072693248
       .literal .LC4, 1073741824
       .align  4
       .global f
       .type   f, @function
f:
       addi    sp, sp, -16
       s32i.n  a13, sp, 4
       l32r    a13, .LC3
       s32i.n  a12, sp, 8
       s32i.n  a14, sp, 0
       movi.n  a12, 0
       l32r    a14, .LC1
       s32i.n  a0, sp, 12
       mov.n   a3, a13
       mov.n   a4, a12
       mov.n   a5, a13
       mov.n   a2, a12
       callx0  a14
       l32i.n  a0, sp, 12
       l32i.n  a14, sp, 0
       mov.n   a4, a12
       mov.n   a5, a13
       l32i.n  a12, sp, 8
       l32i.n  a13, sp, 4
       l32r    a3, .LC4
       movi.n  a2, 0
       addi    sp, sp, 16
       jx      a14
       .size   f, .-f
       .section        .text.startup,"ax",@progbits
       .literal_position
       .literal .LC5, f@PLT
       .literal .LC6, exit@PLT
       .align  4
       .global main
       .type   main, @function
main:
       addi    sp, sp, -16
       l32r    a2, .LC5
       s32i.n  a0, sp, 12
       callx0  a2
       l32r    a3, .LC6
       movi.n  a2, 0
       callx0  a3
       .size   main, .-main
       .ident  "GCC: (GNU) 13.0.0 20220614 (experimental)"

-- 
Thanks.
-- Max

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH v2 2/5] xtensa: Add support for sibling call optimization
  2022-06-14 20:17 ` Max Filippov
@ 2022-06-15 12:21   ` Takayuki 'January June' Suwa
  2022-06-16  0:00     ` Max Filippov
  0 siblings, 1 reply; 4+ messages in thread
From: Takayuki 'January June' Suwa @ 2022-06-15 12:21 UTC (permalink / raw)
  To: Max Filippov; +Cc: GCC Patches

On 2022/06/15 5:17, Max Filippov wrote:
> Hi Suwa-san,
hi!

> This change results in a bunch of new regression test failures:
> The code generated for e.g. gcc.c-torture/execute/921208-2.c looks like this:
oh, PICed...

Indirect sibcalls (including calls via function pointers, virtual functions, and of course PIC calls in the Xtensa ISA) must be avoided if the pointer to the target cannot be retained during the function epilogue.
Otherwise, the generated code will look like this:
> 	callx0  a14
> 	l32i.n  a0, sp, 12
>  	l32i.n	a14, sp, 0	// restored by the epilogue because A14 is callee-saved
> 	mov.n	a4, a12
> 	mov.n	a5, a13
> 	l32i.n	a12, sp, 8
> 	l32i.n	a13, sp, 4
> 	l32r	a3, .LC4
> 	movi.n	a2, 0
> 	addi	sp, sp, 16
> 	jx	a14		// but A14 pointed to g@PLT in this function...

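Luckily, registers A9 to A11 have no assigned role in the call0 ABI (A9 is already used inside the prologue/epilogue), and the "split2" pass runs before "pro_and_epilogue", so an indirect sibcall target held in a callee-saved register (A12 to A15) can be moved to A10 before the epilogue restores that register...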

===
This patch introduces support for sibling call optimization, when call0
ABI is in effect.

gcc/ChangeLog:

	* config/xtensa/xtensa-protos.h (xtensa_prepare_expand_call,
	xtensa_emit_sibcall): New prototypes.
	(xtensa_expand_epilogue): Add new argument that specifies whether
	or not sibling call.
	* config/xtensa/xtensa.cc (TARGET_FUNCTION_OK_FOR_SIBCALL):
	New macro definition.
	(xtensa_prepare_expand_call): New function in order to share
	the common code.
	(xtensa_emit_sibcall, xtensa_function_ok_for_sibcall):
	New functions.
	(xtensa_expand_epilogue): Add new argument sibcall_p and use it
	for sibling call handling.
	* config/xtensa/xtensa.md (call, call_value):
	Use xtensa_prepare_expand_call.
	(call_internal, call_value_internal):
	Add the condition in order to be disabled if sibling call.
	(sibcall, sibcall_value, sibcall_epilogue): New expansions.
	(sibcall_internal, sibcall_value_internal): New insn patterns,
	and split ones in order to take care of the indirect sibcalls.

gcc/testsuite/ChangeLog:

	* gcc.target/xtensa/sibcalls.c: New.
---
 gcc/config/xtensa/xtensa-protos.h          |  4 +-
 gcc/config/xtensa/xtensa.cc                | 58 +++++++++++++-
 gcc/config/xtensa/xtensa.md                | 93 ++++++++++++++++++----
 gcc/testsuite/gcc.target/xtensa/sibcalls.c | 20 +++++
 4 files changed, 155 insertions(+), 20 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/xtensa/sibcalls.c

diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h
index 168ad70710b..e020a332b03 100644
--- a/gcc/config/xtensa/xtensa-protos.h
+++ b/gcc/config/xtensa/xtensa-protos.h
@@ -53,7 +53,9 @@ extern void xtensa_expand_atomic (enum rtx_code, rtx, rtx, rtx, bool);
 extern void xtensa_emit_loop_end (rtx_insn *, rtx *);
 extern char *xtensa_emit_branch (bool, rtx *);
 extern char *xtensa_emit_movcc (bool, bool, bool, rtx *);
+extern void xtensa_prepare_expand_call (int, rtx *);
 extern char *xtensa_emit_call (int, rtx *);
+extern char *xtensa_emit_sibcall (int, rtx *);
 extern bool xtensa_tls_referenced_p (rtx);
 extern enum rtx_code xtensa_shlrd_which_direction (rtx, rtx);
 
@@ -73,7 +75,7 @@ extern int xtensa_dbx_register_number (int);
 extern long compute_frame_size (poly_int64);
 extern bool xtensa_use_return_instruction_p (void);
 extern void xtensa_expand_prologue (void);
-extern void xtensa_expand_epilogue (void);
+extern void xtensa_expand_epilogue (bool);
 extern void order_regs_for_local_alloc (void);
 extern enum reg_class xtensa_regno_to_class (int regno);
 extern HOST_WIDE_INT xtensa_initial_elimination_offset (int from, int to);
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 58b6eb0b711..d98f8236bdd 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -189,7 +189,7 @@ static bool xtensa_can_eliminate (const int from ATTRIBUTE_UNUSED,
 				  const int to);
 static HOST_WIDE_INT xtensa_starting_frame_offset (void);
 static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void);
-
+static bool xtensa_function_ok_for_sibcall (tree, tree);
 static rtx xtensa_delegitimize_address (rtx);
 
 \f
@@ -347,6 +347,9 @@ static rtx xtensa_delegitimize_address (rtx);
 #undef TARGET_DELEGITIMIZE_ADDRESS
 #define TARGET_DELEGITIMIZE_ADDRESS xtensa_delegitimize_address
 
+#undef TARGET_FUNCTION_OK_FOR_SIBCALL
+#define TARGET_FUNCTION_OK_FOR_SIBCALL xtensa_function_ok_for_sibcall
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 \f
@@ -2127,6 +2130,20 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands)
 }
 
 
+void
+xtensa_prepare_expand_call (int callop, rtx *operands)
+{
+  rtx addr = XEXP (operands[callop], 0);
+
+  if (flag_pic && SYMBOL_REF_P (addr)
+      && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr)))
+    addr = gen_sym_PLT (addr);
+
+  if (!call_insn_operand (addr, VOIDmode))
+    XEXP (operands[callop], 0) = copy_to_mode_reg (Pmode, addr);
+}
+
+
 char *
 xtensa_emit_call (int callop, rtx *operands)
 {
@@ -2145,6 +2162,24 @@ xtensa_emit_call (int callop, rtx *operands)
 }
 
 
+char *
+xtensa_emit_sibcall (int callop, rtx *operands)
+{
+  static char result[64];
+  rtx tgt = operands[callop];
+
+  if (GET_CODE (tgt) == CONST_INT)
+    sprintf (result, "j.l\t" HOST_WIDE_INT_PRINT_HEX ", a9",
+	     INTVAL (tgt));
+  else if (register_operand (tgt, VOIDmode))
+    sprintf (result, "jx\t%%%d", callop);
+  else
+    sprintf (result, "j.l\t%%%d, a9", callop);
+
+  return result;
+}
+
+
 bool
 xtensa_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
 {
@@ -3270,7 +3305,7 @@ xtensa_expand_prologue (void)
 }
 
 void
-xtensa_expand_epilogue (void)
+xtensa_expand_epilogue (bool sibcall_p)
 {
   if (!TARGET_WINDOWED_ABI)
     {
@@ -3304,10 +3339,13 @@ xtensa_expand_epilogue (void)
 	  if (xtensa_call_save_reg(regno))
 	    {
 	      rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset));
+	      rtx reg;
 
 	      offset -= UNITS_PER_WORD;
-	      emit_move_insn (gen_rtx_REG (SImode, regno),
+	      emit_move_insn (reg = gen_rtx_REG (SImode, regno),
 			      gen_frame_mem (SImode, x));
+	      if (regno == A0_REG && sibcall_p)
+		emit_use (reg);
 	    }
 	}
 
@@ -3342,7 +3380,8 @@ xtensa_expand_epilogue (void)
 				  EH_RETURN_STACKADJ_RTX));
     }
   cfun->machine->epilogue_done = true;
-  emit_jump_insn (gen_return ());
+  if (!sibcall_p)
+    emit_jump_insn (gen_return ());
 }
 
 bool
@@ -4869,6 +4908,17 @@ xtensa_asan_shadow_offset (void)
   return HOST_WIDE_INT_UC (0x10000000);
 }
 
+/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL.  */
+static bool
+xtensa_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, tree exp ATTRIBUTE_UNUSED)
+{
+  /* Do not allow sibcalls when windowed registers ABI is in effect.  */
+  if (TARGET_WINDOWED_ABI)
+    return false;
+
+  return true;
+}
+
 static rtx
 xtensa_delegitimize_address (rtx op)
 {
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 5d0f346b01a..3b05166988f 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -25,6 +25,7 @@
   (A7_REG		7)
   (A8_REG		8)
   (A9_REG		9)
+  (A10_REG		10)
 
   (UNSPEC_NOP		2)
   (UNSPEC_PLT		3)
@@ -2148,18 +2149,13 @@
 	 (match_operand 1 "" ""))]
   ""
 {
-  rtx addr = XEXP (operands[0], 0);
-  if (flag_pic && GET_CODE (addr) == SYMBOL_REF
-      && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr)))
-    addr = gen_sym_PLT (addr);
-  if (!call_insn_operand (addr, VOIDmode))
-    XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, addr);
+  xtensa_prepare_expand_call (0, operands);
 })
 
 (define_insn "call_internal"
   [(call (mem (match_operand:SI 0 "call_insn_operand" "nir"))
 	 (match_operand 1 "" "i"))]
-  ""
+  "!SIBLING_CALL_P (insn)"
 {
   return xtensa_emit_call (0, operands);
 }
@@ -2173,19 +2169,14 @@
 	      (match_operand 2 "" "")))]
   ""
 {
-  rtx addr = XEXP (operands[1], 0);
-  if (flag_pic && GET_CODE (addr) == SYMBOL_REF
-      && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr)))
-    addr = gen_sym_PLT (addr);
-  if (!call_insn_operand (addr, VOIDmode))
-    XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, addr);
+  xtensa_prepare_expand_call (1, operands);
 })
 
 (define_insn "call_value_internal"
   [(set (match_operand 0 "register_operand" "=a")
         (call (mem (match_operand:SI 1 "call_insn_operand" "nir"))
               (match_operand 2 "" "i")))]
-  ""
+  "!SIBLING_CALL_P (insn)"
 {
   return xtensa_emit_call (1, operands);
 }
@@ -2193,6 +2184,70 @@
    (set_attr "mode"	"none")
    (set_attr "length"	"3")])
 
+(define_expand "sibcall"
+  [(call (match_operand 0 "memory_operand" "")
+	 (match_operand 1 "" ""))]
+  "!TARGET_WINDOWED_ABI"
+{
+  xtensa_prepare_expand_call (0, operands);
+})
+
+(define_insn "sibcall_internal"
+  [(call (mem:SI (match_operand:SI 0 "call_insn_operand" "nir"))
+	 (match_operand 1 "" "i"))]
+  "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)"
+{
+  return xtensa_emit_sibcall (0, operands);
+}
+  [(set_attr "type"	"call")
+   (set_attr "mode"	"none")
+   (set_attr "length"	"3")])
+
+(define_split
+  [(call (mem:SI (match_operand:SI 0 "register_operand"))
+	 (match_operand 1 ""))]
+  "reload_completed
+   && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)
+   && IN_RANGE (REGNO (operands[0]), 12, 15)"
+  [(set (reg:SI A10_REG)
+	(match_dup 0))
+   (call (mem:SI (reg:SI A10_REG))
+	 (match_dup 1))])
+
+(define_expand "sibcall_value"
+  [(set (match_operand 0 "register_operand" "")
+	(call (match_operand 1 "memory_operand" "")
+	      (match_operand 2 "" "")))]
+  "!TARGET_WINDOWED_ABI"
+{
+  xtensa_prepare_expand_call (1, operands);
+})
+
+(define_insn "sibcall_value_internal"
+  [(set (match_operand 0 "register_operand" "=a")
+	(call (mem:SI (match_operand:SI 1 "call_insn_operand" "nir"))
+	      (match_operand 2 "" "i")))]
+  "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)"
+{
+  return xtensa_emit_sibcall (1, operands);
+}
+  [(set_attr "type"	"call")
+   (set_attr "mode"	"none")
+   (set_attr "length"	"3")])
+
+(define_split
+  [(set (match_operand 0 "register_operand")
+	(call (mem:SI (match_operand:SI 1 "register_operand"))
+	      (match_operand 2 "")))]
+  "reload_completed
+   && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)
+   && IN_RANGE (REGNO (operands[1]), 12, 15)"
+  [(set (reg:SI A10_REG)
+	(match_dup 1))
+   (set (match_dup 0)
+	(call (mem:SI (reg:SI A10_REG))
+	      (match_dup 2)))])
+
 (define_insn "entry"
   [(set (reg:SI A1_REG)
 	(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "i")]
@@ -2260,7 +2315,15 @@
   [(return)]
   ""
 {
-  xtensa_expand_epilogue ();
+  xtensa_expand_epilogue (false);
+  DONE;
+})
+
+(define_expand "sibcall_epilogue"
+  [(return)]
+  "!TARGET_WINDOWED_ABI"
+{
+  xtensa_expand_epilogue (true);
   DONE;
 })
 
diff --git a/gcc/testsuite/gcc.target/xtensa/sibcalls.c b/gcc/testsuite/gcc.target/xtensa/sibcalls.c
new file mode 100644
index 00000000000..d2b3fccf1e5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/xtensa/sibcalls.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mabi=call0 -foptimize-sibling-calls" } */
+
+extern int foo(int);
+extern void bar(int);
+
+int test_0(int a) {
+    return foo(a);
+}
+
+void test_1(int a) {
+    bar(a);
+}
+
+int test_2(int (*a)(void)) {
+    bar(0);
+    return a();
+}
+
+/* { dg-final { scan-assembler-not "ret" } } */
-- 
2.20.1

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2 2/5] xtensa: Add support for sibling call optimization
  2022-06-15 12:21   ` [PATCH v2 " Takayuki 'January June' Suwa
@ 2022-06-16  0:00     ` Max Filippov
  0 siblings, 0 replies; 4+ messages in thread
From: Max Filippov @ 2022-06-16  0:00 UTC (permalink / raw)
  To: Takayuki 'January June' Suwa; +Cc: GCC Patches

On Wed, Jun 15, 2022 at 5:23 AM Takayuki 'January June' Suwa
<jjsuwa_sys3175@yahoo.co.jp> wrote:
>
> On 2022/06/15 5:17, Max Filippov wrote:
> > Hi Suwa-san,
> hi!
>
> > This change results in a bunch of new regression test failures:
> > The code generated for e.g. gcc.c-torture/execute/921208-2.c looks like this:
> oh, PICed...
>
> Indirect sibcalls (including calls via function pointers, virtual functions, and of course PIC calls in the Xtensa ISA) must be avoided if the pointer to the target cannot be retained during the function epilogue.
> Otherwise, the generated code will look like this:
> >       callx0  a14
> >       l32i.n  a0, sp, 12
> >       l32i.n  a14, sp, 0      // restored by the epilogue because A14 is callee-saved
> >       mov.n   a4, a12
> >       mov.n   a5, a13
> >       l32i.n  a12, sp, 8
> >       l32i.n  a13, sp, 4
> >       l32r    a3, .LC4
> >       movi.n  a2, 0
> >       addi    sp, sp, 16
> >       jx      a14             // but A14 pointed to g@PLT in this function...
>
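> Luckily, registers A9 to A11 have no assigned role in the call0 ABI (A9 is already used inside the prologue/epilogue), and the "split2" pass runs before "pro_and_epilogue", so an indirect sibcall target held in a callee-saved register (A12 to A15) can be moved to A10 before the epilogue restores that register...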
>
> ===
> This patch introduces support for sibling call optimization, when call0
> ABI is in effect.
>
> gcc/ChangeLog:
>
>         * config/xtensa/xtensa-protos.h (xtensa_prepare_expand_call,
>         xtensa_emit_sibcall): New prototypes.
>         (xtensa_expand_epilogue): Add new argument that specifies whether
>         or not sibling call.
>         * config/xtensa/xtensa.cc (TARGET_FUNCTION_OK_FOR_SIBCALL):
>         New macro definition.
>         (xtensa_prepare_expand_call): New function in order to share
>         the common code.
>         (xtensa_emit_sibcall, xtensa_function_ok_for_sibcall):
>         New functions.
>         (xtensa_expand_epilogue): Add new argument sibcall_p and use it
>         for sibling call handling.
>         * config/xtensa/xtensa.md (call, call_value):
>         Use xtensa_prepare_expand_call.
>         (call_internal, call_value_internal):
>         Add the condition in order to be disabled if sibling call.
>         (sibcall, sibcall_value, sibcall_epilogue): New expansions.
>         (sibcall_internal, sibcall_value_internal): New insn patterns,
>         and split ones in order to take care of the indirect sibcalls.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/xtensa/sibcalls.c: New.
> ---
>  gcc/config/xtensa/xtensa-protos.h          |  4 +-
>  gcc/config/xtensa/xtensa.cc                | 58 +++++++++++++-
>  gcc/config/xtensa/xtensa.md                | 93 ++++++++++++++++++----
>  gcc/testsuite/gcc.target/xtensa/sibcalls.c | 20 +++++
>  4 files changed, 155 insertions(+), 20 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/xtensa/sibcalls.c

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.

-- 
Thanks.
-- Max

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2022-06-16  0:01 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-14  3:36 [PATCH 2/5] xtensa: Add support for sibling call optimization Takayuki 'January June' Suwa
2022-06-14 20:17 ` Max Filippov
2022-06-15 12:21   ` [PATCH v2 " Takayuki 'January June' Suwa
2022-06-16  0:00     ` Max Filippov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).