public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/vendors/ARM/heads/morello)] aarch64: Fix up Morello purecap PCS
@ 2021-09-21  9:15 Matthew Malcomson
  0 siblings, 0 replies; only message in thread
From: Matthew Malcomson @ 2021-09-21  9:15 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:9fb790d178294700fdcb5297cfedcae9f1d0d64b

commit 9fb790d178294700fdcb5297cfedcae9f1d0d64b
Author: Alex Coplan <alex.coplan@arm.com>
Date:   Thu Sep 16 10:17:48 2021 +0100

    aarch64: Fix up Morello purecap PCS
    
    This patch fixes up some wrong-code bugs with register save/restore on
    purecap.  Prior to this patch, we were e.g. using x29 and x30 instead of
    c29 and c30 for the CFP and CLR. We were also calculating the offsets
    for GPR saves incorrectly in aarch64_layout_frame (assuming 8-byte
    registers instead of 16-byte capability registers).
    
    The patch also extends a number of the stp/ldp patterns to handle
    capabilities.
    
    gcc/ChangeLog:
    
            * config/aarch64/aarch64.c (aarch64_reg_save_mode): Use CADImode
            for GPRs on purecap.
            (aarch64_layout_frame): Use aarch64_reg_save_mode to calculate
            GPR save size instead of hard-coding UNITS_PER_WORD.
            (aarch64_wb_pair): Adjust calculation for purecap.
            (aarch64_gen_store_pair): Refactor using @ pattern.
            (aarch64_gen_load_pair): Likewise.
            * config/aarch64/aarch64.md (load_pair_dw_<DX:mode><DX2:mode>):
            Refactor with @, extend to capabilities.
            (store_pair_dw_<DX:mode><DX2:mode>): Likewise.
            (@loadwb_pair<GPI:mode>_<ADDR:mode>): Extend GPI -> GPIC.
            (@storewb_pair<GPI:mode>_<ADDR:mode>): Likewise.
            * config/aarch64/iterators.md (DXC): New.
            (DXC2): New.
            (dxc_gpr): New.
            (ldpstp_sz): Extend domain to CADImode.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/aarch64/morello/purecap-pcs.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64.c                       | 73 ++++++++--------------
 gcc/config/aarch64/aarch64.md                      | 64 +++++++++----------
 gcc/config/aarch64/iterators.md                    | 21 ++++++-
 .../gcc.target/aarch64/morello/purecap-pcs.c       | 54 ++++++++++++++++
 4 files changed, 132 insertions(+), 80 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 7551dddba07..4d30f91d4d7 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2786,16 +2786,8 @@ aarch64_emit_cfi_for_reg_p (unsigned int regno)
 static machine_mode
 aarch64_reg_save_mode (unsigned int regno)
 {
-  /* MORELLO TODO This needs to be given something for CADImode.
-     The value it would need will be determined based on the ABI, which we'll
-     have to get from any ABI documents and ensure our behaviour matches LLVM.
-
-     I believe that we'll be saving CADImode values in CADImode, however I
-     wonder whether spills of e.g. r0 will *always* have to be done in
-     CADImode, or whether we'll be able to distinguish (elsewhere in the
-     code) if we can spill&restore in DImode.  */
   if (GP_REGNUM_P (regno))
-    return DImode;
+    return TARGET_CAPABILITY_PURE ? CADImode : DImode;
 
   if (FP_REGNUM_P (regno))
     switch (crtl->abi->id ())
@@ -6951,6 +6943,8 @@ aarch64_layout_frame (void)
 {
   poly_int64 offset = 0;
   int regno, last_fp_reg = INVALID_REGNUM;
+  const machine_mode gpr_save_mode = aarch64_reg_save_mode (R0_REGNUM);
+  const poly_int64 gpr_save_size = GET_MODE_SIZE (gpr_save_mode);
   machine_mode vector_save_mode = aarch64_reg_save_mode (V8_REGNUM);
   poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode);
   bool frame_related_fp_reg_p = false;
@@ -7082,9 +7076,9 @@ aarch64_layout_frame (void)
       /* FP and LR are placed in the linkage record.  */
       frame.reg_offset[R29_REGNUM] = offset;
       frame.wb_candidate1 = R29_REGNUM;
-      frame.reg_offset[R30_REGNUM] = offset + UNITS_PER_WORD;
+      frame.reg_offset[R30_REGNUM] = offset + gpr_save_size;
       frame.wb_candidate2 = R30_REGNUM;
-      offset += 2 * UNITS_PER_WORD;
+      offset += 2 * gpr_save_size;
     }
 
   for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
@@ -7095,7 +7089,7 @@ aarch64_layout_frame (void)
 	  frame.wb_candidate1 = regno;
 	else if (frame.wb_candidate2 == INVALID_REGNUM)
 	  frame.wb_candidate2 = regno;
-	offset += UNITS_PER_WORD;
+	offset += gpr_save_size;
       }
 
   poly_int64 max_int_offset = offset;
@@ -7300,7 +7294,14 @@ aarch64_wb_pair (machine_mode mode,
   HOST_WIDE_INT c1, c2;
 
   c1 = (store ? -1 : 1) * adjustment;
-  c2 = (mode == E_TFmode) ? UNITS_PER_VREG : UNITS_PER_WORD;
+
+  if (mode == E_TFmode)
+    c2 = UNITS_PER_VREG;
+  else if (mode == E_CADImode && TARGET_MORELLO)
+    c2 = GET_MODE_SIZE (CADImode);
+  else
+    c2 = UNITS_PER_WORD;
+
   if (store)
     c2 -= adjustment;
 
@@ -7387,23 +7388,12 @@ static rtx
 aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
 			rtx reg2)
 {
-  switch (mode)
-    {
-    case E_DImode:
-      return gen_store_pair_dw_didi (mem1, reg1, mem2, reg2);
-
-    case E_DFmode:
-      return gen_store_pair_dw_dfdf (mem1, reg1, mem2, reg2);
-
-    case E_TFmode:
-      return gen_store_pair_dw_tftf (mem1, reg1, mem2, reg2);
-
-    case E_V4SImode:
-      return gen_vec_store_pairv4siv4si (mem1, reg1, mem2, reg2);
-
-    default:
-      gcc_unreachable ();
-    }
+  if (mode == E_V4SImode)
+    return gen_vec_store_pairv4siv4si (mem1, reg1, mem2, reg2);
+  else if (mode == E_TFmode)
+    return gen_store_pair_dw_tftf (mem1, reg1, mem2, reg2);
+  else
+    return gen_store_pair_dw (mode, mode, mem1, reg1, mem2, reg2);
 }
 
 /* Generate and regurn a load pair isntruction of mode MODE to load register
@@ -7413,23 +7403,12 @@ static rtx
 aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
 		       rtx mem2)
 {
-  switch (mode)
-    {
-    case E_DImode:
-      return gen_load_pair_dw_didi (reg1, mem1, reg2, mem2);
-
-    case E_DFmode:
-      return gen_load_pair_dw_dfdf (reg1, mem1, reg2, mem2);
-
-    case E_TFmode:
-      return gen_load_pair_dw_tftf (reg1, mem1, reg2, mem2);
-
-    case E_V4SImode:
-      return gen_load_pairv4siv4si (reg1, mem1, reg2, mem2);
-
-    default:
-      gcc_unreachable ();
-    }
+  if (mode == E_V4SImode)
+    return gen_load_pairv4siv4si (reg1, mem1, reg2, mem2);
+  else if (mode == E_TFmode)
+    return gen_load_pair_dw_tftf (reg1, mem1, reg2, mem2);
+  else
+    return gen_load_pair_dw (mode, mode, reg1, mem1, reg2, mem2);
 }
 
 /* Return TRUE if return address signing should be enabled for the current
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 922f956991e..9b8d8cbc85c 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1582,17 +1582,17 @@
 )
 
 ;; Storing different modes that can still be merged
-(define_insn "load_pair_dw_<DX:mode><DX2:mode>"
-  [(set (match_operand:DX 0 "register_operand" "=r,w")
-	(match_operand:DX 1 "aarch64_mem_pair_operand" "Ump,Ump"))
-   (set (match_operand:DX2 2 "register_operand" "=r,w")
-	(match_operand:DX2 3 "memory_operand" "m,m"))]
+(define_insn "@load_pair_dw_<DXC:mode><DXC2:mode>"
+  [(set (match_operand:DXC 0 "register_operand" "=r,w")
+	(match_operand:DXC 1 "aarch64_mem_pair_operand" "Ump,Ump"))
+   (set (match_operand:DXC2 2 "register_operand" "=r,w")
+	(match_operand:DXC2 3 "memory_operand" "m,m"))]
    "rtx_equal_p (XEXP (operands[3], 0),
 		 plus_constant (Pmode,
 				XEXP (operands[1], 0),
-				GET_MODE_SIZE (<DX:MODE>mode)))"
+				GET_MODE_SIZE (<DXC:MODE>mode)))"
   "@
-   ldp\\t%x0, %x2, %z1
+   ldp\\t%0, %2, %z1
    ldp\\t%d0, %d2, %z1"
   [(set_attr "type" "load_16,neon_load1_2reg")
    (set_attr "arch" "*,fp")]
@@ -1632,17 +1632,17 @@
 )
 
 ;; Storing different modes that can still be merged
-(define_insn "store_pair_dw_<DX:mode><DX2:mode>"
-  [(set (match_operand:DX 0 "aarch64_mem_pair_operand" "=Ump,Ump")
-	(match_operand:DX 1 "aarch64_reg_zero_or_fp_zero" "rYZ,w"))
-   (set (match_operand:DX2 2 "memory_operand" "=m,m")
-	(match_operand:DX2 3 "aarch64_reg_zero_or_fp_zero" "rYZ,w"))]
+(define_insn "@store_pair_dw_<DXC:mode><DXC2:mode>"
+  [(set (match_operand:DXC 0 "aarch64_mem_pair_operand" "=Ump,Ump")
+	(match_operand:DXC 1 "aarch64_reg_zero_or_fp_zero" "rYZ,w"))
+   (set (match_operand:DXC2 2 "memory_operand" "=m,m")
+	(match_operand:DXC2 3 "aarch64_reg_zero_or_fp_zero" "rYZ,w"))]
    "rtx_equal_p (XEXP (operands[2], 0),
 		 plus_constant (Pmode,
 				XEXP (operands[0], 0),
-				GET_MODE_SIZE (<DX:MODE>mode)))"
+				GET_MODE_SIZE (<DXC:MODE>mode)))"
   "@
-   stp\\t%x1, %x3, %z0
+   stp\\t%<DXC:dxc_gpr>1, %<DXC2:dxc_gpr>3, %z0
    stp\\t%d1, %d3, %z0"
   [(set_attr "type" "store_16,neon_store1_2reg")
    (set_attr "arch" "*,fp")]
@@ -1667,21 +1667,21 @@
 ;; epilogues.
 ;;
 ;; MORELLO TODO: pure-cap.
-(define_insn "@loadwb_pair<GPI:mode>_<ADDR:mode>"
+(define_insn "@loadwb_pair<GPIC:mode>_<ADDR:mode>"
   [(parallel
     [(set (match_operand:ADDR 0 "register_operand" "=k")
-          (<ADDR:PLUS>:ADDR
+	  (<ADDR:PLUS>:ADDR
 	    (match_operand:ADDR 1 "register_operand" "0")
 	    (match_operand:DI 4 "aarch64_mem_pair_offset" "n")))
-     (set (match_operand:GPI 2 "register_operand" "=r")
-          (mem:GPI (match_dup 1)))
-     (set (match_operand:GPI 3 "register_operand" "=r")
-          (mem:GPI (<ADDR:PLUS>:ADDR
+     (set (match_operand:GPIC 2 "register_operand" "=r")
+	  (mem:GPIC (match_dup 1)))
+     (set (match_operand:GPIC 3 "register_operand" "=r")
+	  (mem:GPIC (<ADDR:PLUS>:ADDR
 		     (match_dup 1)
 		     (match_operand:DI 5 "const_int_operand" "n"))))])]
-  "INTVAL (operands[5]) == GET_MODE_SIZE (<GPI:MODE>mode)"
-  "ldp\\t%<GPI:w>2, %<GPI:w>3, [%1], %4"
-  [(set_attr "type" "load_<GPI:ldpstp_sz>")]
+  "INTVAL (operands[5]) == GET_MODE_SIZE (<GPIC:MODE>mode)"
+  "ldp\\t%<GPIC:w>2, %<GPIC:w>3, [%1], %4"
+  [(set_attr "type" "load_<GPIC:ldpstp_sz>")]
 )
 
 (define_insn "@loadwb_pair<GPF:mode>_<ADDR:mode>"
@@ -1722,21 +1722,21 @@
 ;; prologues.
 ;;
 ;; MORELLO TODO: pure-cap.
-(define_insn "@storewb_pair<GPI:mode>_<ADDR:mode>"
+(define_insn "@storewb_pair<GPIC:mode>_<ADDR:mode>"
   [(parallel
     [(set (match_operand:ADDR 0 "register_operand" "=&k")
-          (<ADDR:PLUS>:ADDR
+	  (<ADDR:PLUS>:ADDR
 	    (match_operand:ADDR 1 "register_operand" "0")
 	    (match_operand:DI 4 "aarch64_mem_pair_offset" "n")))
-     (set (mem:GPI (<ADDR:PLUS>:ADDR (match_dup 0) (match_dup 4)))
-          (match_operand:GPI 2 "register_operand" "r"))
-     (set (mem:GPI (<ADDR:PLUS>:ADDR
+     (set (mem:GPIC (<ADDR:PLUS>:ADDR (match_dup 0) (match_dup 4)))
+	  (match_operand:GPIC 2 "register_operand" "r"))
+     (set (mem:GPIC (<ADDR:PLUS>:ADDR
 		     (match_dup 0)
 		     (match_operand:DI 5 "const_int_operand" "n")))
-          (match_operand:GPI 3 "register_operand" "r"))])]
-  "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPI:MODE>mode)"
-  "stp\\t%<GPI:w>2, %<GPI:w>3, [%0, %4]!"
-  [(set_attr "type" "store_<GPI:ldpstp_sz>")]
+	  (match_operand:GPIC 3 "register_operand" "r"))])]
+  "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPIC:MODE>mode)"
+  "stp\\t%<GPIC:w>2, %<GPIC:w>3, [%0, %4]!"
+  [(set_attr "type" "store_<GPIC:ldpstp_sz>")]
 )
 
 (define_insn "@storewb_pair<GPF:mode>_<ADDR:mode>"
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index f486af4c761..681888b68f2 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -314,6 +314,12 @@
 ;; Duplicate of the above
 (define_mode_iterator DX2 [DI DF])
 
+;; Double scalar modes + CADImode
+(define_mode_iterator DXC [DI DF CADI])
+
+;; Duplicate of the above
+(define_mode_iterator DXC2 [DI DF CADI])
+
 ;; Single scalar modes
 (define_mode_iterator SX [SI SF])
 
@@ -919,13 +925,26 @@
 (define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")
 		     (CADI "")])
 
+; Similar to the 'w' attribute, but maps DF -> x.  The domain of this
+; attribute is the DXC[2] iterator.  It is intended to be used with the
+; store_pair_dw_<DXC:mode><DXC2:mode> pattern which implements an
+; optimization whereby (const_double:DF 0.0) is stored to two
+; consecutive doubles using:
+; stp xzr, xzr, [addr].  Hence, the pattern accepts (const_double 0.0) in
+; the GPR alternative.
+(define_mode_attr dxc_gpr [(DI "x") (DF "x") (CADI "")])
+
 ;; The size of access, in bytes.
 ;; Morello TODO: this is right for fake capabilities but wrong for PureCap.
 ;; Doesn't really matter since this iterator is only used for scheduling and
 ;; that's a performance thing.
 (define_mode_attr ldst_sz [(SI "4") (DI "8") (CADI "8")])
 ;; Likewise for load/store pair.
-(define_mode_attr ldpstp_sz [(SI "8") (DI "16")])
+;;
+;; MORELLO TODO: maybe add a store_32 attribute, but this is
+;; only needed for optimisation rather than correctness, so
+;; let's leave this for now.
+(define_mode_attr ldpstp_sz [(SI "8") (DI "16") (CADI "16")])
 
 ;; For inequal width int to float conversion
 (define_mode_attr w1 [(HF "w") (SF "w") (DF "x")])
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/purecap-pcs.c b/gcc/testsuite/gcc.target/aarch64/morello/purecap-pcs.c
new file mode 100644
index 00000000000..c961fae1c25
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/purecap-pcs.c
@@ -0,0 +1,54 @@
+/* { dg-do compile } */
+/* { dg-options "-march=morello+c64 -mabi=purecap -O2" } */
+
+void g(void);
+
+/*
+** foo:
+**	stp	c29, c30, \[csp, -32\]!
+**	mov	c29, csp
+**	bl	g
+**	ldp	c29, c30, \[csp\], 32
+**	b	g
+*/
+void foo(void) { g(); g(); }
+
+
+/* Check save/restore of callee-saved regs.  */
+
+/*
+** clob1:
+**	str	c19, \[csp, -16\]!
+**	ldr	c19, \[csp\], 16
+**	ret
+*/
+void clob1(void)
+{
+  asm("" ::: "c19");
+}
+
+/*
+** clob2:
+**	stp	c19, c20, \[csp, -32\]!
+**	ldp	c19, c20, \[csp\], 32
+**	ret
+*/
+void clob2(void)
+{
+  asm("" ::: "c19", "c20");
+}
+
+/*
+** clob3:
+**	stp	c19, c20, \[csp, -48\]!
+**	str	c21, \[csp, 32\]
+**	ldr	c21, \[csp, 32\]
+**	ldp	c19, c20, \[csp\], 48
+**	ret
+*/
+void clob3(void)
+{
+  asm("" ::: "c19", "c20", "c21");
+}
+
+/* { dg-final { check-function-bodies "**" "" "" } } */


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-09-21  9:15 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-21  9:15 [gcc(refs/vendors/ARM/heads/morello)] aarch64: Fix up Morello purecap PCS Matthew Malcomson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).