public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH][AArch64] Add support for 64-bit vector-mode ldp/stp
@ 2015-10-16 12:59 Kyrill Tkachov
  2015-10-20 16:14 ` Marcus Shawcroft
  0 siblings, 1 reply; 4+ messages in thread
From: Kyrill Tkachov @ 2015-10-16 12:59 UTC (permalink / raw)
  To: GCC Patches; +Cc: Marcus Shawcroft, Richard Earnshaw, James Greenhalgh

[-- Attachment #1: Type: text/plain, Size: 1184 bytes --]

Hi all,

We already support load/store-pair operations on the D-registers when they contain an FP value, but the peepholes/sched-fusion machinery that
do all the hard work currently ignore 64-bit vector modes.

This patch adds support for fusing loads/stores of 64-bit vector operands into ldp and stp instructions.
I've seen this trigger a few times in SPEC2006. Not too many times, but the times it did trigger the code seemed objectively better
i.e. long sequences of ldr and str instructions essentially halved in size.

Bootstrapped and tested on aarch64-none-linux-gnu.

Ok for trunk?

Thanks,
Kyrill

2015-10-16  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

     * config/aarch64/aarch64.c (aarch64_mode_valid_for_sched_fusion_p):
     New function.
     (fusion_load_store): Use it.
     * config/aarch64/aarch64-ldpstp.md: Add new peephole2s for
     ldp and stp in VD modes.
     * config/aarch64/aarch64-simd.md (load_pair<mode>, VD): New pattern.
     (store_pair<mode>, VD): Likewise.

2015-10-16  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

     * gcc.target/aarch64/stp_vec_64_1.c: New test.
     * gcc.target/aarch64/ldp_vec_64_1.c: New test.

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: aarch64-ldp-stp-64.patch --]
[-- Type: text/x-patch; name=aarch64-ldp-stp-64.patch, Size: 5969 bytes --]

commit b5f4a5b87a7315fb8a4d88da3e4c4afc52d16052
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date:   Tue Oct 6 12:08:24 2015 +0100

    [AArch64] Add support for 64-bit vector-mode ldp/stp

diff --git a/gcc/config/aarch64/aarch64-ldpstp.md b/gcc/config/aarch64/aarch64-ldpstp.md
index 8d6d882..458829c 100644
--- a/gcc/config/aarch64/aarch64-ldpstp.md
+++ b/gcc/config/aarch64/aarch64-ldpstp.md
@@ -98,6 +98,47 @@ (define_peephole2
     }
 })
 
+(define_peephole2
+  [(set (match_operand:VD 0 "register_operand" "")
+	(match_operand:VD 1 "aarch64_mem_pair_operand" ""))
+   (set (match_operand:VD 2 "register_operand" "")
+	(match_operand:VD 3 "memory_operand" ""))]
+  "aarch64_operands_ok_for_ldpstp (operands, true, <MODE>mode)"
+  [(parallel [(set (match_dup 0) (match_dup 1))
+	      (set (match_dup 2) (match_dup 3))])]
+{
+  rtx base, offset_1, offset_2;
+
+  extract_base_offset_in_addr (operands[1], &base, &offset_1);
+  extract_base_offset_in_addr (operands[3], &base, &offset_2);
+  if (INTVAL (offset_1) > INTVAL (offset_2))
+    {
+      std::swap (operands[0], operands[2]);
+      std::swap (operands[1], operands[3]);
+    }
+})
+
+(define_peephole2
+  [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "")
+	(match_operand:VD 1 "register_operand" ""))
+   (set (match_operand:VD 2 "memory_operand" "")
+	(match_operand:VD 3 "register_operand" ""))]
+  "TARGET_SIMD && aarch64_operands_ok_for_ldpstp (operands, false, <MODE>mode)"
+  [(parallel [(set (match_dup 0) (match_dup 1))
+	      (set (match_dup 2) (match_dup 3))])]
+{
+  rtx base, offset_1, offset_2;
+
+  extract_base_offset_in_addr (operands[0], &base, &offset_1);
+  extract_base_offset_in_addr (operands[2], &base, &offset_2);
+  if (INTVAL (offset_1) > INTVAL (offset_2))
+    {
+      std::swap (operands[0], operands[2]);
+      std::swap (operands[1], operands[3]);
+    }
+})
+
+
 ;; Handle sign/zero extended consecutive load/store.
 
 (define_peephole2
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 6a2ab61..bf051c3 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -153,6 +153,34 @@ (define_insn "*aarch64_simd_mov<mode>"
    (set_attr "length" "4,4,4,8,8,8,4")]
 )
 
+(define_insn "load_pair<mode>"
+  [(set (match_operand:VD 0 "register_operand" "=w")
+	(match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
+   (set (match_operand:VD 2 "register_operand" "=w")
+	(match_operand:VD 3 "memory_operand" "m"))]
+  "TARGET_SIMD
+   && rtx_equal_p (XEXP (operands[3], 0),
+		   plus_constant (Pmode,
+				  XEXP (operands[1], 0),
+				  GET_MODE_SIZE (<MODE>mode)))"
+  "ldp\\t%d0, %d2, %1"
+  [(set_attr "type" "neon_ldp")]
+)
+
+(define_insn "store_pair<mode>"
+  [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
+	(match_operand:VD 1 "register_operand" "w"))
+   (set (match_operand:VD 2 "memory_operand" "=m")
+	(match_operand:VD 3 "register_operand" "w"))]
+  "TARGET_SIMD
+   && rtx_equal_p (XEXP (operands[2], 0),
+		   plus_constant (Pmode,
+				  XEXP (operands[0], 0),
+				  GET_MODE_SIZE (<MODE>mode)))"
+  "stp\\t%d1, %d3, %0"
+  [(set_attr "type" "neon_stp")]
+)
+
 (define_split
   [(set (match_operand:VQ 0 "register_operand" "")
       (match_operand:VQ 1 "register_operand" ""))]
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index d7d05b8..7682417 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -3491,6 +3491,18 @@ offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
 	  && offset % GET_MODE_SIZE (mode) == 0);
 }
 
+/* Return true if MODE is one of the modes for which we
+   support LDP/STP operations.  */
+
+static bool
+aarch64_mode_valid_for_sched_fusion_p (machine_mode mode)
+{
+  return mode == SImode || mode == DImode
+	 || mode == SFmode || mode == DFmode
+	 || (aarch64_vector_mode_supported_p (mode)
+	     && GET_MODE_SIZE (mode) == 8);
+}
+
 /* Return true if X is a valid address for machine mode MODE.  If it is,
    fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
    effect.  OUTER_CODE is PARALLEL for a load/store pair.  */
@@ -12863,8 +12875,9 @@ fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
   src = SET_SRC (x);
   dest = SET_DEST (x);
 
-  if (GET_MODE (dest) != SImode && GET_MODE (dest) != DImode
-      && GET_MODE (dest) != SFmode && GET_MODE (dest) != DFmode)
+  machine_mode dest_mode = GET_MODE (dest);
+
+  if (!aarch64_mode_valid_for_sched_fusion_p (dest_mode))
     return SCHED_FUSION_NONE;
 
   if (GET_CODE (src) == SIGN_EXTEND)
diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_vec_64_1.c b/gcc/testsuite/gcc.target/aarch64/ldp_vec_64_1.c
new file mode 100644
index 0000000..62213f3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ldp_vec_64_1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast" } */
+
+typedef int int32x2_t __attribute__ ((__vector_size__ ((8))));
+
+void
+foo (int32x2_t *foo, int32x2_t *bar)
+{
+  int i = 0;
+  int32x2_t val = { 3, 2 };
+
+  for (i = 0; i < 1024; i+=2)
+    foo[i] = bar[i] + bar[i + 1];
+}
+
+/* { dg-final { scan-assembler "ldp\td\[0-9\]+, d\[0-9\]" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/stp_vec_64_1.c b/gcc/testsuite/gcc.target/aarch64/stp_vec_64_1.c
new file mode 100644
index 0000000..11e757a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/stp_vec_64_1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast" } */
+
+
+typedef int int32x2_t __attribute__ ((__vector_size__ ((8))));
+
+void
+bar (int32x2_t *foo)
+{
+  int i = 0;
+  int32x2_t val = { 3, 2 };
+
+  for (i = 0; i < 256; i+=2)
+    {
+      foo[i] = val;
+      foo[i+1] = val;
+    }
+}
+
+/* { dg-final { scan-assembler "stp\td\[0-9\]+, d\[0-9\]" } } */

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2015-10-20 17:14 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-10-16 12:59 [PATCH][AArch64] Add support for 64-bit vector-mode ldp/stp Kyrill Tkachov
2015-10-20 16:14 ` Marcus Shawcroft
2015-10-20 16:28   ` Kyrill Tkachov
2015-10-20 17:15     ` Marcus Shawcroft

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).