public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH 2/5]AArch64 sve: combine nested if predicates
@ 2021-08-31 13:31 Tamar Christina
  2021-09-03 11:15 ` Richard Sandiford
  0 siblings, 1 reply; 10+ messages in thread
From: Tamar Christina @ 2021-08-31 13:31 UTC (permalink / raw)
  To: gcc-patches
  Cc: nd, Richard.Earnshaw, Marcus.Shawcroft, Kyrylo.Tkachov,
	richard.sandiford

[-- Attachment #1: Type: text/plain, Size: 4152 bytes --]

Hi All,

The following example

void f5(float * restrict z0, float * restrict z1, float *restrict x,
	float * restrict y, float c, int n)
{
    for (int i = 0; i < n; i++) {
        float a = x[i];
        float b = y[i];
        if (a > b) {
            z0[i] = a + b;
            if (a > c) {
                z1[i] = a - b;
            }
        }
    }
}

currently generates:

        ptrue   p3.b, all
        ld1w    z1.s, p1/z, [x2, x5, lsl 2]
        ld1w    z2.s, p1/z, [x3, x5, lsl 2]
        fcmgt   p0.s, p3/z, z1.s, z0.s
        fcmgt   p2.s, p1/z, z1.s, z2.s
        fcmgt   p0.s, p0/z, z1.s, z2.s

The conditions for a > b and a > c become separate comparisons.

After this patch, using a 2 -> 2 split, we generate:

        ld1w    z1.s, p0/z, [x2, x5, lsl 2]
        ld1w    z2.s, p0/z, [x3, x5, lsl 2]
        fcmgt   p1.s, p0/z, z1.s, z2.s
        fcmgt   p1.s, p1/z, z1.s, z0.s

Here the conditions a > b && a > c are folded by using the predicate result of
the previous compare, which allows one of the compares to be removed.

Note: This patch series is working incrementally towards generating the most
      efficient code for this and other loops in small steps.

Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	* config/aarch64/aarch64-sve.md (*mask_cmp_and_combine): New.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/sve/pred-combine-and.c: New test.

--- inline copy of patch -- 
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 2c23c6b12bafb038d82920e7141a418e078a2c65..ee9d32c0a5534209689d9d3abaa560ee5b66347d 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -8162,6 +8162,48 @@ (define_insn_and_split "*mask_inv_combine"
 }
 )
 
+;; Combine multiple masks where the comparisons operators are the same and
+;; each comparison has one parameter shared. e.g. combine a > b && a > c
+(define_insn_and_split "*mask_cmp_and_combine"
+  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
+	(and:<VPRED>
+	  (and:<VPRED>
+	    (unspec:<VPRED>
+	      [(match_operand:<VPRED> 1)
+	       (const_int SVE_KNOWN_PTRUE)
+	       (match_operand:SVE_FULL_F 2 "register_operand" "w")
+	       (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
+	      SVE_COND_FP_CMP_I0)
+	    (unspec:<VPRED>
+	      [(match_dup 1)
+	       (const_int SVE_KNOWN_PTRUE)
+	       (match_dup 2)
+	       (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "wDz")]
+	      SVE_COND_FP_CMP_I0))
+	    (match_operand:<VPRED> 5 "register_operand" "Upa")))
+   (clobber (match_scratch:<VPRED> 6 "=&Upa"))]
+  "TARGET_SVE"
+  "#"
+  "&& 1"
+  [(set (match_dup 6)
+	(unspec:<VPRED>
+	  [(match_dup 5)
+	   (const_int SVE_MAYBE_NOT_PTRUE)
+	   (match_dup 2)
+	   (match_dup 3)]
+	  SVE_COND_FP_CMP_I0))
+   (set (match_dup 0)
+	(unspec:<VPRED>
+	  [(match_dup 6)
+	   (const_int SVE_MAYBE_NOT_PTRUE)
+	   (match_dup 2)
+	   (match_dup 4)]
+	  SVE_COND_FP_CMP_I0))]
+{
+  operands[6] = gen_reg_rtx (<VPRED>mode);
+}
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- [FP] Absolute comparisons
 ;; -------------------------------------------------------------------------
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-combine-and.c b/gcc/testsuite/gcc.target/aarch64/sve/pred-combine-and.c
new file mode 100644
index 0000000000000000000000000000000000000000..d395b7f84bb15b588493611df5a47549726ac24a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-combine-and.c
@@ -0,0 +1,18 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 --save-temps" } */
+
+void f5(float * restrict z0, float * restrict z1, float *restrict x, float * restrict y, float c, int n)
+{
+    for (int i = 0; i < n; i++) {
+        float a = x[i];
+        float b = y[i];
+        if (a > b) {
+            z0[i] = a + b;
+            if (a > c) {
+                z1[i] = a - b;
+            }
+        }
+    }
+}
+
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-9]+/z, z[0-9]+\.s, z[0-9]+\.s} 2 } } */


-- 

[-- Attachment #2: rb14777.patch --]
[-- Type: text/x-diff, Size: 2669 bytes --]

diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 2c23c6b12bafb038d82920e7141a418e078a2c65..ee9d32c0a5534209689d9d3abaa560ee5b66347d 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -8162,6 +8162,48 @@ (define_insn_and_split "*mask_inv_combine"
 }
 )
 
+;; Combine multiple masks where the comparisons operators are the same and
+;; each comparison has one parameter shared. e.g. combine a > b && a > c
+(define_insn_and_split "*mask_cmp_and_combine"
+  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
+	(and:<VPRED>
+	  (and:<VPRED>
+	    (unspec:<VPRED>
+	      [(match_operand:<VPRED> 1)
+	       (const_int SVE_KNOWN_PTRUE)
+	       (match_operand:SVE_FULL_F 2 "register_operand" "w")
+	       (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
+	      SVE_COND_FP_CMP_I0)
+	    (unspec:<VPRED>
+	      [(match_dup 1)
+	       (const_int SVE_KNOWN_PTRUE)
+	       (match_dup 2)
+	       (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "wDz")]
+	      SVE_COND_FP_CMP_I0))
+	    (match_operand:<VPRED> 5 "register_operand" "Upa")))
+   (clobber (match_scratch:<VPRED> 6 "=&Upa"))]
+  "TARGET_SVE"
+  "#"
+  "&& 1"
+  [(set (match_dup 6)
+	(unspec:<VPRED>
+	  [(match_dup 5)
+	   (const_int SVE_MAYBE_NOT_PTRUE)
+	   (match_dup 2)
+	   (match_dup 3)]
+	  SVE_COND_FP_CMP_I0))
+   (set (match_dup 0)
+	(unspec:<VPRED>
+	  [(match_dup 6)
+	   (const_int SVE_MAYBE_NOT_PTRUE)
+	   (match_dup 2)
+	   (match_dup 4)]
+	  SVE_COND_FP_CMP_I0))]
+{
+  operands[6] = gen_reg_rtx (<VPRED>mode);
+}
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- [FP] Absolute comparisons
 ;; -------------------------------------------------------------------------
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-combine-and.c b/gcc/testsuite/gcc.target/aarch64/sve/pred-combine-and.c
new file mode 100644
index 0000000000000000000000000000000000000000..d395b7f84bb15b588493611df5a47549726ac24a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-combine-and.c
@@ -0,0 +1,18 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 --save-temps" } */
+
+void f5(float * restrict z0, float * restrict z1, float *restrict x, float * restrict y, float c, int n)
+{
+    for (int i = 0; i < n; i++) {
+        float a = x[i];
+        float b = y[i];
+        if (a > b) {
+            z0[i] = a + b;
+            if (a > c) {
+                z1[i] = a - b;
+            }
+        }
+    }
+}
+
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-9]+/z, z[0-9]+\.s, z[0-9]+\.s} 2 } } */


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2021-12-03 11:55 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-31 13:31 [PATCH 2/5]AArch64 sve: combine nested if predicates Tamar Christina
2021-09-03 11:15 ` Richard Sandiford
2021-09-21 16:54   ` Tamar Christina
2021-10-11 16:40     ` Richard Sandiford
2021-11-02 13:49       ` Tamar Christina
2021-11-02 15:04         ` Richard Sandiford
2021-11-15 10:47           ` Tamar Christina
2021-11-30 16:24             ` Richard Sandiford
2021-12-02 21:33               ` Tamar Christina
2021-12-03 11:55                 ` Richard Sandiford

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).