Subject: [PATCH] AArch64: Add SVE implementation for cond_copysign.
From: Tamar Christina
Date: 2023-10-05 18:21 UTC
  To: gcc-patches
  Cc: nd, Richard.Earnshaw, Marcus.Shawcroft, Kyrylo.Tkachov,
	richard.sandiford


Hi All,

This adds an implementation for masked copysign, along with an optimized
expansion for masked copysign (x, -1), which is emitted as a single
predicated ORR of the sign bit.
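
For reference, below is a rough scalar model in C of what the new expander
computes for one lane; the helper name, types and structure are illustrative
only and not part of the patch:

    #include <stdint.h>
    #include <string.h>

    /* Illustrative scalar model of masked copysign (double case).
       For an active lane the result is copysign (x, y) computed
       bitwise; for an inactive lane the fallback value is kept.  */
    static double
    cond_copysign_lane (int active, double x, double y, double fallback)
    {
      uint64_t bx, by, res;
      double r;

      if (!active)
	return fallback;

      memcpy (&bx, &x, sizeof bx);
      memcpy (&by, &y, sizeof by);

      /* sign_mask corresponds to HOST_WIDE_INT_M1U << bits in the
	 expander, i.e. just the sign bit of the element.  */
      uint64_t sign_mask = (uint64_t) 1 << 63;

      /* sign = y & sign_mask; mant = x & ~sign_mask; res = sign | mant.
	 When y is known to be -1.0 the sign bit is known to be set, so
	 this collapses to x | sign_mask, i.e. one predicated ORR.  */
      res = (by & sign_mask) | (bx & ~sign_mask);

      memcpy (&r, &res, sizeof r);
      return r;
    }

The (x, -1) special case is why the new test below only needs to scan for a
predicated ORR instruction.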

Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	PR tree-optimization/109154
	* config/aarch64/aarch64-sve.md (cond_copysign<mode>): New.

gcc/testsuite/ChangeLog:

	PR tree-optimization/109154
	* gcc.target/aarch64/sve/fneg-abs_5.c: New test.

--- inline copy of patch -- 
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 071400c820a5b106ddf9dc9faebb117975d74ea0..00ca30c24624dc661254568f45b61a14aa11c305 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -6429,6 +6429,57 @@ (define_expand "copysign<mode>3"
   }
 )
 
+(define_expand "cond_copysign<mode>"
+  [(match_operand:SVE_FULL_F 0 "register_operand")
+   (match_operand:<VPRED> 1 "register_operand")
+   (match_operand:SVE_FULL_F 2 "register_operand")
+   (match_operand:SVE_FULL_F 3 "nonmemory_operand")
+   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+  "TARGET_SVE"
+  {
+    rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
+    rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode);
+    rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
+    int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
+
+    rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
+    rtx arg3 = lowpart_subreg (<V_INT_EQUIV>mode, operands[3], <MODE>mode);
+    rtx arg4 = lowpart_subreg (<V_INT_EQUIV>mode, operands[4], <MODE>mode);
+
+    rtx v_sign_bitmask
+      = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
+					   HOST_WIDE_INT_M1U << bits);
+
+    /* copysign (x, -1) should instead be expanded as orr with the sign
+       bit.  */
+    if (!REG_P (operands[3]))
+      {
+	auto r0
+	  = CONST_DOUBLE_REAL_VALUE (unwrap_const_vec_duplicate (operands[3]));
+	if (-1 == real_to_integer (r0))
+	  {
+	    arg3 = force_reg (<V_INT_EQUIV>mode, v_sign_bitmask);
+	    emit_insn (gen_cond_ior<v_int_equiv> (int_res, operands[1], arg2,
+						  arg3, arg4));
+	    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
+	    DONE;
+	  }
+      }
+
+    operands[2] = force_reg (<MODE>mode, operands[3]);
+    emit_insn (gen_and<v_int_equiv>3 (sign, arg3, v_sign_bitmask));
+    emit_insn (gen_and<v_int_equiv>3
+	       (mant, arg2,
+		aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
+						   ~(HOST_WIDE_INT_M1U
+						     << bits))));
+    emit_insn (gen_cond_ior<v_int_equiv> (int_res, operands[1], sign, mant,
+					  arg4));
+    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
+    DONE;
+  }
+)
+
 (define_expand "xorsign<mode>3"
   [(match_operand:SVE_FULL_F 0 "register_operand")
    (match_operand:SVE_FULL_F 1 "register_operand")
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_5.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_5.c
new file mode 100644
index 0000000000000000000000000000000000000000..f4ecbeecbe1290134e688f46a4389d17155e4a0a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_5.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <arm_neon.h>
+#include <math.h>
+
+/*
+** f1:
+**	...
+**	orr	z[0-9]+.s, p[0-9]+/m, z[0-9]+.s, z[0-9]+.s
+**	...
+*/
+void f1 (float32_t *a, int n)
+{
+  for (int i = 0; i < (n & -8); i++)
+   if (a[i] > n)
+     a[i] = -fabsf (a[i]);
+   else
+     a[i] = n;
+}
+
+/*
+** f2:
+**	...
+**	orr	z[0-9]+.d, p[0-9]+/m, z[0-9]+.d, z[0-9]+.d
+**	...
+*/
+void f2 (float64_t *a, int n)
+{
+  for (int i = 0; i < (n & -8); i++)
+   if (a[i] > n)
+     a[i] = -fabs (a[i]);
+   else
+     a[i] = n;
+}




-- 
