public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH]middle-end match.pd: optimize fneg (fabs (x)) to x | (1 << signbit(x)) [PR109154]
@ 2023-09-27  0:50 Tamar Christina
  2023-09-27  1:17 ` Andrew Pinski
  2023-09-29 15:00 ` Jeff Law
  0 siblings, 2 replies; 17+ messages in thread
From: Tamar Christina @ 2023-09-27  0:50 UTC (permalink / raw)
  To: gcc-patches; +Cc: nd, rguenther, jlaw

[-- Attachment #1: Type: text/plain, Size: 11489 bytes --]

Hi All,

For targets that allow conversion between int and float modes this adds a new
optimization transforming fneg (fabs (x)) into x | (1 << signbit(x)).  Such
sequences are common in scientific code working with gradients.

The transformed instruction if the target has an inclusive-OR that takes an
immediate is both shorter an faster.  For those that don't the immediate has
to be seperate constructed but this still ends up being faster as the immediate
construction is not on the critical path.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	PR tree-optimization/109154
	* match.pd: Add new neg+abs rule.

gcc/testsuite/ChangeLog:

	PR tree-optimization/109154
	* gcc.target/aarch64/fneg-abs_1.c: New test.
	* gcc.target/aarch64/fneg-abs_2.c: New test.
	* gcc.target/aarch64/fneg-abs_3.c: New test.
	* gcc.target/aarch64/fneg-abs_4.c: New test.
	* gcc.target/aarch64/sve/fneg-abs_1.c: New test.
	* gcc.target/aarch64/sve/fneg-abs_2.c: New test.
	* gcc.target/aarch64/sve/fneg-abs_3.c: New test.
	* gcc.target/aarch64/sve/fneg-abs_4.c: New test.

--- inline copy of patch -- 
diff --git a/gcc/match.pd b/gcc/match.pd
index 39c7ea1088f25538ed8bd26ee89711566141a71f..8ebde06dcd4b26d694826cffad0fb17e1136600a 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -9476,3 +9476,57 @@ and,
       }
       (if (full_perm_p)
 	(vec_perm (op@3 @0 @1) @3 @2))))))
+
+/* Transform fneg (fabs (X)) -> X | 1 << signbit (X).  */
+
+(simplify
+ (negate (abs @0))
+ (if (FLOAT_TYPE_P (type)
+      /* We have to delay this rewriting till after forward prop because otherwise
+	 it's harder to do trigonometry optimizations. e.g. cos(-fabs(x)) is not
+	 matched in one go.  Instead cos (-x) is matched first followed by cos(|x|).
+	 The bottom op approach makes this rule match first and it's not untill
+	 fwdprop that we match top down.  There are manu such simplications so we
+	 delay this optimization till later on.  */
+      && canonicalize_math_after_vectorization_p ())
+  (with {
+    tree itype = unsigned_type_for (type);
+    machine_mode mode = TYPE_MODE (type);
+    const struct real_format *float_fmt = FLOAT_MODE_FORMAT (mode);
+    auto optab = VECTOR_TYPE_P (type) ? optab_vector : optab_default; }
+   (if (float_fmt
+	&& float_fmt->signbit_rw >= 0
+	&& targetm.can_change_mode_class (TYPE_MODE (itype),
+					  TYPE_MODE (type), ALL_REGS)
+        && target_supports_op_p (itype, BIT_IOR_EXPR, optab))
+    (with { wide_int wone = wi::one (element_precision (type));
+	    int sbit = float_fmt->signbit_rw;
+	    auto stype = VECTOR_TYPE_P (type) ? TREE_TYPE (itype) : itype;
+	    tree sign_bit = wide_int_to_tree (stype, wi::lshift (wone, sbit));}
+     (view_convert:type
+      (bit_ior (view_convert:itype @0)
+	       { build_uniform_cst (itype, sign_bit); } )))))))
+
+/* Repeat the same but for conditional negate.  */
+
+(simplify
+ (IFN_COND_NEG @1 (abs @0) @2)
+ (if (FLOAT_TYPE_P (type))
+  (with {
+    tree itype = unsigned_type_for (type);
+    machine_mode mode = TYPE_MODE (type);
+    const struct real_format *float_fmt = FLOAT_MODE_FORMAT (mode);
+    auto optab = VECTOR_TYPE_P (type) ? optab_vector : optab_default; }
+   (if (float_fmt
+	&& float_fmt->signbit_rw >= 0
+	&& targetm.can_change_mode_class (TYPE_MODE (itype),
+					  TYPE_MODE (type), ALL_REGS)
+        && target_supports_op_p (itype, BIT_IOR_EXPR, optab))
+    (with { wide_int wone = wi::one (element_precision (type));
+	    int sbit = float_fmt->signbit_rw;
+	    auto stype = VECTOR_TYPE_P (type) ? TREE_TYPE (itype) : itype;
+	    tree sign_bit = wide_int_to_tree (stype, wi::lshift (wone, sbit));}
+     (view_convert:type
+      (IFN_COND_IOR @1 (view_convert:itype @0)
+	       { build_uniform_cst (itype, sign_bit); }
+	       (view_convert:itype @2) )))))))
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..f823013c3ddf6b3a266c3abfcbf2642fc2a75fa6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#include <arm_neon.h>
+
+/*
+** t1:
+**	orr	v[0-9]+.2s, #128, lsl #24
+**	ret
+*/
+float32x2_t t1 (float32x2_t a)
+{
+  return vneg_f32 (vabs_f32 (a));
+}
+
+/*
+** t2:
+**	orr	v[0-9]+.4s, #128, lsl #24
+**	ret
+*/
+float32x4_t t2 (float32x4_t a)
+{
+  return vnegq_f32 (vabsq_f32 (a));
+}
+
+/*
+** t3:
+**	adrp	x0, .LC[0-9]+
+**	ldr	q[0-9]+, \[x0, #:lo12:.LC0\]
+**	orr	v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+**	ret
+*/
+float64x2_t t3 (float64x2_t a)
+{
+  return vnegq_f64 (vabsq_f64 (a));
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..141121176b309e4b2aa413dc55271a6e3c93d5e1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#include <arm_neon.h>
+#include <math.h>
+
+/*
+** f1:
+**	movi	v[0-9]+.2s, 0x80, lsl 24
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+float32_t f1 (float32_t a)
+{
+  return -fabsf (a);
+}
+
+/*
+** f2:
+**	mov	x0, -9223372036854775808
+**	fmov	d[0-9]+, x0
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+float64_t f2 (float64_t a)
+{
+  return -fabs (a);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..b4652173a95d104ddfa70c497f0627a61ea89d3b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#include <arm_neon.h>
+#include <math.h>
+
+/*
+** f1:
+**	...
+**	ldr	q[0-9]+, \[x0\]
+**	orr	v[0-9]+.4s, #128, lsl #24
+**	str	q[0-9]+, \[x0\], 16
+**	...
+*/
+void f1 (float32_t *a, int n)
+{
+  for (int i = 0; i < (n & -8); i++)
+   a[i] = -fabsf (a[i]);
+}
+
+/*
+** f2:
+**	...
+**	ldr	q[0-9]+, \[x0\]
+**	orr	v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+**	str	q[0-9]+, \[x0\], 16
+**	...
+*/
+void f2 (float64_t *a, int n)
+{
+  for (int i = 0; i < (n & -8); i++)
+   a[i] = -fabs (a[i]);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c
new file mode 100644
index 0000000000000000000000000000000000000000..10879dea74462d34b26160eeb0bd54ead063166b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#include <string.h>
+
+/*
+** negabs:
+**	mov	x0, -9223372036854775808
+**	fmov	d[0-9]+, x0
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+double negabs (double x)
+{
+   unsigned long long y;
+   memcpy (&y, &x, sizeof(double));
+   y = y | (1UL << 63);
+   memcpy (&x, &y, sizeof(double));
+   return x;
+}
+
+/*
+** negabsf:
+**	movi	v[0-9]+.2s, 0x80, lsl 24
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+float negabsf (float x)
+{
+   unsigned int y;
+   memcpy (&y, &x, sizeof(float));
+   y = y | (1U << 31);
+   memcpy (&x, &y, sizeof(float));
+   return x;
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..0c7664e6de77a497682952653ffd417453854d52
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <arm_neon.h>
+
+/*
+** t1:
+**	orr	v[0-9]+.2s, #128, lsl #24
+**	ret
+*/
+float32x2_t t1 (float32x2_t a)
+{
+  return vneg_f32 (vabs_f32 (a));
+}
+
+/*
+** t2:
+**	orr	v[0-9]+.4s, #128, lsl #24
+**	ret
+*/
+float32x4_t t2 (float32x4_t a)
+{
+  return vnegq_f32 (vabsq_f32 (a));
+}
+
+/*
+** t3:
+**	adrp	x0, .LC[0-9]+
+**	ldr	q[0-9]+, \[x0, #:lo12:.LC0\]
+**	orr	v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+**	ret
+*/
+float64x2_t t3 (float64x2_t a)
+{
+  return vnegq_f64 (vabsq_f64 (a));
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..a60cd31b9294af2dac69eed1c93f899bd5c78fca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <arm_neon.h>
+#include <math.h>
+
+/*
+** f1:
+**	movi	v[0-9]+.2s, 0x80, lsl 24
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+float32_t f1 (float32_t a)
+{
+  return -fabsf (a);
+}
+
+/*
+** f2:
+**	mov	x0, -9223372036854775808
+**	fmov	d[0-9]+, x0
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+float64_t f2 (float64_t a)
+{
+  return -fabs (a);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..1bf34328d8841de8e6b0a5458562a9f00e31c275
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <arm_neon.h>
+#include <math.h>
+
+/*
+** f1:
+**	...
+**	ld1w	z[0-9]+.s, p[0-9]+/z, \[x0, x2, lsl 2\]
+**	orr	z[0-9]+.s, z[0-9]+.s, #0x80000000
+**	st1w	z[0-9]+.s, p[0-9]+, \[x0, x2, lsl 2\]
+**	...
+*/
+void f1 (float32_t *a, int n)
+{
+  for (int i = 0; i < (n & -8); i++)
+   a[i] = -fabsf (a[i]);
+}
+
+/*
+** f2:
+**	...
+**	ld1d	z[0-9]+.d, p[0-9]+/z, \[x0, x2, lsl 3\]
+**	orr	z[0-9]+.d, z[0-9]+.d, #0x8000000000000000
+**	st1d	z[0-9]+.d, p[0-9]+, \[x0, x2, lsl 3\]
+**	...
+*/
+void f2 (float64_t *a, int n)
+{
+  for (int i = 0; i < (n & -8); i++)
+   a[i] = -fabs (a[i]);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
new file mode 100644
index 0000000000000000000000000000000000000000..21f2a8da2a5d44e3d01f6604ca7be87e3744d494
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <string.h>
+
+/*
+** negabs:
+**	mov	x0, -9223372036854775808
+**	fmov	d[0-9]+, x0
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+double negabs (double x)
+{
+   unsigned long long y;
+   memcpy (&y, &x, sizeof(double));
+   y = y | (1UL << 63);
+   memcpy (&x, &y, sizeof(double));
+   return x;
+}
+
+/*
+** negabsf:
+**	movi	v[0-9]+.2s, 0x80, lsl 24
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+float negabsf (float x)
+{
+   unsigned int y;
+   memcpy (&y, &x, sizeof(float));
+   y = y | (1U << 31);
+   memcpy (&x, &y, sizeof(float));
+   return x;
+}
+




-- 

[-- Attachment #2: rb17718.patch --]
[-- Type: text/plain, Size: 10334 bytes --]

diff --git a/gcc/match.pd b/gcc/match.pd
index 39c7ea1088f25538ed8bd26ee89711566141a71f..8ebde06dcd4b26d694826cffad0fb17e1136600a 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -9476,3 +9476,57 @@ and,
       }
       (if (full_perm_p)
 	(vec_perm (op@3 @0 @1) @3 @2))))))
+
+/* Transform fneg (fabs (X)) -> X | 1 << signbit (X).  */
+
+(simplify
+ (negate (abs @0))
+ (if (FLOAT_TYPE_P (type)
+      /* We have to delay this rewriting till after forward prop because otherwise
+	 it's harder to do trigonometry optimizations. e.g. cos(-fabs(x)) is not
+	 matched in one go.  Instead cos (-x) is matched first followed by cos(|x|).
+	 The bottom op approach makes this rule match first and it's not untill
+	 fwdprop that we match top down.  There are manu such simplications so we
+	 delay this optimization till later on.  */
+      && canonicalize_math_after_vectorization_p ())
+  (with {
+    tree itype = unsigned_type_for (type);
+    machine_mode mode = TYPE_MODE (type);
+    const struct real_format *float_fmt = FLOAT_MODE_FORMAT (mode);
+    auto optab = VECTOR_TYPE_P (type) ? optab_vector : optab_default; }
+   (if (float_fmt
+	&& float_fmt->signbit_rw >= 0
+	&& targetm.can_change_mode_class (TYPE_MODE (itype),
+					  TYPE_MODE (type), ALL_REGS)
+        && target_supports_op_p (itype, BIT_IOR_EXPR, optab))
+    (with { wide_int wone = wi::one (element_precision (type));
+	    int sbit = float_fmt->signbit_rw;
+	    auto stype = VECTOR_TYPE_P (type) ? TREE_TYPE (itype) : itype;
+	    tree sign_bit = wide_int_to_tree (stype, wi::lshift (wone, sbit));}
+     (view_convert:type
+      (bit_ior (view_convert:itype @0)
+	       { build_uniform_cst (itype, sign_bit); } )))))))
+
+/* Repeat the same but for conditional negate.  */
+
+(simplify
+ (IFN_COND_NEG @1 (abs @0) @2)
+ (if (FLOAT_TYPE_P (type))
+  (with {
+    tree itype = unsigned_type_for (type);
+    machine_mode mode = TYPE_MODE (type);
+    const struct real_format *float_fmt = FLOAT_MODE_FORMAT (mode);
+    auto optab = VECTOR_TYPE_P (type) ? optab_vector : optab_default; }
+   (if (float_fmt
+	&& float_fmt->signbit_rw >= 0
+	&& targetm.can_change_mode_class (TYPE_MODE (itype),
+					  TYPE_MODE (type), ALL_REGS)
+        && target_supports_op_p (itype, BIT_IOR_EXPR, optab))
+    (with { wide_int wone = wi::one (element_precision (type));
+	    int sbit = float_fmt->signbit_rw;
+	    auto stype = VECTOR_TYPE_P (type) ? TREE_TYPE (itype) : itype;
+	    tree sign_bit = wide_int_to_tree (stype, wi::lshift (wone, sbit));}
+     (view_convert:type
+      (IFN_COND_IOR @1 (view_convert:itype @0)
+	       { build_uniform_cst (itype, sign_bit); }
+	       (view_convert:itype @2) )))))))
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..f823013c3ddf6b3a266c3abfcbf2642fc2a75fa6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#include <arm_neon.h>
+
+/*
+** t1:
+**	orr	v[0-9]+.2s, #128, lsl #24
+**	ret
+*/
+float32x2_t t1 (float32x2_t a)
+{
+  return vneg_f32 (vabs_f32 (a));
+}
+
+/*
+** t2:
+**	orr	v[0-9]+.4s, #128, lsl #24
+**	ret
+*/
+float32x4_t t2 (float32x4_t a)
+{
+  return vnegq_f32 (vabsq_f32 (a));
+}
+
+/*
+** t3:
+**	adrp	x0, .LC[0-9]+
+**	ldr	q[0-9]+, \[x0, #:lo12:.LC0\]
+**	orr	v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+**	ret
+*/
+float64x2_t t3 (float64x2_t a)
+{
+  return vnegq_f64 (vabsq_f64 (a));
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..141121176b309e4b2aa413dc55271a6e3c93d5e1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#include <arm_neon.h>
+#include <math.h>
+
+/*
+** f1:
+**	movi	v[0-9]+.2s, 0x80, lsl 24
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+float32_t f1 (float32_t a)
+{
+  return -fabsf (a);
+}
+
+/*
+** f2:
+**	mov	x0, -9223372036854775808
+**	fmov	d[0-9]+, x0
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+float64_t f2 (float64_t a)
+{
+  return -fabs (a);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..b4652173a95d104ddfa70c497f0627a61ea89d3b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#include <arm_neon.h>
+#include <math.h>
+
+/*
+** f1:
+**	...
+**	ldr	q[0-9]+, \[x0\]
+**	orr	v[0-9]+.4s, #128, lsl #24
+**	str	q[0-9]+, \[x0\], 16
+**	...
+*/
+void f1 (float32_t *a, int n)
+{
+  for (int i = 0; i < (n & -8); i++)
+   a[i] = -fabsf (a[i]);
+}
+
+/*
+** f2:
+**	...
+**	ldr	q[0-9]+, \[x0\]
+**	orr	v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+**	str	q[0-9]+, \[x0\], 16
+**	...
+*/
+void f2 (float64_t *a, int n)
+{
+  for (int i = 0; i < (n & -8); i++)
+   a[i] = -fabs (a[i]);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c
new file mode 100644
index 0000000000000000000000000000000000000000..10879dea74462d34b26160eeb0bd54ead063166b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#include <string.h>
+
+/*
+** negabs:
+**	mov	x0, -9223372036854775808
+**	fmov	d[0-9]+, x0
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+double negabs (double x)
+{
+   unsigned long long y;
+   memcpy (&y, &x, sizeof(double));
+   y = y | (1UL << 63);
+   memcpy (&x, &y, sizeof(double));
+   return x;
+}
+
+/*
+** negabsf:
+**	movi	v[0-9]+.2s, 0x80, lsl 24
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+float negabsf (float x)
+{
+   unsigned int y;
+   memcpy (&y, &x, sizeof(float));
+   y = y | (1U << 31);
+   memcpy (&x, &y, sizeof(float));
+   return x;
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..0c7664e6de77a497682952653ffd417453854d52
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <arm_neon.h>
+
+/*
+** t1:
+**	orr	v[0-9]+.2s, #128, lsl #24
+**	ret
+*/
+float32x2_t t1 (float32x2_t a)
+{
+  return vneg_f32 (vabs_f32 (a));
+}
+
+/*
+** t2:
+**	orr	v[0-9]+.4s, #128, lsl #24
+**	ret
+*/
+float32x4_t t2 (float32x4_t a)
+{
+  return vnegq_f32 (vabsq_f32 (a));
+}
+
+/*
+** t3:
+**	adrp	x0, .LC[0-9]+
+**	ldr	q[0-9]+, \[x0, #:lo12:.LC0\]
+**	orr	v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+**	ret
+*/
+float64x2_t t3 (float64x2_t a)
+{
+  return vnegq_f64 (vabsq_f64 (a));
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..a60cd31b9294af2dac69eed1c93f899bd5c78fca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <arm_neon.h>
+#include <math.h>
+
+/*
+** f1:
+**	movi	v[0-9]+.2s, 0x80, lsl 24
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+float32_t f1 (float32_t a)
+{
+  return -fabsf (a);
+}
+
+/*
+** f2:
+**	mov	x0, -9223372036854775808
+**	fmov	d[0-9]+, x0
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+float64_t f2 (float64_t a)
+{
+  return -fabs (a);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..1bf34328d8841de8e6b0a5458562a9f00e31c275
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <arm_neon.h>
+#include <math.h>
+
+/*
+** f1:
+**	...
+**	ld1w	z[0-9]+.s, p[0-9]+/z, \[x0, x2, lsl 2\]
+**	orr	z[0-9]+.s, z[0-9]+.s, #0x80000000
+**	st1w	z[0-9]+.s, p[0-9]+, \[x0, x2, lsl 2\]
+**	...
+*/
+void f1 (float32_t *a, int n)
+{
+  for (int i = 0; i < (n & -8); i++)
+   a[i] = -fabsf (a[i]);
+}
+
+/*
+** f2:
+**	...
+**	ld1d	z[0-9]+.d, p[0-9]+/z, \[x0, x2, lsl 3\]
+**	orr	z[0-9]+.d, z[0-9]+.d, #0x8000000000000000
+**	st1d	z[0-9]+.d, p[0-9]+, \[x0, x2, lsl 3\]
+**	...
+*/
+void f2 (float64_t *a, int n)
+{
+  for (int i = 0; i < (n & -8); i++)
+   a[i] = -fabs (a[i]);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
new file mode 100644
index 0000000000000000000000000000000000000000..21f2a8da2a5d44e3d01f6604ca7be87e3744d494
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <string.h>
+
+/*
+** negabs:
+**	mov	x0, -9223372036854775808
+**	fmov	d[0-9]+, x0
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+double negabs (double x)
+{
+   unsigned long long y;
+   memcpy (&y, &x, sizeof(double));
+   y = y | (1UL << 63);
+   memcpy (&x, &y, sizeof(double));
+   return x;
+}
+
+/*
+** negabsf:
+**	movi	v[0-9]+.2s, 0x80, lsl 24
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+float negabsf (float x)
+{
+   unsigned int y;
+   memcpy (&y, &x, sizeof(float));
+   y = y | (1U << 31);
+   memcpy (&x, &y, sizeof(float));
+   return x;
+}
+




^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2023-10-09  9:06 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-09-27  0:50 [PATCH]middle-end match.pd: optimize fneg (fabs (x)) to x | (1 << signbit(x)) [PR109154] Tamar Christina
2023-09-27  1:17 ` Andrew Pinski
2023-09-27  2:31   ` Tamar Christina
2023-09-27  7:11     ` Richard Biener
2023-09-27  7:56       ` Tamar Christina
2023-09-27  9:35         ` Tamar Christina
2023-09-27  9:39           ` Richard Biener
2023-10-05 18:11             ` Tamar Christina
2023-10-06  6:24               ` Richard Biener
2023-10-07  9:22                 ` Richard Sandiford
2023-10-07 10:34                   ` Richard Biener
2023-10-07 11:34                     ` Richard Sandiford
2023-10-09  7:20                       ` Richard Biener
2023-10-09  7:36                         ` Andrew Pinski
2023-10-09  9:06                           ` Richard Biener
2023-09-29 15:00 ` Jeff Law
2023-10-05 18:09   ` Tamar Christina

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).