public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [GCC][PATCH][AArch64] Optimize x * copysign (1.0, y) [Patch (2/2)]
@ 2017-06-12  7:57 Tamar Christina
  2017-07-10 15:49 ` Tamar Christina
  0 siblings, 1 reply; 3+ messages in thread
From: Tamar Christina @ 2017-06-12  7:57 UTC (permalink / raw)
  To: GCC Patches; +Cc: nd, James Greenhalgh, Richard Earnshaw, Marcus Shawcroft

[-- Attachment #1: Type: text/plain, Size: 1088 bytes --]

Hi All,

This patch implements an optimization rewriting

x * copysign (1.0, y) and 
x * copysign (-1.0, y) 

to:

x ^ (y & (1 << sign_bit_position))

The patch provides AArch64 optabs for XORSIGN, both vectorized and scalar.

This patch is a revival of a previous patch
https://gcc.gnu.org/ml/gcc-patches/2015-10/msg00069.html

Bootstrapped on both aarch64-none-linux-gnu and x86_64 with no issues.
Regression done on aarch64-none-linux-gnu and no regressions.

AArch64 now generates in GCC:

	movi	v2.2s, 0x80, lsl 24
	and	v1.8b, v1.8b, v2.8b
	eor	v0.8b, v0.8b, v1.8b

as opposed to before:

	fmov	s2, 1.0e+0
	mov	x0, 2147483648
	fmov	d3, x0
	bsl	v3.8b, v1.8b, v2.8b
	fmul	s0, s0, s3

Ok for trunk?

gcc/
2017-06-07  Tamar Christina  <tamar.christina@arm.com>


	* config/aarch64/aarch64.md (xorsign<mode>3): New optabs.
	* config/aarch64/aarch64-builtins.c
	(aarch64_builtin_vectorized_function): Added CASE_CFN_XORSIGN.
	* config/aarch64/aarch64-simd-builtins.def: Added xorsign BINOP.
	* config/aarch64/aarch64-simd.md: Added xorsign<mode>3.

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: xorgsign-2.patch --]
[-- Type: text/x-patch; name="xorgsign-2.patch", Size: 4382 bytes --]

diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index f09399f4c158112c90c270856bffb4cafd03e7d4..8a2e214db2bd590fc809cf8c58bfe4aca2af9bef 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -1432,6 +1432,15 @@ aarch64_builtin_vectorized_function (unsigned int fn, tree type_out,
       return AARCH64_FIND_FRINT_VARIANT (nearbyint);
     CASE_CFN_SQRT:
       return AARCH64_FIND_FRINT_VARIANT (sqrt);
+    CASE_CFN_XORSIGN:
+      if (AARCH64_CHECK_BUILTIN_MODE (2, S))
+	return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_BINOP_xorsignv2sf];
+      else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
+	return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_BINOP_xorsignv4sf];
+      else if (AARCH64_CHECK_BUILTIN_MODE (2, D))
+	return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_BINOP_xorsignv2df];
+      else
+	return NULL_TREE;
 #undef AARCH64_CHECK_BUILTIN_MODE
 #define AARCH64_CHECK_BUILTIN_MODE(C, N) \
   (out_mode == SImode && out_n == C \
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index d713d5d8b88837ec6f2dc51188fb252f8d5bc8bd..b7f50b849dba8d788be142cd839c4a5560e9204e 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -151,6 +151,9 @@
   BUILTIN_VQN (TERNOP, raddhn2, 0)
   BUILTIN_VQN (TERNOP, rsubhn2, 0)
 
+  /* Implemented by xorsign<mode>3.  */
+  BUILTIN_VHSDF (BINOP, xorsign, 3)
+
   BUILTIN_VSQN_HSDI (UNOP, sqmovun, 0)
   /* Implemented by aarch64_<sur>qmovn<mode>.  */
   BUILTIN_VSQN_HSDI (UNOP, sqmovn, 0)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index c5a86ff6f7196eb634be426ecea97cdfbfc7a7a4..1e92fa1b54a592db5dde9048e51988c03ece141c 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -351,6 +351,35 @@
   }
 )
 
+(define_expand "xorsign<mode>3"
+  [(match_operand:VHSDF 0 "register_operand")
+   (match_operand:VHSDF 1 "register_operand")
+   (match_operand:VHSDF 2 "register_operand")]
+  "TARGET_SIMD"
+{
+
+  machine_mode imode = <V_cmp_result>mode;
+  rtx v_bitmask = gen_reg_rtx (imode);
+  rtx op1x = gen_reg_rtx (imode);
+  rtx op2x = gen_reg_rtx (imode);
+
+  rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
+  rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
+
+  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
+
+  emit_move_insn (v_bitmask,
+		  aarch64_simd_gen_const_vector_dup (<V_cmp_result>mode,
+						     HOST_WIDE_INT_M1U << bits));
+
+  emit_insn (gen_and<v_cmp_result>3 (op2x, v_bitmask, arg2));
+  emit_insn (gen_xor<v_cmp_result>3 (op1x, arg1, op2x));
+  emit_move_insn (operands[0],
+		  lowpart_subreg (<MODE>mode, op1x, imode));
+  DONE;
+}
+)
+
 (define_expand "copysign<mode>3"
   [(match_operand:VHSDF 0 "register_operand")
    (match_operand:VHSDF 1 "register_operand")
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 2e9331fd72b3f36270b8741d97fb3275b4bf2657..8ecdae41a2f4ec42cf28dc6309f3e69fe74ba39d 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -4988,6 +4988,42 @@
 }
 )
 
+;; For xorsign (x, y), we want to generate:
+;;
+;; LDR   d2, #1<<63
+;; AND   v3.8B, v1.8B, v2.8B
+;; EOR   v0.8B, v0.8B, v3.8B
+;;
+
+(define_expand "xorsign<mode>3"
+  [(match_operand:GPF 0 "register_operand")
+   (match_operand:GPF 1 "register_operand")
+   (match_operand:GPF 2 "register_operand")]
+  "TARGET_FLOAT && TARGET_SIMD"
+{
+
+  machine_mode imode = <V_cmp_result>mode;
+  rtx mask = gen_reg_rtx (imode);
+  rtx op1x = gen_reg_rtx (imode);
+  rtx op2x = gen_reg_rtx (imode);
+
+  int bits = GET_MODE_BITSIZE (<MODE>mode) - 1;
+  emit_move_insn (mask, GEN_INT (trunc_int_for_mode (HOST_WIDE_INT_M1U << bits,
+						     imode)));
+
+  emit_insn (gen_and<v_cmp_result>3 (op2x, mask,
+				     lowpart_subreg (imode, operands[2],
+						     <MODE>mode)));
+  emit_insn (gen_xor<v_cmp_result>3 (op1x,
+				     lowpart_subreg (imode, operands[1],
+						     <MODE>mode),
+				     op2x));
+  emit_move_insn (operands[0],
+		  lowpart_subreg (<MODE>mode, op1x, imode));
+  DONE;
+}
+)
+
 ;; -------------------------------------------------------------------
 ;; Reload support
 ;; -------------------------------------------------------------------

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [GCC][PATCH][AArch64] Optimize x * copysign (1.0, y) [Patch (2/2)]
  2017-06-12  7:57 [GCC][PATCH][AArch64] Optimize x * copysign (1.0, y) [Patch (2/2)] Tamar Christina
@ 2017-07-10 15:49 ` Tamar Christina
  2017-07-24 11:22   ` James Greenhalgh
  0 siblings, 1 reply; 3+ messages in thread
From: Tamar Christina @ 2017-07-10 15:49 UTC (permalink / raw)
  To: GCC Patches; +Cc: nd, James Greenhalgh, Richard Earnshaw, Marcus Shawcroft

[-- Attachment #1: Type: text/plain, Size: 3010 bytes --]

Hi All,

As the mid-end patch has been respun, I've had to respin this one as well.
So this is a new version and a ping as well.

The patch provides AArch64 optabs for XORSIGN, both vectorized and scalar.

This patch is a revival of a previous patch
https://gcc.gnu.org/ml/gcc-patches/2015-10/msg00069.html

Bootstrapped on both aarch64-none-linux-gnu and x86_64 with no issues.
Regression done on aarch64-none-linux-gnu and no regressions.

AArch64 now generates in GCC:

        movi    v2.2s, 0x80, lsl 24
        and     v1.8b, v1.8b, v2.8b
        eor     v0.8b, v0.8b, v1.8b

as opposed to before:

        fmov    s2, 1.0e+0
        mov     x0, 2147483648
        fmov    d3, x0
        bsl     v3.8b, v1.8b, v2.8b
        fmul    s0, s0, s3

Ok for trunk?

gcc/
2017-07-10  Tamar Christina  <tamar.christina@arm.com>

        PR middle-end/19706
        * config/aarch64/aarch64.md (xorsign<mode>3): New optabs.
        * config/aarch64/aarch64-builtins.c
        (aarch64_builtin_vectorized_function): Added CASE_CFN_XORSIGN.
        * config/aarch64/aarch64-simd-builtins.def: Added xorsign BINOP.
        * config/aarch64/aarch64-simd.md: Added xorsign<mode>3.

gcc/testsuite/
2017-07-10  Tamar Christina  <tamar.christina@arm.com>

        * gcc.target/aarch64/xorsign.c: New.
        * gcc.target/aarch64/xorsign_exec.c: New.
        * gcc.target/aarch64/vect-xorsign_exec.c: New.
________________________________________
From: gcc-patches-owner@gcc.gnu.org <gcc-patches-owner@gcc.gnu.org> on behalf of Tamar Christina <Tamar.Christina@arm.com>
Sent: Monday, June 12, 2017 8:56:58 AM
To: GCC Patches
Cc: nd; James Greenhalgh; Richard Earnshaw; Marcus Shawcroft
Subject: [GCC][PATCH][AArch64] Optimize x * copysign (1.0, y) [Patch (2/2)]

Hi All,

This patch implements an optimization rewriting

x * copysign (1.0, y) and
x * copysign (-1.0, y)

to:

x ^ (y & (1 << sign_bit_position))

The patch provides AArch64 optabs for XORSIGN, both vectorized and scalar.

This patch is a revival of a previous patch
https://gcc.gnu.org/ml/gcc-patches/2015-10/msg00069.html

Bootstrapped on both aarch64-none-linux-gnu and x86_64 with no issues.
Regression done on aarch64-none-linux-gnu and no regressions.

AArch64 now generates in GCC:

        movi    v2.2s, 0x80, lsl 24
        and     v1.8b, v1.8b, v2.8b
        eor     v0.8b, v0.8b, v1.8b

as opposed to before:

        fmov    s2, 1.0e+0
        mov     x0, 2147483648
        fmov    d3, x0
        bsl     v3.8b, v1.8b, v2.8b
        fmul    s0, s0, s3

Ok for trunk?

gcc/
2017-06-07  Tamar Christina  <tamar.christina@arm.com>


        * config/aarch64/aarch64.md (xorsign<mode>3): New optabs.
        * config/aarch64/aarch64-builtins.c
        (aarch64_builtin_vectorized_function): Added CASE_CFN_XORSIGN.
        * config/aarch64/aarch64-simd-builtins.def: Added xorsign BINOP.
        * config/aarch64/aarch64-simd.md: Added xorsign<mode>3.

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: xorsign-2-spin3.patch --]
[-- Type: text/x-patch; name="xorsign-2-spin3.patch", Size: 7400 bytes --]

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 1cb6eeb318716aadacb84a44aa2062d486e0186b..db6a882eb42819569a127bc4526d73e94771c970 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -351,6 +351,35 @@
   }
 )
 
+(define_expand "xorsign<mode>3"
+  [(match_operand:VHSDF 0 "register_operand")
+   (match_operand:VHSDF 1 "register_operand")
+   (match_operand:VHSDF 2 "register_operand")]
+  "TARGET_SIMD"
+{
+
+  machine_mode imode = <V_cmp_result>mode;
+  rtx v_bitmask = gen_reg_rtx (imode);
+  rtx op1x = gen_reg_rtx (imode);
+  rtx op2x = gen_reg_rtx (imode);
+
+  rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
+  rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
+
+  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
+
+  emit_move_insn (v_bitmask,
+		  aarch64_simd_gen_const_vector_dup (<V_cmp_result>mode,
+						     HOST_WIDE_INT_M1U << bits));
+
+  emit_insn (gen_and<v_cmp_result>3 (op2x, v_bitmask, arg2));
+  emit_insn (gen_xor<v_cmp_result>3 (op1x, arg1, op2x));
+  emit_move_insn (operands[0],
+		  lowpart_subreg (<MODE>mode, op1x, imode));
+  DONE;
+}
+)
+
 (define_expand "copysign<mode>3"
   [(match_operand:VHSDF 0 "register_operand")
    (match_operand:VHSDF 1 "register_operand")
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 6bdbf650d9281f95fc7fa49b38e1a6da538cdd27..583bb2af4026bec68ecd129988b9aee6918b814c 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -5000,6 +5000,42 @@
 }
 )
 
+;; For xorsign (x, y), we want to generate:
+;;
+;; LDR   d2, #1<<63
+;; AND   v3.8B, v1.8B, v2.8B
+;; EOR   v0.8B, v0.8B, v3.8B
+;;
+
+(define_expand "xorsign<mode>3"
+  [(match_operand:GPF 0 "register_operand")
+   (match_operand:GPF 1 "register_operand")
+   (match_operand:GPF 2 "register_operand")]
+  "TARGET_FLOAT && TARGET_SIMD"
+{
+
+  machine_mode imode = <V_cmp_result>mode;
+  rtx mask = gen_reg_rtx (imode);
+  rtx op1x = gen_reg_rtx (imode);
+  rtx op2x = gen_reg_rtx (imode);
+
+  int bits = GET_MODE_BITSIZE (<MODE>mode) - 1;
+  emit_move_insn (mask, GEN_INT (trunc_int_for_mode (HOST_WIDE_INT_M1U << bits,
+						     imode)));
+
+  emit_insn (gen_and<v_cmp_result>3 (op2x, mask,
+				     lowpart_subreg (imode, operands[2],
+						     <MODE>mode)));
+  emit_insn (gen_xor<v_cmp_result>3 (op1x,
+				     lowpart_subreg (imode, operands[1],
+						     <MODE>mode),
+				     op2x));
+  emit_move_insn (operands[0],
+		  lowpart_subreg (<MODE>mode, op1x, imode));
+  DONE;
+}
+)
+
 ;; -------------------------------------------------------------------
 ;; Reload support
 ;; -------------------------------------------------------------------
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-xorsign_exec.c b/gcc/testsuite/gcc.target/aarch64/vect-xorsign_exec.c
new file mode 100644
index 0000000000000000000000000000000000000000..d57350c6ab8cf0a6fda43a91bc5fc59a985ccfc7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-xorsign_exec.c
@@ -0,0 +1,58 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */
+
+extern void abort ();
+
+#define N 16
+float a[N] = {-0.1f, -3.2f, -6.3f, -9.4f,
+	      -12.5f, -15.6f, -18.7f, -21.8f,
+	      24.9f, 27.1f, 30.2f, 33.3f,
+	      36.4f, 39.5f, 42.6f, 45.7f};
+float b[N] = {-1.2f, 3.4f, -5.6f, 7.8f,
+	      -9.0f, 1.0f, -2.0f, 3.0f,
+	      -4.0f, -5.0f, 6.0f, 7.0f,
+	      -8.0f, -9.0f, 10.0f, 11.0f};
+float r[N];
+
+double ad[N] = {-0.1d,  -3.2d,  -6.3d,  -9.4d,
+		-12.5d, -15.6d, -18.7d, -21.8d,
+		 24.9d,  27.1d,  30.2d,  33.3d,
+		 36.4d,  39.5d,  42.6d, 45.7d};
+double bd[N] = {-1.2d,  3.4d, -5.6d,  7.8d,
+		-9.0d,  1.0d, -2.0d,  3.0d,
+		-4.0d, -5.0d,  6.0d,  7.0d,
+		-8.0d, -9.0d, 10.0d, 11.0d};
+double rd[N];
+
+int
+main (void)
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    r[i] = a[i] * __builtin_copysignf (1.0f, b[i]);
+
+  /* check results:  */
+  for (i = 0; i < N; i++)
+    if (r[i] != a[i] * __builtin_copysignf (1.0f, b[i]))
+      abort ();
+
+  for (i = 0; i < N; i++)
+    rd[i] = ad[i] * __builtin_copysign (1.0d, bd[i]);
+
+  /* check results:  */
+  for (i = 0; i < N; i++)
+    if (rd[i] != ad[i] * __builtin_copysign (1.0d, bd[i]))
+      abort ();
+
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
+/* { dg-final { scan-assembler "\[ \t\]?eor\[ \t\]?" } } */
+/* { dg-final { scan-assembler "\[ \t\]?and\[ \t\]?" } } */
+/* { dg-final { scan-assembler-not "copysign" } } */
+/* { dg-final { scan-assembler-not "\[ \t\]?orr\[ \t\]?" } } */
+/* { dg-final { scan-assembler-not "\[ \t\]?fmul\[ \t\]?" } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/xorsign.c b/gcc/testsuite/gcc.target/aarch64/xorsign.c
new file mode 100644
index 0000000000000000000000000000000000000000..22c5829449d932bed08de7e453c435ade3b787b2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/xorsign.c
@@ -0,0 +1,86 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+double
+check_d_pos (double x, double y)
+{
+  return x * __builtin_copysign (1.0, y);
+}
+
+float
+check_f_pos (float x, float y)
+{
+  return x * __builtin_copysignf (1.0f, y);
+}
+
+long double
+check_l_pos (long double x, long double y)
+{
+  return x * __builtin_copysignl (1.0, y);
+}
+
+/* --------------- */
+
+double
+check_d_neg (double x, double y)
+{
+  return x * __builtin_copysign (-1.0, y);
+}
+
+float
+check_f_neg (float x, float y)
+{
+  return x * __builtin_copysignf (-1.0f, y);
+}
+
+long double
+check_l_neg (long double x, long double y)
+{
+  return x * __builtin_copysignl (-1.0, y);
+}
+
+/* --------------- */
+
+double
+check_d_pos_rev (double x, double y)
+{
+  return __builtin_copysign (1.0, y) * x;
+}
+
+float
+check_f_pos_rev (float x, float y)
+{
+  return __builtin_copysignf (1.0f, y) * x;
+}
+
+long double
+check_l_pos_rev (long double x, long double y)
+{
+  return __builtin_copysignl (1.0, y) * x;
+}
+
+/* --------------- */
+
+double
+check_d_neg_rev (double x, double y)
+{
+  return __builtin_copysign (-1.0, y) * x;
+}
+
+float
+check_f_neg_rev (float x, float y)
+{
+  return __builtin_copysignf (-1.0f, y) * x;
+}
+
+long double
+check_l_neg_rev (long double x, long double y)
+{
+  return __builtin_copysignl (-1.0, y) * x;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]?eor\[ \t\]?" } } */
+/* { dg-final { scan-assembler "\[ \t\]?and\[ \t\]?" } } */
+/* { dg-final { scan-assembler-not "copysign" } } */
+/* { dg-final { scan-assembler-not "\[ \t\]?orr\[ \t\]?" } } */
+/* { dg-final { scan-assembler-not "\[ \t\]?fmul\[ \t\]?" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/xorsign_exec.c b/gcc/testsuite/gcc.target/aarch64/xorsign_exec.c
new file mode 100644
index 0000000000000000000000000000000000000000..64bf8044cbd12c1cc744ff9b2a3308d71267bff0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/xorsign_exec.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+/* { dg-options "-O -ffast-math" } */
+
+#include <math.h>
+
+extern void abort(void);
+
+static double x = 2.0;
+static float  y = 2.0;
+
+int main()
+{
+  if ((2.5 * __builtin_copysign(1.0d, x)) != 2.5)
+     abort();
+
+  if ((2.5 * __builtin_copysign(1.0f, y)) != 2.5)
+     abort();
+
+  if ((2.5 * __builtin_copysignf(1.0d, -x)) != -2.5)
+     abort();
+
+  if ((2.5 * __builtin_copysignf(1.0f, -y)) != -2.5)
+     abort();
+
+  return 0;
+}

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [GCC][PATCH][AArch64] Optimize x * copysign (1.0, y) [Patch (2/2)]
  2017-07-10 15:49 ` Tamar Christina
@ 2017-07-24 11:22   ` James Greenhalgh
  0 siblings, 0 replies; 3+ messages in thread
From: James Greenhalgh @ 2017-07-24 11:22 UTC (permalink / raw)
  To: Tamar Christina; +Cc: GCC Patches, nd, Richard Earnshaw, Marcus Shawcroft

On Mon, Jul 10, 2017 at 04:49:13PM +0100, Tamar Christina wrote:
> Hi All,
> 
> As the mid-end patch has been respun  I've had to respin this one as well.
> So this is a new version and a ping as well.
> 
> The patch provides AArch64 optabs for XORSIGN, both vectorized and scalar.
> 
> This patch is a revival of a previous patch
> https://gcc.gnu.org/ml/gcc-patches/2015-10/msg00069.html
> 
> Bootstrapped on both aarch64-none-linux-gnu and x86_64 with no issues.
> Regression done on aarch64-none-linux-gnu and no regressions.
> 
> AArch64 now generates in GCC:
> 
>         movi    v2.2s, 0x80, lsl 24
>         and     v1.8b, v1.8b, v2.8b
>         eor     v0.8b, v0.8b, v1.8b
> 
> as opposed to before:
> 
>         fmov    s2, 1.0e+0
>         mov     x0, 2147483648
>         fmov    d3, x0
>         bsl     v3.8b, v1.8b, v2.8b
>         fmul    s0, s0, s3
> 
> Ok for trunk?

I have a question in-line below, and your ChangeLog is out of date, but
otherwise this looks good to me when the prerequisite makes it through
review.

> 
> gcc/
> 2017-07-10  Tamar Christina  <tamar.christina@arm.com>
> 
>         PR middle-end/19706
>         * config/aarch64/aarch64.md (xorsign<mode>3): New optabs.

>         * config/aarch64/aarch64-builtins.c
>         (aarch64_builtin_vectorized_function): Added CASE_CFN_XORSIGN.
>         * config/aarch64/aarch64-simd-builtins.def: Added xorsign BINOP.

These changes are no longer in the patch?

>         * config/aarch64/aarch64-simd.md: Added xorsign<mode>3.
> 
> gcc/testsuite/
> 2017-07-10  Tamar Christina  <tamar.christina@arm.com>
> 
>         * gcc.target/aarch64/xorsign.c: New.
>         * gcc.target/aarch64/xorsign_exec.c: New.
>         * gcc.target/aarch64/vect-xorsign_exec.c: New.
> ________________________________________
> From: gcc-patches-owner@gcc.gnu.org <gcc-patches-owner@gcc.gnu.org> on behalf of Tamar Christina <Tamar.Christina@arm.com>
> Sent: Monday, June 12, 2017 8:56:58 AM
> To: GCC Patches
> Cc: nd; James Greenhalgh; Richard Earnshaw; Marcus Shawcroft
> Subject: [GCC][PATCH][AArch64] Optimize x * copysign (1.0, y) [Patch (2/2)]

Please don't top-post your replies like this, it makes it very confusing
to read the thread.

<snip old email>

> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index 1cb6eeb318716aadacb84a44aa2062d486e0186b..db6a882eb42819569a127bc4526d73e94771c970 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -351,6 +351,35 @@
>    }
>  )
>  
> +(define_expand "xorsign<mode>3"
> +  [(match_operand:VHSDF 0 "register_operand")
> +   (match_operand:VHSDF 1 "register_operand")
> +   (match_operand:VHSDF 2 "register_operand")]
> +  "TARGET_SIMD"
> +{
> +
> +  machine_mode imode = <V_cmp_result>mode;
> +  rtx v_bitmask = gen_reg_rtx (imode);
> +  rtx op1x = gen_reg_rtx (imode);
> +  rtx op2x = gen_reg_rtx (imode);
> +
> +  rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
> +  rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
> +
> +  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
> +
> +  emit_move_insn (v_bitmask,
> +		  aarch64_simd_gen_const_vector_dup (<V_cmp_result>mode,
> +						     HOST_WIDE_INT_M1U << bits));
> +
> +  emit_insn (gen_and<v_cmp_result>3 (op2x, v_bitmask, arg2));
> +  emit_insn (gen_xor<v_cmp_result>3 (op1x, arg1, op2x));
> +  emit_move_insn (operands[0],
> +		  lowpart_subreg (<MODE>mode, op1x, imode));
> +  DONE;
> +}
> +)
> +
>  (define_expand "copysign<mode>3"
>    [(match_operand:VHSDF 0 "register_operand")
>     (match_operand:VHSDF 1 "register_operand")
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 6bdbf650d9281f95fc7fa49b38e1a6da538cdd27..583bb2af4026bec68ecd129988b9aee6918b814c 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -5000,6 +5000,42 @@
>  }
>  )
>  
> +;; For xorsign (x, y), we want to generate:
> +;;
> +;; LDR   d2, #1<<63
> +;; AND   v3.8B, v1.8B, v2.8B
> +;; EOR   v0.8B, v0.8B, v3.8B
> +;;
> +
> +(define_expand "xorsign<mode>3"
> +  [(match_operand:GPF 0 "register_operand")
> +   (match_operand:GPF 1 "register_operand")
> +   (match_operand:GPF 2 "register_operand")]
> +  "TARGET_FLOAT && TARGET_SIMD"
> +{
> +
> +  machine_mode imode = <V_cmp_result>mode;
> +  rtx mask = gen_reg_rtx (imode);
> +  rtx op1x = gen_reg_rtx (imode);
> +  rtx op2x = gen_reg_rtx (imode);
> +
> +  int bits = GET_MODE_BITSIZE (<MODE>mode) - 1;
> +  emit_move_insn (mask, GEN_INT (trunc_int_for_mode (HOST_WIDE_INT_M1U << bits,
> +						     imode)));

If you need a trunc_int_for_mode here, why don't you also need it in
the vector version above?

> +  emit_insn (gen_and<v_cmp_result>3 (op2x, mask,
> +				     lowpart_subreg (imode, operands[2],
> +						     <MODE>mode)));
> +  emit_insn (gen_xor<v_cmp_result>3 (op1x,
> +				     lowpart_subreg (imode, operands[1],
> +						     <MODE>mode),
> +				     op2x));
> +  emit_move_insn (operands[0],
> +		  lowpart_subreg (<MODE>mode, op1x, imode));
> +  DONE;
> +}
> +)
> +
>  ;; -------------------------------------------------------------------
>  ;; Reload support
>  ;; -------------------------------------------------------------------

Thanks,
James

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2017-07-24 11:22 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-06-12  7:57 [GCC][PATCH][AArch64] Optimize x * copysign (1.0, y) [Patch (2/2)] Tamar Christina
2017-07-10 15:49 ` Tamar Christina
2017-07-24 11:22   ` James Greenhalgh

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).