public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH, i386]: Add xorsign support
@ 2019-01-09 19:20 Uros Bizjak
  0 siblings, 0 replies; only message in thread
From: Uros Bizjak @ 2019-01-09 19:20 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 957 bytes --]

Recent discussions on the mailing list reminded me of this long-forgotten
patch... I hope it is still OK to commit it, so that gcc-9 will support an
optimization that benefits SPEC on x86 targets.

2019-01-09  Uroš Bizjak  <ubizjak@gmail.com>

    * config/i386/i386-protos.h (ix86_expand_xorsign): New prototype.
    (ix86_split_xorsign): Ditto.
    * config/i386/i386.c (ix86_expand_xorsign): New function.
    (ix86_split_xorsign): Ditto.
    * config/i386/i386.md (UNSPEC_XORSIGN): New unspec.
    (xorsign<mode>3): New expander.
    (xorsign<mode>3_1): New insn_and_split pattern.
    * config/i386/sse.md (xorsign<mode>3): New expander.

testsuite/ChangeLog:

2019-01-09  Uroš Bizjak  <ubizjak@gmail.com>

    * lib/target-supports.exp
    (check_effective_target_xorsign): Add i?86-*-* and x86_64-*-* targets.
    * gcc.target/i386/xorsign.c: New test.

Bootstrapped and regression tested on x86_64. Committed to mainline SVN.

Uros.

[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 6965 bytes --]

Index: config/i386/i386-protos.h
===================================================================
--- config/i386/i386-protos.h	(revision 267776)
+++ config/i386/i386-protos.h	(working copy)
@@ -124,6 +124,8 @@ extern void ix86_expand_fp_absneg_operator (enum r
 extern void ix86_expand_copysign (rtx []);
 extern void ix86_split_copysign_const (rtx []);
 extern void ix86_split_copysign_var (rtx []);
+extern void ix86_expand_xorsign (rtx []);
+extern void ix86_split_xorsign (rtx []);
 extern bool ix86_unary_operator_ok (enum rtx_code, machine_mode, rtx[]);
 extern bool ix86_match_ccmode (rtx, machine_mode);
 extern void ix86_expand_branch (enum rtx_code, rtx, rtx, rtx);
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 267776)
+++ config/i386/i386.c	(working copy)
@@ -21860,6 +21860,63 @@ ix86_split_copysign_var (rtx operands[])
   emit_insn (gen_rtx_SET (dest, x));
 }
 
+/* Expand an xorsign operation: emit xorsign<mode>3_1 for OPERANDS[0..2].  */
+
+void
+ix86_expand_xorsign (rtx operands[])
+{
+  rtx (*xorsign_insn)(rtx, rtx, rtx, rtx);  /* Generator picked by mode.  */
+  machine_mode mode, vmode;
+  rtx dest, op0, op1, mask;
+
+  dest = operands[0];
+  op0 = operands[1];
+  op1 = operands[2];
+
+  mode = GET_MODE (dest);
+
+  if (mode == SFmode)
+    {
+      xorsign_insn = gen_xorsignsf3_1;
+      vmode = V4SFmode;  /* Mode in which the mask is built.  */
+    }
+  else if (mode == DFmode)
+    {
+      xorsign_insn = gen_xorsigndf3_1;
+      vmode = V2DFmode;  /* Mode in which the mask is built.  */
+    }
+  else
+    gcc_unreachable ();  /* Only SFmode and DFmode are handled.  */
+
+  mask = ix86_build_signbit_mask (vmode, 0, 0);  /* NOTE(review): presumably sign bit of element 0 only, not inverted -- confirm.  */
+
+  emit_insn (xorsign_insn (dest, op0, op1, mask));  /* MASK becomes operand 3.  */
+}
+
+/* Split xorsign into DEST = (DEST & MASK) ^ OP0 in MASK's vector mode.  */
+
+void
+ix86_split_xorsign (rtx operands[])
+{
+  machine_mode mode, vmode;
+  rtx dest, op0, mask, x;
+
+  dest = operands[0];
+  op0 = operands[1];
+  mask = operands[3];  /* operands[2] is not read; the insn ties it to DEST.  */
+
+  mode = GET_MODE (dest);
+  vmode = GET_MODE (mask);
+
+  dest = lowpart_subreg (vmode, dest, mode);  /* View DEST in the vector mode.  */
+  x = gen_rtx_AND (vmode, dest, mask);
+  emit_insn (gen_rtx_SET (dest, x));  /* DEST &= MASK.  */
+
+  op0 = lowpart_subreg (vmode, op0, mode);
+  x = gen_rtx_XOR (vmode, dest, op0);
+  emit_insn (gen_rtx_SET (dest, x));  /* DEST ^= OP0; OP0 is read after DEST was written.  */
+}
+
 /* Return TRUE or FALSE depending on whether the first SET in INSN
    has source and destination with matching CC modes, and that the
    CC mode is at least as constrained as REQ_MODE.  */
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 267776)
+++ config/i386/i386.md	(working copy)
@@ -124,6 +124,7 @@
 
   ;; Generic math support
   UNSPEC_COPYSIGN
+  UNSPEC_XORSIGN
   UNSPEC_IEEE_MIN	; not commutative
   UNSPEC_IEEE_MAX	; not commutative
 
@@ -9784,6 +9785,26 @@
    && reload_completed"
   [(const_int 0)]
   "ix86_split_copysign_var (operands); DONE;")
+
+(define_expand "xorsign<mode>3"	;; MODEF form; all RTL comes from the C helper
+  [(match_operand:MODEF 0 "register_operand")
+   (match_operand:MODEF 1 "register_operand")
+   (match_operand:MODEF 2 "register_operand")]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+  "ix86_expand_xorsign (operands); DONE;")	;; helper handles SFmode and DFmode only
+
+(define_insn_and_split "xorsign<mode>3_1"	;; op0 = (op2 & op3) ^ op1, split after reload
+  [(set (match_operand:MODEF 0 "register_operand" "=&Yv")	;; '&': split writes op0 before reading op1
+	(unspec:MODEF
+	  [(match_operand:MODEF 1 "register_operand" "Yv")	;; must not share a register with op0
+	   (match_operand:MODEF 2 "register_operand" "0")	;; tied to the output register
+	   (match_operand:<ssevecmode> 3 "nonimmediate_operand" "Yvm")]	;; mask from ix86_build_signbit_mask
+	  UNSPEC_XORSIGN))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_xorsign (operands); DONE;")	;; emits AND with op3, then XOR with op1
 \f
 ;; One complement instructions
 
Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md	(revision 267776)
+++ config/i386/sse.md	(working copy)
@@ -3423,6 +3423,20 @@
   operands[5] = gen_reg_rtx (<MODE>mode);
 })
 
+(define_expand "xorsign<mode>3"	;; VF form: op4 = op3 & op2; op0 = op4 ^ op1
+  [(set (match_dup 4)
+	(and:VF (match_dup 3)
+		(match_operand:VF 2 "vector_operand")))
+   (set (match_operand:VF 0 "register_operand")
+	(xor:VF (match_dup 4)
+		(match_operand:VF 1 "vector_operand")))]
+  "TARGET_SSE"
+{
+  operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);  /* Mask for the AND; NOTE(review): presumably sign bit in every element -- confirm.  */
+
+  operands[4] = gen_reg_rtx (<MODE>mode);  /* Receives the AND result.  */
+})
+
 ;; Also define scalar versions.  These are used for abs, neg, and
 ;; conditional move.  Using subregs into vector modes causes register
 ;; allocation lossage.  These patterns do not allow memory operands
Index: testsuite/gcc.target/i386/xorsign.c
===================================================================
--- testsuite/gcc.target/i386/xorsign.c	(nonexistent)
+++ testsuite/gcc.target/i386/xorsign.c	(working copy)
@@ -0,0 +1,57 @@
+/* { dg-do run { target sse2_runtime } } */
+/* { dg-options "-O2 -msse2 -mfpmath=sse -ftree-vectorize -fdump-tree-vect-details -save-temps" } */
+
+extern void abort ();
+
+#define N 16
+float a[N] = {-0.1f, -3.2f, -6.3f, -9.4f,
+	      -12.5f, -15.6f, -18.7f, -21.8f,
+	      24.9f, 27.1f, 30.2f, 33.3f,
+	      36.4f, 39.5f, 42.6f, 45.7f};
+float b[N] = {-1.2f, 3.4f, -5.6f, 7.8f,
+	      -9.0f, 1.0f, -2.0f, 3.0f,
+	      -4.0f, -5.0f, 6.0f, 7.0f,
+	      -8.0f, -9.0f, 10.0f, 11.0f};
+float r[N];
+
+double ad[N] = {-0.1d,  -3.2d,  -6.3d,  -9.4d,
+		-12.5d, -15.6d, -18.7d, -21.8d,
+		 24.9d,  27.1d,  30.2d,  33.3d,
+		 36.4d,  39.5d,  42.6d, 45.7d};
+double bd[N] = {-1.2d,  3.4d, -5.6d,  7.8d,
+		-9.0d,  1.0d, -2.0d,  3.0d,
+		-4.0d, -5.0d,  6.0d,  7.0d,
+		-8.0d, -9.0d, 10.0d, 11.0d};
+double rd[N];
+
+int
+main (void)
+{
+  int i;
+
+  for (i = 0; i < N; i++)  /* float: multiply a[i] by a value carrying b[i]'s sign.  */
+    r[i] = a[i] * __builtin_copysignf (1.0f, b[i]);
+
+  /* Check results: recompute each product and abort on any mismatch.  */
+  for (i = 0; i < N; i++)
+    if (r[i] != a[i] * __builtin_copysignf (1.0f, b[i]))
+      abort ();
+
+  for (i = 0; i < N; i++)  /* double: same computation on ad[] and bd[].  */
+    rd[i] = ad[i] * __builtin_copysign (1.0d, bd[i]);
+
+  /* Check results: recompute each product and abort on any mismatch.  */
+  for (i = 0; i < N; i++)
+    if (rd[i] != ad[i] * __builtin_copysign (1.0d, bd[i]))
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
+/* { dg-final { scan-assembler "\[ \t\]xor" } } */
+/* { dg-final { scan-assembler "\[ \t\]and" } } */
+/* { dg-final { scan-assembler-not "copysign" } } */
+/* { dg-final { scan-assembler-not "\[ \t\]fxam" } } */
+/* { dg-final { scan-assembler-not "\[ \t\]or" } } */
+/* { dg-final { scan-assembler-not "\[ \t\]mul" } } */
Index: testsuite/lib/target-supports.exp
===================================================================
--- testsuite/lib/target-supports.exp	(revision 267776)
+++ testsuite/lib/target-supports.exp	(working copy)
@@ -5730,7 +5730,8 @@ proc check_effective_target_vect_perm3_short { } {
 
 proc check_effective_target_xorsign { } {
     return [check_cached_effective_target_indexed xorsign {
-      expr { [istarget aarch64*-*-*] || [istarget arm*-*-*] }}]
+      expr { [istarget i?86-*-*] || [istarget x86_64-*-*]
+	     || [istarget aarch64*-*-*] || [istarget arm*-*-*] }}]
 }
 
 # Return 1 if the target plus current options supports a vector

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2019-01-09 19:20 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-01-09 19:20 [PATCH, i386]: Add xorsign support Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).