public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r13-4162] i386: Outline fast BF -> SF conversion and fix up sNaN handling in it [PR107628]
@ 2022-11-19  9:17 Jakub Jelinek
  0 siblings, 0 replies; only message in thread
From: Jakub Jelinek @ 2022-11-19  9:17 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:b1115dbfea4d6df51d608cece7416d658d2e2822

commit r13-4162-gb1115dbfea4d6df51d608cece7416d658d2e2822
Author: Jakub Jelinek <jakub@redhat.com>
Date:   Sat Nov 19 10:17:01 2022 +0100

    i386: Outline fast BF -> SF conversion and fix up sNaN handling in it [PR107628]
    
    On Fri, Oct 21, 2022 at 10:23:14AM +0200, Uros Bizjak wrote:
    > OK, but now we have two more copies of a function that effectively
    > extends BF to SF. Can you please split this utility function out and
    > use it here and in cbranchbf4/cstorebf4? I'm talking about this part:
    >
    > +      op = gen_lowpart (HImode, op1);
    > +      if (CONST_INT_P (op))
    > +       op = simplify_const_unary_operation (FLOAT_EXTEND, SFmode,
    > +                                            op1, BFmode);
    > +      else
    > +       {
    > +         rtx t1 = gen_reg_rtx (SImode);
    > +         emit_insn (gen_zero_extendhisi2 (t1, op));
    > +         emit_insn (gen_ashlsi3 (t1, t1, GEN_INT (16)));
    > +         op = gen_lowpart (SFmode, t1);
    > +       }
    >
    > Taking this a bit further, it looks like a generic function to extend
    > BF to SF, when extendbfsf2 named function is not defined.
    >
    > The above could be a follow-up patch, the proposed patch is OK.
    
    Sorry for the delay, only got to this now.
    And I'm fixing the sNaN handling in it too.  If the argument is a BFmode sNaN
    constant, we just want an SFmode sNaN constant in this case, but
    simplify_const_unary_operation (FLOAT_EXTEND, ...)
    returns NULL for such an operand (as normally a conversion of an sNaN to
    some other float type should raise an exception).  In this case we want
    to bypass that, as we know the sNaN will be used immediately in the SFmode
    comparison a few instructions later.  The patch fixes it by just
    simplifying the lowpart to HImode and its zero extension to SImode, then
    forcing the result into a pseudo and doing the left shift and the subreg
    to SFmode on that pseudo.  CSE or combine can handle it later.
    
    2022-11-19  Jakub Jelinek  <jakub@redhat.com>
    
            PR target/107628
            * config/i386/i386-protos.h (ix86_expand_fast_convert_bf_to_sf):
            Declare.
            * config/i386/i386-expand.cc (ix86_expand_fast_convert_bf_to_sf): New
            function.
            * config/i386/i386.md (cbranchbf4, cstorebf4): Use it.
    
            * gcc.target/i386/pr107628.c: New test.

Diff:
---
 gcc/config/i386/i386-expand.cc           | 26 +++++++++++++++++
 gcc/config/i386/i386-protos.h            |  1 +
 gcc/config/i386/i386.md                  | 48 +++-----------------------------
 gcc/testsuite/gcc.target/i386/pr107628.c | 11 ++++++++
 4 files changed, 42 insertions(+), 44 deletions(-)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index a166395db45..0373c3614a4 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -24138,4 +24138,30 @@ ix86_expand_cmpxchg_loop (rtx *ptarget_bool, rtx target_val,
   *ptarget_bool = target_bool;
 }
 
+/* Convert a BFmode VAL to SFmode without signaling sNaNs.
+   This is done by returning SF SUBREG of ((HI SUBREG) (VAL)) << 16.  */
+
+rtx
+ix86_expand_fast_convert_bf_to_sf (rtx val)
+{
+  rtx op = gen_lowpart (HImode, val), ret;
+  if (CONST_INT_P (op))
+    {
+      ret = simplify_const_unary_operation (FLOAT_EXTEND, SFmode,
+					    val, BFmode);
+      if (ret)
+	return ret;
+      /* FLOAT_EXTEND simplification will fail if VAL is a sNaN.  */
+      ret = gen_reg_rtx (SImode);
+      emit_move_insn (ret, GEN_INT (INTVAL (op) & 0xffff));
+    }
+  else
+    {
+      ret = gen_reg_rtx (SImode);
+      emit_insn (gen_zero_extendhisi2 (ret, op));
+    }
+  emit_insn (gen_ashlsi3 (ret, ret, GEN_INT (16)));
+  return gen_lowpart (SFmode, ret);
+}
+
 #include "gt-i386-expand.h"
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 5318fc7fddf..e136f6ec175 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -227,6 +227,7 @@ extern void ix86_expand_atomic_fetch_op_loop (rtx, rtx, rtx, enum rtx_code,
 					      bool, bool);
 extern void ix86_expand_cmpxchg_loop (rtx *, rtx, rtx, rtx, rtx, rtx,
 				      bool, rtx_code_label *);
+extern rtx ix86_expand_fast_convert_bf_to_sf (rtx);
 
 #ifdef TREE_CODE
 extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index a2b8f26714a..01faa911b77 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1668,28 +1668,8 @@
 	      (pc)))]
   ""
 {
-  rtx op1 = gen_lowpart (HImode, operands[1]);
-  if (CONST_INT_P (op1))
-    op1 = simplify_const_unary_operation (FLOAT_EXTEND, SFmode,
-					  operands[1], BFmode);
-  else
-    {
-      rtx t1 = gen_reg_rtx (SImode);
-      emit_insn (gen_zero_extendhisi2 (t1, op1));
-      emit_insn (gen_ashlsi3 (t1, t1, GEN_INT (16)));
-      op1 = gen_lowpart (SFmode, t1);
-    }
-  rtx op2 = gen_lowpart (HImode, operands[2]);
-  if (CONST_INT_P (op2))
-    op2 = simplify_const_unary_operation (FLOAT_EXTEND, SFmode,
-					  operands[2], BFmode);
-  else
-    {
-      rtx t2 = gen_reg_rtx (SImode);
-      emit_insn (gen_zero_extendhisi2 (t2, op2));
-      emit_insn (gen_ashlsi3 (t2, t2, GEN_INT (16)));
-      op2 = gen_lowpart (SFmode, t2);
-    }
+  rtx op1 = ix86_expand_fast_convert_bf_to_sf (operands[1]);
+  rtx op2 = ix86_expand_fast_convert_bf_to_sf (operands[2]);
   do_compare_rtx_and_jump (op1, op2, GET_CODE (operands[0]), 0,
 			   SFmode, NULL_RTX, NULL,
 			   as_a <rtx_code_label *> (operands[3]),
@@ -1723,28 +1703,8 @@
 	   (const_int 0)]))]
   ""
 {
-  rtx op1 = gen_lowpart (HImode, operands[2]);
-  if (CONST_INT_P (op1))
-    op1 = simplify_const_unary_operation (FLOAT_EXTEND, SFmode,
-					  operands[2], BFmode);
-  else
-    {
-      rtx t1 = gen_reg_rtx (SImode);
-      emit_insn (gen_zero_extendhisi2 (t1, op1));
-      emit_insn (gen_ashlsi3 (t1, t1, GEN_INT (16)));
-      op1 = gen_lowpart (SFmode, t1);
-    }
-  rtx op2 = gen_lowpart (HImode, operands[3]);
-  if (CONST_INT_P (op2))
-    op2 = simplify_const_unary_operation (FLOAT_EXTEND, SFmode,
-					  operands[3], BFmode);
-  else
-    {
-      rtx t2 = gen_reg_rtx (SImode);
-      emit_insn (gen_zero_extendhisi2 (t2, op2));
-      emit_insn (gen_ashlsi3 (t2, t2, GEN_INT (16)));
-      op2 = gen_lowpart (SFmode, t2);
-    }
+  rtx op1 = ix86_expand_fast_convert_bf_to_sf (operands[2]);
+  rtx op2 = ix86_expand_fast_convert_bf_to_sf (operands[3]);
   rtx res = emit_store_flag_force (operands[0], GET_CODE (operands[1]),
 				   op1, op2, SFmode, 0, 1);
   if (!rtx_equal_p (res, operands[0]))
diff --git a/gcc/testsuite/gcc.target/i386/pr107628.c b/gcc/testsuite/gcc.target/i386/pr107628.c
new file mode 100644
index 00000000000..a0de1179e67
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107628.c
@@ -0,0 +1,11 @@
+/* PR target/107628 */
+/* { dg-do compile } */
+/* { dg-options "-fsignaling-nans -msse2" } */
+
+typedef __bf16 __attribute__((__vector_size__ (2))) V;
+
+void
+foo (V v)
+{
+  v < (V) (short) 65436;
+}

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-11-19  9:17 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-11-19  9:17 [gcc r13-4162] i386: Outline fast BF -> SF conversion and fix up sNaN handling in it [PR107628] Jakub Jelinek

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).