public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
From: Uros Bizjak <uros@gcc.gnu.org>
To: gcc-cvs@gcc.gnu.org
Subject: [gcc r12-5966] i386: Implement VxHF vector set/insert/extract with lower ABI levels
Date: Tue, 14 Dec 2021 17:28:30 +0000 (GMT)	[thread overview]
Message-ID: <20211214172830.8D2FF385840C@sourceware.org> (raw)

https://gcc.gnu.org/g:7a54d3deecf967029f18aa5ed1fcbdb752e213b9

commit r12-5966-g7a54d3deecf967029f18aa5ed1fcbdb752e213b9
Author: Uros Bizjak <ubizjak@gmail.com>
Date:   Tue Dec 14 18:27:22 2021 +0100

    i386: Implement VxHF vector set/insert/extract with lower ABI levels
    
    This is a preparation patch that moves VxHF vector set/insert/extract
    expansions from AVX512FP16 ABI to lower ABIs.  There are no functional
    changes for -mavx512fp16 and a follow-up patch is needed to actually
    enable VxHF vector modes for lower ABIs.
    
    2021-12-14  Uroš Bizjak  <ubizjak@gmail.com>
    
    gcc/ChangeLog:
    
            PR target/103571
            * config/i386/i386-expand.c (ix86_expand_vector_init_duplicate)
            <case E_V8HFmode>: Implement for TARGET_SSE2.
            <case E_V16HFmode>: Implement for TARGET_AVX.
            <case E_V32HFmode>: Implement for TARGET_AVX512F.
            (ix86_expand_vector_set_var): Handle V32HFmode
            without TARGET_AVX512BW.
            (ix86_expand_vector_extract)
            <case E_V8HFmode>: Implement for TARGET_SSE2.
            <case E_V16HFmode>: Implement for TARGET_AVX.
            <case E_V32HFmode>: Implement for TARGET_AVX512BW.
            (expand_vec_perm_broadcast_1) <case E_V8HFmode>: New.
            * config/i386/sse.md (VI12HF_AVX512VL): Remove
            TARGET_AVX512FP16 condition.
            (V): Ditto.
            (V_256_512): Ditto.
            (avx_vbroadcastf128_<mode>): Use V_256H mode iterator.

Diff:
---
 gcc/config/i386/i386-expand.c | 118 +++++++++++++++++++++++++++++-------------
 gcc/config/i386/sse.md        |  19 +++----
 2 files changed, 91 insertions(+), 46 deletions(-)

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 2bbb28e5317..7013c20a97a 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -14855,6 +14855,7 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
       goto widen;
 
     case E_V8HImode:
+    case E_V8HFmode:
       if (TARGET_AVX2)
 	return ix86_vector_duplicate_value (mode, target, val);
 
@@ -14871,15 +14872,22 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
 	  dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
 	  dperm.one_operand_p = true;
 
-	  /* Extend to SImode using a paradoxical SUBREG.  */
-	  tmp1 = gen_reg_rtx (SImode);
-	  emit_move_insn (tmp1, gen_lowpart (SImode, val));
-
-	  /* Insert the SImode value as low element of a V4SImode vector.  */
-	  tmp2 = gen_reg_rtx (V4SImode);
-	  emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
-	  emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
+	  if (mode == V8HFmode)
+	    tmp1 = lowpart_subreg (V8HFmode, force_reg (HFmode, val), HFmode);
+	  else
+	    {
+	      /* Extend to SImode using a paradoxical SUBREG.  */
+	      tmp1 = gen_reg_rtx (SImode);
+	      emit_move_insn (tmp1, gen_lowpart (SImode, val));
+
+	      /* Insert the SImode value as
+		 low element of a V4SImode vector.  */
+	      tmp2 = gen_reg_rtx (V4SImode);
+	      emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
+	      tmp1 = gen_lowpart (mode, tmp2);
+	    }
 
+	  emit_move_insn (dperm.op0, tmp1);
 	  ok = (expand_vec_perm_1 (&dperm)
 		|| expand_vec_perm_broadcast_1 (&dperm));
 	  gcc_assert (ok);
@@ -14926,12 +14934,15 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
       }
 
     case E_V16HImode:
+    case E_V16HFmode:
     case E_V32QImode:
       if (TARGET_AVX2)
 	return ix86_vector_duplicate_value (mode, target, val);
       else
 	{
-	  machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
+	  machine_mode hvmode = (mode == V16HImode ? V8HImode
+				 : mode == V16HFmode ? V8HFmode
+				 : V16QImode);
 	  rtx x = gen_reg_rtx (hvmode);
 
 	  ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
@@ -14942,13 +14953,16 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
 	}
       return true;
 
-    case E_V64QImode:
     case E_V32HImode:
+    case E_V32HFmode:
+    case E_V64QImode:
       if (TARGET_AVX512BW)
 	return ix86_vector_duplicate_value (mode, target, val);
       else
 	{
-	  machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
+	  machine_mode hvmode = (mode == V32HImode ? V16HImode
+				 : mode == V32HFmode ? V16HFmode
+				 : V32QImode);
 	  rtx x = gen_reg_rtx (hvmode);
 
 	  ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
@@ -14959,11 +14973,6 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
 	}
       return true;
 
-    case E_V8HFmode:
-    case E_V16HFmode:
-    case E_V32HFmode:
-      return ix86_vector_duplicate_value (mode, target, val);
-
     default:
       return false;
     }
@@ -15912,7 +15921,8 @@ ix86_expand_vector_set_var (rtx target, rtx val, rtx idx)
   /* 512-bits vector byte/word broadcast and comparison only available
      under TARGET_AVX512BW, break 512-bits vector into two 256-bits vector
      when without TARGET_AVX512BW.  */
-  if ((mode == V32HImode || mode == V64QImode) && !TARGET_AVX512BW)
+  if ((mode == V32HImode || mode == V32HFmode || mode == V64QImode)
+      && !TARGET_AVX512BW)
     {
       gcc_assert (TARGET_AVX512F);
       rtx vhi, vlo, idx_hi;
@@ -15926,6 +15936,12 @@ ix86_expand_vector_set_var (rtx target, rtx val, rtx idx)
 	  extract_hi = gen_vec_extract_hi_v32hi;
 	  extract_lo = gen_vec_extract_lo_v32hi;
 	}
+      else if (mode == V32HFmode)
+	{
+	  half_mode = V16HFmode;
+	  extract_hi = gen_vec_extract_hi_v32hf;
+	  extract_lo = gen_vec_extract_lo_v32hf;
+	}
       else
 	{
 	  half_mode = V32QImode;
@@ -15973,7 +15989,6 @@ ix86_expand_vector_set_var (rtx target, rtx val, rtx idx)
 	case E_V16SFmode:
 	  cmp_mode = V16SImode;
 	  break;
-	/* TARGET_AVX512FP16 implies TARGET_AVX512BW.  */
 	case E_V8HFmode:
 	  cmp_mode = V8HImode;
 	  break;
@@ -16538,6 +16553,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
       break;
 
     case E_V8HImode:
+    case E_V8HFmode:
     case E_V2HImode:
       use_vec_extr = TARGET_SSE2;
       break;
@@ -16704,25 +16720,29 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
       return;
 
     case E_V32HFmode:
-      tmp = gen_reg_rtx (V16HFmode);
-      if (elt < 16)
-	emit_insn (gen_vec_extract_lo_v32hf (tmp, vec));
-      else
-	emit_insn (gen_vec_extract_hi_v32hf (tmp, vec));
-      ix86_expand_vector_extract (false, target, tmp, elt & 15);
-      return;
+      if (TARGET_AVX512BW)
+	{
+	  tmp = gen_reg_rtx (V16HFmode);
+	  if (elt < 16)
+	    emit_insn (gen_vec_extract_lo_v32hf (tmp, vec));
+	  else
+	    emit_insn (gen_vec_extract_hi_v32hf (tmp, vec));
+	  ix86_expand_vector_extract (false, target, tmp, elt & 15);
+	  return;
+	}
+      break;
 
     case E_V16HFmode:
-      tmp = gen_reg_rtx (V8HFmode);
-      if (elt < 8)
-	emit_insn (gen_vec_extract_lo_v16hf (tmp, vec));
-      else
-	emit_insn (gen_vec_extract_hi_v16hf (tmp, vec));
-      ix86_expand_vector_extract (false, target, tmp, elt & 7);
-      return;
-
-    case E_V8HFmode:
-      use_vec_extr = true;
+      if (TARGET_AVX)
+	{
+	  tmp = gen_reg_rtx (V8HFmode);
+	  if (elt < 8)
+	    emit_insn (gen_vec_extract_lo_v16hf (tmp, vec));
+	  else
+	    emit_insn (gen_vec_extract_hi_v16hf (tmp, vec));
+	  ix86_expand_vector_extract (false, target, tmp, elt & 7);
+	  return;
+	}
       break;
 
     case E_V8QImode:
@@ -21443,6 +21463,34 @@ expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
       emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
       return true;
 
+    case E_V8HFmode:
+      /* This can be implemented via interleave and pshufd.  */
+      if (d->testing_p)
+	return true;
+
+      if (elt >= nelt2)
+	{
+	  gen = gen_vec_interleave_highv8hf;
+	  elt -= nelt2;
+	}
+      else
+	gen = gen_vec_interleave_lowv8hf;
+      nelt2 /= 2;
+
+      dest = gen_reg_rtx (vmode);
+      emit_insn (gen (dest, op0, op0));
+
+      vmode = V4SImode;
+      op0 = gen_lowpart (vmode, dest);
+
+      memset (perm2, elt, 4);
+      dest = gen_reg_rtx (vmode);
+      ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
+      gcc_assert (ok);
+
+      emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
+      return true;
+
     case E_V32QImode:
     case E_V16HImode:
     case E_V8SImode:
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 5421fb51684..929eef54055 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -266,9 +266,7 @@
 (define_mode_iterator VI12HF_AVX512VL
   [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
    V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
-   (V32HF "TARGET_AVX512FP16")
-   (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
-   (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")])
+   V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")])
 
 ;; Same iterator, but without supposed TARGET_AVX512BW
 (define_mode_iterator VI12_AVX512VLBW
@@ -285,8 +283,7 @@
    (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
    (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
    (V8DI "TARGET_AVX512F")  (V4DI "TARGET_AVX") V2DI
-   (V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16")
-   (V8HF "TARGET_AVX512FP16")
+   (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF
    (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
    (V8DF "TARGET_AVX512F")  (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
 
@@ -311,10 +308,10 @@
 
 ;; All 256bit and 512bit vector modes
 (define_mode_iterator V_256_512
-  [V32QI V16HI V8SI V4DI V8SF V4DF
-   (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
-   (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
-   (V16HF "TARGET_AVX512FP16") (V32HF "TARGET_AVX512FP16")])
+  [V32QI V16HI V16HF V8SI V4DI V8SF V4DF
+   (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V32HF "TARGET_AVX512F")
+   (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
+   (V8DF "TARGET_AVX512F")])
 
 ;; All vector float modes
 (define_mode_iterator VF
@@ -24892,8 +24889,8 @@
   "operands[2] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);")
 
 (define_insn "avx_vbroadcastf128_<mode>"
-  [(set (match_operand:V_256 0 "register_operand" "=x,x,x,v,v,v,v")
-	(vec_concat:V_256
+  [(set (match_operand:V_256H 0 "register_operand" "=x,x,x,v,v,v,v")
+	(vec_concat:V_256H
 	  (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x,m,0,m,0")
 	  (match_dup 1)))]
   "TARGET_AVX"


                 reply	other threads:[~2021-12-14 17:28 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211214172830.8D2FF385840C@sourceware.org \
    --to=uros@gcc.gnu.org \
    --cc=gcc-cvs@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).