public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] x86: Emit cvtne2ps2bf16 for odd increasing perm in __builtin_shufflevector
@ 2024-06-14  1:32 Levy Hsu
  0 siblings, 0 replies; 3+ messages in thread
From: Levy Hsu @ 2024-06-14  1:32 UTC (permalink / raw)
  To: gcc-patches; +Cc: admin, liwei.xu, crazylht, ubizjak

gcc/ChangeLog:

	* config/i386/i386-expand.cc
	(ix86_vectorize_vec_perm_const): Convert BF to HI using subreg.
	* config/i386/predicates.md
	(vcvtne2ps2bf_parallel): New predicate that matches odd increasing
	perm.
	* config/i386/sse.md
	(hi_cvt_bf): New mode_attr.
	(HI_CVT_BF): Likewise.
	(vpermt2_sepcial_bf16_shuffle_<mode>): New define_insn_and_split.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/vpermt2-special-bf16-shufflue.c: New test.
---
 gcc/config/i386/i386-expand.cc                |  4 +--
 gcc/config/i386/predicates.md                 | 11 ++++++
 gcc/config/i386/sse.md                        | 35 +++++++++++++++++++
 .../i386/vpermt2-special-bf16-shufflue.c      | 27 ++++++++++++++
 4 files changed, 75 insertions(+), 2 deletions(-)
 create mode 100755 gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 312329e550b..3d599c0651a 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -23657,8 +23657,8 @@ ix86_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
   if (GET_MODE_SIZE (vmode) == 64 && !TARGET_EVEX512)
     return false;
 
-  /* For HF mode vector, convert it to HI using subreg.  */
-  if (GET_MODE_INNER (vmode) == HFmode)
+  /* For HF and BF mode vector, convert it to HI using subreg.  */
+  if (GET_MODE_INNER (vmode) == HFmode || GET_MODE_INNER (vmode) == BFmode)
     {
       machine_mode orig_mode = vmode;
       vmode = mode_for_vector (HImode,
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 7afe3100cb7..1676c50de71 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -2322,3 +2322,14 @@
 
   return true;
 })
+
+;; Check that each element is odd and incrementally increasing from 1
+(define_predicate "vcvtne2ps2bf_parallel"
+  (and (match_code "const_vector")
+       (match_code "const_int" "a"))
+{
+  for (int i = 0; i < XVECLEN (op, 0); ++i)
+    if (INTVAL (XVECEXP (op, 0, i)) != (2 * i + 1))
+      return false;
+  return true;
+})
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 680a46a0b08..5ddd1c0a778 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -30698,3 +30698,38 @@
   "TARGET_AVXVNNIINT16"
   "vpdp<vpdpwprodtype>\t{%3, %2, %0|%0, %2, %3}"
    [(set_attr "prefix" "vex")])
+
+(define_mode_attr hi_cvt_bf
+  [(V8HI "v8bf") (V16HI "v16bf") (V32HI "v32bf")])
+
+(define_mode_attr HI_CVT_BF
+  [(V8HI "V8BF") (V16HI "V16BF") (V32HI "V32BF")])
+
+(define_insn_and_split "vpermt2_sepcial_bf16_shuffle_<mode>"
+  [(set (match_operand:VI2_AVX512F 0 "register_operand")
+	(unspec:VI2_AVX512F
+	  [(match_operand:VI2_AVX512F 1 "vcvtne2ps2bf_parallel")
+	   (match_operand:VI2_AVX512F 2 "register_operand")
+	   (match_operand:VI2_AVX512F 3 "nonimmediate_operand")]
+	   UNSPEC_VPERMT2))]
+  "TARGET_AVX512VL && TARGET_AVX512BF16 && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx op0 = gen_reg_rtx (<HI_CVT_BF>mode);
+  operands[2] = lowpart_subreg (<ssePSmode>mode,
+				force_reg (<MODE>mode, operands[2]),
+				<MODE>mode);
+  operands[3] = lowpart_subreg (<ssePSmode>mode,
+				force_reg (<MODE>mode, operands[3]),
+				<MODE>mode);
+
+  emit_insn (gen_avx512f_cvtne2ps2bf16_<hi_cvt_bf>(op0,
+						   operands[3],
+						   operands[2]));
+  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0,
+					       <HI_CVT_BF>mode));
+  DONE;
+}
+[(set_attr "mode" "<sseinsnmode>")])
diff --git a/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c b/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c
new file mode 100755
index 00000000000..5c65f2a9884
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bf16 -mavx512vl" } */
+/* { dg-final { scan-assembler-not "vpermi2b" } } */
+/* { dg-final { scan-assembler-times "vcvtne2ps2bf16" 3 } } */
+
+typedef __bf16 v8bf __attribute__((vector_size(16)));
+typedef __bf16 v16bf __attribute__((vector_size(32)));
+typedef __bf16 v32bf __attribute__((vector_size(64)));
+
+v8bf foo0(v8bf a, v8bf b)
+{
+  return __builtin_shufflevector(a, b, 1, 3, 5, 7, 9, 11, 13, 15);
+}
+
+v16bf foo1(v16bf a, v16bf b)
+{
+  return __builtin_shufflevector(a, b, 1, 3, 5, 7, 9, 11, 13, 15,
+                                 17, 19, 21, 23, 25, 27, 29, 31);
+}
+
+v32bf foo2(v32bf a, v32bf b)
+{
+  return __builtin_shufflevector(a, b, 1, 3, 5, 7, 9, 11, 13, 15, 
+                                 17, 19, 21, 23, 25, 27, 29, 31, 
+                                 33, 35, 37, 39, 41, 43, 45, 47, 
+                                 49, 51, 53, 55, 57, 59, 61, 63);
+}
-- 
2.31.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] x86: Emit cvtne2ps2bf16 for odd increasing perm in __builtin_shufflevector
  2024-06-14  1:35 Levy Hsu
@ 2024-06-17  3:14 ` Hongtao Liu
  0 siblings, 0 replies; 3+ messages in thread
From: Hongtao Liu @ 2024-06-17  3:14 UTC (permalink / raw)
  To: Levy Hsu; +Cc: gcc-patches, liwei.xu, ubizjak

On Fri, Jun 14, 2024 at 9:35 AM Levy Hsu <admin@levyhsu.com> wrote:
>
> This patch updates the GCC x86 backend to efficiently handle
> permutations of BF16 vectors that select the odd-indexed elements
> in increasing order (1, 3, 5, ...) using the cvtne2ps2bf16 instruction.
> It modifies ix86_vectorize_vec_perm_const to convert BF vectors to HI
> via subreg, as is already done for HF, and adds a predicate that
> matches this selector sequence.
>
> Bootstrapped and tested on x86_64-linux-gnu, OK for trunk?
Ok.
>
> gcc/ChangeLog:
>
>         * config/i386/i386-expand.cc
>         (ix86_vectorize_vec_perm_const): Convert BF to HI using subreg.
>         * config/i386/predicates.md
>         (vcvtne2ps2bf_parallel): New predicate that matches odd increasing
>         perm.
>         * config/i386/sse.md
>         (hi_cvt_bf): New mode_attr.
>         (HI_CVT_BF): Likewise.
>         (vpermt2_sepcial_bf16_shuffle_<mode>): New define_insn_and_split.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/vpermt2-special-bf16-shufflue.c: New test.
> ---
>  gcc/config/i386/i386-expand.cc                |  4 +--
>  gcc/config/i386/predicates.md                 | 11 ++++++
>  gcc/config/i386/sse.md                        | 35 +++++++++++++++++++
>  .../i386/vpermt2-special-bf16-shufflue.c      | 27 ++++++++++++++
>  4 files changed, 75 insertions(+), 2 deletions(-)
>  create mode 100755 gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c
>
> diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> index 312329e550b..3d599c0651a 100644
> --- a/gcc/config/i386/i386-expand.cc
> +++ b/gcc/config/i386/i386-expand.cc
> @@ -23657,8 +23657,8 @@ ix86_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
>    if (GET_MODE_SIZE (vmode) == 64 && !TARGET_EVEX512)
>      return false;
>
> -  /* For HF mode vector, convert it to HI using subreg.  */
> -  if (GET_MODE_INNER (vmode) == HFmode)
> +  /* For HF and BF mode vector, convert it to HI using subreg.  */
> +  if (GET_MODE_INNER (vmode) == HFmode || GET_MODE_INNER (vmode) == BFmode)
>      {
>        machine_mode orig_mode = vmode;
>        vmode = mode_for_vector (HImode,
> diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
> index 7afe3100cb7..1676c50de71 100644
> --- a/gcc/config/i386/predicates.md
> +++ b/gcc/config/i386/predicates.md
> @@ -2322,3 +2322,14 @@
>
>    return true;
>  })
> +
> +;; Check that each element is odd and incrementally increasing from 1
> +(define_predicate "vcvtne2ps2bf_parallel"
> +  (and (match_code "const_vector")
> +       (match_code "const_int" "a"))
> +{
> +  for (int i = 0; i < XVECLEN (op, 0); ++i)
> +    if (INTVAL (XVECEXP (op, 0, i)) != (2 * i + 1))
> +      return false;
> +  return true;
> +})
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 680a46a0b08..5ddd1c0a778 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -30698,3 +30698,38 @@
>    "TARGET_AVXVNNIINT16"
>    "vpdp<vpdpwprodtype>\t{%3, %2, %0|%0, %2, %3}"
>     [(set_attr "prefix" "vex")])
> +
> +(define_mode_attr hi_cvt_bf
> +  [(V8HI "v8bf") (V16HI "v16bf") (V32HI "v32bf")])
> +
> +(define_mode_attr HI_CVT_BF
> +  [(V8HI "V8BF") (V16HI "V16BF") (V32HI "V32BF")])
> +
> +(define_insn_and_split "vpermt2_sepcial_bf16_shuffle_<mode>"
> +  [(set (match_operand:VI2_AVX512F 0 "register_operand")
> +       (unspec:VI2_AVX512F
> +         [(match_operand:VI2_AVX512F 1 "vcvtne2ps2bf_parallel")
> +          (match_operand:VI2_AVX512F 2 "register_operand")
> +          (match_operand:VI2_AVX512F 3 "nonimmediate_operand")]
> +          UNSPEC_VPERMT2))]
> +  "TARGET_AVX512VL && TARGET_AVX512BF16 && ix86_pre_reload_split ()"
> +  "#"
> +  "&& 1"
> +  [(const_int 0)]
> +{
> +  rtx op0 = gen_reg_rtx (<HI_CVT_BF>mode);
> +  operands[2] = lowpart_subreg (<ssePSmode>mode,
> +                               force_reg (<MODE>mode, operands[2]),
> +                               <MODE>mode);
> +  operands[3] = lowpart_subreg (<ssePSmode>mode,
> +                               force_reg (<MODE>mode, operands[3]),
> +                               <MODE>mode);
> +
> +  emit_insn (gen_avx512f_cvtne2ps2bf16_<hi_cvt_bf>(op0,
> +                                                  operands[3],
> +                                                  operands[2]));
> +  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0,
> +                                              <HI_CVT_BF>mode));
> +  DONE;
> +}
> +[(set_attr "mode" "<sseinsnmode>")])
> diff --git a/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c b/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c
> new file mode 100755
> index 00000000000..5c65f2a9884
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c
> @@ -0,0 +1,27 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx512bf16 -mavx512vl" } */
> +/* { dg-final { scan-assembler-not "vpermi2b" } } */
> +/* { dg-final { scan-assembler-times "vcvtne2ps2bf16" 3 } } */
> +
> +typedef __bf16 v8bf __attribute__((vector_size(16)));
> +typedef __bf16 v16bf __attribute__((vector_size(32)));
> +typedef __bf16 v32bf __attribute__((vector_size(64)));
> +
> +v8bf foo0(v8bf a, v8bf b)
> +{
> +  return __builtin_shufflevector(a, b, 1, 3, 5, 7, 9, 11, 13, 15);
> +}
> +
> +v16bf foo1(v16bf a, v16bf b)
> +{
> +  return __builtin_shufflevector(a, b, 1, 3, 5, 7, 9, 11, 13, 15,
> +                                 17, 19, 21, 23, 25, 27, 29, 31);
> +}
> +
> +v32bf foo2(v32bf a, v32bf b)
> +{
> +  return __builtin_shufflevector(a, b, 1, 3, 5, 7, 9, 11, 13, 15,
> +                                 17, 19, 21, 23, 25, 27, 29, 31,
> +                                 33, 35, 37, 39, 41, 43, 45, 47,
> +                                 49, 51, 53, 55, 57, 59, 61, 63);
> +}
> --
> 2.31.1
>


-- 
BR,
Hongtao

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH] x86: Emit cvtne2ps2bf16 for odd increasing perm in __builtin_shufflevector
@ 2024-06-14  1:35 Levy Hsu
  2024-06-17  3:14 ` Hongtao Liu
  0 siblings, 1 reply; 3+ messages in thread
From: Levy Hsu @ 2024-06-14  1:35 UTC (permalink / raw)
  To: gcc-patches; +Cc: admin, liwei.xu, crazylht, ubizjak

This patch updates the GCC x86 backend to efficiently handle
permutations of BF16 vectors that select the odd-indexed elements
in increasing order (1, 3, 5, ...) using the cvtne2ps2bf16 instruction.
It modifies ix86_vectorize_vec_perm_const to convert BF vectors to HI
via subreg, as is already done for HF, and adds a predicate that
matches this selector sequence.

Bootstrapped and tested on x86_64-linux-gnu, OK for trunk?

gcc/ChangeLog:

	* config/i386/i386-expand.cc
	(ix86_vectorize_vec_perm_const): Convert BF to HI using subreg.
	* config/i386/predicates.md
	(vcvtne2ps2bf_parallel): New predicate that matches odd increasing
	perm.
	* config/i386/sse.md
	(hi_cvt_bf): New mode_attr.
	(HI_CVT_BF): Likewise.
	(vpermt2_sepcial_bf16_shuffle_<mode>): New define_insn_and_split.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/vpermt2-special-bf16-shufflue.c: New test.
---
 gcc/config/i386/i386-expand.cc                |  4 +--
 gcc/config/i386/predicates.md                 | 11 ++++++
 gcc/config/i386/sse.md                        | 35 +++++++++++++++++++
 .../i386/vpermt2-special-bf16-shufflue.c      | 27 ++++++++++++++
 4 files changed, 75 insertions(+), 2 deletions(-)
 create mode 100755 gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 312329e550b..3d599c0651a 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -23657,8 +23657,8 @@ ix86_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
   if (GET_MODE_SIZE (vmode) == 64 && !TARGET_EVEX512)
     return false;
 
-  /* For HF mode vector, convert it to HI using subreg.  */
-  if (GET_MODE_INNER (vmode) == HFmode)
+  /* For HF and BF mode vector, convert it to HI using subreg.  */
+  if (GET_MODE_INNER (vmode) == HFmode || GET_MODE_INNER (vmode) == BFmode)
     {
       machine_mode orig_mode = vmode;
       vmode = mode_for_vector (HImode,
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 7afe3100cb7..1676c50de71 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -2322,3 +2322,14 @@
 
   return true;
 })
+
+;; Check that each element is odd and incrementally increasing from 1
+(define_predicate "vcvtne2ps2bf_parallel"
+  (and (match_code "const_vector")
+       (match_code "const_int" "a"))
+{
+  for (int i = 0; i < XVECLEN (op, 0); ++i)
+    if (INTVAL (XVECEXP (op, 0, i)) != (2 * i + 1))
+      return false;
+  return true;
+})
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 680a46a0b08..5ddd1c0a778 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -30698,3 +30698,38 @@
   "TARGET_AVXVNNIINT16"
   "vpdp<vpdpwprodtype>\t{%3, %2, %0|%0, %2, %3}"
    [(set_attr "prefix" "vex")])
+
+(define_mode_attr hi_cvt_bf
+  [(V8HI "v8bf") (V16HI "v16bf") (V32HI "v32bf")])
+
+(define_mode_attr HI_CVT_BF
+  [(V8HI "V8BF") (V16HI "V16BF") (V32HI "V32BF")])
+
+(define_insn_and_split "vpermt2_sepcial_bf16_shuffle_<mode>"
+  [(set (match_operand:VI2_AVX512F 0 "register_operand")
+	(unspec:VI2_AVX512F
+	  [(match_operand:VI2_AVX512F 1 "vcvtne2ps2bf_parallel")
+	   (match_operand:VI2_AVX512F 2 "register_operand")
+	   (match_operand:VI2_AVX512F 3 "nonimmediate_operand")]
+	   UNSPEC_VPERMT2))]
+  "TARGET_AVX512VL && TARGET_AVX512BF16 && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx op0 = gen_reg_rtx (<HI_CVT_BF>mode);
+  operands[2] = lowpart_subreg (<ssePSmode>mode,
+				force_reg (<MODE>mode, operands[2]),
+				<MODE>mode);
+  operands[3] = lowpart_subreg (<ssePSmode>mode,
+				force_reg (<MODE>mode, operands[3]),
+				<MODE>mode);
+
+  emit_insn (gen_avx512f_cvtne2ps2bf16_<hi_cvt_bf>(op0,
+						   operands[3],
+						   operands[2]));
+  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0,
+					       <HI_CVT_BF>mode));
+  DONE;
+}
+[(set_attr "mode" "<sseinsnmode>")])
diff --git a/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c b/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c
new file mode 100755
index 00000000000..5c65f2a9884
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bf16 -mavx512vl" } */
+/* { dg-final { scan-assembler-not "vpermi2b" } } */
+/* { dg-final { scan-assembler-times "vcvtne2ps2bf16" 3 } } */
+
+typedef __bf16 v8bf __attribute__((vector_size(16)));
+typedef __bf16 v16bf __attribute__((vector_size(32)));
+typedef __bf16 v32bf __attribute__((vector_size(64)));
+
+v8bf foo0(v8bf a, v8bf b)
+{
+  return __builtin_shufflevector(a, b, 1, 3, 5, 7, 9, 11, 13, 15);
+}
+
+v16bf foo1(v16bf a, v16bf b)
+{
+  return __builtin_shufflevector(a, b, 1, 3, 5, 7, 9, 11, 13, 15,
+                                 17, 19, 21, 23, 25, 27, 29, 31);
+}
+
+v32bf foo2(v32bf a, v32bf b)
+{
+  return __builtin_shufflevector(a, b, 1, 3, 5, 7, 9, 11, 13, 15, 
+                                 17, 19, 21, 23, 25, 27, 29, 31, 
+                                 33, 35, 37, 39, 41, 43, 45, 47, 
+                                 49, 51, 53, 55, 57, 59, 61, 63);
+}
-- 
2.31.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2024-06-17  3:02 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-06-14  1:32 [PATCH] x86: Emit cvtne2ps2bf16 for odd increasing perm in __builtin_shufflevector Levy Hsu
2024-06-14  1:35 Levy Hsu
2024-06-17  3:14 ` Hongtao Liu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).