* [PATCH] x86: Enable FMA in unsigned SI to SF expanders
@ 2021-09-04 21:55 H.J. Lu
2021-09-06 5:39 ` Hongtao Liu
0 siblings, 1 reply; 4+ messages in thread
From: H.J. Lu @ 2021-09-04 21:55 UTC (permalink / raw)
To: gcc-patches; +Cc: liuhongt, Uros Bizjak
Enable FMA in scalar/vector unsigned SI to SF expanders.
gcc/
PR target/85819
* config/i386/i386-expand.c (ix86_expand_convert_uns_sisf_sse):
Enable FMA.
(ix86_expand_vector_convert_uns_vsivsf): Likewise.
gcc/testsuite/
PR target/85819
* gcc.target/i386/pr85819-1.c: New test.
* gcc.target/i386/pr85819-2a.c: Likewise.
* gcc.target/i386/pr85819-2b.c: Likewise.
* gcc.target/i386/pr85819-2c.c: Likewise.
* gcc.target/i386/pr85819-3.c: Likewise.
---
gcc/config/i386/i386-expand.c | 44 ++++++++++++++++------
gcc/testsuite/gcc.target/i386/pr85819-1.c | 11 ++++++
gcc/testsuite/gcc.target/i386/pr85819-2a.c | 17 +++++++++
gcc/testsuite/gcc.target/i386/pr85819-2b.c | 6 +++
gcc/testsuite/gcc.target/i386/pr85819-2c.c | 7 ++++
gcc/testsuite/gcc.target/i386/pr85819-3.c | 18 +++++++++
6 files changed, 91 insertions(+), 12 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/pr85819-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr85819-2a.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr85819-2b.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr85819-2c.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr85819-3.c
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 2500dbfa7fb..26263bbe1af 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -1851,12 +1851,21 @@ ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
fp_lo = gen_reg_rtx (SFmode);
emit_insn (gen_floatsisf2 (fp_hi, int_hi));
emit_insn (gen_floatsisf2 (fp_lo, int_lo));
- fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
- 0, OPTAB_DIRECT);
- fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
- 0, OPTAB_DIRECT);
- if (!rtx_equal_p (target, fp_hi))
- emit_move_insn (target, fp_hi);
+ if (TARGET_FMA || TARGET_AVX512F)
+ {
+ x = validize_mem (force_const_mem (SFmode, x));
+ fp_hi = gen_rtx_FMA (SFmode, fp_hi, x, fp_lo);
+ emit_move_insn (target, fp_hi);
+ }
+ else
+ {
+ fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
+ 0, OPTAB_DIRECT);
+ fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
+ 0, OPTAB_DIRECT);
+ if (!rtx_equal_p (target, fp_hi))
+ emit_move_insn (target, fp_hi);
+ }
}
/* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
@@ -1888,12 +1897,23 @@ ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
real_ldexp (&TWO16r, &dconst1, 16);
tmp[5] = const_double_from_real_value (TWO16r, SFmode);
tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
- tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
- OPTAB_DIRECT);
- tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
- OPTAB_DIRECT);
- if (tmp[7] != target)
- emit_move_insn (target, tmp[7]);
+ unsigned vector_size = GET_MODE_SIZE (fltmode);
+ if (TARGET_FMA
+ || (TARGET_AVX512F && vector_size == 64)
+ || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
+ {
+ tmp[6] = gen_rtx_FMA (fltmode, tmp[4], tmp[5], tmp[3]);
+ emit_move_insn (target, tmp[6]);
+ }
+ else
+ {
+ tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5],
+ NULL_RTX, 1, OPTAB_DIRECT);
+ tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6],
+ target, 1, OPTAB_DIRECT);
+ if (tmp[7] != target)
+ emit_move_insn (target, tmp[7]);
+ }
}
/* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
diff --git a/gcc/testsuite/gcc.target/i386/pr85819-1.c b/gcc/testsuite/gcc.target/i386/pr85819-1.c
new file mode 100644
index 00000000000..db02282d100
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr85819-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx512f -mfma -mfpmath=sse" } */
+
+float
+foo (unsigned int x)
+{
+ return x;
+}
+
+/* { dg-final { scan-assembler "vfmadd132ss" { target ia32 } } } */
+/* { dg-final { scan-assembler "vcvtsi2ssq" { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr85819-2a.c b/gcc/testsuite/gcc.target/i386/pr85819-2a.c
new file mode 100644
index 00000000000..cea599fe416
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr85819-2a.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mfma -mfpmath=sse" } */
+
+typedef float To __attribute__ ((__vector_size__ (32)));
+typedef unsigned int From __attribute__ ((__vector_size__ (32)));
+
+#define A2(I) (float)a[I], (float)a[1+I]
+#define A4(I) A2(I), A2(2+I)
+#define A8(I) A4(I), A4(4+I)
+
+To
+f(From a)
+{
+ return __extension__ (To) {A8(0)};
+}
+
+/* { dg-final { scan-assembler "vfmadd132ps" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr85819-2b.c b/gcc/testsuite/gcc.target/i386/pr85819-2b.c
new file mode 100644
index 00000000000..0750e56f29e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr85819-2b.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl -mfpmath=sse" } */
+
+#include "pr85819-2a.c"
+
+/* { dg-final { scan-assembler "vcvtudq2ps" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr85819-2c.c b/gcc/testsuite/gcc.target/i386/pr85819-2c.c
new file mode 100644
index 00000000000..821166908da
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr85819-2c.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-fma -mno-avx2 -mno-avx512vl -mavx512f -mfpmath=sse" } */
+
+#include "pr85819-2a.c"
+
+/* { dg-final { scan-assembler-not "vcvtudq2ps" } } */
+/* { dg-final { scan-assembler-not "vfmadd132ps" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr85819-3.c b/gcc/testsuite/gcc.target/i386/pr85819-3.c
new file mode 100644
index 00000000000..cd3bf9b8d35
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr85819-3.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mfpmath=sse" } */
+
+typedef float To __attribute__ ((__vector_size__ (64)));
+typedef unsigned int From __attribute__ ((__vector_size__ (64)));
+
+#define A2(I) (float)a[I], (float)a[1+I]
+#define A4(I) A2(I), A2(2+I)
+#define A8(I) A4(I), A4(4+I)
+#define A16(I) A8(I), A8(8+I)
+
+To
+f(From a)
+{
+ return __extension__ (To) {A16(0)};
+}
+
+/* { dg-final { scan-assembler "vcvtudq2ps" } } */
--
2.31.1
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] x86: Enable FMA in unsigned SI to SF expanders
2021-09-04 21:55 [PATCH] x86: Enable FMA in unsigned SI to SF expanders H.J. Lu
@ 2021-09-06 5:39 ` Hongtao Liu
2021-09-06 12:34 ` [PATCH v2] " H.J. Lu
0 siblings, 1 reply; 4+ messages in thread
From: Hongtao Liu @ 2021-09-06 5:39 UTC (permalink / raw)
To: H.J. Lu; +Cc: GCC Patches, liuhongt
On Sun, Sep 5, 2021 at 5:56 AM H.J. Lu via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Enable FMA in scalar/vector unsigned SI to SF expanders.
>
> gcc/
>
> PR target/85819
> * config/i386/i386-expand.c (ix86_expand_convert_uns_sisf_sse):
> Enable FMA.
> (ix86_expand_vector_convert_uns_vsivsf): Likewise.
>
> gcc/testsuite/
>
> PR target/85819
> * gcc.target/i386/pr85819-1.c: New test.
> * gcc.target/i386/pr85819-2a.c: Likewise.
> * gcc.target/i386/pr85819-2b.c: Likewise.
> * gcc.target/i386/pr85819-2c.c: Likewise.
> * gcc.target/i386/pr85819-3.c: Likewise.
> ---
> gcc/config/i386/i386-expand.c | 44 ++++++++++++++++------
> gcc/testsuite/gcc.target/i386/pr85819-1.c | 11 ++++++
> gcc/testsuite/gcc.target/i386/pr85819-2a.c | 17 +++++++++
> gcc/testsuite/gcc.target/i386/pr85819-2b.c | 6 +++
> gcc/testsuite/gcc.target/i386/pr85819-2c.c | 7 ++++
> gcc/testsuite/gcc.target/i386/pr85819-3.c | 18 +++++++++
> 6 files changed, 91 insertions(+), 12 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/i386/pr85819-1.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr85819-2a.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr85819-2b.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr85819-2c.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr85819-3.c
>
> diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
> index 2500dbfa7fb..26263bbe1af 100644
> --- a/gcc/config/i386/i386-expand.c
> +++ b/gcc/config/i386/i386-expand.c
> @@ -1851,12 +1851,21 @@ ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
> fp_lo = gen_reg_rtx (SFmode);
> emit_insn (gen_floatsisf2 (fp_hi, int_hi));
> emit_insn (gen_floatsisf2 (fp_lo, int_lo));
> - fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
> - 0, OPTAB_DIRECT);
> - fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
> - 0, OPTAB_DIRECT);
> - if (!rtx_equal_p (target, fp_hi))
> - emit_move_insn (target, fp_hi);
> + if (TARGET_FMA || TARGET_AVX512F)
Looking at the expander floatunssi<mode>2, the TARGET_AVX512F arm of the
|| in this condition can never be reached, since we have the direct
vcvtusi2s[sd] instruction under TARGET_AVX512F.
> + {
> + x = validize_mem (force_const_mem (SFmode, x));
> + fp_hi = gen_rtx_FMA (SFmode, fp_hi, x, fp_lo);
> + emit_move_insn (target, fp_hi);
> + }
> + else
> + {
> + fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
> + 0, OPTAB_DIRECT);
> + fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
> + 0, OPTAB_DIRECT);
> + if (!rtx_equal_p (target, fp_hi))
> + emit_move_insn (target, fp_hi);
> + }
> }
>
> /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
> @@ -1888,12 +1897,23 @@ ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
> real_ldexp (&TWO16r, &dconst1, 16);
> tmp[5] = const_double_from_real_value (TWO16r, SFmode);
> tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
> - tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
> - OPTAB_DIRECT);
> - tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
> - OPTAB_DIRECT);
> - if (tmp[7] != target)
> - emit_move_insn (target, tmp[7]);
> + unsigned vector_size = GET_MODE_SIZE (fltmode);
> + if (TARGET_FMA
> + || (TARGET_AVX512F && vector_size == 64)
> + || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
Similarly here: the last two || arms of the condition can never be reached, since vcvtudq2ps is used directly in those cases.
> + {
> + tmp[6] = gen_rtx_FMA (fltmode, tmp[4], tmp[5], tmp[3]);
> + emit_move_insn (target, tmp[6]);
> + }
> + else
> + {
> + tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5],
> + NULL_RTX, 1, OPTAB_DIRECT);
> + tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6],
> + target, 1, OPTAB_DIRECT);
> + if (tmp[7] != target)
> + emit_move_insn (target, tmp[7]);
> + }
> }
>
> /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
> diff --git a/gcc/testsuite/gcc.target/i386/pr85819-1.c b/gcc/testsuite/gcc.target/i386/pr85819-1.c
> new file mode 100644
> index 00000000000..db02282d100
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr85819-1.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mno-avx512f -mfma -mfpmath=sse" } */
> +
> +float
> +foo (unsigned int x)
> +{
> + return x;
> +}
> +
> +/* { dg-final { scan-assembler "vfmadd132ss" { target ia32 } } } */
> +/* { dg-final { scan-assembler "vcvtsi2ssq" { target { ! ia32 } } } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr85819-2a.c b/gcc/testsuite/gcc.target/i386/pr85819-2a.c
> new file mode 100644
> index 00000000000..cea599fe416
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr85819-2a.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mno-avx512f -mavx2 -mfma -mfpmath=sse" } */
> +
> +typedef float To __attribute__ ((__vector_size__ (32)));
> +typedef unsigned int From __attribute__ ((__vector_size__ (32)));
> +
> +#define A2(I) (float)a[I], (float)a[1+I]
> +#define A4(I) A2(I), A2(2+I)
> +#define A8(I) A4(I), A4(4+I)
> +
> +To
> +f(From a)
> +{
> + return __extension__ (To) {A8(0)};
> +}
> +
> +/* { dg-final { scan-assembler "vfmadd132ps" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr85819-2b.c b/gcc/testsuite/gcc.target/i386/pr85819-2b.c
> new file mode 100644
> index 00000000000..0750e56f29e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr85819-2b.c
> @@ -0,0 +1,6 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx512vl -mfpmath=sse" } */
> +
> +#include "pr85819-2a.c"
> +
> +/* { dg-final { scan-assembler "vcvtudq2ps" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr85819-2c.c b/gcc/testsuite/gcc.target/i386/pr85819-2c.c
> new file mode 100644
> index 00000000000..821166908da
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr85819-2c.c
> @@ -0,0 +1,7 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mno-fma -mno-avx2 -mno-avx512vl -mavx512f -mfpmath=sse" } */
> +
> +#include "pr85819-2a.c"
> +
> +/* { dg-final { scan-assembler-not "vcvtudq2ps" } } */
> +/* { dg-final { scan-assembler-not "vfmadd132ps" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr85819-3.c b/gcc/testsuite/gcc.target/i386/pr85819-3.c
> new file mode 100644
> index 00000000000..cd3bf9b8d35
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr85819-3.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx512f -mfpmath=sse" } */
> +
> +typedef float To __attribute__ ((__vector_size__ (64)));
> +typedef unsigned int From __attribute__ ((__vector_size__ (64)));
> +
> +#define A2(I) (float)a[I], (float)a[1+I]
> +#define A4(I) A2(I), A2(2+I)
> +#define A8(I) A4(I), A4(4+I)
> +#define A16(I) A8(I), A8(8+I)
> +
> +To
> +f(From a)
> +{
> + return __extension__ (To) {A16(0)};
> +}
> +
> +/* { dg-final { scan-assembler "vcvtudq2ps" } } */
> --
> 2.31.1
>
--
BR,
Hongtao
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH v2] x86: Enable FMA in unsigned SI to SF expanders
2021-09-06 5:39 ` Hongtao Liu
@ 2021-09-06 12:34 ` H.J. Lu
2021-09-07 1:15 ` Hongtao Liu
0 siblings, 1 reply; 4+ messages in thread
From: H.J. Lu @ 2021-09-06 12:34 UTC (permalink / raw)
To: Hongtao Liu; +Cc: GCC Patches, liuhongt
[-- Attachment #1: Type: text/plain, Size: 4676 bytes --]
On Sun, Sep 5, 2021 at 10:34 PM Hongtao Liu <crazylht@gmail.com> wrote:
>
> On Sun, Sep 5, 2021 at 5:56 AM H.J. Lu via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > Enable FMA in scalar/vector unsigned SI to SF expanders.
> >
> > gcc/
> >
> > PR target/85819
> > * config/i386/i386-expand.c (ix86_expand_convert_uns_sisf_sse):
> > Enable FMA.
> > (ix86_expand_vector_convert_uns_vsivsf): Likewise.
> >
> > gcc/testsuite/
> >
> > PR target/85819
> > * gcc.target/i386/pr85819-1.c: New test.
> > * gcc.target/i386/pr85819-2a.c: Likewise.
> > * gcc.target/i386/pr85819-2b.c: Likewise.
> > * gcc.target/i386/pr85819-2c.c: Likewise.
> > * gcc.target/i386/pr85819-3.c: Likewise.
> > ---
> > gcc/config/i386/i386-expand.c | 44 ++++++++++++++++------
> > gcc/testsuite/gcc.target/i386/pr85819-1.c | 11 ++++++
> > gcc/testsuite/gcc.target/i386/pr85819-2a.c | 17 +++++++++
> > gcc/testsuite/gcc.target/i386/pr85819-2b.c | 6 +++
> > gcc/testsuite/gcc.target/i386/pr85819-2c.c | 7 ++++
> > gcc/testsuite/gcc.target/i386/pr85819-3.c | 18 +++++++++
> > 6 files changed, 91 insertions(+), 12 deletions(-)
> > create mode 100644 gcc/testsuite/gcc.target/i386/pr85819-1.c
> > create mode 100644 gcc/testsuite/gcc.target/i386/pr85819-2a.c
> > create mode 100644 gcc/testsuite/gcc.target/i386/pr85819-2b.c
> > create mode 100644 gcc/testsuite/gcc.target/i386/pr85819-2c.c
> > create mode 100644 gcc/testsuite/gcc.target/i386/pr85819-3.c
> >
> > diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
> > index 2500dbfa7fb..26263bbe1af 100644
> > --- a/gcc/config/i386/i386-expand.c
> > +++ b/gcc/config/i386/i386-expand.c
> > @@ -1851,12 +1851,21 @@ ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
> > fp_lo = gen_reg_rtx (SFmode);
> > emit_insn (gen_floatsisf2 (fp_hi, int_hi));
> > emit_insn (gen_floatsisf2 (fp_lo, int_lo));
> > - fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
> > - 0, OPTAB_DIRECT);
> > - fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
> > - 0, OPTAB_DIRECT);
> > - if (!rtx_equal_p (target, fp_hi))
> > - emit_move_insn (target, fp_hi);
> > + if (TARGET_FMA || TARGET_AVX512F)
> Looking at the expander floatunssi<mode>2, the TARGET_AVX512F arm of the
> || in this condition can never be reached, since we have the direct
> vcvtusi2s[sd] instruction under TARGET_AVX512F.
Fixed.
> > + {
> > + x = validize_mem (force_const_mem (SFmode, x));
> > + fp_hi = gen_rtx_FMA (SFmode, fp_hi, x, fp_lo);
> > + emit_move_insn (target, fp_hi);
> > + }
> > + else
> > + {
> > + fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
> > + 0, OPTAB_DIRECT);
> > + fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
> > + 0, OPTAB_DIRECT);
> > + if (!rtx_equal_p (target, fp_hi))
> > + emit_move_insn (target, fp_hi);
> > + }
> > }
> >
> > /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
> > @@ -1888,12 +1897,23 @@ ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
> > real_ldexp (&TWO16r, &dconst1, 16);
> > tmp[5] = const_double_from_real_value (TWO16r, SFmode);
> > tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
> > - tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
> > - OPTAB_DIRECT);
> > - tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
> > - OPTAB_DIRECT);
> > - if (tmp[7] != target)
> > - emit_move_insn (target, tmp[7]);
> > + unsigned vector_size = GET_MODE_SIZE (fltmode);
> > + if (TARGET_FMA
> > + || (TARGET_AVX512F && vector_size == 64)
> > + || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
> Similarly here: the last two || arms of the condition can never be reached, since vcvtudq2ps is used directly in those cases.
Fixed.
> > + {
> > + tmp[6] = gen_rtx_FMA (fltmode, tmp[4], tmp[5], tmp[3]);
> > + emit_move_insn (target, tmp[6]);
> > + }
> > + else
> > + {
> > + tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5],
> > + NULL_RTX, 1, OPTAB_DIRECT);
> > + tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6],
> > + target, 1, OPTAB_DIRECT);
> > + if (tmp[7] != target)
> > + emit_move_insn (target, tmp[7]);
> > + }
> > }
> >
> > /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
Here is the v2 patch.
--
H.J.
[-- Attachment #2: v2-0001-x86-Enable-FMA-in-unsigned-SI-to-SF-expanders.patch --]
[-- Type: text/x-patch, Size: 7004 bytes --]
From 35dac257d711aa9b37604d30c9d41abaf94b8650 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Sat, 4 Sep 2021 07:48:43 -0700
Subject: [PATCH v2] x86: Enable FMA in unsigned SI to SF expanders
Enable FMA in scalar/vector unsigned SI to SF expanders. Don't check
TARGET_AVX512F, since AVX512F provides the vcvtusi2ss and vcvtudq2ps
instructions, which perform the conversion directly.
gcc/
PR target/85819
* config/i386/i386-expand.c (ix86_expand_convert_uns_sisf_sse):
Enable FMA.
(ix86_expand_vector_convert_uns_vsivsf): Likewise.
gcc/testsuite/
PR target/85819
* gcc.target/i386/pr85819-1a.c: New test.
* gcc.target/i386/pr85819-1b.c: Likewise.
* gcc.target/i386/pr85819-2a.c: Likewise.
* gcc.target/i386/pr85819-2b.c: Likewise.
* gcc.target/i386/pr85819-2c.c: Likewise.
* gcc.target/i386/pr85819-3.c: Likewise.
---
gcc/config/i386/i386-expand.c | 41 +++++++++++++++-------
gcc/testsuite/gcc.target/i386/pr85819-1a.c | 11 ++++++
gcc/testsuite/gcc.target/i386/pr85819-1b.c | 6 ++++
gcc/testsuite/gcc.target/i386/pr85819-2a.c | 17 +++++++++
gcc/testsuite/gcc.target/i386/pr85819-2b.c | 6 ++++
gcc/testsuite/gcc.target/i386/pr85819-2c.c | 7 ++++
gcc/testsuite/gcc.target/i386/pr85819-3.c | 18 ++++++++++
7 files changed, 94 insertions(+), 12 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/pr85819-1a.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr85819-1b.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr85819-2a.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr85819-2b.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr85819-2c.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr85819-3.c
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 273a0ba8e3d..3f90f67a994 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -1851,12 +1851,21 @@ ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
fp_lo = gen_reg_rtx (SFmode);
emit_insn (gen_floatsisf2 (fp_hi, int_hi));
emit_insn (gen_floatsisf2 (fp_lo, int_lo));
- fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
- 0, OPTAB_DIRECT);
- fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
- 0, OPTAB_DIRECT);
- if (!rtx_equal_p (target, fp_hi))
- emit_move_insn (target, fp_hi);
+ if (TARGET_FMA)
+ {
+ x = validize_mem (force_const_mem (SFmode, x));
+ fp_hi = gen_rtx_FMA (SFmode, fp_hi, x, fp_lo);
+ emit_move_insn (target, fp_hi);
+ }
+ else
+ {
+ fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
+ 0, OPTAB_DIRECT);
+ fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
+ 0, OPTAB_DIRECT);
+ if (!rtx_equal_p (target, fp_hi))
+ emit_move_insn (target, fp_hi);
+ }
}
/* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
@@ -1888,12 +1897,20 @@ ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
real_ldexp (&TWO16r, &dconst1, 16);
tmp[5] = const_double_from_real_value (TWO16r, SFmode);
tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
- tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
- OPTAB_DIRECT);
- tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
- OPTAB_DIRECT);
- if (tmp[7] != target)
- emit_move_insn (target, tmp[7]);
+ if (TARGET_FMA)
+ {
+ tmp[6] = gen_rtx_FMA (fltmode, tmp[4], tmp[5], tmp[3]);
+ emit_move_insn (target, tmp[6]);
+ }
+ else
+ {
+ tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5],
+ NULL_RTX, 1, OPTAB_DIRECT);
+ tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6],
+ target, 1, OPTAB_DIRECT);
+ if (tmp[7] != target)
+ emit_move_insn (target, tmp[7]);
+ }
}
/* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
diff --git a/gcc/testsuite/gcc.target/i386/pr85819-1a.c b/gcc/testsuite/gcc.target/i386/pr85819-1a.c
new file mode 100644
index 00000000000..db02282d100
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr85819-1a.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx512f -mfma -mfpmath=sse" } */
+
+float
+foo (unsigned int x)
+{
+ return x;
+}
+
+/* { dg-final { scan-assembler "vfmadd132ss" { target ia32 } } } */
+/* { dg-final { scan-assembler "vcvtsi2ssq" { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr85819-1b.c b/gcc/testsuite/gcc.target/i386/pr85819-1b.c
new file mode 100644
index 00000000000..f1408c4f463
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr85819-1b.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mfpmath=sse" } */
+
+#include "pr85819-1a.c"
+
+/* { dg-final { scan-assembler "vcvtusi2ss" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr85819-2a.c b/gcc/testsuite/gcc.target/i386/pr85819-2a.c
new file mode 100644
index 00000000000..cea599fe416
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr85819-2a.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mfma -mfpmath=sse" } */
+
+typedef float To __attribute__ ((__vector_size__ (32)));
+typedef unsigned int From __attribute__ ((__vector_size__ (32)));
+
+#define A2(I) (float)a[I], (float)a[1+I]
+#define A4(I) A2(I), A2(2+I)
+#define A8(I) A4(I), A4(4+I)
+
+To
+f(From a)
+{
+ return __extension__ (To) {A8(0)};
+}
+
+/* { dg-final { scan-assembler "vfmadd132ps" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr85819-2b.c b/gcc/testsuite/gcc.target/i386/pr85819-2b.c
new file mode 100644
index 00000000000..0750e56f29e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr85819-2b.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl -mfpmath=sse" } */
+
+#include "pr85819-2a.c"
+
+/* { dg-final { scan-assembler "vcvtudq2ps" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr85819-2c.c b/gcc/testsuite/gcc.target/i386/pr85819-2c.c
new file mode 100644
index 00000000000..821166908da
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr85819-2c.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-fma -mno-avx2 -mno-avx512vl -mavx512f -mfpmath=sse" } */
+
+#include "pr85819-2a.c"
+
+/* { dg-final { scan-assembler-not "vcvtudq2ps" } } */
+/* { dg-final { scan-assembler-not "vfmadd132ps" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr85819-3.c b/gcc/testsuite/gcc.target/i386/pr85819-3.c
new file mode 100644
index 00000000000..cd3bf9b8d35
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr85819-3.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mfpmath=sse" } */
+
+typedef float To __attribute__ ((__vector_size__ (64)));
+typedef unsigned int From __attribute__ ((__vector_size__ (64)));
+
+#define A2(I) (float)a[I], (float)a[1+I]
+#define A4(I) A2(I), A2(2+I)
+#define A8(I) A4(I), A4(4+I)
+#define A16(I) A8(I), A8(8+I)
+
+To
+f(From a)
+{
+ return __extension__ (To) {A16(0)};
+}
+
+/* { dg-final { scan-assembler "vcvtudq2ps" } } */
--
2.31.1
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH v2] x86: Enable FMA in unsigned SI to SF expanders
2021-09-06 12:34 ` [PATCH v2] " H.J. Lu
@ 2021-09-07 1:15 ` Hongtao Liu
0 siblings, 0 replies; 4+ messages in thread
From: Hongtao Liu @ 2021-09-07 1:15 UTC (permalink / raw)
To: H.J. Lu; +Cc: GCC Patches, liuhongt
On Mon, Sep 6, 2021 at 8:35 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Sun, Sep 5, 2021 at 10:34 PM Hongtao Liu <crazylht@gmail.com> wrote:
> >
> > On Sun, Sep 5, 2021 at 5:56 AM H.J. Lu via Gcc-patches
> > <gcc-patches@gcc.gnu.org> wrote:
> > >
> > > Enable FMA in scalar/vector unsigned SI to SF expanders.
> > >
> > > gcc/
> > >
> > > PR target/85819
> > > * config/i386/i386-expand.c (ix86_expand_convert_uns_sisf_sse):
> > > Enable FMA.
> > > (ix86_expand_vector_convert_uns_vsivsf): Likewise.
> > >
> > > gcc/testsuite/
> > >
> > > PR target/85819
> > > * gcc.target/i386/pr85819-1.c: New test.
> > > * gcc.target/i386/pr85819-2a.c: Likewise.
> > > * gcc.target/i386/pr85819-2b.c: Likewise.
> > > * gcc.target/i386/pr85819-2c.c: Likewise.
> > > * gcc.target/i386/pr85819-3.c: Likewise.
> > > ---
> > > gcc/config/i386/i386-expand.c | 44 ++++++++++++++++------
> > > gcc/testsuite/gcc.target/i386/pr85819-1.c | 11 ++++++
> > > gcc/testsuite/gcc.target/i386/pr85819-2a.c | 17 +++++++++
> > > gcc/testsuite/gcc.target/i386/pr85819-2b.c | 6 +++
> > > gcc/testsuite/gcc.target/i386/pr85819-2c.c | 7 ++++
> > > gcc/testsuite/gcc.target/i386/pr85819-3.c | 18 +++++++++
> > > 6 files changed, 91 insertions(+), 12 deletions(-)
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr85819-1.c
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr85819-2a.c
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr85819-2b.c
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr85819-2c.c
> > > create mode 100644 gcc/testsuite/gcc.target/i386/pr85819-3.c
> > >
> > > diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
> > > index 2500dbfa7fb..26263bbe1af 100644
> > > --- a/gcc/config/i386/i386-expand.c
> > > +++ b/gcc/config/i386/i386-expand.c
> > > @@ -1851,12 +1851,21 @@ ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
> > > fp_lo = gen_reg_rtx (SFmode);
> > > emit_insn (gen_floatsisf2 (fp_hi, int_hi));
> > > emit_insn (gen_floatsisf2 (fp_lo, int_lo));
> > > - fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
> > > - 0, OPTAB_DIRECT);
> > > - fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
> > > - 0, OPTAB_DIRECT);
> > > - if (!rtx_equal_p (target, fp_hi))
> > > - emit_move_insn (target, fp_hi);
> > > + if (TARGET_FMA || TARGET_AVX512F)
> > Looking at the expander floatunssi<mode>2, the TARGET_AVX512F arm of the
> > || in this condition can never be reached, since we have the direct
> > vcvtusi2s[sd] instruction under TARGET_AVX512F.
>
> Fixed.
>
> > > + {
> > > + x = validize_mem (force_const_mem (SFmode, x));
> > > + fp_hi = gen_rtx_FMA (SFmode, fp_hi, x, fp_lo);
> > > + emit_move_insn (target, fp_hi);
> > > + }
> > > + else
> > > + {
> > > + fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
> > > + 0, OPTAB_DIRECT);
> > > + fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
> > > + 0, OPTAB_DIRECT);
> > > + if (!rtx_equal_p (target, fp_hi))
> > > + emit_move_insn (target, fp_hi);
> > > + }
> > > }
> > >
> > > /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
> > > @@ -1888,12 +1897,23 @@ ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
> > > real_ldexp (&TWO16r, &dconst1, 16);
> > > tmp[5] = const_double_from_real_value (TWO16r, SFmode);
> > > tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
> > > - tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
> > > - OPTAB_DIRECT);
> > > - tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
> > > - OPTAB_DIRECT);
> > > - if (tmp[7] != target)
> > > - emit_move_insn (target, tmp[7]);
> > > + unsigned vector_size = GET_MODE_SIZE (fltmode);
> > > + if (TARGET_FMA
> > > + || (TARGET_AVX512F && vector_size == 64)
> > > + || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
> > Similarly here: the last two || arms of the condition can never be reached, since vcvtudq2ps is used directly in those cases.
>
> Fixed.
>
> > > + {
> > > + tmp[6] = gen_rtx_FMA (fltmode, tmp[4], tmp[5], tmp[3]);
> > > + emit_move_insn (target, tmp[6]);
> > > + }
> > > + else
> > > + {
> > > + tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5],
> > > + NULL_RTX, 1, OPTAB_DIRECT);
> > > + tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6],
> > > + target, 1, OPTAB_DIRECT);
> > > + if (tmp[7] != target)
> > > + emit_move_insn (target, tmp[7]);
> > > + }
> > > }
> > >
> > > /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
>
> Here is the v2 patch.
LGTM.
>
> --
> H.J.
--
BR,
Hongtao
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2021-09-07 1:10 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-04 21:55 [PATCH] x86: Enable FMA in unsigned SI to SF expanders H.J. Lu
2021-09-06 5:39 ` Hongtao Liu
2021-09-06 12:34 ` [PATCH v2] " H.J. Lu
2021-09-07 1:15 ` Hongtao Liu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).