* [PATCH] Fix regression introduced by r12-5536.
@ 2021-11-29 1:32 liuhongt
2021-11-29 7:53 ` Uros Bizjak
0 siblings, 1 reply; 5+ messages in thread
From: liuhongt @ 2021-11-29 1:32 UTC (permalink / raw)
To: gcc-patches
There're several failures reported in [1]:
1. unsupported instruction `pextrw` for "pextrw $0, %xmm31, 16(%rax)"
%vpextrw should be used in output templates.
2. ICE in get_attr_memory for movhi_internal since some alternatives
are marked as TYPE_SSELOG.
Explicitly set memory_attr for those alternatives.
Also this patch fixs a typo and some latent bugs which are related to
moving HImode from/to sse register w/o TARGET_AVX512FP16.
For optimization issues discussed in PR102811, I'll create another patch for
it.
[1] https://gcc.gnu.org/pipermail/gcc-regression/2021-November/075893.html
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,} and
x86_64-pc-linux-gnu{-m32\ -march=cascadelake,\ -march=cascadelake}
Ok for trunk?
gcc/ChangeLog:
* config/i386/i386.c (ix86_secondary_reload): Without
TARGET_SSE4_1, General register is needed to move HImode from
sse register to memory.
* config/i386/sse.md (*vec_extrachf): Use %vpextrw instead of
pextrw in output templates.
* config/i386/i386.md (movhi_internal): Ditto, also fix typo of
MEM_P (operands[1]) and adjust memory/mode/prefix/type
attribute for alternatives related to sse register.
---
gcc/config/i386/i386.c | 2 +-
gcc/config/i386/i386.md | 44 ++++++++++++++++++++++++++++++-----------
gcc/config/i386/sse.md | 6 +++---
3 files changed, 36 insertions(+), 16 deletions(-)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 3dedf522c42..7cf599f57f7 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -19277,7 +19277,7 @@ ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
}
/* Require movement to gpr, and then store to memory. */
- if (mode == HFmode
+ if ((mode == HFmode || mode == HImode)
&& !TARGET_SSE4_1
&& SSE_CLASS_P (rclass)
&& !in_p && MEM_P (x))
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 68606e57e60..2cb3e727588 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2528,12 +2528,12 @@ (define_insn "*movhi_internal"
case TYPE_SSELOG:
if (SSE_REG_P (operands[0]))
return MEM_P (operands[1])
- ? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
- : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
+ ? "%vpinsrw\t{$0, %1, %0|%0, %1, 0}"
+ : "%vpinsrw\t{$0, %k1, %0|%0, %k1, 0}";
else
- return MEM_P (operands[1])
- ? "pextrw\t{$0, %1, %0|%0, %1, 0}"
- : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}";
+ return MEM_P (operands[0])
+ ? "%vpextrw\t{$0, %1, %0|%0, %1, 0}"
+ : "%vpextrw\t{$0, %1, %k0|%k0, %1, 0}";
case TYPE_MSKLOG:
if (operands[1] == const0_rtx)
@@ -2557,12 +2557,14 @@ (define_insn "*movhi_internal"
]
(const_string "*")))
(set (attr "type")
- (cond [(eq_attr "alternative" "9,10,11,12,13")
+ (cond [(eq_attr "alternative" "9,10,12,13")
(if_then_else (match_test "TARGET_AVX512FP16")
(const_string "ssemov")
(const_string "sselog"))
(eq_attr "alternative" "4,5,6,7")
(const_string "mskmov")
+ (eq_attr "alternative" "11")
+ (const_string "ssemov")
(eq_attr "alternative" "8")
(const_string "msklog")
(match_test "optimize_function_for_size_p (cfun)")
@@ -2579,15 +2581,33 @@ (define_insn "*movhi_internal"
(const_string "imovx")
]
(const_string "imov")))
+ (set (attr "memory")
+ (cond [(eq_attr "alternative" "9,10")
+ (const_string "none")
+ (eq_attr "alternative" "12")
+ (const_string "load")
+ (eq_attr "alternative" "13")
+ (const_string "store")
+ ]
+ (const_string "*")))
(set (attr "prefix")
- (if_then_else (eq_attr "alternative" "4,5,6,7,8")
- (const_string "vex")
- (const_string "orig")))
+ (cond [(eq_attr "alternative" "9,10,11,12,13")
+ (const_string "maybe_evex")
+ (eq_attr "alternative" "4,5,6,7,8")
+ (const_string "vex")
+ ]
+ (const_string "orig")))
(set (attr "mode")
(cond [(eq_attr "type" "imovx")
(const_string "SI")
+ (eq_attr "alternative" "9,10,12,13")
+ (if_then_else (match_test "TARGET_AVX512FP16")
+ (const_string "HI")
+ (const_string "TI"))
(eq_attr "alternative" "11")
- (const_string "HF")
+ (if_then_else (match_test "TARGET_AVX512FP16")
+ (const_string "HF")
+ (const_string "SF"))
(and (eq_attr "alternative" "1,2")
(match_operand:HI 1 "aligned_operand"))
(const_string "SI")
@@ -3791,9 +3811,9 @@ (define_insn "*movhf_internal"
? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
: "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
else
- return MEM_P (operands[1])
+ return MEM_P (operands[0])
? "pextrw\t{$0, %1, %0|%0, %1, 0}"
- : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}";
+ : "pextrw\t{$0, %1, %k0|%k0, %1, 0}";
default:
gcc_unreachable ();
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index b109c2aa8fa..5229b23af98 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -11315,9 +11315,9 @@ (define_insn "*vec_extracthf"
switch (which_alternative)
{
case 0:
- return "vpextrw\t{%2, %1, %k0|%k0, %1, %2}";
+ return "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}";
case 1:
- return "vpextrw\t{%2, %1, %0|%0, %1, %2}";
+ return "%vpextrw\t{%2, %1, %0|%0, %1, %2}";
case 2:
operands[2] = GEN_INT (INTVAL (operands[2]) * 2);
@@ -11330,7 +11330,7 @@ (define_insn "*vec_extracthf"
gcc_unreachable ();
}
}
- [(set_attr "isa" "*,*,noavx,avx")
+ [(set_attr "isa" "*,sse4,noavx,avx")
(set_attr "type" "sselog1,sselog1,sseishft1,sseishft1")
(set_attr "prefix" "maybe_evex")
(set_attr "mode" "TI")])
--
2.18.1
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] Fix regression introduced by r12-5536.
2021-11-29 1:32 [PATCH] Fix regression introduced by r12-5536 liuhongt
@ 2021-11-29 7:53 ` Uros Bizjak
2021-11-29 9:55 ` Hongtao Liu
0 siblings, 1 reply; 5+ messages in thread
From: Uros Bizjak @ 2021-11-29 7:53 UTC (permalink / raw)
To: liuhongt; +Cc: gcc-patches, Hongtao Liu, H. J. Lu
On Mon, Nov 29, 2021 at 2:32 AM liuhongt <hongtao.liu@intel.com> wrote:
>
> There're several failures reported in [1]:
> 1. unsupported instruction `pextrw` for "pextrw $0, %xmm31, 16(%rax)"
> %vpextrw should be used in output templates.
> 2. ICE in get_attr_memory for movhi_internal since some alternatives
> are marked as TYPE_SSELOG.
> Explicitly set memory_attr for those alternatives.
>
> Also this patch fixs a typo and some latent bugs which are related to
> moving HImode from/to sse register w/o TARGET_AVX512FP16.
>
> For optimization issues discussed in PR102811, I'll create another patch for
> it.
> [1] https://gcc.gnu.org/pipermail/gcc-regression/2021-November/075893.html
>
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,} and
> x86_64-pc-linux-gnu{-m32\ -march=cascadelake,\ -march=cascadelake}
> Ok for trunk?
>
> gcc/ChangeLog:
>
> * config/i386/i386.c (ix86_secondary_reload): Without
> TARGET_SSE4_1, General register is needed to move HImode from
> sse register to memory.
> * config/i386/sse.md (*vec_extrachf): Use %vpextrw instead of
> pextrw in output templates.
> * config/i386/i386.md (movhi_internal): Ditto, also fix typo of
> MEM_P (operands[1]) and adjust memory/mode/prefix/type
> attribute for alternatives related to sse register.
OK, but please use sselog1 type instead so you don't need to introduce
the memory attribute.
Thanks,
Uros.
> ---
> gcc/config/i386/i386.c | 2 +-
> gcc/config/i386/i386.md | 44 ++++++++++++++++++++++++++++++-----------
> gcc/config/i386/sse.md | 6 +++---
> 3 files changed, 36 insertions(+), 16 deletions(-)
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 3dedf522c42..7cf599f57f7 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -19277,7 +19277,7 @@ ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
> }
>
> /* Require movement to gpr, and then store to memory. */
> - if (mode == HFmode
> + if ((mode == HFmode || mode == HImode)
> && !TARGET_SSE4_1
> && SSE_CLASS_P (rclass)
> && !in_p && MEM_P (x))
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 68606e57e60..2cb3e727588 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -2528,12 +2528,12 @@ (define_insn "*movhi_internal"
> case TYPE_SSELOG:
> if (SSE_REG_P (operands[0]))
> return MEM_P (operands[1])
> - ? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
> - : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
> + ? "%vpinsrw\t{$0, %1, %0|%0, %1, 0}"
> + : "%vpinsrw\t{$0, %k1, %0|%0, %k1, 0}";
> else
> - return MEM_P (operands[1])
> - ? "pextrw\t{$0, %1, %0|%0, %1, 0}"
> - : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}";
> + return MEM_P (operands[0])
> + ? "%vpextrw\t{$0, %1, %0|%0, %1, 0}"
> + : "%vpextrw\t{$0, %1, %k0|%k0, %1, 0}";
>
> case TYPE_MSKLOG:
> if (operands[1] == const0_rtx)
> @@ -2557,12 +2557,14 @@ (define_insn "*movhi_internal"
> ]
> (const_string "*")))
> (set (attr "type")
> - (cond [(eq_attr "alternative" "9,10,11,12,13")
> + (cond [(eq_attr "alternative" "9,10,12,13")
> (if_then_else (match_test "TARGET_AVX512FP16")
> (const_string "ssemov")
> (const_string "sselog"))
> (eq_attr "alternative" "4,5,6,7")
> (const_string "mskmov")
> + (eq_attr "alternative" "11")
> + (const_string "ssemov")
> (eq_attr "alternative" "8")
> (const_string "msklog")
> (match_test "optimize_function_for_size_p (cfun)")
> @@ -2579,15 +2581,33 @@ (define_insn "*movhi_internal"
> (const_string "imovx")
> ]
> (const_string "imov")))
> + (set (attr "memory")
> + (cond [(eq_attr "alternative" "9,10")
> + (const_string "none")
> + (eq_attr "alternative" "12")
> + (const_string "load")
> + (eq_attr "alternative" "13")
> + (const_string "store")
> + ]
> + (const_string "*")))
Please use sselog1 type instead, and the memory attribute will be
calculated correctly.
> (set (attr "prefix")
> - (if_then_else (eq_attr "alternative" "4,5,6,7,8")
> - (const_string "vex")
> - (const_string "orig")))
> + (cond [(eq_attr "alternative" "9,10,11,12,13")
> + (const_string "maybe_evex")
> + (eq_attr "alternative" "4,5,6,7,8")
> + (const_string "vex")
> + ]
> + (const_string "orig")))
> (set (attr "mode")
> (cond [(eq_attr "type" "imovx")
> (const_string "SI")
> + (eq_attr "alternative" "9,10,12,13")
> + (if_then_else (match_test "TARGET_AVX512FP16")
> + (const_string "HI")
> + (const_string "TI"))
> (eq_attr "alternative" "11")
> - (const_string "HF")
> + (if_then_else (match_test "TARGET_AVX512FP16")
> + (const_string "HF")
> + (const_string "SF"))
> (and (eq_attr "alternative" "1,2")
> (match_operand:HI 1 "aligned_operand"))
> (const_string "SI")
> @@ -3791,9 +3811,9 @@ (define_insn "*movhf_internal"
> ? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
> : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
> else
> - return MEM_P (operands[1])
> + return MEM_P (operands[0])
> ? "pextrw\t{$0, %1, %0|%0, %1, 0}"
> - : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}";
> + : "pextrw\t{$0, %1, %k0|%k0, %1, 0}";
>
> default:
> gcc_unreachable ();
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index b109c2aa8fa..5229b23af98 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -11315,9 +11315,9 @@ (define_insn "*vec_extracthf"
> switch (which_alternative)
> {
> case 0:
> - return "vpextrw\t{%2, %1, %k0|%k0, %1, %2}";
> + return "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}";
> case 1:
> - return "vpextrw\t{%2, %1, %0|%0, %1, %2}";
> + return "%vpextrw\t{%2, %1, %0|%0, %1, %2}";
>
> case 2:
> operands[2] = GEN_INT (INTVAL (operands[2]) * 2);
> @@ -11330,7 +11330,7 @@ (define_insn "*vec_extracthf"
> gcc_unreachable ();
> }
> }
> - [(set_attr "isa" "*,*,noavx,avx")
> + [(set_attr "isa" "*,sse4,noavx,avx")
> (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1")
> (set_attr "prefix" "maybe_evex")
> (set_attr "mode" "TI")])
> --
> 2.18.1
>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] Fix regression introduced by r12-5536.
2021-11-29 7:53 ` Uros Bizjak
@ 2021-11-29 9:55 ` Hongtao Liu
2021-11-29 21:20 ` Uros Bizjak
0 siblings, 1 reply; 5+ messages in thread
From: Hongtao Liu @ 2021-11-29 9:55 UTC (permalink / raw)
To: Uros Bizjak; +Cc: liuhongt, gcc-patches, H. J. Lu
On Mon, Nov 29, 2021 at 3:53 PM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Mon, Nov 29, 2021 at 2:32 AM liuhongt <hongtao.liu@intel.com> wrote:
> >
> > There're several failures reported in [1]:
> > 1. unsupported instruction `pextrw` for "pextrw $0, %xmm31, 16(%rax)"
> > %vpextrw should be used in output templates.
> > 2. ICE in get_attr_memory for movhi_internal since some alternatives
> > are marked as TYPE_SSELOG.
> > Explicitly set memory_attr for those alternatives.
> >
> > Also this patch fixs a typo and some latent bugs which are related to
> > moving HImode from/to sse register w/o TARGET_AVX512FP16.
> >
> > For optimization issues discussed in PR102811, I'll create another patch for
> > it.
> > [1] https://gcc.gnu.org/pipermail/gcc-regression/2021-November/075893.html
> >
> >
> > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,} and
> > x86_64-pc-linux-gnu{-m32\ -march=cascadelake,\ -march=cascadelake}
> > Ok for trunk?
> >
> > gcc/ChangeLog:
> >
> > * config/i386/i386.c (ix86_secondary_reload): Without
> > TARGET_SSE4_1, General register is needed to move HImode from
> > sse register to memory.
> > * config/i386/sse.md (*vec_extrachf): Use %vpextrw instead of
> > pextrw in output templates.
> > * config/i386/i386.md (movhi_internal): Ditto, also fix typo of
> > MEM_P (operands[1]) and adjust memory/mode/prefix/type
> > attribute for alternatives related to sse register.
>
> OK, but please use sselog1 type instead so you don't need to introduce
> the memory attribute.
Changed, committed as r12-5573
thanks for the review.
>
> Thanks,
> Uros.
>
> > ---
> > gcc/config/i386/i386.c | 2 +-
> > gcc/config/i386/i386.md | 44 ++++++++++++++++++++++++++++++-----------
> > gcc/config/i386/sse.md | 6 +++---
> > 3 files changed, 36 insertions(+), 16 deletions(-)
> >
> > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > index 3dedf522c42..7cf599f57f7 100644
> > --- a/gcc/config/i386/i386.c
> > +++ b/gcc/config/i386/i386.c
> > @@ -19277,7 +19277,7 @@ ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
> > }
> >
> > /* Require movement to gpr, and then store to memory. */
> > - if (mode == HFmode
> > + if ((mode == HFmode || mode == HImode)
> > && !TARGET_SSE4_1
> > && SSE_CLASS_P (rclass)
> > && !in_p && MEM_P (x))
> > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> > index 68606e57e60..2cb3e727588 100644
> > --- a/gcc/config/i386/i386.md
> > +++ b/gcc/config/i386/i386.md
> > @@ -2528,12 +2528,12 @@ (define_insn "*movhi_internal"
> > case TYPE_SSELOG:
> > if (SSE_REG_P (operands[0]))
> > return MEM_P (operands[1])
> > - ? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
> > - : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
> > + ? "%vpinsrw\t{$0, %1, %0|%0, %1, 0}"
> > + : "%vpinsrw\t{$0, %k1, %0|%0, %k1, 0}";
> > else
> > - return MEM_P (operands[1])
> > - ? "pextrw\t{$0, %1, %0|%0, %1, 0}"
> > - : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}";
> > + return MEM_P (operands[0])
> > + ? "%vpextrw\t{$0, %1, %0|%0, %1, 0}"
> > + : "%vpextrw\t{$0, %1, %k0|%k0, %1, 0}";
> >
> > case TYPE_MSKLOG:
> > if (operands[1] == const0_rtx)
> > @@ -2557,12 +2557,14 @@ (define_insn "*movhi_internal"
> > ]
> > (const_string "*")))
> > (set (attr "type")
> > - (cond [(eq_attr "alternative" "9,10,11,12,13")
> > + (cond [(eq_attr "alternative" "9,10,12,13")
> > (if_then_else (match_test "TARGET_AVX512FP16")
> > (const_string "ssemov")
> > (const_string "sselog"))
> > (eq_attr "alternative" "4,5,6,7")
> > (const_string "mskmov")
> > + (eq_attr "alternative" "11")
> > + (const_string "ssemov")
> > (eq_attr "alternative" "8")
> > (const_string "msklog")
> > (match_test "optimize_function_for_size_p (cfun)")
> > @@ -2579,15 +2581,33 @@ (define_insn "*movhi_internal"
> > (const_string "imovx")
> > ]
> > (const_string "imov")))
> > + (set (attr "memory")
> > + (cond [(eq_attr "alternative" "9,10")
> > + (const_string "none")
> > + (eq_attr "alternative" "12")
> > + (const_string "load")
> > + (eq_attr "alternative" "13")
> > + (const_string "store")
> > + ]
> > + (const_string "*")))
>
> Please use sselog1 type instead, and the memory attribute will be
> calculated correctly.
>
> > (set (attr "prefix")
> > - (if_then_else (eq_attr "alternative" "4,5,6,7,8")
> > - (const_string "vex")
> > - (const_string "orig")))
> > + (cond [(eq_attr "alternative" "9,10,11,12,13")
> > + (const_string "maybe_evex")
> > + (eq_attr "alternative" "4,5,6,7,8")
> > + (const_string "vex")
> > + ]
> > + (const_string "orig")))
> > (set (attr "mode")
> > (cond [(eq_attr "type" "imovx")
> > (const_string "SI")
> > + (eq_attr "alternative" "9,10,12,13")
> > + (if_then_else (match_test "TARGET_AVX512FP16")
> > + (const_string "HI")
> > + (const_string "TI"))
> > (eq_attr "alternative" "11")
> > - (const_string "HF")
> > + (if_then_else (match_test "TARGET_AVX512FP16")
> > + (const_string "HF")
> > + (const_string "SF"))
> > (and (eq_attr "alternative" "1,2")
> > (match_operand:HI 1 "aligned_operand"))
> > (const_string "SI")
> > @@ -3791,9 +3811,9 @@ (define_insn "*movhf_internal"
> > ? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
> > : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
> > else
> > - return MEM_P (operands[1])
> > + return MEM_P (operands[0])
> > ? "pextrw\t{$0, %1, %0|%0, %1, 0}"
> > - : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}";
> > + : "pextrw\t{$0, %1, %k0|%k0, %1, 0}";
> >
> > default:
> > gcc_unreachable ();
> > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> > index b109c2aa8fa..5229b23af98 100644
> > --- a/gcc/config/i386/sse.md
> > +++ b/gcc/config/i386/sse.md
> > @@ -11315,9 +11315,9 @@ (define_insn "*vec_extracthf"
> > switch (which_alternative)
> > {
> > case 0:
> > - return "vpextrw\t{%2, %1, %k0|%k0, %1, %2}";
> > + return "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}";
> > case 1:
> > - return "vpextrw\t{%2, %1, %0|%0, %1, %2}";
> > + return "%vpextrw\t{%2, %1, %0|%0, %1, %2}";
> >
> > case 2:
> > operands[2] = GEN_INT (INTVAL (operands[2]) * 2);
> > @@ -11330,7 +11330,7 @@ (define_insn "*vec_extracthf"
> > gcc_unreachable ();
> > }
> > }
> > - [(set_attr "isa" "*,*,noavx,avx")
> > + [(set_attr "isa" "*,sse4,noavx,avx")
> > (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1")
> > (set_attr "prefix" "maybe_evex")
> > (set_attr "mode" "TI")])
> > --
> > 2.18.1
> >
--
BR,
Hongtao
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] Fix regression introduced by r12-5536.
2021-11-29 9:55 ` Hongtao Liu
@ 2021-11-29 21:20 ` Uros Bizjak
2021-11-30 1:31 ` Hongtao Liu
0 siblings, 1 reply; 5+ messages in thread
From: Uros Bizjak @ 2021-11-29 21:20 UTC (permalink / raw)
To: Hongtao Liu; +Cc: liuhongt, gcc-patches, H. J. Lu
[-- Attachment #1: Type: text/plain, Size: 2045 bytes --]
On Mon, Nov 29, 2021 at 10:48 AM Hongtao Liu <crazylht@gmail.com> wrote:
>
> On Mon, Nov 29, 2021 at 3:53 PM Uros Bizjak <ubizjak@gmail.com> wrote:
> >
> > On Mon, Nov 29, 2021 at 2:32 AM liuhongt <hongtao.liu@intel.com> wrote:
> > >
> > > There're several failures reported in [1]:
> > > 1. unsupported instruction `pextrw` for "pextrw $0, %xmm31, 16(%rax)"
> > > %vpextrw should be used in output templates.
> > > 2. ICE in get_attr_memory for movhi_internal since some alternatives
> > > are marked as TYPE_SSELOG.
> > > Explicitly set memory_attr for those alternatives.
> > >
> > > Also this patch fixs a typo and some latent bugs which are related to
> > > moving HImode from/to sse register w/o TARGET_AVX512FP16.
Here are some more fixes:
i386: Fix and improve movhi_internal and movhf_internal some more.
An (*v,C) alternative can be added to movhi_internal to directly load
HImode constant 0 to xmm register. Also, V4SFmode moves can be used
for xmm->xmm moves instead of TImode moves when optimizing for size.
Fix invalid %vpinsrw insn template, which needs to duplicate %xmm
register for AVX targets.
Optimize GPR moves in movhf_internal in the same way as in movhi_internal.
Fix pinsrw and pextrw templates for AVX targets. Use sselog1
instead of sselog type. Also, handle TARGET_SSE_PARTIAL_REG_DEPENDENCY
and TARGET_SSE_SPLIT_REGS targets.
2021-11-29 Uroš Bizjak <ubizjak@gmail.com>
gcc/ChangeLog:
PR target/102811
* config/i386/i386.md (*movhi_internal): Introduce (*v,C) alternative.
Do not allocate non-GPR registers. Optimize xmm->xmm moves when
optimizing for size. Fix vpinsrw insn template.
(*movhf_internal): Fix pinsrw and pextrw insn templates for
AVX targets. Use sselog1 type instead of sselog. Optimize GPR moves.
Optimize xmm->xmm moves for TARGET_SSE_PARTIAL_REG_DEPENDENCY
and TARGET_SSE_SPLIT_REGS targets.
Bootstrapped and regression tested on x86_64-linux-gnu {,-m32} w/ and
w/o -mf16c.
Pushed to master.
Uros.
[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 10249 bytes --]
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index a384dae23e2..c88374c9d2b 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2495,11 +2495,12 @@
(symbol_ref "true")))])
(define_insn "*movhi_internal"
- [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m ,*k,*k ,*r,*m,*k,?r,?v,*v,*v,*m")
- (match_operand:HI 1 "general_operand" "r ,rn,rm,rn,*r,*km,*k,*k,CBC,v, r, v, m, v"))]
+ [(set (match_operand:HI 0 "nonimmediate_operand"
+ "=r,r,r,m ,*k,*k ,r ,m ,*k ,?r,?*v,*v,*v,*v,m")
+ (match_operand:HI 1 "general_operand"
+ "r ,n,m,rn,r ,*km,*k,*k,CBC,*v,r ,C ,*v,m ,*v"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))
&& ix86_hardreg_mov_ok (operands[0], operands[1])"
-
{
switch (get_attr_type (insn))
{
@@ -2526,10 +2527,13 @@
return ix86_output_ssemov (insn, operands);
case TYPE_SSELOG1:
+ if (satisfies_constraint_C (operands[1]))
+ return standard_sse_constant_opcode (insn, operands);
+
if (SSE_REG_P (operands[0]))
return MEM_P (operands[1])
- ? "%vpinsrw\t{$0, %1, %0|%0, %1, 0}"
- : "%vpinsrw\t{$0, %k1, %0|%0, %k1, 0}";
+ ? "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}"
+ : "%vpinsrw\t{$0, %k1, %d0|%d0, %k1, 0}";
else
return MEM_P (operands[0])
? "%vpextrw\t{$0, %1, %0|%0, %1, 0}"
@@ -2550,23 +2554,25 @@
}
}
[(set (attr "isa")
- (cond [(eq_attr "alternative" "9,10,11,12")
+ (cond [(eq_attr "alternative" "9,10,11,12,13")
(const_string "sse2")
- (eq_attr "alternative" "13")
+ (eq_attr "alternative" "14")
(const_string "sse4")
]
(const_string "*")))
(set (attr "type")
- (cond [(eq_attr "alternative" "9,10,12,13")
+ (cond [(eq_attr "alternative" "4,5,6,7")
+ (const_string "mskmov")
+ (eq_attr "alternative" "8")
+ (const_string "msklog")
+ (eq_attr "alternative" "9,10,13,14")
(if_then_else (match_test "TARGET_AVX512FP16")
(const_string "ssemov")
(const_string "sselog1"))
- (eq_attr "alternative" "4,5,6,7")
- (const_string "mskmov")
(eq_attr "alternative" "11")
+ (const_string "sselog1")
+ (eq_attr "alternative" "12")
(const_string "ssemov")
- (eq_attr "alternative" "8")
- (const_string "msklog")
(match_test "optimize_function_for_size_p (cfun)")
(const_string "imov")
(and (eq_attr "alternative" "0")
@@ -2581,33 +2587,54 @@
(const_string "imovx")
]
(const_string "imov")))
- (set (attr "prefix")
- (cond [(eq_attr "alternative" "9,10,11,12,13")
- (const_string "maybe_evex")
- (eq_attr "alternative" "4,5,6,7,8")
- (const_string "vex")
- ]
- (const_string "orig")))
- (set (attr "mode")
- (cond [(eq_attr "type" "imovx")
- (const_string "SI")
- (eq_attr "alternative" "9,10,12,13")
- (if_then_else (match_test "TARGET_AVX512FP16")
- (const_string "HI")
- (const_string "TI"))
- (eq_attr "alternative" "11")
- (if_then_else (match_test "TARGET_AVX512FP16")
- (const_string "HF")
- (const_string "SF"))
- (and (eq_attr "alternative" "1,2")
- (match_operand:HI 1 "aligned_operand"))
- (const_string "SI")
- (and (eq_attr "alternative" "0")
- (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
- (not (match_test "TARGET_HIMODE_MATH"))))
- (const_string "SI")
+ (set (attr "prefix")
+ (cond [(eq_attr "alternative" "4,5,6,7,8")
+ (const_string "vex")
+ (eq_attr "alternative" "9,10,11,12,13,14")
+ (const_string "maybe_evex")
+ ]
+ (const_string "orig")))
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "9,10,13,14")
+ (if_then_else (match_test "TARGET_AVX512FP16")
+ (const_string "HI")
+ (const_string "TI"))
+ (eq_attr "alternative" "11")
+ (cond [(match_test "TARGET_AVX")
+ (const_string "TI")
+ (ior (not (match_test "TARGET_SSE2"))
+ (match_test "optimize_function_for_size_p (cfun)"))
+ (const_string "V4SF")
+ ]
+ (const_string "TI"))
+ (eq_attr "alternative" "12")
+ (cond [(match_test "TARGET_AVX512FP16")
+ (const_string "HI")
+ (match_test "TARGET_AVX")
+ (const_string "TI")
+ (ior (not (match_test "TARGET_SSE2"))
+ (match_test "optimize_function_for_size_p (cfun)"))
+ (const_string "V4SF")
+ ]
+ (const_string "TI"))
+ (eq_attr "type" "imovx")
+ (const_string "SI")
+ (and (eq_attr "alternative" "1,2")
+ (match_operand:HI 1 "aligned_operand"))
+ (const_string "SI")
+ (and (eq_attr "alternative" "0")
+ (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
+ (not (match_test "TARGET_HIMODE_MATH"))))
+ (const_string "SI")
]
- (const_string "HI")))])
+ (const_string "HI")))
+ (set (attr "preferred_for_speed")
+ (cond [(eq_attr "alternative" "9")
+ (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
+ (eq_attr "alternative" "10")
+ (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
+ ]
+ (symbol_ref "true")))])
;; Situation is quite tricky about when to choose full sized (SImode) move
;; over QImode moves. For Q_REG -> Q_REG move we use full size only for
@@ -3774,92 +3801,110 @@
(define_insn "*movhf_internal"
[(set (match_operand:HF 0 "nonimmediate_operand"
- "=?r,?m,v,v,?r,m,?v,v")
+ "=?r,?r,?r,?m,v,v,?r,m,?v,v")
(match_operand:HF 1 "general_operand"
- "rmF,rF,C,v, v,v, r,m"))]
+ "r ,F ,m ,rF,C,v, v,v,r ,m"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))
&& (lra_in_progress
|| reload_completed
|| !CONST_DOUBLE_P (operands[1])
- || (TARGET_SSE && TARGET_SSE_MATH
+ || (TARGET_SSE2
&& standard_sse_constant_p (operands[1], HFmode) == 1)
|| memory_operand (operands[0], HFmode))"
{
switch (get_attr_type (insn))
{
- case TYPE_IMOV:
- return "mov{w}\t{%1, %0|%0, %1}";
-
- case TYPE_SSELOG1:
- return standard_sse_constant_opcode (insn, operands);
+ case TYPE_IMOVX:
+ /* movzwl is faster than movw on p2 due to partial word stalls,
+ though not as fast as an aligned movl. */
+ return "movz{wl|x}\t{%1, %k0|%k0, %1}";
case TYPE_SSEMOV:
return ix86_output_ssemov (insn, operands);
- case TYPE_SSELOG:
+ case TYPE_SSELOG1:
+ if (satisfies_constraint_C (operands[1]))
+ return standard_sse_constant_opcode (insn, operands);
+
if (SSE_REG_P (operands[0]))
return MEM_P (operands[1])
- ? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
- : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
+ ? "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}"
+ : "%vpinsrw\t{$0, %k1, %d0|%d0, %k1, 0}";
else
return MEM_P (operands[0])
- ? "pextrw\t{$0, %1, %0|%0, %1, 0}"
- : "pextrw\t{$0, %1, %k0|%k0, %1, 0}";
+ ? "%vpextrw\t{$0, %1, %0|%0, %1, 0}"
+ : "%vpextrw\t{$0, %1, %k0|%k0, %1, 0}";
default:
- gcc_unreachable ();
+ if (get_attr_mode (insn) == MODE_SI)
+ return "mov{l}\t{%k1, %k0|%k0, %k1}";
+ else
+ return "mov{w}\t{%1, %0|%0, %1}";
}
}
[(set (attr "isa")
- (cond [(eq_attr "alternative" "2,3,4,6,7")
+ (cond [(eq_attr "alternative" "4,5,6,8,9")
(const_string "sse2")
- (eq_attr "alternative" "5")
+ (eq_attr "alternative" "7")
(const_string "sse4")
]
(const_string "*")))
(set (attr "type")
- (cond [(eq_attr "alternative" "0,1")
- (const_string "imov")
- (eq_attr "alternative" "2")
+ (cond [(eq_attr "alternative" "4")
(const_string "sselog1")
- (eq_attr "alternative" "4,5,6,7")
+ (eq_attr "alternative" "5")
+ (const_string "ssemov")
+ (eq_attr "alternative" "6,7,8,9")
(if_then_else
(match_test ("TARGET_AVX512FP16"))
(const_string "ssemov")
- (const_string "sselog"))
- ]
- (const_string "ssemov")))
- (set (attr "memory")
- (cond [(eq_attr "alternative" "4,6")
- (const_string "none")
- (eq_attr "alternative" "5")
- (const_string "store")
- (eq_attr "alternative" "7")
- (const_string "load")
- ]
- (const_string "*")))
+ (const_string "sselog1"))
+ (match_test "optimize_function_for_size_p (cfun)")
+ (const_string "imov")
+ (and (eq_attr "alternative" "0")
+ (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
+ (not (match_test "TARGET_HIMODE_MATH"))))
+ (const_string "imov")
+ (and (eq_attr "alternative" "1,2")
+ (match_operand:HI 1 "aligned_operand"))
+ (const_string "imov")
+ (and (match_test "TARGET_MOVX")
+ (eq_attr "alternative" "0,2"))
+ (const_string "imovx")
+ ]
+ (const_string "imov")))
(set (attr "prefix")
- (cond [(eq_attr "alternative" "0,1")
- (const_string "orig")
+ (cond [(eq_attr "alternative" "4,5,6,7,8,9")
+ (const_string "maybe_vex")
]
- (const_string "maybe_vex")))
+ (const_string "orig")))
(set (attr "mode")
- (cond [(eq_attr "alternative" "0,1")
- (const_string "HI")
- (eq_attr "alternative" "2")
+ (cond [(eq_attr "alternative" "4")
(const_string "V4SF")
- (eq_attr "alternative" "4,5,6,7")
+ (eq_attr "alternative" "6,7,8,9")
(if_then_else
(match_test "TARGET_AVX512FP16")
(const_string "HI")
(const_string "TI"))
- (eq_attr "alternative" "3")
- (if_then_else
- (match_test "TARGET_AVX512FP16")
- (const_string "HF")
- (const_string "SF"))
+ (eq_attr "alternative" "5")
+ (cond [(match_test "TARGET_AVX512FP16")
+ (const_string "HF")
+ (ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+ (match_test "TARGET_SSE_SPLIT_REGS"))
+ (const_string "V4SF")
+ ]
+ (const_string "SF"))
+ (eq_attr "type" "imovx")
+ (const_string "SI")
+ (and (eq_attr "alternative" "1,2")
+ (match_operand:HI 1 "aligned_operand"))
+ (const_string "SI")
+ (and (eq_attr "alternative" "0")
+ (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
+ (not (match_test "TARGET_HIMODE_MATH"))))
+ (const_string "SI")
]
- (const_string "*")))])
+ (const_string "HI")))])
(define_split
[(set (match_operand 0 "any_fp_register_operand")
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] Fix regression introduced by r12-5536.
2021-11-29 21:20 ` Uros Bizjak
@ 2021-11-30 1:31 ` Hongtao Liu
0 siblings, 0 replies; 5+ messages in thread
From: Hongtao Liu @ 2021-11-30 1:31 UTC (permalink / raw)
To: Uros Bizjak; +Cc: liuhongt, gcc-patches, H. J. Lu
On Tue, Nov 30, 2021 at 5:21 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Mon, Nov 29, 2021 at 10:48 AM Hongtao Liu <crazylht@gmail.com> wrote:
> >
> > On Mon, Nov 29, 2021 at 3:53 PM Uros Bizjak <ubizjak@gmail.com> wrote:
> > >
> > > On Mon, Nov 29, 2021 at 2:32 AM liuhongt <hongtao.liu@intel.com> wrote:
> > > >
> > > > There're several failures reported in [1]:
> > > > 1. unsupported instruction `pextrw` for "pextrw $0, %xmm31, 16(%rax)"
> > > > %vpextrw should be used in output templates.
> > > > 2. ICE in get_attr_memory for movhi_internal since some alternatives
> > > > are marked as TYPE_SSELOG.
> > > > Explicitly set memory_attr for those alternatives.
> > > >
> > > > Also this patch fixs a typo and some latent bugs which are related to
> > > > moving HImode from/to sse register w/o TARGET_AVX512FP16.
>
> Here are some more fixes:
>
thanks.
> i386: Fix and improve movhi_internal and movhf_internal some more.
>
> An (*v,C) alternative can be added to movhi_internal to directly load
> HImode constant 0 to xmm register. Also, V4SFmode moves can be used
> for xmm->xmm moves instead of TImode moves when optimizing for size.
> Fix invalid %vpinsrw insn template, which needs to duplicate %xmm
> register for AVX targets.
>
> Optimize GPR moves in movhf_internal in the same way as in movhi_internal.
> Fix pinsrw and pextrw templates for AVX targets. Use sselog1
> instead of sselog type. Also, handle TARGET_SSE_PARTIAL_REG_DEPENDENCY
> and TARGET_SSE_SPLIT_REGS targets.
>
> 2021-11-29 Uroš Bizjak <ubizjak@gmail.com>
>
> gcc/ChangeLog:
>
> PR target/102811
> * config/i386/i386.md (*movhi_internal): Introduce (*v,C) alternative.
> Do not allocate non-GPR registers. Optimize xmm->xmm moves when
> optimizing for size. Fix vpinsrw insn template.
> (*movhf_internal): Fix pinsrw and pextrw insn templates for
> AVX targets. Use sselog1 type instead of sselog. Optimize GPR moves.
> Optimize xmm->xmm moves for TARGET_SSE_PARTIAL_REG_DEPENDENCY
> and TARGET_SSE_SPLIT_REGS targets.
>
> Bootstrapped and regression tested on x86_64-linux-gnu {,-m32} w/ and
> w/o -mf16c.
>
> Pushed to master.
>
> Uros.
--
BR,
Hongtao
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2021-11-30 1:24 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-29 1:32 [PATCH] Fix regression introduced by r12-5536 liuhongt
2021-11-29 7:53 ` Uros Bizjak
2021-11-29 9:55 ` Hongtao Liu
2021-11-29 21:20 ` Uros Bizjak
2021-11-30 1:31 ` Hongtao Liu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).