* [PATCH 1/7] [x86] Add more splitters to match (unspec [op1 op2 (gt op3 constm1_operand)] UNSPEC_BLENDV)
2024-06-27 8:23 [PATCH 0/7][x86] Remove vcond{,u,eq}<mode> expanders liuhongt
@ 2024-06-27 8:23 ` liuhongt
2024-06-27 8:23 ` [PATCH 2/7] Lower AVX512 kmask comparison back to AVX2 comparison when op_{true,false} is vector -1/0 liuhongt
` (6 subsequent siblings)
7 siblings, 0 replies; 11+ messages in thread
From: liuhongt @ 2024-06-27 8:23 UTC (permalink / raw)
To: gcc-patches; +Cc: crazylht, hjl.tools
These define_insn_and_split patterns are needed once the vcond{,u,eq} expanders are obsoleted.
gcc/ChangeLog:
PR target/115517
* config/i386/sse.md
(*<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>_gt): New
define_insn_and_split.
(*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_gtint):
Ditto.
(*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_not_gtint):
Ditto.
(*<sse4_1_avx2>_pblendvb_gt): Ditto.
(*<sse4_1_avx2>_pblendvb_gt_subreg_not): Ditto.
---
gcc/config/i386/sse.md | 130 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 130 insertions(+)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 0be2dcd8891..1148ac84f3d 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -23016,6 +23016,32 @@ (define_insn_and_split "*<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>_lt"
(set_attr "btver2_decode" "vector,vector,vector")
(set_attr "mode" "<MODE>")])
+(define_insn_and_split "*<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>_gt"
+ [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
+ (unspec:VF_128_256
+ [(match_operand:VF_128_256 1 "vector_operand" "Yrja,*xja,xjm")
+ (match_operand:VF_128_256 2 "register_operand" "0,0,x")
+ (gt:VF_128_256
+ (match_operand:<sseintvecmode> 3 "register_operand" "Yz,Yz,x")
+ (match_operand:<sseintvecmode> 4 "vector_all_ones_operand"))]
+ UNSPEC_BLENDV))]
+ "TARGET_SSE4_1"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (unspec:VF_128_256
+ [(match_dup 2) (match_dup 1) (match_dup 3)] UNSPEC_BLENDV))]
+ "operands[3] = gen_lowpart (<MODE>mode, operands[3]);"
+ [(set_attr "isa" "noavx,noavx,avx")
+ (set_attr "type" "ssemov")
+ (set_attr "addr" "gpr16")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix_data16" "1,1,*")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "orig,orig,vex")
+ (set_attr "btver2_decode" "vector,vector,vector")
+ (set_attr "mode" "<MODE>")])
+
(define_mode_attr ssefltmodesuffix
[(V2DI "pd") (V4DI "pd") (V4SI "ps") (V8SI "ps")
(V2DF "pd") (V4DF "pd") (V4SF "ps") (V8SF "ps")])
@@ -23055,6 +23081,38 @@ (define_insn_and_split "*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_ltint"
(set_attr "btver2_decode" "vector,vector,vector")
(set_attr "mode" "<ssefltvecmode>")])
+(define_insn_and_split "*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_gtint"
+ [(set (match_operand:<ssebytemode> 0 "register_operand" "=Yr,*x,x")
+ (unspec:<ssebytemode>
+ [(match_operand:<ssebytemode> 1 "vector_operand" "Yrja,*xja,xjm")
+ (match_operand:<ssebytemode> 2 "register_operand" "0,0,x")
+ (subreg:<ssebytemode>
+ (gt:VI48_AVX
+ (match_operand:VI48_AVX 3 "register_operand" "Yz,Yz,x")
+ (match_operand:VI48_AVX 4 "vector_all_ones_operand")) 0)]
+ UNSPEC_BLENDV))]
+ "TARGET_SSE4_1"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (unspec:<ssefltvecmode>
+ [(match_dup 2) (match_dup 1) (match_dup 3)] UNSPEC_BLENDV))]
+{
+ operands[0] = gen_lowpart (<ssefltvecmode>mode, operands[0]);
+ operands[1] = gen_lowpart (<ssefltvecmode>mode, operands[1]);
+ operands[2] = gen_lowpart (<ssefltvecmode>mode, operands[2]);
+ operands[3] = gen_lowpart (<ssefltvecmode>mode, operands[3]);
+}
+ [(set_attr "isa" "noavx,noavx,avx")
+ (set_attr "type" "ssemov")
+ (set_attr "addr" "gpr16")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix_data16" "1,1,*")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "orig,orig,vex")
+ (set_attr "btver2_decode" "vector,vector,vector")
+ (set_attr "mode" "<ssefltvecmode>")])
+
;; PR target/100738: Transform vpcmpeqd + vpxor + vblendvps to vblendvps for inverted mask;
(define_insn_and_split "*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_not_ltint"
[(set (match_operand:<ssebytemode> 0 "register_operand")
@@ -23082,6 +23140,32 @@ (define_insn_and_split "*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_not_lt
operands[3] = gen_lowpart (<ssefltvecmode>mode, operands[3]);
})
+(define_insn_and_split "*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_not_gtint"
+ [(set (match_operand:<ssebytemode> 0 "register_operand")
+ (unspec:<ssebytemode>
+ [(match_operand:<ssebytemode> 1 "vector_operand")
+ (match_operand:<ssebytemode> 2 "register_operand")
+ (subreg:<ssebytemode>
+ (gt:VI48_AVX
+ (subreg:VI48_AVX
+ (not:<ssebytemode>
+ (match_operand:<ssebytemode> 3 "register_operand")) 0)
+ (match_operand:VI48_AVX 4 "vector_all_ones_operand")) 0)]
+ UNSPEC_BLENDV))]
+ "TARGET_SSE4_1 && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:<ssefltvecmode>
+ [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
+{
+ operands[0] = gen_lowpart (<ssefltvecmode>mode, operands[0]);
+ operands[2] = gen_lowpart (<ssefltvecmode>mode, operands[2]);
+ operands[1] = force_reg (<ssefltvecmode>mode,
+ gen_lowpart (<ssefltvecmode>mode, operands[1]));
+ operands[3] = gen_lowpart (<ssefltvecmode>mode, operands[3]);
+})
+
(define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
[(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
(unspec:VF_128_256
@@ -23236,6 +23320,30 @@ (define_insn_and_split "*<sse4_1_avx2>_pblendvb_lt"
(set_attr "btver2_decode" "vector,vector,vector")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn_and_split "*<sse4_1_avx2>_pblendvb_gt"
+ [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
+ (unspec:VI1_AVX2
+ [(match_operand:VI1_AVX2 1 "vector_operand" "Yrja,*xja,xjm")
+ (match_operand:VI1_AVX2 2 "register_operand" "0,0,x")
+ (gt:VI1_AVX2 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")
+ (match_operand:VI1_AVX2 4 "vector_all_ones_operand"))]
+ UNSPEC_BLENDV))]
+ "TARGET_SSE4_1"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:VI1_AVX2
+ [(match_dup 2) (match_dup 1) (match_dup 3)] UNSPEC_BLENDV))]
+ ""
+ [(set_attr "isa" "noavx,noavx,avx")
+ (set_attr "type" "ssemov")
+ (set_attr "addr" "gpr16")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "*,*,1")
+ (set_attr "prefix" "orig,orig,vex")
+ (set_attr "btver2_decode" "vector,vector,vector")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_insn_and_split "*<sse4_1_avx2>_pblendvb_lt_subreg_not"
[(set (match_operand:VI1_AVX2 0 "register_operand")
(unspec:VI1_AVX2
@@ -23258,6 +23366,28 @@ (define_insn_and_split "*<sse4_1_avx2>_pblendvb_lt_subreg_not"
(lt:VI1_AVX2 (match_dup 3) (match_dup 4))] UNSPEC_BLENDV))]
"operands[3] = gen_lowpart (<MODE>mode, operands[3]);")
+(define_insn_and_split "*<sse4_1_avx2>_pblendvb_gt_subreg_not"
+ [(set (match_operand:VI1_AVX2 0 "register_operand")
+ (unspec:VI1_AVX2
+ [(match_operand:VI1_AVX2 2 "register_operand")
+ (match_operand:VI1_AVX2 1 "vector_operand")
+ (gt:VI1_AVX2
+ (subreg:VI1_AVX2
+ (not (match_operand 3 "register_operand")) 0)
+ (match_operand:VI1_AVX2 4 "vector_all_ones_operand"))]
+ UNSPEC_BLENDV))]
+ "TARGET_SSE4_1
+ && GET_MODE_CLASS (GET_MODE (operands[3])) == MODE_VECTOR_INT
+ && GET_MODE_SIZE (GET_MODE (operands[3])) == <MODE_SIZE>
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:VI1_AVX2
+ [(match_dup 1) (match_dup 2)
+ (gt:VI1_AVX2 (match_dup 3) (match_dup 4))] UNSPEC_BLENDV))]
+ "operands[3] = gen_lowpart (<MODE>mode, operands[3]);")
+
(define_insn "sse4_1_pblend<ssemodesuffix>"
[(set (match_operand:V8_128 0 "register_operand" "=Yr,*x,x")
(vec_merge:V8_128
--
2.31.1
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH 2/7] Lower AVX512 kmask comparison back to AVX2 comparison when op_{true,false} is vector -1/0.
2024-06-27 8:23 [PATCH 0/7][x86] Remove vcond{,u,eq}<mode> expanders liuhongt
2024-06-27 8:23 ` [PATCH 1/7] [x86] Add more splitters to match (unspec [op1 op2 (gt op3 constm1_operand)] UNSPEC_BLENDV) liuhongt
@ 2024-06-27 8:23 ` liuhongt
2024-06-27 10:10 ` [PATCH 2/7] Lower AVX512 kmask comparison back to AVX2 comparison when op_{true, false} " Richard Biener
2024-06-27 8:23 ` [PATCH 3/7] [x86] Match IEEE min/max with UNSPEC_IEEE_{MIN,MAX} liuhongt
` (5 subsequent siblings)
7 siblings, 1 reply; 11+ messages in thread
From: liuhongt @ 2024-06-27 8:23 UTC (permalink / raw)
To: gcc-patches; +Cc: crazylht, hjl.tools
gcc/ChangeLog:
PR target/115517
* config/i386/sse.md
(*<avx512>_cvtmask2<ssemodesuffix><mode>_not): New pre_reload
splitter.
(*<avx512>_cvtmask2<ssemodesuffix><mode>_not): Ditto.
(*avx2_pcmp<mode>3_6): Ditto.
(*avx2_pcmp<mode>3_7): Ditto.
---
gcc/config/i386/sse.md | 97 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 97 insertions(+)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 1148ac84f3d..822159a869b 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -9986,6 +9986,24 @@ (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
[(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn_and_split "*<avx512>_cvtmask2<ssemodesuffix><mode>_not"
+ [(set (match_operand:VI12_AVX512VL 0 "register_operand")
+ (vec_merge:VI12_AVX512VL
+ (match_operand:VI12_AVX512VL 2 "const0_operand")
+ (match_operand:VI12_AVX512VL 3 "vector_all_ones_operand")
+ (match_operand:<avx512fmaskmode> 1 "register_operand")))]
+ "TARGET_AVX512BW && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(set (match_dup 4)
+ (not:<avx512fmaskmode> (match_dup 1)))
+ (set (match_dup 0)
+ (vec_merge:VI12_AVX512VL
+ (match_dup 3)
+ (match_dup 2)
+ (match_dup 4)))]
+ "operands[4] = gen_reg_rtx (<avx512fmaskmode>mode);")
+
(define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
[(set (match_operand:VI48_AVX512VL 0 "register_operand")
(vec_merge:VI48_AVX512VL
@@ -10024,6 +10042,24 @@ (define_insn_and_split "*<avx512>_cvtmask2<ssemodesuffix><mode>"
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn_and_split "*<avx512>_cvtmask2<ssemodesuffix><mode>_not"
+ [(set (match_operand:VI48_AVX512VL 0 "register_operand")
+ (vec_merge:VI48_AVX512VL
+ (match_operand:VI48_AVX512VL 2 "const0_operand")
+ (match_operand:VI48_AVX512VL 3 "vector_all_ones_operand")
+ (match_operand:<avx512fmaskmode> 1 "register_operand")))]
+ "TARGET_AVX512F && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(set (match_dup 4)
+ (not:<avx512fmaskmode> (match_dup 1)))
+ (set (match_dup 0)
+ (vec_merge:VI48_AVX512VL
+ (match_dup 3)
+ (match_dup 2)
+ (match_dup 4)))]
+ "operands[4] = gen_reg_rtx (<avx512fmaskmode>mode);")
+
(define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>_pternlog_false_dep"
[(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
(vec_merge:VI48_AVX512VL
@@ -17675,6 +17711,67 @@ (define_insn_and_split "*avx2_pcmp<mode>3_5"
std::swap (operands[1], operands[2]);
})
+(define_int_attr pcmp_usmin
+ [(UNSPEC_PCMP "smin") (UNSPEC_UNSIGNED_PCMP "umin")])
+
+(define_insn_and_split "*avx2_pcmp<mode>3_6"
+ [(set (match_operand:VI_128_256 0 "register_operand")
+ (vec_merge:VI_128_256
+ (match_operand:VI_128_256 1 "vector_all_ones_operand")
+ (match_operand:VI_128_256 2 "const0_operand")
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VI_128_256 3 "nonimmediate_operand")
+ (match_operand:VI_128_256 4 "nonimmediate_operand")
+ (match_operand:SI 5 "const_0_to_7_operand")]
+ UNSPEC_PCMP_ITER)))]
+ "TARGET_AVX512VL && ix86_pre_reload_split ()
+ && (INTVAL (operands[5]) == 2 || INTVAL (operands[5]) == 5)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ rtx dst_min = gen_reg_rtx (<MODE>mode);
+
+ if (MEM_P (operands[3]) && MEM_P (operands[4]))
+ operands[3] = force_reg (<MODE>mode, operands[3]);
+ emit_insn (gen_<pcmp_usmin><mode>3 (dst_min, operands[3], operands[4]));
+ rtx eq_op = INTVAL (operands[5]) == 2 ? operands[3] : operands[4];
+ emit_move_insn (operands[0], gen_rtx_EQ (<MODE>mode, eq_op, dst_min));
+ DONE;
+})
+
+(define_insn_and_split "*avx2_pcmp<mode>3_7"
+ [(set (match_operand:VI_128_256 0 "register_operand")
+ (vec_merge:VI_128_256
+ (match_operand:VI_128_256 1 "const0_operand")
+ (match_operand:VI_128_256 2 "vector_all_ones_operand")
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VI_128_256 3 "nonimmediate_operand")
+ (match_operand:VI_128_256 4 "nonimmediate_operand")
+ (match_operand:SI 5 "const_0_to_7_operand")]
+ UNSPEC_PCMP)))] ; Signed compares only: the split below emits a signed GT.
+ "TARGET_AVX512VL && ix86_pre_reload_split ()
+ /* NE is commutative. */
+ && (INTVAL (operands[5]) == 4
+ /* LE, 3 must be register. */
+ || INTVAL (operands[5]) == 2
+ /* NLT aka GE, 4 must be register and we swap operands. */
+ || INTVAL (operands[5]) == 5)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ if (INTVAL (operands[5]) == 5) /* GE: compare the operands in swapped order. */
+ std::swap (operands[3], operands[4]);
+
+ if (MEM_P (operands[3])) /* The first compare operand must be a register. */
+ operands[3] = force_reg (<MODE>mode, operands[3]);
+ enum rtx_code code = INTVAL (operands[5]) != 4 ? GT : EQ; /* 0/-1 arms are inverted: NE -> EQ, LE/GE -> GT. */
+ emit_move_insn (operands[0], gen_rtx_fmt_ee (code, <MODE>mode,
+ operands[3], operands[4]));
+ DONE;
+})
+
(define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand")
(unspec:<avx512fmaskmode>
--
2.31.1
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 2/7] Lower AVX512 kmask comparison back to AVX2 comparison when op_{true, false} is vector -1/0.
2024-06-27 8:23 ` [PATCH 2/7] Lower AVX512 kmask comparison back to AVX2 comparison when op_{true,false} is vector -1/0 liuhongt
@ 2024-06-27 10:10 ` Richard Biener
0 siblings, 0 replies; 11+ messages in thread
From: Richard Biener @ 2024-06-27 10:10 UTC (permalink / raw)
To: liuhongt; +Cc: gcc-patches, crazylht, hjl.tools
On Thu, Jun 27, 2024 at 10:30 AM liuhongt <hongtao.liu@intel.com> wrote:
>
> gcc/ChangeLog
In PR115659 Kewen notes that ISEL (and possibly folding) could do a
better job with
these. In addition to the mentioned issues we can also try whether the target
can handle an alternate mask mode. So instead of gating with
/* Try to fold x CMP y ? -1 : 0 to x CMP y. */
if (can_compute_op0
&& integer_minus_onep (op1)
&& integer_zerop (op2)
&& TYPE_MODE (TREE_TYPE (lhs)) == TYPE_MODE (TREE_TYPE (op0)))
when TYPE_MODE (TREE_TYPE (lhs)) != TYPE_MODE (TREE_TYPE (op0)), we could
build
build_truth_vector_type_for_mode (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op0)),
TYPE_MODE (TREE_TYPE (op0)));
use that as the LHS type, check whether the target can handle the resulting
compare (can_compute_op0 with this mode), and if so rewrite the statement
accordingly to make the transform.
Richard.
> PR target/115517
> * config/i386/sse.md
> (*<avx512>_cvtmask2<ssemodesuffix><mode>_not): New pre_reload
> splitter.
> (*<avx512>_cvtmask2<ssemodesuffix><mode>_not): Ditto.
> (*avx2_pcmp<mode>3_6): Ditto.
> (*avx2_pcmp<mode>3_7): Ditto.
> ---
> gcc/config/i386/sse.md | 97 ++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 97 insertions(+)
>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 1148ac84f3d..822159a869b 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -9986,6 +9986,24 @@ (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
> [(set_attr "prefix" "evex")
> (set_attr "mode" "<sseinsnmode>")])
>
> +(define_insn_and_split "*<avx512>_cvtmask2<ssemodesuffix><mode>_not"
> + [(set (match_operand:VI12_AVX512VL 0 "register_operand")
> + (vec_merge:VI12_AVX512VL
> + (match_operand:VI12_AVX512VL 2 "const0_operand")
> + (match_operand:VI12_AVX512VL 3 "vector_all_ones_operand")
> + (match_operand:<avx512fmaskmode> 1 "register_operand")))]
> + "TARGET_AVX512BW && ix86_pre_reload_split ()"
> + "#"
> + "&& 1"
> + [(set (match_dup 4)
> + (not:<avx512fmaskmode> (match_dup 1)))
> + (set (match_dup 0)
> + (vec_merge:VI12_AVX512VL
> + (match_dup 3)
> + (match_dup 2)
> + (match_dup 4)))]
> + "operands[4] = gen_reg_rtx (<avx512fmaskmode>mode);")
> +
> (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
> [(set (match_operand:VI48_AVX512VL 0 "register_operand")
> (vec_merge:VI48_AVX512VL
> @@ -10024,6 +10042,24 @@ (define_insn_and_split "*<avx512>_cvtmask2<ssemodesuffix><mode>"
> (set_attr "prefix" "evex")
> (set_attr "mode" "<sseinsnmode>")])
>
> +(define_insn_and_split "*<avx512>_cvtmask2<ssemodesuffix><mode>_not"
> + [(set (match_operand:VI48_AVX512VL 0 "register_operand")
> + (vec_merge:VI48_AVX512VL
> + (match_operand:VI48_AVX512VL 2 "const0_operand")
> + (match_operand:VI48_AVX512VL 3 "vector_all_ones_operand")
> + (match_operand:<avx512fmaskmode> 1 "register_operand")))]
> + "TARGET_AVX512F && ix86_pre_reload_split ()"
> + "#"
> + "&& 1"
> + [(set (match_dup 4)
> + (not:<avx512fmaskmode> (match_dup 1)))
> + (set (match_dup 0)
> + (vec_merge:VI48_AVX512VL
> + (match_dup 3)
> + (match_dup 2)
> + (match_dup 4)))]
> + "operands[4] = gen_reg_rtx (<avx512fmaskmode>mode);")
> +
> (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>_pternlog_false_dep"
> [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
> (vec_merge:VI48_AVX512VL
> @@ -17675,6 +17711,67 @@ (define_insn_and_split "*avx2_pcmp<mode>3_5"
> std::swap (operands[1], operands[2]);
> })
>
> +(define_int_attr pcmp_usmin
> + [(UNSPEC_PCMP "smin") (UNSPEC_UNSIGNED_PCMP "umin")])
> +
> +(define_insn_and_split "*avx2_pcmp<mode>3_6"
> + [(set (match_operand:VI_128_256 0 "register_operand")
> + (vec_merge:VI_128_256
> + (match_operand:VI_128_256 1 "vector_all_ones_operand")
> + (match_operand:VI_128_256 2 "const0_operand")
> + (unspec:<avx512fmaskmode>
> + [(match_operand:VI_128_256 3 "nonimmediate_operand")
> + (match_operand:VI_128_256 4 "nonimmediate_operand")
> + (match_operand:SI 5 "const_0_to_7_operand")]
> + UNSPEC_PCMP_ITER)))]
> + "TARGET_AVX512VL && ix86_pre_reload_split ()
> + && (INTVAL (operands[5]) == 2 || INTVAL (operands[5]) == 5)"
> + "#"
> + "&& 1"
> + [(const_int 0)]
> +{
> + rtx dst_min = gen_reg_rtx (<MODE>mode);
> +
> + if (MEM_P (operands[3]) && MEM_P (operands[4]))
> + operands[3] = force_reg (<MODE>mode, operands[3]);
> + emit_insn (gen_<pcmp_usmin><mode>3 (dst_min, operands[3], operands[4]));
> + rtx eq_op = INTVAL (operands[5]) == 2 ? operands[3] : operands[4];
> + emit_move_insn (operands[0], gen_rtx_EQ (<MODE>mode, eq_op, dst_min));
> + DONE;
> +})
> +
> +(define_insn_and_split "*avx2_pcmp<mode>3_7"
> + [(set (match_operand:VI_128_256 0 "register_operand")
> + (vec_merge:VI_128_256
> + (match_operand:VI_128_256 1 "const0_operand")
> + (match_operand:VI_128_256 2 "vector_all_ones_operand")
> + (unspec:<avx512fmaskmode>
> + [(match_operand:VI_128_256 3 "nonimmediate_operand")
> + (match_operand:VI_128_256 4 "nonimmediate_operand")
> + (match_operand:SI 5 "const_0_to_7_operand")]
> + UNSPEC_PCMP_ITER)))]
> + "TARGET_AVX512VL && ix86_pre_reload_split ()
> + /* NE is commutative. */
> + && (INTVAL (operands[5]) == 4
> + /* LE, 3 must be register. */
> + || INTVAL (operands[5]) == 2
> + /* NLT aka GE, 4 must be register and we swap operands. */
> + || INTVAL (operands[5]) == 5)"
> + "#"
> + "&& 1"
> + [(const_int 0)]
> +{
> + if (INTVAL (operands[5]) == 5)
> + std::swap (operands[3], operands[4]);
> +
> + if (MEM_P (operands[3]))
> + operands[3] = force_reg (<MODE>mode, operands[3]);
> + enum rtx_code code = INTVAL (operands[5]) != 4 ? GT : EQ;
> + emit_move_insn (operands[0], gen_rtx_fmt_ee (code, <MODE>mode,
> + operands[3], operands[4]));
> + DONE;
> +})
> +
> (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
> [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
> (unspec:<avx512fmaskmode>
> --
> 2.31.1
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH 3/7] [x86] Match IEEE min/max with UNSPEC_IEEE_{MIN,MAX}.
2024-06-27 8:23 [PATCH 0/7][x86] Remove vcond{,u,eq}<mode> expanders liuhongt
2024-06-27 8:23 ` [PATCH 1/7] [x86] Add more splitters to match (unspec [op1 op2 (gt op3 constm1_operand)] UNSPEC_BLENDV) liuhongt
2024-06-27 8:23 ` [PATCH 2/7] Lower AVX512 kmask comparison back to AVX2 comparison when op_{true,false} is vector -1/0 liuhongt
@ 2024-06-27 8:23 ` liuhongt
2024-06-27 8:23 ` [PATCH 4/7] Add more splitter for mskmov with avx512 comparison liuhongt
` (4 subsequent siblings)
7 siblings, 0 replies; 11+ messages in thread
From: liuhongt @ 2024-06-27 8:23 UTC (permalink / raw)
To: gcc-patches; +Cc: crazylht, hjl.tools
These versions of the min/max patterns implement exactly the operations
min = (op1 < op2 ? op1 : op2)
max = (!(op1 < op2) ? op1 : op2)
gcc/ChangeLog:
PR target/115517
* config/i386/sse.md (*minmax<mode>3_1): New pre_reload
define_insn_and_split.
(*minmax<mode>3_2): Ditto.
---
gcc/config/i386/sse.md | 63 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 63 insertions(+)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 822159a869b..92f8b74999f 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -3064,6 +3064,69 @@ (define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
(set_attr "prefix" "<mask_prefix3>")
(set_attr "mode" "<MODE>")])
+(define_insn_and_split "*minmax<mode>3_1"
+ [(set (match_operand:VFH 0 "register_operand")
+ (vec_merge:VFH
+ (match_operand:VFH 1 "nonimmediate_operand")
+ (match_operand:VFH 2 "nonimmediate_operand")
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VFH 3 "nonimmediate_operand")
+ (match_operand:VFH 4 "nonimmediate_operand")
+ (match_operand:SI 5 "const_0_to_31_operand")]
+ UNSPEC_PCMP)))]
+ "TARGET_SSE && ix86_pre_reload_split ()
+ && ((rtx_equal_p (operands[1], operands[3])
+ && rtx_equal_p (operands[2], operands[4]))
+ || (rtx_equal_p (operands[1], operands[4])
+ && rtx_equal_p (operands[2], operands[3])))
+ && (INTVAL (operands[5]) == 1 || INTVAL (operands[5]) == 14)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ int u = UNSPEC_IEEE_MIN;
+ if ((INTVAL (operands[5]) == 1 && rtx_equal_p (operands[1], operands[4]))
+ || (INTVAL (operands[5]) == 14 && rtx_equal_p (operands[1], operands[3])))
+ u = UNSPEC_IEEE_MAX;
+
+ if (MEM_P (operands[1]))
+ operands[1] = force_reg (<MODE>mode, operands[1]);
+ rtvec v = gen_rtvec (2, operands[1], operands[2]);
+ rtx tmp = gen_rtx_UNSPEC (<MODE>mode, v, u);
+ emit_move_insn (operands[0], tmp);
+ DONE;
+ })
+
+(define_insn_and_split "*minmax<mode>3_2"
+ [(set (match_operand:VF_128_256 0 "register_operand")
+ (unspec:VF_128_256
+ [(match_operand:VF_128_256 1 "nonimmediate_operand")
+ (match_operand:VF_128_256 2 "nonimmediate_operand")
+ (lt:VF_128_256
+ (match_operand:VF_128_256 3 "nonimmediate_operand")
+ (match_operand:VF_128_256 4 "nonimmediate_operand"))]
+ UNSPEC_BLENDV))]
+ "TARGET_SSE && ix86_pre_reload_split ()
+ && ((rtx_equal_p (operands[1], operands[3])
+ && rtx_equal_p (operands[2], operands[4]))
+ || (rtx_equal_p (operands[1], operands[4])
+ && rtx_equal_p (operands[2], operands[3])))"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ int u = UNSPEC_IEEE_MIN; /* Default; switched to MAX when operand 1 is the LT lhs. */
+ if (rtx_equal_p (operands[1], operands[3]))
+ u = UNSPEC_IEEE_MAX;
+
+ if (MEM_P (operands[2])) /* operands[2] becomes the first UNSPEC operand below. */
+ operands[2] = force_reg (<MODE>mode, operands[2]); /* Keep the new register. */
+ rtvec v = gen_rtvec (2, operands[2], operands[1]);
+ rtx tmp = gen_rtx_UNSPEC (<MODE>mode, v, u);
+ emit_move_insn (operands[0], tmp);
+ DONE;
+ })
+
;; These versions of the min/max patterns implement exactly the operations
;; min = (op1 < op2 ? op1 : op2)
;; max = (!(op1 < op2) ? op1 : op2)
--
2.31.1
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH 4/7] Add more splitter for mskmov with avx512 comparison.
2024-06-27 8:23 [PATCH 0/7][x86] Remove vcond{,u,eq}<mode> expanders liuhongt
` (2 preceding siblings ...)
2024-06-27 8:23 ` [PATCH 3/7] [x86] Match IEEE min/max with UNSPEC_IEEE_{MIN,MAX} liuhongt
@ 2024-06-27 8:23 ` liuhongt
2024-06-27 8:23 ` [PATCH 5/7] Adjust testcase for the regressed testcases after obsolete of vcond{,u,eq} liuhongt
` (3 subsequent siblings)
7 siblings, 0 replies; 11+ messages in thread
From: liuhongt @ 2024-06-27 8:23 UTC (permalink / raw)
To: gcc-patches; +Cc: crazylht, hjl.tools
gcc/ChangeLog:
PR target/115517
* config/i386/sse.md
(*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_lt_avx512): New
define_insn_and_split.
(*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt_avx512):
Ditto.
(*<sse2_avx2>_pmovmskb_lt_avx512): Ditto.
(*<sse2_avx2>_pmovmskb_zext_lt_avx512): Ditto.
(*sse2_pmovmskb_ext_lt_avx512): Ditto.
(*pmovsk_kmask_v16qi_avx512): Ditto.
(*pmovsk_mask_v32qi_avx512): Ditto.
(*pmovsk_mask_cmp_<mode>_avx512): Ditto.
(*pmovsk_ptest_<mode>_avx512): Ditto.
---
gcc/config/i386/sse.md | 232 +++++++++++++++++++++++++++++++++++++----
1 file changed, 209 insertions(+), 23 deletions(-)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 92f8b74999f..5996ad99606 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -10049,24 +10049,6 @@ (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
[(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn_and_split "*<avx512>_cvtmask2<ssemodesuffix><mode>_not"
- [(set (match_operand:VI12_AVX512VL 0 "register_operand")
- (vec_merge:VI12_AVX512VL
- (match_operand:VI12_AVX512VL 2 "const0_operand")
- (match_operand:VI12_AVX512VL 3 "vector_all_ones_operand")
- (match_operand:<avx512fmaskmode> 1 "register_operand")))]
- "TARGET_AVX512BW && ix86_pre_reload_split ()"
- "#"
- "&& 1"
- [(set (match_dup 4)
- (not:<avx512fmaskmode> (match_dup 1)))
- (set (match_dup 0)
- (vec_merge:VI12_AVX512VL
- (match_dup 3)
- (match_dup 2)
- (match_dup 4)))]
- "operands[4] = gen_reg_rtx (<avx512fmaskmode>mode);")
-
(define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
[(set (match_operand:VI48_AVX512VL 0 "register_operand")
(vec_merge:VI48_AVX512VL
@@ -10106,10 +10088,10 @@ (define_insn_and_split "*<avx512>_cvtmask2<ssemodesuffix><mode>"
(set_attr "mode" "<sseinsnmode>")])
(define_insn_and_split "*<avx512>_cvtmask2<ssemodesuffix><mode>_not"
- [(set (match_operand:VI48_AVX512VL 0 "register_operand")
- (vec_merge:VI48_AVX512VL
- (match_operand:VI48_AVX512VL 2 "const0_operand")
- (match_operand:VI48_AVX512VL 3 "vector_all_ones_operand")
+ [(set (match_operand:VI1248_AVX512VLBW 0 "register_operand")
+ (vec_merge:VI1248_AVX512VLBW
+ (match_operand:VI1248_AVX512VLBW 2 "const0_operand")
+ (match_operand:VI1248_AVX512VLBW 3 "vector_all_ones_operand")
(match_operand:<avx512fmaskmode> 1 "register_operand")))]
"TARGET_AVX512F && ix86_pre_reload_split ()"
"#"
@@ -10117,7 +10099,7 @@ (define_insn_and_split "*<avx512>_cvtmask2<ssemodesuffix><mode>_not"
[(set (match_dup 4)
(not:<avx512fmaskmode> (match_dup 1)))
(set (match_dup 0)
- (vec_merge:VI48_AVX512VL
+ (vec_merge:VI1248_AVX512VLBW
(match_dup 3)
(match_dup 2)
(match_dup 4)))]
@@ -21753,6 +21735,30 @@ (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_lt"
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
+(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_lt_avx512"
+ [(set (match_operand:SI 0 "register_operand" "=r,jr")
+ (unspec:SI
+ [(subreg:VF_128_256
+ (vec_merge:<sseintvecmode>
+ (match_operand:<sseintvecmode> 3 "vector_all_ones_operand")
+ (match_operand:<sseintvecmode> 4 "const0_operand")
+ (unspec:<avx512fmaskmode>
+ [(match_operand:<sseintvecmode> 1 "register_operand" "x,x")
+ (match_operand:<sseintvecmode> 2 "const0_operand")
+ (const_int 1)]
+ UNSPEC_PCMP)) 0)]
+ UNSPEC_MOVMSK))]
+ "TARGET_SSE"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
+ "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
+ [(set_attr "isa" "noavx,avx")
+ (set_attr "type" "ssemov")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "<MODE>")])
+
(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt"
[(set (match_operand:DI 0 "register_operand" "=r,jr")
(any_extend:DI
@@ -21772,6 +21778,31 @@ (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt"
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
+(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt_avx512"
+ [(set (match_operand:DI 0 "register_operand" "=r,jr")
+ (any_extend:DI
+ (unspec:SI
+ [(subreg:VF_128_256
+ (vec_merge:<sseintvecmode>
+ (match_operand:<sseintvecmode> 3 "vector_all_ones_operand")
+ (match_operand:<sseintvecmode> 4 "const0_operand")
+ (unspec:<avx512fmaskmode>
+ [(match_operand:<sseintvecmode> 1 "register_operand" "x,x")
+ (match_operand:<sseintvecmode> 2 "const0_operand")
+ (const_int 1)]
+ UNSPEC_PCMP)) 0)]
+ UNSPEC_MOVMSK)))]
+ "TARGET_64BIT && TARGET_SSE"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (any_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
+ "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
+ [(set_attr "isa" "noavx,avx")
+ (set_attr "type" "ssemov")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "<MODE>")])
+
(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_shift"
[(set (match_operand:SI 0 "register_operand" "=r,jr")
(unspec:SI
@@ -21961,6 +21992,34 @@ (define_insn_and_split "*<sse2_avx2>_pmovmskb_lt"
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "SI")])
+(define_insn_and_split "*<sse2_avx2>_pmovmskb_lt_avx512"
+ [(set (match_operand:SI 0 "register_operand" "=r,jr")
+ (unspec:SI
+ [(vec_merge:VI1_AVX2
+ (match_operand:VI1_AVX2 3 "vector_all_ones_operand")
+ (match_operand:VI1_AVX2 4 "const0_operand")
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VI1_AVX2 1 "register_operand" "x,x")
+ (match_operand:VI1_AVX2 2 "const0_operand")
+ (const_int 1)]
+ UNSPEC_PCMP))]
+ UNSPEC_MOVMSK))]
+ "TARGET_SSE2"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
+ ""
+ [(set_attr "isa" "noavx,avx")
+ (set_attr "type" "ssemov")
+ (set (attr "prefix_data16")
+ (if_then_else
+ (match_test "TARGET_AVX")
+ (const_string "*")
+ (const_string "1")))
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "SI")])
+
(define_insn_and_split "*<sse2_avx2>_pmovmskb_zext_lt"
[(set (match_operand:DI 0 "register_operand" "=r,jr")
(zero_extend:DI
@@ -21984,6 +22043,35 @@ (define_insn_and_split "*<sse2_avx2>_pmovmskb_zext_lt"
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "SI")])
+(define_insn_and_split "*<sse2_avx2>_pmovmskb_zext_lt_avx512"
+ [(set (match_operand:DI 0 "register_operand" "=r,jr")
+ (zero_extend:DI
+ (unspec:SI
+ [(vec_merge:VI1_AVX2
+ (match_operand:VI1_AVX2 3 "vector_all_ones_operand")
+ (match_operand:VI1_AVX2 4 "const0_operand")
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VI1_AVX2 1 "register_operand" "x,x")
+ (match_operand:VI1_AVX2 2 "const0_operand")
+ (const_int 1)]
+ UNSPEC_PCMP))]
+ UNSPEC_MOVMSK)))]
+ "TARGET_64BIT && TARGET_SSE2"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
+ ""
+ [(set_attr "isa" "noavx,avx")
+ (set_attr "type" "ssemov")
+ (set (attr "prefix_data16")
+ (if_then_else
+ (match_test "TARGET_AVX")
+ (const_string "*")
+ (const_string "1")))
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "SI")])
+
(define_insn_and_split "*sse2_pmovmskb_ext_lt"
[(set (match_operand:DI 0 "register_operand" "=r,jr")
(sign_extend:DI
@@ -22007,6 +22095,63 @@ (define_insn_and_split "*sse2_pmovmskb_ext_lt"
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "SI")])
+(define_insn_and_split "*sse2_pmovmskb_ext_lt_avx512"
+ [(set (match_operand:DI 0 "register_operand" "=r,jr")
+ (sign_extend:DI
+ (unspec:SI
+ [(vec_merge:VI1_AVX2
+ (match_operand:VI1_AVX2 3 "vector_all_ones_operand")
+ (match_operand:VI1_AVX2 4 "const0_operand")
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VI1_AVX2 1 "register_operand" "x,x")
+ (match_operand:VI1_AVX2 2 "const0_operand")
+ (const_int 1)]
+ UNSPEC_PCMP))]
+ UNSPEC_MOVMSK)))]
+ "TARGET_64BIT && TARGET_SSE2"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (sign_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
+ ""
+ [(set_attr "isa" "noavx,avx")
+ (set_attr "type" "ssemov")
+ (set (attr "prefix_data16")
+ (if_then_else
+ (match_test "TARGET_AVX")
+ (const_string "*")
+ (const_string "1")))
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "SI")])
+
+(define_insn_and_split "*pmovsk_kmask_v16qi_avx512"
+ [(set (match_operand:SI 0 "register_operand")
+ (unspec:SI
+ [(vec_merge:V16QI
+ (match_operand:V16QI 2 "vector_all_ones_operand")
+ (match_operand:V16QI 3 "const0_operand")
+ (match_operand:HI 1 "register_operand"))]
+ UNSPEC_MOVMSK))]
+ "TARGET_SSE2 && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (zero_extend:SI (match_dup 1)))])
+
+(define_insn_and_split "*pmovsk_mask_v32qi_avx512"
+ [(set (match_operand:SI 0 "register_operand")
+ (unspec:SI
+ [(vec_merge:V32QI
+ (match_operand:V32QI 2 "vector_all_ones_operand")
+ (match_operand:V32QI 3 "const0_operand")
+ (match_operand:SI 1 "register_operand"))]
+ UNSPEC_MOVMSK))]
+ "TARGET_SSE2 && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (match_dup 1))])
+
;; Optimize pxor/pcmpeqb/pmovmskb/cmp 0xffff to ptest.
(define_mode_attr vi1avx2const
[(V32QI "0xffffffff") (V16QI "0xffff")])
@@ -22025,6 +22170,47 @@ (define_split
(match_dup 0)]
UNSPEC_PTEST))])
+(define_insn_and_split "*pmovsk_mask_cmp_<mode>_avx512"
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ
+ (unspec:SI
+ [(vec_merge:VI1_AVX2
+ (match_operand:VI1_AVX2 0 "vector_all_ones_operand")
+ (match_operand:VI1_AVX2 3 "const0_operand")
+ (match_operand:<avx512fmaskmode> 1 "register_operand"))]
+ UNSPEC_MOVMSK)
+ (match_operand 2 "const_int_operand")))]
+ "TARGET_AVX512VL && UINTVAL (operands[2]) <= <vi1avx2const>"
+ "#"
+ "&& 1"
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ
+ (match_dup 1)
+ (match_dup 2)))]
+ "operands[2] = gen_int_mode (UINTVAL (operands[2]), <avx512fmaskmode>mode);")
+
+(define_insn_and_split "*pmovsk_ptest_<mode>_avx512"
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ
+ (unspec:SI
+ [(vec_merge:VI1_AVX2
+ (match_operand:VI1_AVX2 3 "vector_all_ones_operand")
+ (match_operand:VI1_AVX2 4 "const0_operand")
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VI1_AVX2 0 "vector_operand")
+ (match_operand:VI1_AVX2 1 "const0_operand")
+ (const_int 0)]
+ UNSPEC_PCMP))]
+ UNSPEC_MOVMSK)
+ (match_operand 2 "const_int_operand")))]
+ "TARGET_AVX512VL && (INTVAL (operands[2]) == (int) (<vi1avx2const>))"
+ "#"
+ "&& 1"
+ [(set (reg:CCZ FLAGS_REG)
+ (unspec:CCZ [(match_dup 0)
+ (match_dup 0)]
+ UNSPEC_PTEST))])
+
(define_expand "sse2_maskmovdqu"
[(set (match_operand:V16QI 0 "memory_operand")
(unspec:V16QI [(match_operand:V16QI 1 "register_operand")
--
2.31.1
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH 5/7] Adjust testcase for the regressed testcases after obsolete of vcond{,u,eq}.
2024-06-27 8:23 [PATCH 0/7][x86] Remove vcond{,u,eq}<mode> expanders liuhongt
` (3 preceding siblings ...)
2024-06-27 8:23 ` [PATCH 4/7] Add more splitter for mskmov with avx512 comparison liuhongt
@ 2024-06-27 8:23 ` liuhongt
2024-06-27 10:17 ` [PATCH 5/7] Adjust testcase for the regressed testcases after obsolete of vcond{, u, eq} Richard Biener
2024-06-27 8:23 ` [PATCH 6/7] [x86] Optimize a < 0 ? -1 : 0 to (signed)a >> 31 liuhongt
` (2 subsequent siblings)
7 siblings, 1 reply; 11+ messages in thread
From: liuhongt @ 2024-06-27 8:23 UTC (permalink / raw)
To: gcc-patches; +Cc: crazylht, hjl.tools
> Richard suggests that we implement the "obvious" transforms like
> inversion in the middle-end but if for example unsigned compares
> are not supported the us_minus + eq + negative trick isn't on
> that list.
>
> The main reason to restrict vec_cmp would be to avoid
> a <= b ? c : d going with an unsupported vec_cmp but instead
> do a > b ? d : c - the alternative is trying to fix this
> on the RTL side via combine. I understand the non-native
Yes, I have a patch which can fix most regressions via pattern match
in combine.
Still, there is a situation that is difficult to deal with, mainly the
optimization w/o sse4.1. Because pblendvb/blendvps/blendvpd only
exist under sse4.1, w/o sse4.1 it takes 3
instructions (pand, pandn, por) to simulate the vcond_mask, and
combine matches at most 4 instructions, which makes it currently
impossible to use combine to recover the optimizations done in
vcond{,u,eq}, i.e. min/max.
In the case of sse4.1 and above, there is basically no regression anymore.
The regressed testcases w/o sse4.1:
FAIL: g++.target/i386/pr100637-1b.C -std=gnu++14 scan-assembler-times pcmpeqb 2
FAIL: g++.target/i386/pr100637-1b.C -std=gnu++17 scan-assembler-times pcmpeqb 2
FAIL: g++.target/i386/pr100637-1b.C -std=gnu++20 scan-assembler-times pcmpeqb 2
FAIL: g++.target/i386/pr100637-1b.C -std=gnu++98 scan-assembler-times pcmpeqb 2
FAIL: g++.target/i386/pr100637-1w.C -std=gnu++14 scan-assembler-times pcmpeqw 2
FAIL: g++.target/i386/pr100637-1w.C -std=gnu++17 scan-assembler-times pcmpeqw 2
FAIL: g++.target/i386/pr100637-1w.C -std=gnu++20 scan-assembler-times pcmpeqw 2
FAIL: g++.target/i386/pr100637-1w.C -std=gnu++98 scan-assembler-times pcmpeqw 2
FAIL: g++.target/i386/pr103861-1.C -std=gnu++14 scan-assembler-times pcmpeqb 2
FAIL: g++.target/i386/pr103861-1.C -std=gnu++17 scan-assembler-times pcmpeqb 2
FAIL: g++.target/i386/pr103861-1.C -std=gnu++20 scan-assembler-times pcmpeqb 2
FAIL: g++.target/i386/pr103861-1.C -std=gnu++98 scan-assembler-times pcmpeqb 2
FAIL: gcc.target/i386/pr88540.c scan-assembler minpd
gcc/testsuite/ChangeLog:
PR target/115517
* g++.target/i386/pr100637-1b.C: Add xfail and -mno-sse4.1.
* g++.target/i386/pr100637-1w.C: Ditto.
* g++.target/i386/pr103861-1.C: Ditto.
* gcc.target/i386/pr88540.c: Ditto.
* gcc.target/i386/pr103941-2.c: Add -mno-avx512f.
* g++.target/i386/sse4_1-pr100637-1b.C: New test.
* g++.target/i386/sse4_1-pr100637-1w.C: New test.
* g++.target/i386/sse4_1-pr103861-1.C: New test.
* gcc.target/i386/sse4_1-pr88540.c: New test.
---
gcc/testsuite/g++.target/i386/pr100637-1b.C | 4 ++--
gcc/testsuite/g++.target/i386/pr100637-1w.C | 4 ++--
gcc/testsuite/g++.target/i386/pr103861-1.C | 4 ++--
.../g++.target/i386/sse4_1-pr100637-1b.C | 17 +++++++++++++++++
.../g++.target/i386/sse4_1-pr100637-1w.C | 17 +++++++++++++++++
.../g++.target/i386/sse4_1-pr103861-1.C | 17 +++++++++++++++++
gcc/testsuite/gcc.target/i386/pr103941-2.c | 2 +-
gcc/testsuite/gcc.target/i386/pr88540.c | 4 ++--
gcc/testsuite/gcc.target/i386/sse4_1-pr88540.c | 10 ++++++++++
9 files changed, 70 insertions(+), 9 deletions(-)
create mode 100644 gcc/testsuite/g++.target/i386/sse4_1-pr100637-1b.C
create mode 100644 gcc/testsuite/g++.target/i386/sse4_1-pr100637-1w.C
create mode 100644 gcc/testsuite/g++.target/i386/sse4_1-pr103861-1.C
create mode 100644 gcc/testsuite/gcc.target/i386/sse4_1-pr88540.c
diff --git a/gcc/testsuite/g++.target/i386/pr100637-1b.C b/gcc/testsuite/g++.target/i386/pr100637-1b.C
index 35b5df7c9dd..dccb8f5e712 100644
--- a/gcc/testsuite/g++.target/i386/pr100637-1b.C
+++ b/gcc/testsuite/g++.target/i386/pr100637-1b.C
@@ -1,6 +1,6 @@
/* PR target/100637 */
/* { dg-do compile } */
-/* { dg-options "-O2 -msse2" } */
+/* { dg-options "-O2 -msse2 -mno-sse4.1" } */
typedef unsigned char __attribute__((__vector_size__ (4))) __v4qu;
typedef char __attribute__((__vector_size__ (4))) __v4qi;
@@ -13,5 +13,5 @@ __v4qu us (__v4qi a, __v4qi b) { return (a > b) ? au : bu; }
__v4qi su (__v4qu a, __v4qu b) { return (a > b) ? as : bs; }
__v4qi ss (__v4qi a, __v4qi b) { return (a > b) ? as : bs; }
-/* { dg-final { scan-assembler-times "pcmpeqb" 2 } } */
+/* { dg-final { scan-assembler-times "pcmpeqb" 2 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times "pcmpgtb" 2 } } */
diff --git a/gcc/testsuite/g++.target/i386/pr100637-1w.C b/gcc/testsuite/g++.target/i386/pr100637-1w.C
index a3ed06fddee..a0aab62db33 100644
--- a/gcc/testsuite/g++.target/i386/pr100637-1w.C
+++ b/gcc/testsuite/g++.target/i386/pr100637-1w.C
@@ -1,6 +1,6 @@
/* PR target/100637 */
/* { dg-do compile } */
-/* { dg-options "-O2 -msse2" } */
+/* { dg-options "-O2 -msse2 -mno-sse4.1" } */
typedef unsigned short __attribute__((__vector_size__ (4))) __v2hu;
typedef short __attribute__((__vector_size__ (4))) __v2hi;
@@ -13,5 +13,5 @@ __v2hu us (__v2hi a, __v2hi b) { return (a > b) ? au : bu; }
__v2hi su (__v2hu a, __v2hu b) { return (a > b) ? as : bs; }
__v2hi ss (__v2hi a, __v2hi b) { return (a > b) ? as : bs; }
-/* { dg-final { scan-assembler-times "pcmpeqw" 2 } } */
+/* { dg-final { scan-assembler-times "pcmpeqw" 2 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times "pcmpgtw" 2 } } */
diff --git a/gcc/testsuite/g++.target/i386/pr103861-1.C b/gcc/testsuite/g++.target/i386/pr103861-1.C
index 6475728991e..3b282a7dca2 100644
--- a/gcc/testsuite/g++.target/i386/pr103861-1.C
+++ b/gcc/testsuite/g++.target/i386/pr103861-1.C
@@ -1,6 +1,6 @@
/* PR target/103861 */
/* { dg-do compile } */
-/* { dg-options "-O2 -msse2" } */
+/* { dg-options "-O2 -msse2 -mno-sse4.1" } */
typedef unsigned char __attribute__((__vector_size__ (2))) __v2qu;
typedef char __attribute__((__vector_size__ (2))) __v2qi;
@@ -13,5 +13,5 @@ __v2qu us (__v2qi a, __v2qi b) { return (a > b) ? au : bu; }
__v2qi su (__v2qu a, __v2qu b) { return (a > b) ? as : bs; }
__v2qi ss (__v2qi a, __v2qi b) { return (a > b) ? as : bs; }
-/* { dg-final { scan-assembler-times "pcmpeqb" 2 } } */
+/* { dg-final { scan-assembler-times "pcmpeqb" 2 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times "pcmpgtb" 2 } } */
diff --git a/gcc/testsuite/g++.target/i386/sse4_1-pr100637-1b.C b/gcc/testsuite/g++.target/i386/sse4_1-pr100637-1b.C
new file mode 100644
index 00000000000..3230a4ee563
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/sse4_1-pr100637-1b.C
@@ -0,0 +1,17 @@
+/* PR target/100637 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4.1" } */
+
+typedef unsigned char __attribute__((__vector_size__ (4))) __v4qu;
+typedef char __attribute__((__vector_size__ (4))) __v4qi;
+
+__v4qu au, bu;
+__v4qi as, bs;
+
+__v4qu uu (__v4qu a, __v4qu b) { return (a > b) ? au : bu; }
+__v4qu us (__v4qi a, __v4qi b) { return (a > b) ? au : bu; }
+__v4qi su (__v4qu a, __v4qu b) { return (a > b) ? as : bs; }
+__v4qi ss (__v4qi a, __v4qi b) { return (a > b) ? as : bs; }
+
+/* { dg-final { scan-assembler-times "pcmpeqb" 2 } } */
+/* { dg-final { scan-assembler-times "pcmpgtb" 2 } } */
diff --git a/gcc/testsuite/g++.target/i386/sse4_1-pr100637-1w.C b/gcc/testsuite/g++.target/i386/sse4_1-pr100637-1w.C
new file mode 100644
index 00000000000..9036ea5429d
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/sse4_1-pr100637-1w.C
@@ -0,0 +1,17 @@
+/* PR target/100637 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4.1" } */
+
+typedef unsigned short __attribute__((__vector_size__ (4))) __v2hu;
+typedef short __attribute__((__vector_size__ (4))) __v2hi;
+
+__v2hu au, bu;
+__v2hi as, bs;
+
+__v2hu uu (__v2hu a, __v2hu b) { return (a > b) ? au : bu; }
+__v2hu us (__v2hi a, __v2hi b) { return (a > b) ? au : bu; }
+__v2hi su (__v2hu a, __v2hu b) { return (a > b) ? as : bs; }
+__v2hi ss (__v2hi a, __v2hi b) { return (a > b) ? as : bs; }
+
+/* { dg-final { scan-assembler-times "pcmpeqw" 2 } } */
+/* { dg-final { scan-assembler-times "pcmpgtw" 2 } } */
diff --git a/gcc/testsuite/g++.target/i386/sse4_1-pr103861-1.C b/gcc/testsuite/g++.target/i386/sse4_1-pr103861-1.C
new file mode 100644
index 00000000000..a2b74898db9
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/sse4_1-pr103861-1.C
@@ -0,0 +1,17 @@
+/* PR target/103861 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4.1" } */
+
+typedef unsigned char __attribute__((__vector_size__ (2))) __v2qu;
+typedef char __attribute__((__vector_size__ (2))) __v2qi;
+
+__v2qu au, bu;
+__v2qi as, bs;
+
+__v2qu uu (__v2qu a, __v2qu b) { return (a > b) ? au : bu; }
+__v2qu us (__v2qi a, __v2qi b) { return (a > b) ? au : bu; }
+__v2qi su (__v2qu a, __v2qu b) { return (a > b) ? as : bs; }
+__v2qi ss (__v2qi a, __v2qi b) { return (a > b) ? as : bs; }
+
+/* { dg-final { scan-assembler-times "pcmpeqb" 2 } } */
+/* { dg-final { scan-assembler-times "pcmpgtb" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr103941-2.c b/gcc/testsuite/gcc.target/i386/pr103941-2.c
index 972a32be997..9b24a10c63d 100644
--- a/gcc/testsuite/gcc.target/i386/pr103941-2.c
+++ b/gcc/testsuite/gcc.target/i386/pr103941-2.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -msse2" } */
+/* { dg-options "-O2 -msse2 -mno-avx512f" } */
void foo (int *c, float *x, float *y)
{
diff --git a/gcc/testsuite/gcc.target/i386/pr88540.c b/gcc/testsuite/gcc.target/i386/pr88540.c
index b927d0c57d5..a22744763b5 100644
--- a/gcc/testsuite/gcc.target/i386/pr88540.c
+++ b/gcc/testsuite/gcc.target/i386/pr88540.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -msse2" } */
+/* { dg-options "-O2 -msse2 -mno-sse4.1" } */
void test(double* __restrict d1, double* __restrict d2, double* __restrict d3)
{
@@ -7,4 +7,4 @@ void test(double* __restrict d1, double* __restrict d2, double* __restrict d3)
d3[n] = d1[n] < d2[n] ? d1[n] : d2[n];
}
-/* { dg-final { scan-assembler "minpd" } } */
+/* { dg-final { scan-assembler "minpd" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pr88540.c b/gcc/testsuite/gcc.target/i386/sse4_1-pr88540.c
new file mode 100644
index 00000000000..31565a69db5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_1-pr88540.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4.1" } */
+
+void test(double* __restrict d1, double* __restrict d2, double* __restrict d3)
+{
+ for (int n = 0; n < 2; ++n)
+ d3[n] = d1[n] < d2[n] ? d1[n] : d2[n];
+}
+
+/* { dg-final { scan-assembler "minpd" } } */
--
2.31.1
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 5/7] Adjust testcase for the regressed testcases after obsolete of vcond{, u, eq}.
2024-06-27 8:23 ` [PATCH 5/7] Adjust testcase for the regressed testcases after obsolete of vcond{,u,eq} liuhongt
@ 2024-06-27 10:17 ` Richard Biener
0 siblings, 0 replies; 11+ messages in thread
From: Richard Biener @ 2024-06-27 10:17 UTC (permalink / raw)
To: liuhongt, Richard Sandiford; +Cc: gcc-patches, crazylht, hjl.tools
On Thu, Jun 27, 2024 at 10:30 AM liuhongt <hongtao.liu@intel.com> wrote:
>
> > Richard suggests that we implement the "obvious" transforms like
> > inversion in the middle-end but if for example unsigned compares
> > are not supported the us_minus + eq + negative trick isn't on
> > that list.
> >
> > The main reason to restrict vec_cmp would be to avoid
> > a <= b ? c : d going with an unsupported vec_cmp but instead
> > do a > b ? d : c - the alternative is trying to fix this
> > on the RTL side via combine. I understand the non-native
>
> Yes, I have a patch which can fix most regressions via pattern match
> in combine.
> Still there is a situation that is difficult to deal with, mainly the
> optimization w/o sse4.1 . Because pblendvb/blendvps/blendvpd only
> exists under sse4.1, w/o sse4.1, it takes 3
> instructions (pand,pandn,por) to simulate the vcond_mask, and the
> combine matches up to 4 instructions, which makes it currently
> impossible to use the combine to recover those optimizations in the
> vcond{,u,eq}.i.e min/max.
>
> In the case of sse 4.1 and above, there is basically no regression anymore.
So for SSE 4.1 you could mark the compares as not available - I'll note
this might have fallout since not all middle-end code using
expand_vec_cmp_expr_p handles emulating it via inversion. So an
alternative to outright disabling might be for ISEL to resort to an
alternate way to see that swapping cond arms is a good idea, for example
by having different rtx/insn_cost? I'm not sure using rtx costing is
good here, one could think of an additional target hook indicating a
compare op is emulated?
OTOH dealing with the fallout of disabling not supported compare ops
would be good anyway.
> the regression testcases w/o sse4.1
>
> FAIL: g++.target/i386/pr100637-1b.C -std=gnu++14 scan-assembler-times pcmpeqb 2
> FAIL: g++.target/i386/pr100637-1b.C -std=gnu++17 scan-assembler-times pcmpeqb 2
> FAIL: g++.target/i386/pr100637-1b.C -std=gnu++20 scan-assembler-times pcmpeqb 2
> FAIL: g++.target/i386/pr100637-1b.C -std=gnu++98 scan-assembler-times pcmpeqb 2
> FAIL: g++.target/i386/pr100637-1w.C -std=gnu++14 scan-assembler-times pcmpeqw 2
> FAIL: g++.target/i386/pr100637-1w.C -std=gnu++17 scan-assembler-times pcmpeqw 2
> FAIL: g++.target/i386/pr100637-1w.C -std=gnu++20 scan-assembler-times pcmpeqw 2
> FAIL: g++.target/i386/pr100637-1w.C -std=gnu++98 scan-assembler-times pcmpeqw 2
> FAIL: g++.target/i386/pr103861-1.C -std=gnu++14 scan-assembler-times pcmpeqb 2
> FAIL: g++.target/i386/pr103861-1.C -std=gnu++17 scan-assembler-times pcmpeqb 2
> FAIL: g++.target/i386/pr103861-1.C -std=gnu++20 scan-assembler-times pcmpeqb 2
> FAIL: g++.target/i386/pr103861-1.C -std=gnu++98 scan-assembler-times pcmpeqb 2
> FAIL: gcc.target/i386/pr88540.c scan-assembler minpd
>
> gcc/testsuite/ChangeLog:
>
> PR target/115517
> * g++.target/i386/pr100637-1b.C: Add xfail and -mno-sse4.1.
> * g++.target/i386/pr100637-1w.C: Ditto.
> * g++.target/i386/pr103861-1.C: Ditto.
> * gcc.target/i386/pr88540.c: Ditto.
> * gcc.target/i386/pr103941-2.c: Add -mno-avx512f.
> * g++.target/i386/sse4_1-pr100637-1b.C: New test.
> * g++.target/i386/sse4_1-pr100637-1w.C: New test.
> * g++.target/i386/sse4_1-pr103861-1.C: New test.
> * gcc.target/i386/sse4_1-pr88540.c: New test.
> ---
> gcc/testsuite/g++.target/i386/pr100637-1b.C | 4 ++--
> gcc/testsuite/g++.target/i386/pr100637-1w.C | 4 ++--
> gcc/testsuite/g++.target/i386/pr103861-1.C | 4 ++--
> .../g++.target/i386/sse4_1-pr100637-1b.C | 17 +++++++++++++++++
> .../g++.target/i386/sse4_1-pr100637-1w.C | 17 +++++++++++++++++
> .../g++.target/i386/sse4_1-pr103861-1.C | 17 +++++++++++++++++
> gcc/testsuite/gcc.target/i386/pr103941-2.c | 2 +-
> gcc/testsuite/gcc.target/i386/pr88540.c | 4 ++--
> gcc/testsuite/gcc.target/i386/sse4_1-pr88540.c | 10 ++++++++++
> 9 files changed, 70 insertions(+), 9 deletions(-)
> create mode 100644 gcc/testsuite/g++.target/i386/sse4_1-pr100637-1b.C
> create mode 100644 gcc/testsuite/g++.target/i386/sse4_1-pr100637-1w.C
> create mode 100644 gcc/testsuite/g++.target/i386/sse4_1-pr103861-1.C
> create mode 100644 gcc/testsuite/gcc.target/i386/sse4_1-pr88540.c
>
> diff --git a/gcc/testsuite/g++.target/i386/pr100637-1b.C b/gcc/testsuite/g++.target/i386/pr100637-1b.C
> index 35b5df7c9dd..dccb8f5e712 100644
> --- a/gcc/testsuite/g++.target/i386/pr100637-1b.C
> +++ b/gcc/testsuite/g++.target/i386/pr100637-1b.C
> @@ -1,6 +1,6 @@
> /* PR target/100637 */
> /* { dg-do compile } */
> -/* { dg-options "-O2 -msse2" } */
> +/* { dg-options "-O2 -msse2 -mno-sse4.1" } */
>
> typedef unsigned char __attribute__((__vector_size__ (4))) __v4qu;
> typedef char __attribute__((__vector_size__ (4))) __v4qi;
> @@ -13,5 +13,5 @@ __v4qu us (__v4qi a, __v4qi b) { return (a > b) ? au : bu; }
> __v4qi su (__v4qu a, __v4qu b) { return (a > b) ? as : bs; }
> __v4qi ss (__v4qi a, __v4qi b) { return (a > b) ? as : bs; }
>
> -/* { dg-final { scan-assembler-times "pcmpeqb" 2 } } */
> +/* { dg-final { scan-assembler-times "pcmpeqb" 2 { xfail *-*-* } } } */
> /* { dg-final { scan-assembler-times "pcmpgtb" 2 } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr100637-1w.C b/gcc/testsuite/g++.target/i386/pr100637-1w.C
> index a3ed06fddee..a0aab62db33 100644
> --- a/gcc/testsuite/g++.target/i386/pr100637-1w.C
> +++ b/gcc/testsuite/g++.target/i386/pr100637-1w.C
> @@ -1,6 +1,6 @@
> /* PR target/100637 */
> /* { dg-do compile } */
> -/* { dg-options "-O2 -msse2" } */
> +/* { dg-options "-O2 -msse2 -mno-sse4.1" } */
>
> typedef unsigned short __attribute__((__vector_size__ (4))) __v2hu;
> typedef short __attribute__((__vector_size__ (4))) __v2hi;
> @@ -13,5 +13,5 @@ __v2hu us (__v2hi a, __v2hi b) { return (a > b) ? au : bu; }
> __v2hi su (__v2hu a, __v2hu b) { return (a > b) ? as : bs; }
> __v2hi ss (__v2hi a, __v2hi b) { return (a > b) ? as : bs; }
>
> -/* { dg-final { scan-assembler-times "pcmpeqw" 2 } } */
> +/* { dg-final { scan-assembler-times "pcmpeqw" 2 { xfail *-*-* } } } */
> /* { dg-final { scan-assembler-times "pcmpgtw" 2 } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr103861-1.C b/gcc/testsuite/g++.target/i386/pr103861-1.C
> index 6475728991e..3b282a7dca2 100644
> --- a/gcc/testsuite/g++.target/i386/pr103861-1.C
> +++ b/gcc/testsuite/g++.target/i386/pr103861-1.C
> @@ -1,6 +1,6 @@
> /* PR target/103861 */
> /* { dg-do compile } */
> -/* { dg-options "-O2 -msse2" } */
> +/* { dg-options "-O2 -msse2 -mno-sse4.1" } */
>
> typedef unsigned char __attribute__((__vector_size__ (2))) __v2qu;
> typedef char __attribute__((__vector_size__ (2))) __v2qi;
> @@ -13,5 +13,5 @@ __v2qu us (__v2qi a, __v2qi b) { return (a > b) ? au : bu; }
> __v2qi su (__v2qu a, __v2qu b) { return (a > b) ? as : bs; }
> __v2qi ss (__v2qi a, __v2qi b) { return (a > b) ? as : bs; }
>
> -/* { dg-final { scan-assembler-times "pcmpeqb" 2 } } */
> +/* { dg-final { scan-assembler-times "pcmpeqb" 2 { xfail *-*-* } } } */
> /* { dg-final { scan-assembler-times "pcmpgtb" 2 } } */
> diff --git a/gcc/testsuite/g++.target/i386/sse4_1-pr100637-1b.C b/gcc/testsuite/g++.target/i386/sse4_1-pr100637-1b.C
> new file mode 100644
> index 00000000000..3230a4ee563
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/sse4_1-pr100637-1b.C
> @@ -0,0 +1,17 @@
> +/* PR target/100637 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msse4.1" } */
> +
> +typedef unsigned char __attribute__((__vector_size__ (4))) __v4qu;
> +typedef char __attribute__((__vector_size__ (4))) __v4qi;
> +
> +__v4qu au, bu;
> +__v4qi as, bs;
> +
> +__v4qu uu (__v4qu a, __v4qu b) { return (a > b) ? au : bu; }
> +__v4qu us (__v4qi a, __v4qi b) { return (a > b) ? au : bu; }
> +__v4qi su (__v4qu a, __v4qu b) { return (a > b) ? as : bs; }
> +__v4qi ss (__v4qi a, __v4qi b) { return (a > b) ? as : bs; }
> +
> +/* { dg-final { scan-assembler-times "pcmpeqb" 2 } } */
> +/* { dg-final { scan-assembler-times "pcmpgtb" 2 } } */
> diff --git a/gcc/testsuite/g++.target/i386/sse4_1-pr100637-1w.C b/gcc/testsuite/g++.target/i386/sse4_1-pr100637-1w.C
> new file mode 100644
> index 00000000000..9036ea5429d
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/sse4_1-pr100637-1w.C
> @@ -0,0 +1,17 @@
> +/* PR target/100637 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msse4.1" } */
> +
> +typedef unsigned short __attribute__((__vector_size__ (4))) __v2hu;
> +typedef short __attribute__((__vector_size__ (4))) __v2hi;
> +
> +__v2hu au, bu;
> +__v2hi as, bs;
> +
> +__v2hu uu (__v2hu a, __v2hu b) { return (a > b) ? au : bu; }
> +__v2hu us (__v2hi a, __v2hi b) { return (a > b) ? au : bu; }
> +__v2hi su (__v2hu a, __v2hu b) { return (a > b) ? as : bs; }
> +__v2hi ss (__v2hi a, __v2hi b) { return (a > b) ? as : bs; }
> +
> +/* { dg-final { scan-assembler-times "pcmpeqw" 2 } } */
> +/* { dg-final { scan-assembler-times "pcmpgtw" 2 } } */
> diff --git a/gcc/testsuite/g++.target/i386/sse4_1-pr103861-1.C b/gcc/testsuite/g++.target/i386/sse4_1-pr103861-1.C
> new file mode 100644
> index 00000000000..a2b74898db9
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/sse4_1-pr103861-1.C
> @@ -0,0 +1,17 @@
> +/* PR target/103861 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msse4.1" } */
> +
> +typedef unsigned char __attribute__((__vector_size__ (2))) __v2qu;
> +typedef char __attribute__((__vector_size__ (2))) __v2qi;
> +
> +__v2qu au, bu;
> +__v2qi as, bs;
> +
> +__v2qu uu (__v2qu a, __v2qu b) { return (a > b) ? au : bu; }
> +__v2qu us (__v2qi a, __v2qi b) { return (a > b) ? au : bu; }
> +__v2qi su (__v2qu a, __v2qu b) { return (a > b) ? as : bs; }
> +__v2qi ss (__v2qi a, __v2qi b) { return (a > b) ? as : bs; }
> +
> +/* { dg-final { scan-assembler-times "pcmpeqb" 2 } } */
> +/* { dg-final { scan-assembler-times "pcmpgtb" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr103941-2.c b/gcc/testsuite/gcc.target/i386/pr103941-2.c
> index 972a32be997..9b24a10c63d 100644
> --- a/gcc/testsuite/gcc.target/i386/pr103941-2.c
> +++ b/gcc/testsuite/gcc.target/i386/pr103941-2.c
> @@ -1,5 +1,5 @@
> /* { dg-do compile } */
> -/* { dg-options "-O2 -msse2" } */
> +/* { dg-options "-O2 -msse2 -mno-avx512f" } */
>
> void foo (int *c, float *x, float *y)
> {
> diff --git a/gcc/testsuite/gcc.target/i386/pr88540.c b/gcc/testsuite/gcc.target/i386/pr88540.c
> index b927d0c57d5..a22744763b5 100644
> --- a/gcc/testsuite/gcc.target/i386/pr88540.c
> +++ b/gcc/testsuite/gcc.target/i386/pr88540.c
> @@ -1,5 +1,5 @@
> /* { dg-do compile } */
> -/* { dg-options "-O2 -msse2" } */
> +/* { dg-options "-O2 -msse2 -mno-sse4.1" } */
>
> void test(double* __restrict d1, double* __restrict d2, double* __restrict d3)
> {
> @@ -7,4 +7,4 @@ void test(double* __restrict d1, double* __restrict d2, double* __restrict d3)
> d3[n] = d1[n] < d2[n] ? d1[n] : d2[n];
> }
>
> -/* { dg-final { scan-assembler "minpd" } } */
> +/* { dg-final { scan-assembler "minpd" { xfail *-*-* } } } */
> diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pr88540.c b/gcc/testsuite/gcc.target/i386/sse4_1-pr88540.c
> new file mode 100644
> index 00000000000..31565a69db5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pr88540.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msse4.1" } */
> +
> +void test(double* __restrict d1, double* __restrict d2, double* __restrict d3)
> +{
> + for (int n = 0; n < 2; ++n)
> + d3[n] = d1[n] < d2[n] ? d1[n] : d2[n];
> +}
> +
> +/* { dg-final { scan-assembler "minpd" } } */
> --
> 2.31.1
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH 6/7] [x86] Optimize a < 0 ? -1 : 0 to (signed)a >> 31.
2024-06-27 8:23 [PATCH 0/7][x86] Remove vcond{,u,eq}<mode> expanders liuhongt
` (4 preceding siblings ...)
2024-06-27 8:23 ` [PATCH 5/7] Adjust testcase for the regressed testcases after obsolete of vcond{,u,eq} liuhongt
@ 2024-06-27 8:23 ` liuhongt
2024-06-27 8:23 ` [PATCH 7/7] Remove vcond{,u,eq}<mode> expanders since they will be obsolete liuhongt
2024-06-27 9:59 ` [PATCH 0/7][x86] Remove vcond{,u,eq}<mode> expanders Richard Biener
7 siblings, 0 replies; 11+ messages in thread
From: liuhongt @ 2024-06-27 8:23 UTC (permalink / raw)
To: gcc-patches; +Cc: crazylht, hjl.tools
Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
and x < 0 ? 1 : 0 into (unsigned) x >> 31.
Add define_insn_and_split patterns for the optimization done in
ix86_expand_int_vcond.
gcc/ChangeLog:
PR target/115517
* config/i386/sse.md (*ashr<mode>3_1): New
define_insn_and_split.
(*avx512_ashr<mode>3_1): Ditto.
(*avx2_lshr<mode>3_1): Ditto.
(*avx2_lshr<mode>3_2): Ditto and add 2 combine splitters after
it.
* config/i386/mmx.md (mmxscalarsize): New mode attribute.
(*mmx_ashr<mode>3_1): New define_insn_and_split.
(mmx_<insn><mode>3): Add a combine splitter after it.
(*mmx_ashrv2hi3_1): New define_insn_and_split, also add a
combine splitter after it.
gcc/testsuite/ChangeLog:
* gcc.target/i386/avx2-pr115517.c: New test.
* gcc.target/i386/avx512-pr115517.c: New test.
* g++.target/i386/avx2-pr115517.C: New test.
* g++.target/i386/avx512-pr115517.C: New test.
* gcc.target/i386/pr111023-2.c: Adjust testcase.
* gcc.target/i386/vect-div-1.c: Ditto.
---
gcc/config/i386/mmx.md | 52 ++++++++++++
gcc/config/i386/sse.md | 83 +++++++++++++++++++
gcc/testsuite/g++.target/i386/avx2-pr115517.C | 60 ++++++++++++++
.../g++.target/i386/avx512-pr115517.C | 70 ++++++++++++++++
gcc/testsuite/gcc.target/i386/avx2-pr115517.c | 33 ++++++++
.../gcc.target/i386/avx512-pr115517.c | 70 ++++++++++++++++
gcc/testsuite/gcc.target/i386/pr111023-2.c | 4 +-
gcc/testsuite/gcc.target/i386/vect-div-1.c | 3 +-
8 files changed, 372 insertions(+), 3 deletions(-)
create mode 100644 gcc/testsuite/g++.target/i386/avx2-pr115517.C
create mode 100644 gcc/testsuite/g++.target/i386/avx512-pr115517.C
create mode 100644 gcc/testsuite/gcc.target/i386/avx2-pr115517.c
create mode 100644 gcc/testsuite/gcc.target/i386/avx512-pr115517.c
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index ea53f516cbb..7262bf146c2 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -135,6 +135,14 @@ (define_mode_attr mmxscalarmodelower
(V4HI "hi") (V2HI "hi")
(V8QI "qi")])
+(define_mode_attr mmxscalarsize
+ [(V1DI "64")
+ (V2SI "32") (V2SF "32")
+ (V4HF "16") (V4BF "16")
+ (V2HF "16") (V2BF "16")
+ (V4HI "16") (V2HI "16")
+ (V8QI "8")])
+
(define_mode_attr Yv_Yw
[(V8QI "Yw") (V4HI "Yw") (V2SI "Yv") (V1DI "Yv") (V2SF "Yv")])
@@ -3608,6 +3616,17 @@ (define_insn "mmx_ashr<mode>3"
(const_string "0")))
(set_attr "mode" "DI,TI,TI")])
+(define_insn_and_split "*mmx_ashr<mode>3_1"
+ [(set (match_operand:MMXMODE24 0 "register_operand")
+ (lt:MMXMODE24
+ (match_operand:MMXMODE24 1 "register_operand")
+ (match_operand:MMXMODE24 2 "const0_operand")))]
+ "TARGET_MMX_WITH_SSE && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(set (match_dup 0) (ashiftrt:MMXMODE24 (match_dup 1) (match_dup 3)))]
+ "operands[3] = gen_int_mode (<mmxscalarsize> - 1, DImode);")
+
(define_expand "ashr<mode>3"
[(set (match_operand:MMXMODE24 0 "register_operand")
(ashiftrt:MMXMODE24
@@ -3634,6 +3653,17 @@ (define_insn "mmx_<insn><mode>3"
(const_string "0")))
(set_attr "mode" "DI,TI,TI")])
+(define_split
+ [(set (match_operand:MMXMODE248 0 "register_operand")
+ (and:MMXMODE248
+ (lt:MMXMODE248
+ (match_operand:MMXMODE248 1 "register_operand")
+ (match_operand:MMXMODE248 2 "const0_operand"))
+ (match_operand:MMXMODE248 3 "const1_operand")))]
+ "TARGET_MMX_WITH_SSE && ix86_pre_reload_split ()"
+ [(set (match_dup 0) (lshiftrt:MMXMODE248 (match_dup 1) (match_dup 4)))]
+ "operands[4] = gen_int_mode (<mmxscalarsize> - 1, DImode);")
+
(define_expand "<insn><mode>3"
[(set (match_operand:MMXMODE24 0 "register_operand")
(any_lshift:MMXMODE24
@@ -3675,6 +3705,28 @@ (define_insn "<insn>v2hi3"
(const_string "0")))
(set_attr "mode" "TI")])
+(define_insn_and_split "*mmx_ashrv2hi3_1"
+ [(set (match_operand:V2HI 0 "register_operand")
+ (lt:V2HI
+ (match_operand:V2HI 1 "register_operand")
+ (match_operand:V2HI 2 "const0_operand")))]
+ "TARGET_SSE2 && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(set (match_dup 0) (ashiftrt:V2HI (match_dup 1) (match_dup 3)))]
+ "operands[3] = gen_int_mode (15, DImode);")
+
+(define_split
+ [(set (match_operand:V2HI 0 "register_operand")
+ (and:V2HI
+ (lt:V2HI
+ (match_operand:V2HI 1 "register_operand")
+ (match_operand:V2HI 2 "const0_operand"))
+ (match_operand:V2HI 3 "const1_operand")))]
+ "TARGET_SSE2 && ix86_pre_reload_split ()"
+ [(set (match_dup 0) (lshiftrt:V2HI (match_dup 1) (match_dup 4)))]
+ "operands[4] = gen_int_mode (15, DImode);")
+
(define_expand "<insn>v8qi3"
[(set (match_operand:V8QI 0 "register_operand")
(any_shift:V8QI (match_operand:V8QI 1 "register_operand")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 5996ad99606..d86b6fa81c0 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -16860,6 +16860,17 @@ (define_insn "ashr<mode>3"
(set_attr "prefix" "orig,vex")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn_and_split "*ashr<mode>3_1"
+ [(set (match_operand:VI24_AVX2 0 "register_operand")
+ (lt:VI24_AVX2
+ (match_operand:VI24_AVX2 1 "register_operand")
+ (match_operand:VI24_AVX2 2 "const0_operand")))]
+ "TARGET_SSE2 && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(set (match_dup 0) (ashiftrt:VI24_AVX2 (match_dup 1) (match_dup 3)))]
+ "operands[3] = gen_int_mode (<ssescalarsize> - 1, DImode);")
+
(define_insn "<mask_codefor>ashr<mode>3<mask_name>"
[(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
(ashiftrt:VI248_AVX512BW_AVX512VL
@@ -16874,6 +16885,23 @@ (define_insn "<mask_codefor>ashr<mode>3<mask_name>"
(const_string "0")))
(set_attr "mode" "<sseinsnmode>")])
+(define_insn_and_split "*avx512_ashr<mode>3_1"
+ [(set (match_operand:VI248_AVX512VLBW 0 "register_operand")
+ (vec_merge:VI248_AVX512VLBW
+ (match_operand:VI248_AVX512VLBW 1 "vector_all_ones_operand")
+ (match_operand:VI248_AVX512VLBW 2 "const0_operand")
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VI248_AVX512VLBW 3 "nonimmediate_operand")
+ (match_operand:VI248_AVX512VLBW 4 "const0_operand")
+ (const_int 1)]
+ UNSPEC_PCMP)))]
+ "TARGET_AVX512F && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (ashiftrt:VI248_AVX512VLBW (match_dup 3) (match_dup 5)))]
+ "operands[5] = gen_int_mode (<ssescalarsize> - 1, DImode);")
+
(define_expand "ashr<mode>3"
[(set (match_operand:VI248_AVX512BW 0 "register_operand")
(ashiftrt:VI248_AVX512BW
@@ -17028,6 +17056,61 @@ (define_insn "<insn><mode>3"
(set_attr "prefix" "orig,vex")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn_and_split "*avx2_lshr<mode>3_1"
+ [(set (match_operand:VI8_AVX2 0 "register_operand")
+ (and:VI8_AVX2
+ (gt:VI8_AVX2
+ (match_operand:VI8_AVX2 1 "register_operand")
+ (match_operand:VI8_AVX2 2 "register_operand"))
+ (match_operand:VI8_AVX2 3 "const1_operand")))]
+ "TARGET_SSE4_2 && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(set (match_dup 5) (gt:VI8_AVX2 (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (lshiftrt:VI8_AVX2 (match_dup 5) (match_dup 4)))]
+{
+ operands[4] = gen_int_mode (<ssescalarsize> - 1, DImode);
+ operands[5] = gen_reg_rtx (<MODE>mode);
+})
+
+(define_insn_and_split "*avx2_lshr<mode>3_2"
+ [(set (match_operand:VI8_AVX2 0 "register_operand")
+ (and:VI8_AVX2
+ (lt:VI8_AVX2
+ (match_operand:VI8_AVX2 1 "register_operand")
+ (match_operand:VI8_AVX2 2 "const0_operand"))
+ (match_operand:VI8_AVX2 3 "const1_operand")))]
+ "TARGET_SSE2 && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(set (match_dup 0) (lshiftrt:VI8_AVX2 (match_dup 1) (const_int 63)))])
+
+(define_split
+ [(set (match_operand:VI248_AVX2 0 "register_operand")
+ (and:VI248_AVX2
+ (lt:VI248_AVX2
+ (match_operand:VI248_AVX2 1 "register_operand")
+ (match_operand:VI248_AVX2 2 "const0_operand"))
+ (match_operand:VI248_AVX2 3 "const1_operand")))]
+ "TARGET_SSE2 && ix86_pre_reload_split ()"
+ [(set (match_dup 0) (lshiftrt:VI248_AVX2 (match_dup 1) (match_dup 4)))]
+ "operands[4] = gen_int_mode (<ssescalarsize> - 1, DImode);")
+
+(define_split
+ [(set (match_operand:VI248_AVX512VLBW 0 "register_operand")
+ (vec_merge:VI248_AVX512VLBW
+ (match_operand:VI248_AVX512VLBW 1 "const1_operand")
+ (match_operand:VI248_AVX512VLBW 2 "const0_operand")
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VI248_AVX512VLBW 3 "nonimmediate_operand")
+ (match_operand:VI248_AVX512VLBW 4 "const0_operand")
+ (const_int 1)]
+ UNSPEC_PCMP)))]
+ "TARGET_AVX512F && ix86_pre_reload_split ()"
+ [(set (match_dup 0)
+ (lshiftrt:VI248_AVX512VLBW (match_dup 3) (match_dup 5)))]
+ "operands[5] = gen_int_mode (<ssescalarsize> - 1, DImode);")
+
(define_insn "<insn><mode>3<mask_name>"
[(set (match_operand:VI248_AVX512BW 0 "register_operand" "=v,v")
(any_lshift:VI248_AVX512BW
diff --git a/gcc/testsuite/g++.target/i386/avx2-pr115517.C b/gcc/testsuite/g++.target/i386/avx2-pr115517.C
new file mode 100644
index 00000000000..ec000c57542
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/avx2-pr115517.C
@@ -0,0 +1,60 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O2" } */
+/* { dg-final { scan-assembler-times "vpsrlq" 2 } } */
+/* { dg-final { scan-assembler-times "vpsrld" 2 } } */
+/* { dg-final { scan-assembler-times "vpsrlw" 2 } } */
+
+typedef short v8hi __attribute__((vector_size(16)));
+typedef short v16hi __attribute__((vector_size(32)));
+typedef int v4si __attribute__((vector_size(16)));
+typedef int v8si __attribute__((vector_size(32)));
+typedef long long v2di __attribute__((vector_size(16)));
+typedef long long v4di __attribute__((vector_size(32)));
+
+v8hi
+foo (v8hi a)
+{
+ v8hi const1_op = __extension__(v8hi){1,1,1,1,1,1,1,1};
+ v8hi const0_op = __extension__(v8hi){0,0,0,0,0,0,0,0};
+ return a < const0_op ? const1_op : const0_op;
+}
+
+v16hi
+foo2 (v16hi a)
+{
+ v16hi const1_op = __extension__(v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
+ v16hi const0_op = __extension__(v16hi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+ return a < const0_op ? const1_op : const0_op;
+}
+
+v4si
+foo3 (v4si a)
+{
+ v4si const1_op = __extension__(v4si){1,1,1,1};
+ v4si const0_op = __extension__(v4si){0,0,0,0};
+ return a < const0_op ? const1_op : const0_op;
+}
+
+v8si
+foo4 (v8si a)
+{
+ v8si const1_op = __extension__(v8si){1,1,1,1,1,1,1,1};
+ v8si const0_op = __extension__(v8si){0,0,0,0,0,0,0,0};
+ return a < const0_op ? const1_op : const0_op;
+}
+
+v2di
+foo3 (v2di a)
+{
+ v2di const1_op = __extension__(v2di){1,1};
+ v2di const0_op = __extension__(v2di){0,0};
+ return a < const0_op ? const1_op : const0_op;
+}
+
+v4di
+foo4 (v4di a)
+{
+ v4di const1_op = __extension__(v4di){1,1,1,1};
+ v4di const0_op = __extension__(v4di){0,0,0,0};
+ return a < const0_op ? const1_op : const0_op;
+}
diff --git a/gcc/testsuite/g++.target/i386/avx512-pr115517.C b/gcc/testsuite/g++.target/i386/avx512-pr115517.C
new file mode 100644
index 00000000000..22df41bbdc9
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/avx512-pr115517.C
@@ -0,0 +1,70 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpsrad" 3 } } */
+/* { dg-final { scan-assembler-times "vpsraw" 3 } } */
+/* { dg-final { scan-assembler-times "vpsraq" 3 } } */
+
+typedef short v8hi __attribute__((vector_size(16)));
+typedef short v16hi __attribute__((vector_size(32)));
+typedef short v32hi __attribute__((vector_size(64)));
+typedef int v4si __attribute__((vector_size(16)));
+typedef int v8si __attribute__((vector_size(32)));
+typedef int v16si __attribute__((vector_size(64)));
+typedef long long v2di __attribute__((vector_size(16)));
+typedef long long v4di __attribute__((vector_size(32)));
+typedef long long v8di __attribute__((vector_size(64)));
+
+v8hi
+foo (v8hi a)
+{
+ return a < __extension__(v8hi) { 0, 0, 0, 0, 0, 0, 0, 0};
+}
+
+v16hi
+foo2 (v16hi a)
+{
+ return a < __extension__(v16hi) { 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0};
+}
+
+v32hi
+foo3 (v32hi a)
+{
+ return a < __extension__(v32hi) { 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0};
+}
+
+v4si
+foo4 (v4si a)
+{
+ return a < __extension__(v4si) { 0, 0, 0, 0};
+}
+
+v8si
+foo5 (v8si a)
+{
+ return a < __extension__(v8si) { 0, 0, 0, 0, 0, 0, 0, 0};
+}
+
+v16si
+foo6 (v16si a)
+{
+ return a < __extension__(v16si) { 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0};
+}
+
+v2di
+foo7 (v2di a)
+{
+ return a < __extension__(v2di) { 0, 0};
+}
+
+v4di
+foo8 (v4di a)
+{
+ return a < __extension__(v4di) { 0, 0, 0, 0};
+}
+
+v8di
+foo9 (v8di a)
+{
+ return a < __extension__(v8di) { 0, 0, 0, 0, 0, 0, 0, 0};
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx2-pr115517.c b/gcc/testsuite/gcc.target/i386/avx2-pr115517.c
new file mode 100644
index 00000000000..5b2620b0dc1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-pr115517.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O2" } */
+/* { dg-final { scan-assembler-times "vpsrad" 2 } } */
+/* { dg-final { scan-assembler-times "vpsraw" 2 } } */
+
+typedef short v8hi __attribute__((vector_size(16)));
+typedef short v16hi __attribute__((vector_size(32)));
+typedef int v4si __attribute__((vector_size(16)));
+typedef int v8si __attribute__((vector_size(32)));
+
+v8hi
+foo (v8hi a)
+{
+ return a < __extension__(v8hi) { 0, 0, 0, 0, 0, 0, 0, 0};
+}
+
+v16hi
+foo2 (v16hi a)
+{
+ return a < __extension__(v16hi) { 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0};
+}
+
+v4si
+foo3 (v4si a)
+{
+ return a < __extension__(v4si) { 0, 0, 0, 0};
+}
+
+v8si
+foo4 (v8si a)
+{
+ return a < __extension__(v8si) { 0, 0, 0, 0, 0, 0, 0, 0};
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512-pr115517.c b/gcc/testsuite/gcc.target/i386/avx512-pr115517.c
new file mode 100644
index 00000000000..22df41bbdc9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512-pr115517.c
@@ -0,0 +1,70 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpsrad" 3 } } */
+/* { dg-final { scan-assembler-times "vpsraw" 3 } } */
+/* { dg-final { scan-assembler-times "vpsraq" 3 } } */
+
+typedef short v8hi __attribute__((vector_size(16)));
+typedef short v16hi __attribute__((vector_size(32)));
+typedef short v32hi __attribute__((vector_size(64)));
+typedef int v4si __attribute__((vector_size(16)));
+typedef int v8si __attribute__((vector_size(32)));
+typedef int v16si __attribute__((vector_size(64)));
+typedef long long v2di __attribute__((vector_size(16)));
+typedef long long v4di __attribute__((vector_size(32)));
+typedef long long v8di __attribute__((vector_size(64)));
+
+v8hi
+foo (v8hi a)
+{
+ return a < __extension__(v8hi) { 0, 0, 0, 0, 0, 0, 0, 0};
+}
+
+v16hi
+foo2 (v16hi a)
+{
+ return a < __extension__(v16hi) { 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0};
+}
+
+v32hi
+foo3 (v32hi a)
+{
+ return a < __extension__(v32hi) { 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0};
+}
+
+v4si
+foo4 (v4si a)
+{
+ return a < __extension__(v4si) { 0, 0, 0, 0};
+}
+
+v8si
+foo5 (v8si a)
+{
+ return a < __extension__(v8si) { 0, 0, 0, 0, 0, 0, 0, 0};
+}
+
+v16si
+foo6 (v16si a)
+{
+ return a < __extension__(v16si) { 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0};
+}
+
+v2di
+foo7 (v2di a)
+{
+ return a < __extension__(v2di) { 0, 0};
+}
+
+v4di
+foo8 (v4di a)
+{
+ return a < __extension__(v4di) { 0, 0, 0, 0};
+}
+
+v8di
+foo9 (v8di a)
+{
+ return a < __extension__(v8di) { 0, 0, 0, 0, 0, 0, 0, 0};
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr111023-2.c b/gcc/testsuite/gcc.target/i386/pr111023-2.c
index 6c69f947544..ba52959b357 100644
--- a/gcc/testsuite/gcc.target/i386/pr111023-2.c
+++ b/gcc/testsuite/gcc.target/i386/pr111023-2.c
@@ -36,7 +36,7 @@ v4si_v4hi (v4si *dst, v8hi src)
dst[0] = *(v4si *) tem;
}
-/* { dg-final { scan-assembler "pcmpgtw" } } */
+/* { dg-final { scan-assembler "(?:pcmpgtw|psraw)" } } */
/* { dg-final { scan-assembler "punpcklwd" } } */
void
@@ -48,5 +48,5 @@ v2di_v2si (v2di *dst, v4si src)
dst[0] = *(v2di *) tem;
}
-/* { dg-final { scan-assembler "pcmpgtd" } } */
+/* { dg-final { scan-assembler "(?:pcmpgtd|psrad)" } } */
/* { dg-final { scan-assembler "punpckldq" } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-div-1.c b/gcc/testsuite/gcc.target/i386/vect-div-1.c
index f611088d8df..6d911290e06 100644
--- a/gcc/testsuite/gcc.target/i386/vect-div-1.c
+++ b/gcc/testsuite/gcc.target/i386/vect-div-1.c
@@ -40,4 +40,5 @@ f4 (int x)
is always non-negative, so there is no need to do >> 31 shift
etc. to check if it is. And in f3 and f4, VRP can prove it is always
negative. */
-/* { dg-final { scan-assembler-not "psrad\[^\n\r\]*\\\$31" } } */
+/* Now (lt:v4si op1 const0_operand) is optimized to psrad; there are 20 of them. */
+/* { dg-final { scan-assembler-times "psrad\[^\n\r\]*\\\$31" 20 } } */
--
2.31.1
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH 7/7] Remove vcond{,u,eq}<mode> expanders since they will be obsolete.
2024-06-27 8:23 [PATCH 0/7][x86] Remove vcond{,u,eq}<mode> expanders liuhongt
` (5 preceding siblings ...)
2024-06-27 8:23 ` [PATCH 6/7] [x86] Optimize a < 0 ? -1 : 0 to (signed)a >> 31 liuhongt
@ 2024-06-27 8:23 ` liuhongt
2024-06-27 9:59 ` [PATCH 0/7][x86] Remove vcond{,u,eq}<mode> expanders Richard Biener
7 siblings, 0 replies; 11+ messages in thread
From: liuhongt @ 2024-06-27 8:23 UTC (permalink / raw)
To: gcc-patches; +Cc: crazylht, hjl.tools
gcc/ChangeLog:
PR target/115517
* config/i386/mmx.md (vcond<mode>v2sf): Removed.
(vcond<MMXMODE124:mode><MMXMODEI:mode>): Ditto.
(vcond<mode><mode>): Ditto.
(vcondu<MMXMODE124:mode><MMXMODEI:mode>): Ditto.
(vcondu<mode><mode>): Ditto.
* config/i386/sse.md (vcond<V_512:mode><VF_512:mode>): Ditto.
(vcond<V_256:mode><VF_256:mode>): Ditto.
(vcond<V_128:mode><VF_128:mode>): Ditto.
(vcond<VI2HFBF_AVX512VL:mode><VHF_AVX512VL:mode>): Ditto.
(vcond<V_512:mode><VI_AVX512BW:mode>): Ditto.
(vcond<V_256:mode><VI_256:mode>): Ditto.
(vcond<V_128:mode><VI124_128:mode>): Ditto.
(vcond<VI8F_128:mode>v2di): Ditto.
(vcondu<V_512:mode><VI_AVX512BW:mode>): Ditto.
(vcondu<V_256:mode><VI_256:mode>): Ditto.
(vcondu<V_128:mode><VI124_128:mode>): Ditto.
(vcondu<VI8F_128:mode>v2di): Ditto.
(vcondeq<VI8F_128:mode>v2di): Ditto.
---
gcc/config/i386/mmx.md | 97 -------------------
gcc/config/i386/sse.md | 213 -----------------------------------------
2 files changed, 310 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 7262bf146c2..17c5205cae2 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1168,39 +1168,6 @@ (define_expand "vec_cmpv2sfv2si"
DONE;
})
-(define_expand "vcond<mode>v2sf"
- [(set (match_operand:V2FI 0 "register_operand")
- (if_then_else:V2FI
- (match_operator 3 ""
- [(match_operand:V2SF 4 "nonimmediate_operand")
- (match_operand:V2SF 5 "nonimmediate_operand")])
- (match_operand:V2FI 1 "general_operand")
- (match_operand:V2FI 2 "general_operand")))]
- "TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
-{
- rtx ops[6];
- ops[5] = gen_reg_rtx (V4SFmode);
- ops[4] = gen_reg_rtx (V4SFmode);
- ops[3] = gen_rtx_fmt_ee (GET_CODE (operands[3]), VOIDmode, ops[4], ops[5]);
- ops[2] = lowpart_subreg (<mmxdoublevecmode>mode,
- force_reg (<MODE>mode, operands[2]),
- <MODE>mode);
- ops[1] = lowpart_subreg (<mmxdoublevecmode>mode,
- force_reg (<MODE>mode, operands[1]),
- <MODE>mode);
- ops[0] = gen_reg_rtx (<mmxdoublevecmode>mode);
-
- emit_insn (gen_movq_v2sf_to_sse (ops[5], operands[5]));
- emit_insn (gen_movq_v2sf_to_sse (ops[4], operands[4]));
-
- bool ok = ix86_expand_fp_vcond (ops);
- gcc_assert (ok);
-
- emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, ops[0],
- <mmxdoublevecmode>mode));
- DONE;
-})
-
(define_insn "@sse4_1_insertps_<mode>"
[(set (match_operand:V2FI 0 "register_operand" "=Yr,*x,v")
(unspec:V2FI
@@ -4029,70 +3996,6 @@ (define_expand "vec_cmpu<mode><mode>"
DONE;
})
-(define_expand "vcond<MMXMODE124:mode><MMXMODEI:mode>"
- [(set (match_operand:MMXMODE124 0 "register_operand")
- (if_then_else:MMXMODE124
- (match_operator 3 ""
- [(match_operand:MMXMODEI 4 "register_operand")
- (match_operand:MMXMODEI 5 "register_operand")])
- (match_operand:MMXMODE124 1)
- (match_operand:MMXMODE124 2)))]
- "TARGET_MMX_WITH_SSE
- && (GET_MODE_NUNITS (<MMXMODE124:MODE>mode)
- == GET_MODE_NUNITS (<MMXMODEI:MODE>mode))"
-{
- bool ok = ix86_expand_int_vcond (operands);
- gcc_assert (ok);
- DONE;
-})
-
-(define_expand "vcond<mode><mode>"
- [(set (match_operand:VI_16_32 0 "register_operand")
- (if_then_else:VI_16_32
- (match_operator 3 ""
- [(match_operand:VI_16_32 4 "register_operand")
- (match_operand:VI_16_32 5 "register_operand")])
- (match_operand:VI_16_32 1)
- (match_operand:VI_16_32 2)))]
- "TARGET_SSE2"
-{
- bool ok = ix86_expand_int_vcond (operands);
- gcc_assert (ok);
- DONE;
-})
-
-(define_expand "vcondu<MMXMODE124:mode><MMXMODEI:mode>"
- [(set (match_operand:MMXMODE124 0 "register_operand")
- (if_then_else:MMXMODE124
- (match_operator 3 ""
- [(match_operand:MMXMODEI 4 "register_operand")
- (match_operand:MMXMODEI 5 "register_operand")])
- (match_operand:MMXMODE124 1)
- (match_operand:MMXMODE124 2)))]
- "TARGET_MMX_WITH_SSE
- && (GET_MODE_NUNITS (<MMXMODE124:MODE>mode)
- == GET_MODE_NUNITS (<MMXMODEI:MODE>mode))"
-{
- bool ok = ix86_expand_int_vcond (operands);
- gcc_assert (ok);
- DONE;
-})
-
-(define_expand "vcondu<mode><mode>"
- [(set (match_operand:VI_16_32 0 "register_operand")
- (if_then_else:VI_16_32
- (match_operator 3 ""
- [(match_operand:VI_16_32 4 "register_operand")
- (match_operand:VI_16_32 5 "register_operand")])
- (match_operand:VI_16_32 1)
- (match_operand:VI_16_32 2)))]
- "TARGET_SSE2"
-{
- bool ok = ix86_expand_int_vcond (operands);
- gcc_assert (ok);
- DONE;
-})
-
(define_expand "vcond_mask_<mode><mmxintvecmodelower>"
[(set (match_operand:MMXMODE124 0 "register_operand")
(vec_merge:MMXMODE124
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index d86b6fa81c0..2d6b39c920f 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -4816,72 +4816,6 @@ (define_expand "vec_cmpeqv1tiv1ti"
DONE;
})
-(define_expand "vcond<V_512:mode><VF_512:mode>"
- [(set (match_operand:V_512 0 "register_operand")
- (if_then_else:V_512
- (match_operator 3 ""
- [(match_operand:VF_512 4 "nonimmediate_operand")
- (match_operand:VF_512 5 "nonimmediate_operand")])
- (match_operand:V_512 1 "general_operand")
- (match_operand:V_512 2 "general_operand")))]
- "TARGET_AVX512F
- && (GET_MODE_NUNITS (<V_512:MODE>mode)
- == GET_MODE_NUNITS (<VF_512:MODE>mode))"
-{
- bool ok = ix86_expand_fp_vcond (operands);
- gcc_assert (ok);
- DONE;
-})
-
-(define_expand "vcond<V_256:mode><VF_256:mode>"
- [(set (match_operand:V_256 0 "register_operand")
- (if_then_else:V_256
- (match_operator 3 ""
- [(match_operand:VF_256 4 "nonimmediate_operand")
- (match_operand:VF_256 5 "nonimmediate_operand")])
- (match_operand:V_256 1 "general_operand")
- (match_operand:V_256 2 "general_operand")))]
- "TARGET_AVX
- && (GET_MODE_NUNITS (<V_256:MODE>mode)
- == GET_MODE_NUNITS (<VF_256:MODE>mode))"
-{
- bool ok = ix86_expand_fp_vcond (operands);
- gcc_assert (ok);
- DONE;
-})
-
-(define_expand "vcond<V_128:mode><VF_128:mode>"
- [(set (match_operand:V_128 0 "register_operand")
- (if_then_else:V_128
- (match_operator 3 ""
- [(match_operand:VF_128 4 "vector_operand")
- (match_operand:VF_128 5 "vector_operand")])
- (match_operand:V_128 1 "general_operand")
- (match_operand:V_128 2 "general_operand")))]
- "TARGET_SSE
- && (GET_MODE_NUNITS (<V_128:MODE>mode)
- == GET_MODE_NUNITS (<VF_128:MODE>mode))"
-{
- bool ok = ix86_expand_fp_vcond (operands);
- gcc_assert (ok);
- DONE;
-})
-
-(define_expand "vcond<VI2HFBF_AVX512VL:mode><VHF_AVX512VL:mode>"
- [(set (match_operand:VI2HFBF_AVX512VL 0 "register_operand")
- (if_then_else:VI2HFBF_AVX512VL
- (match_operator 3 ""
- [(match_operand:VHF_AVX512VL 4 "vector_operand")
- (match_operand:VHF_AVX512VL 5 "vector_operand")])
- (match_operand:VI2HFBF_AVX512VL 1 "general_operand")
- (match_operand:VI2HFBF_AVX512VL 2 "general_operand")))]
- "TARGET_AVX512FP16"
-{
- bool ok = ix86_expand_fp_vcond (operands);
- gcc_assert (ok);
- DONE;
-})
-
(define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
[(set (match_operand:V48_AVX512VL 0 "register_operand")
(vec_merge:V48_AVX512VL
@@ -18017,153 +17951,6 @@ (define_insn "*sse2_gt<mode>3"
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
-(define_expand "vcond<V_512:mode><VI_AVX512BW:mode>"
- [(set (match_operand:V_512 0 "register_operand")
- (if_then_else:V_512
- (match_operator 3 ""
- [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
- (match_operand:VI_AVX512BW 5 "general_operand")])
- (match_operand:V_512 1)
- (match_operand:V_512 2)))]
- "TARGET_AVX512F
- && (GET_MODE_NUNITS (<V_512:MODE>mode)
- == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
-{
- bool ok = ix86_expand_int_vcond (operands);
- gcc_assert (ok);
- DONE;
-})
-
-(define_expand "vcond<V_256:mode><VI_256:mode>"
- [(set (match_operand:V_256 0 "register_operand")
- (if_then_else:V_256
- (match_operator 3 ""
- [(match_operand:VI_256 4 "nonimmediate_operand")
- (match_operand:VI_256 5 "general_operand")])
- (match_operand:V_256 1)
- (match_operand:V_256 2)))]
- "TARGET_AVX2
- && (GET_MODE_NUNITS (<V_256:MODE>mode)
- == GET_MODE_NUNITS (<VI_256:MODE>mode))"
-{
- bool ok = ix86_expand_int_vcond (operands);
- gcc_assert (ok);
- DONE;
-})
-
-(define_expand "vcond<V_128:mode><VI124_128:mode>"
- [(set (match_operand:V_128 0 "register_operand")
- (if_then_else:V_128
- (match_operator 3 ""
- [(match_operand:VI124_128 4 "vector_operand")
- (match_operand:VI124_128 5 "general_operand")])
- (match_operand:V_128 1)
- (match_operand:V_128 2)))]
- "TARGET_SSE2
- && (GET_MODE_NUNITS (<V_128:MODE>mode)
- == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
-{
- bool ok = ix86_expand_int_vcond (operands);
- gcc_assert (ok);
- DONE;
-})
-
-(define_expand "vcond<VI8F_128:mode>v2di"
- [(set (match_operand:VI8F_128 0 "register_operand")
- (if_then_else:VI8F_128
- (match_operator 3 ""
- [(match_operand:V2DI 4 "vector_operand")
- (match_operand:V2DI 5 "general_operand")])
- (match_operand:VI8F_128 1)
- (match_operand:VI8F_128 2)))]
- "TARGET_SSE4_2"
-{
- bool ok = ix86_expand_int_vcond (operands);
- gcc_assert (ok);
- DONE;
-})
-
-(define_expand "vcondu<V_512:mode><VI_AVX512BW:mode>"
- [(set (match_operand:V_512 0 "register_operand")
- (if_then_else:V_512
- (match_operator 3 ""
- [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
- (match_operand:VI_AVX512BW 5 "nonimmediate_operand")])
- (match_operand:V_512 1 "general_operand")
- (match_operand:V_512 2 "general_operand")))]
- "TARGET_AVX512F
- && (GET_MODE_NUNITS (<V_512:MODE>mode)
- == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
-{
- bool ok = ix86_expand_int_vcond (operands);
- gcc_assert (ok);
- DONE;
-})
-
-(define_expand "vcondu<V_256:mode><VI_256:mode>"
- [(set (match_operand:V_256 0 "register_operand")
- (if_then_else:V_256
- (match_operator 3 ""
- [(match_operand:VI_256 4 "nonimmediate_operand")
- (match_operand:VI_256 5 "nonimmediate_operand")])
- (match_operand:V_256 1 "general_operand")
- (match_operand:V_256 2 "general_operand")))]
- "TARGET_AVX2
- && (GET_MODE_NUNITS (<V_256:MODE>mode)
- == GET_MODE_NUNITS (<VI_256:MODE>mode))"
-{
- bool ok = ix86_expand_int_vcond (operands);
- gcc_assert (ok);
- DONE;
-})
-
-(define_expand "vcondu<V_128:mode><VI124_128:mode>"
- [(set (match_operand:V_128 0 "register_operand")
- (if_then_else:V_128
- (match_operator 3 ""
- [(match_operand:VI124_128 4 "vector_operand")
- (match_operand:VI124_128 5 "vector_operand")])
- (match_operand:V_128 1 "general_operand")
- (match_operand:V_128 2 "general_operand")))]
- "TARGET_SSE2
- && (GET_MODE_NUNITS (<V_128:MODE>mode)
- == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
-{
- bool ok = ix86_expand_int_vcond (operands);
- gcc_assert (ok);
- DONE;
-})
-
-(define_expand "vcondu<VI8F_128:mode>v2di"
- [(set (match_operand:VI8F_128 0 "register_operand")
- (if_then_else:VI8F_128
- (match_operator 3 ""
- [(match_operand:V2DI 4 "vector_operand")
- (match_operand:V2DI 5 "vector_operand")])
- (match_operand:VI8F_128 1 "general_operand")
- (match_operand:VI8F_128 2 "general_operand")))]
- "TARGET_SSE4_2"
-{
- bool ok = ix86_expand_int_vcond (operands);
- gcc_assert (ok);
- DONE;
-})
-
-(define_expand "vcondeq<VI8F_128:mode>v2di"
- [(set (match_operand:VI8F_128 0 "register_operand")
- (if_then_else:VI8F_128
- (match_operator 3 ""
- [(match_operand:V2DI 4 "vector_operand")
- (match_operand:V2DI 5 "general_operand")])
- (match_operand:VI8F_128 1)
- (match_operand:VI8F_128 2)))]
- "TARGET_SSE4_1"
-{
- bool ok = ix86_expand_int_vcond (operands);
- gcc_assert (ok);
- DONE;
-})
-
(define_mode_iterator VEC_PERM_AVX2
[V16QI V8HI V4SI V2DI V4SF V2DF
(V8HF "TARGET_AVX512FP16")
--
2.31.1
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 0/7][x86] Remove vcond{,u,eq}<mode> expanders.
2024-06-27 8:23 [PATCH 0/7][x86] Remove vcond{,u,eq}<mode> expanders liuhongt
` (6 preceding siblings ...)
2024-06-27 8:23 ` [PATCH 7/7] Remove vcond{,u,eq}<mode> expanders since they will be obsolete liuhongt
@ 2024-06-27 9:59 ` Richard Biener
7 siblings, 0 replies; 11+ messages in thread
From: Richard Biener @ 2024-06-27 9:59 UTC (permalink / raw)
To: liuhongt; +Cc: gcc-patches, crazylht, hjl.tools
On Thu, Jun 27, 2024 at 10:27 AM liuhongt <hongtao.liu@intel.com> wrote:
>
> There're several regressions after obsolete vcond{,u,eq}<mode>,
> Some regressions are due to the direct optimizations in
> ix86_expand_{fp,int}_vcond..i.e ix86_expand_sse_fp_minmax.
> Some regrssions are due to optimizations relies on canonicalization
> in ix86_expand_{fp,int}_vcond.
>
> This series add define_split or define_insn_and_split to restore
> those optimizations at pass_combine. It fixed most regressions in GCC
> testsuite except for ones compiled w/o sse4.1. W/o sse4.1 it takes 3
> instrution for vector condition move, and pass_combine only supports
> at most 4 instructions combination. One possible solution is add fake
> "ssemovcc" instructions to help combine, and split that back to real
> instruction. This series doesn't handle that, but just adjust testcases
> to XFAIL.
>
> I also test performance on SPEC2017 with different options set.
> -march=sapphirerapids -O2
> -march=x86-64-v3 -O2
> -march=x86-64 -O2
> -march=sapphirerapids -O2
> Didn't observe obvious performance change, mostly same binaries.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Any comments?
Thanks for working on this. Can you open a bugreport for the cases
you XFAILed so we can see if the middle-end can be of help here?
Thanks,
Richard.
> liuhongt (7):
> [x86] Add more splitters to match (unspec [op1 op2 (gt op3
> constm1_operand)] UNSPEC_BLENDV)
> Lower AVX512 kmask comparison back to AVX2 comparison when
> op_{true,false} is vector -1/0.
> [x86] Match IEEE min/max with UNSPEC_IEEE_{MIN,MAX}.
> Add more splitter for mskmov with avx512 comparison.
> Adjust testcase for the regressed testcases after obsolete of
> vcond{,u,eq}.
> [x86] Optimize a < 0 ? -1 : 0 to (signed)a >> 31.
> Remove vcond{,u,eq}<mode> expanders since they will be obsolete.
>
> gcc/config/i386/mmx.md | 149 ++--
> gcc/config/i386/sse.md | 772 +++++++++++++-----
> gcc/testsuite/g++.target/i386/avx2-pr115517.C | 60 ++
> .../g++.target/i386/avx512-pr115517.C | 70 ++
> gcc/testsuite/g++.target/i386/pr100637-1b.C | 4 +-
> gcc/testsuite/g++.target/i386/pr100637-1w.C | 4 +-
> gcc/testsuite/g++.target/i386/pr103861-1.C | 4 +-
> .../g++.target/i386/sse4_1-pr100637-1b.C | 17 +
> .../g++.target/i386/sse4_1-pr100637-1w.C | 17 +
> .../g++.target/i386/sse4_1-pr103861-1.C | 17 +
> gcc/testsuite/gcc.target/i386/avx2-pr115517.c | 33 +
> .../gcc.target/i386/avx512-pr115517.c | 70 ++
> gcc/testsuite/gcc.target/i386/pr103941-2.c | 2 +-
> gcc/testsuite/gcc.target/i386/pr111023-2.c | 4 +-
> gcc/testsuite/gcc.target/i386/pr88540.c | 4 +-
> .../gcc.target/i386/sse4_1-pr88540.c | 10 +
> gcc/testsuite/gcc.target/i386/vect-div-1.c | 3 +-
> 17 files changed, 918 insertions(+), 322 deletions(-)
> create mode 100644 gcc/testsuite/g++.target/i386/avx2-pr115517.C
> create mode 100644 gcc/testsuite/g++.target/i386/avx512-pr115517.C
> create mode 100644 gcc/testsuite/g++.target/i386/sse4_1-pr100637-1b.C
> create mode 100644 gcc/testsuite/g++.target/i386/sse4_1-pr100637-1w.C
> create mode 100644 gcc/testsuite/g++.target/i386/sse4_1-pr103861-1.C
> create mode 100644 gcc/testsuite/gcc.target/i386/avx2-pr115517.c
> create mode 100644 gcc/testsuite/gcc.target/i386/avx512-pr115517.c
> create mode 100644 gcc/testsuite/gcc.target/i386/sse4_1-pr88540.c
>
> --
> 2.31.1
>
^ permalink raw reply [flat|nested] 11+ messages in thread