public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
From: "crazylht at gmail dot com" <gcc-bugzilla@gcc.gnu.org>
To: gcc-bugs@gcc.gnu.org
Subject: [Bug target/103771] [12 Regression] Missed vectorization under -mavx512f -mavx512vl after r12-5489
Date: Thu, 13 Jan 2022 10:43:00 +0000 [thread overview]
Message-ID: <bug-103771-4-ZRrN5m0ThP@http.gcc.gnu.org/bugzilla/> (raw)
In-Reply-To: <bug-103771-4@http.gcc.gnu.org/bugzilla/>
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103771
--- Comment #11 from Hongtao.liu <crazylht at gmail dot com> ---
(In reply to Hongtao.liu from comment #10)
> with
> @@ -12120,7 +12120,8 @@ supportable_narrowing_operation (enum tree_code code,
> c1 = VEC_PACK_TRUNC_EXPR;
> if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
> && VECTOR_BOOLEAN_TYPE_P (vectype)
> - && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
> + && (TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
> + || known_lt (TYPE_VECTOR_SUBPARTS (vectype), BITS_PER_UNIT))
> && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
> optab1 = vec_pack_sbool_trunc_optab;
> else
> @@ -12213,6 +12214,7 @@ supportable_narrowing_operation (enum tree_code code,
> if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
> && VECTOR_BOOLEAN_TYPE_P (prev_type)
> && intermediate_mode == prev_mode
> + && known_lt (TYPE_VECTOR_SUBPARTS (intermediate_type), BITS_PER_UNIT)
> && SCALAR_INT_MODE_P (prev_mode))
> interm_optab = vec_pack_sbool_trunc_optab;
> else
>
> With -march=icelake-server -O3 -mprefer-vector-width=128 we can now get a
> vectorized loop.
>
>
> vmovdqu8 (%rsi,%rax), %xmm0
> vpmovzxbw %xmm0, %xmm2
> vpmovzxwd %xmm2, %xmm1
> vpsrldq $8, %xmm0, %xmm0
> vpsrldq $8, %xmm2, %xmm2
> vpmovzxbw %xmm0, %xmm0
> vpmovzxwd %xmm2, %xmm2
> vpmulld %xmm9, %xmm1, %xmm1
> vpmulld %xmm9, %xmm2, %xmm2
> vpmovzxwd %xmm0, %xmm4
> vpsrldq $8, %xmm0, %xmm0
> vpmovzxwd %xmm0, %xmm0
> vpmulld %xmm9, %xmm4, %xmm4
> vpmulld %xmm9, %xmm0, %xmm0
> vpcmpud $6, %xmm6, %xmm1, %k0
> vpsubd %xmm1, %xmm7, %xmm3
> vpcmpud $6, %xmm6, %xmm2, %k1
> vpsubd %xmm2, %xmm7, %xmm5
> vpsrad $31, %xmm5, %xmm5
> vpsrad $31, %xmm3, %xmm3
> vpermt2w %xmm5, %xmm8, %xmm3
> vpsubd %xmm0, %xmm7, %xmm10
> vpsubd %xmm4, %xmm7, %xmm5
> kshiftlb $4, %k1, %k1
> vpcmpud $6, %xmm6, %xmm0, %k2
> vpsrad $31, %xmm5, %xmm5
> vpsrad $31, %xmm10, %xmm10
> kandb %k3, %k0, %k0
> korb %k1, %k0, %k0
> vpcmpud $6, %xmm6, %xmm4, %k1
> vpermt2w %xmm10, %xmm8, %xmm5
> vpermt2w %xmm2, %xmm8, %xmm1
> vpermt2w %xmm0, %xmm8, %xmm4
> vpermt2b %xmm5, %xmm11, %xmm3
> vpermt2b %xmm4, %xmm11, %xmm1
> kandb %k3, %k1, %k1
> kshiftlb $4, %k2, %k2
> korb %k2, %k1, %k1
> kunpckbw %k0, %k1, %k1
> vmovdqu8 %xmm3, %xmm1{%k1}
> vmovdqu8 %xmm1, (%rdi,%rax)
> addq $16, %rax
> cmpq %rax, %r8
> jne .L4
But this is still not as good as before: in the original version we only needed
to pack the data produced by vec_cond_expr, but now we additionally need to pack
the mask.
before
# x_24 = PHI <x_16(9), 0(21)>
# vectp_src.11_73 = PHI <vectp_src.11_74(9), src_11(D)(21)>
# vectp_dst.23_112 = PHI <vectp_dst.23_113(9), dst_13(D)(21)>
# ivtmp_115 = PHI <ivtmp_116(9), 0(21)>
# DEBUG x => NULL
# DEBUG BEGIN_STMT
_1 = (sizetype) x_24;
_2 = src_11(D) + _1;
vect__3.13_75 = MEM <vector(16) unsigned char> [(uint8_t *)vectp_src.11_73];
_3 = *_2;
vect__4.15_76 = [vec_unpack_lo_expr] vect__3.13_75;
vect__4.15_77 = [vec_unpack_hi_expr] vect__3.13_75;
vect__4.14_78 = [vec_unpack_lo_expr] vect__4.15_76;
vect__4.14_79 = [vec_unpack_hi_expr] vect__4.15_76;
vect__4.14_80 = [vec_unpack_lo_expr] vect__4.15_77;
vect__4.14_81 = [vec_unpack_hi_expr] vect__4.15_77;
_4 = (int) _3;
vect__5.16_83 = vect__4.14_78 * vect_cst__82;
vect__5.16_84 = vect__4.14_79 * vect_cst__82;
vect__5.16_85 = vect__4.14_80 * vect_cst__82;
vect__5.16_86 = vect__4.14_81 * vect_cst__82;
_5 = _4 * i_scale_12(D);
_6 = dst_13(D) + _1;
# DEBUG x => NULL
# DEBUG INLINE_ENTRY x264_clip_uint8
# DEBUG BEGIN_STMT
vect__14.17_88 = vect__5.16_83 & vect_cst__87;
vect__14.17_89 = vect__5.16_84 & vect_cst__87;
vect__14.17_90 = vect__5.16_85 & vect_cst__87;
vect__14.17_91 = vect__5.16_86 & vect_cst__87;
_14 = _5 & -256;
vect__17.18_92 = -vect__5.16_83;
vect__17.18_93 = -vect__5.16_84;
vect__17.18_94 = -vect__5.16_85;
vect__17.18_95 = -vect__5.16_86;
_17 = -_5;
vect__18.19_96 = vect__17.18_92 >> 31;
vect__18.19_97 = vect__17.18_93 >> 31;
vect__18.19_98 = vect__17.18_94 >> 31;
vect__18.19_99 = vect__17.18_95 >> 31;
_18 = _17 >> 31;
iftmp.0_19 = (unsigned char) _18;
iftmp.0_20 = (unsigned char) _5;
_101 = vect__14.17_88 != vect_cst__100;
vect_patt_40.20_102 = VEC_COND_EXPR <_101, vect__18.19_96, vect__5.16_83>;
_103 = vect__14.17_89 != vect_cst__100;
vect_patt_40.20_104 = VEC_COND_EXPR <_103, vect__18.19_97, vect__5.16_84>;
_105 = vect__14.17_90 != vect_cst__100;
vect_patt_40.20_106 = VEC_COND_EXPR <_105, vect__18.19_98, vect__5.16_85>;
_107 = vect__14.17_91 != vect_cst__100;
vect_patt_40.20_108 = VEC_COND_EXPR <_107, vect__18.19_99, vect__5.16_86>;
vect_patt_41.22_109 = VEC_PACK_TRUNC_EXPR <vect_patt_40.20_102,
vect_patt_40.20_104>;
vect_patt_41.22_110 = VEC_PACK_TRUNC_EXPR <vect_patt_40.20_106,
vect_patt_40.20_108>;
vect_patt_41.21_111 = VEC_PACK_TRUNC_EXPR <vect_patt_41.22_109,
vect_patt_41.22_110>;
iftmp.0_21 = _14 != 0 ? iftmp.0_19 : iftmp.0_20;
# DEBUG x => NULL
MEM <vector(16) unsigned char> [(uint8_t *)vectp_dst.23_112] =
vect_patt_41.21_111;
# DEBUG BEGIN_STMT
x_16 = x_24 + 1;
# DEBUG x => x_16
# DEBUG BEGIN_STMT
vectp_src.11_74 = vectp_src.11_73 + 16;
vectp_dst.23_113 = vectp_dst.23_112 + 16;
ivtmp_116 = ivtmp_115 + 1;
after
# x_24 = PHI <x_16(9), 0(21)>
# vectp_src.12_78 = PHI <vectp_src.12_79(9), src_11(D)(21)>
# vectp_dst.30_123 = PHI <vectp_dst.30_124(9), dst_13(D)(21)>
# ivtmp_126 = PHI <ivtmp_127(9), 0(21)>
_1 = (sizetype) x_24;
_2 = src_11(D) + _1;
vect__3.14_80 = MEM <vector(16) unsigned char> [(uint8_t *)vectp_src.12_78];
_3 = *_2;
vect__4.16_81 = [vec_unpack_lo_expr] vect__3.14_80;
vect__4.16_82 = [vec_unpack_hi_expr] vect__3.14_80;
vect__4.15_83 = [vec_unpack_lo_expr] vect__4.16_81;
vect__4.15_84 = [vec_unpack_hi_expr] vect__4.16_81;
vect__4.15_85 = [vec_unpack_lo_expr] vect__4.16_82;
vect__4.15_86 = [vec_unpack_hi_expr] vect__4.16_82;
_4 = (int) _3;
vect__5.17_88 = vect__4.15_83 * vect_cst__87;
vect__5.17_89 = vect__4.15_84 * vect_cst__87;
vect__5.17_90 = vect__4.15_85 * vect_cst__87;
vect__5.17_91 = vect__4.15_86 * vect_cst__87;
_5 = _4 * i_scale_12(D);
_6 = dst_13(D) + _1;
vect_x.18_92 = VIEW_CONVERT_EXPR<vector(4) unsigned int>(vect__5.17_88);
vect_x.18_93 = VIEW_CONVERT_EXPR<vector(4) unsigned int>(vect__5.17_89);
vect_x.18_94 = VIEW_CONVERT_EXPR<vector(4) unsigned int>(vect__5.17_90);
vect_x.18_95 = VIEW_CONVERT_EXPR<vector(4) unsigned int>(vect__5.17_91);
x.1_14 = (unsigned int) _5;
vect__41.19_96 = -vect_x.18_92;
vect__41.19_97 = -vect_x.18_93;
vect__41.19_98 = -vect_x.18_94;
vect__41.19_99 = -vect_x.18_95;
_41 = -x.1_14;
vect__17.20_100 = VIEW_CONVERT_EXPR<vector(4) int>(vect__41.19_96);
vect__17.20_101 = VIEW_CONVERT_EXPR<vector(4) int>(vect__41.19_97);
vect__17.20_102 = VIEW_CONVERT_EXPR<vector(4) int>(vect__41.19_98);
vect__17.20_103 = VIEW_CONVERT_EXPR<vector(4) int>(vect__41.19_99);
_17 = (int) _41;
vect__18.21_104 = vect__17.20_100 >> 31;
vect__18.21_105 = vect__17.20_101 >> 31;
vect__18.21_106 = vect__17.20_102 >> 31;
vect__18.21_107 = vect__17.20_103 >> 31;
_18 = _17 >> 31;
vect_iftmp.23_108 = VEC_PACK_TRUNC_EXPR <vect__18.21_104, vect__18.21_105>;
vect_iftmp.23_109 = VEC_PACK_TRUNC_EXPR <vect__18.21_106, vect__18.21_107>;
vect_iftmp.22_110 = VEC_PACK_TRUNC_EXPR <vect_iftmp.23_108,
vect_iftmp.23_109>;
iftmp.0_19 = (unsigned char) _18;
vect_iftmp.25_111 = VEC_PACK_TRUNC_EXPR <vect__5.17_88, vect__5.17_89>;
vect_iftmp.25_112 = VEC_PACK_TRUNC_EXPR <vect__5.17_90, vect__5.17_91>;
vect_iftmp.24_113 = VEC_PACK_TRUNC_EXPR <vect_iftmp.25_111,
vect_iftmp.25_112>;
iftmp.0_20 = (unsigned char) _5;
mask_patt_40.26_115 = vect_x.18_92 > { 255, 255, 255, 255 };
mask_patt_40.26_116 = vect_x.18_93 > { 255, 255, 255, 255 };
mask_patt_40.26_117 = vect_x.18_94 > { 255, 255, 255, 255 };
mask_patt_40.26_118 = vect_x.18_95 > { 255, 255, 255, 255 };
mask_patt_42.28_119 = VEC_PACK_TRUNC_EXPR <mask_patt_40.26_115,
mask_patt_40.26_116>;
mask_patt_42.28_120 = VEC_PACK_TRUNC_EXPR <mask_patt_40.26_117,
mask_patt_40.26_118>;
mask_patt_42.27_121 = VEC_PACK_TRUNC_EXPR <mask_patt_42.28_119,
mask_patt_42.28_120>;
vect_patt_43.29_122 = VEC_COND_EXPR <mask_patt_42.27_121, vect_iftmp.22_110,
vect_iftmp.24_113>;
iftmp.0_21 = x.1_14 > 255 ? iftmp.0_19 : iftmp.0_20;
MEM <vector(16) unsigned char> [(uint8_t *)vectp_dst.30_123] =
vect_patt_43.29_122;
x_16 = x_24 + 1;
vectp_src.12_79 = vectp_src.12_78 + 16;
vectp_dst.30_124 = vectp_dst.30_123 + 16;
ivtmp_127 = ivtmp_126 + 1;
next prev parent reply other threads:[~2022-01-13 10:43 UTC|newest]
Thread overview: 46+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-12-20 8:59 [Bug target/103771] New: " wwwhhhyyy333 at gmail dot com
2021-12-20 11:45 ` [Bug target/103771] [12 Regression] " pinskia at gcc dot gnu.org
2022-01-04 15:31 ` tnfchris at gcc dot gnu.org
2022-01-05 9:00 ` crazylht at gmail dot com
2022-01-13 8:43 ` crazylht at gmail dot com
2022-01-13 9:02 ` crazylht at gmail dot com
2022-01-13 9:04 ` crazylht at gmail dot com
2022-01-13 9:28 ` rguenth at gcc dot gnu.org
2022-01-13 9:50 ` rguenth at gcc dot gnu.org
2022-01-13 10:04 ` crazylht at gmail dot com
2022-01-13 10:11 ` rsandifo at gcc dot gnu.org
2022-01-13 10:22 ` crazylht at gmail dot com
2022-01-13 10:43 ` crazylht at gmail dot com [this message]
2022-01-13 10:44 ` rguenther at suse dot de
2022-01-13 10:49 ` rguenther at suse dot de
2022-01-13 10:50 ` crazylht at gmail dot com
2022-01-13 12:40 ` crazylht at gmail dot com
2022-01-13 13:42 ` rguenther at suse dot de
2022-01-13 14:42 ` crazylht at gmail dot com
2022-01-14 9:40 ` rsandifo at gcc dot gnu.org
2022-01-14 10:18 ` rguenth at gcc dot gnu.org
2022-01-17 8:22 ` crazylht at gmail dot com
2022-01-18 8:31 ` crazylht at gmail dot com
2022-01-18 8:36 ` crazylht at gmail dot com
2022-01-18 10:44 ` rguenther at suse dot de
2022-01-18 10:49 ` pinskia at gcc dot gnu.org
2022-01-19 6:03 ` crazylht at gmail dot com
2022-01-19 6:10 ` pinskia at gcc dot gnu.org
2022-01-19 6:25 ` crazylht at gmail dot com
2022-01-19 7:44 ` rguenth at gcc dot gnu.org
2022-01-19 8:33 ` crazylht at gmail dot com
2022-01-19 8:48 ` rguenther at suse dot de
2022-01-20 8:52 ` cvs-commit at gcc dot gnu.org
2022-01-27 6:03 ` crazylht at gmail dot com
2022-02-01 5:21 ` pinskia at gcc dot gnu.org
2022-02-01 16:13 ` law at gcc dot gnu.org
2022-02-01 16:18 ` pinskia at gcc dot gnu.org
2022-02-13 9:58 ` cvs-commit at gcc dot gnu.org
2022-02-14 7:36 ` rguenth at gcc dot gnu.org
2022-02-14 7:45 ` crazylht at gmail dot com
2022-02-14 7:53 ` rguenth at gcc dot gnu.org
2022-02-17 11:01 ` cvs-commit at gcc dot gnu.org
2023-04-26 6:55 ` [Bug target/103771] [12/13/14 " rguenth at gcc dot gnu.org
2023-05-06 23:21 ` pinskia at gcc dot gnu.org
2023-05-08 7:38 ` cvs-commit at gcc dot gnu.org
2023-05-08 7:39 ` pinskia at gcc dot gnu.org
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=bug-103771-4-ZRrN5m0ThP@http.gcc.gnu.org/bugzilla/ \
--to=gcc-bugzilla@gcc.gnu.org \
--cc=gcc-bugs@gcc.gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).