From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 48) id 2A27D3858405; Wed, 25 Aug 2021 06:22:53 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 2A27D3858405 From: "crazylht at gmail dot com" To: gcc-bugs@gcc.gnu.org Subject: [Bug target/77287] Much worse code generated compared to clang (stack alignment and spills) Date: Wed, 25 Aug 2021 06:22:52 +0000 X-Bugzilla-Reason: CC X-Bugzilla-Type: changed X-Bugzilla-Watch-Reason: None X-Bugzilla-Product: gcc X-Bugzilla-Component: target X-Bugzilla-Version: 6.1.0 X-Bugzilla-Keywords: missed-optimization, ra X-Bugzilla-Severity: normal X-Bugzilla-Who: crazylht at gmail dot com X-Bugzilla-Status: NEW X-Bugzilla-Resolution: X-Bugzilla-Priority: P3 X-Bugzilla-Assigned-To: unassigned at gcc dot gnu.org X-Bugzilla-Target-Milestone: --- X-Bugzilla-Flags: X-Bugzilla-Changed-Fields: Message-ID: In-Reply-To: References: Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: quoted-printable X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/ Auto-Submitted: auto-generated MIME-Version: 1.0 X-BeenThere: gcc-bugs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-bugs mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 25 Aug 2021 06:22:53 -0000 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=3D77287 --- Comment #13 from Hongtao.liu --- ;; Function fn (fn, funcdef_no=3D5484, decl_uid=3D32317, cgraph_uid=3D5485, symbol_order=3D5484) int fn (const int * px, const int * py, const int * pz, const int * pw, con= st int * pa, const int * pb, const int * pc, const int * pd) { vector(16) short unsigned int _3; vector(16) short unsigned int _5; vector(16) short int _7; vector(16) short int _9; vector(32) char _12; vector(32) unsigned char _14; vector(16) short unsigned int _16; vector(16) short unsigned int _17; vector(16) short int _18; vector(16) short int _19; vector(32) char _20; vector(32) 
unsigned char _21; vector(16) short unsigned int _22; vector(16) short unsigned int _23; vector(16) short int _24; vector(16) short int _25; vector(32) char _26; vector(32) unsigned char _27; vector(16) short unsigned int _28; vector(16) short unsigned int _29; vector(16) short int _30; vector(16) short int _31; int _32; vector(4) int _33; vector(8) int _34; vector(32) unsigned char _35; vector(32) char _36; vector(16) short unsigned int _37; vector(16) short unsigned int _38; vector(16) short unsigned int _39; vector(16) short unsigned int _40; vector(16) short unsigned int _41; vector(16) short unsigned int _42; vector(16) short unsigned int _43; vector(16) short unsigned int _44; vector(16) short unsigned int _45; vector(16) short unsigned int _46; vector(16) short unsigned int _47; vector(16) short unsigned int _48; vector(16) short unsigned int _50; vector(16) short unsigned int _51; vector(16) short unsigned int _53; vector(16) short unsigned int _54; vector(16) short unsigned int _56; vector(16) short unsigned int _57; vector(16) short unsigned int _59; vector(16) short unsigned int _60; vector(16) short int _62; vector(16) short int _63; vector(16) short unsigned int _64; vector(16) short unsigned int _65; vector(32) unsigned char _66; vector(32) char _67; vector(16) short int _68; vector(16) short int _69; vector(16) short unsigned int _70; vector(16) short unsigned int _71; vector(32) unsigned char _72; vector(32) char _73; vector(16) short int _74; vector(16) short int _75; vector(16) short unsigned int _76; vector(16) short unsigned int _77; vector(32) unsigned char _78; vector(32) char _79; vector(16) short int _80; vector(16) short int _81; vector(16) short unsigned int _82; vector(16) short unsigned int _83; vector(32) unsigned char _84; vector(32) char _85; vector(16) short int _86; vector(16) short int _87; vector(16) short unsigned int _88; vector(16) short unsigned int _89; vector(32) unsigned char _90; vector(32) char _91; vector(4) long long 
int _92; vector(4) long long int _93; vector(4) long long int _94; vector(4) long long int _95; vector(4) long long int _96; vector(4) long long int _97; vector(4) long long int _98; vector(4) long long int _99; vector(4) long long int _100; vector(4) long long int _101; vector(16) short unsigned int _107; vector(16) short unsigned int _108; vector(16) short unsigned int _109; vector(16) short unsigned int _110; vector(16) short unsigned int _111; [local count: 1073741824]: _101 =3D MEM[(const __m256i_u * {ref-all})px_2(D)]; _100 =3D MEM[(const __m256i_u * {ref-all})py_4(D)]; _99 =3D MEM[(const __m256i_u * {ref-all})pz_6(D)]; _98 =3D MEM[(const __m256i_u * {ref-all})pw_8(D)]; _97 =3D MEM[(const __m256i_u * {ref-all})pa_10(D)]; _96 =3D MEM[(const __m256i_u * {ref-all})pb_11(D)]; _95 =3D MEM[(const __m256i_u * {ref-all})pc_13(D)]; _94 =3D MEM[(const __m256i_u * {ref-all})pd_15(D)]; _93 =3D MEM[(const __m256i_u * {ref-all})pc_13(D) + 32B]; _92 =3D MEM[(const __m256i_u * {ref-all})pd_15(D) + 32B]; _86 =3D VIEW_CONVERT_EXPR(_96); _87 =3D VIEW_CONVERT_EXPR(_101); _88 =3D (vector(16) short unsigned int) _87; _89 =3D (vector(16) short unsigned int) _86; _90 =3D VEC_PACK_SAT_EXPR <_88, _89>; _91 =3D (vector(32) char) _90; _80 =3D VIEW_CONVERT_EXPR(_95); _81 =3D VIEW_CONVERT_EXPR(_100); _82 =3D (vector(16) short unsigned int) _81; _83 =3D (vector(16) short unsigned int) _80; _84 =3D VEC_PACK_SAT_EXPR <_82, _83>; _85 =3D (vector(32) char) _84; _74 =3D VIEW_CONVERT_EXPR(_94); _75 =3D VIEW_CONVERT_EXPR(_99); _76 =3D (vector(16) short unsigned int) _75; _77 =3D (vector(16) short unsigned int) _74; _78 =3D VEC_PACK_SAT_EXPR <_76, _77>; _79 =3D (vector(32) char) _78; _68 =3D VIEW_CONVERT_EXPR(_93); _69 =3D VIEW_CONVERT_EXPR(_98); _70 =3D (vector(16) short unsigned int) _69; _71 =3D (vector(16) short unsigned int) _68; _72 =3D VEC_PACK_SAT_EXPR <_70, _71>; _73 =3D (vector(32) char) _72; _62 =3D VIEW_CONVERT_EXPR(_92); _63 =3D VIEW_CONVERT_EXPR(_97); _64 =3D (vector(16) short 
unsigned int) _63; _65 =3D (vector(16) short unsigned int) _62; _66 =3D VEC_PACK_SAT_EXPR <_64, _65>; _67 =3D (vector(32) char) _66; _59 =3D VIEW_CONVERT_EXPR(_91); _60 =3D VIEW_CONVERT_EXPR(_101); _56 =3D VIEW_CONVERT_EXPR(_85); _57 =3D VIEW_CONVERT_EXPR(_100); _53 =3D VIEW_CONVERT_EXPR(_79); _54 =3D VIEW_CONVERT_EXPR(_99); _50 =3D VIEW_CONVERT_EXPR(_73); _51 =3D VIEW_CONVERT_EXPR(_98); _47 =3D VIEW_CONVERT_EXPR(_67); _48 =3D VIEW_CONVERT_EXPR(_97); _45 =3D VIEW_CONVERT_EXPR(_96); _111 =3D _60 - _45; _46 =3D _59 + _111; _43 =3D VIEW_CONVERT_EXPR(_95); _110 =3D _57 - _43; _44 =3D _56 + _110; _41 =3D VIEW_CONVERT_EXPR(_94); _109 =3D _54 - _41; _42 =3D _53 + _109; _39 =3D VIEW_CONVERT_EXPR(_93); _108 =3D _51 - _39; _40 =3D _50 + _108; _37 =3D VIEW_CONVERT_EXPR(_92); _107 =3D _48 - _37; _38 =3D _47 + _107; _9 =3D VIEW_CONVERT_EXPR(_44); _7 =3D VIEW_CONVERT_EXPR(_46); _5 =3D (vector(16) short unsigned int) _7; _3 =3D (vector(16) short unsigned int) _9; _35 =3D VEC_PACK_SAT_EXPR <_5, _3>; _36 =3D (vector(32) char) _35; _19 =3D VIEW_CONVERT_EXPR(_42); _18 =3D VIEW_CONVERT_EXPR(_36); _17 =3D (vector(16) short unsigned int) _18; _16 =3D (vector(16) short unsigned int) _19; _14 =3D VEC_PACK_SAT_EXPR <_17, _16>; _12 =3D (vector(32) char) _14; _25 =3D VIEW_CONVERT_EXPR(_40); _24 =3D VIEW_CONVERT_EXPR(_12); _23 =3D (vector(16) short unsigned int) _24; _22 =3D (vector(16) short unsigned int) _25; _21 =3D VEC_PACK_SAT_EXPR <_23, _22>; _20 =3D (vector(32) char) _21; _31 =3D VIEW_CONVERT_EXPR(_38); _30 =3D VIEW_CONVERT_EXPR(_20); _29 =3D (vector(16) short unsigned int) _30; _28 =3D (vector(16) short unsigned int) _31; _27 =3D VEC_PACK_SAT_EXPR <_29, _28>; _26 =3D (vector(32) char) _27; _34 =3D VIEW_CONVERT_EXPR(_26); _33 =3D __builtin_ia32_vextractf128_si256 (_34, 0); _32 =3D __builtin_ia32_vec_ext_v4si (_33, 1); [tail call] return _32; } After folding _mm256_packus_epi16, gimple still doesn't simplify it. 
I guess gcc only functionally supports vec_pack_sat_expr, but does not optimize it.