From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 48) id B4041386181F; Tue, 10 Nov 2020 03:04:37 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org B4041386181F From: "crazylht at gmail dot com" To: gcc-bugs@gcc.gnu.org Subject: [Bug target/97770] [ICELAKE]Missing vectorization for vpopcnt Date: Tue, 10 Nov 2020 03:04:37 +0000 X-Bugzilla-Reason: CC X-Bugzilla-Type: changed X-Bugzilla-Watch-Reason: None X-Bugzilla-Product: gcc X-Bugzilla-Component: target X-Bugzilla-Version: 11.0 X-Bugzilla-Keywords: missed-optimization X-Bugzilla-Severity: normal X-Bugzilla-Who: crazylht at gmail dot com X-Bugzilla-Status: UNCONFIRMED X-Bugzilla-Resolution: X-Bugzilla-Priority: P3 X-Bugzilla-Assigned-To: unassigned at gcc dot gnu.org X-Bugzilla-Target-Milestone: --- X-Bugzilla-Flags: X-Bugzilla-Changed-Fields: Message-ID: In-Reply-To: References: Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: quoted-printable X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/ Auto-Submitted: auto-generated MIME-Version: 1.0 X-BeenThere: gcc-bugs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-bugs mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 10 Nov 2020 03:04:37 -0000 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=3D97770 --- Comment #2 from Hongtao.liu --- After adding an expander, the loop is successfully vectorized.
--- diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index b153a87fb98..e8159997c40 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -22678,6 +22678,12 @@ (define_insn "avx5124vnniw_vp4dpwssds_maskz" (set_attr ("prefix") ("evex")) (set_attr ("mode") ("TI"))]) +(define_expand "popcount<mode>2" + [(set (match_operand:VI48_AVX512VL 0 "register_operand") + (popcount:VI48_AVX512VL + (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")))] + "TARGET_AVX512VPOPCNTDQ") + (define_insn "vpopcount<mode><mask_name>" [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=3Dv") (popcount:VI48_AVX512VL @@ -22722,6 +22728,12 @@ (define_insn "*restore_multiple_leave_return" "TARGET_SSE && TARGET_64BIT" "jmp\t%P1") +(define_insn "popcount<mode>2" + [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=3Dv") + (popcount:VI12_AVX512VL + (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm")))] + "TARGET_AVX512BITALG") + (define_insn "vpopcount<mode><mask_name>" [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=3Dv") (popcount:VI12_AVX512VL --- But for vector byte/word/quadword, the vectorizer still uses vpopcntd rather than vpopcnt{b,w,q}; is the corresponding ifn missing?
void fooq(long long* __restrict dest, long long* src) { for (int i =3D 0; i !=3D 4; i++) dest[i] =3D __builtin_popcount (src[i]); } void foow(short* __restrict dest, short* src) { for (int i =3D 0; i !=3D 16; i++) dest[i] =3D __builtin_popcount (src[i]); } void foob(char* __restrict dest, char* src) { for (int i =3D 0; i !=3D 32; i++) dest[i] =3D __builtin_popcount (src[i]); } dump of test.c.164.vect ;; Function foow (foow, funcdef_no=3D0, decl_uid=3D4228, cgraph_uid=3D1, symbol_order=3D0) Merging blocks 2 and 6 foow (short int * restrict dest, short int * src) { vector(8) short int * vectp_dest.10; vector(8) short int * vectp_dest.9; vector(8) short int vect__8.8; vector(4) int vect__6.7; vector(4) unsigned int vect__5.6; vector(8) short int vect__4.5; vector(8) short int * vectp_src.4; vector(8) short int * vectp_src.3; int i; long unsigned int _1; long unsigned int _2; short int * _3; short int _4; unsigned int _5; int _6; short int * _7; short int _8; unsigned int ivtmp_26; unsigned int ivtmp_28; unsigned int ivtmp_34; unsigned int ivtmp_35; [local count: 119292720]: [local count: 119292719]: # i_19 =3D PHI # ivtmp_35 =3D PHI # vectp_src.3_24 =3D PHI # vectp_dest.9_9 =3D PHI # ivtmp_26 =3D PHI _1 =3D (long unsigned int) i_19; _2 =3D _1 * 2; _3 =3D src_12(D) + _2; vect__4.5_22 =3D MEM [(short int *)vectp_src.3_24]; _4 =3D *_3; vect__5.6_21 =3D [vec_unpack_lo_expr] vect__4.5_22; vect__5.6_18 =3D [vec_unpack_hi_expr] vect__4.5_22; _5 =3D (unsigned int) _4; vect__6.7_17 =3D .POPCOUNT (vect__5.6_21); vect__6.7_16 =3D .POPCOUNT (vect__5.6_18); _6 =3D 0; _7 =3D dest_13(D) + _2; vect__8.8_10 =3D VEC_PACK_TRUNC_EXPR ; _8 =3D (short int) _6; MEM [(short int *)vectp_dest.9_9] =3D vect__8.8_10; i_15 =3D i_19 + 1; ivtmp_34 =3D ivtmp_35 - 1; vectp_src.3_23 =3D vectp_src.3_24 + 16; vectp_dest.9_29 =3D vectp_dest.9_9 + 16; ivtmp_28 =3D ivtmp_26 + 1; if (ivtmp_28 < 1) goto ; [0.00%] else goto ; [100.00%] [local count: 0]: goto ; [100.00%] [local count: 119292720]: return; }=