From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 48) id 2E899388B018; Thu, 4 Jun 2020 06:24:50 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 2E899388B018 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1591251890; bh=hpQ3jHfeI/fz4QPpqY6XV/ktmmEYefkMb9vHeu0JWjE=; h=From:To:Subject:Date:From; b=nV6kmWdgHBbdy8gM8kwhC2OF4NrALX8eZd+F6EkHMat9UTNylnAJDEh7hUupGKEeU m52HtHR8wPyWxu5Ni47vxPZ7CY+vdzGg/eDy3oA5O2tAGVINC+7+eWAqRXAgSXWwmi X/vNWqDpRrhbRNOTk3Qpq7u4e4RGy0DW0Twx7rCQ= From: "crazylht at gmail dot com" To: gcc-bugs@gcc.gnu.org Subject: [Bug target/95524] New: Suboptimal codegen for shift by constant for v16qi/v32qi under -march=skylake Date: Thu, 04 Jun 2020 06:24:50 +0000 X-Bugzilla-Reason: CC X-Bugzilla-Type: new X-Bugzilla-Watch-Reason: None X-Bugzilla-Product: gcc X-Bugzilla-Component: target X-Bugzilla-Version: 11.0 X-Bugzilla-Keywords: missed-optimization X-Bugzilla-Severity: normal X-Bugzilla-Who: crazylht at gmail dot com X-Bugzilla-Status: UNCONFIRMED X-Bugzilla-Resolution: X-Bugzilla-Priority: P3 X-Bugzilla-Assigned-To: unassigned at gcc dot gnu.org X-Bugzilla-Target-Milestone: --- X-Bugzilla-Flags: X-Bugzilla-Changed-Fields: bug_id short_desc product version bug_status keywords bug_severity priority component assigned_to reporter target_milestone cf_gcctarget Message-ID: Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: quoted-printable X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/ Auto-Submitted: auto-generated MIME-Version: 1.0 X-BeenThere: gcc-bugs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-bugs mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 04 Jun 2020 06:24:50 -0000 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95524 Bug ID: 95524 Summary: Suboptimal codegen for shift by constant for v16qi/v32qi under -march=skylake Product: gcc Version: 11.0 
Status: UNCONFIRMED Keywords: missed-optimization Severity: normal Priority: P3 Component: target Assignee: unassigned at gcc dot gnu.org Reporter: crazylht at gmail dot com Target Milestone: --- Target: x86_64-*-* i?86-*-* cat test.c --- typedef char v16qi __attribute__ ((vector_size (16))); typedef char v32qi __attribute__ ((vector_size (32))); typedef unsigned char v16uqi __attribute__ ((vector_size (16))); typedef unsigned char v32uqi __attribute__ ((vector_size (32))); v16qi ashift (v16qi a) { return a<<5; } v32qi ashift2 (v32qi a, v32qi b) { return a<<5; } v16qi ashiftrt (v16qi a) { return a>>5; } v32qi arshiftrt2 (v32qi a) { return a>>5; } v16uqi lshiftrt (v16uqi a) { return a>>5; } v32uqi lshiftrt2 (v32uqi a) { return a>>5; } --- gcc11 -O2 -march=skylake --- ashift(char __vector(16)): vpaddb xmm0, xmm0, xmm0 vpaddb xmm0, xmm0, xmm0 vpaddb xmm0, xmm0, xmm0 vpaddb xmm0, xmm0, xmm0 vpaddb xmm0, xmm0, xmm0 ret ashift2(char __vector(32), char __vector(32)): vpaddb ymm0, ymm0, ymm0 vpaddb ymm0, ymm0, ymm0 vpaddb ymm0, ymm0, ymm0 vpaddb ymm0, ymm0, ymm0 vpaddb ymm0, ymm0, ymm0 ret ashiftrt(char __vector(16)): vpmovsxbw xmm2, xmm0 vpsrldq xmm1, xmm0, 8 vpmovsxbw xmm1, xmm1 vpsraw xmm0, xmm2, 5 vmovdqa xmm2, XMMWORD PTR .LC0[rip] vpsraw xmm1, xmm1, 5 vpand xmm0, xmm2, xmm0 vpand xmm2, xmm2, xmm1 vpackuswb xmm0, xmm0, xmm2 ret arshiftrt2(char __vector(32)): vmovdqa ymm1, ymm0 vextracti128 xmm1, ymm1, 0x1 vmovdqa ymm2, YMMWORD PTR .LC1[rip] vpmovsxbw ymm0, xmm0 vpmovsxbw ymm1, xmm1 vpsraw ymm1, ymm1, 5 vpsraw ymm0, ymm0, 5 vpand ymm0, ymm2, ymm0 vpand ymm2, ymm2, ymm1 vpackuswb ymm0, ymm0, ymm2 vpermq ymm0, ymm0, 216 ret lshiftrt(unsigned char __vector(16)): vpmovzxbw xmm2, xmm0 vpsrldq xmm1, xmm0, 8 vpmovzxbw xmm1, xmm1 vpsrlw xmm0, xmm2, 5 vmovdqa xmm2, XMMWORD PTR .LC0[rip] vpsrlw xmm1, xmm1, 5 vpand xmm0, xmm2, xmm0 vpand xmm2, xmm2, xmm1 vpackuswb xmm0, xmm0, xmm2 ret lshiftrt2(unsigned char __vector(32)): vmovdqa ymm1, ymm0 vextracti128 xmm1, ymm1, 0x1 vmovdqa 
ymm2, YMMWORD PTR .LC1[rip] vpmovzxbw ymm0, xmm0 vpmovzxbw ymm1, xmm1 vpsrlw ymm1, ymm1, 5 vpsrlw ymm0, ymm0, 5 vpand ymm0, ymm2, ymm0 vpand ymm2, ymm2, ymm1 vpackuswb ymm0, ymm0, ymm2 vpermq ymm0, ymm0, 216 ret .LC0: .value 255 .value 255 .value 255 .value 255 .value 255 .value 255 .value 255 .value 255 .LC1: .value 255 .value 255 .value 255 .value 255 .value 255 .value 255 .value 255 .value 255 .value 255 .value 255 .value 255 .value 255 .value 255 .value 255 .value 255 .value 255 --- icc has --- ashift(char __vector(16)): vpsllw xmm1, xmm0, 5 #9.16 vpand xmm0, xmm1, XMMWORD PTR .L_2il0floatpacket.0[rip] #9.16 ret #9.16 ashift2(char __vector(32), char __vector(32)): vpsllw ymm2, ymm0, 5 #15.16 vpand ymm0, ymm2, YMMWORD PTR .L_2il0floatpacket.1[rip] #15.16 ret #15.16 ashiftrt(char __vector(16)): vpsrlw xmm1, xmm0, 5 #21.16 vpand xmm0, xmm1, XMMWORD PTR .L_2il0floatpacket.2[rip] #21.16 ret #21.16 arshiftrt2(char __vector(32)): vpsrlw ymm1, ymm0, 5 #27.16 vpand ymm0, ymm1, YMMWORD PTR .L_2il0floatpacket.3[rip] #27.16 ret #27.16 lshiftrt(unsigned char __vector(16)): vpsrlw xmm1, xmm0, 5 #33.16 vpand xmm0, xmm1, XMMWORD PTR .L_2il0floatpacket.2[rip] #33.16 ret #33.16 lshiftrt2(unsigned char __vector(32)): vpsrlw ymm1, ymm0, 5 #39.16 vpand ymm0, ymm1, YMMWORD PTR .L_2il0floatpacket.3[rip] #39.16 ret #39.16 .L_2il0floatpacket.1: .long 0xe0e0e0e0,0xe0e0e0e0,0xe0e0e0e0,0xe0e0e0e0,0xe0e0e0e0,0xe0e0e0e0,0xe0e0e0e0,0xe0e0e0e0 .L_2il0floatpacket.3: .long 0x07070707,0x07070707,0x07070707,0x07070707,0x07070707,0x07070707,0x07070707,0x07070707 .L_2il0floatpacket.0: .long 0xe0e0e0e0,0xe0e0e0e0,0xe0e0e0e0,0xe0e0e0e0 .L_2il0floatpacket.2: .long 0x07070707,0x07070707,0x07070707,0x07070707 --- icc takes far fewer instructions than gcc.