From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-ua1-x929.google.com (mail-ua1-x929.google.com [IPv6:2607:f8b0:4864:20::929]) by sourceware.org (Postfix) with ESMTPS id 50EDC3857C7D for ; Wed, 19 Jan 2022 01:09:53 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 50EDC3857C7D Received: by mail-ua1-x929.google.com with SMTP id w21so1499341uan.7 for ; Tue, 18 Jan 2022 17:09:53 -0800 (PST) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:mime-version:references:in-reply-to:from:date :message-id:subject:to:cc; bh=+QHfqZv3K09GZ2qggSwg0rgU86qyrx7deFZ8oBkDu/M=; b=U5bJCXpuFaZiOcAevA7JI5bkzyHI3FBqbkP5Rcu0afoUrGI5vcdl0OKLJKd2J+HSZG iHvTYw8aa4OmVINlSvXk53Jkah6EtqNWhj3WK6CTR0GLRfl8dh42qBg7DxPpApTzGhzT iRLO+kCpnBzxj/npObDPoV6Zd3GWgtqX3glDvN6ddTiOOv1kPJrpx5HaxC6Cy+m60e97 B6OTV6emNzzoFyMGv8lfxG8hoCTVJrJaFDw7lNKhvAbWkVQ3raJfviUqOUkSp4A3DHW2 XkgUspXLkdChjsWGAXDO9sZpySCNp+MvhcnzXFlz1DMd5H4GUaeSXKQBBBSmdLp93weu 14UQ== X-Gm-Message-State: AOAM531O25YwmlgekrNaGsbS+YZanXEYUWiuL+GBnrz7J3Orsa9duLgU INmjhZwTX52wDW0wmyAbmUMMYWvSfxWLpCUvrloSXn0NGMY= X-Google-Smtp-Source: ABdhPJzqLtfJi3tb/9VnpyqKakz7lG/n0h1UaUc1Zxkt45o7KA6QITAOWspGWznKso093yaVxo2lARRIN9tcnsOMVMc= X-Received: by 2002:a67:ea84:: with SMTP id f4mr10551369vso.50.1642554592831; Tue, 18 Jan 2022 17:09:52 -0800 (PST) MIME-Version: 1.0 References: <20220113072839.8405-1-hongyu.wang@intel.com> <20220119000016.GO2646553@tucnak> In-Reply-To: <20220119000016.GO2646553@tucnak> From: Hongtao Liu Date: Wed, 19 Jan 2022 09:09:41 +0800 Message-ID: Subject: Re: [PATCH] i386: Fix GLC tuning with -masm=intel [PR104104] To: Jakub Jelinek Cc: Uros Bizjak , GCC Patches , Hongyu Wang Content-Type: text/plain; charset="UTF-8" X-Spam-Status: No, score=-2.8 required=5.0 tests=BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, FREEMAIL_FROM, KAM_SHORT, RCVD_IN_DNSWL_NONE, SPF_HELO_NONE, SPF_PASS, TXREP autolearn=ham autolearn_force=no 
version=3.4.4 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on server2.sourceware.org X-BeenThere: gcc-patches@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-patches mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 19 Jan 2022 01:09:55 -0000 On Wed, Jan 19, 2022 at 8:00 AM Jakub Jelinek wrote: > > On Sun, Jan 16, 2022 at 12:22:18PM +0800, Hongtao Liu via Gcc-patches wrote: > > On Sun, Jan 16, 2022 at 12:44 AM Uros Bizjak via Gcc-patches > > wrote: > > > > > > On Sat, Jan 15, 2022 at 5:39 PM Hongyu Wang wrote: > > > > > > > > Thanks for the suggestion, here is the updated patch that survived > > > > bootstrap/regtest. > > > > > > LGTM for me, but please get the final approval from Hongtao. > > > > > Ok, thanks. > > Unfortunately the patch results in assembler failures with -masm=intel. > > > > > > + if (TARGET_DEST_FALSE_DEPENDENCY > > > > > + && get_attr_dest_false_dep (insn) == > > > > > + DEST_FALSE_DEP_TRUE) > > > > > + output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands); > > All the vxorps insns were emitted like the above, which means for -masm=att > it looks like > vxorps %xmm3, %xmm3, %xmm3 > but for -masm=intel like: > vxorps > We want obviously > vxorps xmm3, xmm3, xmm3 > so the following patch just drops the erroneous {}s. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? Yes, thanks. > > 2022-01-19 Jakub Jelinek > > PR target/104104 > * config/i386/sse.md > (__, > avx512fp16_sh_v8hf, > avx512dq_mul3, _permvar, > avx2_perm_1, avx512f_perm_1, > avx512dq_rangep, > avx512dq_ranges, > _getmant, > avx512f_vgetmant): > Use vxorps\t%x0, %x0, %x0 instead of vxorps\t{%x0, %x0, %x0}. > > * gcc.target/i386/pr104104.c: New test. 
> > --- gcc/config/i386/sse.md.jj 2022-01-18 11:58:59.156988142 +0100 > +++ gcc/config/i386/sse.md 2022-01-18 21:20:40.022477778 +0100 > @@ -6539,7 +6539,7 @@ (define_insn "__< > { > if (TARGET_DEST_FALSE_DEP_FOR_GLC > && ) > - output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands); > + output_asm_insn ("vxorps\t%x0, %x0, %x0", operands); > return "v\t{%2, %1, %0|%0, %1, %2}"; > } > [(set_attr "type" "ssemul") > @@ -6750,7 +6750,7 @@ (define_insn "avx512fp16_ > { > if (TARGET_DEST_FALSE_DEP_FOR_GLC > && ) > - output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands); > + output_asm_insn ("vxorps\t%x0, %x0, %x0", operands); > return "vsh\t{%2, %1, %0|%0, %1, %2}"; > } > [(set_attr "type" "ssemul") > @@ -15222,7 +15222,7 @@ (define_insn "avx512dq_mul3 && > && !reg_mentioned_p (operands[0], operands[1]) > && !reg_mentioned_p (operands[0], operands[2])) > - output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands); > + output_asm_insn ("vxorps\t%x0, %x0, %x0", operands); > return "vpmullq\t{%2, %1, %0|%0, %1, %2}"; > } > [(set_attr "type" "sseimul") > @@ -24658,7 +24658,7 @@ (define_insn "_permvar && > && !reg_mentioned_p (operands[0], operands[1]) > && !reg_mentioned_p (operands[0], operands[2])) > - output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands); > + output_asm_insn ("vxorps\t%x0, %x0, %x0", operands); > return "vperm\t{%1, %2, %0|%0, %2, %1}"; > } > [(set_attr "type" "sselog") > @@ -24900,7 +24900,7 @@ (define_insn "avx2_perm_1 if (TARGET_DEST_FALSE_DEP_FOR_GLC > && > && !reg_mentioned_p (operands[0], operands[1])) > - output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands); > + output_asm_insn ("vxorps\t%x0, %x0, %x0", operands); > return "vperm\t{%2, %1, %0|%0, %1, %2}"; > } > [(set_attr "type" "sselog") > @@ -24975,7 +24975,7 @@ (define_insn "avx512f_perm_1 if (TARGET_DEST_FALSE_DEP_FOR_GLC > && > && !reg_mentioned_p (operands[0], operands[1])) > - output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands); > + output_asm_insn ("vxorps\t%x0, %x0, %x0", operands); > return 
"vperm\t{%2, %1, %0|%0, %1, %2}"; > } > [(set_attr "type" "sselog") > @@ -26880,7 +26880,7 @@ (define_insn "avx512dq_rangep && > && !reg_mentioned_p (operands[0], operands[1]) > && !reg_mentioned_p (operands[0], operands[2])) > - output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands); > + output_asm_insn ("vxorps\t%x0, %x0, %x0", operands); > return "vrange\t{%3, %2, %1, %0|%0, %1, %2, %3}"; > } > [(set_attr "type" "sse") > @@ -26903,7 +26903,7 @@ (define_insn "avx512dq_ranges && > && !reg_mentioned_p (operands[0], operands[1]) > && !reg_mentioned_p (operands[0], operands[2])) > - output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands); > + output_asm_insn ("vxorps\t%x0, %x0, %x0", operands); > return "vrange\t{%3, %2, %1, %0|%0, %1, %2, %3}"; > } > [(set_attr "type" "sse") > @@ -26949,7 +26949,7 @@ (define_insn "_getmant if (TARGET_DEST_FALSE_DEP_FOR_GLC > && > && MEM_P (operands[1])) > - output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands); > + output_asm_insn ("vxorps\t%x0, %x0, %x0", operands); > return "vgetmant\t{%2, %1, %0|%0, %1, %2}"; > } > [(set_attr "prefix" "evex") > @@ -26971,7 +26971,7 @@ (define_insn "avx512f_vgetmant && > && !reg_mentioned_p (operands[0], operands[1]) > && !reg_mentioned_p (operands[0], operands[2])) > - output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands); > + output_asm_insn ("vxorps\t%x0, %x0, %x0", operands); > return "vgetmant\t{%3, %2, %1, %0|%0, %1, %2, %3}"; > } > [(set_attr "prefix" "evex") > --- gcc/testsuite/gcc.target/i386/pr104104.c.jj 2022-01-18 21:38:17.007906673 +0100 > +++ gcc/testsuite/gcc.target/i386/pr104104.c 2022-01-18 21:36:10.475623148 +0100 > @@ -0,0 +1,10 @@ > +/* PR target/104104 */ > +/* { dg-do assemble { target vect_simd_clones } } */ > +/* { dg-require-effective-target masm_intel } */ > +/* { dg-options "-march=alderlake -masm=intel -O1 -fallow-store-data-races -funroll-all-loops" } */ > + > +__attribute__ ((simd)) short int > +foo (void) > +{ > + return 0; > +} > > > Jakub > -- BR, Hongtao