From: Jakub Jelinek <jakub@redhat.com>
To: Hongtao Liu <crazylht@gmail.com>, Uros Bizjak <ubizjak@gmail.com>
Cc: gcc-patches@gcc.gnu.org, Hongyu Wang <hongyu.wang@intel.com>
Subject: [PATCH] i386: Fix GLC tuning with -masm=intel [PR104104]
Date: Wed, 19 Jan 2022 01:00:16 +0100 [thread overview]
Message-ID: <20220119000016.GO2646553@tucnak> (raw)
In-Reply-To: <CAMZc-bzxiitDEvkPn-q6nWzRK=hkECgDgFyj3tQyVqXgrTeWWw@mail.gmail.com>
On Sun, Jan 16, 2022 at 12:22:18PM +0800, Hongtao Liu via Gcc-patches wrote:
> On Sun, Jan 16, 2022 at 12:44 AM Uros Bizjak via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > On Sat, Jan 15, 2022 at 5:39 PM Hongyu Wang <wwwhhhyyy333@gmail.com> wrote:
> > >
> > > Thanks for the suggestion, here is the updated patch that survived
> > > bootstrap/regtest.
> >
> > LGTM for me, but please get the final approval from Hongtao.
> >
> Ok, thanks.
Unfortunately the patch results in assembler failures with -masm=intel.
> > > > + if (TARGET_DEST_FALSE_DEPENDENCY
> > > > + && get_attr_dest_false_dep (insn) ==
> > > > + DEST_FALSE_DEP_TRUE)
> > > > + output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
All the vxorps insns were emitted like the above, which means for -masm=att
it looks like
vxorps %xmm3, %xmm3, %xmm3
but for -masm=intel (where a {...} group without | has an empty Intel alternative) like:
vxorps
Obviously we want
vxorps xmm3, xmm3, xmm3
so the following patch just drops the erroneous {}s.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2022-01-19 Jakub Jelinek <jakub@redhat.com>
PR target/104104
* config/i386/sse.md
(<avx512>_<complexopname>_<mode><maskc_name><round_name>,
avx512fp16_<complexopname>sh_v8hf<mask_scalarc_name><round_scalarcz_name>,
avx512dq_mul<mode>3<mask_name>, <avx2_avx512>_permvar<mode><mask_name>,
avx2_perm<mode>_1<mask_name>, avx512f_perm<mode>_1<mask_name>,
avx512dq_rangep<mode><mask_name><round_saeonly_name>,
avx512dq_ranges<mode><mask_scalar_name><round_saeonly_scalar_name>,
<avx512>_getmant<mode><mask_name><round_saeonly_name>,
avx512f_vgetmant<mode><mask_scalar_name><round_saeonly_scalar_name>):
Use vxorps\t%x0, %x0, %x0 instead of vxorps\t{%x0, %x0, %x0}.
* gcc.target/i386/pr104104.c: New test.
--- gcc/config/i386/sse.md.jj 2022-01-18 11:58:59.156988142 +0100
+++ gcc/config/i386/sse.md 2022-01-18 21:20:40.022477778 +0100
@@ -6539,7 +6539,7 @@ (define_insn "<avx512>_<complexopname>_<
{
if (TARGET_DEST_FALSE_DEP_FOR_GLC
&& <maskc_dest_false_dep_for_glc_cond>)
- output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+ output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
return "v<complexopname><ssemodesuffix>\t{<round_maskc_op3>%2, %1, %0<maskc_operand3>|%0<maskc_operand3>, %1, %2<round_maskc_op3>}";
}
[(set_attr "type" "ssemul")
@@ -6750,7 +6750,7 @@ (define_insn "avx512fp16_<complexopname>
{
if (TARGET_DEST_FALSE_DEP_FOR_GLC
&& <mask_scalarc_dest_false_dep_for_glc_cond>)
- output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+ output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
return "v<complexopname>sh\t{<round_scalarc_mask_op3>%2, %1, %0<mask_scalarc_operand3>|%0<mask_scalarc_operand3>, %1, %2<round_scalarc_mask_op3>}";
}
[(set_attr "type" "ssemul")
@@ -15222,7 +15222,7 @@ (define_insn "avx512dq_mul<mode>3<mask_n
&& <mask3_dest_false_dep_for_glc_cond>
&& !reg_mentioned_p (operands[0], operands[1])
&& !reg_mentioned_p (operands[0], operands[2]))
- output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+ output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
return "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
}
[(set_attr "type" "sseimul")
@@ -24658,7 +24658,7 @@ (define_insn "<avx2_avx512>_permvar<mode
&& <mask3_dest_false_dep_for_glc_cond>
&& !reg_mentioned_p (operands[0], operands[1])
&& !reg_mentioned_p (operands[0], operands[2]))
- output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+ output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
return "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}";
}
[(set_attr "type" "sselog")
@@ -24900,7 +24900,7 @@ (define_insn "avx2_perm<mode>_1<mask_nam
if (TARGET_DEST_FALSE_DEP_FOR_GLC
&& <mask6_dest_false_dep_for_glc_cond>
&& !reg_mentioned_p (operands[0], operands[1]))
- output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+ output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
}
[(set_attr "type" "sselog")
@@ -24975,7 +24975,7 @@ (define_insn "avx512f_perm<mode>_1<mask_
if (TARGET_DEST_FALSE_DEP_FOR_GLC
&& <mask10_dest_false_dep_for_glc_cond>
&& !reg_mentioned_p (operands[0], operands[1]))
- output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+ output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
}
[(set_attr "type" "sselog")
@@ -26880,7 +26880,7 @@ (define_insn "avx512dq_rangep<mode><mask
&& <mask4_dest_false_dep_for_glc_cond>
&& !reg_mentioned_p (operands[0], operands[1])
&& !reg_mentioned_p (operands[0], operands[2]))
- output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+ output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
return "vrange<ssemodesuffix>\t{%3, <round_saeonly_mask_op4>%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2<round_saeonly_mask_op4>, %3}";
}
[(set_attr "type" "sse")
@@ -26903,7 +26903,7 @@ (define_insn "avx512dq_ranges<mode><mask
&& <mask_scalar4_dest_false_dep_for_glc_cond>
&& !reg_mentioned_p (operands[0], operands[1])
&& !reg_mentioned_p (operands[0], operands[2]))
- output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+ output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
return "vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}";
}
[(set_attr "type" "sse")
@@ -26949,7 +26949,7 @@ (define_insn "<avx512>_getmant<mode><mas
if (TARGET_DEST_FALSE_DEP_FOR_GLC
&& <mask3_dest_false_dep_for_glc_cond>
&& MEM_P (operands[1]))
- output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+ output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
return "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}";
}
[(set_attr "prefix" "evex")
@@ -26971,7 +26971,7 @@ (define_insn "avx512f_vgetmant<mode><mas
&& <mask_scalar4_dest_false_dep_for_glc_cond>
&& !reg_mentioned_p (operands[0], operands[1])
&& !reg_mentioned_p (operands[0], operands[2]))
- output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+ output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
return "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}";
}
[(set_attr "prefix" "evex")
--- gcc/testsuite/gcc.target/i386/pr104104.c.jj 2022-01-18 21:38:17.007906673 +0100
+++ gcc/testsuite/gcc.target/i386/pr104104.c 2022-01-18 21:36:10.475623148 +0100
@@ -0,0 +1,10 @@
+/* PR target/104104 */
+/* { dg-do assemble { target vect_simd_clones } } */
+/* { dg-require-effective-target masm_intel } */
+/* { dg-options "-march=alderlake -masm=intel -O1 -fallow-store-data-races -funroll-all-loops" } */
+
+__attribute__ ((simd)) short int
+foo (void)
+{
+ return 0;
+}
Jakub
next prev parent reply other threads:[~2022-01-19 0:00 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-01-13 7:28 [PATCH] [i386] GLC tuning: Break false dependency for dest register Hongyu Wang
2022-01-13 7:41 ` Uros Bizjak
2022-01-14 5:38 ` Hongyu Wang
2022-01-14 6:03 ` Hongyu Wang
2022-01-14 8:37 ` Uros Bizjak
2022-01-14 13:44 ` Hongyu Wang
2022-01-14 15:49 ` Uros Bizjak
2022-01-15 16:39 ` Hongyu Wang
2022-01-15 16:43 ` Uros Bizjak
2022-01-16 4:22 ` Hongtao Liu
2022-01-19 0:00 ` Jakub Jelinek [this message]
2022-01-19 1:01 ` [PATCH] i386: Fix GLC tuning with -masm=intel [PR104104] Wang, Hongyu
2022-01-19 1:09 ` Hongtao Liu
2022-01-19 1:40 ` [PATCH] i386: Fix *aes<aeswideklvariant>u8 Jakub Jelinek
2022-01-19 1:47 ` Hongtao Liu
2022-01-14 8:17 ` [PATCH] [i386] GLC tuning: Break false dependency for dest register Uros Bizjak
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220119000016.GO2646553@tucnak \
--to=jakub@redhat.com \
--cc=crazylht@gmail.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=hongyu.wang@intel.com \
--cc=ubizjak@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).