public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-6715] i386: Fix GLC tuning with -masm=intel [PR104104]
@ 2022-01-19  1:25 Jakub Jelinek
  0 siblings, 0 replies; only message in thread
From: Jakub Jelinek @ 2022-01-19  1:25 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:ddce00dba244d889d688490517fb106169a72f01

commit r12-6715-gddce00dba244d889d688490517fb106169a72f01
Author: Jakub Jelinek <jakub@redhat.com>
Date:   Wed Jan 19 02:24:06 2022 +0100

    i386: Fix GLC tuning with -masm=intel [PR104104]
    
    > > On Sat, Jan 15, 2022 at 5:39 PM Hongyu Wang <wwwhhhyyy333@gmail.com> wrote:
    > > > Thanks for the suggestion, here is the updated patch that survived
    > > > bootstrap/regtest.
    
    Unfortunately the patch results in assembler failures with -masm=intel.
    
    > > > > +  if (TARGET_DEST_FALSE_DEPENDENCY
    > > > > +      && get_attr_dest_false_dep (insn) ==
    > > > > +        DEST_FALSE_DEP_TRUE)
    > > > > +    output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
    
    All the vxorps insns were emitted like the above, which means for -masm=att
    it looks like
            vxorps  %xmm3, %xmm3, %xmm3
    but for -masm=intel like:
            vxorps
    Obviously, we want
            vxorps  xmm3, xmm3, xmm3
    so the following patch just drops the erroneous {}s.
    
    2022-01-19  Jakub Jelinek  <jakub@redhat.com>
    
            PR target/104104
            * config/i386/sse.md
            (<avx512>_<complexopname>_<mode><maskc_name><round_name>,
            avx512fp16_<complexopname>sh_v8hf<mask_scalarc_name><round_scalarcz_name>,
            avx512dq_mul<mode>3<mask_name>, <avx2_avx512>_permvar<mode><mask_name>,
            avx2_perm<mode>_1<mask_name>, avx512f_perm<mode>_1<mask_name>,
            avx512dq_rangep<mode><mask_name><round_saeonly_name>,
            avx512dq_ranges<mode><mask_scalar_name><round_saeonly_scalar_name>,
            <avx512>_getmant<mode><mask_name><round_saeonly_name>,
            avx512f_vgetmant<mode><mask_scalar_name><round_saeonly_scalar_name>):
            Use vxorps\t%x0, %x0, %x0 instead of vxorps\t{%x0, %x0, %x0}.
    
            * gcc.target/i386/pr104104.c: New test.

Diff:
---
 gcc/config/i386/sse.md                   | 20 ++++++++++----------
 gcc/testsuite/gcc.target/i386/pr104104.c | 10 ++++++++++
 2 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 34175fd5258..829107e00c9 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -6539,7 +6539,7 @@
 {
   if (TARGET_DEST_FALSE_DEP_FOR_GLC
       && <maskc_dest_false_dep_for_glc_cond>)
-    output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+    output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
   return "v<complexopname><ssemodesuffix>\t{<round_maskc_op3>%2, %1, %0<maskc_operand3>|%0<maskc_operand3>, %1, %2<round_maskc_op3>}";
 }
   [(set_attr "type" "ssemul")
@@ -6750,7 +6750,7 @@
 {
   if (TARGET_DEST_FALSE_DEP_FOR_GLC
       && <mask_scalarc_dest_false_dep_for_glc_cond>)
-    output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+    output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
   return "v<complexopname>sh\t{<round_scalarc_mask_op3>%2, %1, %0<mask_scalarc_operand3>|%0<mask_scalarc_operand3>, %1, %2<round_scalarc_mask_op3>}";
 }
   [(set_attr "type" "ssemul")
@@ -15222,7 +15222,7 @@
       && <mask3_dest_false_dep_for_glc_cond>
       && !reg_mentioned_p (operands[0], operands[1])
       && !reg_mentioned_p (operands[0], operands[2]))
-    output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+    output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
   return "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
 }
   [(set_attr "type" "sseimul")
@@ -24658,7 +24658,7 @@
       && <mask3_dest_false_dep_for_glc_cond>
       && !reg_mentioned_p (operands[0], operands[1])
       && !reg_mentioned_p (operands[0], operands[2]))
-    output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+    output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
   return "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}";
 }
   [(set_attr "type" "sselog")
@@ -24900,7 +24900,7 @@
   if (TARGET_DEST_FALSE_DEP_FOR_GLC
       && <mask6_dest_false_dep_for_glc_cond>
       && !reg_mentioned_p (operands[0], operands[1]))
-    output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+    output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
   return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
 }
   [(set_attr "type" "sselog")
@@ -24975,7 +24975,7 @@
   if (TARGET_DEST_FALSE_DEP_FOR_GLC
       && <mask10_dest_false_dep_for_glc_cond>
       && !reg_mentioned_p (operands[0], operands[1]))
-    output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+    output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
   return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
 }
   [(set_attr "type" "sselog")
@@ -26880,7 +26880,7 @@
       && <mask4_dest_false_dep_for_glc_cond>
       && !reg_mentioned_p (operands[0], operands[1])
       && !reg_mentioned_p (operands[0], operands[2]))
-    output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+    output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
   return "vrange<ssemodesuffix>\t{%3, <round_saeonly_mask_op4>%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2<round_saeonly_mask_op4>, %3}";
 }
   [(set_attr "type" "sse")
@@ -26903,7 +26903,7 @@
       && <mask_scalar4_dest_false_dep_for_glc_cond>
       && !reg_mentioned_p (operands[0], operands[1])
       && !reg_mentioned_p (operands[0], operands[2]))
-    output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+    output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
   return "vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}";
 }
   [(set_attr "type" "sse")
@@ -26949,7 +26949,7 @@
   if (TARGET_DEST_FALSE_DEP_FOR_GLC
       && <mask3_dest_false_dep_for_glc_cond>
       && MEM_P (operands[1]))
-    output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+    output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
   return "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}";
 }
   [(set_attr "prefix" "evex")
@@ -26971,7 +26971,7 @@
       && <mask_scalar4_dest_false_dep_for_glc_cond>
       && !reg_mentioned_p (operands[0], operands[1])
       && !reg_mentioned_p (operands[0], operands[2]))
-    output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+    output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
   return "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}";
 }
    [(set_attr "prefix" "evex")
diff --git a/gcc/testsuite/gcc.target/i386/pr104104.c b/gcc/testsuite/gcc.target/i386/pr104104.c
new file mode 100644
index 00000000000..1653caedc03
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr104104.c
@@ -0,0 +1,10 @@
+/* PR target/104104 */
+/* { dg-do assemble { target vect_simd_clones } } */
+/* { dg-require-effective-target masm_intel } */
+/* { dg-options "-march=alderlake -masm=intel -O1 -fallow-store-data-races -funroll-all-loops" } */
+
+__attribute__ ((simd)) short int
+foo (void)
+{
+  return 0;
+}


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-01-19  1:25 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-19  1:25 [gcc r12-6715] i386: Fix GLC tuning with -masm=intel [PR104104] Jakub Jelinek

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).