public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r14-2183] middle-end/110452 - bad code generation with AVX512 mask splat
@ 2023-06-29  8:34 Richard Biener
  0 siblings, 0 replies; only message in thread
From: Richard Biener @ 2023-06-29  8:34 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:6d2eddf456f2d6494cac490c4aa3e7d089926098

commit r14-2183-g6d2eddf456f2d6494cac490c4aa3e7d089926098
Author: Richard Biener <rguenther@suse.de>
Date:   Wed Jun 28 14:05:55 2023 +0200

    middle-end/110452 - bad code generation with AVX512 mask splat
    
    The following adds an alternate way of expanding a uniform
    mask vector constructor like
    
      _55 = _2 ? -1 : 0;
      vect_cst__56 = {_55, _55, _55, _55, _55, _55, _55, _55};
    
    when the mask mode is a scalar integer mode, as for AVX512 or GCN.
    Instead of building the result piecewise via shifts and ORs,
    we can take advantage of the uniformity and signedness of the
    component and simply sign-extend it to the result.
    
    Instead of
    
            cmpl    $3, %edi
            sete    %cl
            movl    %ecx, %esi
            leal    (%rsi,%rsi), %eax
            leal    0(,%rsi,4), %r9d
            leal    0(,%rsi,8), %r8d
            orl     %esi, %eax
            orl     %r9d, %eax
            movl    %ecx, %r9d
            orl     %r8d, %eax
            movl    %ecx, %r8d
            sall    $4, %r9d
            sall    $5, %r8d
            sall    $6, %esi
            orl     %r9d, %eax
            orl     %r8d, %eax
            movl    %ecx, %r8d
            orl     %esi, %eax
            sall    $7, %r8d
            orl     %r8d, %eax
            kmovb   %eax, %k1
    
    we then get
    
            cmpl    $3, %edi
            sete    %cl
            negl    %ecx
            kmovb   %ecx, %k1
    
    Code generation for non-uniform masks remains bad, but I see
    no easy way out for the most general case here.
    
            PR middle-end/110452
            * expr.cc (store_constructor): Handle uniform boolean
            vectors with integer mode specially.

Diff:
---
 gcc/expr.cc                              | 13 +++++++++++++
 gcc/testsuite/gcc.target/i386/pr110452.c | 13 +++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/gcc/expr.cc b/gcc/expr.cc
index 62cd8facf75..b7f4e2fda9e 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -7447,6 +7447,19 @@ store_constructor (tree exp, rtx target, int cleared, poly_int64 size,
 	      emit_move_insn (target, ops[0].value);
 	    break;
 	  }
+	/* Use sign-extension for uniform boolean vectors with
+	   integer modes.  */
+	if (!TREE_SIDE_EFFECTS (exp)
+	    && VECTOR_BOOLEAN_TYPE_P (type)
+	    && SCALAR_INT_MODE_P (mode)
+	    && (elt = uniform_vector_p (exp))
+	    && !VECTOR_TYPE_P (TREE_TYPE (elt)))
+	  {
+	    rtx op0 = force_reg (TYPE_MODE (TREE_TYPE (elt)),
+				 expand_normal (elt));
+	    convert_move (target, op0, 0);
+	    break;
+	  }
 
 	n_elts = TYPE_VECTOR_SUBPARTS (type);
 	if (REG_P (target)
diff --git a/gcc/testsuite/gcc.target/i386/pr110452.c b/gcc/testsuite/gcc.target/i386/pr110452.c
new file mode 100644
index 00000000000..8a3e2e560d2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110452.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model -mavx512f -mprefer-vector-width=512" } */
+
+double a[1024], b[1024], c[1024];
+
+void foo (int flag, int n)
+{
+  _Bool x = flag == 3;
+  for (int i = 0; i < n; ++i)
+    a[i] = (x ? b[i] : c[i]) * 42.;
+}
+
+/* { dg-final { scan-assembler-not "\[^x\]orl" } } */

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-06-29  8:34 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-06-29  8:34 [gcc r14-2183] middle-end/110452 - bad code generation with AVX512 mask splat Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).