public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] middle-end/110452 - bad code generation with AVX512 mask splat
@ 2023-06-28 13:17 Richard Biener
  0 siblings, 0 replies; only message in thread
From: Richard Biener @ 2023-06-28 13:17 UTC (permalink / raw)
  To: gcc-patches

The following adds an alternate way of expanding a uniform
mask vector constructor like

  _55 = _2 ? -1 : 0;
  vect_cst__56 = {_55, _55, _55, _55, _55, _55, _55, _55};

when the mask mode is a scalar integer mode, as for AVX512 or GCN.
Instead of building the result piecewise via shifts and ORs,
we can take advantage of the uniformity and signedness of the
component and simply sign-extend it to the mode of the result.

Instead of

        cmpl    $3, %edi
        sete    %cl
        movl    %ecx, %esi
        leal    (%rsi,%rsi), %eax
        leal    0(,%rsi,4), %r9d
        leal    0(,%rsi,8), %r8d
        orl     %esi, %eax
        orl     %r9d, %eax
        movl    %ecx, %r9d
        orl     %r8d, %eax
        movl    %ecx, %r8d
        sall    $4, %r9d
        sall    $5, %r8d
        sall    $6, %esi
        orl     %r9d, %eax
        orl     %r8d, %eax
        movl    %ecx, %r8d
        orl     %esi, %eax
        sall    $7, %r8d
        orl     %r8d, %eax
        kmovb   %eax, %k1

we then get

        cmpl    $3, %edi
        sete    %cl
        negl    %ecx
        kmovb   %ecx, %k1

Code generation for non-uniform masks remains bad, but I see
no easy way out for the most general case here.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

I will apply this tomorrow after double-checking SPEC results,
provided no comments appear.

Richard.

	PR middle-end/110452
	* expr.cc (store_constructor): Handle uniform boolean
	vectors with integer mode specially.
---
 gcc/expr.cc                              | 13 +++++++++++++
 gcc/testsuite/gcc.target/i386/pr110452.c | 13 +++++++++++++
 2 files changed, 26 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr110452.c

diff --git a/gcc/expr.cc b/gcc/expr.cc
index 62cd8facf75..b7f4e2fda9e 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -7447,6 +7447,19 @@ store_constructor (tree exp, rtx target, int cleared, poly_int64 size,
 	      emit_move_insn (target, ops[0].value);
 	    break;
 	  }
+	/* Use sign-extension for uniform boolean vectors with
+	   integer modes.  */
+	if (!TREE_SIDE_EFFECTS (exp)
+	    && VECTOR_BOOLEAN_TYPE_P (type)
+	    && SCALAR_INT_MODE_P (mode)
+	    && (elt = uniform_vector_p (exp))
+	    && !VECTOR_TYPE_P (TREE_TYPE (elt)))
+	  {
+	    rtx op0 = force_reg (TYPE_MODE (TREE_TYPE (elt)),
+				 expand_normal (elt));
+	    convert_move (target, op0, 0);
+	    break;
+	  }
 
 	n_elts = TYPE_VECTOR_SUBPARTS (type);
 	if (REG_P (target)
diff --git a/gcc/testsuite/gcc.target/i386/pr110452.c b/gcc/testsuite/gcc.target/i386/pr110452.c
new file mode 100644
index 00000000000..8a3e2e560d2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110452.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model -mavx512f -mprefer-vector-width=512" } */
+
+double a[1024], b[1024], c[1024];
+
+void foo (int flag, int n)
+{
+  _Bool x = flag == 3;
+  for (int i = 0; i < n; ++i)
+    a[i] = (x ? b[i] : c[i]) * 42.;
+}
+
+/* { dg-final { scan-assembler-not "\[^x\]orl" } } */
-- 
2.35.3

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-06-28 13:17 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-06-28 13:17 [PATCH] middle-end/110452 - bad code generation with AVX512 mask splat Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).