public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] target/109944 - avoid STLF fail for V16QImode CTOR expansion
@ 2023-05-24 10:13 Richard Biener
  0 siblings, 0 replies; 2+ messages in thread
From: Richard Biener @ 2023-05-24 10:13 UTC (permalink / raw)
  To: gcc-patches; +Cc: ubizjak, hongtao.liu

The following dispatches to V2DImode CTOR expansion instead of
using sets of (subreg:DI (reg:V16QI 146) [08]) which causes
LRA to spill DImode and reload V16QImode.  The same applies for
V8QImode or V4HImode construction from SImode parts which happens
during 32bit libgcc build.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

OK?

Thanks,
Richard.

	PR target/109944
	* config/i386/i386-expand.cc (ix86_expand_vector_init_general):
	Perform final vector composition using
	ix86_expand_vector_init_general instead of setting
	the highpart and lowpart which causes spilling.

	* gcc.target/i386/pr109944-1.c: New testcase.
	* gcc.target/i386/pr109944-2.c: Likewise.
---
 gcc/config/i386/i386-expand.cc             | 11 ++++----
 gcc/testsuite/gcc.target/i386/pr109944-1.c | 30 ++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr109944-2.c | 17 ++++++++++++
 3 files changed, 53 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr109944-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr109944-2.c

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index ff3d382f1b4..19acd9c01f9 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -16367,11 +16367,12 @@ quarter:
 	emit_move_insn (target, gen_lowpart (mode, words[0]));
       else if (n_words == 2)
 	{
-	  rtx tmp = gen_reg_rtx (mode);
-	  emit_clobber (tmp);
-	  emit_move_insn (gen_lowpart (tmp_mode, tmp), words[0]);
-	  emit_move_insn (gen_highpart (tmp_mode, tmp), words[1]);
-	  emit_move_insn (target, tmp);
+	  gcc_assert (tmp_mode == DImode || tmp_mode == SImode);
+	  machine_mode concat_mode = tmp_mode == DImode ? V2DImode : V2SImode;
+	  rtx tmp = gen_reg_rtx (concat_mode);
+	  vals = gen_rtx_PARALLEL (concat_mode, gen_rtvec_v (2, words));
+	  ix86_expand_vector_init_general (false, concat_mode, tmp, vals);
+	  emit_move_insn (target, gen_lowpart (mode, tmp));
 	}
       else if (n_words == 4)
 	{
diff --git a/gcc/testsuite/gcc.target/i386/pr109944-1.c b/gcc/testsuite/gcc.target/i386/pr109944-1.c
new file mode 100644
index 00000000000..d82214d9ebc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr109944-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+void foo (char * __restrict a, char *b)
+{
+  a[0] = b[0];
+  a[1] = b[16];
+  a[2] = b[32];
+  a[3] = b[48];
+  a[4] = b[64];
+  a[5] = b[80];
+  a[6] = b[96];
+  a[7] = b[112];
+  a[8] = b[128];
+  a[9] = b[144];
+  a[10] = b[160];
+  a[11] = b[176];
+  a[12] = b[192];
+  a[13] = b[208];
+  a[14] = b[224];
+  a[15] = b[240];
+}
+
+/* We do not want to generate a spill/reload for when the store is vectorized.
+        movq    %rdx, -24(%rsp)
+...
+        movq    %rax, -16(%rsp)
+        movdqa  -24(%rsp), %xmm0
+        movups  %xmm0, (%rdi)  */
+/* { dg-final { scan-assembler-not "movdq\[^\r\n\]*\[bs\]p\\\), %xmm" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr109944-2.c b/gcc/testsuite/gcc.target/i386/pr109944-2.c
new file mode 100644
index 00000000000..318dfab0250
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr109944-2.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+typedef char v16qi __attribute__((vector_size(16)));
+v16qi foo (char *b)
+{
+  return (v16qi){ b[0], b[16], b[32], b[48], b[64], b[80], b[96], b[112],
+      b[128], b[144], b[160], b[176], b[192], b[208], b[224], b[240] };
+}
+
+/* We do not want to generate a spill/reload
+        movq    %rdx, -24(%rsp)
+...
+        movq    %rax, -16(%rsp)
+        movdqa  -24(%rsp), %xmm0
+        movups  %xmm0, (%rdi)  */
+/* { dg-final { scan-assembler-not "movdq\[^\r\n\]*\[bs\]p\\\), %xmm" } } */
-- 
2.35.3

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] target/109944 - avoid STLF fail for V16QImode CTOR expansion
       [not found] <646de365.5d0a0220.695c1.0c25SMTPIN_ADDED_MISSING@mx.google.com>
@ 2023-05-24 12:25 ` Uros Bizjak
  0 siblings, 0 replies; 2+ messages in thread
From: Uros Bizjak @ 2023-05-24 12:25 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches, hongtao.liu

On Wed, May 24, 2023 at 12:13 PM Richard Biener <rguenther@suse.de> wrote:
>
> The following dispatches to V2DImode CTOR expansion instead of
> using sets of (subreg:DI (reg:V16QI 146) [08]) which causes
> LRA to spill DImode and reload V16QImode.  The same applies for
> V8QImode or V4HImode construction from SImode parts which happens
> during 32bit libgcc build.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu.
>
> OK?
>
> Thanks,
> Richard.
>
>         PR target/109944
>         * config/i386/i386-expand.cc (ix86_expand_vector_init_general):
>         Perform final vector composition using
>         ix86_expand_vector_init_general instead of setting
>         the highpart and lowpart which causes spilling.
>
>         * gcc.target/i386/pr109944-1.c: New testcase.
>         * gcc.target/i386/pr109944-2.c: Likewise.

OK.

Thanks,
Uros.

> ---
>  gcc/config/i386/i386-expand.cc             | 11 ++++----
>  gcc/testsuite/gcc.target/i386/pr109944-1.c | 30 ++++++++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr109944-2.c | 17 ++++++++++++
>  3 files changed, 53 insertions(+), 5 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr109944-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr109944-2.c
>
> diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> index ff3d382f1b4..19acd9c01f9 100644
> --- a/gcc/config/i386/i386-expand.cc
> +++ b/gcc/config/i386/i386-expand.cc
> @@ -16367,11 +16367,12 @@ quarter:
>         emit_move_insn (target, gen_lowpart (mode, words[0]));
>        else if (n_words == 2)
>         {
> -         rtx tmp = gen_reg_rtx (mode);
> -         emit_clobber (tmp);
> -         emit_move_insn (gen_lowpart (tmp_mode, tmp), words[0]);
> -         emit_move_insn (gen_highpart (tmp_mode, tmp), words[1]);
> -         emit_move_insn (target, tmp);
> +         gcc_assert (tmp_mode == DImode || tmp_mode == SImode);
> +         machine_mode concat_mode = tmp_mode == DImode ? V2DImode : V2SImode;
> +         rtx tmp = gen_reg_rtx (concat_mode);
> +         vals = gen_rtx_PARALLEL (concat_mode, gen_rtvec_v (2, words));
> +         ix86_expand_vector_init_general (false, concat_mode, tmp, vals);
> +         emit_move_insn (target, gen_lowpart (mode, tmp));
>         }
>        else if (n_words == 4)
>         {
> diff --git a/gcc/testsuite/gcc.target/i386/pr109944-1.c b/gcc/testsuite/gcc.target/i386/pr109944-1.c
> new file mode 100644
> index 00000000000..d82214d9ebc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr109944-1.c
> @@ -0,0 +1,30 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +void foo (char * __restrict a, char *b)
> +{
> +  a[0] = b[0];
> +  a[1] = b[16];
> +  a[2] = b[32];
> +  a[3] = b[48];
> +  a[4] = b[64];
> +  a[5] = b[80];
> +  a[6] = b[96];
> +  a[7] = b[112];
> +  a[8] = b[128];
> +  a[9] = b[144];
> +  a[10] = b[160];
> +  a[11] = b[176];
> +  a[12] = b[192];
> +  a[13] = b[208];
> +  a[14] = b[224];
> +  a[15] = b[240];
> +}
> +
> +/* We do not want to generate a spill/reload for when the store is vectorized.
> +        movq    %rdx, -24(%rsp)
> +...
> +        movq    %rax, -16(%rsp)
> +        movdqa  -24(%rsp), %xmm0
> +        movups  %xmm0, (%rdi)  */
> +/* { dg-final { scan-assembler-not "movdq\[^\r\n\]*\[bs\]p\\\), %xmm" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr109944-2.c b/gcc/testsuite/gcc.target/i386/pr109944-2.c
> new file mode 100644
> index 00000000000..318dfab0250
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr109944-2.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msse2" } */
> +
> +typedef char v16qi __attribute__((vector_size(16)));
> +v16qi foo (char *b)
> +{
> +  return (v16qi){ b[0], b[16], b[32], b[48], b[64], b[80], b[96], b[112],
> +      b[128], b[144], b[160], b[176], b[192], b[208], b[224], b[240] };
> +}
> +
> +/* We do not want to generate a spill/reload
> +        movq    %rdx, -24(%rsp)
> +...
> +        movq    %rax, -16(%rsp)
> +        movdqa  -24(%rsp), %xmm0
> +        movups  %xmm0, (%rdi)  */
> +/* { dg-final { scan-assembler-not "movdq\[^\r\n\]*\[bs\]p\\\), %xmm" } } */
> --
> 2.35.3

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-05-24 12:26 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-24 10:13 [PATCH] target/109944 - avoid STLF fail for V16QImode CTOR expansion Richard Biener
     [not found] <646de365.5d0a0220.695c1.0c25SMTPIN_ADDED_MISSING@mx.google.com>
2023-05-24 12:25 ` Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).