public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH v5 00/11] Allow TImode/OImode/XImode in op_by_pieces operations
@ 2021-07-01 15:22 H.J. Lu
  2021-07-01 15:22 ` [PATCH v5 01/11] Rewrite memset with TARGET_GEN_MEMSET_SCRATCH_RTX H.J. Lu
                   ` (10 more replies)
  0 siblings, 11 replies; 12+ messages in thread
From: H.J. Lu @ 2021-07-01 15:22 UTC (permalink / raw)
  To: gcc-patches
  Cc: Richard Biener, Richard Sandiford, Uros Bizjak, Bernd Edlinger

Changes in the v5 patches:

1. Add TARGET_GEN_MEMSET_SCRATCH_RTX to allow the backend to use a hard
scratch register to avoid stack realignment when expanding memset.
2. Use vec_duplicate, instead of adding TARGET_READ_MEMSET_VALUE and
TARGET_GEN_MEMSET_VALUE, to expand memset if available.

Changes in the v4 patches:

1. Define x86 MAX_MOVE_MAX to 64, which is the constant maximum number
of bytes that a single instruction can move quickly between memory and
registers or between two memory locations.
2. Define x86 MOVE_MAX to MOVE_MAX_PIECES, which is the maximum number of
bytes we can move from memory to memory in one reasonably fast instruction.
The difference between MAX_MOVE_MAX and MOVE_MAX is that MAX_MOVE_MAX
must be a constant, independent of compiler options, since it is used in
reload.h to define struct target_reload and MOVE_MAX can vary, depending
on compiler options.

Changes in the v3 patches:

1. Split the TARGET_READ_MEMSET_VALUE and TARGET_GEN_MEMSET_VALUE changes
into the generic part and the x86 part.


1. Add TARGET_READ_MEMSET_VALUE and TARGET_GEN_MEMSET_VALUE to support
target instructions to duplicate QImode value to TImode/OImode/XImode
value for memmset.
2. x86: Avoid stack realignment when copying data
3. x86: Remov MAX_BITSIZE_MODE_ANY_INT.  Only x86 backend defines it.
4. x86: Use TImode/OImode/XImode integers for piecewise move and store.
5. x86: Add tests for TImode/OImode/XImode for piecewise move and store.
6. x86: Adjust existing tests.

On x86-64, SPEC CPU 2017 performance impact is neutral.  Glibc code size
differences with -O2 build are:

             Before         After
libc.so     1906572        1906444

Some code sequence differences in libc.so are:

<svcudp_bufcreate@GLIBC_2.2.5>:
	...
	jne    <svcudp_bufcreate@GLIBC_2.2.5+0x318>	      |		jne    <svcudp_bufcreate@GLIBC_2.2.5+0x2a8>
	test   %r15,%r15						test   %r15,%r15
	je     <svcudp_bufcreate@GLIBC_2.2.5+0x318>	      |		je     <svcudp_bufcreate@GLIBC_2.2.5+0x2a8>
	mov    %r13d,(%r14)						mov    %r13d,(%r14)
	lea    0x10(%r14),%rdi						lea    0x10(%r14),%rdi
	mov    $0x1,%ecx						mov    $0x1,%ecx
	mov    %r13d,%edx						mov    %r13d,%edx
	mov    %r15,0x40(%r12)						mov    %r15,0x40(%r12)
	mov    %r15,%rsi						mov    %r15,%rsi
	call   <xdrmem_create@GLIBC_2.2.5>				call   <xdrmem_create@GLIBC_2.2.5>
	lea    0xa2f9b(%rip),%rax        # <svcudp_op>	      |		lea    0xa2fab(%rip),%rax        # <svcudp_op>
	xor    %esi,%esi						xor    %esi,%esi
	mov    %ebp,%edi						mov    %ebp,%edi
	mov    %rax,0x8(%r12)						mov    %rax,0x8(%r12)
	movzwl 0x12(%rsp),%eax						movzwl 0x12(%rsp),%eax
	mov    $0x8,%edx				      <
	lea    0xc(%rsp),%rcx						lea    0xc(%rsp),%rcx
	mov    %r14,0x48(%r12)				      <
	add    $0x40,%r14				      <
	mov    $0x4,%r8d						mov    $0x4,%r8d
							      >		movq   $0x0,0x1d0(%r14)
							      >		mov    $0x8,%edx
	rol    $0x8,%ax							rol    $0x8,%ax
	mov    %ebp,(%r12)				      |		mov    %r14,0x48(%r12)
	movq   $0x0,0x190(%r14)				      |		add    $0x40,%r14
	mov    %ax,0x4(%r12)				      <
	mov    %r14,0x30(%r12)						mov    %r14,0x30(%r12)
							      >		mov    %ax,0x4(%r12)
							      >		mov    %ebp,(%r12)
	movl   $0x1,0xc(%rsp)						movl   $0x1,0xc(%rsp)
	call   <setsockopt>						call   <setsockopt>
	mov    %r12,%rdi						mov    %r12,%rdi
	movabs $0x101010101010101,%rdx			      <
	test   %eax,%eax						test   %eax,%eax
	mov    $0xff,%eax						mov    $0xff,%eax
	cmove  %eax,%ebx						cmove  %eax,%ebx
	movzbl %bl,%eax					      |		movd   %ebx,%xmm0
	mov    %ebx,0xc(%rsp)						mov    %ebx,0xc(%rsp)
	mov    %rax,%rsi				      |		punpcklbw %xmm0,%xmm0
	imul   %rdx,%rsi				      |		punpcklwd %xmm0,%xmm0
	mul    %rdx					      |		pshufd $0x0,%xmm0,%xmm0
	add    %rsi,%rdx				      |		movups %xmm0,0x50(%r12)
	mov    %rax,0x50(%r12)				      |		movups %xmm0,0x60(%r12)
	mov    %rdx,0x58(%r12)				      |		movups %xmm0,0x70(%r12)
	mov    %rax,0x60(%r12)				      |		movups %xmm0,0x80(%r12)
	mov    %rdx,0x68(%r12)				      |		movups %xmm0,0x90(%r12)
	mov    %rax,0x70(%r12)				      |		movups %xmm0,0xa0(%r12)
	mov    %rdx,0x78(%r12)				      |		movups %xmm0,0xb0(%r12)
	mov    %rax,0x80(%r12)				      |		movups %xmm0,0xc0(%r12)
	mov    %rdx,0x88(%r12)				      |		movups %xmm0,0xd0(%r12)
	mov    %rax,0x90(%r12)				      |		movups %xmm0,0xe0(%r12)
	mov    %rdx,0x98(%r12)				      |		movups %xmm0,0xf0(%r12)
	mov    %rax,0xa0(%r12)				      |		movups %xmm0,0x100(%r12)
	mov    %rdx,0xa8(%r12)				      |		movups %xmm0,0x110(%r12)
	mov    %rax,0xb0(%r12)				      |		movups %xmm0,0x120(%r12)
	mov    %rdx,0xb8(%r12)				      |		movups %xmm0,0x130(%r12)
	mov    %rax,0xc0(%r12)				      |		movups %xmm0,0x140(%r12)
	mov    %rdx,0xc8(%r12)				      <
	mov    %rax,0xd0(%r12)				      <
	mov    %rdx,0xd8(%r12)				      <
	mov    %rax,0xe0(%r12)				      <
	mov    %rdx,0xe8(%r12)				      <
	mov    %rax,0xf0(%r12)				      <
	mov    %rdx,0xf8(%r12)				      <
	mov    %rax,0x100(%r12)				      <
	mov    %rdx,0x108(%r12)				      <
	mov    %rax,0x110(%r12)				      <
	mov    %rdx,0x118(%r12)				      <
	mov    %rax,0x120(%r12)				      <
	mov    %rdx,0x128(%r12)				      <
	mov    %rax,0x130(%r12)				      <
	mov    %rdx,0x138(%r12)				      <
	mov    %rax,0x140(%r12)				      <
	mov    %rdx,0x148(%r12)				      <
	call   <xprt_register@GLIBC_2.2.5>				call   <xprt_register@GLIBC_2.2.5>
	add    $0x28,%rsp						add    $0x28,%rsp
	mov    %r12,%rax						mov    %r12,%rax
	pop    %rbx							pop    %rbx
	pop    %rbp							pop    %rbp
	pop    %r12							pop    %r12
	pop    %r13							pop    %r13
	pop    %r14							pop    %r14
	pop    %r15							pop    %r15
	ret    								ret    


*** BLURB HERE ***

H.J. Lu (11):
  Rewrite memset with TARGET_GEN_MEMSET_SCRATCH_RTX
  x86: Add TARGET_GEN_MEMSET_SCRATCH_RTX
  x86: Avoid stack realignment when copying data
  x86: Update piecewise move and store
  x86: Add AVX2 tests for PR middle-end/90773
  x86: Add tests for piecewise move and store
  x86: Also pass -mno-avx to pr72839.c
  x86: Also pass -mno-avx to cold-attribute-1.c
  x86: Also pass -mno-avx to sw-1.c for ia32
  x86: Update gcc.target/i386/incoming-11.c
  x86: Also pass -mno-sse to vect8-ret.c

 gcc/builtins.c                                | 123 +++++++++++++++---
 gcc/config/i386/i386-expand.c                 |   4 +-
 gcc/config/i386/i386.c                        |  27 +++-
 gcc/config/i386/i386.h                        |  40 ++++--
 gcc/doc/tm.texi                               |   5 +
 gcc/doc/tm.texi.in                            |   2 +
 gcc/target.def                                |   7 +
 .../gcc.target/i386/cold-attribute-1.c        |   2 +-
 gcc/testsuite/gcc.target/i386/eh_return-1.c   |  26 ++++
 gcc/testsuite/gcc.target/i386/incoming-11.c   |   2 +-
 .../gcc.target/i386/pieces-memcpy-10.c        |  16 +++
 .../gcc.target/i386/pieces-memcpy-11.c        |  17 +++
 .../gcc.target/i386/pieces-memcpy-12.c        |  16 +++
 .../gcc.target/i386/pieces-memcpy-13.c        |  16 +++
 .../gcc.target/i386/pieces-memcpy-14.c        |  17 +++
 .../gcc.target/i386/pieces-memcpy-15.c        |  16 +++
 .../gcc.target/i386/pieces-memcpy-16.c        |  16 +++
 .../gcc.target/i386/pieces-memcpy-7.c         |  15 +++
 .../gcc.target/i386/pieces-memcpy-8.c         |  14 ++
 .../gcc.target/i386/pieces-memcpy-9.c         |  14 ++
 .../gcc.target/i386/pieces-memset-1.c         |  16 +++
 .../gcc.target/i386/pieces-memset-10.c        |  16 +++
 .../gcc.target/i386/pieces-memset-11.c        |  16 +++
 .../gcc.target/i386/pieces-memset-12.c        |  16 +++
 .../gcc.target/i386/pieces-memset-13.c        |  16 +++
 .../gcc.target/i386/pieces-memset-14.c        |  16 +++
 .../gcc.target/i386/pieces-memset-15.c        |  16 +++
 .../gcc.target/i386/pieces-memset-16.c        |  16 +++
 .../gcc.target/i386/pieces-memset-17.c        |  16 +++
 .../gcc.target/i386/pieces-memset-18.c        |  16 +++
 .../gcc.target/i386/pieces-memset-19.c        |  17 +++
 .../gcc.target/i386/pieces-memset-2.c         |  12 ++
 .../gcc.target/i386/pieces-memset-20.c        |  17 +++
 .../gcc.target/i386/pieces-memset-21.c        |  17 +++
 .../gcc.target/i386/pieces-memset-22.c        |  17 +++
 .../gcc.target/i386/pieces-memset-23.c        |  17 +++
 .../gcc.target/i386/pieces-memset-24.c        |  17 +++
 .../gcc.target/i386/pieces-memset-25.c        |  17 +++
 .../gcc.target/i386/pieces-memset-26.c        |  17 +++
 .../gcc.target/i386/pieces-memset-27.c        |  17 +++
 .../gcc.target/i386/pieces-memset-28.c        |  17 +++
 .../gcc.target/i386/pieces-memset-29.c        |  17 +++
 .../gcc.target/i386/pieces-memset-3.c         |  18 +++
 .../gcc.target/i386/pieces-memset-30.c        |  17 +++
 .../gcc.target/i386/pieces-memset-31.c        |  17 +++
 .../gcc.target/i386/pieces-memset-32.c        |  17 +++
 .../gcc.target/i386/pieces-memset-33.c        |  17 +++
 .../gcc.target/i386/pieces-memset-34.c        |  17 +++
 .../gcc.target/i386/pieces-memset-35.c        |  17 +++
 .../gcc.target/i386/pieces-memset-36.c        |  17 +++
 .../gcc.target/i386/pieces-memset-37.c        |  15 +++
 .../gcc.target/i386/pieces-memset-38.c        |  17 +++
 .../gcc.target/i386/pieces-memset-39.c        |  16 +++
 .../gcc.target/i386/pieces-memset-4.c         |  16 +++
 .../gcc.target/i386/pieces-memset-40.c        |  17 +++
 .../gcc.target/i386/pieces-memset-41.c        |  16 +++
 .../gcc.target/i386/pieces-memset-42.c        |  17 +++
 .../gcc.target/i386/pieces-memset-43.c        |  17 +++
 .../gcc.target/i386/pieces-memset-5.c         |  12 ++
 .../gcc.target/i386/pieces-memset-6.c         |  16 +++
 .../gcc.target/i386/pieces-memset-7.c         |  16 +++
 .../gcc.target/i386/pieces-memset-8.c         |  16 +++
 .../gcc.target/i386/pieces-memset-9.c         |  16 +++
 gcc/testsuite/gcc.target/i386/pr100865-1.c    |   2 +-
 gcc/testsuite/gcc.target/i386/pr100865-10a.c  |   4 +-
 gcc/testsuite/gcc.target/i386/pr100865-10b.c  |   4 +-
 gcc/testsuite/gcc.target/i386/pr100865-2.c    |   2 +-
 gcc/testsuite/gcc.target/i386/pr100865-3.c    |   2 +-
 gcc/testsuite/gcc.target/i386/pr100865-4a.c   |   6 +-
 gcc/testsuite/gcc.target/i386/pr100865-4b.c   |   8 +-
 gcc/testsuite/gcc.target/i386/pr72839.c       |   2 +-
 gcc/testsuite/gcc.target/i386/pr90773-1.c     |  10 +-
 gcc/testsuite/gcc.target/i386/pr90773-14.c    |   4 +-
 gcc/testsuite/gcc.target/i386/pr90773-15.c    |  14 ++
 gcc/testsuite/gcc.target/i386/pr90773-16.c    |  14 ++
 gcc/testsuite/gcc.target/i386/pr90773-17.c    |  14 ++
 gcc/testsuite/gcc.target/i386/pr90773-18.c    |  15 +++
 gcc/testsuite/gcc.target/i386/pr90773-19.c    |  14 ++
 gcc/testsuite/gcc.target/i386/pr90773-20.c    |  13 ++
 gcc/testsuite/gcc.target/i386/pr90773-21.c    |  13 ++
 gcc/testsuite/gcc.target/i386/pr90773-22.c    |  13 ++
 gcc/testsuite/gcc.target/i386/pr90773-23.c    |  13 ++
 gcc/testsuite/gcc.target/i386/pr90773-24.c    |   2 +-
 gcc/testsuite/gcc.target/i386/pr90773-25.c    |   2 +-
 gcc/testsuite/gcc.target/i386/pr90773-26.c    |  21 +++
 gcc/testsuite/gcc.target/i386/pr90773-4.c     |   2 +-
 gcc/testsuite/gcc.target/i386/sw-1.c          |   1 +
 gcc/testsuite/gcc.target/i386/vect8-ret.c     |   2 +-
 88 files changed, 1231 insertions(+), 64 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/eh_return-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-11.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-12.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-13.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-14.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-15.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-16.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-7.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-8.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-9.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-10.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-11.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-12.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-13.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-14.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-15.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-16.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-17.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-18.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-19.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-20.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-21.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-22.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-23.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-24.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-25.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-26.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-27.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-28.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-29.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-30.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-31.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-32.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-33.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-34.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-35.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-36.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-37.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-38.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-39.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-40.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-41.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-42.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-43.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-7.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-8.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-9.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-15.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-16.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-17.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-18.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-19.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-20.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-21.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-22.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-23.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-26.c

-- 
2.31.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v5 01/11] Rewrite memset with TARGET_GEN_MEMSET_SCRATCH_RTX
  2021-07-01 15:22 [PATCH v5 00/11] Allow TImode/OImode/XImode in op_by_pieces operations H.J. Lu
@ 2021-07-01 15:22 ` H.J. Lu
  2021-07-01 15:22 ` [PATCH v5 02/11] x86: Add TARGET_GEN_MEMSET_SCRATCH_RTX H.J. Lu
                   ` (9 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: H.J. Lu @ 2021-07-01 15:22 UTC (permalink / raw)
  To: gcc-patches
  Cc: Richard Biener, Richard Sandiford, Uros Bizjak, Bernd Edlinger

1. Rewrite builtin_memset_read_str/builtin_memset_gen_str to use vector
broadcast to duplicate QI value to TI/OI/XI value for memmset.
2. Add TARGET_GEN_MEMSET_SCRATCH_RTX to allow the backend to use a hard
scratch register to avoid stack realignment when expanding memset.

	PR middle-end/90773
	* builtins.c (gen_memset_value_from_prev): New function.
	(gen_memset_broadcast): Likewise.
	(builtin_memset_read_str): Use gen_memset_value_from_prev
	and gen_memset_broadcast.
	(builtin_memset_gen_str): Likewise.
	* target.def (gen_memset_scratch_rtx): New hook.
	* doc/tm.texi.in: Add TARGET_GEN_MEMSET_SCRATCH_RTX.
	* doc/tm.texi: Regenerated.
---
 gcc/builtins.c     | 123 +++++++++++++++++++++++++++++++++++++--------
 gcc/doc/tm.texi    |   5 ++
 gcc/doc/tm.texi.in |   2 +
 gcc/target.def     |   7 +++
 4 files changed, 116 insertions(+), 21 deletions(-)

diff --git a/gcc/builtins.c b/gcc/builtins.c
index e5e39386a93..e938d610f12 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -6639,26 +6639,111 @@ expand_builtin_strncpy (tree exp, rtx target)
   return NULL_RTX;
 }
 
-/* Callback routine for store_by_pieces.  Read GET_MODE_BITSIZE (MODE)
-   bytes from constant string DATA + OFFSET and return it as target
-   constant.  If PREV isn't nullptr, it has the RTL info from the
+/* Return the RTL of a register in MODE generated from PREV in the
    previous iteration.  */
 
-rtx
-builtin_memset_read_str (void *data, void *prevp,
-			 HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
-			 scalar_int_mode mode)
+static rtx
+gen_memset_value_from_prev (void *prevp, scalar_int_mode mode)
 {
+  rtx target = nullptr;
   by_pieces_prev *prev = (by_pieces_prev *) prevp;
   if (prev != nullptr && prev->data != nullptr)
     {
       /* Use the previous data in the same mode.  */
       if (prev->mode == mode)
 	return prev->data;
+
+      rtx prev_rtx = prev->data;
+      machine_mode prev_mode = prev->mode;
+      unsigned int word_size = GET_MODE_SIZE (word_mode);
+      if (word_size < GET_MODE_SIZE (prev->mode)
+	  && word_size > GET_MODE_SIZE (mode))
+	{
+	  /* First generate subreg of word mode if the previous mode is
+	     wider than word mode and word mode is wider than MODE.  */
+	  prev_rtx = simplify_gen_subreg (word_mode, prev_rtx,
+					  prev_mode, 0);
+	  prev_mode = word_mode;
+	}
+      if (prev_rtx != nullptr)
+	target = simplify_gen_subreg (mode, prev_rtx, prev_mode, 0);
     }
+  return target;
+}
+
+/* Return the RTL of a register in MODE broadcasted from DATA.  */
+
+static rtx
+gen_memset_broadcast (rtx data, scalar_int_mode mode)
+{
+  /* Skip if regno_reg_rtx isn't initialized.  */
+  if (!regno_reg_rtx)
+    return nullptr;
+
+  rtx target = nullptr;
+
+  unsigned int nunits = GET_MODE_SIZE (mode) / GET_MODE_SIZE (QImode);
+  machine_mode vector_mode;
+  if (!mode_for_vector (QImode, nunits).exists (&vector_mode))
+    gcc_unreachable ();
+
+  enum insn_code icode = optab_handler (vec_duplicate_optab,
+					vector_mode);
+  if (icode != CODE_FOR_nothing)
+    {
+      rtx reg = targetm.gen_memset_scratch_rtx (vector_mode);
+      if (CONST_INT_P (data))
+	{
+	  /* Use the move expander with CONST_VECTOR.  */
+	  rtx const_vec = gen_const_vec_duplicate (vector_mode, data);
+	  emit_move_insn (reg, const_vec);
+	}
+      else
+	{
+
+	  class expand_operand ops[2];
+	  create_output_operand (&ops[0], reg, vector_mode);
+	  create_input_operand (&ops[1], data, QImode);
+	  expand_insn (icode, 2, ops);
+	  if (!rtx_equal_p (reg, ops[0].value))
+	    emit_move_insn (reg, ops[0].value);
+	}
+      target = lowpart_subreg (mode, reg, vector_mode);
+    }
+
+  return target;
+}
+
+/* Callback routine for store_by_pieces.  Read GET_MODE_BITSIZE (MODE)
+   bytes from constant string DATA + OFFSET and return it as target
+   constant.  If PREV isn't nullptr, it has the RTL info from the
+   previous iteration.  */
 
+rtx
+builtin_memset_read_str (void *data, void *prev,
+			 HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
+			 scalar_int_mode mode)
+{
+  rtx target;
   const char *c = (const char *) data;
-  char *p = XALLOCAVEC (char, GET_MODE_SIZE (mode));
+  char *p;
+
+  /* Don't use the previous value if size is 1.  */
+  if (GET_MODE_SIZE (mode) != 1)
+    {
+      target = gen_memset_value_from_prev (prev, mode);
+      if (target != nullptr)
+	return target;
+
+      p = XALLOCAVEC (char, GET_MODE_SIZE (QImode));
+      memset (p, *c, GET_MODE_SIZE (QImode));
+      rtx src = c_readstr (p, QImode);
+      target = gen_memset_broadcast (src, mode);
+      if (target != nullptr)
+	return target;
+    }
+
+  p = XALLOCAVEC (char, GET_MODE_SIZE (mode));
 
   memset (p, *c, GET_MODE_SIZE (mode));
 
@@ -6672,7 +6757,7 @@ builtin_memset_read_str (void *data, void *prevp,
    nullptr, it has the RTL info from the previous iteration.  */
 
 static rtx
-builtin_memset_gen_str (void *data, void *prevp,
+builtin_memset_gen_str (void *data, void *prev,
 			HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
 			scalar_int_mode mode)
 {
@@ -6680,22 +6765,18 @@ builtin_memset_gen_str (void *data, void *prevp,
   size_t size;
   char *p;
 
-  by_pieces_prev *prev = (by_pieces_prev *) prevp;
-  if (prev != nullptr && prev->data != nullptr)
-    {
-      /* Use the previous data in the same mode.  */
-      if (prev->mode == mode)
-	return prev->data;
-
-      target = simplify_gen_subreg (mode, prev->data, prev->mode, 0);
-      if (target != nullptr)
-	return target;
-    }
-
   size = GET_MODE_SIZE (mode);
   if (size == 1)
     return (rtx) data;
 
+  target = gen_memset_value_from_prev (prev, mode);
+  if (target != nullptr)
+    return target;
+
+  target = gen_memset_broadcast ((rtx) data, mode);
+  if (target != nullptr)
+    return target;
+
   p = XALLOCAVEC (char, size);
   memset (p, 1, size);
   coeff = c_readstr (p, mode);
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 2a41ae5fba1..8849711fcf8 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -12122,6 +12122,11 @@ This function prepares to emit a conditional comparison within a sequence
  @var{bit_code} is @code{AND} or @code{IOR}, which is the op on the compares.
 @end deftypefn
 
+@deftypefn {Target Hook} rtx TARGET_GEN_MEMSET_SCRATCH_RTX (machine_mode @var{mode})
+This hook should return an rtx for scratch register in @var{mode} to
+be used by memset broadcast. The default is @code{gen_reg_rtx}.
+@end deftypefn
+
 @deftypefn {Target Hook} unsigned TARGET_LOOP_UNROLL_ADJUST (unsigned @var{nunroll}, class loop *@var{loop})
 This target hook returns a new value for the number of times @var{loop}
 should be unrolled. The parameter @var{nunroll} is the number of times
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index f881cdabe9e..a6bbf4f2667 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -7958,6 +7958,8 @@ lists.
 
 @hook TARGET_GEN_CCMP_NEXT
 
+@hook TARGET_GEN_MEMSET_SCRATCH_RTX
+
 @hook TARGET_LOOP_UNROLL_ADJUST
 
 @defmac POWI_MAX_MULTS
diff --git a/gcc/target.def b/gcc/target.def
index c009671c583..5fb287db3bd 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -2726,6 +2726,13 @@ DEFHOOK
  rtx, (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev, int cmp_code, tree op0, tree op1, int bit_code),
  NULL)
 
+DEFHOOK
+(gen_memset_scratch_rtx,
+ "This hook should return an rtx for scratch register in @var{mode} to\n\
+be used by memset broadcast. The default is @code{gen_reg_rtx}.",
+ rtx, (machine_mode mode),
+ gen_reg_rtx)
+
 /* Return a new value for loop unroll size.  */
 DEFHOOK
 (loop_unroll_adjust,
-- 
2.31.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v5 02/11] x86: Add TARGET_GEN_MEMSET_SCRATCH_RTX
  2021-07-01 15:22 [PATCH v5 00/11] Allow TImode/OImode/XImode in op_by_pieces operations H.J. Lu
  2021-07-01 15:22 ` [PATCH v5 01/11] Rewrite memset with TARGET_GEN_MEMSET_SCRATCH_RTX H.J. Lu
@ 2021-07-01 15:22 ` H.J. Lu
  2021-07-01 15:22 ` [PATCH v5 03/11] x86: Avoid stack realignment when copying data H.J. Lu
                   ` (8 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: H.J. Lu @ 2021-07-01 15:22 UTC (permalink / raw)
  To: gcc-patches
  Cc: Richard Biener, Richard Sandiford, Uros Bizjak, Bernd Edlinger

Define TARGET_GEN_MEMSET_SCRATCH_RTX to ix86_gen_scratch_sse_rtx to
return a scratch SSE register for memset.

gcc/

	PR middle-end/90773
	* config/i386/i386.c (TARGET_GEN_MEMSET_SCRATCH_RTX): New.

gcc/testsuite/

	PR middle-end/90773
	* gcc.target/i386/pr90773-15.c: New test.
	* gcc.target/i386/pr90773-16.c: Likewise.
	* gcc.target/i386/pr90773-17.c: Likewise.
	* gcc.target/i386/pr90773-18.c: Likewise.
	* gcc.target/i386/pr90773-19.c: Likewise.
---
 gcc/config/i386/i386.c                     |  6 +++++-
 gcc/testsuite/gcc.target/i386/pr90773-15.c | 14 ++++++++++++++
 gcc/testsuite/gcc.target/i386/pr90773-16.c | 14 ++++++++++++++
 gcc/testsuite/gcc.target/i386/pr90773-17.c | 14 ++++++++++++++
 gcc/testsuite/gcc.target/i386/pr90773-18.c | 15 +++++++++++++++
 gcc/testsuite/gcc.target/i386/pr90773-19.c | 14 ++++++++++++++
 6 files changed, 76 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-15.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-16.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-17.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-18.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-19.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 2fbaae7cd02..f436794f65c 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -23163,7 +23163,8 @@ ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
     }
 }
 
-/* Return a scratch register in MODE for vector load and store.  */
+/* Implement the TARGET_GEN_MEMSET_SCRATCH_RTX hook.  Return a scratch
+   register in MODE for vector load and store.  */
 
 rtx
 ix86_gen_scratch_sse_rtx (machine_mode mode)
@@ -24082,6 +24083,9 @@ static bool ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
 #undef TARGET_LIBC_HAS_FAST_FUNCTION
 #define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function
 
+#undef TARGET_GEN_MEMSET_SCRATCH_RTX
+#define TARGET_GEN_MEMSET_SCRATCH_RTX ix86_gen_scratch_sse_rtx
+
 #if CHECKING_P
 #undef TARGET_RUN_TARGET_SELFTESTS
 #define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-15.c b/gcc/testsuite/gcc.target/i386/pr90773-15.c
new file mode 100644
index 00000000000..c0a96fed892
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90773-15.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512" } */
+
+extern char *dst;
+
+void
+foo (int c)
+{
+  __builtin_memset (dst, c, 17);
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%edi, %xmm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]+%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "movb\[\\t \]+%dil, 16\\(%\[\^,\]+\\)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-16.c b/gcc/testsuite/gcc.target/i386/pr90773-16.c
new file mode 100644
index 00000000000..d2d1ec6141c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90773-16.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, -1, 17);
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeqd" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]+%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "movb\[\\t \]+\\\$-1, 16\\(%\[\^,\]+\\)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-17.c b/gcc/testsuite/gcc.target/i386/pr90773-17.c
new file mode 100644
index 00000000000..6c8da7d24ef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90773-17.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 12, 19);
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcastb" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]+%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovd\[\\t \]+%xmm\[0-9\]+, 15\\(%\[\^,\]+\\)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-18.c b/gcc/testsuite/gcc.target/i386/pr90773-18.c
new file mode 100644
index 00000000000..b0687abbe01
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90773-18.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake-avx512" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 12, 9);
+}
+
+/* { dg-final { scan-assembler-times "movabsq\[\\t \]+\\\$868082074056920076, %r" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "movl\[\\t \]+\\\$202116108, \\(%\[\^,\]+\\)" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "movl\[\\t \]+\\\$202116108, 4\\(%\[\^,\]+\\)" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "movb\[\\t \]+\\\$12, 8\\(%\[\^,\]+\\)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-19.c b/gcc/testsuite/gcc.target/i386/pr90773-19.c
new file mode 100644
index 00000000000..8aa5540bacc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90773-19.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 12, 9);
+}
+
+/* { dg-final { scan-assembler-times "movabsq\[\\t \]+\\\$868082074056920076, %r" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "movl\[\\t \]+\\\$202116108, \\(%\[\^,\]+\\)" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "movl\[\\t \]+\\\$202116108, 4\\(%\[\^,\]+\\)" 1 { target ia32 } } } */
-- 
2.31.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v5 03/11] x86: Avoid stack realignment when copying data
  2021-07-01 15:22 [PATCH v5 00/11] Allow TImode/OImode/XImode in op_by_pieces operations H.J. Lu
  2021-07-01 15:22 ` [PATCH v5 01/11] Rewrite memset with TARGET_GEN_MEMSET_SCRATCH_RTX H.J. Lu
  2021-07-01 15:22 ` [PATCH v5 02/11] x86: Add TARGET_GEN_MEMSET_SCRATCH_RTX H.J. Lu
@ 2021-07-01 15:22 ` H.J. Lu
  2021-07-01 15:22 ` [PATCH v5 04/11] x86: Update piecewise move and store H.J. Lu
                   ` (7 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: H.J. Lu @ 2021-07-01 15:22 UTC (permalink / raw)
  To: gcc-patches
  Cc: Richard Biener, Richard Sandiford, Uros Bizjak, Bernd Edlinger

To avoid stack realignment, use SCRATCH_SSE_REG to copy data from one
memory location to another.

gcc/

	* config/i386/i386-expand.c (ix86_expand_vector_move): Call
	ix86_gen_scratch_sse_rtx to get a scratch SSE register to copy
	data from one memory location to another.

gcc/testsuite/

	* gcc.target/i386/eh_return-1.c: New test.
---
 gcc/config/i386/i386-expand.c               |  4 +++-
 gcc/testsuite/gcc.target/i386/eh_return-1.c | 26 +++++++++++++++++++++
 2 files changed, 29 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/eh_return-1.c

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 5c9170e3a1d..6b009d523a5 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -602,7 +602,9 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[])
       && !register_operand (op0, mode)
       && !register_operand (op1, mode))
     {
-      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
+      rtx tmp = ix86_gen_scratch_sse_rtx (GET_MODE (op0));
+      emit_move_insn (tmp, op1);
+      emit_move_insn (op0, tmp);
       return;
     }
 
diff --git a/gcc/testsuite/gcc.target/i386/eh_return-1.c b/gcc/testsuite/gcc.target/i386/eh_return-1.c
new file mode 100644
index 00000000000..671ba635e88
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/eh_return-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=haswell -mno-avx512f" } */
+
+struct _Unwind_Context
+{
+  void *ra;
+  char array[48];
+};
+
+extern long uw_install_context_1 (struct _Unwind_Context *);
+
+void
+_Unwind_RaiseException (void)
+{
+  struct _Unwind_Context this_context, cur_context;
+  long offset = uw_install_context_1 (&this_context);
+  __builtin_memcpy (&this_context, &cur_context,
+		    sizeof (struct _Unwind_Context));
+  void *handler = __builtin_frob_return_addr ((&cur_context)->ra);
+  uw_install_context_1 (&cur_context);
+  __builtin_eh_return (offset, handler);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 4 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
-- 
2.31.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v5 04/11] x86: Update piecewise move and store
  2021-07-01 15:22 [PATCH v5 00/11] Allow TImode/OImode/XImode in op_by_pieces operations H.J. Lu
                   ` (2 preceding siblings ...)
  2021-07-01 15:22 ` [PATCH v5 03/11] x86: Avoid stack realignment when copying data H.J. Lu
@ 2021-07-01 15:22 ` H.J. Lu
  2021-07-01 15:22 ` [PATCH v5 05/11] x86: Add AVX2 tests for PR middle-end/90773 H.J. Lu
                   ` (6 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: H.J. Lu @ 2021-07-01 15:22 UTC (permalink / raw)
  To: gcc-patches
  Cc: Richard Biener, Richard Sandiford, Uros Bizjak, Bernd Edlinger

We can use TImode/OImode/XImode integers for piecewise move and store.

1. Define MAX_MOVE_MAX to 64, which is the constant maximum number of
bytes that a single instruction can move quickly between memory and
registers or between two memory locations.
2. Define MOVE_MAX to MOVE_MAX_PIECES, which is the maximum number of
bytes we can move from memory to memory in one reasonably fast instruction.
The difference between MAX_MOVE_MAX and MOVE_MAX is that MAX_MOVE_MAX
must be a constant, independent of compiler options, since it is used in
reload.h to define struct target_reload and MOVE_MAX can vary, depending
on compiler options.
3. When vector register is used for piecewise move and store, we don't
increase stack_alignment_needed since vector register spill isn't
required for piecewise move and store.  Since stack_realign_needed is
set to true by checking stack_alignment_estimated set by pseudo vector
register usage, we also need to check stack_realign_needed to eliminate
frame pointer.

gcc/

	* config/i386/i386.c (ix86_finalize_stack_frame_flags): Also
	check stack_realign_needed for stack realignment.
	(ix86_legitimate_constant_p): Always allow CONST_WIDE_INT smaller
	than the largest integer supported by vector register.
	* config/i386/i386.h (MAX_MOVE_MAX): New.  Set to 64.
	(MOVE_MAX_PIECES): Set to bytes of the largest integer supported
	by vector register.
	(MOVE_MAX): Defined to MOVE_MAX_PIECES.
	(STORE_MAX_PIECES): New.

gcc/testsuite/

	* gcc.target/i386/pr90773-1.c: Adjust to expect movq for 32-bit.
	* gcc.target/i386/pr90773-4.c: Also run for 32-bit.
	* gcc.target/i386/pr90773-15.c: Likewise.
	* gcc.target/i386/pr90773-16.c: Likewise.
	* gcc.target/i386/pr90773-17.c: Likewise.
	* gcc.target/i386/pr90773-24.c: Likewise.
	* gcc.target/i386/pr90773-25.c: Likewise.
	* gcc.target/i386/pr100865-1.c: Likewise.
	* gcc.target/i386/pr100865-2.c: Likewise.
	* gcc.target/i386/pr100865-3.c: Likewise.
	* gcc.target/i386/pr90773-14.c: Also run for 32-bit and expect
	XMM movd to store 4 bytes.
	* gcc.target/i386/pr100865-4a.c: Also run for 32-bit and expect
	YMM registers.
	* gcc.target/i386/pr100865-4b.c: Likewise.
	* gcc.target/i386/pr100865-10a.c: Expect YMM registers.
	* gcc.target/i386/pr100865-10b.c: Likewise.
---
 gcc/config/i386/i386.c                       | 21 ++++++++--
 gcc/config/i386/i386.h                       | 40 ++++++++++++++++----
 gcc/testsuite/gcc.target/i386/pr100865-1.c   |  2 +-
 gcc/testsuite/gcc.target/i386/pr100865-10a.c |  4 +-
 gcc/testsuite/gcc.target/i386/pr100865-10b.c |  4 +-
 gcc/testsuite/gcc.target/i386/pr100865-2.c   |  2 +-
 gcc/testsuite/gcc.target/i386/pr100865-3.c   |  2 +-
 gcc/testsuite/gcc.target/i386/pr100865-4a.c  |  6 +--
 gcc/testsuite/gcc.target/i386/pr100865-4b.c  |  8 ++--
 gcc/testsuite/gcc.target/i386/pr90773-1.c    | 10 ++---
 gcc/testsuite/gcc.target/i386/pr90773-14.c   |  4 +-
 gcc/testsuite/gcc.target/i386/pr90773-15.c   |  6 +--
 gcc/testsuite/gcc.target/i386/pr90773-16.c   |  2 +-
 gcc/testsuite/gcc.target/i386/pr90773-17.c   |  2 +-
 gcc/testsuite/gcc.target/i386/pr90773-24.c   |  2 +-
 gcc/testsuite/gcc.target/i386/pr90773-25.c   |  2 +-
 gcc/testsuite/gcc.target/i386/pr90773-4.c    |  2 +-
 17 files changed, 77 insertions(+), 42 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index f436794f65c..3dfb3a6f2dc 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -7953,8 +7953,17 @@ ix86_finalize_stack_frame_flags (void)
      assumed stack realignment might be needed or -fno-omit-frame-pointer
      is used, but in the end nothing that needed the stack alignment had
      been spilled nor stack access, clear frame_pointer_needed and say we
-     don't need stack realignment.  */
-  if ((stack_realign || (!flag_omit_frame_pointer && optimize))
+     don't need stack realignment.
+
+     When vector register is used for piecewise move and store, we don't
+     increase stack_alignment_needed as there is no register spill for
+     piecewise move and store.  Since stack_realign_needed is set to true
+     by checking stack_alignment_estimated which is updated by pseudo
+     vector register usage, we also need to check stack_realign_needed to
+     eliminate frame pointer.  */
+  if ((stack_realign
+       || (!flag_omit_frame_pointer && optimize)
+       || crtl->stack_realign_needed)
       && frame_pointer_needed
       && crtl->is_leaf
       && crtl->sp_is_unchanging
@@ -10418,7 +10427,13 @@ ix86_legitimate_constant_p (machine_mode mode, rtx x)
 	  /* FALLTHRU */
 	case E_OImode:
 	case E_XImode:
-	  if (!standard_sse_constant_p (x, mode))
+	  if (!standard_sse_constant_p (x, mode)
+	      && GET_MODE_SIZE (TARGET_AVX512F
+				? XImode
+				: (TARGET_AVX
+				   ? OImode
+				   : (TARGET_SSE2
+				      ? TImode : DImode))) < GET_MODE_SIZE (mode))
 	    return false;
 	default:
 	  break;
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 6e0340a4b60..7c504108896 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1748,9 +1748,10 @@ typedef struct ix86_args {
 /* Define this as 1 if `char' should by default be signed; else as 0.  */
 #define DEFAULT_SIGNED_CHAR 1
 
-/* Max number of bytes we can move from memory to memory
-   in one reasonably fast instruction.  */
-#define MOVE_MAX 16
+/* The constant maximum number of bytes that a single instruction can
+   move quickly between memory and registers or between two memory
+   locations.  */
+#define MAX_MOVE_MAX 64
 
 /* MOVE_MAX_PIECES is the number of bytes at a time which we can
    move efficiently, as opposed to  MOVE_MAX which is the maximum
@@ -1761,11 +1762,34 @@ typedef struct ix86_args {
    widest mode with MAX_FIXED_MODE_SIZE, we can only use TImode in
    64-bit mode.  */
 #define MOVE_MAX_PIECES \
-  ((TARGET_64BIT \
-    && TARGET_SSE2 \
-    && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL \
-    && TARGET_SSE_UNALIGNED_STORE_OPTIMAL) \
-   ? GET_MODE_SIZE (TImode) : UNITS_PER_WORD)
+  ((TARGET_AVX512F && !TARGET_PREFER_AVX256) \
+   ? 64 \
+   : ((TARGET_AVX \
+       && !TARGET_PREFER_AVX128 \
+       && !TARGET_AVX256_SPLIT_UNALIGNED_LOAD \
+       && !TARGET_AVX256_SPLIT_UNALIGNED_STORE) \
+      ? 32 \
+      : ((TARGET_SSE2 \
+	  && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL \
+	  && TARGET_SSE_UNALIGNED_STORE_OPTIMAL) \
+	 ? 16 : UNITS_PER_WORD)))
+
+/* Max number of bytes we can move from memory to memory in one
+   reasonably fast instruction.  */
+#define MOVE_MAX MOVE_MAX_PIECES
+
+/* STORE_MAX_PIECES is the number of bytes at a time that we can
+   store efficiently.  */
+#define STORE_MAX_PIECES \
+  ((TARGET_AVX512F && !TARGET_PREFER_AVX256) \
+   ? 64 \
+   : ((TARGET_AVX \
+       && !TARGET_PREFER_AVX128 \
+       && !TARGET_AVX256_SPLIT_UNALIGNED_STORE) \
+      ? 32 \
+      : ((TARGET_SSE2 \
+	  && TARGET_SSE_UNALIGNED_STORE_OPTIMAL) \
+	 ? 16 : UNITS_PER_WORD)))
 
 /* If a memory-to-memory move would take MOVE_RATIO or more simple
    move-instruction pairs, we will do a cpymem or libcall instead.
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-1.c b/gcc/testsuite/gcc.target/i386/pr100865-1.c
index 6c3097fb2a6..949dd5c337a 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-1.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-do compile } */
 /* { dg-options "-O2 -march=x86-64" } */
 
 extern char *dst;
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-10a.c b/gcc/testsuite/gcc.target/i386/pr100865-10a.c
index 7ffc19e56a8..98b6dfb16f3 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-10a.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-10a.c
@@ -29,5 +29,5 @@ foo (void)
     array[i] = MK_CONST128_BROADCAST (0x1f);
 }
 
-/* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+\[^\n\]*, %xmm\[0-9\]+" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqa\[\\t \]%xmm\[0-9\]+, " 16 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+\[^\n\]*, %ymm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 8 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-10b.c b/gcc/testsuite/gcc.target/i386/pr100865-10b.c
index edf52765c60..5f5abe27bed 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-10b.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-10b.c
@@ -3,5 +3,5 @@
 
 #include "pr100865-10a.c"
 
-/* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqa\[\\t \]%xmm\[0-9\]+, " 16 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%(?:r|e)\[^\n\]*, %ymm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 8 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-2.c b/gcc/testsuite/gcc.target/i386/pr100865-2.c
index 17efe2d72a3..f3ea7753abe 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-2.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-2.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-do compile } */
 /* { dg-options "-O2 -march=skylake" } */
 
 extern char *dst;
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-3.c b/gcc/testsuite/gcc.target/i386/pr100865-3.c
index b6dbcf7809b..0183fccc251 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-3.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-3.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-do compile } */
 /* { dg-options "-O2 -march=skylake-avx512" } */
 
 extern char *dst;
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-4a.c b/gcc/testsuite/gcc.target/i386/pr100865-4a.c
index f55883598f9..365487337ae 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-4a.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-4a.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-do compile } */
 /* { dg-options "-O2 -march=skylake" } */
 
 extern char array[64];
@@ -11,6 +11,6 @@ foo (void)
     array[i] = -45;
 }
 
-/* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%xmm\[0-9\]+, %xmm\[0-9\]+" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%xmm\[0-9\]+, " 4 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%xmm\[0-9\]+, %ymm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 2 } } */
 /* { dg-final { scan-assembler-not "vmovdqa" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-4b.c b/gcc/testsuite/gcc.target/i386/pr100865-4b.c
index f41e6147b4c..cbcae2d97b5 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-4b.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-4b.c
@@ -1,9 +1,9 @@
-/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-do compile } */
 /* { dg-options "-O2 -march=skylake-avx512" } */
 
 #include "pr100865-4a.c"
 
-/* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%xmm\[0-9\]+, " 4 } } */
-/* { dg-final { scan-assembler-not "vpbroadcastb\[\\t \]+%xmm\[0-9\]+, %xmm\[0-9\]+" } } */
+/* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%(?:r|e)\[^\n\]*, %ymm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 2 } } */
+/* { dg-final { scan-assembler-not "vpbroadcastb\[\\t \]+%xmm\[0-9\]+, %ymm\[0-9\]+" } } */
 /* { dg-final { scan-assembler-not "vmovdqa" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-1.c b/gcc/testsuite/gcc.target/i386/pr90773-1.c
index 1d9f282dc0d..4fd5a40d99d 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mtune=generic" } */
+/* { dg-options "-O2 -msse2 -mtune=generic" } */
 
 extern char *dst, *src;
 
@@ -9,9 +9,5 @@ foo (void)
   __builtin_memcpy (dst, src, 15);
 }
 
-/* { dg-final { scan-assembler-times "movq\[\\t \]+\\(%\[\^,\]+\\)," 1 { target { ! ia32 } } } } */
-/* { dg-final { scan-assembler-times "movq\[\\t \]+7\\(%\[\^,\]+\\)," 1 { target { ! ia32 } } } } */
-/* { dg-final { scan-assembler-times "movl\[\\t \]+\\(%\[\^,\]+\\)," 1 { target ia32 } } } */
-/* { dg-final { scan-assembler-times "movl\[\\t \]+4\\(%\[\^,\]+\\)," 1 { target ia32 } } } */
-/* { dg-final { scan-assembler-times "movl\[\\t \]+8\\(%\[\^,\]+\\)," 1 { target ia32 } } } */
-/* { dg-final { scan-assembler-times "movl\[\\t \]+11\\(%\[\^,\]+\\)," 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "movq\[\\t \]+\\(%\[\^,\]+\\)," 1 } } */
+/* { dg-final { scan-assembler-times "movq\[\\t \]+7\\(%\[\^,\]+\\)," 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-14.c b/gcc/testsuite/gcc.target/i386/pr90773-14.c
index 6364916ecac..96ee5cb08c1 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-14.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-14.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-do compile } */
 /* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
 
 extern char *dst;
@@ -10,4 +10,4 @@ foo (void)
 }
 
 /* { dg-final { scan-assembler-times "movups\[\\t \]+%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
-/* { dg-final { scan-assembler-times "movl\[\\t \]+\\\$16843009, 16\\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "movd\[\\t \]+%xmm\[0-9\]+, 16\\(%\[\^,\]+\\)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-15.c b/gcc/testsuite/gcc.target/i386/pr90773-15.c
index c0a96fed892..880f71d1567 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-15.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-15.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-do compile } */
 /* { dg-options "-O2 -march=skylake-avx512" } */
 
 extern char *dst;
@@ -9,6 +9,6 @@ foo (int c)
   __builtin_memset (dst, c, 17);
 }
 
-/* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%edi, %xmm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%.*, %xmm\[0-9\]+" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]+%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
-/* { dg-final { scan-assembler-times "movb\[\\t \]+%dil, 16\\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "movb\[\\t \]+%.*, 16\\(%\[\^,\]+\\)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-16.c b/gcc/testsuite/gcc.target/i386/pr90773-16.c
index d2d1ec6141c..32a976b10df 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-16.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-16.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-do compile } */
 /* { dg-options "-O2 -march=skylake-avx512" } */
 
 extern char *dst;
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-17.c b/gcc/testsuite/gcc.target/i386/pr90773-17.c
index 6c8da7d24ef..2d6fbf22a8b 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-17.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-17.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-do compile } */
 /* { dg-options "-O2 -march=skylake-avx512" } */
 
 extern char *dst;
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-24.c b/gcc/testsuite/gcc.target/i386/pr90773-24.c
index 7b2ea66dcfc..71f1fd8c4df 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-24.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-24.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-do compile } */
 /* { dg-options "-O2 -march=x86-64" } */
 
 struct S
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-25.c b/gcc/testsuite/gcc.target/i386/pr90773-25.c
index 57642ea8d2d..ad19a88c883 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-25.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-25.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-do compile } */
 /* { dg-options "-O2 -march=x86-64" } */
 
 struct S
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-4.c b/gcc/testsuite/gcc.target/i386/pr90773-4.c
index ec0bc0100ae..ee4c04678d1 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-4.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-4.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-do compile } */
 /* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
 
 extern char *dst;
-- 
2.31.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v5 05/11] x86: Add AVX2 tests for PR middle-end/90773
  2021-07-01 15:22 [PATCH v5 00/11] Allow TImode/OImode/XImode in op_by_pieces operations H.J. Lu
                   ` (3 preceding siblings ...)
  2021-07-01 15:22 ` [PATCH v5 04/11] x86: Update piecewise move and store H.J. Lu
@ 2021-07-01 15:22 ` H.J. Lu
  2021-07-01 15:22 ` [PATCH v5 06/11] x86: Add tests for piecewise move and store H.J. Lu
                   ` (5 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: H.J. Lu @ 2021-07-01 15:22 UTC (permalink / raw)
  To: gcc-patches
  Cc: Richard Biener, Richard Sandiford, Uros Bizjak, Bernd Edlinger

	PR middle-end/90773
	* gcc.target/i386/pr90773-20.c: New test.
	* gcc.target/i386/pr90773-21.c: Likewise.
	* gcc.target/i386/pr90773-22.c: Likewise.
	* gcc.target/i386/pr90773-23.c: Likewise.
	* gcc.target/i386/pr90773-26.c: Likewise.
---
 gcc/testsuite/gcc.target/i386/pr90773-20.c | 13 +++++++++++++
 gcc/testsuite/gcc.target/i386/pr90773-21.c | 13 +++++++++++++
 gcc/testsuite/gcc.target/i386/pr90773-22.c | 13 +++++++++++++
 gcc/testsuite/gcc.target/i386/pr90773-23.c | 13 +++++++++++++
 gcc/testsuite/gcc.target/i386/pr90773-26.c | 21 +++++++++++++++++++++
 5 files changed, 73 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-20.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-21.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-22.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-23.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-26.c

diff --git a/gcc/testsuite/gcc.target/i386/pr90773-20.c b/gcc/testsuite/gcc.target/i386/pr90773-20.c
new file mode 100644
index 00000000000..e61e405f2b6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90773-20.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake" } */
+
+extern char *dst;
+
+void
+foo (int c)
+{
+  __builtin_memset (dst, c, 33);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "movb\[\\t \]+.+, 32\\(%\[\^,\]+\\)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-21.c b/gcc/testsuite/gcc.target/i386/pr90773-21.c
new file mode 100644
index 00000000000..16ad17f3cbb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90773-21.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake" } */
+
+extern char *dst;
+
+void
+foo (int c)
+{
+  __builtin_memset (dst, c, 34);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "movw\[\\t \]%.*, 32\\(%\[\^,\]+\\)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-22.c b/gcc/testsuite/gcc.target/i386/pr90773-22.c
new file mode 100644
index 00000000000..45a8ff65a84
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90773-22.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 0, 33);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "movb\[\\t \]+.+, 32\\(%\[\^,\]+\\)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-23.c b/gcc/testsuite/gcc.target/i386/pr90773-23.c
new file mode 100644
index 00000000000..9256ce10ff0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90773-23.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 0, 34);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "movw\[\\t \]+.+, 32\\(%\[\^,\]+\\)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-26.c b/gcc/testsuite/gcc.target/i386/pr90773-26.c
new file mode 100644
index 00000000000..b2513c3a9c8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90773-26.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake" } */
+
+struct S
+{
+  long long s1 __attribute__ ((aligned (8)));
+  unsigned s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14;
+};
+
+const struct S array[] = {
+  { 0, 60, 640, 2112543726, 39682, 48, 16, 33, 10, 96, 2, 0, 0, 4 }
+};
+
+void
+foo (struct S *x)
+{
+  x[0] = array[0];
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, 32\\(%\[\^,\]+\\)" 1 } } */
-- 
2.31.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v5 06/11] x86: Add tests for piecewise move and store
  2021-07-01 15:22 [PATCH v5 00/11] Allow TImode/OImode/XImode in op_by_pieces operations H.J. Lu
                   ` (4 preceding siblings ...)
  2021-07-01 15:22 ` [PATCH v5 05/11] x86: Add AVX2 tests for PR middle-end/90773 H.J. Lu
@ 2021-07-01 15:22 ` H.J. Lu
  2021-07-01 15:22 ` [PATCH v5 07/11] x86: Also pass -mno-avx to pr72839.c H.J. Lu
                   ` (4 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: H.J. Lu @ 2021-07-01 15:22 UTC (permalink / raw)
  To: gcc-patches
  Cc: Richard Biener, Richard Sandiford, Uros Bizjak, Bernd Edlinger

	* gcc.target/i386/pieces-memcpy-10.c: New test.
	* gcc.target/i386/pieces-memcpy-11.c: Likewise.
	* gcc.target/i386/pieces-memcpy-12.c: Likewise.
	* gcc.target/i386/pieces-memcpy-13.c: Likewise.
	* gcc.target/i386/pieces-memcpy-14.c: Likewise.
	* gcc.target/i386/pieces-memcpy-15.c: Likewise.
	* gcc.target/i386/pieces-memcpy-16.c: Likewise.
	* gcc.target/i386/pieces-memcpy-17.c: Likewise.
	* gcc.target/i386/pieces-memcpy-18.c: Likewise.
	* gcc.target/i386/pieces-memcpy-19.c: Likewise.
	* gcc.target/i386/pieces-memset-1.c: Likewise.
	* gcc.target/i386/pieces-memset-2.c: Likewise.
	* gcc.target/i386/pieces-memset-3.c: Likewise.
	* gcc.target/i386/pieces-memset-4.c: Likewise.
	* gcc.target/i386/pieces-memset-5.c: Likewise.
	* gcc.target/i386/pieces-memset-6.c: Likewise.
	* gcc.target/i386/pieces-memset-7.c: Likewise.
	* gcc.target/i386/pieces-memset-8.c: Likewise.
	* gcc.target/i386/pieces-memset-9.c: Likewise.
	* gcc.target/i386/pieces-memset-10.c: Likewise.
	* gcc.target/i386/pieces-memset-11.c: Likewise.
	* gcc.target/i386/pieces-memset-12.c: Likewise.
	* gcc.target/i386/pieces-memset-13.c: Likewise.
	* gcc.target/i386/pieces-memset-14.c: Likewise.
	* gcc.target/i386/pieces-memset-15.c: Likewise.
	* gcc.target/i386/pieces-memset-16.c: Likewise.
	* gcc.target/i386/pieces-memset-17.c: Likewise.
	* gcc.target/i386/pieces-memset-18.c: Likewise.
	* gcc.target/i386/pieces-memset-19.c: Likewise.
	* gcc.target/i386/pieces-memset-20.c: Likewise.
	* gcc.target/i386/pieces-memset-21.c: Likewise.
	* gcc.target/i386/pieces-memset-22.c: Likewise.
	* gcc.target/i386/pieces-memset-23.c: Likewise.
	* gcc.target/i386/pieces-memset-24.c: Likewise.
	* gcc.target/i386/pieces-memset-25.c: Likewise.
	* gcc.target/i386/pieces-memset-26.c: Likewise.
	* gcc.target/i386/pieces-memset-27.c: Likewise.
	* gcc.target/i386/pieces-memset-28.c: Likewise.
	* gcc.target/i386/pieces-memset-29.c: Likewise.
	* gcc.target/i386/pieces-memset-30.c: Likewise.
	* gcc.target/i386/pieces-memset-31.c: Likewise.
	* gcc.target/i386/pieces-memset-32.c: Likewise.
	* gcc.target/i386/pieces-memset-33.c: Likewise.
	* gcc.target/i386/pieces-memset-34.c: Likewise.
	* gcc.target/i386/pieces-memset-35.c: Likewise.
	* gcc.target/i386/pieces-memset-36.c: Likewise.
	* gcc.target/i386/pieces-memset-37.c: Likewise.
	* gcc.target/i386/pieces-memset-38.c: Likewise.
	* gcc.target/i386/pieces-memset-39.c: Likewise.
	* gcc.target/i386/pieces-memset-40.c: Likewise.
	* gcc.target/i386/pieces-memset-41.c: Likewise.
	* gcc.target/i386/pieces-memset-42.c: Likewise.
	* gcc.target/i386/pieces-memset-43.c: Likewise.
	* gcc.target/i386/pieces-memset-44.c: Likewise.
---
 .../gcc.target/i386/pieces-memcpy-10.c         | 16 ++++++++++++++++
 .../gcc.target/i386/pieces-memcpy-11.c         | 17 +++++++++++++++++
 .../gcc.target/i386/pieces-memcpy-12.c         | 16 ++++++++++++++++
 .../gcc.target/i386/pieces-memcpy-13.c         | 16 ++++++++++++++++
 .../gcc.target/i386/pieces-memcpy-14.c         | 17 +++++++++++++++++
 .../gcc.target/i386/pieces-memcpy-15.c         | 16 ++++++++++++++++
 .../gcc.target/i386/pieces-memcpy-16.c         | 16 ++++++++++++++++
 .../gcc.target/i386/pieces-memcpy-7.c          | 15 +++++++++++++++
 .../gcc.target/i386/pieces-memcpy-8.c          | 14 ++++++++++++++
 .../gcc.target/i386/pieces-memcpy-9.c          | 14 ++++++++++++++
 .../gcc.target/i386/pieces-memset-1.c          | 16 ++++++++++++++++
 .../gcc.target/i386/pieces-memset-10.c         | 16 ++++++++++++++++
 .../gcc.target/i386/pieces-memset-11.c         | 16 ++++++++++++++++
 .../gcc.target/i386/pieces-memset-12.c         | 16 ++++++++++++++++
 .../gcc.target/i386/pieces-memset-13.c         | 16 ++++++++++++++++
 .../gcc.target/i386/pieces-memset-14.c         | 16 ++++++++++++++++
 .../gcc.target/i386/pieces-memset-15.c         | 16 ++++++++++++++++
 .../gcc.target/i386/pieces-memset-16.c         | 16 ++++++++++++++++
 .../gcc.target/i386/pieces-memset-17.c         | 16 ++++++++++++++++
 .../gcc.target/i386/pieces-memset-18.c         | 16 ++++++++++++++++
 .../gcc.target/i386/pieces-memset-19.c         | 17 +++++++++++++++++
 .../gcc.target/i386/pieces-memset-2.c          | 12 ++++++++++++
 .../gcc.target/i386/pieces-memset-20.c         | 17 +++++++++++++++++
 .../gcc.target/i386/pieces-memset-21.c         | 17 +++++++++++++++++
 .../gcc.target/i386/pieces-memset-22.c         | 17 +++++++++++++++++
 .../gcc.target/i386/pieces-memset-23.c         | 17 +++++++++++++++++
 .../gcc.target/i386/pieces-memset-24.c         | 17 +++++++++++++++++
 .../gcc.target/i386/pieces-memset-25.c         | 17 +++++++++++++++++
 .../gcc.target/i386/pieces-memset-26.c         | 17 +++++++++++++++++
 .../gcc.target/i386/pieces-memset-27.c         | 17 +++++++++++++++++
 .../gcc.target/i386/pieces-memset-28.c         | 17 +++++++++++++++++
 .../gcc.target/i386/pieces-memset-29.c         | 17 +++++++++++++++++
 .../gcc.target/i386/pieces-memset-3.c          | 18 ++++++++++++++++++
 .../gcc.target/i386/pieces-memset-30.c         | 17 +++++++++++++++++
 .../gcc.target/i386/pieces-memset-31.c         | 17 +++++++++++++++++
 .../gcc.target/i386/pieces-memset-32.c         | 17 +++++++++++++++++
 .../gcc.target/i386/pieces-memset-33.c         | 17 +++++++++++++++++
 .../gcc.target/i386/pieces-memset-34.c         | 17 +++++++++++++++++
 .../gcc.target/i386/pieces-memset-35.c         | 17 +++++++++++++++++
 .../gcc.target/i386/pieces-memset-36.c         | 17 +++++++++++++++++
 .../gcc.target/i386/pieces-memset-37.c         | 15 +++++++++++++++
 .../gcc.target/i386/pieces-memset-38.c         | 17 +++++++++++++++++
 .../gcc.target/i386/pieces-memset-39.c         | 16 ++++++++++++++++
 .../gcc.target/i386/pieces-memset-4.c          | 16 ++++++++++++++++
 .../gcc.target/i386/pieces-memset-40.c         | 17 +++++++++++++++++
 .../gcc.target/i386/pieces-memset-41.c         | 16 ++++++++++++++++
 .../gcc.target/i386/pieces-memset-42.c         | 17 +++++++++++++++++
 .../gcc.target/i386/pieces-memset-43.c         | 17 +++++++++++++++++
 .../gcc.target/i386/pieces-memset-5.c          | 12 ++++++++++++
 .../gcc.target/i386/pieces-memset-6.c          | 16 ++++++++++++++++
 .../gcc.target/i386/pieces-memset-7.c          | 16 ++++++++++++++++
 .../gcc.target/i386/pieces-memset-8.c          | 16 ++++++++++++++++
 .../gcc.target/i386/pieces-memset-9.c          | 16 ++++++++++++++++
 53 files changed, 860 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-11.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-12.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-13.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-14.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-15.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-16.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-7.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-8.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-9.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-10.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-11.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-12.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-13.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-14.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-15.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-16.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-17.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-18.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-19.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-20.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-21.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-22.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-23.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-24.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-25.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-26.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-27.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-28.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-29.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-30.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-31.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-32.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-33.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-34.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-35.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-36.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-37.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-38.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-39.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-40.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-41.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-42.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-43.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-7.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-8.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-9.c

diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c
new file mode 100644
index 00000000000..5faee21f9b9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+
+extern char *dst, *src;
+
+void
+foo (void)
+{
+  __builtin_memcpy (dst, src, 33);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 4 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-11.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-11.c
new file mode 100644
index 00000000000..b8917a7f917
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-11.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst, *src;
+
+void
+foo (void)
+{
+  __builtin_memcpy (dst, src, 64);
+}
+
+/* { dg-final { scan-assembler-times "movdqu\[ \\t\]+\[^\n\]*%xmm" 4 } } */
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-12.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-12.c
new file mode 100644
index 00000000000..f1432ebe517
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-12.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+
+extern char *dst, *src;
+
+void
+foo (void)
+{
+  __builtin_memcpy (dst, src, 64);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 4 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-13.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-13.c
new file mode 100644
index 00000000000..97e6067fec9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-13.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst, *src;
+
+void
+foo (void)
+{
+  __builtin_memcpy (dst, src, 66);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\n\]*%zmm" 2 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-14.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-14.c
new file mode 100644
index 00000000000..7addc4c0a28
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-14.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst, *src;
+
+void
+foo (void)
+{
+  __builtin_memcpy (dst, src, 33);
+}
+
+/* { dg-final { scan-assembler-times "movdqu\[ \\t\]+\[^\n\]*%xmm" 2 } } */
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-15.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-15.c
new file mode 100644
index 00000000000..695e8c3fa67
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-15.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+
+extern char *dst, *src;
+
+void
+foo (void)
+{
+  __builtin_memcpy (dst, src, 33);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-16.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-16.c
new file mode 100644
index 00000000000..b0643d05ee7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-16.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst, *src;
+
+void
+foo (void)
+{
+  __builtin_memcpy (dst, src, 34);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-7.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-7.c
new file mode 100644
index 00000000000..3d248d447ea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-7.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+void
+foo (int a1, int a2, int a3, int a4, int a5, int a6, char *dst, char *src)
+{
+  __builtin_memcpy (dst, src, 17);
+}
+
+/* { dg-final { scan-assembler-times "movdqu\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-8.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-8.c
new file mode 100644
index 00000000000..c13a2beb2f0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-8.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
+
+void
+foo (int a1, int a2, int a3, int a4, int a5, int a6, char *dst, char *src)
+{
+  __builtin_memcpy (dst, src, 18);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 2 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-9.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-9.c
new file mode 100644
index 00000000000..238f88b275e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-9.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+void
+foo (int a1, int a2, int a3, int a4, int a5, int a6, char *dst, char *src)
+{
+  __builtin_memcpy (dst, src, 19);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 2 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-1.c b/gcc/testsuite/gcc.target/i386/pieces-memset-1.c
new file mode 100644
index 00000000000..2b8032684b3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+  __builtin_memset (dst, x, 64);
+}
+
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-10.c b/gcc/testsuite/gcc.target/i386/pieces-memset-10.c
new file mode 100644
index 00000000000..a6390d1bd8f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-10.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 3, 64);
+}
+
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-11.c b/gcc/testsuite/gcc.target/i386/pieces-memset-11.c
new file mode 100644
index 00000000000..3fb9038b04f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-11.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 3, 64);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-12.c b/gcc/testsuite/gcc.target/i386/pieces-memset-12.c
new file mode 100644
index 00000000000..fa834566097
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-12.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 3, 66);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\n\]*%zmm" 1 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-13.c b/gcc/testsuite/gcc.target/i386/pieces-memset-13.c
new file mode 100644
index 00000000000..7f2cd3f58ec
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-13.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 3, 33);
+}
+
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-14.c b/gcc/testsuite/gcc.target/i386/pieces-memset-14.c
new file mode 100644
index 00000000000..45ece482464
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-14.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 3, 33);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 1 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-15.c b/gcc/testsuite/gcc.target/i386/pieces-memset-15.c
new file mode 100644
index 00000000000..bddf47d728e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-15.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 3, 33);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 1 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-16.c b/gcc/testsuite/gcc.target/i386/pieces-memset-16.c
new file mode 100644
index 00000000000..1c5d124cecc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-16.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 3, 17);
+}
+
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-17.c b/gcc/testsuite/gcc.target/i386/pieces-memset-17.c
new file mode 100644
index 00000000000..6cdb33557c0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-17.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 3, 17);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-18.c b/gcc/testsuite/gcc.target/i386/pieces-memset-18.c
new file mode 100644
index 00000000000..adbd201b4e7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-18.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 3, 18);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-19.c b/gcc/testsuite/gcc.target/i386/pieces-memset-19.c
new file mode 100644
index 00000000000..7e9cf2e26d8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-19.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 0, 64);
+}
+
+/* { dg-final { scan-assembler-times "pxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-2.c b/gcc/testsuite/gcc.target/i386/pieces-memset-2.c
new file mode 100644
index 00000000000..649f344e8f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+  __builtin_memset (dst, x, 64);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-20.c b/gcc/testsuite/gcc.target/i386/pieces-memset-20.c
new file mode 100644
index 00000000000..b8747e669e8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-20.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 0, 64);
+}
+
+/* { dg-final { scan-assembler-times "vpxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-21.c b/gcc/testsuite/gcc.target/i386/pieces-memset-21.c
new file mode 100644
index 00000000000..4f001c6d06c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-21.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 0, 66);
+}
+
+/* { dg-final { scan-assembler-times "vpxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\n\]*%zmm" 1 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-22.c b/gcc/testsuite/gcc.target/i386/pieces-memset-22.c
new file mode 100644
index 00000000000..5f3c454ef8f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-22.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 0, 33);
+}
+
+/* { dg-final { scan-assembler-times "pxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-23.c b/gcc/testsuite/gcc.target/i386/pieces-memset-23.c
new file mode 100644
index 00000000000..a3b4ffc18e0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-23.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 0, 33);
+}
+
+/* { dg-final { scan-assembler-times "vpxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 1 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-24.c b/gcc/testsuite/gcc.target/i386/pieces-memset-24.c
new file mode 100644
index 00000000000..e222787b541
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-24.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 0, 33);
+}
+
+/* { dg-final { scan-assembler-times "vpxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 1 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-25.c b/gcc/testsuite/gcc.target/i386/pieces-memset-25.c
new file mode 100644
index 00000000000..195ddb635eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-25.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 0, 17);
+}
+
+/* { dg-final { scan-assembler-times "pxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-26.c b/gcc/testsuite/gcc.target/i386/pieces-memset-26.c
new file mode 100644
index 00000000000..13606b2da54
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-26.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 0, 17);
+}
+
+/* { dg-final { scan-assembler-times "pxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-27.c b/gcc/testsuite/gcc.target/i386/pieces-memset-27.c
new file mode 100644
index 00000000000..54a672b6015
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-27.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 0, 17);
+}
+
+/* { dg-final { scan-assembler-times "pxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-28.c b/gcc/testsuite/gcc.target/i386/pieces-memset-28.c
new file mode 100644
index 00000000000..83c2d3f0fde
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-28.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, -1, 64);
+}
+
+/* { dg-final { scan-assembler-times "pcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-29.c b/gcc/testsuite/gcc.target/i386/pieces-memset-29.c
new file mode 100644
index 00000000000..650e6fe66a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-29.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, -1, 64);
+}
+
+/* { dg-final { scan-assembler-not "vpcmpeqd\[ \\t\]+\[^\n\]*%ymm" } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-3.c b/gcc/testsuite/gcc.target/i386/pieces-memset-3.c
new file mode 100644
index 00000000000..2aed6dbc68e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-3.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx512bw -mno-avx512vl -mavx512f -mtune=intel" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+  __builtin_memset (dst, x, 66);
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%ymm" 1 } } */
+/* { dg-final { scan-assembler-times "vinserti64x4\[ \\t\]+\[^\n\]*%zmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\n\]*%zmm" 1 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-30.c b/gcc/testsuite/gcc.target/i386/pieces-memset-30.c
new file mode 100644
index 00000000000..dcec2c700fc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-30.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, -1, 64);
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%ymm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-31.c b/gcc/testsuite/gcc.target/i386/pieces-memset-31.c
new file mode 100644
index 00000000000..5d20af0938d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-31.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, -1, 66);
+}
+
+/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\[^\n\]*%zmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\n\]*%zmm" 1 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-32.c b/gcc/testsuite/gcc.target/i386/pieces-memset-32.c
new file mode 100644
index 00000000000..c5ca0bd17ba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-32.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, -1, 33);
+}
+
+/* { dg-final { scan-assembler-times "pcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-33.c b/gcc/testsuite/gcc.target/i386/pieces-memset-33.c
new file mode 100644
index 00000000000..a87d1b80ae6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-33.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, -1, 33);
+}
+
+/* { dg-final { scan-assembler-not "vpcmpeqd\[ \\t\]+\[^\n\]*%ymm" } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 1 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-34.c b/gcc/testsuite/gcc.target/i386/pieces-memset-34.c
new file mode 100644
index 00000000000..0c2f1ee6049
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-34.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, -1, 33);
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%ymm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 1 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-35.c b/gcc/testsuite/gcc.target/i386/pieces-memset-35.c
new file mode 100644
index 00000000000..b0f4a8b898e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-35.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, -1, 34);
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%ymm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 1 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-36.c b/gcc/testsuite/gcc.target/i386/pieces-memset-36.c
new file mode 100644
index 00000000000..d1f1263c7b2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-36.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+  __builtin_memset (dst, x, 17);
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-37.c b/gcc/testsuite/gcc.target/i386/pieces-memset-37.c
new file mode 100644
index 00000000000..ec59497b116
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-37.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=generic" } */
+
+void
+foo (int a1, int a2, int a3, int a4, int a5, int a6, int x, char *dst)
+{
+  __builtin_memset (dst, x, 66);
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%ymm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-38.c b/gcc/testsuite/gcc.target/i386/pieces-memset-38.c
new file mode 100644
index 00000000000..ed4a24a54fd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-38.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, -1, 33);
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 2 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-39.c b/gcc/testsuite/gcc.target/i386/pieces-memset-39.c
new file mode 100644
index 00000000000..a330bff5f3f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-39.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw -mtune=generic" } */
+
+void
+foo (int a1, int a2, int a3, int a4, int a5, int a6, int x, char *dst)
+{
+  __builtin_memset (dst, x, 66);
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%zmm" 1 } } */
+/* { dg-final { scan-assembler-not "vinserti64x4" } } */
+/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\n\]*%zmm" 1 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-4.c b/gcc/testsuite/gcc.target/i386/pieces-memset-4.c
new file mode 100644
index 00000000000..9256919bfdf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-4.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+  __builtin_memset (dst, x, 33);
+}
+
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-40.c b/gcc/testsuite/gcc.target/i386/pieces-memset-40.c
new file mode 100644
index 00000000000..4eda73ead59
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-40.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+  __builtin_memset (dst, x, 66);
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 4 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-41.c b/gcc/testsuite/gcc.target/i386/pieces-memset-41.c
new file mode 100644
index 00000000000..f86b6986da9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-41.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+  __builtin_memset (dst, x, 33);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 2 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-42.c b/gcc/testsuite/gcc.target/i386/pieces-memset-42.c
new file mode 100644
index 00000000000..df0c122aae7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-42.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 0, 33);
+}
+
+/* { dg-final { scan-assembler-times "vpxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 2 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-43.c b/gcc/testsuite/gcc.target/i386/pieces-memset-43.c
new file mode 100644
index 00000000000..2f2179c2df9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-43.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, -1, 33);
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 2 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-5.c b/gcc/testsuite/gcc.target/i386/pieces-memset-5.c
new file mode 100644
index 00000000000..3e95db5efef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-5.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+  __builtin_memset (dst, x, 33);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-6.c b/gcc/testsuite/gcc.target/i386/pieces-memset-6.c
new file mode 100644
index 00000000000..571113c3a33
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-6.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mtune=intel" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+  __builtin_memset (dst, x, 33);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 1 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-7.c b/gcc/testsuite/gcc.target/i386/pieces-memset-7.c
new file mode 100644
index 00000000000..fd159869817
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-7.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+  __builtin_memset (dst, x, 17);
+}
+
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-8.c b/gcc/testsuite/gcc.target/i386/pieces-memset-8.c
new file mode 100644
index 00000000000..7df0019ef63
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-8.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+  __builtin_memset (dst, x, 17);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-9.c b/gcc/testsuite/gcc.target/i386/pieces-memset-9.c
new file mode 100644
index 00000000000..ed45d590875
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-9.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+  __builtin_memset (dst, x, 17);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
-- 
2.31.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v5 07/11] x86: Also pass -mno-avx to pr72839.c
  2021-07-01 15:22 [PATCH v5 00/11] Allow TImode/OImode/XImode in op_by_pieces operations H.J. Lu
                   ` (5 preceding siblings ...)
  2021-07-01 15:22 ` [PATCH v5 06/11] x86: Add tests for piecewise move and store H.J. Lu
@ 2021-07-01 15:22 ` H.J. Lu
  2021-07-01 15:22 ` [PATCH v5 08/11] x86: Also pass -mno-avx to cold-attribute-1.c H.J. Lu
                   ` (3 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: H.J. Lu @ 2021-07-01 15:22 UTC (permalink / raw)
  To: gcc-patches
  Cc: Richard Biener, Richard Sandiford, Uros Bizjak, Bernd Edlinger

Also pass -mno-avx to pr72839.c to avoid copying data with YMM or ZMM
registers.

	* gcc.target/i386/pr72839.c: Also pass -mno-avx.
---
 gcc/testsuite/gcc.target/i386/pr72839.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/i386/pr72839.c b/gcc/testsuite/gcc.target/i386/pr72839.c
index ea724f70377..6888d9d0a55 100644
--- a/gcc/testsuite/gcc.target/i386/pr72839.c
+++ b/gcc/testsuite/gcc.target/i386/pr72839.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target ia32 } */
-/* { dg-options "-O2 -mtune=lakemont" } */
+/* { dg-options "-O2 -mtune=lakemont -mno-avx" } */
 
 extern char *strcpy (char *, const char *);
 
-- 
2.31.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v5 08/11] x86: Also pass -mno-avx to cold-attribute-1.c
  2021-07-01 15:22 [PATCH v5 00/11] Allow TImode/OImode/XImode in op_by_pieces operations H.J. Lu
                   ` (6 preceding siblings ...)
  2021-07-01 15:22 ` [PATCH v5 07/11] x86: Also pass -mno-avx to pr72839.c H.J. Lu
@ 2021-07-01 15:22 ` H.J. Lu
  2021-07-01 15:22 ` [PATCH v5 09/11] x86: Also pass -mno-avx to sw-1.c for ia32 H.J. Lu
                   ` (2 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: H.J. Lu @ 2021-07-01 15:22 UTC (permalink / raw)
  To: gcc-patches
  Cc: Richard Biener, Richard Sandiford, Uros Bizjak, Bernd Edlinger

Also pass -mno-avx to pr72839.c to avoid copying data with YMM or ZMM
registers.

	* gcc.target/i386/cold-attribute-1.c: Also pass -mno-avx.
---
 gcc/testsuite/gcc.target/i386/cold-attribute-1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/i386/cold-attribute-1.c b/gcc/testsuite/gcc.target/i386/cold-attribute-1.c
index 57666ac60b6..658eb3e25bb 100644
--- a/gcc/testsuite/gcc.target/i386/cold-attribute-1.c
+++ b/gcc/testsuite/gcc.target/i386/cold-attribute-1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2" } */
+/* { dg-options "-O2 -mno-avx" } */
 #include <string.h>
 static inline
 __attribute__ ((cold)) void
-- 
2.31.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v5 09/11] x86: Also pass -mno-avx to sw-1.c for ia32
  2021-07-01 15:22 [PATCH v5 00/11] Allow TImode/OImode/XImode in op_by_pieces operations H.J. Lu
                   ` (7 preceding siblings ...)
  2021-07-01 15:22 ` [PATCH v5 08/11] x86: Also pass -mno-avx to cold-attribute-1.c H.J. Lu
@ 2021-07-01 15:22 ` H.J. Lu
  2021-07-01 15:22 ` [PATCH v5 10/11] x86: Update gcc.target/i386/incoming-11.c H.J. Lu
  2021-07-01 15:22 ` [PATCH v5 11/11] x86: Also pass -mno-sse to vect8-ret.c H.J. Lu
  10 siblings, 0 replies; 12+ messages in thread
From: H.J. Lu @ 2021-07-01 15:22 UTC (permalink / raw)
  To: gcc-patches
  Cc: Richard Biener, Richard Sandiford, Uros Bizjak, Bernd Edlinger

Also pass -mno-avx to sw-1.c for ia32 since copying data with YMM or ZMM
registers disables shrink-wrapping when the second argument is passed on
stack.

	* gcc.target/i386/sw-1.c: Also pass -mno-avx for ia32.
---
 gcc/testsuite/gcc.target/i386/sw-1.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/testsuite/gcc.target/i386/sw-1.c b/gcc/testsuite/gcc.target/i386/sw-1.c
index aec095eda62..a9c89fca4ec 100644
--- a/gcc/testsuite/gcc.target/i386/sw-1.c
+++ b/gcc/testsuite/gcc.target/i386/sw-1.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -mtune=generic -fshrink-wrap -fdump-rtl-pro_and_epilogue" } */
+/* { dg-additional-options "-mno-avx" { target ia32 } } */
 /* { dg-skip-if "No shrink-wrapping preformed" { x86_64-*-mingw* } } */
 
 #include <string.h>
-- 
2.31.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v5 10/11] x86: Update gcc.target/i386/incoming-11.c
  2021-07-01 15:22 [PATCH v5 00/11] Allow TImode/OImode/XImode in op_by_pieces operations H.J. Lu
                   ` (8 preceding siblings ...)
  2021-07-01 15:22 ` [PATCH v5 09/11] x86: Also pass -mno-avx to sw-1.c for ia32 H.J. Lu
@ 2021-07-01 15:22 ` H.J. Lu
  2021-07-01 15:22 ` [PATCH v5 11/11] x86: Also pass -mno-sse to vect8-ret.c H.J. Lu
  10 siblings, 0 replies; 12+ messages in thread
From: H.J. Lu @ 2021-07-01 15:22 UTC (permalink / raw)
  To: gcc-patches
  Cc: Richard Biener, Richard Sandiford, Uros Bizjak, Bernd Edlinger

Expect no stack realignment since we no longer realign stack when
copying data.

	* gcc.target/i386/incoming-11.c: Expect no stack realignment.
---
 gcc/testsuite/gcc.target/i386/incoming-11.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/i386/incoming-11.c b/gcc/testsuite/gcc.target/i386/incoming-11.c
index a830c96f7d1..4b822684b88 100644
--- a/gcc/testsuite/gcc.target/i386/incoming-11.c
+++ b/gcc/testsuite/gcc.target/i386/incoming-11.c
@@ -15,4 +15,4 @@ void f()
 	for (i = 0; i < 100; i++) q[i] = 1;
 }
 
-/* { dg-final { scan-assembler "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */
+/* { dg-final { scan-assembler-not "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */
-- 
2.31.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v5 11/11] x86: Also pass -mno-sse to vect8-ret.c
  2021-07-01 15:22 [PATCH v5 00/11] Allow TImode/OImode/XImode in op_by_pieces operations H.J. Lu
                   ` (9 preceding siblings ...)
  2021-07-01 15:22 ` [PATCH v5 10/11] x86: Update gcc.target/i386/incoming-11.c H.J. Lu
@ 2021-07-01 15:22 ` H.J. Lu
  10 siblings, 0 replies; 12+ messages in thread
From: H.J. Lu @ 2021-07-01 15:22 UTC (permalink / raw)
  To: gcc-patches
  Cc: Richard Biener, Richard Sandiford, Uros Bizjak, Bernd Edlinger

Also pass -mno-sse to vect8-ret.c to disable XMM load/store when running
GCC tests with "-march=x86-64 -m32".

	* gcc.target/i386/vect8-ret.c: Also pass -mno-sse.
---
 gcc/testsuite/gcc.target/i386/vect8-ret.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/i386/vect8-ret.c b/gcc/testsuite/gcc.target/i386/vect8-ret.c
index 2b2b81ecf7a..6ace07e6e0c 100644
--- a/gcc/testsuite/gcc.target/i386/vect8-ret.c
+++ b/gcc/testsuite/gcc.target/i386/vect8-ret.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { ia32 && { ! *-*-vxworks* } } } } */
-/* { dg-options "-mmmx -mvect8-ret-in-mem" } */
+/* { dg-options "-mmmx -mno-sse -mvect8-ret-in-mem" } */
 
 #include <mmintrin.h>
 
-- 
2.31.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2021-07-01 15:22 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-01 15:22 [PATCH v5 00/11] Allow TImode/OImode/XImode in op_by_pieces operations H.J. Lu
2021-07-01 15:22 ` [PATCH v5 01/11] Rewrite memset with TARGET_GEN_MEMSET_SCRATCH_RTX H.J. Lu
2021-07-01 15:22 ` [PATCH v5 02/11] x86: Add TARGET_GEN_MEMSET_SCRATCH_RTX H.J. Lu
2021-07-01 15:22 ` [PATCH v5 03/11] x86: Avoid stack realignment when copying data H.J. Lu
2021-07-01 15:22 ` [PATCH v5 04/11] x86: Update piecewise move and store H.J. Lu
2021-07-01 15:22 ` [PATCH v5 05/11] x86: Add AVX2 tests for PR middle-end/90773 H.J. Lu
2021-07-01 15:22 ` [PATCH v5 06/11] x86: Add tests for piecewise move and store H.J. Lu
2021-07-01 15:22 ` [PATCH v5 07/11] x86: Also pass -mno-avx to pr72839.c H.J. Lu
2021-07-01 15:22 ` [PATCH v5 08/11] x86: Also pass -mno-avx to cold-attribute-1.c H.J. Lu
2021-07-01 15:22 ` [PATCH v5 09/11] x86: Also pass -mno-avx to sw-1.c for ia32 H.J. Lu
2021-07-01 15:22 ` [PATCH v5 10/11] x86: Update gcc.target/i386/incoming-11.c H.J. Lu
2021-07-01 15:22 ` [PATCH v5 11/11] x86: Also pass -mno-sse to vect8-ret.c H.J. Lu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).