* [PATCH v5 01/11] Rewrite memset with TARGET_GEN_MEMSET_SCRATCH_RTX
2021-07-01 15:22 [PATCH v5 00/11] Allow TImode/OImode/XImode in op_by_pieces operations H.J. Lu
@ 2021-07-01 15:22 ` H.J. Lu
2021-07-01 15:22 ` [PATCH v5 02/11] x86: Add TARGET_GEN_MEMSET_SCRATCH_RTX H.J. Lu
` (9 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: H.J. Lu @ 2021-07-01 15:22 UTC (permalink / raw)
To: gcc-patches
Cc: Richard Biener, Richard Sandiford, Uros Bizjak, Bernd Edlinger
1. Rewrite builtin_memset_read_str/builtin_memset_gen_str to use vector
broadcast to duplicate QI value to TI/OI/XI value for memset.
2. Add TARGET_GEN_MEMSET_SCRATCH_RTX to allow the backend to use a hard
scratch register to avoid stack realignment when expanding memset.
PR middle-end/90773
* builtins.c (gen_memset_value_from_prev): New function.
(gen_memset_broadcast): Likewise.
(builtin_memset_read_str): Use gen_memset_value_from_prev
and gen_memset_broadcast.
(builtin_memset_gen_str): Likewise.
* target.def (gen_memset_scratch_rtx): New hook.
* doc/tm.texi.in: Add TARGET_GEN_MEMSET_SCRATCH_RTX.
* doc/tm.texi: Regenerated.
---
gcc/builtins.c | 123 +++++++++++++++++++++++++++++++++++++--------
gcc/doc/tm.texi | 5 ++
gcc/doc/tm.texi.in | 2 +
gcc/target.def | 7 +++
4 files changed, 116 insertions(+), 21 deletions(-)
diff --git a/gcc/builtins.c b/gcc/builtins.c
index e5e39386a93..e938d610f12 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -6639,26 +6639,111 @@ expand_builtin_strncpy (tree exp, rtx target)
return NULL_RTX;
}
-/* Callback routine for store_by_pieces. Read GET_MODE_BITSIZE (MODE)
- bytes from constant string DATA + OFFSET and return it as target
- constant. If PREV isn't nullptr, it has the RTL info from the
+/* Return the RTL of a register in MODE generated from PREV in the
previous iteration. */
-rtx
-builtin_memset_read_str (void *data, void *prevp,
- HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
- scalar_int_mode mode)
+static rtx
+gen_memset_value_from_prev (void *prevp, scalar_int_mode mode)
{
+ rtx target = nullptr;
by_pieces_prev *prev = (by_pieces_prev *) prevp;
if (prev != nullptr && prev->data != nullptr)
{
/* Use the previous data in the same mode. */
if (prev->mode == mode)
return prev->data;
+
+ rtx prev_rtx = prev->data;
+ machine_mode prev_mode = prev->mode;
+ unsigned int word_size = GET_MODE_SIZE (word_mode);
+ if (word_size < GET_MODE_SIZE (prev->mode)
+ && word_size > GET_MODE_SIZE (mode))
+ {
+ /* First generate subreg of word mode if the previous mode is
+ wider than word mode and word mode is wider than MODE. */
+ prev_rtx = simplify_gen_subreg (word_mode, prev_rtx,
+ prev_mode, 0);
+ prev_mode = word_mode;
+ }
+ if (prev_rtx != nullptr)
+ target = simplify_gen_subreg (mode, prev_rtx, prev_mode, 0);
}
+ return target;
+}
+
+/* Return the RTL of a register in MODE broadcasted from DATA. */
+
+static rtx
+gen_memset_broadcast (rtx data, scalar_int_mode mode)
+{
+ /* Skip if regno_reg_rtx isn't initialized. */
+ if (!regno_reg_rtx)
+ return nullptr;
+
+ rtx target = nullptr;
+
+ unsigned int nunits = GET_MODE_SIZE (mode) / GET_MODE_SIZE (QImode);
+ machine_mode vector_mode;
+ if (!mode_for_vector (QImode, nunits).exists (&vector_mode))
+ gcc_unreachable ();
+
+ enum insn_code icode = optab_handler (vec_duplicate_optab,
+ vector_mode);
+ if (icode != CODE_FOR_nothing)
+ {
+ rtx reg = targetm.gen_memset_scratch_rtx (vector_mode);
+ if (CONST_INT_P (data))
+ {
+ /* Use the move expander with CONST_VECTOR. */
+ rtx const_vec = gen_const_vec_duplicate (vector_mode, data);
+ emit_move_insn (reg, const_vec);
+ }
+ else
+ {
+
+ class expand_operand ops[2];
+ create_output_operand (&ops[0], reg, vector_mode);
+ create_input_operand (&ops[1], data, QImode);
+ expand_insn (icode, 2, ops);
+ if (!rtx_equal_p (reg, ops[0].value))
+ emit_move_insn (reg, ops[0].value);
+ }
+ target = lowpart_subreg (mode, reg, vector_mode);
+ }
+
+ return target;
+}
+
+/* Callback routine for store_by_pieces. Read GET_MODE_BITSIZE (MODE)
+ bytes from constant string DATA + OFFSET and return it as target
+ constant. If PREV isn't nullptr, it has the RTL info from the
+ previous iteration. */
+rtx
+builtin_memset_read_str (void *data, void *prev,
+ HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
+ scalar_int_mode mode)
+{
+ rtx target;
const char *c = (const char *) data;
- char *p = XALLOCAVEC (char, GET_MODE_SIZE (mode));
+ char *p;
+
+ /* Don't use the previous value if size is 1. */
+ if (GET_MODE_SIZE (mode) != 1)
+ {
+ target = gen_memset_value_from_prev (prev, mode);
+ if (target != nullptr)
+ return target;
+
+ p = XALLOCAVEC (char, GET_MODE_SIZE (QImode));
+ memset (p, *c, GET_MODE_SIZE (QImode));
+ rtx src = c_readstr (p, QImode);
+ target = gen_memset_broadcast (src, mode);
+ if (target != nullptr)
+ return target;
+ }
+
+ p = XALLOCAVEC (char, GET_MODE_SIZE (mode));
memset (p, *c, GET_MODE_SIZE (mode));
@@ -6672,7 +6757,7 @@ builtin_memset_read_str (void *data, void *prevp,
nullptr, it has the RTL info from the previous iteration. */
static rtx
-builtin_memset_gen_str (void *data, void *prevp,
+builtin_memset_gen_str (void *data, void *prev,
HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
scalar_int_mode mode)
{
@@ -6680,22 +6765,18 @@ builtin_memset_gen_str (void *data, void *prevp,
size_t size;
char *p;
- by_pieces_prev *prev = (by_pieces_prev *) prevp;
- if (prev != nullptr && prev->data != nullptr)
- {
- /* Use the previous data in the same mode. */
- if (prev->mode == mode)
- return prev->data;
-
- target = simplify_gen_subreg (mode, prev->data, prev->mode, 0);
- if (target != nullptr)
- return target;
- }
-
size = GET_MODE_SIZE (mode);
if (size == 1)
return (rtx) data;
+ target = gen_memset_value_from_prev (prev, mode);
+ if (target != nullptr)
+ return target;
+
+ target = gen_memset_broadcast ((rtx) data, mode);
+ if (target != nullptr)
+ return target;
+
p = XALLOCAVEC (char, size);
memset (p, 1, size);
coeff = c_readstr (p, mode);
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 2a41ae5fba1..8849711fcf8 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -12122,6 +12122,11 @@ This function prepares to emit a conditional comparison within a sequence
@var{bit_code} is @code{AND} or @code{IOR}, which is the op on the compares.
@end deftypefn
+@deftypefn {Target Hook} rtx TARGET_GEN_MEMSET_SCRATCH_RTX (machine_mode @var{mode})
+This hook should return an rtx for scratch register in @var{mode} to
+be used by memset broadcast. The default is @code{gen_reg_rtx}.
+@end deftypefn
+
@deftypefn {Target Hook} unsigned TARGET_LOOP_UNROLL_ADJUST (unsigned @var{nunroll}, class loop *@var{loop})
This target hook returns a new value for the number of times @var{loop}
should be unrolled. The parameter @var{nunroll} is the number of times
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index f881cdabe9e..a6bbf4f2667 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -7958,6 +7958,8 @@ lists.
@hook TARGET_GEN_CCMP_NEXT
+@hook TARGET_GEN_MEMSET_SCRATCH_RTX
+
@hook TARGET_LOOP_UNROLL_ADJUST
@defmac POWI_MAX_MULTS
diff --git a/gcc/target.def b/gcc/target.def
index c009671c583..5fb287db3bd 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -2726,6 +2726,13 @@ DEFHOOK
rtx, (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev, int cmp_code, tree op0, tree op1, int bit_code),
NULL)
+DEFHOOK
+(gen_memset_scratch_rtx,
+ "This hook should return an rtx for scratch register in @var{mode} to\n\
+be used by memset broadcast. The default is @code{gen_reg_rtx}.",
+ rtx, (machine_mode mode),
+ gen_reg_rtx)
+
/* Return a new value for loop unroll size. */
DEFHOOK
(loop_unroll_adjust,
--
2.31.1
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v5 02/11] x86: Add TARGET_GEN_MEMSET_SCRATCH_RTX
2021-07-01 15:22 [PATCH v5 00/11] Allow TImode/OImode/XImode in op_by_pieces operations H.J. Lu
2021-07-01 15:22 ` [PATCH v5 01/11] Rewrite memset with TARGET_GEN_MEMSET_SCRATCH_RTX H.J. Lu
@ 2021-07-01 15:22 ` H.J. Lu
2021-07-01 15:22 ` [PATCH v5 03/11] x86: Avoid stack realignment when copying data H.J. Lu
` (8 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: H.J. Lu @ 2021-07-01 15:22 UTC (permalink / raw)
To: gcc-patches
Cc: Richard Biener, Richard Sandiford, Uros Bizjak, Bernd Edlinger
Define TARGET_GEN_MEMSET_SCRATCH_RTX to ix86_gen_scratch_sse_rtx to
return a scratch SSE register for memset.
gcc/
PR middle-end/90773
* config/i386/i386.c (TARGET_GEN_MEMSET_SCRATCH_RTX): New.
gcc/testsuite/
PR middle-end/90773
* gcc.target/i386/pr90773-15.c: New test.
* gcc.target/i386/pr90773-16.c: Likewise.
* gcc.target/i386/pr90773-17.c: Likewise.
* gcc.target/i386/pr90773-18.c: Likewise.
* gcc.target/i386/pr90773-19.c: Likewise.
---
gcc/config/i386/i386.c | 6 +++++-
gcc/testsuite/gcc.target/i386/pr90773-15.c | 14 ++++++++++++++
gcc/testsuite/gcc.target/i386/pr90773-16.c | 14 ++++++++++++++
gcc/testsuite/gcc.target/i386/pr90773-17.c | 14 ++++++++++++++
gcc/testsuite/gcc.target/i386/pr90773-18.c | 15 +++++++++++++++
gcc/testsuite/gcc.target/i386/pr90773-19.c | 14 ++++++++++++++
6 files changed, 76 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-15.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-16.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-17.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-18.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-19.c
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 2fbaae7cd02..f436794f65c 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -23163,7 +23163,8 @@ ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
}
}
-/* Return a scratch register in MODE for vector load and store. */
+/* Implement the TARGET_GEN_MEMSET_SCRATCH_RTX hook. Return a scratch
+ register in MODE for vector load and store. */
rtx
ix86_gen_scratch_sse_rtx (machine_mode mode)
@@ -24082,6 +24083,9 @@ static bool ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
#undef TARGET_LIBC_HAS_FAST_FUNCTION
#define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function
+#undef TARGET_GEN_MEMSET_SCRATCH_RTX
+#define TARGET_GEN_MEMSET_SCRATCH_RTX ix86_gen_scratch_sse_rtx
+
#if CHECKING_P
#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-15.c b/gcc/testsuite/gcc.target/i386/pr90773-15.c
new file mode 100644
index 00000000000..c0a96fed892
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90773-15.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512" } */
+
+extern char *dst;
+
+void
+foo (int c)
+{
+ __builtin_memset (dst, c, 17);
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%edi, %xmm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]+%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "movb\[\\t \]+%dil, 16\\(%\[\^,\]+\\)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-16.c b/gcc/testsuite/gcc.target/i386/pr90773-16.c
new file mode 100644
index 00000000000..d2d1ec6141c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90773-16.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, -1, 17);
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeqd" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]+%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "movb\[\\t \]+\\\$-1, 16\\(%\[\^,\]+\\)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-17.c b/gcc/testsuite/gcc.target/i386/pr90773-17.c
new file mode 100644
index 00000000000..6c8da7d24ef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90773-17.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 12, 19);
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcastb" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]+%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovd\[\\t \]+%xmm\[0-9\]+, 15\\(%\[\^,\]+\\)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-18.c b/gcc/testsuite/gcc.target/i386/pr90773-18.c
new file mode 100644
index 00000000000..b0687abbe01
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90773-18.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake-avx512" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 12, 9);
+}
+
+/* { dg-final { scan-assembler-times "movabsq\[\\t \]+\\\$868082074056920076, %r" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "movl\[\\t \]+\\\$202116108, \\(%\[\^,\]+\\)" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "movl\[\\t \]+\\\$202116108, 4\\(%\[\^,\]+\\)" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "movb\[\\t \]+\\\$12, 8\\(%\[\^,\]+\\)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-19.c b/gcc/testsuite/gcc.target/i386/pr90773-19.c
new file mode 100644
index 00000000000..8aa5540bacc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90773-19.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 12, 9);
+}
+
+/* { dg-final { scan-assembler-times "movabsq\[\\t \]+\\\$868082074056920076, %r" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "movl\[\\t \]+\\\$202116108, \\(%\[\^,\]+\\)" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "movl\[\\t \]+\\\$202116108, 4\\(%\[\^,\]+\\)" 1 { target ia32 } } } */
--
2.31.1
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v5 03/11] x86: Avoid stack realignment when copying data
2021-07-01 15:22 [PATCH v5 00/11] Allow TImode/OImode/XImode in op_by_pieces operations H.J. Lu
2021-07-01 15:22 ` [PATCH v5 01/11] Rewrite memset with TARGET_GEN_MEMSET_SCRATCH_RTX H.J. Lu
2021-07-01 15:22 ` [PATCH v5 02/11] x86: Add TARGET_GEN_MEMSET_SCRATCH_RTX H.J. Lu
@ 2021-07-01 15:22 ` H.J. Lu
2021-07-01 15:22 ` [PATCH v5 04/11] x86: Update piecewise move and store H.J. Lu
` (7 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: H.J. Lu @ 2021-07-01 15:22 UTC (permalink / raw)
To: gcc-patches
Cc: Richard Biener, Richard Sandiford, Uros Bizjak, Bernd Edlinger
To avoid stack realignment, use SCRATCH_SSE_REG to copy data from one
memory location to another.
gcc/
* config/i386/i386-expand.c (ix86_expand_vector_move): Call
ix86_gen_scratch_sse_rtx to get a scratch SSE register to copy
data from one memory location to another.
gcc/testsuite/
* gcc.target/i386/eh_return-1.c: New test.
---
gcc/config/i386/i386-expand.c | 4 +++-
gcc/testsuite/gcc.target/i386/eh_return-1.c | 26 +++++++++++++++++++++
2 files changed, 29 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.target/i386/eh_return-1.c
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 5c9170e3a1d..6b009d523a5 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -602,7 +602,9 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[])
&& !register_operand (op0, mode)
&& !register_operand (op1, mode))
{
- emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
+ rtx tmp = ix86_gen_scratch_sse_rtx (GET_MODE (op0));
+ emit_move_insn (tmp, op1);
+ emit_move_insn (op0, tmp);
return;
}
diff --git a/gcc/testsuite/gcc.target/i386/eh_return-1.c b/gcc/testsuite/gcc.target/i386/eh_return-1.c
new file mode 100644
index 00000000000..671ba635e88
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/eh_return-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=haswell -mno-avx512f" } */
+
+struct _Unwind_Context
+{
+ void *ra;
+ char array[48];
+};
+
+extern long uw_install_context_1 (struct _Unwind_Context *);
+
+void
+_Unwind_RaiseException (void)
+{
+ struct _Unwind_Context this_context, cur_context;
+ long offset = uw_install_context_1 (&this_context);
+ __builtin_memcpy (&this_context, &cur_context,
+ sizeof (struct _Unwind_Context));
+ void *handler = __builtin_frob_return_addr ((&cur_context)->ra);
+ uw_install_context_1 (&cur_context);
+ __builtin_eh_return (offset, handler);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 4 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
--
2.31.1
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v5 04/11] x86: Update piecewise move and store
2021-07-01 15:22 [PATCH v5 00/11] Allow TImode/OImode/XImode in op_by_pieces operations H.J. Lu
` (2 preceding siblings ...)
2021-07-01 15:22 ` [PATCH v5 03/11] x86: Avoid stack realignment when copying data H.J. Lu
@ 2021-07-01 15:22 ` H.J. Lu
2021-07-01 15:22 ` [PATCH v5 05/11] x86: Add AVX2 tests for PR middle-end/90773 H.J. Lu
` (6 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: H.J. Lu @ 2021-07-01 15:22 UTC (permalink / raw)
To: gcc-patches
Cc: Richard Biener, Richard Sandiford, Uros Bizjak, Bernd Edlinger
We can use TImode/OImode/XImode integers for piecewise move and store.
1. Define MAX_MOVE_MAX to 64, which is the constant maximum number of
bytes that a single instruction can move quickly between memory and
registers or between two memory locations.
2. Define MOVE_MAX to MOVE_MAX_PIECES, which is the maximum number of
bytes we can move from memory to memory in one reasonably fast instruction.
The difference between MAX_MOVE_MAX and MOVE_MAX is that MAX_MOVE_MAX
must be a constant, independent of compiler options, since it is used in
reload.h to define struct target_reload and MOVE_MAX can vary, depending
on compiler options.
3. When a vector register is used for piecewise move and store, we don't
increase stack_alignment_needed since a vector register spill isn't
required for piecewise move and store. Since stack_realign_needed is
set to true by checking stack_alignment_estimated, which is set by pseudo
vector register usage, we also need to check stack_realign_needed to
eliminate the frame pointer.
gcc/
* config/i386/i386.c (ix86_finalize_stack_frame_flags): Also
check stack_realign_needed for stack realignment.
(ix86_legitimate_constant_p): Always allow CONST_WIDE_INT smaller
than the largest integer supported by vector register.
* config/i386/i386.h (MAX_MOVE_MAX): New. Set to 64.
(MOVE_MAX_PIECES): Set to bytes of the largest integer supported
by vector register.
(MOVE_MAX): Defined to MOVE_MAX_PIECES.
(STORE_MAX_PIECES): New.
gcc/testsuite/
* gcc.target/i386/pr90773-1.c: Adjust to expect movq for 32-bit.
* gcc.target/i386/pr90773-4.c: Also run for 32-bit.
* gcc.target/i386/pr90773-15.c: Likewise.
* gcc.target/i386/pr90773-16.c: Likewise.
* gcc.target/i386/pr90773-17.c: Likewise.
* gcc.target/i386/pr90773-24.c: Likewise.
* gcc.target/i386/pr90773-25.c: Likewise.
* gcc.target/i386/pr100865-1.c: Likewise.
* gcc.target/i386/pr100865-2.c: Likewise.
* gcc.target/i386/pr100865-3.c: Likewise.
* gcc.target/i386/pr90773-14.c: Also run for 32-bit and expect
XMM movd to store 4 bytes.
* gcc.target/i386/pr100865-4a.c: Also run for 32-bit and expect
YMM registers.
* gcc.target/i386/pr100865-4b.c: Likewise.
* gcc.target/i386/pr100865-10a.c: Expect YMM registers.
* gcc.target/i386/pr100865-10b.c: Likewise.
---
gcc/config/i386/i386.c | 21 ++++++++--
gcc/config/i386/i386.h | 40 ++++++++++++++++----
gcc/testsuite/gcc.target/i386/pr100865-1.c | 2 +-
gcc/testsuite/gcc.target/i386/pr100865-10a.c | 4 +-
gcc/testsuite/gcc.target/i386/pr100865-10b.c | 4 +-
gcc/testsuite/gcc.target/i386/pr100865-2.c | 2 +-
gcc/testsuite/gcc.target/i386/pr100865-3.c | 2 +-
gcc/testsuite/gcc.target/i386/pr100865-4a.c | 6 +--
gcc/testsuite/gcc.target/i386/pr100865-4b.c | 8 ++--
gcc/testsuite/gcc.target/i386/pr90773-1.c | 10 ++---
gcc/testsuite/gcc.target/i386/pr90773-14.c | 4 +-
gcc/testsuite/gcc.target/i386/pr90773-15.c | 6 +--
gcc/testsuite/gcc.target/i386/pr90773-16.c | 2 +-
gcc/testsuite/gcc.target/i386/pr90773-17.c | 2 +-
gcc/testsuite/gcc.target/i386/pr90773-24.c | 2 +-
gcc/testsuite/gcc.target/i386/pr90773-25.c | 2 +-
gcc/testsuite/gcc.target/i386/pr90773-4.c | 2 +-
17 files changed, 77 insertions(+), 42 deletions(-)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index f436794f65c..3dfb3a6f2dc 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -7953,8 +7953,17 @@ ix86_finalize_stack_frame_flags (void)
assumed stack realignment might be needed or -fno-omit-frame-pointer
is used, but in the end nothing that needed the stack alignment had
been spilled nor stack access, clear frame_pointer_needed and say we
- don't need stack realignment. */
- if ((stack_realign || (!flag_omit_frame_pointer && optimize))
+ don't need stack realignment.
+
+ When vector register is used for piecewise move and store, we don't
+ increase stack_alignment_needed as there is no register spill for
+ piecewise move and store. Since stack_realign_needed is set to true
+ by checking stack_alignment_estimated which is updated by pseudo
+ vector register usage, we also need to check stack_realign_needed to
+ eliminate frame pointer. */
+ if ((stack_realign
+ || (!flag_omit_frame_pointer && optimize)
+ || crtl->stack_realign_needed)
&& frame_pointer_needed
&& crtl->is_leaf
&& crtl->sp_is_unchanging
@@ -10418,7 +10427,13 @@ ix86_legitimate_constant_p (machine_mode mode, rtx x)
/* FALLTHRU */
case E_OImode:
case E_XImode:
- if (!standard_sse_constant_p (x, mode))
+ if (!standard_sse_constant_p (x, mode)
+ && GET_MODE_SIZE (TARGET_AVX512F
+ ? XImode
+ : (TARGET_AVX
+ ? OImode
+ : (TARGET_SSE2
+ ? TImode : DImode))) < GET_MODE_SIZE (mode))
return false;
default:
break;
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 6e0340a4b60..7c504108896 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1748,9 +1748,10 @@ typedef struct ix86_args {
/* Define this as 1 if `char' should by default be signed; else as 0. */
#define DEFAULT_SIGNED_CHAR 1
-/* Max number of bytes we can move from memory to memory
- in one reasonably fast instruction. */
-#define MOVE_MAX 16
+/* The constant maximum number of bytes that a single instruction can
+ move quickly between memory and registers or between two memory
+ locations. */
+#define MAX_MOVE_MAX 64
/* MOVE_MAX_PIECES is the number of bytes at a time which we can
move efficiently, as opposed to MOVE_MAX which is the maximum
@@ -1761,11 +1762,34 @@ typedef struct ix86_args {
widest mode with MAX_FIXED_MODE_SIZE, we can only use TImode in
64-bit mode. */
#define MOVE_MAX_PIECES \
- ((TARGET_64BIT \
- && TARGET_SSE2 \
- && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL \
- && TARGET_SSE_UNALIGNED_STORE_OPTIMAL) \
- ? GET_MODE_SIZE (TImode) : UNITS_PER_WORD)
+ ((TARGET_AVX512F && !TARGET_PREFER_AVX256) \
+ ? 64 \
+ : ((TARGET_AVX \
+ && !TARGET_PREFER_AVX128 \
+ && !TARGET_AVX256_SPLIT_UNALIGNED_LOAD \
+ && !TARGET_AVX256_SPLIT_UNALIGNED_STORE) \
+ ? 32 \
+ : ((TARGET_SSE2 \
+ && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL \
+ && TARGET_SSE_UNALIGNED_STORE_OPTIMAL) \
+ ? 16 : UNITS_PER_WORD)))
+
+/* Max number of bytes we can move from memory to memory in one
+ reasonably fast instruction. */
+#define MOVE_MAX MOVE_MAX_PIECES
+
+/* STORE_MAX_PIECES is the number of bytes at a time that we can
+ store efficiently. */
+#define STORE_MAX_PIECES \
+ ((TARGET_AVX512F && !TARGET_PREFER_AVX256) \
+ ? 64 \
+ : ((TARGET_AVX \
+ && !TARGET_PREFER_AVX128 \
+ && !TARGET_AVX256_SPLIT_UNALIGNED_STORE) \
+ ? 32 \
+ : ((TARGET_SSE2 \
+ && TARGET_SSE_UNALIGNED_STORE_OPTIMAL) \
+ ? 16 : UNITS_PER_WORD)))
/* If a memory-to-memory move would take MOVE_RATIO or more simple
move-instruction pairs, we will do a cpymem or libcall instead.
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-1.c b/gcc/testsuite/gcc.target/i386/pr100865-1.c
index 6c3097fb2a6..949dd5c337a 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-1.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-do compile } */
/* { dg-options "-O2 -march=x86-64" } */
extern char *dst;
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-10a.c b/gcc/testsuite/gcc.target/i386/pr100865-10a.c
index 7ffc19e56a8..98b6dfb16f3 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-10a.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-10a.c
@@ -29,5 +29,5 @@ foo (void)
array[i] = MK_CONST128_BROADCAST (0x1f);
}
-/* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+\[^\n\]*, %xmm\[0-9\]+" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqa\[\\t \]%xmm\[0-9\]+, " 16 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+\[^\n\]*, %ymm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 8 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-10b.c b/gcc/testsuite/gcc.target/i386/pr100865-10b.c
index edf52765c60..5f5abe27bed 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-10b.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-10b.c
@@ -3,5 +3,5 @@
#include "pr100865-10a.c"
-/* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqa\[\\t \]%xmm\[0-9\]+, " 16 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%(?:r|e)\[^\n\]*, %ymm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 8 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-2.c b/gcc/testsuite/gcc.target/i386/pr100865-2.c
index 17efe2d72a3..f3ea7753abe 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-2.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-2.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-do compile } */
/* { dg-options "-O2 -march=skylake" } */
extern char *dst;
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-3.c b/gcc/testsuite/gcc.target/i386/pr100865-3.c
index b6dbcf7809b..0183fccc251 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-3.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-3.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-do compile } */
/* { dg-options "-O2 -march=skylake-avx512" } */
extern char *dst;
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-4a.c b/gcc/testsuite/gcc.target/i386/pr100865-4a.c
index f55883598f9..365487337ae 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-4a.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-4a.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-do compile } */
/* { dg-options "-O2 -march=skylake" } */
extern char array[64];
@@ -11,6 +11,6 @@ foo (void)
array[i] = -45;
}
-/* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%xmm\[0-9\]+, %xmm\[0-9\]+" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%xmm\[0-9\]+, " 4 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%xmm\[0-9\]+, %ymm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 2 } } */
/* { dg-final { scan-assembler-not "vmovdqa" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-4b.c b/gcc/testsuite/gcc.target/i386/pr100865-4b.c
index f41e6147b4c..cbcae2d97b5 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-4b.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-4b.c
@@ -1,9 +1,9 @@
-/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-do compile } */
/* { dg-options "-O2 -march=skylake-avx512" } */
#include "pr100865-4a.c"
-/* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%xmm\[0-9\]+, " 4 } } */
-/* { dg-final { scan-assembler-not "vpbroadcastb\[\\t \]+%xmm\[0-9\]+, %xmm\[0-9\]+" } } */
+/* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%(?:r|e)\[^\n\]*, %ymm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 2 } } */
+/* { dg-final { scan-assembler-not "vpbroadcastb\[\\t \]+%xmm\[0-9\]+, %ymm\[0-9\]+" } } */
/* { dg-final { scan-assembler-not "vmovdqa" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-1.c b/gcc/testsuite/gcc.target/i386/pr90773-1.c
index 1d9f282dc0d..4fd5a40d99d 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-1.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -mtune=generic" } */
+/* { dg-options "-O2 -msse2 -mtune=generic" } */
extern char *dst, *src;
@@ -9,9 +9,5 @@ foo (void)
__builtin_memcpy (dst, src, 15);
}
-/* { dg-final { scan-assembler-times "movq\[\\t \]+\\(%\[\^,\]+\\)," 1 { target { ! ia32 } } } } */
-/* { dg-final { scan-assembler-times "movq\[\\t \]+7\\(%\[\^,\]+\\)," 1 { target { ! ia32 } } } } */
-/* { dg-final { scan-assembler-times "movl\[\\t \]+\\(%\[\^,\]+\\)," 1 { target ia32 } } } */
-/* { dg-final { scan-assembler-times "movl\[\\t \]+4\\(%\[\^,\]+\\)," 1 { target ia32 } } } */
-/* { dg-final { scan-assembler-times "movl\[\\t \]+8\\(%\[\^,\]+\\)," 1 { target ia32 } } } */
-/* { dg-final { scan-assembler-times "movl\[\\t \]+11\\(%\[\^,\]+\\)," 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "movq\[\\t \]+\\(%\[\^,\]+\\)," 1 } } */
+/* { dg-final { scan-assembler-times "movq\[\\t \]+7\\(%\[\^,\]+\\)," 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-14.c b/gcc/testsuite/gcc.target/i386/pr90773-14.c
index 6364916ecac..96ee5cb08c1 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-14.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-14.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-do compile } */
/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
extern char *dst;
@@ -10,4 +10,4 @@ foo (void)
}
/* { dg-final { scan-assembler-times "movups\[\\t \]+%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
-/* { dg-final { scan-assembler-times "movl\[\\t \]+\\\$16843009, 16\\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "movd\[\\t \]+%xmm\[0-9\]+, 16\\(%\[\^,\]+\\)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-15.c b/gcc/testsuite/gcc.target/i386/pr90773-15.c
index c0a96fed892..880f71d1567 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-15.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-15.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-do compile } */
/* { dg-options "-O2 -march=skylake-avx512" } */
extern char *dst;
@@ -9,6 +9,6 @@ foo (int c)
__builtin_memset (dst, c, 17);
}
-/* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%edi, %xmm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%.*, %xmm\[0-9\]+" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]+%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
-/* { dg-final { scan-assembler-times "movb\[\\t \]+%dil, 16\\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "movb\[\\t \]+%.*, 16\\(%\[\^,\]+\\)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-16.c b/gcc/testsuite/gcc.target/i386/pr90773-16.c
index d2d1ec6141c..32a976b10df 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-16.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-16.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-do compile } */
/* { dg-options "-O2 -march=skylake-avx512" } */
extern char *dst;
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-17.c b/gcc/testsuite/gcc.target/i386/pr90773-17.c
index 6c8da7d24ef..2d6fbf22a8b 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-17.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-17.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-do compile } */
/* { dg-options "-O2 -march=skylake-avx512" } */
extern char *dst;
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-24.c b/gcc/testsuite/gcc.target/i386/pr90773-24.c
index 7b2ea66dcfc..71f1fd8c4df 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-24.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-24.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-do compile } */
/* { dg-options "-O2 -march=x86-64" } */
struct S
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-25.c b/gcc/testsuite/gcc.target/i386/pr90773-25.c
index 57642ea8d2d..ad19a88c883 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-25.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-25.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-do compile } */
/* { dg-options "-O2 -march=x86-64" } */
struct S
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-4.c b/gcc/testsuite/gcc.target/i386/pr90773-4.c
index ec0bc0100ae..ee4c04678d1 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-4.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-4.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-do compile } */
/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
extern char *dst;
--
2.31.1
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v5 05/11] x86: Add AVX2 tests for PR middle-end/90773
2021-07-01 15:22 [PATCH v5 00/11] Allow TImode/OImode/XImode in op_by_pieces operations H.J. Lu
` (3 preceding siblings ...)
2021-07-01 15:22 ` [PATCH v5 04/11] x86: Update piecewise move and store H.J. Lu
@ 2021-07-01 15:22 ` H.J. Lu
2021-07-01 15:22 ` [PATCH v5 06/11] x86: Add tests for piecewise move and store H.J. Lu
` (5 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: H.J. Lu @ 2021-07-01 15:22 UTC (permalink / raw)
To: gcc-patches
Cc: Richard Biener, Richard Sandiford, Uros Bizjak, Bernd Edlinger
PR middle-end/90773
* gcc.target/i386/pr90773-20.c: New test.
* gcc.target/i386/pr90773-21.c: Likewise.
* gcc.target/i386/pr90773-22.c: Likewise.
* gcc.target/i386/pr90773-23.c: Likewise.
* gcc.target/i386/pr90773-26.c: Likewise.
---
gcc/testsuite/gcc.target/i386/pr90773-20.c | 13 +++++++++++++
gcc/testsuite/gcc.target/i386/pr90773-21.c | 13 +++++++++++++
gcc/testsuite/gcc.target/i386/pr90773-22.c | 13 +++++++++++++
gcc/testsuite/gcc.target/i386/pr90773-23.c | 13 +++++++++++++
gcc/testsuite/gcc.target/i386/pr90773-26.c | 21 +++++++++++++++++++++
5 files changed, 73 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-20.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-21.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-22.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-23.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-26.c
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-20.c b/gcc/testsuite/gcc.target/i386/pr90773-20.c
new file mode 100644
index 00000000000..e61e405f2b6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90773-20.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake" } */
+
+extern char *dst;
+
+void
+foo (int c)
+{
+ __builtin_memset (dst, c, 33);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "movb\[\\t \]+.+, 32\\(%\[\^,\]+\\)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-21.c b/gcc/testsuite/gcc.target/i386/pr90773-21.c
new file mode 100644
index 00000000000..16ad17f3cbb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90773-21.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake" } */
+
+extern char *dst;
+
+void
+foo (int c)
+{
+ __builtin_memset (dst, c, 34);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "movw\[\\t \]%.*, 32\\(%\[\^,\]+\\)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-22.c b/gcc/testsuite/gcc.target/i386/pr90773-22.c
new file mode 100644
index 00000000000..45a8ff65a84
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90773-22.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 0, 33);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "movb\[\\t \]+.+, 32\\(%\[\^,\]+\\)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-23.c b/gcc/testsuite/gcc.target/i386/pr90773-23.c
new file mode 100644
index 00000000000..9256ce10ff0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90773-23.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 0, 34);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "movw\[\\t \]+.+, 32\\(%\[\^,\]+\\)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-26.c b/gcc/testsuite/gcc.target/i386/pr90773-26.c
new file mode 100644
index 00000000000..b2513c3a9c8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90773-26.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake" } */
+
+struct S
+{
+ long long s1 __attribute__ ((aligned (8)));
+ unsigned s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14;
+};
+
+const struct S array[] = {
+ { 0, 60, 640, 2112543726, 39682, 48, 16, 33, 10, 96, 2, 0, 0, 4 }
+};
+
+void
+foo (struct S *x)
+{
+ x[0] = array[0];
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, 32\\(%\[\^,\]+\\)" 1 } } */
--
2.31.1
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v5 06/11] x86: Add tests for piecewise move and store
2021-07-01 15:22 [PATCH v5 00/11] Allow TImode/OImode/XImode in op_by_pieces operations H.J. Lu
` (4 preceding siblings ...)
2021-07-01 15:22 ` [PATCH v5 05/11] x86: Add AVX2 tests for PR middle-end/90773 H.J. Lu
@ 2021-07-01 15:22 ` H.J. Lu
2021-07-01 15:22 ` [PATCH v5 07/11] x86: Also pass -mno-avx to pr72839.c H.J. Lu
` (4 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: H.J. Lu @ 2021-07-01 15:22 UTC (permalink / raw)
To: gcc-patches
Cc: Richard Biener, Richard Sandiford, Uros Bizjak, Bernd Edlinger
* gcc.target/i386/pieces-memcpy-7.c: New test.
* gcc.target/i386/pieces-memcpy-8.c: Likewise.
* gcc.target/i386/pieces-memcpy-9.c: Likewise.
* gcc.target/i386/pieces-memcpy-10.c: Likewise.
* gcc.target/i386/pieces-memcpy-11.c: Likewise.
* gcc.target/i386/pieces-memcpy-12.c: Likewise.
* gcc.target/i386/pieces-memcpy-13.c: Likewise.
* gcc.target/i386/pieces-memcpy-14.c: Likewise.
* gcc.target/i386/pieces-memcpy-15.c: Likewise.
* gcc.target/i386/pieces-memcpy-16.c: Likewise.
* gcc.target/i386/pieces-memset-1.c: Likewise.
* gcc.target/i386/pieces-memset-2.c: Likewise.
* gcc.target/i386/pieces-memset-3.c: Likewise.
* gcc.target/i386/pieces-memset-4.c: Likewise.
* gcc.target/i386/pieces-memset-5.c: Likewise.
* gcc.target/i386/pieces-memset-6.c: Likewise.
* gcc.target/i386/pieces-memset-7.c: Likewise.
* gcc.target/i386/pieces-memset-8.c: Likewise.
* gcc.target/i386/pieces-memset-9.c: Likewise.
* gcc.target/i386/pieces-memset-10.c: Likewise.
* gcc.target/i386/pieces-memset-11.c: Likewise.
* gcc.target/i386/pieces-memset-12.c: Likewise.
* gcc.target/i386/pieces-memset-13.c: Likewise.
* gcc.target/i386/pieces-memset-14.c: Likewise.
* gcc.target/i386/pieces-memset-15.c: Likewise.
* gcc.target/i386/pieces-memset-16.c: Likewise.
* gcc.target/i386/pieces-memset-17.c: Likewise.
* gcc.target/i386/pieces-memset-18.c: Likewise.
* gcc.target/i386/pieces-memset-19.c: Likewise.
* gcc.target/i386/pieces-memset-20.c: Likewise.
* gcc.target/i386/pieces-memset-21.c: Likewise.
* gcc.target/i386/pieces-memset-22.c: Likewise.
* gcc.target/i386/pieces-memset-23.c: Likewise.
* gcc.target/i386/pieces-memset-24.c: Likewise.
* gcc.target/i386/pieces-memset-25.c: Likewise.
* gcc.target/i386/pieces-memset-26.c: Likewise.
* gcc.target/i386/pieces-memset-27.c: Likewise.
* gcc.target/i386/pieces-memset-28.c: Likewise.
* gcc.target/i386/pieces-memset-29.c: Likewise.
* gcc.target/i386/pieces-memset-30.c: Likewise.
* gcc.target/i386/pieces-memset-31.c: Likewise.
* gcc.target/i386/pieces-memset-32.c: Likewise.
* gcc.target/i386/pieces-memset-33.c: Likewise.
* gcc.target/i386/pieces-memset-34.c: Likewise.
* gcc.target/i386/pieces-memset-35.c: Likewise.
* gcc.target/i386/pieces-memset-36.c: Likewise.
* gcc.target/i386/pieces-memset-37.c: Likewise.
* gcc.target/i386/pieces-memset-38.c: Likewise.
* gcc.target/i386/pieces-memset-39.c: Likewise.
* gcc.target/i386/pieces-memset-40.c: Likewise.
* gcc.target/i386/pieces-memset-41.c: Likewise.
* gcc.target/i386/pieces-memset-42.c: Likewise.
* gcc.target/i386/pieces-memset-43.c: Likewise.
---
.../gcc.target/i386/pieces-memcpy-10.c | 16 ++++++++++++++++
.../gcc.target/i386/pieces-memcpy-11.c | 17 +++++++++++++++++
.../gcc.target/i386/pieces-memcpy-12.c | 16 ++++++++++++++++
.../gcc.target/i386/pieces-memcpy-13.c | 16 ++++++++++++++++
.../gcc.target/i386/pieces-memcpy-14.c | 17 +++++++++++++++++
.../gcc.target/i386/pieces-memcpy-15.c | 16 ++++++++++++++++
.../gcc.target/i386/pieces-memcpy-16.c | 16 ++++++++++++++++
.../gcc.target/i386/pieces-memcpy-7.c | 15 +++++++++++++++
.../gcc.target/i386/pieces-memcpy-8.c | 14 ++++++++++++++
.../gcc.target/i386/pieces-memcpy-9.c | 14 ++++++++++++++
.../gcc.target/i386/pieces-memset-1.c | 16 ++++++++++++++++
.../gcc.target/i386/pieces-memset-10.c | 16 ++++++++++++++++
.../gcc.target/i386/pieces-memset-11.c | 16 ++++++++++++++++
.../gcc.target/i386/pieces-memset-12.c | 16 ++++++++++++++++
.../gcc.target/i386/pieces-memset-13.c | 16 ++++++++++++++++
.../gcc.target/i386/pieces-memset-14.c | 16 ++++++++++++++++
.../gcc.target/i386/pieces-memset-15.c | 16 ++++++++++++++++
.../gcc.target/i386/pieces-memset-16.c | 16 ++++++++++++++++
.../gcc.target/i386/pieces-memset-17.c | 16 ++++++++++++++++
.../gcc.target/i386/pieces-memset-18.c | 16 ++++++++++++++++
.../gcc.target/i386/pieces-memset-19.c | 17 +++++++++++++++++
.../gcc.target/i386/pieces-memset-2.c | 12 ++++++++++++
.../gcc.target/i386/pieces-memset-20.c | 17 +++++++++++++++++
.../gcc.target/i386/pieces-memset-21.c | 17 +++++++++++++++++
.../gcc.target/i386/pieces-memset-22.c | 17 +++++++++++++++++
.../gcc.target/i386/pieces-memset-23.c | 17 +++++++++++++++++
.../gcc.target/i386/pieces-memset-24.c | 17 +++++++++++++++++
.../gcc.target/i386/pieces-memset-25.c | 17 +++++++++++++++++
.../gcc.target/i386/pieces-memset-26.c | 17 +++++++++++++++++
.../gcc.target/i386/pieces-memset-27.c | 17 +++++++++++++++++
.../gcc.target/i386/pieces-memset-28.c | 17 +++++++++++++++++
.../gcc.target/i386/pieces-memset-29.c | 17 +++++++++++++++++
.../gcc.target/i386/pieces-memset-3.c | 18 ++++++++++++++++++
.../gcc.target/i386/pieces-memset-30.c | 17 +++++++++++++++++
.../gcc.target/i386/pieces-memset-31.c | 17 +++++++++++++++++
.../gcc.target/i386/pieces-memset-32.c | 17 +++++++++++++++++
.../gcc.target/i386/pieces-memset-33.c | 17 +++++++++++++++++
.../gcc.target/i386/pieces-memset-34.c | 17 +++++++++++++++++
.../gcc.target/i386/pieces-memset-35.c | 17 +++++++++++++++++
.../gcc.target/i386/pieces-memset-36.c | 17 +++++++++++++++++
.../gcc.target/i386/pieces-memset-37.c | 15 +++++++++++++++
.../gcc.target/i386/pieces-memset-38.c | 17 +++++++++++++++++
.../gcc.target/i386/pieces-memset-39.c | 16 ++++++++++++++++
.../gcc.target/i386/pieces-memset-4.c | 16 ++++++++++++++++
.../gcc.target/i386/pieces-memset-40.c | 17 +++++++++++++++++
.../gcc.target/i386/pieces-memset-41.c | 16 ++++++++++++++++
.../gcc.target/i386/pieces-memset-42.c | 17 +++++++++++++++++
.../gcc.target/i386/pieces-memset-43.c | 17 +++++++++++++++++
.../gcc.target/i386/pieces-memset-5.c | 12 ++++++++++++
.../gcc.target/i386/pieces-memset-6.c | 16 ++++++++++++++++
.../gcc.target/i386/pieces-memset-7.c | 16 ++++++++++++++++
.../gcc.target/i386/pieces-memset-8.c | 16 ++++++++++++++++
.../gcc.target/i386/pieces-memset-9.c | 16 ++++++++++++++++
53 files changed, 860 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-11.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-12.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-13.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-14.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-15.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-16.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-7.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-8.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-9.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-10.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-11.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-12.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-13.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-14.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-15.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-16.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-17.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-18.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-19.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-2.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-20.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-21.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-22.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-23.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-24.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-25.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-26.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-27.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-28.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-29.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-3.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-30.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-31.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-32.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-33.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-34.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-35.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-36.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-37.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-38.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-39.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-4.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-40.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-41.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-42.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-43.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-5.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-6.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-7.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-8.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-9.c
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c
new file mode 100644
index 00000000000..5faee21f9b9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+
+extern char *dst, *src;
+
+void
+foo (void)
+{
+ __builtin_memcpy (dst, src, 33);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 4 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-11.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-11.c
new file mode 100644
index 00000000000..b8917a7f917
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-11.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst, *src;
+
+void
+foo (void)
+{
+ __builtin_memcpy (dst, src, 64);
+}
+
+/* { dg-final { scan-assembler-times "movdqu\[ \\t\]+\[^\n\]*%xmm" 4 } } */
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-12.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-12.c
new file mode 100644
index 00000000000..f1432ebe517
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-12.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+
+extern char *dst, *src;
+
+void
+foo (void)
+{
+ __builtin_memcpy (dst, src, 64);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 4 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-13.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-13.c
new file mode 100644
index 00000000000..97e6067fec9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-13.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst, *src;
+
+void
+foo (void)
+{
+ __builtin_memcpy (dst, src, 66);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\n\]*%zmm" 2 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-14.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-14.c
new file mode 100644
index 00000000000..7addc4c0a28
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-14.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst, *src;
+
+void
+foo (void)
+{
+ __builtin_memcpy (dst, src, 33);
+}
+
+/* { dg-final { scan-assembler-times "movdqu\[ \\t\]+\[^\n\]*%xmm" 2 } } */
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-15.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-15.c
new file mode 100644
index 00000000000..695e8c3fa67
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-15.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+
+extern char *dst, *src;
+
+void
+foo (void)
+{
+ __builtin_memcpy (dst, src, 33);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-16.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-16.c
new file mode 100644
index 00000000000..b0643d05ee7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-16.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst, *src;
+
+void
+foo (void)
+{
+ __builtin_memcpy (dst, src, 34);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-7.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-7.c
new file mode 100644
index 00000000000..3d248d447ea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-7.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+void
+foo (int a1, int a2, int a3, int a4, int a5, int a6, char *dst, char *src)
+{
+ __builtin_memcpy (dst, src, 17);
+}
+
+/* { dg-final { scan-assembler-times "movdqu\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-8.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-8.c
new file mode 100644
index 00000000000..c13a2beb2f0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-8.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
+
+void
+foo (int a1, int a2, int a3, int a4, int a5, int a6, char *dst, char *src)
+{
+ __builtin_memcpy (dst, src, 18);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 2 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-9.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-9.c
new file mode 100644
index 00000000000..238f88b275e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-9.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+void
+foo (int a1, int a2, int a3, int a4, int a5, int a6, char *dst, char *src)
+{
+ __builtin_memcpy (dst, src, 19);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 2 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-1.c b/gcc/testsuite/gcc.target/i386/pieces-memset-1.c
new file mode 100644
index 00000000000..2b8032684b3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+ __builtin_memset (dst, x, 64);
+}
+
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-10.c b/gcc/testsuite/gcc.target/i386/pieces-memset-10.c
new file mode 100644
index 00000000000..a6390d1bd8f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-10.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 3, 64);
+}
+
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-11.c b/gcc/testsuite/gcc.target/i386/pieces-memset-11.c
new file mode 100644
index 00000000000..3fb9038b04f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-11.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 3, 64);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-12.c b/gcc/testsuite/gcc.target/i386/pieces-memset-12.c
new file mode 100644
index 00000000000..fa834566097
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-12.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 3, 66);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\n\]*%zmm" 1 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-13.c b/gcc/testsuite/gcc.target/i386/pieces-memset-13.c
new file mode 100644
index 00000000000..7f2cd3f58ec
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-13.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 3, 33);
+}
+
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-14.c b/gcc/testsuite/gcc.target/i386/pieces-memset-14.c
new file mode 100644
index 00000000000..45ece482464
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-14.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 3, 33);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 1 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-15.c b/gcc/testsuite/gcc.target/i386/pieces-memset-15.c
new file mode 100644
index 00000000000..bddf47d728e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-15.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 3, 33);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 1 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-16.c b/gcc/testsuite/gcc.target/i386/pieces-memset-16.c
new file mode 100644
index 00000000000..1c5d124cecc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-16.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 3, 17);
+}
+
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-17.c b/gcc/testsuite/gcc.target/i386/pieces-memset-17.c
new file mode 100644
index 00000000000..6cdb33557c0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-17.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 3, 17);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-18.c b/gcc/testsuite/gcc.target/i386/pieces-memset-18.c
new file mode 100644
index 00000000000..adbd201b4e7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-18.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 3, 18);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-19.c b/gcc/testsuite/gcc.target/i386/pieces-memset-19.c
new file mode 100644
index 00000000000..7e9cf2e26d8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-19.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 0, 64);
+}
+
+/* { dg-final { scan-assembler-times "pxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-2.c b/gcc/testsuite/gcc.target/i386/pieces-memset-2.c
new file mode 100644
index 00000000000..649f344e8f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+ __builtin_memset (dst, x, 64);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-20.c b/gcc/testsuite/gcc.target/i386/pieces-memset-20.c
new file mode 100644
index 00000000000..b8747e669e8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-20.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 0, 64);
+}
+
+/* { dg-final { scan-assembler-times "vpxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-21.c b/gcc/testsuite/gcc.target/i386/pieces-memset-21.c
new file mode 100644
index 00000000000..4f001c6d06c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-21.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 0, 66);
+}
+
+/* { dg-final { scan-assembler-times "vpxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\n\]*%zmm" 1 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-22.c b/gcc/testsuite/gcc.target/i386/pieces-memset-22.c
new file mode 100644
index 00000000000..5f3c454ef8f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-22.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 0, 33);
+}
+
+/* { dg-final { scan-assembler-times "pxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-23.c b/gcc/testsuite/gcc.target/i386/pieces-memset-23.c
new file mode 100644
index 00000000000..a3b4ffc18e0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-23.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 0, 33);
+}
+
+/* { dg-final { scan-assembler-times "vpxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 1 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-24.c b/gcc/testsuite/gcc.target/i386/pieces-memset-24.c
new file mode 100644
index 00000000000..e222787b541
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-24.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 0, 33);
+}
+
+/* { dg-final { scan-assembler-times "vpxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 1 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-25.c b/gcc/testsuite/gcc.target/i386/pieces-memset-25.c
new file mode 100644
index 00000000000..195ddb635eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-25.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 0, 17);
+}
+
+/* { dg-final { scan-assembler-times "pxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-26.c b/gcc/testsuite/gcc.target/i386/pieces-memset-26.c
new file mode 100644
index 00000000000..13606b2da54
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-26.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 0, 17);
+}
+
+/* { dg-final { scan-assembler-times "pxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-27.c b/gcc/testsuite/gcc.target/i386/pieces-memset-27.c
new file mode 100644
index 00000000000..54a672b6015
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-27.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 0, 17);
+}
+
+/* { dg-final { scan-assembler-times "pxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-28.c b/gcc/testsuite/gcc.target/i386/pieces-memset-28.c
new file mode 100644
index 00000000000..83c2d3f0fde
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-28.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, -1, 64);
+}
+
+/* { dg-final { scan-assembler-times "pcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-29.c b/gcc/testsuite/gcc.target/i386/pieces-memset-29.c
new file mode 100644
index 00000000000..650e6fe66a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-29.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, -1, 64);
+}
+
+/* { dg-final { scan-assembler-not "vpcmpeqd\[ \\t\]+\[^\n\]*%ymm" } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-3.c b/gcc/testsuite/gcc.target/i386/pieces-memset-3.c
new file mode 100644
index 00000000000..2aed6dbc68e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-3.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx512bw -mno-avx512vl -mavx512f -mtune=intel" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+ __builtin_memset (dst, x, 66);
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%ymm" 1 } } */
+/* { dg-final { scan-assembler-times "vinserti64x4\[ \\t\]+\[^\n\]*%zmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\n\]*%zmm" 1 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-30.c b/gcc/testsuite/gcc.target/i386/pieces-memset-30.c
new file mode 100644
index 00000000000..dcec2c700fc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-30.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, -1, 64);
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%ymm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-31.c b/gcc/testsuite/gcc.target/i386/pieces-memset-31.c
new file mode 100644
index 00000000000..5d20af0938d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-31.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, -1, 66);
+}
+
+/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\[^\n\]*%zmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\n\]*%zmm" 1 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-32.c b/gcc/testsuite/gcc.target/i386/pieces-memset-32.c
new file mode 100644
index 00000000000..c5ca0bd17ba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-32.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, -1, 33);
+}
+
+/* { dg-final { scan-assembler-times "pcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-33.c b/gcc/testsuite/gcc.target/i386/pieces-memset-33.c
new file mode 100644
index 00000000000..a87d1b80ae6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-33.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, -1, 33);
+}
+
+/* { dg-final { scan-assembler-not "vpcmpeqd\[ \\t\]+\[^\n\]*%ymm" } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 1 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-34.c b/gcc/testsuite/gcc.target/i386/pieces-memset-34.c
new file mode 100644
index 00000000000..0c2f1ee6049
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-34.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, -1, 33);
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%ymm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 1 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-35.c b/gcc/testsuite/gcc.target/i386/pieces-memset-35.c
new file mode 100644
index 00000000000..b0f4a8b898e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-35.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, -1, 34);
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%ymm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 1 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-36.c b/gcc/testsuite/gcc.target/i386/pieces-memset-36.c
new file mode 100644
index 00000000000..d1f1263c7b2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-36.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+ __builtin_memset (dst, x, 17);
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-37.c b/gcc/testsuite/gcc.target/i386/pieces-memset-37.c
new file mode 100644
index 00000000000..ec59497b116
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-37.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=generic" } */
+
+void
+foo (int a1, int a2, int a3, int a4, int a5, int a6, int x, char *dst)
+{
+ __builtin_memset (dst, x, 66);
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%ymm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-38.c b/gcc/testsuite/gcc.target/i386/pieces-memset-38.c
new file mode 100644
index 00000000000..ed4a24a54fd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-38.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, -1, 33);
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 2 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-39.c b/gcc/testsuite/gcc.target/i386/pieces-memset-39.c
new file mode 100644
index 00000000000..a330bff5f3f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-39.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw -mtune=generic" } */
+
+void
+foo (int a1, int a2, int a3, int a4, int a5, int a6, int x, char *dst)
+{
+ __builtin_memset (dst, x, 66);
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%zmm" 1 } } */
+/* { dg-final { scan-assembler-not "vinserti64x4" } } */
+/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\n\]*%zmm" 1 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-4.c b/gcc/testsuite/gcc.target/i386/pieces-memset-4.c
new file mode 100644
index 00000000000..9256919bfdf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-4.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+ __builtin_memset (dst, x, 33);
+}
+
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-40.c b/gcc/testsuite/gcc.target/i386/pieces-memset-40.c
new file mode 100644
index 00000000000..4eda73ead59
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-40.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+ __builtin_memset (dst, x, 66);
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 4 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-41.c b/gcc/testsuite/gcc.target/i386/pieces-memset-41.c
new file mode 100644
index 00000000000..f86b6986da9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-41.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+ __builtin_memset (dst, x, 33);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 2 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-42.c b/gcc/testsuite/gcc.target/i386/pieces-memset-42.c
new file mode 100644
index 00000000000..df0c122aae7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-42.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 0, 33);
+}
+
+/* { dg-final { scan-assembler-times "vpxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 2 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-43.c b/gcc/testsuite/gcc.target/i386/pieces-memset-43.c
new file mode 100644
index 00000000000..2f2179c2df9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-43.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, -1, 33);
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 2 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-5.c b/gcc/testsuite/gcc.target/i386/pieces-memset-5.c
new file mode 100644
index 00000000000..3e95db5efef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-5.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+ __builtin_memset (dst, x, 33);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-6.c b/gcc/testsuite/gcc.target/i386/pieces-memset-6.c
new file mode 100644
index 00000000000..571113c3a33
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-6.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mtune=intel" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+ __builtin_memset (dst, x, 33);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 1 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-7.c b/gcc/testsuite/gcc.target/i386/pieces-memset-7.c
new file mode 100644
index 00000000000..fd159869817
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-7.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+ __builtin_memset (dst, x, 17);
+}
+
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-8.c b/gcc/testsuite/gcc.target/i386/pieces-memset-8.c
new file mode 100644
index 00000000000..7df0019ef63
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-8.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+ __builtin_memset (dst, x, 17);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-9.c b/gcc/testsuite/gcc.target/i386/pieces-memset-9.c
new file mode 100644
index 00000000000..ed45d590875
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-9.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+ __builtin_memset (dst, x, 17);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* No need to dynamically realign the stack here. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer. */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
--
2.31.1
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v5 07/11] x86: Also pass -mno-avx to pr72839.c
2021-07-01 15:22 [PATCH v5 00/11] Allow TImode/OImode/XImode in op_by_pieces operations H.J. Lu
` (5 preceding siblings ...)
2021-07-01 15:22 ` [PATCH v5 06/11] x86: Add tests for piecewise move and store H.J. Lu
@ 2021-07-01 15:22 ` H.J. Lu
2021-07-01 15:22 ` [PATCH v5 08/11] x86: Also pass -mno-avx to cold-attribute-1.c H.J. Lu
` (3 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: H.J. Lu @ 2021-07-01 15:22 UTC (permalink / raw)
To: gcc-patches
Cc: Richard Biener, Richard Sandiford, Uros Bizjak, Bernd Edlinger
Also pass -mno-avx to pr72839.c to avoid copying data with YMM or ZMM
registers.
* gcc.target/i386/pr72839.c: Also pass -mno-avx.
---
gcc/testsuite/gcc.target/i386/pr72839.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/gcc/testsuite/gcc.target/i386/pr72839.c b/gcc/testsuite/gcc.target/i386/pr72839.c
index ea724f70377..6888d9d0a55 100644
--- a/gcc/testsuite/gcc.target/i386/pr72839.c
+++ b/gcc/testsuite/gcc.target/i386/pr72839.c
@@ -1,6 +1,6 @@
/* { dg-do compile } */
/* { dg-require-effective-target ia32 } */
-/* { dg-options "-O2 -mtune=lakemont" } */
+/* { dg-options "-O2 -mtune=lakemont -mno-avx" } */
extern char *strcpy (char *, const char *);
--
2.31.1
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v5 08/11] x86: Also pass -mno-avx to cold-attribute-1.c
2021-07-01 15:22 [PATCH v5 00/11] Allow TImode/OImode/XImode in op_by_pieces operations H.J. Lu
` (6 preceding siblings ...)
2021-07-01 15:22 ` [PATCH v5 07/11] x86: Also pass -mno-avx to pr72839.c H.J. Lu
@ 2021-07-01 15:22 ` H.J. Lu
2021-07-01 15:22 ` [PATCH v5 09/11] x86: Also pass -mno-avx to sw-1.c for ia32 H.J. Lu
` (2 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: H.J. Lu @ 2021-07-01 15:22 UTC (permalink / raw)
To: gcc-patches
Cc: Richard Biener, Richard Sandiford, Uros Bizjak, Bernd Edlinger
Also pass -mno-avx to cold-attribute-1.c to avoid copying data with YMM or ZMM
registers.
* gcc.target/i386/cold-attribute-1.c: Also pass -mno-avx.
---
gcc/testsuite/gcc.target/i386/cold-attribute-1.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/gcc/testsuite/gcc.target/i386/cold-attribute-1.c b/gcc/testsuite/gcc.target/i386/cold-attribute-1.c
index 57666ac60b6..658eb3e25bb 100644
--- a/gcc/testsuite/gcc.target/i386/cold-attribute-1.c
+++ b/gcc/testsuite/gcc.target/i386/cold-attribute-1.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2" } */
+/* { dg-options "-O2 -mno-avx" } */
#include <string.h>
static inline
__attribute__ ((cold)) void
--
2.31.1
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v5 09/11] x86: Also pass -mno-avx to sw-1.c for ia32
2021-07-01 15:22 [PATCH v5 00/11] Allow TImode/OImode/XImode in op_by_pieces operations H.J. Lu
` (7 preceding siblings ...)
2021-07-01 15:22 ` [PATCH v5 08/11] x86: Also pass -mno-avx to cold-attribute-1.c H.J. Lu
@ 2021-07-01 15:22 ` H.J. Lu
2021-07-01 15:22 ` [PATCH v5 10/11] x86: Update gcc.target/i386/incoming-11.c H.J. Lu
2021-07-01 15:22 ` [PATCH v5 11/11] x86: Also pass -mno-sse to vect8-ret.c H.J. Lu
10 siblings, 0 replies; 12+ messages in thread
From: H.J. Lu @ 2021-07-01 15:22 UTC (permalink / raw)
To: gcc-patches
Cc: Richard Biener, Richard Sandiford, Uros Bizjak, Bernd Edlinger
Also pass -mno-avx to sw-1.c for ia32 since copying data with YMM or ZMM
registers disables shrink-wrapping when the second argument is passed on
the stack.
* gcc.target/i386/sw-1.c: Also pass -mno-avx for ia32.
---
gcc/testsuite/gcc.target/i386/sw-1.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/gcc/testsuite/gcc.target/i386/sw-1.c b/gcc/testsuite/gcc.target/i386/sw-1.c
index aec095eda62..a9c89fca4ec 100644
--- a/gcc/testsuite/gcc.target/i386/sw-1.c
+++ b/gcc/testsuite/gcc.target/i386/sw-1.c
@@ -1,5 +1,6 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mtune=generic -fshrink-wrap -fdump-rtl-pro_and_epilogue" } */
+/* { dg-additional-options "-mno-avx" { target ia32 } } */
/* { dg-skip-if "No shrink-wrapping preformed" { x86_64-*-mingw* } } */
#include <string.h>
--
2.31.1
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v5 10/11] x86: Update gcc.target/i386/incoming-11.c
2021-07-01 15:22 [PATCH v5 00/11] Allow TImode/OImode/XImode in op_by_pieces operations H.J. Lu
` (8 preceding siblings ...)
2021-07-01 15:22 ` [PATCH v5 09/11] x86: Also pass -mno-avx to sw-1.c for ia32 H.J. Lu
@ 2021-07-01 15:22 ` H.J. Lu
2021-07-01 15:22 ` [PATCH v5 11/11] x86: Also pass -mno-sse to vect8-ret.c H.J. Lu
10 siblings, 0 replies; 12+ messages in thread
From: H.J. Lu @ 2021-07-01 15:22 UTC (permalink / raw)
To: gcc-patches
Cc: Richard Biener, Richard Sandiford, Uros Bizjak, Bernd Edlinger
Expect no stack realignment since we no longer realign the stack when
copying data.
* gcc.target/i386/incoming-11.c: Expect no stack realignment.
---
gcc/testsuite/gcc.target/i386/incoming-11.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/gcc/testsuite/gcc.target/i386/incoming-11.c b/gcc/testsuite/gcc.target/i386/incoming-11.c
index a830c96f7d1..4b822684b88 100644
--- a/gcc/testsuite/gcc.target/i386/incoming-11.c
+++ b/gcc/testsuite/gcc.target/i386/incoming-11.c
@@ -15,4 +15,4 @@ void f()
for (i = 0; i < 100; i++) q[i] = 1;
}
-/* { dg-final { scan-assembler "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */
+/* { dg-final { scan-assembler-not "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */
--
2.31.1
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v5 11/11] x86: Also pass -mno-sse to vect8-ret.c
2021-07-01 15:22 [PATCH v5 00/11] Allow TImode/OImode/XImode in op_by_pieces operations H.J. Lu
` (9 preceding siblings ...)
2021-07-01 15:22 ` [PATCH v5 10/11] x86: Update gcc.target/i386/incoming-11.c H.J. Lu
@ 2021-07-01 15:22 ` H.J. Lu
10 siblings, 0 replies; 12+ messages in thread
From: H.J. Lu @ 2021-07-01 15:22 UTC (permalink / raw)
To: gcc-patches
Cc: Richard Biener, Richard Sandiford, Uros Bizjak, Bernd Edlinger
Also pass -mno-sse to vect8-ret.c to disable XMM load/store when running
GCC tests with "-march=x86-64 -m32".
* gcc.target/i386/vect8-ret.c: Also pass -mno-sse.
---
gcc/testsuite/gcc.target/i386/vect8-ret.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/gcc/testsuite/gcc.target/i386/vect8-ret.c b/gcc/testsuite/gcc.target/i386/vect8-ret.c
index 2b2b81ecf7a..6ace07e6e0c 100644
--- a/gcc/testsuite/gcc.target/i386/vect8-ret.c
+++ b/gcc/testsuite/gcc.target/i386/vect8-ret.c
@@ -1,5 +1,5 @@
/* { dg-do compile { target { ia32 && { ! *-*-vxworks* } } } } */
-/* { dg-options "-mmmx -mvect8-ret-in-mem" } */
+/* { dg-options "-mmmx -mno-sse -mvect8-ret-in-mem" } */
#include <mmintrin.h>
--
2.31.1
^ permalink raw reply [flat|nested] 12+ messages in thread