From: "H.J. Lu" <hjl.tools@gmail.com>
To: gcc-patches@gcc.gnu.org
Subject: [PATCH 03/12] Add TARGET_READ_MEMSET_VALUE/TARGET_GEN_MEMSET_VALUE
Date: Thu, 29 Apr 2021 05:54:06 -0700 [thread overview]
Message-ID: <20210429125415.1634118-4-hjl.tools@gmail.com> (raw)
In-Reply-To: <20210429125415.1634118-1-hjl.tools@gmail.com>
Add TARGET_READ_MEMSET_VALUE and TARGET_GEN_MEMSET_VALUE to support
target instructions to duplicate QImode value to TImode/OImode/XImode
value for memset.
gcc/
PR middle-end/90773
* builtins.c (builtin_memset_read_str): Call
targetm.read_memset_value.
(builtin_memset_gen_str): Call targetm.gen_memset_value.
* target.def (read_memset_value): New hook.
(gen_memset_value): Likewise.
* targhooks.c: Include "builtins.h".
(default_read_memset_value): New function.
(default_gen_memset_value): Likewise.
* targhooks.h (default_read_memset_value): New prototype.
(default_gen_memset_value): Likewise.
* config/i386/i386-expand.c (ix86_expand_vector_init_duplicate):
Make it global.
* config/i386/i386-protos.h (ix86_expand_vector_init_duplicate):
New.
* config/i386/i386.c (ix86_gen_memset_value_from_prev): New
function.
(ix86_gen_memset_value): Likewise.
(ix86_read_memset_value): Likewise.
(TARGET_GEN_MEMSET_VALUE): New.
(TARGET_READ_MEMSET_VALUE): Likewise.
* doc/tm.texi.in: Add TARGET_READ_MEMSET_VALUE and
TARGET_GEN_MEMSET_VALUE hooks.
* doc/tm.texi: Regenerated.
gcc/testsuite/
PR middle-end/90773
* gcc.target/i386/pr90773-15.c: New test.
* gcc.target/i386/pr90773-16.c: Likewise.
* gcc.target/i386/pr90773-17.c: Likewise.
* gcc.target/i386/pr90773-18.c: Likewise.
* gcc.target/i386/pr90773-19.c: Likewise.
---
gcc/builtins.c | 45 +---
gcc/config/i386/i386-expand.c | 2 +-
gcc/config/i386/i386-protos.h | 2 +
gcc/config/i386/i386.c | 236 +++++++++++++++++++++
gcc/doc/tm.texi | 16 ++
gcc/doc/tm.texi.in | 4 +
gcc/expr.c | 1 -
gcc/target.def | 20 ++
gcc/targhooks.c | 54 +++++
gcc/targhooks.h | 4 +
gcc/testsuite/gcc.target/i386/pr90773-15.c | 14 ++
gcc/testsuite/gcc.target/i386/pr90773-16.c | 14 ++
gcc/testsuite/gcc.target/i386/pr90773-17.c | 14 ++
gcc/testsuite/gcc.target/i386/pr90773-18.c | 15 ++
gcc/testsuite/gcc.target/i386/pr90773-19.c | 14 ++
15 files changed, 412 insertions(+), 43 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-15.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-16.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-17.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-18.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr90773-19.c
diff --git a/gcc/builtins.c b/gcc/builtins.c
index 2d6bf4a65b4..c5610795eec 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -6586,24 +6586,11 @@ expand_builtin_strncpy (tree exp, rtx target)
previous iteration. */
rtx
-builtin_memset_read_str (void *data, void *prevp,
+builtin_memset_read_str (void *data, void *prev,
HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
scalar_int_mode mode)
{
- by_pieces_prev *prev = (by_pieces_prev *) prevp;
- if (prev != nullptr && prev->data != nullptr)
- {
- /* Use the previous data in the same mode. */
- if (prev->mode == mode)
- return prev->data;
- }
-
- const char *c = (const char *) data;
- char *p = XALLOCAVEC (char, GET_MODE_SIZE (mode));
-
- memset (p, *c, GET_MODE_SIZE (mode));
-
- return c_readstr (p, mode);
+ return targetm.read_memset_value ((const char *) data, prev, mode);
}
/* Callback routine for store_by_pieces. Return the RTL of a register
@@ -6613,35 +6600,11 @@ builtin_memset_read_str (void *data, void *prevp,
nullptr, it has the RTL info from the previous iteration. */
static rtx
-builtin_memset_gen_str (void *data, void *prevp,
+builtin_memset_gen_str (void *data, void *prev,
HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
scalar_int_mode mode)
{
- rtx target, coeff;
- size_t size;
- char *p;
-
- by_pieces_prev *prev = (by_pieces_prev *) prevp;
- if (prev != nullptr && prev->data != nullptr)
- {
- /* Use the previous data in the same mode. */
- if (prev->mode == mode)
- return prev->data;
-
- return simplify_gen_subreg (mode, prev->data, prev->mode, 0);
- }
-
- size = GET_MODE_SIZE (mode);
- if (size == 1)
- return (rtx) data;
-
- p = XALLOCAVEC (char, size);
- memset (p, 1, size);
- coeff = c_readstr (p, mode);
-
- target = convert_to_mode (mode, (rtx) data, 1);
- target = expand_mult (mode, target, coeff, NULL_RTX, 1);
- return force_reg (mode, target);
+ return targetm.gen_memset_value ((rtx) data, prev, mode);
}
/* Expand expression EXP, which is a call to the memset builtin. Return
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 516440eb5c1..1942b46efbf 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -13586,7 +13586,7 @@ static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
with all elements equal to VAR. Return true if successful. */
-static bool
+bool
ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
rtx target, rtx val)
{
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 7782cf1163f..eae28acbc8d 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -257,6 +257,8 @@ extern void ix86_expand_mul_widen_hilo (rtx, rtx, rtx, bool, bool);
extern void ix86_expand_sse2_mulv4si3 (rtx, rtx, rtx);
extern void ix86_expand_sse2_mulvxdi3 (rtx, rtx, rtx);
extern void ix86_expand_sse2_abs (rtx, rtx);
+extern bool ix86_expand_vector_init_duplicate (bool, machine_mode, rtx,
+ rtx);
/* In i386-c.c */
extern void ix86_target_macros (void);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 68f33f96f5a..e6ee3ef630a 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -23008,6 +23008,236 @@ ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
}
}
+/* Return the memset value in MODE derived from PREVP, or nullptr if none. */
+
+static rtx
+ix86_gen_memset_value_from_prev (by_pieces_prev *prevp,
+ scalar_int_mode mode)
+{
+ rtx prev = prevp->data;
+
+ /* Reuse the previous data as is when it is already in MODE. */
+ if (prevp->mode == mode)
+ return prev;
+
+ machine_mode prev_mode = prevp->mode;
+ size_t size = GET_MODE_SIZE (prev_mode);
+
+ /* NB: Skip if the previous value is 1 byte or less. CONST_WIDE_INT
+ is in VOIDmode whose size is 0. */
+ if (size <= 1)
+ return nullptr;
+
+ rtx reg, reg_ti;
+ switch (size)
+ {
+ default:
+ gcc_unreachable ();
+
+ case 2:
+ case 4:
+ return simplify_gen_subreg (mode, prev, prev_mode, 0);
+
+ case 8:
+ /* In 64-bit mode, use SUBREG since word size is 8 bytes. */
+ if (TARGET_64BIT)
+ return simplify_gen_subreg (mode, prev, prev_mode, 0);
+
+ switch (GET_MODE_SIZE (mode))
+ {
+ default:
+ gcc_unreachable ();
+ case 2:
+ case 4:
+do_hi_si_mode:
+ /* In 32-bit mode, extract the value from an 8-byte
+ register into an integer register first. */
+ reg = gen_reg_rtx (SImode);
+ emit_move_insn (reg,
+ simplify_gen_subreg (SImode, prev,
+ prev_mode, 0));
+ return simplify_gen_subreg (mode, reg, SImode, 0);
+ }
+ break;
+
+ case 16:
+ switch (GET_MODE_SIZE (mode))
+ {
+ default:
+ gcc_unreachable ();
+ case 2:
+ case 4:
+ /* Extract the value from a 16-byte vector register into
+ an integer register first. */
+ goto do_hi_si_mode;
+ case 8:
+ return simplify_gen_subreg (mode, prev, prev_mode, 0);
+ case 16:
+ return prev;
+ }
+ break;
+
+ case 32:
+ switch (GET_MODE_SIZE (mode))
+ {
+ default:
+ gcc_unreachable ();
+ case 2:
+do_himode:
+ /* Extract the value from a 32-byte vector register into
+ a 16-byte vector register first. */
+ reg_ti = gen_reg_rtx (TImode);
+ emit_move_insn (reg_ti,
+ simplify_gen_subreg (TImode, prev,
+ prev_mode, 0));
+ /* Then extract the value from a 16-byte vector register
+ into an integer register. */
+ reg = gen_reg_rtx (SImode);
+ emit_move_insn (reg,
+ simplify_gen_subreg (SImode, reg_ti,
+ TImode, 0));
+ return simplify_gen_subreg (mode, reg, SImode, 0);
+
+ case 4:
+ case 8:
+do_si_di_mode:
+ /* Extract the value from a 32-byte vector register into
+ a 16-byte vector register first. */
+ reg_ti = gen_reg_rtx (TImode);
+ emit_move_insn (reg_ti,
+ simplify_gen_subreg (TImode, prev,
+ prev_mode, 0));
+ /* Generate 4/8-byte SSE -> INT move instruction. */
+ reg = gen_reg_rtx (mode);
+ emit_move_insn (reg,
+ simplify_gen_subreg (mode, reg_ti,
+ TImode, 0));
+ return reg;
+ case 16:
+ return simplify_gen_subreg (mode, prev, prev_mode, 0);
+ case 32:
+ return prev;
+ }
+
+ case 64:
+ switch (GET_MODE_SIZE (mode))
+ {
+ default:
+ gcc_unreachable ();
+ case 2:
+ /* Extract the value from a 64-byte vector register into
+ a 16-byte vector register first. */
+ goto do_himode;
+ case 4:
+ case 8:
+ /* Extract the value from a 64-byte vector register into
+ a 16-byte vector register first. */
+ goto do_si_di_mode;
+ case 16:
+ case 32:
+ return simplify_gen_subreg (mode, prev, prev_mode, 0);
+ case 64:
+ return prev;
+ }
+ }
+
+ return nullptr;
+}
+
+/* Implement TARGET_GEN_MEMSET_VALUE: return DATA duplicated across MODE. */
+
+static rtx
+ix86_gen_memset_value (rtx data, void *prevp, scalar_int_mode mode)
+{
+ /* Don't use the previous value if size is 1. */
+ if (GET_MODE_SIZE (mode) == 1)
+ return data;
+
+ by_pieces_prev *prev = (by_pieces_prev *) prevp;
+ if (prev != nullptr && prev->data != nullptr)
+ {
+ rtx value = ix86_gen_memset_value_from_prev (prev, mode);
+ if (value)
+ return value;
+ }
+
+ /* Use default_gen_memset_value if vector store won't be used. */
+ if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (DImode))
+ return default_gen_memset_value (data, prevp, mode);
+
+ rtx one, target;
+ scalar_mode one_mode;
+
+ switch (GET_MODE_SIZE (mode))
+ {
+ default:
+ gcc_unreachable ();
+
+ case 64:
+ if (!TARGET_AVX512BW)
+ {
+ rtx tmp = gen_reg_rtx (V32QImode);
+ if (!ix86_expand_vector_init_duplicate (false, V32QImode,
+ tmp, data))
+ gcc_unreachable ();
+ target = gen_rtx_VEC_CONCAT (V64QImode, tmp, tmp);
+ return convert_to_mode (mode, target, 1);
+ }
+ /* FALLTHRU */
+ case 16:
+ case 32:
+ one_mode = QImode;
+ one = data;
+ break;
+ }
+
+ unsigned int nunits = GET_MODE_SIZE (mode) / GET_MODE_SIZE (one_mode);
+ machine_mode vector_mode;
+ if (!mode_for_vector (one_mode, nunits).exists (&vector_mode))
+ gcc_unreachable ();
+
+ target = gen_reg_rtx (vector_mode, UNITS_PER_WORD * BITS_PER_UNIT);
+ if (!ix86_expand_vector_init_duplicate (false, vector_mode, target,
+ one))
+ gcc_unreachable ();
+
+ return convert_to_mode (mode, target, 1,
+ UNITS_PER_WORD * BITS_PER_UNIT);
+}
+
+/* Implement TARGET_READ_MEMSET_VALUE: return the byte *STR spread over MODE. */
+
+static rtx
+ix86_read_memset_value (const char *str, void *prevp,
+ scalar_int_mode mode)
+{
+ rtx value;
+
+ by_pieces_prev *prev = (by_pieces_prev *) prevp;
+ if (prev != nullptr && prev->data != nullptr)
+ {
+ /* Don't use the previous value if size is 1. */
+ if (GET_MODE_SIZE (mode) == 1)
+ return default_read_memset_value (str, nullptr, mode);
+
+ value = ix86_gen_memset_value_from_prev (prev, mode);
+ if (value)
+ return value;
+
+ return default_read_memset_value (str, nullptr, mode);
+ }
+
+ /* Use default_read_memset_value if vector store can't be used.
+ NB: Need AVX2 for fast vector duplication and gen_reg_rtx. */
+ if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (DImode)
+ || !TARGET_AVX2
+ || !reg_rtx_no)
+ return default_read_memset_value (str, nullptr, mode);
+
+ value = default_read_memset_value (str, nullptr, QImode);
+ return ix86_gen_memset_value (value, nullptr, mode);
+}
+
/* Address space support.
This is not "far pointers" in the 16-bit sense, but an easy way
@@ -23909,6 +24139,12 @@ static bool ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
#undef TARGET_LIBC_HAS_FAST_FUNCTION
#define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function
+#undef TARGET_GEN_MEMSET_VALUE
+#define TARGET_GEN_MEMSET_VALUE ix86_gen_memset_value
+
+#undef TARGET_READ_MEMSET_VALUE
+#define TARGET_READ_MEMSET_VALUE ix86_read_memset_value
+
#if CHECKING_P
#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 7e8fb8b6ee8..2861d60ff28 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -11944,6 +11944,22 @@ This function prepares to emit a conditional comparison within a sequence
@var{bit_code} is @code{AND} or @code{IOR}, which is the op on the compares.
@end deftypefn
+@deftypefn {Target Hook} rtx TARGET_READ_MEMSET_VALUE (const char *@var{c}, void *@var{prev}, scalar_int_mode @var{mode})
+This function returns the RTL of a constant integer corresponding to
+target reading @code{GET_MODE_SIZE (@var{mode})} bytes from the stringn
+constant @var{str}. If @var{prev} is not @samp{nullptr}, it contains
+the RTL information from the previous interation.
+@end deftypefn
+
+@deftypefn {Target Hook} rtx TARGET_GEN_MEMSET_VALUE (rtx @var{data}, void *@var{prev}, scalar_int_mode @var{mode})
+This function returns the RTL of a register containing
+@code{GET_MODE_SIZE (@var{mode})} consecutive copies of the unsigned
+char value given in the RTL register @var{data}. For example, if
+@var{mode} is 4 bytes wide, return the RTL for 0x01010101*@var{data}.
+If @var{PREV} is not @samp{nullptr}, it is the RTL information from
+the previous iteration.
+@end deftypefn
+
@deftypefn {Target Hook} unsigned TARGET_LOOP_UNROLL_ADJUST (unsigned @var{nunroll}, class loop *@var{loop})
This target hook returns a new value for the number of times @var{loop}
should be unrolled. The parameter @var{nunroll} is the number of times
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 20acf363ed9..3fabf2b6181 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -8032,6 +8032,10 @@ lists.
@hook TARGET_GEN_CCMP_NEXT
+@hook TARGET_READ_MEMSET_VALUE
+
+@hook TARGET_GEN_MEMSET_VALUE
+
@hook TARGET_LOOP_UNROLL_ADJUST
@defmac POWI_MAX_MULTS
diff --git a/gcc/expr.c b/gcc/expr.c
index 42db4ddbe0a..56e845a40da 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -1171,7 +1171,6 @@ op_by_pieces_d::run ()
/* NB: widest_int_mode_for_size checks M_MAX_SIZE > 1. */
scalar_int_mode mode = widest_int_mode_for_size (m_max_size);
mode = get_usable_mode (mode, m_len);
-
by_pieces_prev to_prev = { nullptr, mode };
by_pieces_prev from_prev = { nullptr, mode };
diff --git a/gcc/target.def b/gcc/target.def
index c3a4280b655..25dc1850e0c 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -2692,6 +2692,26 @@ DEFHOOK
rtx, (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev, int cmp_code, tree op0, tree op1, int bit_code),
NULL)
+DEFHOOK
+(read_memset_value,
+ "This function returns the RTL of a constant integer corresponding to\n\
+target reading @code{GET_MODE_SIZE (@var{mode})} bytes from the stringn\n\
+constant @var{str}. If @var{prev} is not @samp{nullptr}, it contains\n\
+the RTL information from the previous interation.",
+ rtx, (const char *c, void *prev, scalar_int_mode mode),
+ default_read_memset_value)
+
+DEFHOOK
+(gen_memset_value,
+ "This function returns the RTL of a register containing\n\
+@code{GET_MODE_SIZE (@var{mode})} consecutive copies of the unsigned\n\
+char value given in the RTL register @var{data}. For example, if\n\
+@var{mode} is 4 bytes wide, return the RTL for 0x01010101*@var{data}.\n\
+If @var{PREV} is not @samp{nullptr}, it is the RTL information from\n\
+the previous iteration.",
+ rtx, (rtx data, void *prev, scalar_int_mode mode),
+ default_gen_memset_value)
+
/* Return a new value for loop unroll size. */
DEFHOOK
(loop_unroll_adjust,
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index 952fad422eb..e4766be6683 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -90,6 +90,7 @@ along with GCC; see the file COPYING3. If not see
#include "attribs.h"
#include "asan.h"
#include "emit-rtl.h"
+#include "builtins.h"
bool
default_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED,
@@ -2547,4 +2548,57 @@ default_memtag_untagged_pointer (rtx tagged_pointer, rtx target)
return untagged_base;
}
+/* Default implementation of TARGET_READ_MEMSET_VALUE: fill MODE with *C. */
+
+rtx
+default_read_memset_value (const char *c, void *prevp,
+ scalar_int_mode mode)
+{
+ by_pieces_prev *prev = (by_pieces_prev *) prevp;
+ if (prev != nullptr && prev->data != nullptr)
+ {
+ /* Reuse the previous data when it is already in MODE. */
+ if (prev->mode == mode)
+ return prev->data;
+ }
+
+ char *p = XALLOCAVEC (char, GET_MODE_SIZE (mode));
+
+ memset (p, *c, GET_MODE_SIZE (mode));
+
+ return c_readstr (p, mode);
+}
+
+/* Default implementation of TARGET_GEN_MEMSET_VALUE: DATA * 0x0101...01. */
+
+rtx
+default_gen_memset_value (rtx data, void *prevp, scalar_int_mode mode)
+{
+ rtx target, coeff;
+ size_t size;
+ char *p;
+
+ by_pieces_prev *prev = (by_pieces_prev *) prevp;
+ if (prev != nullptr && prev->data != nullptr)
+ {
+ /* Reuse the previous data as is when it is already in MODE. */
+ if (prev->mode == mode)
+ return prev->data;
+
+ return simplify_gen_subreg (mode, prev->data, prev->mode, 0);
+ }
+
+ size = GET_MODE_SIZE (mode);
+ if (size == 1)
+ return data;
+
+ p = XALLOCAVEC (char, size);
+ memset (p, 1, size);
+ coeff = c_readstr (p, mode);
+
+ target = convert_to_mode (mode, data, 1);
+ target = expand_mult (mode, target, coeff, NULL_RTX, 1);
+ return force_reg (mode, target);
+}
+
#include "gt-targhooks.h"
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index 9928d064abd..c34f3f9480e 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -300,4 +300,8 @@ extern rtx default_memtag_set_tag (rtx, rtx, rtx);
extern rtx default_memtag_extract_tag (rtx, rtx);
extern rtx default_memtag_untagged_pointer (rtx, rtx);
+extern rtx default_read_memset_value (const char *, void *,
+ scalar_int_mode);
+extern rtx default_gen_memset_value (rtx, void *, scalar_int_mode);
+
#endif /* GCC_TARGHOOKS_H */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-15.c b/gcc/testsuite/gcc.target/i386/pr90773-15.c
new file mode 100644
index 00000000000..c0a96fed892
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90773-15.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512" } */
+
+extern char *dst;
+
+void
+foo (int c)
+{
+ __builtin_memset (dst, c, 17);
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%edi, %xmm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]+%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "movb\[\\t \]+%dil, 16\\(%\[\^,\]+\\)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-16.c b/gcc/testsuite/gcc.target/i386/pr90773-16.c
new file mode 100644
index 00000000000..d2d1ec6141c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90773-16.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, -1, 17);
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeqd" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]+%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "movb\[\\t \]+\\\$-1, 16\\(%\[\^,\]+\\)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-17.c b/gcc/testsuite/gcc.target/i386/pr90773-17.c
new file mode 100644
index 00000000000..6c8da7d24ef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90773-17.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 12, 19);
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcastb" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]+%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovd\[\\t \]+%xmm\[0-9\]+, 15\\(%\[\^,\]+\\)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-18.c b/gcc/testsuite/gcc.target/i386/pr90773-18.c
new file mode 100644
index 00000000000..b0687abbe01
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90773-18.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake-avx512" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 12, 9);
+}
+
+/* { dg-final { scan-assembler-times "movabsq\[\\t \]+\\\$868082074056920076, %r" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "movl\[\\t \]+\\\$202116108, \\(%\[\^,\]+\\)" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "movl\[\\t \]+\\\$202116108, 4\\(%\[\^,\]+\\)" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "movb\[\\t \]+\\\$12, 8\\(%\[\^,\]+\\)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-19.c b/gcc/testsuite/gcc.target/i386/pr90773-19.c
new file mode 100644
index 00000000000..8aa5540bacc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90773-19.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 12, 9);
+}
+
+/* { dg-final { scan-assembler-times "movabsq\[\\t \]+\\\$868082074056920076, %r" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "movl\[\\t \]+\\\$202116108, \\(%\[\^,\]+\\)" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "movl\[\\t \]+\\\$202116108, 4\\(%\[\^,\]+\\)" 1 { target ia32 } } } */
--
2.31.1
next prev parent reply other threads:[~2021-04-29 12:54 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-04-29 12:54 [PATCH 00/12] Allow TImode/OImode/XImode in op_by_pieces operations H.J. Lu
2021-04-29 12:54 ` [PATCH 01/12] Update alignment_for_piecewise_move H.J. Lu
2021-04-30 8:59 ` Richard Sandiford
2021-04-29 12:54 ` [PATCH 02/12] Allow generating pseudo register with specific alignment H.J. Lu
2021-04-30 9:06 ` Richard Sandiford
2021-04-30 12:06 ` H.J. Lu
2021-04-30 12:42 ` Richard Sandiford
2021-04-30 12:49 ` H.J. Lu
2021-04-30 13:34 ` H.J. Lu
2021-04-30 15:56 ` Richard Sandiford
2021-04-30 17:33 ` H.J. Lu
2021-05-03 8:18 ` Richard Biener
2021-05-10 9:39 ` Richard Sandiford
2021-05-10 13:29 ` H.J. Lu
2021-05-10 13:59 ` Richard Biener
2021-05-10 14:11 ` H.J. Lu
2021-05-10 16:23 ` Richard Sandiford
2021-05-11 6:06 ` Richard Biener
2021-04-29 12:54 ` H.J. Lu [this message]
2021-04-29 12:54 ` [PATCH 04/12] x86: Avoid stack realignment when copying data H.J. Lu
2021-04-29 12:54 ` [PATCH 05/12] Remove MAX_BITSIZE_MODE_ANY_INT H.J. Lu
2021-04-29 12:54 ` [PATCH 06/12] x86: Update piecewise move and store H.J. Lu
2021-04-29 12:54 ` [PATCH 07/12] x86: Add AVX2 tests for PR middle-end/90773 H.J. Lu
2021-04-29 12:54 ` [PATCH 08/12] x86: Add tests for piecewise move and store H.J. Lu
2021-04-29 12:54 ` [PATCH 09/12] x86: Also pass -mno-avx to pr72839.c H.J. Lu
2021-04-29 12:54 ` [PATCH 10/12] x86: Also pass -mno-avx to cold-attribute-1.c H.J. Lu
2021-04-29 12:54 ` [PATCH 11/12] x86: Also pass -mno-avx to sw-1.c for ia32 H.J. Lu
2021-04-29 12:54 ` [PATCH 12/12] x86: Update gcc.target/i386/incoming-11.c H.J. Lu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210429125415.1634118-4-hjl.tools@gmail.com \
--to=hjl.tools@gmail.com \
--cc=gcc-patches@gcc.gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).