public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] x86: Add TARGET_AVX256_[MOVE|STORE]_BY_PIECES
@ 2021-09-09  3:21 H.J. Lu
  2021-09-09  5:09 ` Hongtao Liu
  0 siblings, 1 reply; 2+ messages in thread
From: H.J. Lu @ 2021-09-09  3:21 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak, Hongtao Liu

1. Add TARGET_AVX256_MOVE_BY_PIECES to perform move by-pieces operation
with 256-bit AVX instructions.
2. Add TARGET_AVX256_STORE_BY_PIECES to perform move and store by-pieces
operations with 256-bit AVX instructions.

They are enabled only for Intel Alder Lake and Intel processors with
AVX512.

gcc/

	PR target/101935
	* config/i386/i386.h (TARGET_AVX256_MOVE_BY_PIECES): New.
	(TARGET_AVX256_STORE_BY_PIECES): Likewise.
	(MOVE_MAX): Check TARGET_AVX256_MOVE_BY_PIECES and
	TARGET_AVX256_STORE_BY_PIECES instead of
	TARGET_AVX256_SPLIT_UNALIGNED_LOAD and
	TARGET_AVX256_SPLIT_UNALIGNED_STORE.
	(STORE_MAX_PIECES): Check TARGET_AVX256_STORE_BY_PIECES instead
	of TARGET_AVX256_SPLIT_UNALIGNED_STORE.
	* config/i386/x86-tune.def (X86_TUNE_AVX256_MOVE_BY_PIECES): New.
	(X86_TUNE_AVX256_STORE_BY_PIECES): Likewise.

gcc/testsuite/

	PR target/101935
	* g++.target/i386/pr80566-1.C: Add
	-mtune-ctrl=avx256_store_by_pieces.
	* gcc.target/i386/pr100865-4a.c: Likewise.
	* gcc.target/i386/pr100865-10a.c: Likewise.
	* gcc.target/i386/pr90773-20.c: Likewise.
	* gcc.target/i386/pr90773-21.c: Likewise.
	* gcc.target/i386/pr90773-22.c: Likewise.
	* gcc.target/i386/pr90773-23.c: Likewise.
	* g++.target/i386/pr80566-2.C: Add
	-mtune-ctrl=avx256_move_by_pieces.
	* gcc.target/i386/eh_return-1.c: Likewise.
	* gcc.target/i386/pr90773-26.c: Likewise.
	* gcc.target/i386/pieces-memcpy-12.c: Replace -mtune=haswell
	with -mtune-ctrl=avx256_move_by_pieces.
	* gcc.target/i386/pieces-memcpy-15.c: Likewise.
	* gcc.target/i386/pieces-memset-2.c: Replace -mtune=haswell
	with -mtune-ctrl=avx256_store_by_pieces.
	* gcc.target/i386/pieces-memset-5.c: Likewise.
	* gcc.target/i386/pieces-memset-11.c: Likewise.
	* gcc.target/i386/pieces-memset-14.c: Likewise.
	* gcc.target/i386/pieces-memset-20.c: Likewise.
	* gcc.target/i386/pieces-memset-23.c: Likewise.
	* gcc.target/i386/pieces-memset-29.c: Likewise.
	* gcc.target/i386/pieces-memset-30.c: Likewise.
	* gcc.target/i386/pieces-memset-33.c: Likewise.
	* gcc.target/i386/pieces-memset-34.c: Likewise.
	* gcc.target/i386/pieces-memset-44.c: Likewise.
	* gcc.target/i386/pieces-memset-37.c: Replace -mtune=generic
	with -mtune-ctrl=avx256_store_by_pieces.
---
 gcc/config/i386/i386.h                           | 10 +++++++---
 gcc/config/i386/x86-tune.def                     | 11 +++++++++++
 gcc/testsuite/g++.target/i386/pr80566-1.C        |  2 +-
 gcc/testsuite/g++.target/i386/pr80566-2.C        |  2 +-
 gcc/testsuite/gcc.target/i386/eh_return-1.c      |  2 +-
 gcc/testsuite/gcc.target/i386/pieces-memcpy-12.c |  2 +-
 gcc/testsuite/gcc.target/i386/pieces-memcpy-15.c |  2 +-
 gcc/testsuite/gcc.target/i386/pieces-memset-11.c |  2 +-
 gcc/testsuite/gcc.target/i386/pieces-memset-14.c |  2 +-
 gcc/testsuite/gcc.target/i386/pieces-memset-2.c  |  2 +-
 gcc/testsuite/gcc.target/i386/pieces-memset-20.c |  2 +-
 gcc/testsuite/gcc.target/i386/pieces-memset-23.c |  2 +-
 gcc/testsuite/gcc.target/i386/pieces-memset-29.c |  2 +-
 gcc/testsuite/gcc.target/i386/pieces-memset-30.c |  2 +-
 gcc/testsuite/gcc.target/i386/pieces-memset-33.c |  2 +-
 gcc/testsuite/gcc.target/i386/pieces-memset-34.c |  2 +-
 gcc/testsuite/gcc.target/i386/pieces-memset-37.c |  2 +-
 gcc/testsuite/gcc.target/i386/pieces-memset-44.c |  2 +-
 gcc/testsuite/gcc.target/i386/pieces-memset-5.c  |  2 +-
 gcc/testsuite/gcc.target/i386/pr100865-10a.c     |  2 +-
 gcc/testsuite/gcc.target/i386/pr100865-4a.c      |  2 +-
 gcc/testsuite/gcc.target/i386/pr90773-20.c       |  2 +-
 gcc/testsuite/gcc.target/i386/pr90773-21.c       |  2 +-
 gcc/testsuite/gcc.target/i386/pr90773-22.c       |  2 +-
 gcc/testsuite/gcc.target/i386/pr90773-23.c       |  2 +-
 gcc/testsuite/gcc.target/i386/pr90773-26.c       |  2 +-
 26 files changed, 42 insertions(+), 27 deletions(-)

diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index f671dae9236..9d45fe9611b 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -403,6 +403,10 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
 	ix86_tune_features[X86_TUNE_AVOID_LEA_FOR_ADDR]
 #define TARGET_SOFTWARE_PREFETCHING_BENEFICIAL \
 	ix86_tune_features[X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL]
+#define TARGET_AVX256_MOVE_BY_PIECES \
+	ix86_tune_features[X86_TUNE_AVX256_MOVE_BY_PIECES]
+#define TARGET_AVX256_STORE_BY_PIECES \
+	ix86_tune_features[X86_TUNE_AVX256_STORE_BY_PIECES]
 #define TARGET_AVX256_SPLIT_REGS \
 	ix86_tune_features[X86_TUNE_AVX256_SPLIT_REGS]
 #define TARGET_GENERAL_REGS_SSE_SPILL \
@@ -1781,8 +1785,8 @@ typedef struct ix86_args {
    ? 64 \
    : ((TARGET_AVX \
        && !TARGET_PREFER_AVX128 \
-       && !TARGET_AVX256_SPLIT_UNALIGNED_LOAD \
-       && !TARGET_AVX256_SPLIT_UNALIGNED_STORE) \
+       && (TARGET_AVX256_MOVE_BY_PIECES \
+	   || TARGET_AVX256_STORE_BY_PIECES)) \
       ? 32 \
       : ((TARGET_SSE2 \
 	  && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL \
@@ -1799,7 +1803,7 @@ typedef struct ix86_args {
       ? 64 \
       : ((TARGET_AVX \
 	  && !TARGET_PREFER_AVX128 \
-	  && !TARGET_AVX256_SPLIT_UNALIGNED_STORE) \
+	  && TARGET_AVX256_STORE_BY_PIECES) \
 	  ? 32 \
 	  : ((TARGET_SSE2 \
 	      && TARGET_SSE_UNALIGNED_STORE_OPTIMAL) \
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 8f55da89c92..2f221b1f8c9 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -484,6 +484,17 @@ DEF_TUNE (X86_TUNE_AVX128_OPTIMAL, "avx128_optimal", m_BDVER | m_BTVER2
    instructions in the auto-vectorizer.  */
 DEF_TUNE (X86_TUNE_AVX256_OPTIMAL, "avx256_optimal", m_CORE_AVX512)
 
+/* X86_TUNE_AVX256_MOVE_BY_PIECES: Optimize move_by_pieces with 256-bit
+   AVX instructions.  */
+DEF_TUNE (X86_TUNE_AVX256_MOVE_BY_PIECES, "avx256_move_by_pieces",
+	  m_ALDERLAKE | m_CORE_AVX512)
+
+/* X86_TUNE_AVX256_STORE_BY_PIECES: Optimize store_by_pieces with 256-bit
+   AVX instructions.  */
+DEF_TUNE (X86_TUNE_AVX256_STORE_BY_PIECES, "avx256_store_by_pieces",
+	  m_ALDERLAKE | m_CORE_AVX512)
+
+/*****************************************************************************/
 /*****************************************************************************/
 /* Historical relics: tuning flags that helps a specific old CPU designs     */
 /*****************************************************************************/
diff --git a/gcc/testsuite/g++.target/i386/pr80566-1.C b/gcc/testsuite/g++.target/i386/pr80566-1.C
index 753f9740529..29da31d6bb6 100644
--- a/gcc/testsuite/g++.target/i386/pr80566-1.C
+++ b/gcc/testsuite/g++.target/i386/pr80566-1.C
@@ -1,5 +1,5 @@
 // { dg-do compile }
-// { dg-options "-O2 -march=haswell" }
+// { dg-options "-O2 -march=haswell -mtune-ctrl=avx256_store_by_pieces" }
 
 #include <cstring>
 
diff --git a/gcc/testsuite/g++.target/i386/pr80566-2.C b/gcc/testsuite/g++.target/i386/pr80566-2.C
index 2a2e82d0a3a..9ffd2c8cadb 100644
--- a/gcc/testsuite/g++.target/i386/pr80566-2.C
+++ b/gcc/testsuite/g++.target/i386/pr80566-2.C
@@ -1,5 +1,5 @@
 // { dg-do compile }
-// { dg-options "-O2 -march=haswell" }
+// { dg-options "-O2 -march=haswell -mtune-ctrl=avx256_move_by_pieces" }
 
 #include <cstring>
 
diff --git a/gcc/testsuite/gcc.target/i386/eh_return-1.c b/gcc/testsuite/gcc.target/i386/eh_return-1.c
index 671ba635e88..b21fd75fc93 100644
--- a/gcc/testsuite/gcc.target/i386/eh_return-1.c
+++ b/gcc/testsuite/gcc.target/i386/eh_return-1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -march=haswell -mno-avx512f" } */
+/* { dg-options "-O2 -march=haswell -mno-avx512f -mtune-ctrl=avx256_move_by_pieces" } */
 
 struct _Unwind_Context
 {
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-12.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-12.c
index f1432ebe517..8a82baff5f1 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-12.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-12.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_move_by_pieces" } */
 
 extern char *dst, *src;
 
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-15.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-15.c
index 695e8c3fa67..4fb94ce7bd5 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-15.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-15.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_move_by_pieces" } */
 
 extern char *dst, *src;
 
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-11.c b/gcc/testsuite/gcc.target/i386/pieces-memset-11.c
index 3fb9038b04f..3802eb7c147 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-11.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-11.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
 
 extern char *dst;
 
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-14.c b/gcc/testsuite/gcc.target/i386/pieces-memset-14.c
index 45ece482464..10bc085f83b 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-14.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-14.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
 
 extern char *dst;
 
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-2.c b/gcc/testsuite/gcc.target/i386/pieces-memset-2.c
index 649f344e8f6..4ebfc4df090 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-2.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-2.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
 
 extern char *dst;
 
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-20.c b/gcc/testsuite/gcc.target/i386/pieces-memset-20.c
index b8747e669e8..1dc4db180d3 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-20.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-20.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
 
 extern char *dst;
 
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-23.c b/gcc/testsuite/gcc.target/i386/pieces-memset-23.c
index a3b4ffc18e0..9232864024e 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-23.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-23.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
 
 extern char *dst;
 
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-29.c b/gcc/testsuite/gcc.target/i386/pieces-memset-29.c
index 650e6fe66a5..3b07a64e3f6 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-29.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-29.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
 
 extern char *dst;
 
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-30.c b/gcc/testsuite/gcc.target/i386/pieces-memset-30.c
index dcec2c700fc..59595e6d3c4 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-30.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-30.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune-ctrl=avx256_store_by_pieces" } */
 
 extern char *dst;
 
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-33.c b/gcc/testsuite/gcc.target/i386/pieces-memset-33.c
index a87d1b80ae6..68646223b0e 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-33.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-33.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
 
 extern char *dst;
 
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-34.c b/gcc/testsuite/gcc.target/i386/pieces-memset-34.c
index 0c2f1ee6049..52a16a0292d 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-34.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-34.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune-ctrl=avx256_store_by_pieces" } */
 
 extern char *dst;
 
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-37.c b/gcc/testsuite/gcc.target/i386/pieces-memset-37.c
index ec59497b116..fd09bd153ce 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-37.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-37.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=generic" } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune-ctrl=avx256_store_by_pieces" } */
 
 void
 foo (int a1, int a2, int a3, int a4, int a5, int a6, int x, char *dst)
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-44.c b/gcc/testsuite/gcc.target/i386/pieces-memset-44.c
index ecc31be1a34..5986f8e8b23 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-44.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-44.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
 
 extern char *dst;
 
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-5.c b/gcc/testsuite/gcc.target/i386/pieces-memset-5.c
index 3e95db5efef..e2379df71aa 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-5.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-5.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
 
 extern char *dst;
 
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-10a.c b/gcc/testsuite/gcc.target/i386/pr100865-10a.c
index 98b6dfb16f3..1d849a381c0 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-10a.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-10a.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target int128 } } */
-/* { dg-options "-O3 -march=skylake" } */
+/* { dg-options "-O3 -march=skylake -mtune-ctrl=avx256_store_by_pieces" } */
 
 extern __int128 array[16];
 
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-4a.c b/gcc/testsuite/gcc.target/i386/pr100865-4a.c
index 365487337ae..8609d1128b8 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-4a.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-4a.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -march=skylake" } */
+/* { dg-options "-O2 -march=skylake -mtune-ctrl=avx256_store_by_pieces" } */
 
 extern char array[64];
 
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-20.c b/gcc/testsuite/gcc.target/i386/pr90773-20.c
index e61e405f2b6..884a5502b59 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-20.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-20.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -march=skylake" } */
+/* { dg-options "-O2 -march=skylake -mtune-ctrl=avx256_store_by_pieces" } */
 
 extern char *dst;
 
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-21.c b/gcc/testsuite/gcc.target/i386/pr90773-21.c
index 16ad17f3cbb..5bbb387a3ea 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-21.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-21.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -march=skylake" } */
+/* { dg-options "-O2 -march=skylake -mtune-ctrl=avx256_store_by_pieces" } */
 
 extern char *dst;
 
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-22.c b/gcc/testsuite/gcc.target/i386/pr90773-22.c
index 45a8ff65a84..245a436b7eb 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-22.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-22.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -march=skylake" } */
+/* { dg-options "-O2 -march=skylake -mtune-ctrl=avx256_store_by_pieces" } */
 
 extern char *dst;
 
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-23.c b/gcc/testsuite/gcc.target/i386/pr90773-23.c
index 9256ce10ff0..ca4a86f30b8 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-23.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-23.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -march=skylake" } */
+/* { dg-options "-O2 -march=skylake -mtune-ctrl=avx256_store_by_pieces" } */
 
 extern char *dst;
 
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-26.c b/gcc/testsuite/gcc.target/i386/pr90773-26.c
index b2513c3a9c8..76fb79f2e20 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-26.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-26.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -march=skylake" } */
+/* { dg-options "-O2 -march=skylake -mtune-ctrl=avx256_move_by_pieces" } */
 
 struct S
 {
-- 
2.31.1


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] x86: Add TARGET_AVX256_[MOVE|STORE]_BY_PIECES
  2021-09-09  3:21 [PATCH] x86: Add TARGET_AVX256_[MOVE|STORE]_BY_PIECES H.J. Lu
@ 2021-09-09  5:09 ` Hongtao Liu
  0 siblings, 0 replies; 2+ messages in thread
From: Hongtao Liu @ 2021-09-09  5:09 UTC (permalink / raw)
  To: H.J. Lu; +Cc: GCC Patches, Hongtao Liu

On Thu, Sep 9, 2021 at 11:21 AM H.J. Lu via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> 1. Add TARGET_AVX256_MOVE_BY_PIECES to perform move by-pieces operation
> with 256-bit AVX instructions.
> 2. Add TARGET_AVX256_STORE_BY_PIECES to perform move and store by-pieces
> operations with 256-bit AVX instructions.
>
> They are enabled only for Intel Alder Lake and Intel processors with
> AVX512.

This patch fixes the regression on znver2 and kabylake.  Although we
don't know why a mov with ymm would behave differently on non-AVX512
machines (except for Alder Lake) than on AVX512 machines, the
microarchitecture tuning seems like a reasonable choice.
I'll install this patch if there are no objections in the next 72 hours.
>
> gcc/
>
>         PR target/101935
>         * config/i386/i386.h (TARGET_AVX256_MOVE_BY_PIECES): New.
>         (TARGET_AVX256_STORE_BY_PIECES): Likewise.
>         (MOVE_MAX): Check TARGET_AVX256_MOVE_BY_PIECES and
>         TARGET_AVX256_STORE_BY_PIECES instead of
>         TARGET_AVX256_SPLIT_UNALIGNED_LOAD and
>         TARGET_AVX256_SPLIT_UNALIGNED_STORE.
>         (STORE_MAX_PIECES): Check TARGET_AVX256_STORE_BY_PIECES instead
>         of TARGET_AVX256_SPLIT_UNALIGNED_STORE.
>         * config/i386/x86-tune.def (X86_TUNE_AVX256_MOVE_BY_PIECES): New.
>         (X86_TUNE_AVX256_STORE_BY_PIECES): Likewise.
>
> gcc/testsuite/
>
>         PR target/101935
>         * g++.target/i386/pr80566-1.C: Add
>         -mtune-ctrl=avx256_store_by_pieces.
>         * gcc.target/i386/pr100865-4a.c: Likewise.
>         * gcc.target/i386/pr100865-10a.c: Likewise.
>         * gcc.target/i386/pr90773-20.c: Likewise.
>         * gcc.target/i386/pr90773-21.c: Likewise.
>         * gcc.target/i386/pr90773-22.c: Likewise.
>         * gcc.target/i386/pr90773-23.c: Likewise.
>         * g++.target/i386/pr80566-2.C: Add
>         -mtune-ctrl=avx256_move_by_pieces.
>         * gcc.target/i386/eh_return-1.c: Likewise.
>         * gcc.target/i386/pr90773-26.c: Likewise.
>         * gcc.target/i386/pieces-memcpy-12.c: Replace -mtune=haswell
>         with -mtune-ctrl=avx256_move_by_pieces.
>         * gcc.target/i386/pieces-memcpy-15.c: Likewise.
>         * gcc.target/i386/pieces-memset-2.c: Replace -mtune=haswell
>         with -mtune-ctrl=avx256_store_by_pieces.
>         * gcc.target/i386/pieces-memset-5.c: Likewise.
>         * gcc.target/i386/pieces-memset-11.c: Likewise.
>         * gcc.target/i386/pieces-memset-14.c: Likewise.
>         * gcc.target/i386/pieces-memset-20.c: Likewise.
>         * gcc.target/i386/pieces-memset-23.c: Likewise.
>         * gcc.target/i386/pieces-memset-29.c: Likewise.
>         * gcc.target/i386/pieces-memset-30.c: Likewise.
>         * gcc.target/i386/pieces-memset-33.c: Likewise.
>         * gcc.target/i386/pieces-memset-34.c: Likewise.
>         * gcc.target/i386/pieces-memset-44.c: Likewise.
>         * gcc.target/i386/pieces-memset-37.c: Replace -mtune=generic
>         with -mtune-ctrl=avx256_store_by_pieces.
> ---
>  gcc/config/i386/i386.h                           | 10 +++++++---
>  gcc/config/i386/x86-tune.def                     | 11 +++++++++++
>  gcc/testsuite/g++.target/i386/pr80566-1.C        |  2 +-
>  gcc/testsuite/g++.target/i386/pr80566-2.C        |  2 +-
>  gcc/testsuite/gcc.target/i386/eh_return-1.c      |  2 +-
>  gcc/testsuite/gcc.target/i386/pieces-memcpy-12.c |  2 +-
>  gcc/testsuite/gcc.target/i386/pieces-memcpy-15.c |  2 +-
>  gcc/testsuite/gcc.target/i386/pieces-memset-11.c |  2 +-
>  gcc/testsuite/gcc.target/i386/pieces-memset-14.c |  2 +-
>  gcc/testsuite/gcc.target/i386/pieces-memset-2.c  |  2 +-
>  gcc/testsuite/gcc.target/i386/pieces-memset-20.c |  2 +-
>  gcc/testsuite/gcc.target/i386/pieces-memset-23.c |  2 +-
>  gcc/testsuite/gcc.target/i386/pieces-memset-29.c |  2 +-
>  gcc/testsuite/gcc.target/i386/pieces-memset-30.c |  2 +-
>  gcc/testsuite/gcc.target/i386/pieces-memset-33.c |  2 +-
>  gcc/testsuite/gcc.target/i386/pieces-memset-34.c |  2 +-
>  gcc/testsuite/gcc.target/i386/pieces-memset-37.c |  2 +-
>  gcc/testsuite/gcc.target/i386/pieces-memset-44.c |  2 +-
>  gcc/testsuite/gcc.target/i386/pieces-memset-5.c  |  2 +-
>  gcc/testsuite/gcc.target/i386/pr100865-10a.c     |  2 +-
>  gcc/testsuite/gcc.target/i386/pr100865-4a.c      |  2 +-
>  gcc/testsuite/gcc.target/i386/pr90773-20.c       |  2 +-
>  gcc/testsuite/gcc.target/i386/pr90773-21.c       |  2 +-
>  gcc/testsuite/gcc.target/i386/pr90773-22.c       |  2 +-
>  gcc/testsuite/gcc.target/i386/pr90773-23.c       |  2 +-
>  gcc/testsuite/gcc.target/i386/pr90773-26.c       |  2 +-
>  26 files changed, 42 insertions(+), 27 deletions(-)
>
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index f671dae9236..9d45fe9611b 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -403,6 +403,10 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
>         ix86_tune_features[X86_TUNE_AVOID_LEA_FOR_ADDR]
>  #define TARGET_SOFTWARE_PREFETCHING_BENEFICIAL \
>         ix86_tune_features[X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL]
> +#define TARGET_AVX256_MOVE_BY_PIECES \
> +       ix86_tune_features[X86_TUNE_AVX256_MOVE_BY_PIECES]
> +#define TARGET_AVX256_STORE_BY_PIECES \
> +       ix86_tune_features[X86_TUNE_AVX256_STORE_BY_PIECES]
>  #define TARGET_AVX256_SPLIT_REGS \
>         ix86_tune_features[X86_TUNE_AVX256_SPLIT_REGS]
>  #define TARGET_GENERAL_REGS_SSE_SPILL \
> @@ -1781,8 +1785,8 @@ typedef struct ix86_args {
>     ? 64 \
>     : ((TARGET_AVX \
>         && !TARGET_PREFER_AVX128 \
> -       && !TARGET_AVX256_SPLIT_UNALIGNED_LOAD \
> -       && !TARGET_AVX256_SPLIT_UNALIGNED_STORE) \
> +       && (TARGET_AVX256_MOVE_BY_PIECES \
> +          || TARGET_AVX256_STORE_BY_PIECES)) \
>        ? 32 \
>        : ((TARGET_SSE2 \
>           && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL \
> @@ -1799,7 +1803,7 @@ typedef struct ix86_args {
>        ? 64 \
>        : ((TARGET_AVX \
>           && !TARGET_PREFER_AVX128 \
> -         && !TARGET_AVX256_SPLIT_UNALIGNED_STORE) \
> +         && TARGET_AVX256_STORE_BY_PIECES) \
>           ? 32 \
>           : ((TARGET_SSE2 \
>               && TARGET_SSE_UNALIGNED_STORE_OPTIMAL) \
> diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
> index 8f55da89c92..2f221b1f8c9 100644
> --- a/gcc/config/i386/x86-tune.def
> +++ b/gcc/config/i386/x86-tune.def
> @@ -484,6 +484,17 @@ DEF_TUNE (X86_TUNE_AVX128_OPTIMAL, "avx128_optimal", m_BDVER | m_BTVER2
>     instructions in the auto-vectorizer.  */
>  DEF_TUNE (X86_TUNE_AVX256_OPTIMAL, "avx256_optimal", m_CORE_AVX512)
>
> +/* X86_TUNE_AVX256_MOVE_BY_PIECES: Optimize move_by_pieces with 256-bit
> +   AVX instructions.  */
> +DEF_TUNE (X86_TUNE_AVX256_MOVE_BY_PIECES, "avx256_move_by_pieces",
> +         m_ALDERLAKE | m_CORE_AVX512)
> +
> +/* X86_TUNE_AVX256_STORE_BY_PIECES: Optimize store_by_pieces with 256-bit
> +   AVX instructions.  */
> +DEF_TUNE (X86_TUNE_AVX256_STORE_BY_PIECES, "avx256_store_by_pieces",
> +         m_ALDERLAKE | m_CORE_AVX512)
> +
> +/*****************************************************************************/
>  /*****************************************************************************/
>  /* Historical relics: tuning flags that helps a specific old CPU designs     */
>  /*****************************************************************************/
> diff --git a/gcc/testsuite/g++.target/i386/pr80566-1.C b/gcc/testsuite/g++.target/i386/pr80566-1.C
> index 753f9740529..29da31d6bb6 100644
> --- a/gcc/testsuite/g++.target/i386/pr80566-1.C
> +++ b/gcc/testsuite/g++.target/i386/pr80566-1.C
> @@ -1,5 +1,5 @@
>  // { dg-do compile }
> -// { dg-options "-O2 -march=haswell" }
> +// { dg-options "-O2 -march=haswell -mtune-ctrl=avx256_store_by_pieces" }
>
>  #include <cstring>
>
> diff --git a/gcc/testsuite/g++.target/i386/pr80566-2.C b/gcc/testsuite/g++.target/i386/pr80566-2.C
> index 2a2e82d0a3a..9ffd2c8cadb 100644
> --- a/gcc/testsuite/g++.target/i386/pr80566-2.C
> +++ b/gcc/testsuite/g++.target/i386/pr80566-2.C
> @@ -1,5 +1,5 @@
>  // { dg-do compile }
> -// { dg-options "-O2 -march=haswell" }
> +// { dg-options "-O2 -march=haswell -mtune-ctrl=avx256_move_by_pieces" }
>
>  #include <cstring>
>
> diff --git a/gcc/testsuite/gcc.target/i386/eh_return-1.c b/gcc/testsuite/gcc.target/i386/eh_return-1.c
> index 671ba635e88..b21fd75fc93 100644
> --- a/gcc/testsuite/gcc.target/i386/eh_return-1.c
> +++ b/gcc/testsuite/gcc.target/i386/eh_return-1.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -march=haswell -mno-avx512f" } */
> +/* { dg-options "-O2 -march=haswell -mno-avx512f -mtune-ctrl=avx256_move_by_pieces" } */
>
>  struct _Unwind_Context
>  {
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-12.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-12.c
> index f1432ebe517..8a82baff5f1 100644
> --- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-12.c
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-12.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_move_by_pieces" } */
>
>  extern char *dst, *src;
>
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-15.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-15.c
> index 695e8c3fa67..4fb94ce7bd5 100644
> --- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-15.c
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-15.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_move_by_pieces" } */
>
>  extern char *dst, *src;
>
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-11.c b/gcc/testsuite/gcc.target/i386/pieces-memset-11.c
> index 3fb9038b04f..3802eb7c147 100644
> --- a/gcc/testsuite/gcc.target/i386/pieces-memset-11.c
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-11.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
>
>  extern char *dst;
>
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-14.c b/gcc/testsuite/gcc.target/i386/pieces-memset-14.c
> index 45ece482464..10bc085f83b 100644
> --- a/gcc/testsuite/gcc.target/i386/pieces-memset-14.c
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-14.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
>
>  extern char *dst;
>
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-2.c b/gcc/testsuite/gcc.target/i386/pieces-memset-2.c
> index 649f344e8f6..4ebfc4df090 100644
> --- a/gcc/testsuite/gcc.target/i386/pieces-memset-2.c
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-2.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
>
>  extern char *dst;
>
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-20.c b/gcc/testsuite/gcc.target/i386/pieces-memset-20.c
> index b8747e669e8..1dc4db180d3 100644
> --- a/gcc/testsuite/gcc.target/i386/pieces-memset-20.c
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-20.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
>
>  extern char *dst;
>
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-23.c b/gcc/testsuite/gcc.target/i386/pieces-memset-23.c
> index a3b4ffc18e0..9232864024e 100644
> --- a/gcc/testsuite/gcc.target/i386/pieces-memset-23.c
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-23.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
>
>  extern char *dst;
>
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-29.c b/gcc/testsuite/gcc.target/i386/pieces-memset-29.c
> index 650e6fe66a5..3b07a64e3f6 100644
> --- a/gcc/testsuite/gcc.target/i386/pieces-memset-29.c
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-29.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
>
>  extern char *dst;
>
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-30.c b/gcc/testsuite/gcc.target/i386/pieces-memset-30.c
> index dcec2c700fc..59595e6d3c4 100644
> --- a/gcc/testsuite/gcc.target/i386/pieces-memset-30.c
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-30.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=haswell" } */
> +/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune-ctrl=avx256_store_by_pieces" } */
>
>  extern char *dst;
>
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-33.c b/gcc/testsuite/gcc.target/i386/pieces-memset-33.c
> index a87d1b80ae6..68646223b0e 100644
> --- a/gcc/testsuite/gcc.target/i386/pieces-memset-33.c
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-33.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
>
>  extern char *dst;
>
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-34.c b/gcc/testsuite/gcc.target/i386/pieces-memset-34.c
> index 0c2f1ee6049..52a16a0292d 100644
> --- a/gcc/testsuite/gcc.target/i386/pieces-memset-34.c
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-34.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=haswell" } */
> +/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune-ctrl=avx256_store_by_pieces" } */
>
>  extern char *dst;
>
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-37.c b/gcc/testsuite/gcc.target/i386/pieces-memset-37.c
> index ec59497b116..fd09bd153ce 100644
> --- a/gcc/testsuite/gcc.target/i386/pieces-memset-37.c
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-37.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=generic" } */
> +/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune-ctrl=avx256_store_by_pieces" } */
>
>  void
>  foo (int a1, int a2, int a3, int a4, int a5, int a6, int x, char *dst)
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-44.c b/gcc/testsuite/gcc.target/i386/pieces-memset-44.c
> index ecc31be1a34..5986f8e8b23 100644
> --- a/gcc/testsuite/gcc.target/i386/pieces-memset-44.c
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-44.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
>
>  extern char *dst;
>
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-5.c b/gcc/testsuite/gcc.target/i386/pieces-memset-5.c
> index 3e95db5efef..e2379df71aa 100644
> --- a/gcc/testsuite/gcc.target/i386/pieces-memset-5.c
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-5.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
>
>  extern char *dst;
>
> diff --git a/gcc/testsuite/gcc.target/i386/pr100865-10a.c b/gcc/testsuite/gcc.target/i386/pr100865-10a.c
> index 98b6dfb16f3..1d849a381c0 100644
> --- a/gcc/testsuite/gcc.target/i386/pr100865-10a.c
> +++ b/gcc/testsuite/gcc.target/i386/pr100865-10a.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile { target int128 } } */
> -/* { dg-options "-O3 -march=skylake" } */
> +/* { dg-options "-O3 -march=skylake -mtune-ctrl=avx256_store_by_pieces" } */
>
>  extern __int128 array[16];
>
> diff --git a/gcc/testsuite/gcc.target/i386/pr100865-4a.c b/gcc/testsuite/gcc.target/i386/pr100865-4a.c
> index 365487337ae..8609d1128b8 100644
> --- a/gcc/testsuite/gcc.target/i386/pr100865-4a.c
> +++ b/gcc/testsuite/gcc.target/i386/pr100865-4a.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -march=skylake" } */
> +/* { dg-options "-O2 -march=skylake -mtune-ctrl=avx256_store_by_pieces" } */
>
>  extern char array[64];
>
> diff --git a/gcc/testsuite/gcc.target/i386/pr90773-20.c b/gcc/testsuite/gcc.target/i386/pr90773-20.c
> index e61e405f2b6..884a5502b59 100644
> --- a/gcc/testsuite/gcc.target/i386/pr90773-20.c
> +++ b/gcc/testsuite/gcc.target/i386/pr90773-20.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -march=skylake" } */
> +/* { dg-options "-O2 -march=skylake -mtune-ctrl=avx256_store_by_pieces" } */
>
>  extern char *dst;
>
> diff --git a/gcc/testsuite/gcc.target/i386/pr90773-21.c b/gcc/testsuite/gcc.target/i386/pr90773-21.c
> index 16ad17f3cbb..5bbb387a3ea 100644
> --- a/gcc/testsuite/gcc.target/i386/pr90773-21.c
> +++ b/gcc/testsuite/gcc.target/i386/pr90773-21.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -march=skylake" } */
> +/* { dg-options "-O2 -march=skylake -mtune-ctrl=avx256_store_by_pieces" } */
>
>  extern char *dst;
>
> diff --git a/gcc/testsuite/gcc.target/i386/pr90773-22.c b/gcc/testsuite/gcc.target/i386/pr90773-22.c
> index 45a8ff65a84..245a436b7eb 100644
> --- a/gcc/testsuite/gcc.target/i386/pr90773-22.c
> +++ b/gcc/testsuite/gcc.target/i386/pr90773-22.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -march=skylake" } */
> +/* { dg-options "-O2 -march=skylake -mtune-ctrl=avx256_store_by_pieces" } */
>
>  extern char *dst;
>
> diff --git a/gcc/testsuite/gcc.target/i386/pr90773-23.c b/gcc/testsuite/gcc.target/i386/pr90773-23.c
> index 9256ce10ff0..ca4a86f30b8 100644
> --- a/gcc/testsuite/gcc.target/i386/pr90773-23.c
> +++ b/gcc/testsuite/gcc.target/i386/pr90773-23.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -march=skylake" } */
> +/* { dg-options "-O2 -march=skylake -mtune-ctrl=avx256_store_by_pieces" } */
>
>  extern char *dst;
>
> diff --git a/gcc/testsuite/gcc.target/i386/pr90773-26.c b/gcc/testsuite/gcc.target/i386/pr90773-26.c
> index b2513c3a9c8..76fb79f2e20 100644
> --- a/gcc/testsuite/gcc.target/i386/pr90773-26.c
> +++ b/gcc/testsuite/gcc.target/i386/pr90773-26.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -march=skylake" } */
> +/* { dg-options "-O2 -march=skylake -mtune-ctrl=avx256_move_by_pieces" } */
>
>  struct S
>  {
> --
> 2.31.1
>


-- 
BR,
Hongtao

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2021-09-09  5:03 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-09  3:21 [PATCH] x86: Add TARGET_AVX256_[MOVE|STORE]_BY_PIECES H.J. Lu
2021-09-09  5:09 ` Hongtao Liu

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).