public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Jan Hubicka <hubicka@kam.mff.cuni.cz>
To: gcc-patches@gcc.gnu.org, mjambor@suse.cz
Subject: Disable gathers on zen3 for vectors with few elements
Date: Mon, 28 Mar 2022 00:49:55 +0200	[thread overview]
Message-ID: <YkDqEwdO0BFgAJOy@kam.mff.cuni.cz> (raw)

Hi,
as seen on TSVC and Spec2017, the Zen3 gather instruction is a win only for
vectors with 8 elements.  At the time I was implementing the tuning, the
vectorizer did not know how to open-code gathers and thus it was still a win
to enable them for shorter vectors, but this has changed.

The following are results on Zen3 machine:

| Benchmark       | Master | Rate | Patch | Rate |     % |
|-----------------+--------+------+-------+------+-------|
| 500.perlbench_r |    246 | 6.47 |   250 | 6.36 |  1.63 |
| 502.gcc_r       |    215 | 6.59 |   215 | 6.59 |  0.00 |
| 505.mcf_r       |    299 | 5.40 |   299 | 5.41 |  0.00 |
| 520.omnetpp_r   |    250 | 5.25 |   249 | 5.27 | -0.40 |
| 523.xalancbmk_r |    197 | 5.37 |   195 | 5.43 | -1.02 |
| 525.x264_r      |    160 | 11.0 |   160 | 11.0 |  0.00 |
| 531.deepsjeng_r |    242 | 4.73 |   240 | 4.78 | -0.83 |
| 541.leela_r     |    353 | 4.70 |   355 | 4.67 |  0.57 |
| 548.exchange2_r |    146 | 17.9 |   146 | 17.9 |  0.00 |
| 557.xz_r        |    290 | 3.72 |   291 | 3.71 |  0.34 |
|-----------------+--------+------+-------+------+-------|
| Geomean         |        | 6.34 |       | 6.34 |       |

| Benchmark       | Master | Rate | Patch | Rate |      % |
|-----------------+--------+------+-------+------+--------|
| 503.bwaves_r    |    130 | 77.2 |   130 | 77.1 |   0.00 |
| 507.cactuBSSN_r |    246 | 5.16 |   245 | 5.17 |  -0.41 |
| 508.namd_r      |    163 | 5.84 |   162 | 5.85 |  -0.61 |
| 510.parest_r    |    277 | 9.45 |   218 | 12.0 | -21.30 |
| 511.povray_r    |    286 | 8.17 |   281 | 8.31 |  -1.75 |
| 519.lbm_r       |    138 | 7.62 |   137 | 7.67 |  -0.72 |
| 521.wrf_r       |    166 | 13.5 |   167 | 13.5 |   0.60 |
| 526.blender_r   |    214 | 7.13 |   215 | 7.10 |   0.47 |
| 527.cam4_r      |    176 | 9.92 |   173 | 10.1 |  -1.70 |
| 538.imagick_r   |    306 | 8.13 |   315 | 7.90 |   2.94 |
| 544.nab_r       |    199 | 8.46 |   199 | 8.44 |   0.00 |
| 549.fotonik3d_r |    254 | 15.4 |   243 | 16.1 |  -4.33 |
| 554.roms_r      |    210 | 7.57 |   210 | 7.58 |   0.00 |
|-----------------+--------+------+-------+------+--------|
| Geomean         |        | 10.0 |       | 10.3 |        |

So the main wins are on parest and fotonik.  I looked into imagemagick and it
looks like noise - the benchmarks were run by Martin and the slowdown did not
reproduce for me on my zen box.

Bootstrapped/regtested x86_64-linux.  I plan to commit tomorrow if there are no
complaints.

Honza

gcc/ChangeLog:

2022-03-28  Jan Hubicka  <hubicka@ucw.cz>

	* config/i386/i386-builtins.cc (ix86_vectorize_builtin_gather): Test
	TARGET_USE_GATHER_2PARTS and TARGET_USE_GATHER_4PARTS.
	* config/i386/i386.h (TARGET_USE_GATHER_2PARTS): New macro.
	(TARGET_USE_GATHER_4PARTS): New macro.
	* config/i386/x86-tune.def (X86_TUNE_USE_GATHER_2PARTS): New tune.
	(X86_TUNE_USE_GATHER_4PARTS): New tune.

diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc
index 2570501ae7e..4a222c9f2c7 100644
--- a/gcc/config/i386/i386-builtins.cc
+++ b/gcc/config/i386/i386-builtins.cc
@@ -1785,7 +1785,12 @@ ix86_vectorize_builtin_gather (const_tree mem_vectype,
   bool si;
   enum ix86_builtins code;
 
-  if (! TARGET_AVX2 || !TARGET_USE_GATHER)
+  if (! TARGET_AVX2
+      || (known_eq (TYPE_VECTOR_SUBPARTS (mem_vectype), (unsigned)2)
+	  ? !TARGET_USE_GATHER_2PARTS
+	  : (known_eq (TYPE_VECTOR_SUBPARTS (mem_vectype), (unsigned)4)
+	     ? !TARGET_USE_GATHER_4PARTS
+	     : !TARGET_USE_GATHER)))
     return NULL_TREE;
 
   if ((TREE_CODE (index_type) != INTEGER_TYPE
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index b92955177fe..363082ba47b 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -390,6 +390,10 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
 	ix86_tune_features[X86_TUNE_SLOW_PSHUFB]
 #define TARGET_AVOID_4BYTE_PREFIXES \
 	ix86_tune_features[X86_TUNE_AVOID_4BYTE_PREFIXES]
+#define TARGET_USE_GATHER_2PARTS \
+	ix86_tune_features[X86_TUNE_USE_GATHER_2PARTS]
+#define TARGET_USE_GATHER_4PARTS \
+	ix86_tune_features[X86_TUNE_USE_GATHER_4PARTS]
 #define TARGET_USE_GATHER \
 	ix86_tune_features[X86_TUNE_USE_GATHER]
 #define TARGET_FUSE_CMP_AND_BRANCH_32 \
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 82ca0ae63ac..09e3cf794db 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -464,7 +464,18 @@ DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes",
 	  m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE
 	  | m_INTEL)
 
-/* X86_TUNE_USE_GATHER: Use gather instructions.  */
+/* X86_TUNE_USE_GATHER_2PARTS: Use gather instructions for vectors with 2
+   elements.  */
+DEF_TUNE (X86_TUNE_USE_GATHER_2PARTS, "use_gather_2parts",
+	  ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ALDERLAKE | m_GENERIC))
+
+/* X86_TUNE_USE_GATHER_4PARTS: Use gather instructions for vectors with 4
+   elements.  */
+DEF_TUNE (X86_TUNE_USE_GATHER_4PARTS, "use_gather_4parts",
+	  ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ALDERLAKE | m_GENERIC))
+
+/* X86_TUNE_USE_GATHER: Use gather instructions for vectors with 8 or more
+   elements.  */
 DEF_TUNE (X86_TUNE_USE_GATHER, "use_gather",
 	  ~(m_ZNVER1 | m_ZNVER2 | m_ALDERLAKE | m_GENERIC))
 

             reply	other threads:[~2022-03-27 22:49 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-03-27 22:49 Jan Hubicka [this message]
2022-03-28  8:41 ` Richard Biener

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=YkDqEwdO0BFgAJOy@kam.mff.cuni.cz \
    --to=hubicka@kam.mff.cuni.cz \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=mjambor@suse.cz \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).