public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
From: hongtao Liu <liuhongt@gcc.gnu.org>
To: gcc-cvs@gcc.gnu.org
Subject: [gcc r12-5390] Reduce cost of aligned sse register store.
Date: Fri, 19 Nov 2021 01:23:03 +0000 (GMT)	[thread overview]
Message-ID: <20211119012303.BC2733858422@sourceware.org> (raw)

https://gcc.gnu.org/g:d3152981f71eef16e50246a94819c39ff1489c70

commit r12-5390-gd3152981f71eef16e50246a94819c39ff1489c70
Author: liuhongt <hongtao.liu@intel.com>
Date:   Sat Oct 9 09:42:10 2021 +0800

    Reduce cost of aligned sse register store.
    
    Make the cost of aligned 256/512-bit SSE register stores equal to that of
    unaligned ones to avoid odd alignment peeling.
    
    Impact for SPEC2017 on CLX:
    fprate:
      503.bwaves_r    BuildSame
      507.cactuBSSN_r     -0.22
      508.namd_r          -0.02
      510.parest_r        -0.28
      511.povray_r        -0.20
      519.lbm_r       BuildSame
      521.wrf_r           -0.58
      526.blender_r       -0.30
      527.cam4_r           1.07
      538.imagick_r        0.01
      544.nab_r           -0.09
      549.fotonik3d_r BuildSame
      554.roms_r      BuildSame
    intrate:
      500.perlbench_r     -0.25
      502.gcc_r           -0.15
      505.mcf_r       BuildSame
      520.omnetpp_r        1.03
      523.xalancbmk_r     -0.13
      525.x264_r          -0.05
      531.deepsjeng_r     -0.27
      541.leela_r         -0.24
      548.exchange2_r     -0.06
      557.xz_r            -0.10
      999.specrand_ir      2.69
    
    gcc/ChangeLog:
    
            PR target/102543
            * config/i386/x86-tune-costs.h (skylake_cost): Reduce cost of
            storing 256/512-bit SSE register to be equal to cost of
            unaligned store to avoid odd alignment peeling.
            (icelake_cost): Ditto.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/i386/pr102543.c: New test.

Diff:
---
 gcc/config/i386/x86-tune-costs.h         |  4 ++--
 gcc/testsuite/gcc.target/i386/pr102543.c | 35 ++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index dd5563d2e64..60d50c97fca 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -1903,7 +1903,7 @@ struct processor_costs skylake_cost = {
   {6, 6, 6},				/* cost of storing integer registers */
   {6, 6, 6, 10, 20},			/* cost of loading SSE register
 					   in 32bit, 64bit, 128bit, 256bit and 512bit */
-  {8, 8, 8, 12, 24},			/* cost of storing SSE register
+  {8, 8, 8, 8, 16},			/* cost of storing SSE register
 					   in 32bit, 64bit, 128bit, 256bit and 512bit */
   {6, 6, 6, 10, 20},			/* cost of unaligned loads.  */
   {8, 8, 8, 8, 16},			/* cost of unaligned stores.  */
@@ -2029,7 +2029,7 @@ struct processor_costs icelake_cost = {
   {6, 6, 6},				/* cost of storing integer registers */
   {6, 6, 6, 10, 20},			/* cost of loading SSE register
 					   in 32bit, 64bit, 128bit, 256bit and 512bit */
-  {8, 8, 8, 12, 24},			/* cost of storing SSE register
+  {8, 8, 8, 8, 16},			/* cost of storing SSE register
 					   in 32bit, 64bit, 128bit, 256bit and 512bit */
   {6, 6, 6, 10, 20},			/* cost of unaligned loads.  */
   {8, 8, 8, 8, 16},			/* cost of unaligned stores.  */
diff --git a/gcc/testsuite/gcc.target/i386/pr102543.c b/gcc/testsuite/gcc.target/i386/pr102543.c
new file mode 100644
index 00000000000..893eb9a5902
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102543.c
@@ -0,0 +1,35 @@
+/* PR target/102543 */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -march=skylake-avx512 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-not "MEM\\\[" "optimized" } } */
+
+struct a
+{
+  int a[100];
+};
+typedef struct a misaligned_t __attribute__ ((aligned (8)));
+typedef struct a aligned_t __attribute__ ((aligned (32)));
+
+__attribute__ ((used))
+__attribute__ ((noinline))
+void
+t(void *a, int misaligned, aligned_t *d)
+{
+  int i,v;
+  for (i=0;i<100;i++)
+    {
+      if (misaligned)
+	v=((misaligned_t *)a)->a[i];
+      else
+	v=((aligned_t *)a)->a[i];
+      d->a[i]+=v;
+    }
+}
+struct b {int v; misaligned_t m;aligned_t aa;} b;
+aligned_t d;
+int
+main()
+{
+  t(&b.m, 1, &d);
+  return 0;
+}


                 reply	other threads:[~2021-11-19  1:23 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211119012303.BC2733858422@sourceware.org \
    --to=liuhongt@gcc.gnu.org \
    --cc=gcc-cvs@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).