public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Robin Dapp <rdapp@linux.vnet.ibm.com>
To: Richard Biener <richard.guenther@gmail.com>
Cc: GCC Patches <gcc-patches@gcc.gnu.org>,
	"Bin.Cheng" <amker.cheng@gmail.com>
Subject: [PATCH 4/4] Vect peeling cost model
Date: Mon, 08 May 2017 16:27:00 -0000	[thread overview]
Message-ID: <8a345817-a775-ffad-958c-b2511bfe46e7@linux.vnet.ibm.com> (raw)
In-Reply-To: <CAFiYyc0TuwUY1oqovVQF93GFPBg=qA7q4+jRFvPuhF2ayVWbHQ@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 274 bytes --]

gcc/ChangeLog:

2017-05-08  Robin Dapp  <rdapp@linux.vnet.ibm.com>

	* tree-vect-data-refs.c (vect_peeling_hash_get_lowest_cost):
	Remove unused variable.
	(vect_enhance_data_refs_alignment):
	Compare the best peeling's costs to doing no peeling and choose no
	peeling if it is not more expensive.

[-- Attachment #2: gcc-peeling-p4.diff --]
[-- Type: text/x-patch, Size: 8739 bytes --]

diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index 786f826..67d2f57 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -1293,7 +1293,7 @@ vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot,
 {
   vect_peel_info elem = *slot;
   int dummy;
-  unsigned int inside_cost = 0, outside_cost = 0, i;
+  unsigned int inside_cost = 0, outside_cost = 0;
   gimple *stmt = DR_STMT (elem->dr);
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
@@ -1520,7 +1520,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
   enum dr_alignment_support supportable_dr_alignment;
   struct data_reference *dr0 = NULL, *first_store = NULL;
   struct data_reference *dr;
-  struct data_reference *dr0_known_align = NULL;
   unsigned int i, j;
   bool do_peeling = false;
   bool do_versioning = false;
@@ -1720,6 +1719,9 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
       || loop->inner)
     do_peeling = false;
 
+  struct _vect_peel_extended_info peel_for_known_alignment;
+  struct _vect_peel_extended_info peel_for_unknown_alignment;
+  struct _vect_peel_extended_info best_peel;
   unsigned int unknown_align_inside_cost = UINT_MAX;
   unsigned int unknown_align_outside_cost = UINT_MAX;
   unsigned int unknown_align_count = 0;
@@ -1731,74 +1733,72 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
       /* Check if the target requires to prefer stores over loads, i.e., if
          misaligned stores are more expensive than misaligned loads (taking
          drs with same alignment into account).  */
-      if (first_store && DR_IS_READ (dr0))
-        {
-	  unsigned int load_inside_cost = 0;
-	  unsigned int load_outside_cost = 0;
-	  unsigned int store_inside_cost = 0;
-	  unsigned int store_outside_cost = 0;
-	  stmt_vector_for_cost dummy;
-	  dummy.create (2);
-	  vect_get_peeling_costs_all_drs (dr0,
-					  &load_inside_cost,
-					  &load_outside_cost,
-					  &dummy, vf / 2, vf);
-	  dummy.release ();
-
+      unsigned int load_inside_cost = 0;
+      unsigned int load_outside_cost = 0;
+      unsigned int store_inside_cost = 0;
+      unsigned int store_outside_cost = 0;
+
+      stmt_vector_for_cost dummy;
+      dummy.create (2);
+      vect_get_peeling_costs_all_drs (dr0,
+				      &load_inside_cost,
+				      &load_outside_cost,
+				      &dummy, vf / 2, vf);
+      dummy.release ();
+
+      if (first_store)
+	{
 	  dummy.create (2);
 	  vect_get_peeling_costs_all_drs (first_store,
 					  &store_inside_cost,
 					  &store_outside_cost,
 					  &dummy, vf / 2, vf);
 	  dummy.release ();
+	}
+      else
+	{
+	  store_inside_cost = UINT_MAX;
+	  store_outside_cost = UINT_MAX;
+	}
 
-          if (load_inside_cost > store_inside_cost
-              || (load_inside_cost == store_inside_cost
-		  && load_outside_cost > store_outside_cost))
-	    {
-	      dr0 = first_store;
-	      unknown_align_inside_cost = store_inside_cost;
-	      unknown_align_outside_cost = store_outside_cost;
-	    }
-	  else
-	    {
-	      unknown_align_inside_cost = load_inside_cost;
-	      unknown_align_outside_cost = load_outside_cost;
-	    }
-
-	  stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
-	  prologue_cost_vec.create (2);
-	  epilogue_cost_vec.create (2);
+      if (load_inside_cost > store_inside_cost
+	  || (load_inside_cost == store_inside_cost
+	      && load_outside_cost > store_outside_cost))
+	{
+	  dr0 = first_store;
+	  unknown_align_inside_cost = store_inside_cost;
+	  unknown_align_outside_cost = store_outside_cost;
+	}
+      else
+	{
+	  unknown_align_inside_cost = load_inside_cost;
+	  unknown_align_outside_cost = load_outside_cost;
+	}
 
-	  int dummy2;
-	  unknown_align_outside_cost += vect_get_known_peeling_cost
-	    (loop_vinfo, vf / 2, &dummy2,
-	     &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
-	     &prologue_cost_vec, &epilogue_cost_vec);
+      stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
+      prologue_cost_vec.create (2);
+      epilogue_cost_vec.create (2);
 
-	  prologue_cost_vec.release ();
-	  epilogue_cost_vec.release ();
+      int dummy2;
+      unknown_align_outside_cost += vect_get_known_peeling_cost
+	(loop_vinfo, vf / 2, &dummy2,
+	 &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
+	 &prologue_cost_vec, &epilogue_cost_vec);
 
-	  unknown_align_count = 1 + STMT_VINFO_SAME_ALIGN_REFS
-	    (vinfo_for_stmt (DR_STMT (dr0))).length ();
-        }
+      prologue_cost_vec.release ();
+      epilogue_cost_vec.release ();
 
-      /* In case there are only loads with different unknown misalignments, use
-         peeling only if it may help to align other accesses in the loop or
-	 if it may help improving load bandwith when we'd end up using
-	 unaligned loads.  */
-      tree dr0_vt = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr0)));
-      if (!first_store
-	  && !STMT_VINFO_SAME_ALIGN_REFS (
-		  vinfo_for_stmt (DR_STMT (dr0))).length ()
-	  && (vect_supportable_dr_alignment (dr0, false)
-	      != dr_unaligned_supported
-	      || (builtin_vectorization_cost (vector_load, dr0_vt, 0)
-		  == builtin_vectorization_cost (unaligned_load, dr0_vt, -1))))
-        do_peeling = false;
+      unknown_align_count = 1 + STMT_VINFO_SAME_ALIGN_REFS
+	(vinfo_for_stmt (DR_STMT (dr0))).length ();
     }
 
-  struct _vect_peel_extended_info peel_for_known_alignment;
+  peel_for_unknown_alignment.peel_info.count = unknown_align_count;
+  peel_for_unknown_alignment.inside_cost = unknown_align_inside_cost;
+  peel_for_unknown_alignment.outside_cost = unknown_align_outside_cost;
+  peel_for_unknown_alignment.peel_info.npeel = 0;
+  peel_for_unknown_alignment.peel_info.dr = dr0;
+  best_peel = peel_for_unknown_alignment;
+
   peel_for_known_alignment.inside_cost = UINT_MAX;
   peel_for_known_alignment.outside_cost = UINT_MAX;
   peel_for_known_alignment.peel_info.count = 0;
@@ -1811,15 +1811,14 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
       /* Choose the best peeling from the hash table.  */
       peel_for_known_alignment = vect_peeling_hash_choose_best_peeling
 	(&peeling_htab, loop_vinfo, &npeel, &body_cost_vec);
-      dr0_known_align = peel_for_known_alignment.peel_info.dr;
     }
 
   /* Compare costs of peeling for known and unknown alignment. */
-  if (unknown_align_inside_cost > peel_for_known_alignment.inside_cost
+  if (peel_for_unknown_alignment.inside_cost > peel_for_known_alignment.inside_cost
       || (unknown_align_inside_cost == peel_for_known_alignment.inside_cost
 	  && unknown_align_outside_cost > peel_for_known_alignment.outside_cost))
     {
-      dr0 = dr0_known_align;
+      best_peel = peel_for_known_alignment;
     }
 
   /* We might still want to try to align the datarefs with unknown
@@ -1827,13 +1826,53 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
      less datarefs.  */
   if (peel_for_known_alignment.peel_info.count * 2 > unknown_align_count)
     {
-      dr0 = dr0_known_align;
+      best_peel = peel_for_known_alignment;
     }
 
-  if (dr0 == dr0_known_align && !npeel)
-    do_peeling = false;
-  if (dr0 == NULL)
-    do_peeling = false;
+  /* Calculate the penalty for no peeling, i.e. leaving everything
+     unaligned.
+     TODO: use something like an adapted vect_get_peeling_costs_all_drs.  */
+  unsigned nopeel_inside_cost = 0;
+  unsigned nopeel_outside_cost = 0;
+
+  stmt_vector_for_cost dummy;
+  dummy.create (2);
+  FOR_EACH_VEC_ELT (datarefs, i, dr)
+    {
+      vect_get_data_access_cost (dr, &nopeel_inside_cost,
+				 &nopeel_outside_cost, &dummy);
+    }
+  dummy.release ();
+
+  /* Add epilogue costs.  As we do no peeling for alignment here, no prologue
+     costs will be recorded.  */
+  stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
+  prologue_cost_vec.create (2);
+  epilogue_cost_vec.create (2);
+
+  int dummy2;
+  nopeel_outside_cost += vect_get_known_peeling_cost
+    (loop_vinfo, vf / 2, &dummy2,
+     &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
+     &prologue_cost_vec, &epilogue_cost_vec);
+
+  prologue_cost_vec.release ();
+  epilogue_cost_vec.release ();
+
+  npeel = best_peel.peel_info.npeel;
+  dr0 = best_peel.peel_info.dr;
+
+  /* Check if doing no peeling is not more expensive than the best peeling we
+     have so far.  */
+  if (!unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo))
+      && vect_supportable_dr_alignment (dr0, false)
+      && ((nopeel_inside_cost < best_peel.inside_cost)
+	  || (nopeel_inside_cost == best_peel.inside_cost
+	      && nopeel_outside_cost <= best_peel.outside_cost)))
+    {
+      do_peeling = false;
+      npeel = 0;
+    }
 
   if (do_peeling)
     {

  parent reply	other threads:[~2017-05-08 16:13 UTC|newest]

Thread overview: 51+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-04-11 14:38 [RFC] S/390: Alignment peeling prolog generation Robin Dapp
2017-04-11 14:57 ` Bin.Cheng
2017-04-11 15:03   ` Robin Dapp
2017-04-11 15:07     ` Bin.Cheng
2017-04-11 16:25   ` Richard Biener
2017-04-12  7:51     ` Robin Dapp
2017-04-12  7:58       ` Richard Biener
2017-05-04  9:04         ` Robin Dapp
2017-05-05 11:04           ` Richard Biener
2017-05-08 16:12             ` Robin Dapp
2017-05-09 10:38               ` Richard Biener
2017-05-11 11:17                 ` [PATCH 1/5] Vect peeling cost model Robin Dapp
2017-05-11 11:17                 ` [RFC] S/390: Alignment peeling prolog generation Robin Dapp
2017-05-11 12:15                   ` Richard Biener
2017-05-11 12:16                     ` Richard Biener
2017-05-11 12:48                       ` Richard Biener
2017-05-11 11:18                 ` [PATCH 2/5] Vect peeling cost model Robin Dapp
2017-05-11 11:19                 ` [PATCH 3/5] " Robin Dapp
2017-05-11 11:20                 ` [PATCH 4/5] " Robin Dapp
2017-05-11 15:30                   ` [PATCH 4/5 v2] " Robin Dapp
2017-05-12  9:36                     ` Richard Biener
2017-05-23 15:58                       ` [PATCH 2/5 v3] " Robin Dapp
2017-05-23 19:25                         ` Richard Sandiford
2017-05-24  7:37                           ` Robin Dapp
2017-05-24  7:53                             ` Richard Sandiford
2017-05-23 15:58                       ` [PATCH 1/5 " Robin Dapp
2017-05-23 15:58                       ` [PATCH 0/5 " Robin Dapp
2017-05-24  7:51                         ` Richard Biener
2017-05-24 11:57                           ` Robin Dapp
2017-05-24 13:56                             ` Richard Biener
2017-06-03 17:12                         ` Andreas Schwab
2017-06-06  7:13                           ` Robin Dapp
2017-06-06 17:26                             ` Andreas Schwab
2017-06-07 10:50                               ` Robin Dapp
2017-06-07 11:43                                 ` Andreas Schwab
2017-05-23 15:59                       ` [PATCH 4/5 " Robin Dapp
2017-05-31 13:56                         ` Christophe Lyon
2017-05-31 14:37                           ` Robin Dapp
2017-05-31 14:49                             ` Christophe Lyon
2017-05-23 15:59                       ` [PATCH 5/5 " Robin Dapp
2017-05-23 16:02                       ` [PATCH 3/5 " Robin Dapp
2017-05-11 11:59                 ` [PATCH 5/5] " Robin Dapp
2017-05-08 16:13             ` [PATCH 3/4] " Robin Dapp
2017-05-09 10:41               ` Richard Biener
2017-05-08 16:27             ` Robin Dapp [this message]
2017-05-09 10:55               ` [PATCH 4/4] " Richard Biener
2017-05-04  9:04         ` [PATCH 1/3] " Robin Dapp
2017-05-05 10:32           ` Richard Biener
2017-05-04  9:07         ` [PATCH 2/3] " Robin Dapp
2017-05-05 10:37           ` Richard Biener
2017-05-04  9:14         ` [PATCH 3/3] " Robin Dapp

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=8a345817-a775-ffad-958c-b2511bfe46e7@linux.vnet.ibm.com \
    --to=rdapp@linux.vnet.ibm.com \
    --cc=amker.cheng@gmail.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=richard.guenther@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).